4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2013, 2016 Joyent, Inc. All rights reserved.
24 * Copyright (c) 2017 by Delphix. All rights reserved.
28 * utility routines for the /dev fs
31 #include <sys/types.h>
32 #include <sys/param.h>
33 #include <sys/t_lock.h>
34 #include <sys/systm.h>
35 #include <sys/sysmacros.h>
39 #include <sys/vnode.h>
41 #include <sys/fcntl.h>
42 #include <sys/flock.h>
45 #include <sys/errno.h>
48 #include <sys/dirent.h>
49 #include <sys/pathname.h>
50 #include <sys/cmn_err.h>
51 #include <sys/debug.h>
53 #include <sys/policy.h>
54 #include <fs/fs_subr.h>
55 #include <sys/mount.h>
56 #include <sys/fs/snode.h>
57 #include <sys/fs/dv_node.h>
58 #include <sys/fs/sdev_impl.h>
59 #include <sys/sunndi.h>
60 #include <sys/sunmdi.h>
64 #include <sys/modctl.h>
/* tunable debug controls: message verbosity and kmem cache debug flags */
int sdev_debug = 0x00000001;
int sdev_debug_cache_flags = 0;
74 /* prototype memory vattrs */
75 vattr_t sdev_vattr_dir
= {
76 AT_TYPE
|AT_MODE
|AT_UID
|AT_GID
, /* va_mask */
78 SDEV_DIRMODE_DEFAULT
, /* va_mode */
79 SDEV_UID_DEFAULT
, /* va_uid */
80 SDEV_GID_DEFAULT
, /* va_gid */
94 vattr_t sdev_vattr_lnk
= {
95 AT_TYPE
|AT_MODE
, /* va_mask */
97 SDEV_LNKMODE_DEFAULT
, /* va_mode */
98 SDEV_UID_DEFAULT
, /* va_uid */
99 SDEV_GID_DEFAULT
, /* va_gid */
113 vattr_t sdev_vattr_blk
= {
114 AT_TYPE
|AT_MODE
|AT_UID
|AT_GID
, /* va_mask */
116 S_IFBLK
| SDEV_DEVMODE_DEFAULT
, /* va_mode */
117 SDEV_UID_DEFAULT
, /* va_uid */
118 SDEV_GID_DEFAULT
, /* va_gid */
132 vattr_t sdev_vattr_chr
= {
133 AT_TYPE
|AT_MODE
|AT_UID
|AT_GID
, /* va_mask */
135 S_IFCHR
| SDEV_DEVMODE_DEFAULT
, /* va_mode */
136 SDEV_UID_DEFAULT
, /* va_uid */
137 SDEV_GID_DEFAULT
, /* va_gid */
151 kmem_cache_t
*sdev_node_cache
; /* sdev_node cache */
152 int devtype
; /* fstype */
155 static struct vnodeops
*sdev_get_vop(struct sdev_node
*);
156 static void sdev_set_no_negcache(struct sdev_node
*);
157 static fs_operation_def_t
*sdev_merge_vtab(const fs_operation_def_t
[]);
158 static void sdev_free_vtab(fs_operation_def_t
*);
161 sdev_prof_free(struct sdev_node
*dv
)
163 ASSERT(!SDEV_IS_GLOBAL(dv
));
164 nvlist_free(dv
->sdev_prof
.dev_name
);
165 nvlist_free(dv
->sdev_prof
.dev_map
);
166 nvlist_free(dv
->sdev_prof
.dev_symlink
);
167 nvlist_free(dv
->sdev_prof
.dev_glob_incdir
);
168 nvlist_free(dv
->sdev_prof
.dev_glob_excdir
);
169 bzero(&dv
->sdev_prof
, sizeof (dv
->sdev_prof
));
172 /* sdev_node cache constructor */
175 i_sdev_node_ctor(void *buf
, void *cfarg
, int flag
)
177 struct sdev_node
*dv
= (struct sdev_node
*)buf
;
180 bzero(buf
, sizeof (struct sdev_node
));
181 vp
= dv
->sdev_vnode
= vn_alloc(flag
);
186 rw_init(&dv
->sdev_contents
, NULL
, RW_DEFAULT
, NULL
);
190 /* sdev_node cache destructor */
193 i_sdev_node_dtor(void *buf
, void *arg
)
195 struct sdev_node
*dv
= (struct sdev_node
*)buf
;
196 struct vnode
*vp
= SDEVTOV(dv
);
198 rw_destroy(&dv
->sdev_contents
);
202 /* initialize sdev_node cache */
204 sdev_node_cache_init()
209 flags
= sdev_debug_cache_flags
;
211 sdcmn_err(("cache debug flags 0x%x\n", flags
));
214 ASSERT(sdev_node_cache
== NULL
);
215 sdev_node_cache
= kmem_cache_create("sdev_node_cache",
216 sizeof (struct sdev_node
), 0, i_sdev_node_ctor
, i_sdev_node_dtor
,
217 NULL
, NULL
, NULL
, flags
);
220 /* destroy sdev_node cache */
222 sdev_node_cache_fini()
224 ASSERT(sdev_node_cache
!= NULL
);
225 kmem_cache_destroy(sdev_node_cache
);
226 sdev_node_cache
= NULL
;
230 * Compare two nodes lexographically to balance avl tree
233 sdev_compare_nodes(const struct sdev_node
*dv1
, const struct sdev_node
*dv2
)
236 if ((rv
= strcmp(dv1
->sdev_name
, dv2
->sdev_name
)) == 0)
238 return ((rv
< 0) ? -1 : 1);
242 sdev_set_nodestate(struct sdev_node
*dv
, sdev_node_state_t state
)
245 ASSERT(RW_WRITE_HELD(&dv
->sdev_contents
));
246 dv
->sdev_state
= state
;
250 sdev_attr_update(struct sdev_node
*dv
, vattr_t
*vap
)
256 ASSERT(dv
->sdev_attr
);
259 attrp
= dv
->sdev_attr
;
262 attrp
->va_type
= vap
->va_type
;
264 attrp
->va_mode
= vap
->va_mode
;
266 attrp
->va_uid
= vap
->va_uid
;
268 attrp
->va_gid
= vap
->va_gid
;
270 attrp
->va_rdev
= vap
->va_rdev
;
273 attrp
->va_atime
= (mask
& AT_ATIME
) ? vap
->va_atime
: now
;
274 attrp
->va_mtime
= (mask
& AT_MTIME
) ? vap
->va_mtime
: now
;
275 attrp
->va_ctime
= (mask
& AT_CTIME
) ? vap
->va_ctime
: now
;
279 sdev_attr_alloc(struct sdev_node
*dv
, vattr_t
*vap
)
281 ASSERT(dv
->sdev_attr
== NULL
);
282 ASSERT(vap
->va_mask
& AT_TYPE
);
283 ASSERT(vap
->va_mask
& AT_MODE
);
285 dv
->sdev_attr
= kmem_zalloc(sizeof (struct vattr
), KM_SLEEP
);
286 sdev_attr_update(dv
, vap
);
289 /* alloc and initialize a sdev_node */
291 sdev_nodeinit(struct sdev_node
*ddv
, char *nm
, struct sdev_node
**newdv
,
294 struct sdev_node
*dv
= NULL
;
297 devname_handle_t
*dhl
;
299 nmlen
= strlen(nm
) + 1;
300 if (nmlen
> MAXNAMELEN
) {
301 sdcmn_err9(("sdev_nodeinit: node name %s"
304 return (ENAMETOOLONG
);
307 dv
= kmem_cache_alloc(sdev_node_cache
, KM_SLEEP
);
309 dv
->sdev_name
= kmem_alloc(nmlen
, KM_SLEEP
);
310 bcopy(nm
, dv
->sdev_name
, nmlen
);
311 dv
->sdev_namelen
= nmlen
- 1; /* '\0' not included */
312 len
= strlen(ddv
->sdev_path
) + strlen(nm
) + 2;
313 dv
->sdev_path
= kmem_alloc(len
, KM_SLEEP
);
314 (void) snprintf(dv
->sdev_path
, len
, "%s/%s", ddv
->sdev_path
, nm
);
315 /* overwritten for VLNK nodes */
316 dv
->sdev_symlink
= NULL
;
320 vp
->v_vfsp
= SDEVTOV(ddv
)->v_vfsp
;
322 vp
->v_type
= vap
->va_type
;
325 * initialized to the parent's vnodeops.
326 * maybe overwriten for a VDIR
328 vn_setops(vp
, vn_getops(SDEVTOV(ddv
)));
331 dv
->sdev_dotdot
= NULL
;
332 dv
->sdev_attrvp
= NULL
;
334 sdev_attr_alloc(dv
, vap
);
336 dv
->sdev_attr
= NULL
;
339 dv
->sdev_ino
= sdev_mkino(dv
);
340 dv
->sdev_nlink
= 0; /* updated on insert */
341 dv
->sdev_flags
= ddv
->sdev_flags
; /* inherit from the parent first */
342 dv
->sdev_flags
|= SDEV_BUILD
;
343 mutex_init(&dv
->sdev_lookup_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
344 cv_init(&dv
->sdev_lookup_cv
, NULL
, CV_DEFAULT
, NULL
);
345 if (SDEV_IS_GLOBAL(ddv
)) {
346 dv
->sdev_flags
|= SDEV_GLOBAL
;
347 dhl
= &(dv
->sdev_handle
);
350 sdev_set_no_negcache(dv
);
351 dv
->sdev_gdir_gen
= 0;
353 dv
->sdev_flags
&= ~SDEV_GLOBAL
;
354 dv
->sdev_origin
= NULL
; /* set later */
355 bzero(&dv
->sdev_prof
, sizeof (dv
->sdev_prof
));
356 dv
->sdev_ldir_gen
= 0;
357 dv
->sdev_devtree_gen
= 0;
360 rw_enter(&dv
->sdev_contents
, RW_WRITER
);
361 sdev_set_nodestate(dv
, SDEV_INIT
);
362 rw_exit(&dv
->sdev_contents
);
369 * Transition a sdev_node into SDEV_READY state. If this fails, it is up to the
370 * caller to transition the node to the SDEV_ZOMBIE state.
373 sdev_nodeready(struct sdev_node
*dv
, struct vattr
*vap
, struct vnode
*avp
,
374 void *args
, struct cred
*cred
)
377 struct vnode
*vp
= SDEVTOV(dv
);
380 ASSERT(dv
&& (dv
->sdev_state
!= SDEV_READY
) && vap
);
384 vp
->v_rdev
= vap
->va_rdev
;
385 rw_enter(&dv
->sdev_contents
, RW_WRITER
);
388 dv
->sdev_flags
&= ~SDEV_PERSIST
;
389 dv
->sdev_flags
&= ~SDEV_DYNAMIC
;
390 vn_setops(vp
, sdev_get_vop(dv
)); /* from internal vtab */
391 ASSERT(dv
->sdev_dotdot
);
392 ASSERT(SDEVTOV(dv
->sdev_dotdot
)->v_type
== VDIR
);
393 vp
->v_rdev
= SDEVTOV(dv
->sdev_dotdot
)->v_rdev
;
394 avl_create(&dv
->sdev_entries
,
395 (int (*)(const void *, const void *))sdev_compare_nodes
,
396 sizeof (struct sdev_node
),
397 offsetof(struct sdev_node
, sdev_avllink
));
398 } else if (type
== VLNK
) {
401 dv
->sdev_symlink
= i_ddi_strdup((char *)args
, KM_SLEEP
);
406 if (!(SDEV_IS_GLOBAL(dv
))) {
407 dv
->sdev_origin
= (struct sdev_node
*)args
;
408 dv
->sdev_flags
&= ~SDEV_PERSIST
;
412 * shadow node is created here OR
413 * if failed (indicated by dv->sdev_attrvp == NULL),
414 * created later in sdev_setattr
417 dv
->sdev_attrvp
= avp
;
419 if (dv
->sdev_attr
== NULL
) {
420 sdev_attr_alloc(dv
, vap
);
422 sdev_attr_update(dv
, vap
);
425 if ((dv
->sdev_attrvp
== NULL
) && SDEV_IS_PERSIST(dv
))
426 error
= sdev_shadow_node(dv
, cred
);
430 /* transition to READY state */
431 sdev_set_nodestate(dv
, SDEV_READY
);
432 sdev_nc_node_exists(dv
);
434 rw_exit(&dv
->sdev_contents
);
439 * Build the VROOT sdev_node.
443 sdev_mkroot(struct vfs
*vfsp
, dev_t devdev
, struct vnode
*mvp
,
444 struct vnode
*avp
, struct cred
*cred
)
446 struct sdev_node
*dv
;
448 char devdir
[] = "/dev";
450 ASSERT(sdev_node_cache
!= NULL
);
452 dv
= kmem_cache_alloc(sdev_node_cache
, KM_SLEEP
);
459 vn_setops(vp
, sdev_vnodeops
); /* apply the default vnodeops at /dev */
463 dv
->sdev_name
= i_ddi_strdup(
464 (char *)refstr_value(vfsp
->vfs_mntpt
), KM_SLEEP
);
466 /* vfs_mountdev1 set mount point later */
467 dv
->sdev_name
= i_ddi_strdup("/dev", KM_SLEEP
);
468 dv
->sdev_namelen
= strlen(dv
->sdev_name
); /* '\0' not included */
469 dv
->sdev_path
= i_ddi_strdup(devdir
, KM_SLEEP
);
470 dv
->sdev_ino
= SDEV_ROOTINO
;
471 dv
->sdev_nlink
= 2; /* name + . (no sdev_insert) */
472 dv
->sdev_dotdot
= dv
; /* .. == self */
473 dv
->sdev_attrvp
= avp
;
474 dv
->sdev_attr
= NULL
;
475 mutex_init(&dv
->sdev_lookup_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
476 cv_init(&dv
->sdev_lookup_cv
, NULL
, CV_DEFAULT
, NULL
);
477 if (strcmp(dv
->sdev_name
, "/dev") == 0) {
478 dv
->sdev_flags
= SDEV_BUILD
|SDEV_GLOBAL
|SDEV_PERSIST
;
479 bzero(&dv
->sdev_handle
, sizeof (dv
->sdev_handle
));
480 dv
->sdev_gdir_gen
= 0;
482 dv
->sdev_flags
= SDEV_BUILD
;
483 dv
->sdev_flags
&= ~SDEV_PERSIST
;
484 bzero(&dv
->sdev_prof
, sizeof (dv
->sdev_prof
));
485 dv
->sdev_ldir_gen
= 0;
486 dv
->sdev_devtree_gen
= 0;
489 avl_create(&dv
->sdev_entries
,
490 (int (*)(const void *, const void *))sdev_compare_nodes
,
491 sizeof (struct sdev_node
),
492 offsetof(struct sdev_node
, sdev_avllink
));
494 rw_enter(&dv
->sdev_contents
, RW_WRITER
);
495 sdev_set_nodestate(dv
, SDEV_READY
);
496 rw_exit(&dv
->sdev_contents
);
497 sdev_nc_node_exists(dv
);
501 /* directory dependent vop table */
502 struct sdev_vop_table
{
503 char *vt_name
; /* subdirectory name */
504 const fs_operation_def_t
*vt_service
; /* vnodeops table */
505 struct vnodeops
*vt_vops
; /* constructed vop */
506 struct vnodeops
**vt_global_vops
; /* global container for vop */
507 int (*vt_vtor
)(struct sdev_node
*); /* validate sdev_node */
512 * A nice improvement would be to provide a plug-in mechanism
513 * for this table instead of a const table.
515 static struct sdev_vop_table vtab
[] =
517 { "pts", devpts_vnodeops_tbl
, NULL
, &devpts_vnodeops
, devpts_validate
,
518 SDEV_DYNAMIC
| SDEV_VTOR
},
520 { "vt", devvt_vnodeops_tbl
, NULL
, &devvt_vnodeops
, devvt_validate
,
521 SDEV_DYNAMIC
| SDEV_VTOR
},
523 { "zvol", devzvol_vnodeops_tbl
, NULL
, &devzvol_vnodeops
,
524 devzvol_validate
, SDEV_ZONED
| SDEV_DYNAMIC
| SDEV_VTOR
| SDEV_SUBDIR
},
526 { "zcons", NULL
, NULL
, NULL
, NULL
, SDEV_NO_NCACHE
},
528 { "net", devnet_vnodeops_tbl
, NULL
, &devnet_vnodeops
, devnet_validate
,
529 SDEV_DYNAMIC
| SDEV_VTOR
},
531 { "ipnet", devipnet_vnodeops_tbl
, NULL
, &devipnet_vnodeops
,
532 devipnet_validate
, SDEV_DYNAMIC
| SDEV_VTOR
| SDEV_NO_NCACHE
},
535 * SDEV_DYNAMIC: prevent calling out to devfsadm, since only the
536 * lofi driver controls child nodes.
538 * SDEV_PERSIST: ensure devfsadm knows to clean up any persisted
539 * stale nodes (e.g. from devfsadm -R).
541 * In addition, devfsadm knows not to attempt a rmdir: a zone
542 * may hold a reference, which would zombify the node,
543 * preventing a mkdir.
546 { "lofi", NULL
, NULL
, NULL
, NULL
,
547 SDEV_ZONED
| SDEV_DYNAMIC
| SDEV_PERSIST
},
548 { "rlofi", NULL
, NULL
, NULL
, NULL
,
549 SDEV_ZONED
| SDEV_DYNAMIC
| SDEV_PERSIST
},
551 { NULL
, NULL
, NULL
, NULL
, NULL
, 0}
555 * We need to match off of the sdev_path, not the sdev_name. We are only allowed
556 * to exist directly under /dev.
558 struct sdev_vop_table
*
559 sdev_match(struct sdev_node
*dv
)
565 if (strlen(dv
->sdev_path
) <= 5)
568 if (strncmp(dv
->sdev_path
, "/dev/", 5) != 0)
570 path
= dv
->sdev_path
+ 5;
572 for (i
= 0; vtab
[i
].vt_name
; i
++) {
573 if (strcmp(vtab
[i
].vt_name
, path
) == 0)
575 if (vtab
[i
].vt_flags
& SDEV_SUBDIR
) {
576 vlen
= strlen(vtab
[i
].vt_name
);
577 if ((strncmp(vtab
[i
].vt_name
, path
,
578 vlen
- 1) == 0) && path
[vlen
] == '/')
587 * sets a directory's vnodeops if the directory is in the vtab;
589 static struct vnodeops
*
590 sdev_get_vop(struct sdev_node
*dv
)
592 struct sdev_vop_table
*vtp
;
595 path
= dv
->sdev_path
;
598 /* gets the relative path to /dev/ */
601 /* gets the vtab entry it matches */
602 if ((vtp
= sdev_match(dv
)) != NULL
) {
603 dv
->sdev_flags
|= vtp
->vt_flags
;
604 if (SDEV_IS_PERSIST(dv
->sdev_dotdot
) &&
605 (SDEV_IS_PERSIST(dv
) || !SDEV_IS_DYNAMIC(dv
)))
606 dv
->sdev_flags
|= SDEV_PERSIST
;
609 if (vtp
->vt_global_vops
)
610 *(vtp
->vt_global_vops
) = vtp
->vt_vops
;
612 return (vtp
->vt_vops
);
615 if (vtp
->vt_service
) {
616 fs_operation_def_t
*templ
;
617 templ
= sdev_merge_vtab(vtp
->vt_service
);
618 if (vn_make_ops(vtp
->vt_name
,
619 (const fs_operation_def_t
*)templ
,
620 &vtp
->vt_vops
) != 0) {
621 cmn_err(CE_PANIC
, "%s: malformed vnode ops\n",
625 if (vtp
->vt_global_vops
) {
626 *(vtp
->vt_global_vops
) = vtp
->vt_vops
;
628 sdev_free_vtab(templ
);
630 return (vtp
->vt_vops
);
633 return (sdev_vnodeops
);
636 /* child inherits the persistence of the parent */
637 if (SDEV_IS_PERSIST(dv
->sdev_dotdot
))
638 dv
->sdev_flags
|= SDEV_PERSIST
;
640 return (sdev_vnodeops
);
644 sdev_set_no_negcache(struct sdev_node
*dv
)
649 ASSERT(dv
->sdev_path
);
650 path
= dv
->sdev_path
+ strlen("/dev/");
652 for (i
= 0; vtab
[i
].vt_name
; i
++) {
653 if (strcmp(vtab
[i
].vt_name
, path
) == 0) {
654 if (vtab
[i
].vt_flags
& SDEV_NO_NCACHE
)
655 dv
->sdev_flags
|= SDEV_NO_NCACHE
;
662 sdev_get_vtor(struct sdev_node
*dv
)
664 struct sdev_vop_table
*vtp
;
666 vtp
= sdev_match(dv
);
668 return ((void *)vtp
->vt_vtor
);
674 * Build the base root inode
677 sdev_mkino(struct sdev_node
*dv
)
682 * for now, follow the lead of tmpfs here
683 * need to someday understand the requirements here
685 ino
= (ino_t
)(uint32_t)((uintptr_t)dv
>> 3);
686 ino
+= SDEV_ROOTINO
+ 1;
692 sdev_getlink(struct vnode
*linkvp
, char **link
)
696 struct uio uio
= {0};
697 struct iovec iov
= {0};
701 ASSERT(linkvp
->v_type
== VLNK
);
703 buf
= kmem_zalloc(MAXPATHLEN
, KM_SLEEP
);
705 iov
.iov_len
= MAXPATHLEN
;
708 uio
.uio_resid
= MAXPATHLEN
;
709 uio
.uio_segflg
= UIO_SYSSPACE
;
710 uio
.uio_llimit
= MAXOFFSET_T
;
712 err
= VOP_READLINK(linkvp
, &uio
, kcred
, NULL
);
714 cmn_err(CE_WARN
, "readlink %s failed in dev\n", buf
);
715 kmem_free(buf
, MAXPATHLEN
);
719 /* mission complete */
720 *link
= i_ddi_strdup(buf
, KM_SLEEP
);
721 kmem_free(buf
, MAXPATHLEN
);
726 * A convenient wrapper to get the devfs node vnode for a device
727 * minor functionality: readlink() of a /dev symlink
728 * Place the link into dv->sdev_symlink
731 sdev_follow_link(struct sdev_node
*dv
)
734 struct vnode
*linkvp
;
737 linkvp
= SDEVTOV(dv
);
740 ASSERT(linkvp
->v_type
== VLNK
);
741 err
= sdev_getlink(linkvp
, &link
);
743 dv
->sdev_symlink
= NULL
;
747 ASSERT(link
!= NULL
);
748 dv
->sdev_symlink
= link
;
753 sdev_node_check(struct sdev_node
*dv
, struct vattr
*nvap
, void *nargs
)
755 vtype_t otype
= SDEVTOV(dv
)->v_type
;
758 * existing sdev_node has a different type.
760 if (otype
!= nvap
->va_type
) {
761 sdcmn_err9(("sdev_node_check: existing node "
762 " %s type %d does not match new node type %d\n",
763 dv
->sdev_name
, otype
, nvap
->va_type
));
768 * For a symlink, the target should be the same.
771 ASSERT(nargs
!= NULL
);
772 ASSERT(dv
->sdev_symlink
!= NULL
);
773 if (strcmp(dv
->sdev_symlink
, (char *)nargs
) != 0) {
774 sdcmn_err9(("sdev_node_check: existing node "
775 " %s has different symlink %s as new node "
776 " %s\n", dv
->sdev_name
, dv
->sdev_symlink
,
786 * sdev_mknode - a wrapper for sdev_nodeinit(), sdev_nodeready()
791 * - newdv (sdev_node for nm is returned here)
792 * - vap (vattr for the node to be created, va_type should be set.
793 * - avp (attribute vnode)
794 * the defaults should be used if unknown)
798 * . global sdev_node (for !SDEV_GLOBAL)
799 * - state: SDEV_INIT, SDEV_READY
801 * only ddv, nm, newddv, vap, cred are required for sdev_mknode(SDEV_INIT)
803 * NOTE: directory contents writers lock needs to be held before
804 * calling this routine.
807 sdev_mknode(struct sdev_node
*ddv
, char *nm
, struct sdev_node
**newdv
,
808 struct vattr
*vap
, struct vnode
*avp
, void *args
, struct cred
*cred
,
809 sdev_node_state_t state
)
812 sdev_node_state_t node_state
;
813 struct sdev_node
*dv
= NULL
;
815 ASSERT(state
!= SDEV_ZOMBIE
);
816 ASSERT(RW_WRITE_HELD(&ddv
->sdev_contents
));
821 /* allocate and initialize a sdev_node */
822 if (ddv
->sdev_state
== SDEV_ZOMBIE
) {
823 sdcmn_err9(("sdev_mknode: parent %s ZOMBIEd\n",
828 error
= sdev_nodeinit(ddv
, nm
, &dv
, vap
);
830 sdcmn_err9(("sdev_mknode: error %d,"
831 " name %s can not be initialized\n",
837 /* insert into the directory cache */
838 sdev_cache_update(ddv
, &dv
, nm
, SDEV_CACHE_ADD
);
842 node_state
= dv
->sdev_state
;
843 ASSERT(node_state
!= SDEV_ZOMBIE
);
845 if (state
== SDEV_READY
) {
846 switch (node_state
) {
848 error
= sdev_nodeready(dv
, vap
, avp
, args
, cred
);
850 sdcmn_err9(("sdev_mknode: node %s can NOT"
851 " be transitioned into READY state, "
852 "error %d\n", nm
, error
));
857 * Do some sanity checking to make sure
858 * the existing sdev_node is what has been
861 error
= sdev_node_check(dv
, vap
, args
);
870 ASSERT((*newdv
)->sdev_state
!= SDEV_ZOMBIE
);
872 sdev_cache_update(ddv
, &dv
, nm
, SDEV_CACHE_DELETE
);
874 * We created this node, it wasn't passed into us. Therefore it
875 * is up to us to delete it.
878 SDEV_SIMPLE_RELE(dv
);
886 * convenient wrapper to change vp's ATIME, CTIME and MTIME
889 sdev_update_timestamps(struct vnode
*vp
, cred_t
*cred
, uint_t mask
)
904 attr
.va_mask
= (mask
& AT_TIMES
);
905 err
= VOP_SETATTR(vp
, &attr
, 0, cred
, NULL
);
906 if (err
&& (err
!= EROFS
)) {
907 sdcmn_err(("update timestamps error %d\n", err
));
912 * the backing store vnode is released here
916 sdev_nodedestroy(struct sdev_node
*dv
, uint_t flags
)
919 ASSERT(dv
->sdev_nlink
== 0);
921 if (dv
->sdev_attrvp
!= NULLVP
) {
922 VN_RELE(dv
->sdev_attrvp
);
924 * reset the attrvp so that no more
925 * references can be made on this already
928 dv
->sdev_attrvp
= NULLVP
;
931 if (dv
->sdev_attr
!= NULL
) {
932 kmem_free(dv
->sdev_attr
, sizeof (struct vattr
));
933 dv
->sdev_attr
= NULL
;
936 if (dv
->sdev_name
!= NULL
) {
937 kmem_free(dv
->sdev_name
, dv
->sdev_namelen
+ 1);
938 dv
->sdev_name
= NULL
;
941 if (dv
->sdev_symlink
!= NULL
) {
942 kmem_free(dv
->sdev_symlink
, strlen(dv
->sdev_symlink
) + 1);
943 dv
->sdev_symlink
= NULL
;
947 kmem_free(dv
->sdev_path
, strlen(dv
->sdev_path
) + 1);
948 dv
->sdev_path
= NULL
;
951 if (!SDEV_IS_GLOBAL(dv
))
954 if (SDEVTOV(dv
)->v_type
== VDIR
) {
955 ASSERT(SDEV_FIRST_ENTRY(dv
) == NULL
);
956 avl_destroy(&dv
->sdev_entries
);
959 mutex_destroy(&dv
->sdev_lookup_lock
);
960 cv_destroy(&dv
->sdev_lookup_cv
);
962 /* return node to initial state as per constructor */
963 (void) memset((void *)&dv
->sdev_instance_data
, 0,
964 sizeof (dv
->sdev_instance_data
));
965 vn_invalid(SDEVTOV(dv
));
966 kmem_cache_free(sdev_node_cache
, dv
);
970 * DIRECTORY CACHE lookup
973 sdev_findbyname(struct sdev_node
*ddv
, char *nm
)
975 struct sdev_node
*dv
;
976 struct sdev_node dvtmp
;
979 ASSERT(RW_LOCK_HELD(&ddv
->sdev_contents
));
981 dvtmp
.sdev_name
= nm
;
982 dv
= avl_find(&ddv
->sdev_entries
, &dvtmp
, &where
);
984 ASSERT(dv
->sdev_dotdot
== ddv
);
985 ASSERT(strcmp(dv
->sdev_name
, nm
) == 0);
986 ASSERT(dv
->sdev_state
!= SDEV_ZOMBIE
);
994 * Inserts a new sdev_node in a parent directory
997 sdev_direnter(struct sdev_node
*ddv
, struct sdev_node
*dv
)
1001 ASSERT(RW_WRITE_HELD(&ddv
->sdev_contents
));
1002 ASSERT(SDEVTOV(ddv
)->v_type
== VDIR
);
1003 ASSERT(ddv
->sdev_nlink
>= 2);
1004 ASSERT(dv
->sdev_nlink
== 0);
1005 ASSERT(dv
->sdev_state
!= SDEV_ZOMBIE
);
1007 dv
->sdev_dotdot
= ddv
;
1008 VERIFY(avl_find(&ddv
->sdev_entries
, dv
, &where
) == NULL
);
1009 avl_insert(&ddv
->sdev_entries
, dv
, where
);
1014 * The following check is needed because while sdev_nodes are linked
1015 * in SDEV_INIT state, they have their link counts incremented only
1016 * in SDEV_READY state.
1019 decr_link(struct sdev_node
*dv
)
1021 VERIFY(RW_WRITE_HELD(&dv
->sdev_contents
));
1022 if (dv
->sdev_state
!= SDEV_INIT
) {
1023 VERIFY(dv
->sdev_nlink
>= 1);
1026 VERIFY(dv
->sdev_nlink
== 0);
1031 * Delete an existing dv from directory cache
1033 * In the case of a node is still held by non-zero reference count, the node is
1034 * put into ZOMBIE state. The node is always unlinked from its parent, but it is
1035 * not destroyed via sdev_inactive until its reference count reaches "0".
1038 sdev_dirdelete(struct sdev_node
*ddv
, struct sdev_node
*dv
)
1041 sdev_node_state_t os
;
1043 ASSERT(RW_WRITE_HELD(&ddv
->sdev_contents
));
1046 mutex_enter(&vp
->v_lock
);
1047 rw_enter(&dv
->sdev_contents
, RW_WRITER
);
1048 os
= dv
->sdev_state
;
1049 ASSERT(os
!= SDEV_ZOMBIE
);
1050 dv
->sdev_state
= SDEV_ZOMBIE
;
1053 * unlink ourselves from the parent directory now to take care of the ..
1054 * link. However, if we're a directory, we don't remove our reference to
1055 * ourself eg. '.' until we are torn down in the inactive callback.
1058 avl_remove(&ddv
->sdev_entries
, dv
);
1060 * sdev_inactive expects nodes to have a link to themselves when we're
1061 * tearing them down. If we're transitioning from the initial state to
1062 * zombie and not via ready, then we're not going to have this link that
1063 * comes from the node being ready. As a result, we need to increment
1064 * our link count by one to account for this.
1066 if (os
== SDEV_INIT
&& dv
->sdev_nlink
== 0)
1068 rw_exit(&dv
->sdev_contents
);
1069 mutex_exit(&vp
->v_lock
);
1073 * check if the source is in the path of the target
1075 * source and target are different
1079 sdev_checkpath(struct sdev_node
*sdv
, struct sdev_node
*tdv
, struct cred
*cred
)
1082 struct sdev_node
*dotdot
, *dir
;
1084 dotdot
= tdv
->sdev_dotdot
;
1088 if (dotdot
== tdv
) {
1094 * avoid error cases like
1099 if (dotdot
== sdv
) {
1105 dotdot
= dir
->sdev_dotdot
;
1107 /* done checking because root is reached */
1108 if (dir
== dotdot
) {
1116 sdev_rnmnode(struct sdev_node
*oddv
, struct sdev_node
*odv
,
1117 struct sdev_node
*nddv
, struct sdev_node
**ndvp
, char *nnm
,
1121 struct vnode
*ovp
= SDEVTOV(odv
);
1124 int doingdir
= (ovp
->v_type
== VDIR
);
1126 int samedir
= (oddv
== nddv
) ? 1 : 0;
1128 struct sdev_node
*idv
= NULL
;
1129 struct sdev_node
*ndv
= NULL
;
1132 vattr
.va_mask
= AT_TYPE
|AT_MODE
|AT_UID
|AT_GID
;
1133 error
= VOP_GETATTR(ovp
, &vattr
, 0, cred
, NULL
);
1138 rw_enter(&oddv
->sdev_contents
, RW_WRITER
);
1139 rw_enter(&nddv
->sdev_contents
, RW_WRITER
);
1142 * the source may have been deleted by another thread before
1145 if (odv
->sdev_state
!= SDEV_READY
) {
1150 if (doingdir
&& (odv
== nddv
)) {
1156 * If renaming a directory, and the parents are different (".." must be
1157 * changed) then the source dir must not be in the dir hierarchy above
1158 * the target since it would orphan everything below the source dir.
1160 if (doingdir
&& (oddv
!= nddv
)) {
1161 error
= sdev_checkpath(odv
, nddv
, cred
);
1166 /* fix the source for a symlink */
1167 if (vattr
.va_type
== VLNK
) {
1168 if (odv
->sdev_symlink
== NULL
) {
1169 error
= sdev_follow_link(odv
);
1172 * The underlying symlink doesn't exist. This
1173 * node probably shouldn't even exist. While
1174 * it's a bit jarring to consumers, we're going
1175 * to remove the node from /dev.
1177 if (SDEV_IS_PERSIST((*ndvp
)))
1179 sdev_dirdelete(oddv
, odv
);
1181 ASSERT(nddv
->sdev_attrvp
);
1182 error
= VOP_REMOVE(nddv
->sdev_attrvp
,
1183 nnm
, cred
, NULL
, 0);
1191 ASSERT(odv
->sdev_symlink
);
1192 link
= i_ddi_strdup(odv
->sdev_symlink
, KM_SLEEP
);
1195 /* destination existing */
1197 nvp
= SDEVTOV(*ndvp
);
1200 /* handling renaming to itself */
1206 if (nvp
->v_type
== VDIR
) {
1212 if (vn_vfswlock(nvp
)) {
1217 if (vn_mountedvfs(nvp
) != NULL
) {
1223 /* in case dir1 exists in dir2 and "mv dir1 dir2" */
1224 if ((*ndvp
)->sdev_nlink
> 2) {
1232 * We did not place the hold on *ndvp, so even though
1233 * we're deleting the node, we should not get rid of our
1236 sdev_dirdelete(nddv
, *ndvp
);
1238 ASSERT(nddv
->sdev_attrvp
);
1239 error
= VOP_RMDIR(nddv
->sdev_attrvp
, nnm
,
1240 nddv
->sdev_attrvp
, cred
, NULL
, 0);
1249 if (SDEV_IS_PERSIST((*ndvp
))) {
1254 * Get rid of the node from the directory cache note.
1255 * Don't forget that it's not up to us to remove the vn
1256 * ref on the sdev node, as we did not place it.
1258 sdev_dirdelete(nddv
, *ndvp
);
1261 ASSERT(nddv
->sdev_attrvp
);
1262 error
= VOP_REMOVE(nddv
->sdev_attrvp
,
1263 nnm
, cred
, NULL
, 0);
1271 * make a fresh node from the source attrs
1273 ASSERT(RW_WRITE_HELD(&nddv
->sdev_contents
));
1274 error
= sdev_mknode(nddv
, nnm
, ndvp
, &vattr
,
1275 NULL
, (void *)link
, cred
, SDEV_READY
);
1278 kmem_free(link
, strlen(link
) + 1);
1285 ASSERT((*ndvp
)->sdev_state
== SDEV_READY
);
1287 /* move dir contents */
1289 for (idv
= SDEV_FIRST_ENTRY(odv
); idv
;
1290 idv
= SDEV_NEXT_ENTRY(odv
, idv
)) {
1292 error
= sdev_rnmnode(odv
, idv
,
1293 (struct sdev_node
*)(*ndvp
), &ndv
,
1294 idv
->sdev_name
, cred
);
1302 if ((*ndvp
)->sdev_attrvp
) {
1303 sdev_update_timestamps((*ndvp
)->sdev_attrvp
, kcred
,
1306 ASSERT((*ndvp
)->sdev_attr
);
1308 (*ndvp
)->sdev_attr
->va_ctime
= now
;
1309 (*ndvp
)->sdev_attr
->va_atime
= now
;
1312 if (nddv
->sdev_attrvp
) {
1313 sdev_update_timestamps(nddv
->sdev_attrvp
, kcred
,
1316 ASSERT(nddv
->sdev_attr
);
1318 nddv
->sdev_attr
->va_mtime
= now
;
1319 nddv
->sdev_attr
->va_atime
= now
;
1321 rw_exit(&nddv
->sdev_contents
);
1323 rw_exit(&oddv
->sdev_contents
);
1330 kmem_free(link
, strlen(link
) + 1);
1334 rw_exit(&nddv
->sdev_contents
);
1336 rw_exit(&oddv
->sdev_contents
);
1341 * Merge sdev_node specific information into an attribute structure.
1343 * note: sdev_node is not locked here
1346 sdev_vattr_merge(struct sdev_node
*dv
, struct vattr
*vap
)
1348 struct vnode
*vp
= SDEVTOV(dv
);
1350 vap
->va_nlink
= dv
->sdev_nlink
;
1351 vap
->va_nodeid
= dv
->sdev_ino
;
1352 vap
->va_fsid
= SDEVTOV(dv
->sdev_dotdot
)->v_rdev
;
1353 vap
->va_type
= vp
->v_type
;
1355 if (vp
->v_type
== VDIR
) {
1357 vap
->va_fsid
= vp
->v_rdev
;
1358 } else if (vp
->v_type
== VLNK
) {
1360 vap
->va_mode
&= ~S_IFMT
;
1361 vap
->va_mode
|= S_IFLNK
;
1362 } else if ((vp
->v_type
== VCHR
) || (vp
->v_type
== VBLK
)) {
1363 vap
->va_rdev
= vp
->v_rdev
;
1364 vap
->va_mode
&= ~S_IFMT
;
1365 if (vap
->va_type
== VCHR
)
1366 vap
->va_mode
|= S_IFCHR
;
1368 vap
->va_mode
|= S_IFBLK
;
1375 sdev_getdefault_attr(enum vtype type
)
1378 return (&sdev_vattr_dir
);
1379 else if (type
== VCHR
)
1380 return (&sdev_vattr_chr
);
1381 else if (type
== VBLK
)
1382 return (&sdev_vattr_blk
);
1383 else if (type
== VLNK
)
1384 return (&sdev_vattr_lnk
);
1389 sdev_to_vp(struct sdev_node
*dv
, struct vnode
**vpp
)
1392 struct vnode
*vp
= SDEVTOV(dv
);
1394 switch (vp
->v_type
) {
1398 * If vnode is a device, return special vnode instead
1399 * (though it knows all about -us- via sp->s_realvp)
1401 *vpp
= specvp(vp
, vp
->v_rdev
, vp
->v_type
, kcred
);
1406 default: /* most types are returned as is */
1414 * junction between devname and root file system, e.g. ufs
1417 devname_backstore_lookup(struct sdev_node
*ddv
, char *nm
, struct vnode
**rvp
)
1419 struct vnode
*rdvp
= ddv
->sdev_attrvp
;
1424 rval
= VOP_LOOKUP(rdvp
, nm
, rvp
, NULL
, 0, NULL
, kcred
, NULL
, NULL
,
1430 sdev_filldir_from_store(struct sdev_node
*ddv
, int dlen
, struct cred
*cred
)
1432 struct sdev_node
*dv
= NULL
;
1434 struct vnode
*dirvp
;
1440 struct dirent64
*dp
;
1446 if (ddv
->sdev_attrvp
== NULL
)
1448 if (!(ddv
->sdev_flags
& SDEV_BUILD
))
1451 dirvp
= ddv
->sdev_attrvp
;
1453 dbuf
= kmem_zalloc(dlen
, KM_SLEEP
);
1457 uio
.uio_segflg
= UIO_SYSSPACE
;
1459 uio
.uio_extflg
= UIO_COPY_CACHED
;
1460 uio
.uio_loffset
= 0;
1461 uio
.uio_llimit
= MAXOFFSET_T
;
1465 while (!error
&& !eof
) {
1466 uio
.uio_resid
= dlen
;
1467 iov
.iov_base
= (char *)dbuf
;
1469 (void) VOP_RWLOCK(dirvp
, V_WRITELOCK_FALSE
, NULL
);
1470 error
= VOP_READDIR(dirvp
, &uio
, kcred
, &eof
, NULL
, 0);
1471 VOP_RWUNLOCK(dirvp
, V_WRITELOCK_FALSE
, NULL
);
1473 dbuflen
= dlen
- uio
.uio_resid
;
1474 if (error
|| dbuflen
== 0)
1477 if (!(ddv
->sdev_flags
& SDEV_BUILD
))
1480 for (dp
= dbuf
; ((intptr_t)dp
<
1481 (intptr_t)dbuf
+ dbuflen
);
1482 dp
= (dirent64_t
*)((intptr_t)dp
+ dp
->d_reclen
)) {
1485 if (strcmp(nm
, ".") == 0 ||
1486 strcmp(nm
, "..") == 0)
1490 dv
= sdev_cache_lookup(ddv
, nm
);
1492 VERIFY(dv
->sdev_state
!= SDEV_ZOMBIE
);
1493 SDEV_SIMPLE_RELE(dv
);
1497 /* refill the cache if not already */
1498 error
= devname_backstore_lookup(ddv
, nm
, &vp
);
1502 vattr
.va_mask
= AT_TYPE
|AT_MODE
|AT_UID
|AT_GID
;
1503 error
= VOP_GETATTR(vp
, &vattr
, 0, cred
, NULL
);
1507 if (vattr
.va_type
== VLNK
) {
1508 error
= sdev_getlink(vp
, &link
);
1512 ASSERT(link
!= NULL
);
1515 if (!rw_tryupgrade(&ddv
->sdev_contents
)) {
1516 rw_exit(&ddv
->sdev_contents
);
1517 rw_enter(&ddv
->sdev_contents
, RW_WRITER
);
1519 error
= sdev_mknode(ddv
, nm
, &dv
, &vattr
, vp
, link
,
1521 rw_downgrade(&ddv
->sdev_contents
);
1524 kmem_free(link
, strlen(link
) + 1);
1530 ASSERT(dv
->sdev_state
!= SDEV_ZOMBIE
);
1531 SDEV_SIMPLE_RELE(dv
);
1540 kmem_free(dbuf
, dlen
);
1546 sdev_filldir_dynamic(struct sdev_node
*ddv
)
1551 struct vattr
*vap
= &vattr
;
1553 struct sdev_node
*dv
= NULL
;
1555 ASSERT(RW_WRITE_HELD(&ddv
->sdev_contents
));
1556 ASSERT((ddv
->sdev_flags
& SDEV_BUILD
));
1558 *vap
= *sdev_getdefault_attr(VDIR
); /* note structure copy here */
1559 gethrestime(&vap
->va_atime
);
1560 vap
->va_mtime
= vap
->va_atime
;
1561 vap
->va_ctime
= vap
->va_atime
;
1562 for (i
= 0; vtab
[i
].vt_name
!= NULL
; i
++) {
1564 * This early, we may be in a read-only /dev environment: leave
1565 * the creation of any nodes we'd attempt to persist to
1566 * devfsadm. Because /dev itself is normally persistent, any
1567 * node which is not marked dynamic will end up being marked
1568 * persistent. However, some nodes are both dynamic and
1569 * persistent, mostly lofi and rlofi, so we need to be careful
1572 if ((vtab
[i
].vt_flags
& SDEV_PERSIST
) ||
1573 !(vtab
[i
].vt_flags
& SDEV_DYNAMIC
))
1575 nm
= vtab
[i
].vt_name
;
1576 ASSERT(RW_WRITE_HELD(&ddv
->sdev_contents
));
1578 error
= sdev_mknode(ddv
, nm
, &dv
, vap
, NULL
,
1579 NULL
, kcred
, SDEV_READY
);
1581 cmn_err(CE_WARN
, "%s/%s: error %d\n",
1582 ddv
->sdev_name
, nm
, error
);
1585 ASSERT(dv
->sdev_state
!= SDEV_ZOMBIE
);
1586 SDEV_SIMPLE_RELE(dv
);
1592 * Creating a backing store entry based on sdev_attr.
1593 * This is called either as part of node creation in a persistent directory
1594 * or from setattr/setsecattr to persist access attributes across reboot.
1597 sdev_shadow_node(struct sdev_node
*dv
, struct cred
*cred
)
1600 struct vnode
*dvp
= SDEVTOV(dv
->sdev_dotdot
);
1601 struct vnode
*rdvp
= VTOSDEV(dvp
)->sdev_attrvp
;
1602 struct vattr
*vap
= dv
->sdev_attr
;
1603 char *nm
= dv
->sdev_name
;
1604 struct vnode
*tmpvp
, **rvp
= &tmpvp
, *rrvp
= NULL
;
1606 ASSERT(dv
&& dv
->sdev_name
&& rdvp
);
1607 ASSERT(RW_WRITE_HELD(&dv
->sdev_contents
) && dv
->sdev_attrvp
== NULL
);
1610 /* try to find it in the backing store */
1611 error
= VOP_LOOKUP(rdvp
, nm
, rvp
, NULL
, 0, NULL
, cred
, NULL
, NULL
,
1614 if (VOP_REALVP(*rvp
, &rrvp
, NULL
) == 0) {
1620 kmem_free(dv
->sdev_attr
, sizeof (vattr_t
));
1621 dv
->sdev_attr
= NULL
;
1622 dv
->sdev_attrvp
= *rvp
;
1626 /* let's try to persist the node */
1627 gethrestime(&vap
->va_atime
);
1628 vap
->va_mtime
= vap
->va_atime
;
1629 vap
->va_ctime
= vap
->va_atime
;
1630 vap
->va_mask
|= AT_TYPE
|AT_MODE
;
1631 switch (vap
->va_type
) {
1633 error
= VOP_MKDIR(rdvp
, nm
, vap
, rvp
, cred
, NULL
, 0, NULL
);
1634 sdcmn_err9(("sdev_shadow_node: mkdir vp %p error %d\n",
1635 (void *)(*rvp
), error
));
1643 error
= VOP_CREATE(rdvp
, nm
, vap
, NONEXCL
, VREAD
|VWRITE
,
1644 rvp
, cred
, 0, NULL
, NULL
);
1645 sdcmn_err9(("sdev_shadow_node: create vp %p, error %d\n",
1646 (void *)(*rvp
), error
));
1651 ASSERT(dv
->sdev_symlink
);
1652 error
= VOP_SYMLINK(rdvp
, nm
, vap
, dv
->sdev_symlink
, cred
,
1654 sdcmn_err9(("sdev_shadow_node: create symlink error %d\n",
1658 cmn_err(CE_PANIC
, "dev: %s: sdev_shadow_node "
1663 /* go back to lookup to factor out spec node and set attrvp */
1667 sdcmn_err(("cannot persist %s - error %d\n", dv
->sdev_path
, error
));
1672 sdev_cache_add(struct sdev_node
*ddv
, struct sdev_node
**dv
, char *nm
)
1674 struct sdev_node
*dup
= NULL
;
1676 ASSERT(RW_WRITE_HELD(&ddv
->sdev_contents
));
1677 if ((dup
= sdev_findbyname(ddv
, nm
)) == NULL
) {
1678 sdev_direnter(ddv
, *dv
);
1680 VERIFY(dup
->sdev_state
!= SDEV_ZOMBIE
);
1681 SDEV_SIMPLE_RELE(*dv
);
1682 sdev_nodedestroy(*dv
, 0);
1688 sdev_cache_delete(struct sdev_node
*ddv
, struct sdev_node
**dv
)
1690 ASSERT(RW_WRITE_HELD(&ddv
->sdev_contents
));
1691 sdev_dirdelete(ddv
, *dv
);
1695 * update the in-core directory cache
1698 sdev_cache_update(struct sdev_node
*ddv
, struct sdev_node
**dv
, char *nm
,
1699 sdev_cache_ops_t ops
)
1701 ASSERT((SDEV_HELD(*dv
)));
1703 ASSERT(RW_WRITE_HELD(&ddv
->sdev_contents
));
1705 case SDEV_CACHE_ADD
:
1706 sdev_cache_add(ddv
, dv
, nm
);
1708 case SDEV_CACHE_DELETE
:
1709 sdev_cache_delete(ddv
, dv
);
1717 * retrieve the named entry from the directory cache
1720 sdev_cache_lookup(struct sdev_node
*ddv
, char *nm
)
1722 struct sdev_node
*dv
= NULL
;
1724 ASSERT(RW_LOCK_HELD(&ddv
->sdev_contents
));
1725 dv
= sdev_findbyname(ddv
, nm
);
1731 * Implicit reconfig for nodes constructed by a link generator
1732 * Start devfsadm if needed, or if devfsadm is in progress,
1733 * prepare to block on devfsadm either completing or
1734 * constructing the desired node. As devfsadmd is global
1735 * in scope, constructing all necessary nodes, we only
1736 * need to initiate it once.
1739 sdev_call_devfsadmd(struct sdev_node
*ddv
, struct sdev_node
*dv
, char *nm
)
1743 if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state
)) {
1744 sdcmn_err6(("lookup: waiting for %s/%s, 0x%x\n",
1745 ddv
->sdev_name
, nm
, devfsadm_state
));
1746 mutex_enter(&dv
->sdev_lookup_lock
);
1747 SDEV_BLOCK_OTHERS(dv
, (SDEV_LOOKUP
| SDEV_LGWAITING
));
1748 mutex_exit(&dv
->sdev_lookup_lock
);
1750 } else if (!DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state
)) {
1751 sdcmn_err6(("lookup %s/%s starting devfsadm, 0x%x\n",
1752 ddv
->sdev_name
, nm
, devfsadm_state
));
1754 sdev_devfsadmd_thread(ddv
, dv
, kcred
);
1755 mutex_enter(&dv
->sdev_lookup_lock
);
1756 SDEV_BLOCK_OTHERS(dv
,
1757 (SDEV_LOOKUP
| SDEV_LGWAITING
));
1758 mutex_exit(&dv
->sdev_lookup_lock
);
1768 * Support for specialized device naming construction mechanisms
1771 sdev_call_dircallback(struct sdev_node
*ddv
, struct sdev_node
**dvp
, char *nm
,
1772 int (*callback
)(struct sdev_node
*, char *, void **, struct cred
*,
1773 void *, char *), int flags
, struct cred
*cred
)
1776 char *physpath
= NULL
;
1778 struct vattr
*vap
= &vattr
;
1779 struct sdev_node
*dv
= NULL
;
1781 ASSERT(RW_WRITE_HELD(&ddv
->sdev_contents
));
1782 if (flags
& SDEV_VLINK
) {
1783 physpath
= kmem_zalloc(MAXPATHLEN
, KM_SLEEP
);
1784 rv
= callback(ddv
, nm
, (void *)&physpath
, kcred
, NULL
,
1787 kmem_free(physpath
, MAXPATHLEN
);
1791 *vap
= *sdev_getdefault_attr(VLNK
); /* structure copy */
1792 vap
->va_size
= strlen(physpath
);
1793 gethrestime(&vap
->va_atime
);
1794 vap
->va_mtime
= vap
->va_atime
;
1795 vap
->va_ctime
= vap
->va_atime
;
1797 rv
= sdev_mknode(ddv
, nm
, &dv
, vap
, NULL
,
1798 (void *)physpath
, cred
, SDEV_READY
);
1799 kmem_free(physpath
, MAXPATHLEN
);
1802 } else if (flags
& SDEV_VATTR
) {
1806 * callback is responsible to set the basic attributes,
1807 * e.g. va_type/va_uid/va_gid/
1808 * dev_t if VCHR or VBLK/
1811 rv
= callback(ddv
, nm
, (void *)&vattr
, kcred
, NULL
, NULL
);
1813 sdcmn_err3(("devname_lookup_func: SDEV_NONE "
1814 "callback failed \n"));
1818 rv
= sdev_mknode(ddv
, nm
, &dv
, &vattr
, NULL
, NULL
,
1825 impossible(("lookup: %s/%s by %s not supported (%d)\n",
1826 SDEVTOV(ddv
)->v_path
, nm
, curproc
->p_user
.u_comm
,
/*
 * Returns non-zero when the executing process is devfsadm (or its daemon).
 */
static int
is_devfsadm_thread(char *exec_name)
{
	/*
	 * note: because devfsadmd -> /usr/sbin/devfsadm
	 * it is safe to use "devfsadm" to capture the lookups
	 * from devfsadm and its daemon version.
	 */
	if (strcmp(exec_name, "devfsadm") == 0)
		return (1);
	return (0);
}
1851 * backing store (SDEV_PERSIST);
1852 * DBNR: a. dir_ops implemented in the loadable modules;
1853 * b. vnode ops in vtab.
1856 devname_lookup_func(struct sdev_node
*ddv
, char *nm
, struct vnode
**vpp
,
1857 struct cred
*cred
, int (*callback
)(struct sdev_node
*, char *, void **,
1858 struct cred
*, void *, char *), int flags
)
1861 struct vnode
*rvp
= NULL
;
1862 struct sdev_node
*dv
= NULL
;
1866 char *lookup_thread
= curproc
->p_user
.u_comm
;
1867 int failed_flags
= 0;
1868 int (*vtor
)(struct sdev_node
*) = NULL
;
1873 if (SDEVTOV(ddv
)->v_type
!= VDIR
)
1877 * Empty name or ., return node itself.
1880 if ((nmlen
== 0) || ((nmlen
== 1) && (nm
[0] == '.'))) {
1881 *vpp
= SDEVTOV(ddv
);
1887 * .., return the parent directory
1889 if ((nmlen
== 2) && (strcmp(nm
, "..") == 0)) {
1890 *vpp
= SDEVTOV(ddv
->sdev_dotdot
);
1895 rw_enter(&ddv
->sdev_contents
, RW_READER
);
1896 if (ddv
->sdev_flags
& SDEV_VTOR
) {
1897 vtor
= (int (*)(struct sdev_node
*))sdev_get_vtor(ddv
);
1903 * (a) directory cache lookup:
1905 ASSERT(RW_READ_HELD(&ddv
->sdev_contents
));
1906 parent_state
= ddv
->sdev_state
;
1907 dv
= sdev_cache_lookup(ddv
, nm
);
1909 state
= dv
->sdev_state
;
1912 if (is_devfsadm_thread(lookup_thread
))
1915 /* ZOMBIED parent won't allow node creation */
1916 if (parent_state
== SDEV_ZOMBIE
) {
1917 SD_TRACE_FAILED_LOOKUP(ddv
, nm
,
1919 goto nolock_notfound
;
1922 mutex_enter(&dv
->sdev_lookup_lock
);
1923 /* compensate the threads started after devfsadm */
1924 if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state
) &&
1925 !(SDEV_IS_LOOKUP(dv
)))
1926 SDEV_BLOCK_OTHERS(dv
,
1927 (SDEV_LOOKUP
| SDEV_LGWAITING
));
1929 if (SDEV_IS_LOOKUP(dv
)) {
1930 failed_flags
|= SLF_REBUILT
;
1931 rw_exit(&ddv
->sdev_contents
);
1932 error
= sdev_wait4lookup(dv
, SDEV_LOOKUP
);
1933 mutex_exit(&dv
->sdev_lookup_lock
);
1934 rw_enter(&ddv
->sdev_contents
, RW_READER
);
1937 SD_TRACE_FAILED_LOOKUP(ddv
, nm
,
1939 goto nolock_notfound
;
1942 state
= dv
->sdev_state
;
1943 if (state
== SDEV_INIT
) {
1944 SD_TRACE_FAILED_LOOKUP(ddv
, nm
,
1946 goto nolock_notfound
;
1947 } else if (state
== SDEV_READY
) {
1949 } else if (state
== SDEV_ZOMBIE
) {
1950 rw_exit(&ddv
->sdev_contents
);
1951 SD_TRACE_FAILED_LOOKUP(ddv
, nm
,
1957 mutex_exit(&dv
->sdev_lookup_lock
);
1963 rw_exit(&ddv
->sdev_contents
);
1964 SD_TRACE_FAILED_LOOKUP(ddv
, nm
, retried
);
1968 rw_exit(&ddv
->sdev_contents
);
1969 SD_TRACE_FAILED_LOOKUP(ddv
, nm
, retried
);
1970 sdev_lookup_failed(ddv
, nm
, failed_flags
);
1975 ASSERT(RW_READ_HELD(&ddv
->sdev_contents
));
1978 * ZOMBIED parent does not allow new node creation.
1981 if (parent_state
== SDEV_ZOMBIE
) {
1982 rw_exit(&ddv
->sdev_contents
);
1984 SD_TRACE_FAILED_LOOKUP(ddv
, nm
, retried
);
1989 * (b0): backing store lookup
1990 * SDEV_PERSIST is default except:
1992 * 2) non-chmod'ed local nodes
1995 if (SDEV_IS_PERSIST(ddv
)) {
1996 error
= devname_backstore_lookup(ddv
, nm
, &rvp
);
2000 vattr
.va_mask
= AT_TYPE
|AT_MODE
|AT_UID
|AT_GID
;
2001 error
= VOP_GETATTR(rvp
, &vattr
, 0, cred
, NULL
);
2003 rw_exit(&ddv
->sdev_contents
);
2006 SD_TRACE_FAILED_LOOKUP(ddv
, nm
, retried
);
2007 sdev_lookup_failed(ddv
, nm
, failed_flags
);
2012 if (vattr
.va_type
== VLNK
) {
2013 error
= sdev_getlink(rvp
, &link
);
2015 rw_exit(&ddv
->sdev_contents
);
2018 SD_TRACE_FAILED_LOOKUP(ddv
, nm
,
2020 sdev_lookup_failed(ddv
, nm
,
2025 ASSERT(link
!= NULL
);
2028 if (!rw_tryupgrade(&ddv
->sdev_contents
)) {
2029 rw_exit(&ddv
->sdev_contents
);
2030 rw_enter(&ddv
->sdev_contents
, RW_WRITER
);
2032 error
= sdev_mknode(ddv
, nm
, &dv
, &vattr
,
2033 rvp
, link
, cred
, SDEV_READY
);
2034 rw_downgrade(&ddv
->sdev_contents
);
2037 kmem_free(link
, strlen(link
) + 1);
2042 SD_TRACE_FAILED_LOOKUP(ddv
, nm
, retried
);
2043 rw_exit(&ddv
->sdev_contents
);
2050 } else if (retried
) {
2051 rw_exit(&ddv
->sdev_contents
);
2052 sdcmn_err3(("retry of lookup of %s/%s: failed\n",
2053 ddv
->sdev_name
, nm
));
2056 SD_TRACE_FAILED_LOOKUP(ddv
, nm
, retried
);
2057 sdev_lookup_failed(ddv
, nm
, failed_flags
);
2064 /* first thread that is doing the lookup on this node */
2067 if (!rw_tryupgrade(&ddv
->sdev_contents
)) {
2068 rw_exit(&ddv
->sdev_contents
);
2069 rw_enter(&ddv
->sdev_contents
, RW_WRITER
);
2071 error
= sdev_call_dircallback(ddv
, &dv
, nm
, callback
,
2073 rw_downgrade(&ddv
->sdev_contents
);
2077 SD_TRACE_FAILED_LOOKUP(ddv
, nm
, retried
);
2078 rw_exit(&ddv
->sdev_contents
);
2083 if (!rw_tryupgrade(&ddv
->sdev_contents
)) {
2084 rw_exit(&ddv
->sdev_contents
);
2085 rw_enter(&ddv
->sdev_contents
, RW_WRITER
);
2087 error
= sdev_mknode(ddv
, nm
, &dv
, NULL
, NULL
, NULL
,
2090 rw_exit(&ddv
->sdev_contents
);
2091 SD_TRACE_FAILED_LOOKUP(ddv
, nm
, retried
);
2092 sdev_lookup_failed(ddv
, nm
, failed_flags
);
2096 rw_downgrade(&ddv
->sdev_contents
);
2100 * (b1) invoking devfsadm once per life time for devfsadm nodes
2102 ASSERT(SDEV_HELD(dv
));
2104 if (SDEV_IS_NO_NCACHE(dv
))
2105 failed_flags
|= SLF_NO_NCACHE
;
2106 if (sdev_reconfig_boot
|| !i_ddi_io_initialized() ||
2107 SDEV_IS_DYNAMIC(ddv
) || SDEV_IS_NO_NCACHE(dv
) ||
2108 ((moddebug
& MODDEBUG_FINI_EBUSY
) != 0)) {
2109 ASSERT(SDEV_HELD(dv
));
2110 SD_TRACE_FAILED_LOOKUP(ddv
, nm
, retried
);
2111 goto nolock_notfound
;
2115 * filter out known non-existent devices recorded
2116 * during initial reconfiguration boot for which
2117 * reconfig should not be done and lookup may
2118 * be short-circuited now.
2120 if (sdev_lookup_filter(ddv
, nm
)) {
2121 SD_TRACE_FAILED_LOOKUP(ddv
, nm
, retried
);
2122 goto nolock_notfound
;
2125 /* bypassing devfsadm internal nodes */
2126 if (is_devfsadm_thread(lookup_thread
)) {
2127 SD_TRACE_FAILED_LOOKUP(ddv
, nm
, retried
);
2128 goto nolock_notfound
;
2131 if (sdev_reconfig_disable
) {
2132 SD_TRACE_FAILED_LOOKUP(ddv
, nm
, retried
);
2133 goto nolock_notfound
;
2136 error
= sdev_call_devfsadmd(ddv
, dv
, nm
);
2138 sdcmn_err8(("lookup of %s/%s by %s: reconfig\n",
2139 ddv
->sdev_name
, nm
, curproc
->p_user
.u_comm
));
2140 if (sdev_reconfig_verbose
) {
2142 "?lookup of %s/%s by %s: reconfig\n",
2143 ddv
->sdev_name
, nm
, curproc
->p_user
.u_comm
);
2146 failed_flags
|= SLF_REBUILT
;
2147 ASSERT(dv
->sdev_state
!= SDEV_ZOMBIE
);
2148 SDEV_SIMPLE_RELE(dv
);
2151 SD_TRACE_FAILED_LOOKUP(ddv
, nm
, retried
);
2152 goto nolock_notfound
;
2156 ASSERT(dv
->sdev_state
== SDEV_READY
);
2159 * Check validity of returned node
2162 case SDEV_VTOR_VALID
:
2164 case SDEV_VTOR_STALE
:
2166 * The name exists, but the cache entry is
2167 * stale and needs to be re-created.
2169 ASSERT(RW_READ_HELD(&ddv
->sdev_contents
));
2170 if (rw_tryupgrade(&ddv
->sdev_contents
) == 0) {
2171 rw_exit(&ddv
->sdev_contents
);
2172 rw_enter(&ddv
->sdev_contents
, RW_WRITER
);
2174 sdev_cache_update(ddv
, &dv
, nm
, SDEV_CACHE_DELETE
);
2175 rw_downgrade(&ddv
->sdev_contents
);
2178 goto lookup_create_node
;
2180 case SDEV_VTOR_INVALID
:
2181 SD_TRACE_FAILED_LOOKUP(ddv
, nm
, retried
);
2182 sdcmn_err7(("lookup: destroy invalid "
2183 "node: %s(%p)\n", dv
->sdev_name
, (void *)dv
));
2184 goto nolock_notfound
;
2185 case SDEV_VTOR_SKIP
:
2186 sdcmn_err7(("lookup: node not applicable - "
2187 "skipping: %s(%p)\n", dv
->sdev_name
, (void *)dv
));
2188 rw_exit(&ddv
->sdev_contents
);
2189 SD_TRACE_FAILED_LOOKUP(ddv
, nm
, retried
);
2194 "dev fs: validator failed: %s(%p)\n",
2195 dv
->sdev_name
, (void *)dv
);
2200 rw_exit(&ddv
->sdev_contents
);
2201 rv
= sdev_to_vp(dv
, vpp
);
2202 sdcmn_err3(("devname_lookup_func: returning vp %p v_count %d state %d "
2203 "for nm %s, error %d\n", (void *)*vpp
, (*vpp
)->v_count
,
2204 dv
->sdev_state
, nm
, rv
));
2209 * Destroy the node that is created for synchronization purposes.
2211 sdcmn_err3(("devname_lookup_func: %s with state %d\n",
2212 nm
, dv
->sdev_state
));
2213 ASSERT(RW_READ_HELD(&ddv
->sdev_contents
));
2214 if (dv
->sdev_state
== SDEV_INIT
) {
2215 if (!rw_tryupgrade(&ddv
->sdev_contents
)) {
2216 rw_exit(&ddv
->sdev_contents
);
2217 rw_enter(&ddv
->sdev_contents
, RW_WRITER
);
2221 * Node state may have changed during the lock
2222 * changes. Re-check.
2224 if (dv
->sdev_state
== SDEV_INIT
) {
2225 sdev_dirdelete(ddv
, dv
);
2226 rw_exit(&ddv
->sdev_contents
);
2227 sdev_lookup_failed(ddv
, nm
, failed_flags
);
2234 rw_exit(&ddv
->sdev_contents
);
2238 sdev_lookup_failed(ddv
, nm
, failed_flags
);
2244 * Given a directory node, mark all nodes beneath as
2245 * STALE, i.e. nodes that don't exist as far as new
2246 * consumers are concerned. Remove them from the
2247 * list of directory entries so that no lookup or
2248 * directory traversal will find them. The node
2249 * not deallocated so existing holds are not affected.
2252 sdev_stale(struct sdev_node
*ddv
)
2254 struct sdev_node
*dv
;
2257 ASSERT(SDEVTOV(ddv
)->v_type
== VDIR
);
2259 rw_enter(&ddv
->sdev_contents
, RW_WRITER
);
2260 while ((dv
= SDEV_FIRST_ENTRY(ddv
)) != NULL
) {
2263 if (vp
->v_type
== VDIR
)
2266 sdev_dirdelete(ddv
, dv
);
2269 ddv
->sdev_flags
|= SDEV_BUILD
;
2270 rw_exit(&ddv
->sdev_contents
);
2274 * Given a directory node, clean out all the nodes beneath.
2275 * If expr is specified, clean node with names matching expr.
2276 * If SDEV_ENFORCE is specified in flags, busy nodes are made stale,
2277 * so they are excluded from future lookups.
2280 sdev_cleandir(struct sdev_node
*ddv
, char *expr
, uint_t flags
)
2285 struct sdev_node
*dv
, *next
;
2288 char *bks_name
= NULL
;
2290 ASSERT(SDEVTOV(ddv
)->v_type
== VDIR
);
2293 * We try our best to destroy all unused sdev_node's
2295 rw_enter(&ddv
->sdev_contents
, RW_WRITER
);
2296 for (dv
= SDEV_FIRST_ENTRY(ddv
); dv
!= NULL
; dv
= next
) {
2297 next
= SDEV_NEXT_ENTRY(ddv
, dv
);
2300 if (expr
&& gmatch(dv
->sdev_name
, expr
) == 0)
2303 if (vp
->v_type
== VDIR
&&
2304 sdev_cleandir(dv
, NULL
, flags
) != 0) {
2305 sdcmn_err9(("sdev_cleandir: dir %s busy\n",
2311 if (vp
->v_count
> 0 && (flags
& SDEV_ENFORCE
) == 0) {
2312 sdcmn_err9(("sdev_cleandir: dir %s busy\n",
2319 * at this point, either dv is not held or SDEV_ENFORCE
2320 * is specified. In either case, dv needs to be deleted
2324 bkstore
= SDEV_IS_PERSIST(dv
) ? 1 : 0;
2325 if (bkstore
&& (vp
->v_type
== VDIR
))
2329 len
= strlen(dv
->sdev_name
) + 1;
2330 bks_name
= kmem_alloc(len
, KM_SLEEP
);
2331 bcopy(dv
->sdev_name
, bks_name
, len
);
2334 sdev_dirdelete(ddv
, dv
);
2336 /* take care the backing store clean up */
2339 ASSERT(ddv
->sdev_attrvp
);
2342 error
= VOP_REMOVE(ddv
->sdev_attrvp
,
2343 bks_name
, kcred
, NULL
, 0);
2344 } else if (bkstore
== 2) {
2345 error
= VOP_RMDIR(ddv
->sdev_attrvp
,
2346 bks_name
, ddv
->sdev_attrvp
, kcred
, NULL
, 0);
2349 /* do not propagate the backing store errors */
2351 sdcmn_err9(("sdev_cleandir: backing store"
2357 kmem_free(bks_name
, len
);
2362 ddv
->sdev_flags
|= SDEV_BUILD
;
2366 ddv
->sdev_flags
|= SDEV_BUILD
;
2367 rw_exit(&ddv
->sdev_contents
);
2377 * a convenient wrapper for readdir() funcs
2380 add_dir_entry(dirent64_t
*de
, char *nm
, size_t size
, ino_t ino
, offset_t off
)
2382 size_t reclen
= DIRENT64_RECLEN(strlen(nm
));
2386 de
->d_ino
= (ino64_t
)ino
;
2387 de
->d_off
= (off64_t
)off
+ 1;
2388 de
->d_reclen
= (ushort_t
)reclen
;
2389 (void) strncpy(de
->d_name
, nm
, DIRENT64_NAMELEN(reclen
));
2394 * sdev_mount service routines
2397 sdev_copyin_mountargs(struct mounta
*uap
, struct sdev_mountargs
*args
)
2401 if (uap
->datalen
!= sizeof (*args
))
2404 if (error
= copyin(uap
->dataptr
, args
, sizeof (*args
))) {
2405 cmn_err(CE_WARN
, "sdev_copyin_mountargs: can not"
2406 "get user data. error %d\n", error
);
2416 #define nextdp(dp) ((struct dirent64 *) \
2417 (intptr_t)((char *)(dp) + (dp)->d_reclen))
2420 * readdir helper func
2423 devname_readdir_func(vnode_t
*vp
, uio_t
*uiop
, cred_t
*cred
, int *eofp
,
2426 struct sdev_node
*ddv
= VTOSDEV(vp
);
2427 struct sdev_node
*dv
;
2429 ulong_t outcount
= 0;
2431 ulong_t alloc_count
;
2439 int (*vtor
)(struct sdev_node
*) = NULL
;
2443 ASSERT(ddv
->sdev_attr
|| ddv
->sdev_attrvp
);
2444 ASSERT(RW_READ_HELD(&ddv
->sdev_contents
));
2446 if (uiop
->uio_loffset
>= MAXOFF_T
) {
2452 if (uiop
->uio_iovcnt
!= 1)
2455 if (vp
->v_type
!= VDIR
)
2458 if (ddv
->sdev_flags
& SDEV_VTOR
) {
2459 vtor
= (int (*)(struct sdev_node
*))sdev_get_vtor(ddv
);
2466 soff
= uiop
->uio_loffset
;
2467 iovp
= uiop
->uio_iov
;
2468 alloc_count
= iovp
->iov_len
;
2469 dp
= outbuf
= kmem_alloc(alloc_count
, KM_SLEEP
);
2472 if (ddv
->sdev_state
== SDEV_ZOMBIE
)
2475 if (SDEV_IS_GLOBAL(ddv
)) {
2477 if ((sdev_boot_state
== SDEV_BOOT_STATE_COMPLETE
) &&
2478 !sdev_reconfig_boot
&& (flags
& SDEV_BROWSE
) &&
2479 !SDEV_IS_DYNAMIC(ddv
) && !SDEV_IS_NO_NCACHE(ddv
) &&
2480 ((moddebug
& MODDEBUG_FINI_EBUSY
) == 0) &&
2481 !DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state
) &&
2482 !DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state
) &&
2483 !sdev_reconfig_disable
) {
2485 * invoking "devfsadm" to do system device reconfig
2487 mutex_enter(&ddv
->sdev_lookup_lock
);
2488 SDEV_BLOCK_OTHERS(ddv
,
2489 (SDEV_READDIR
|SDEV_LGWAITING
));
2490 mutex_exit(&ddv
->sdev_lookup_lock
);
2492 sdcmn_err8(("readdir of %s by %s: reconfig\n",
2493 ddv
->sdev_path
, curproc
->p_user
.u_comm
));
2494 if (sdev_reconfig_verbose
) {
2496 "?readdir of %s by %s: reconfig\n",
2497 ddv
->sdev_path
, curproc
->p_user
.u_comm
);
2500 sdev_devfsadmd_thread(ddv
, NULL
, kcred
);
2501 } else if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state
)) {
2503 * compensate the "ls" started later than "devfsadm"
2505 mutex_enter(&ddv
->sdev_lookup_lock
);
2506 SDEV_BLOCK_OTHERS(ddv
, (SDEV_READDIR
|SDEV_LGWAITING
));
2507 mutex_exit(&ddv
->sdev_lookup_lock
);
2511 * release the contents lock so that
2512 * the cache may be updated by devfsadmd
2514 rw_exit(&ddv
->sdev_contents
);
2515 mutex_enter(&ddv
->sdev_lookup_lock
);
2516 if (SDEV_IS_READDIR(ddv
))
2517 (void) sdev_wait4lookup(ddv
, SDEV_READDIR
);
2518 mutex_exit(&ddv
->sdev_lookup_lock
);
2519 rw_enter(&ddv
->sdev_contents
, RW_READER
);
2521 sdcmn_err4(("readdir of directory %s by %s\n",
2522 ddv
->sdev_name
, curproc
->p_user
.u_comm
));
2523 if (ddv
->sdev_flags
& SDEV_BUILD
) {
2524 if (SDEV_IS_PERSIST(ddv
)) {
2525 error
= sdev_filldir_from_store(ddv
,
2528 ddv
->sdev_flags
&= ~SDEV_BUILD
;
2533 /* handle "." and ".." */
2537 this_reclen
= DIRENT64_RECLEN(1);
2538 if (alloc_count
< this_reclen
) {
2543 dp
->d_ino
= (ino64_t
)ddv
->sdev_ino
;
2544 dp
->d_off
= (off64_t
)1;
2545 dp
->d_reclen
= (ushort_t
)this_reclen
;
2547 (void) strncpy(dp
->d_name
, ".",
2548 DIRENT64_NAMELEN(this_reclen
));
2549 outcount
+= dp
->d_reclen
;
2555 this_reclen
= DIRENT64_RECLEN(2);
2556 if (alloc_count
< outcount
+ this_reclen
) {
2561 dp
->d_reclen
= (ushort_t
)this_reclen
;
2562 dp
->d_ino
= (ino64_t
)ddv
->sdev_dotdot
->sdev_ino
;
2563 dp
->d_off
= (off64_t
)2;
2565 (void) strncpy(dp
->d_name
, "..",
2566 DIRENT64_NAMELEN(this_reclen
));
2567 outcount
+= dp
->d_reclen
;
2573 /* gets the cache */
2575 for (dv
= SDEV_FIRST_ENTRY(ddv
); dv
;
2576 dv
= SDEV_NEXT_ENTRY(ddv
, dv
), diroff
++) {
2577 sdcmn_err3(("sdev_readdir: diroff %lld soff %lld for '%s' \n",
2578 diroff
, soff
, dv
->sdev_name
));
2580 /* bypassing pre-matured nodes */
2581 if (diroff
< soff
|| (dv
->sdev_state
!= SDEV_READY
)) {
2582 sdcmn_err3(("sdev_readdir: pre-mature node "
2583 "%s %d\n", dv
->sdev_name
, dv
->sdev_state
));
2588 * Check validity of node
2589 * Drop invalid and nodes to be skipped.
2590 * A node the validator indicates as stale needs
2591 * to be returned as presumably the node name itself
2592 * is valid and the node data itself will be refreshed
2593 * on lookup. An application performing a readdir then
2594 * stat on each entry should thus always see consistent
2595 * data. In any case, it is not possible to synchronize
2596 * with dynamic kernel state, and any view we return can
2597 * never be anything more than a snapshot at a point in time.
2601 case SDEV_VTOR_VALID
:
2603 case SDEV_VTOR_INVALID
:
2604 case SDEV_VTOR_SKIP
:
2606 case SDEV_VTOR_STALE
:
2607 sdcmn_err3(("sdev_readir: %s stale\n",
2612 "dev fs: validator failed: %s(%p)\n",
2613 dv
->sdev_name
, (void *)dv
);
2619 namelen
= strlen(dv
->sdev_name
);
2620 reclen
= DIRENT64_RECLEN(namelen
);
2621 if (outcount
+ reclen
> alloc_count
) {
2624 dp
->d_reclen
= (ushort_t
)reclen
;
2625 dp
->d_ino
= (ino64_t
)dv
->sdev_ino
;
2626 dp
->d_off
= (off64_t
)diroff
+ 1;
2627 (void) strncpy(dp
->d_name
, dv
->sdev_name
,
2628 DIRENT64_NAMELEN(reclen
));
2634 sdcmn_err4(("sdev_readdir: moving %lu bytes: "
2635 "diroff %lld, soff %lld, dv %p\n", outcount
, diroff
, soff
,
2639 error
= uiomove(outbuf
, outcount
, UIO_READ
, uiop
);
2642 uiop
->uio_loffset
= diroff
;
2648 if (ddv
->sdev_attrvp
) {
2650 attr
.va_ctime
= now
;
2651 attr
.va_atime
= now
;
2652 attr
.va_mask
= AT_CTIME
|AT_ATIME
;
2654 (void) VOP_SETATTR(ddv
->sdev_attrvp
, &attr
, 0, kcred
, NULL
);
2657 kmem_free(outbuf
, alloc_count
);
2662 sdev_modctl_lookup(const char *path
, vnode_t
**r_vp
)
2666 struct sdev_node
*svp
;
2672 ASSERT(INGLOBALZONE(curproc
));
2674 if (error
= pn_get((char *)path
, UIO_SYSSPACE
, &pn
))
2676 nm
= kmem_alloc(MAXNAMELEN
, KM_SLEEP
);
2681 while (pn_pathleft(&pn
)) {
2682 ASSERT(vp
->v_type
== VDIR
|| vp
->v_type
== VLNK
);
2683 (void) pn_getcomponent(&pn
, nm
);
2686 * Deal with the .. special case where we may be
2687 * traversing up across a mount point, to the
2688 * root of this filesystem or global root.
2690 if (nm
[0] == '.' && nm
[1] == '.' && nm
[2] == 0) {
2692 if (VN_CMP(vp
, rootdir
)) {
2694 } else if (vp
->v_flag
& VROOT
) {
2698 vfs_rlock_wait(vfsp
);
2699 vp
= cvp
->v_vfsp
->vfs_vnodecovered
;
2701 (cvp
->v_vfsp
->vfs_flag
& VFS_UNMOUNTED
)) {
2715 error
= VOP_LOOKUP(vp
, nm
, &cvp
, NULL
, 0, NULL
, kcred
, NULL
,
2722 /* traverse mount points encountered on our journey */
2723 if (vn_ismntpt(cvp
) && (error
= traverse(&cvp
)) != 0) {
2730 * symbolic link, can be either relative and absolute
2732 if ((cvp
->v_type
== VLNK
) && pn_pathleft(&pn
)) {
2733 struct pathname linkpath
;
2734 pn_alloc(&linkpath
);
2735 if (error
= pn_getsymlink(cvp
, &linkpath
, kcred
)) {
2739 if (pn_pathleft(&linkpath
) == 0)
2740 (void) pn_set(&linkpath
, ".");
2741 error
= pn_insert(&pn
, &linkpath
, strlen(nm
));
2743 if (pn
.pn_pathlen
== 0) {
2747 if (pn
.pn_path
[0] == '/') {
2762 * Direct the operation to the persisting filesystem
2763 * underlying /dev. Bail if we encounter a
2764 * non-persistent dev entity here.
2766 if (cvp
->v_vfsp
->vfs_fstype
== devtype
) {
2768 if ((VTOSDEV(cvp
)->sdev_flags
& SDEV_PERSIST
) == 0) {
2774 if (VTOSDEV(cvp
) == NULL
) {
2780 if ((vp
= svp
->sdev_attrvp
) == NULL
) {
2795 kmem_free(nm
, MAXNAMELEN
);
2802 * Only return persisted nodes in the filesystem underlying /dev.
2814 sdev_modctl_readdir(const char *dir
, char ***dirlistp
, int *npathsp
,
2815 int *npathsp_alloc
, int checking_empty
)
2817 char **pathlist
= NULL
;
2818 char **newlist
= NULL
;
2820 int npaths_alloc
= 0;
2821 dirent64_t
*dbuf
= NULL
;
2829 struct dirent64
*dp
;
2835 error
= sdev_modctl_lookup(dir
, &vp
);
2836 sdcmn_err11(("modctl readdir: %s by %s: %s\n",
2837 dir
, curproc
->p_user
.u_comm
,
2838 (error
== 0) ? "ok" : "failed"));
2842 dlen
= ndirents
* (sizeof (*dbuf
));
2843 dbuf
= kmem_alloc(dlen
, KM_SLEEP
);
2847 uio
.uio_segflg
= UIO_SYSSPACE
;
2849 uio
.uio_extflg
= UIO_COPY_CACHED
;
2850 uio
.uio_loffset
= 0;
2851 uio
.uio_llimit
= MAXOFFSET_T
;
2855 while (!error
&& !eof
) {
2856 uio
.uio_resid
= dlen
;
2857 iov
.iov_base
= (char *)dbuf
;
2860 (void) VOP_RWLOCK(vp
, V_WRITELOCK_FALSE
, NULL
);
2861 error
= VOP_READDIR(vp
, &uio
, kcred
, &eof
, NULL
, 0);
2862 VOP_RWUNLOCK(vp
, V_WRITELOCK_FALSE
, NULL
);
2864 dbuflen
= dlen
- uio
.uio_resid
;
2866 if (error
|| dbuflen
== 0)
2869 for (dp
= dbuf
; ((intptr_t)dp
< (intptr_t)dbuf
+ dbuflen
);
2870 dp
= (dirent64_t
*)((intptr_t)dp
+ dp
->d_reclen
)) {
2874 if (strcmp(nm
, ".") == 0 || strcmp(nm
, "..") == 0)
2876 if (npaths
== npaths_alloc
) {
2879 kmem_zalloc((npaths_alloc
+ 1) *
2880 sizeof (char *), KM_SLEEP
);
2882 bcopy(pathlist
, newlist
,
2883 npaths
* sizeof (char *));
2885 (npaths
+ 1) * sizeof (char *));
2890 s
= kmem_alloc(n
, KM_SLEEP
);
2892 pathlist
[npaths
++] = s
;
2893 sdcmn_err11((" %s/%s\n", dir
, s
));
2895 /* if checking empty, one entry is as good as many */
2896 if (checking_empty
) {
2907 kmem_free(dbuf
, dlen
);
2912 *dirlistp
= pathlist
;
2914 *npathsp_alloc
= npaths_alloc
;
/*
 * Free a path list previously returned by sdev_modctl_readdir().
 * npaths is the number of populated entries; npaths_alloc is the
 * allocation size of the list (one extra slot for NULL termination).
 */
void
sdev_modctl_readdir_free(char **pathlist, int npaths, int npaths_alloc)
{
	int	i, n;

	for (i = 0; i < npaths; i++) {
		n = strlen(pathlist[i]) + 1;
		kmem_free(pathlist[i], n);
	}

	kmem_free(pathlist, (npaths_alloc + 1) * sizeof (char *));
}
2933 sdev_modctl_devexists(const char *path
)
2938 error
= sdev_modctl_lookup(path
, &vp
);
2939 sdcmn_err11(("modctl dev exists: %s by %s: %s\n",
2940 path
, curproc
->p_user
.u_comm
,
2941 (error
== 0) ? "ok" : "failed"));
2948 extern int sdev_vnodeops_tbl_size
;
2951 * construct a new template with overrides from vtab
2953 static fs_operation_def_t
*
2954 sdev_merge_vtab(const fs_operation_def_t tab
[])
2956 fs_operation_def_t
*new;
2957 const fs_operation_def_t
*tab_entry
;
2959 /* make a copy of standard vnode ops table */
2960 new = kmem_alloc(sdev_vnodeops_tbl_size
, KM_SLEEP
);
2961 bcopy((void *)sdev_vnodeops_tbl
, new, sdev_vnodeops_tbl_size
);
2963 /* replace the overrides from tab */
2964 for (tab_entry
= tab
; tab_entry
->name
!= NULL
; tab_entry
++) {
2965 fs_operation_def_t
*std_entry
= new;
2966 while (std_entry
->name
) {
2967 if (strcmp(tab_entry
->name
, std_entry
->name
) == 0) {
2968 std_entry
->func
= tab_entry
->func
;
2973 if (std_entry
->name
== NULL
)
2974 cmn_err(CE_NOTE
, "sdev_merge_vtab: entry %s unused.",
2981 /* free memory allocated by sdev_merge_vtab */
2983 sdev_free_vtab(fs_operation_def_t
*new)
2985 kmem_free(new, sdev_vnodeops_tbl_size
);
2989 * a generic setattr() function
2991 * note: flags only supports AT_UID and AT_GID.
2992 * Future enhancements can be done for other types, e.g. AT_MODE
2995 devname_setattr_func(struct vnode
*vp
, struct vattr
*vap
, int flags
,
2996 struct cred
*cred
, int (*callback
)(struct sdev_node
*, struct vattr
*,
2999 struct sdev_node
*dv
= VTOSDEV(vp
);
3000 struct sdev_node
*parent
= dv
->sdev_dotdot
;
3002 uint_t mask
= vap
->va_mask
;
3005 /* some sanity checks */
3006 if (vap
->va_mask
& AT_NOSET
)
3009 if (vap
->va_mask
& AT_SIZE
) {
3010 if (vp
->v_type
== VDIR
) {
3015 /* no need to set attribute, but do not fail either */
3017 rw_enter(&parent
->sdev_contents
, RW_READER
);
3018 if (dv
->sdev_state
== SDEV_ZOMBIE
) {
3019 rw_exit(&parent
->sdev_contents
);
3023 /* If backing store exists, just set it. */
3024 if (dv
->sdev_attrvp
) {
3025 rw_exit(&parent
->sdev_contents
);
3026 return (VOP_SETATTR(dv
->sdev_attrvp
, vap
, flags
, cred
, NULL
));
3030 * Otherwise, for nodes with the persistence attribute, create it.
3032 ASSERT(dv
->sdev_attr
);
3033 if (SDEV_IS_PERSIST(dv
) ||
3034 ((vap
->va_mask
& ~AT_TIMES
) != 0 && !SDEV_IS_DYNAMIC(dv
))) {
3035 sdev_vattr_merge(dv
, vap
);
3036 rw_enter(&dv
->sdev_contents
, RW_WRITER
);
3037 error
= sdev_shadow_node(dv
, cred
);
3038 rw_exit(&dv
->sdev_contents
);
3039 rw_exit(&parent
->sdev_contents
);
3043 return (VOP_SETATTR(dv
->sdev_attrvp
, vap
, flags
, cred
, NULL
));
3048 * sdev_attr was allocated in sdev_mknode
3050 rw_enter(&dv
->sdev_contents
, RW_WRITER
);
3051 error
= secpolicy_vnode_setattr(cred
, vp
, vap
,
3052 dv
->sdev_attr
, flags
, sdev_unlocked_access
, dv
);
3054 rw_exit(&dv
->sdev_contents
);
3055 rw_exit(&parent
->sdev_contents
);
3059 get
= dv
->sdev_attr
;
3060 if (mask
& AT_MODE
) {
3061 get
->va_mode
&= S_IFMT
;
3062 get
->va_mode
|= vap
->va_mode
& ~S_IFMT
;
3065 if ((mask
& AT_UID
) || (mask
& AT_GID
)) {
3067 get
->va_uid
= vap
->va_uid
;
3069 get
->va_gid
= vap
->va_gid
;
3071 * a callback must be provided if the protocol is set
3073 if ((protocol
& AT_UID
) || (protocol
& AT_GID
)) {
3075 error
= callback(dv
, get
, protocol
);
3077 rw_exit(&dv
->sdev_contents
);
3078 rw_exit(&parent
->sdev_contents
);
3084 if (mask
& AT_ATIME
)
3085 get
->va_atime
= vap
->va_atime
;
3086 if (mask
& AT_MTIME
)
3087 get
->va_mtime
= vap
->va_mtime
;
3088 if (mask
& (AT_MODE
| AT_UID
| AT_GID
| AT_CTIME
)) {
3089 gethrestime(&get
->va_ctime
);
3092 sdev_vattr_merge(dv
, get
);
3093 rw_exit(&dv
->sdev_contents
);
3094 rw_exit(&parent
->sdev_contents
);
3099 * a generic inactive() function
3103 devname_inactive_func(struct vnode
*vp
, struct cred
*cred
,
3104 void (*callback
)(struct vnode
*))
3107 struct sdev_node
*dv
= VTOSDEV(vp
);
3110 mutex_enter(&vp
->v_lock
);
3111 ASSERT(vp
->v_count
>= 1);
3114 if (vp
->v_count
== 1 && callback
!= NULL
)
3117 rw_enter(&dv
->sdev_contents
, RW_WRITER
);
3118 state
= dv
->sdev_state
;
3120 clean
= (vp
->v_count
== 1) && (state
== SDEV_ZOMBIE
);
3123 * sdev is a rather bad public citizen. It violates the general
3124 * agreement that in memory nodes should always have a valid reference
3125 * count on their vnode. But that's not the case here. This means that
3126 * we do actually have to distinguish between getting inactive callbacks
3127 * for zombies and otherwise. This should probably be fixed.
3130 /* Remove the . entry to ourselves */
3131 if (vp
->v_type
== VDIR
) {
3134 VERIFY(dv
->sdev_nlink
== 1);
3137 rw_exit(&dv
->sdev_contents
);
3138 mutex_exit(&vp
->v_lock
);
3139 sdev_nodedestroy(dv
, 0);
3142 rw_exit(&dv
->sdev_contents
);
3143 mutex_exit(&vp
->v_lock
);