4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2017 by Delphix. All rights reserved.
27 * vnode ops for the devfs
29 * For leaf vnode special files (VCHR|VBLK) specfs will always see the VOP
30 * first because dv_find always performs leaf vnode substitution, returning
31 * a specfs vnode with an s_realvp pointing to the devfs leaf vnode. This
32 * means that the only leaf special file VOP operations that devfs will see
33 * after fop_lookup are the ones that specfs forwards.
36 #include <sys/types.h>
37 #include <sys/param.h>
38 #include <sys/t_lock.h>
39 #include <sys/systm.h>
40 #include <sys/sysmacros.h>
44 #include <sys/vnode.h>
46 #include <sys/fcntl.h>
47 #include <sys/flock.h>
50 #include <sys/errno.h>
53 #include <sys/dirent.h>
54 #include <sys/pathname.h>
55 #include <sys/cmn_err.h>
56 #include <sys/debug.h>
57 #include <sys/policy.h>
58 #include <sys/modctl.h>
59 #include <sys/sunndi.h>
60 #include <sys/fs_subr.h>
61 #include <sys/fs/dv_node.h>
63 extern struct vattr dv_vattr_dir
, dv_vattr_file
;
64 extern dev_t rconsdev
;
67 * Open of devices (leaf nodes) is handled by specfs.
68 * There is nothing to do to open a directory
72 devfs_open(struct vnode
**vpp
, int flag
, struct cred
*cred
,
75 struct dv_node
*dv
= VTODV(*vpp
);
77 dcmn_err2(("devfs_open %s\n", dv
->dv_name
));
78 ASSERT((*vpp
)->v_type
== VDIR
);
83 * Close of devices (leaf nodes) is handled by specfs.
84 * There is nothing much to do in order to close a directory.
88 devfs_close(struct vnode
*vp
, int flag
, int count
,
89 offset_t offset
, struct cred
*cred
, caller_context_t
*ct
)
91 struct dv_node
*dv
= VTODV(vp
);
93 dcmn_err2(("devfs_close %s\n", dv
->dv_name
));
94 ASSERT(vp
->v_type
== VDIR
);
96 cleanlocks(vp
, ttoproc(curthread
)->p_pid
, 0);
97 cleanshares(vp
, ttoproc(curthread
)->p_pid
);
102 * Read of devices (leaf nodes) is handled by specfs.
103 * Read of directories is not supported.
107 devfs_read(struct vnode
*vp
, struct uio
*uiop
, int ioflag
, struct cred
*cred
,
108 struct caller_context
*ct
)
110 dcmn_err2(("devfs_read %s\n", VTODV(vp
)->dv_name
));
111 ASSERT(vp
->v_type
== VDIR
);
112 ASSERT(RW_READ_HELD(&VTODV(vp
)->dv_contents
));
117 * Write of devices (leaf nodes) is handled by specfs.
118 * Write of directories is not supported.
122 devfs_write(struct vnode
*vp
, struct uio
*uiop
, int ioflag
, struct cred
*cred
,
123 struct caller_context
*ct
)
125 dcmn_err2(("devfs_write %s\n", VTODV(vp
)->dv_name
));
126 ASSERT(vp
->v_type
== VDIR
);
127 ASSERT(RW_WRITE_HELD(&VTODV(vp
)->dv_contents
));
132 * Ioctls to device (leaf nodes) is handled by specfs.
133 * Ioctl to directories is not supported.
137 devfs_ioctl(struct vnode
*vp
, int cmd
, intptr_t arg
, int flag
,
138 struct cred
*cred
, int *rvalp
, caller_context_t
*ct
)
140 dcmn_err2(("devfs_ioctl %s\n", VTODV(vp
)->dv_name
));
141 ASSERT(vp
->v_type
== VDIR
);
143 return (ENOTTY
); /* no ioctls supported */
147 * We can be asked directly about the attributes of directories, or
148 * (via sp->s_realvp) about the filesystem attributes of special files.
150 * For directories, we just believe the attribute store
151 * though we mangle the nodeid, fsid, and rdev to convince userland we
152 * really are a different filesystem.
154 * For special files, a little more fakery is required.
156 * If the attribute store is not there (read only root), we believe our
157 * memory based attributes.
160 devfs_getattr(struct vnode
*vp
, struct vattr
*vap
, int flags
, struct cred
*cr
,
161 caller_context_t
*ct
)
163 struct dv_node
*dv
= VTODV(vp
);
168 * Message goes to console only. Otherwise, the message
169 * causes devfs_getattr to be invoked again... infinite loop
171 dcmn_err2(("?devfs_getattr %s\n", dv
->dv_name
));
172 ASSERT(dv
->dv_attr
|| dv
->dv_attrvp
);
174 if (!(vp
->v_type
== VDIR
|| vp
->v_type
== VCHR
|| vp
->v_type
== VBLK
)) {
175 cmn_err(CE_WARN
, /* panic ? */
176 "?%s: getattr on vnode type %d", dvnm
, vp
->v_type
);
180 rw_enter(&dv
->dv_contents
, RW_READER
);
183 * obtain from the memory version of attribute.
184 * preserve mask for those that optimize.
185 * devfs specific fields are already merged on creation.
191 /* obtain from attribute store and merge */
192 error
= fop_getattr(dv
->dv_attrvp
, vap
, flags
, cr
, ct
);
193 dsysdebug(error
, ("vop_getattr %s %d\n", dv
->dv_name
, error
));
194 dv_vattr_merge(dv
, vap
);
196 rw_exit(&dv
->dv_contents
);
199 * Restrict the permissions of the node fronting the console
200 * to 0600 with root as the owner. This prevents a non-root
201 * user from gaining access to a serial terminal (like /dev/term/a)
202 * which is in reality serving as the console device (/dev/console).
204 if (vp
->v_rdev
== rconsdev
) {
205 mode_t rconsmask
= S_IXUSR
|S_IRWXG
|S_IRWXO
;
206 vap
->va_mode
&= (~rconsmask
);
213 static int devfs_unlocked_access(void *, int, struct cred
*);
229 ASSERT(dv
->dv_attr
|| dv
->dv_attrvp
);
231 ASSERT(vp
->v_type
== VDIR
);
232 ASSERT((dv
->dv_flags
& DV_NO_FSPERM
) == 0);
234 if (vap
->va_mask
& AT_NOSET
)
237 /* to ensure consistency, single thread setting of attributes */
238 rw_enter(&dv
->dv_contents
, RW_WRITER
);
240 again
: if (dv
->dv_attr
) {
242 error
= secpolicy_vnode_setattr(cr
, vp
, vap
,
243 dv
->dv_attr
, flags
, devfs_unlocked_access
, dv
);
249 * Apply changes to the memory based attribute. This code
250 * is modeled after the tmpfs implementation of memory
256 /* Change file access modes. */
257 if (mask
& AT_MODE
) {
258 map
->va_mode
&= S_IFMT
;
259 map
->va_mode
|= vap
->va_mode
& ~S_IFMT
;
262 map
->va_uid
= vap
->va_uid
;
264 map
->va_gid
= vap
->va_gid
;
266 map
->va_atime
= vap
->va_atime
;
268 map
->va_mtime
= vap
->va_mtime
;
270 if (mask
& (AT_MODE
| AT_UID
| AT_GID
| AT_MTIME
))
271 gethrestime(&map
->va_ctime
);
273 /* use the backing attribute store */
274 ASSERT(dv
->dv_attrvp
);
277 * See if we are changing something we care about
278 * the persistence of - return success if we don't care.
280 if (vap
->va_mask
& (AT_MODE
|AT_UID
|AT_GID
|AT_ATIME
|AT_MTIME
)) {
281 /* Set the attributes */
282 error
= fop_setattr(dv
->dv_attrvp
,
283 vap
, flags
, cr
, NULL
);
285 ("vop_setattr %s %d\n", dv
->dv_name
, error
));
288 * Some file systems may return EROFS for a setattr
289 * on a readonly file system. In this case we create
290 * our own memory based attribute.
292 if (error
== EROFS
) {
294 * obtain attributes from existing file
295 * that we will modify and switch to memory
296 * based attribute until attribute store is
299 vattr
= dv_vattr_dir
;
300 if (fop_getattr(dv
->dv_attrvp
,
301 &vattr
, flags
, cr
, NULL
) == 0) {
302 dv
->dv_attr
= kmem_alloc(
303 sizeof (struct vattr
), KM_SLEEP
);
304 *dv
->dv_attr
= vattr
;
305 dv_vattr_merge(dv
, dv
->dv_attr
);
312 rw_exit(&dv
->dv_contents
);
318 * Compare the uid/gid/mode changes requested for a setattr
319 * operation with the same details of a node's default minor
320 * perm information. Return 0 if identical.
323 dv_setattr_cmp(struct vattr
*map
, mperm_t
*mp
)
325 if ((map
->va_mode
& S_IAMB
) != (mp
->mp_mode
& S_IAMB
))
327 if (map
->va_uid
!= mp
->mp_uid
)
329 if (map
->va_gid
!= mp
->mp_gid
)
342 caller_context_t
*ct
)
344 struct dv_node
*dv
= VTODV(vp
);
350 struct vattr
*free_vattr
= NULL
;
351 struct vattr
*vattrp
= NULL
;
356 * Message goes to console only. Otherwise, the message
357 * causes devfs_getattr to be invoked again... infinite loop
359 dcmn_err2(("?devfs_setattr %s\n", dv
->dv_name
));
360 ASSERT(dv
->dv_attr
|| dv
->dv_attrvp
);
362 if (!(vp
->v_type
== VDIR
|| vp
->v_type
== VCHR
|| vp
->v_type
== VBLK
)) {
363 cmn_err(CE_WARN
, /* panic ? */
364 "?%s: getattr on vnode type %d", dvnm
, vp
->v_type
);
368 if (vap
->va_mask
& AT_NOSET
)
372 * If we are changing something we don't care about
373 * the persistence of, return success.
376 (AT_MODE
|AT_UID
|AT_GID
|AT_ATIME
|AT_MTIME
)) == 0)
380 * If driver overrides fs perm, disallow chmod
381 * and do not create attribute nodes.
383 if (dv
->dv_flags
& DV_NO_FSPERM
) {
385 if (vap
->va_mask
& (AT_MODE
| AT_UID
| AT_GID
))
387 if ((vap
->va_mask
& (AT_ATIME
|AT_MTIME
)) == 0)
389 rw_enter(&dv
->dv_contents
, RW_WRITER
);
390 if (vap
->va_mask
& AT_ATIME
)
391 dv
->dv_attr
->va_atime
= vap
->va_atime
;
392 if (vap
->va_mask
& AT_MTIME
)
393 dv
->dv_attr
->va_mtime
= vap
->va_mtime
;
394 rw_exit(&dv
->dv_contents
);
399 * Directories are always created but device nodes are
400 * only used to persist non-default permissions.
402 if (vp
->v_type
== VDIR
) {
403 ASSERT(dv
->dv_attr
|| dv
->dv_attrvp
);
404 return (devfs_setattr_dir(dv
, vp
, vap
, flags
, cr
));
408 * Allocate now before we take any locks
410 vattrp
= kmem_zalloc(sizeof (*vattrp
), KM_SLEEP
);
412 /* to ensure consistency, single thread setting of attributes */
413 rw_enter(&dv
->dv_contents
, RW_WRITER
);
416 * We don't need to create an attribute node
417 * to persist access or modification times.
419 persist
= (vap
->va_mask
& (AT_MODE
| AT_UID
| AT_GID
));
422 * If persisting something, get the default permissions
423 * for this minor to compare against what the attributes
424 * are now being set to. Default ordering is:
425 * - minor_perm match for this minor
426 * - mode supplied by ddi_create_priv_minor_node
430 if (dev_minorperm(dv
->dv_devi
, dv
->dv_name
, &mp
) != 0) {
431 mp
.mp_uid
= dv_vattr_file
.va_uid
;
432 mp
.mp_gid
= dv_vattr_file
.va_gid
;
433 mp
.mp_mode
= dv_vattr_file
.va_mode
;
434 if (dv
->dv_flags
& DV_DFLT_MODE
) {
435 ASSERT((dv
->dv_dflt_mode
& ~S_IAMB
) == 0);
436 mp
.mp_mode
&= ~S_IAMB
;
437 mp
.mp_mode
|= dv
->dv_dflt_mode
;
438 dcmn_err5(("%s: setattr priv default 0%o\n",
439 dv
->dv_name
, mp
.mp_mode
));
441 dcmn_err5(("%s: setattr devfs default 0%o\n",
442 dv
->dv_name
, mp
.mp_mode
));
445 dcmn_err5(("%s: setattr minor perm default 0%o\n",
446 dv
->dv_name
, mp
.mp_mode
));
451 * If we don't have a vattr for this node, construct one.
457 ASSERT(dv
->dv_attrvp
);
458 ASSERT(vp
->v_type
!= VDIR
);
459 *vattrp
= dv_vattr_file
;
460 error
= fop_getattr(dv
->dv_attrvp
, vattrp
, 0, cr
, ct
);
461 dsysdebug(error
, ("vop_getattr %s %d\n", dv
->dv_name
, error
));
464 dv
->dv_attr
= vattrp
;
465 dv_vattr_merge(dv
, dv
->dv_attr
);
469 error
= secpolicy_vnode_setattr(cr
, vp
, vap
, dv
->dv_attr
,
470 flags
, devfs_unlocked_access
, dv
);
472 dsysdebug(error
, ("devfs_setattr %s secpolicy error %d\n",
473 dv
->dv_name
, error
));
478 * Apply changes to the memory based attribute. This code
479 * is modeled after the tmpfs implementation of memory
485 /* Change file access modes. */
486 if (mask
& AT_MODE
) {
487 map
->va_mode
&= S_IFMT
;
488 map
->va_mode
|= vap
->va_mode
& ~S_IFMT
;
491 map
->va_uid
= vap
->va_uid
;
493 map
->va_gid
= vap
->va_gid
;
495 map
->va_atime
= vap
->va_atime
;
497 map
->va_mtime
= vap
->va_mtime
;
499 if (mask
& (AT_MODE
| AT_UID
| AT_GID
| AT_MTIME
)) {
500 gethrestime(&map
->va_ctime
);
504 * A setattr to defaults means we no longer need the
505 * shadow node as a persistent store, unless there
506 * are ACLs. Otherwise create a shadow node if one
510 if ((dv_setattr_cmp(map
, &mp
) == 0) &&
511 ((dv
->dv_flags
& DV_ACL
) == 0)) {
515 ASSERT(ddv
->dv_attrvp
);
516 error
= fop_remove(ddv
->dv_attrvp
,
517 dv
->dv_name
, cr
, ct
, 0);
519 ("vop_remove %s %s %d\n",
520 ddv
->dv_name
, dv
->dv_name
, error
));
524 VN_RELE(dv
->dv_attrvp
);
525 dv
->dv_attrvp
= NULL
;
530 dcmn_err5(("%s persisting mode 0%o\n",
531 dv
->dv_name
, vap
->va_mode
));
533 dcmn_err5(("%s persisting uid %d\n",
534 dv
->dv_name
, vap
->va_uid
));
536 dcmn_err5(("%s persisting gid %d\n",
537 dv
->dv_name
, vap
->va_gid
));
539 if (dv
->dv_attrvp
== NULL
) {
540 dvp
= DVTOV(dv
->dv_dotdot
);
541 dv_shadow_node(dvp
, dv
->dv_name
, vp
,
543 DV_SHADOW_CREATE
| DV_SHADOW_WRITE_HELD
);
546 /* If map still valid do TIME for free. */
547 if (dv
->dv_attr
== map
) {
550 vap
->va_mask
| AT_ATIME
| AT_MTIME
;
551 error
= fop_setattr(dv
->dv_attrvp
, map
,
555 error
= fop_setattr(dv
->dv_attrvp
,
556 vap
, flags
, cr
, NULL
);
558 dsysdebug(error
, ("vop_setattr %s %d\n",
559 dv
->dv_name
, error
));
562 * Some file systems may return EROFS for a setattr
563 * on a readonly file system. In this case save
564 * as our own memory based attribute.
565 * NOTE: ufs is NOT one of these (see ufs_iupdat).
567 if (dv
->dv_attr
&& dv
->dv_attrvp
&& error
== 0) {
568 vattrp
= dv
->dv_attr
;
570 } else if (error
== EROFS
)
576 rw_exit(&dv
->dv_contents
);
579 kmem_free(vattrp
, sizeof (*vattrp
));
581 kmem_free(free_vattr
, sizeof (*free_vattr
));
586 devfs_pathconf(vnode_t
*vp
, int cmd
, ulong_t
*valp
, cred_t
*cr
,
587 caller_context_t
*ct
)
590 case _PC_ACL_ENABLED
:
592 * We rely on the underlying filesystem for ACLs,
593 * so direct the query for ACL support there.
594 * ACL support isn't relative to the file
595 * and we can't guarantee that the dv node
596 * has an attribute node, so any valid
597 * attribute node will suffice.
600 ASSERT(dvroot
->dv_attrvp
);
601 return (fop_pathconf(dvroot
->dv_attrvp
, cmd
, valp
, cr
, ct
));
605 return (fs_pathconf(vp
, cmd
, valp
, cr
, ct
));
609 * Let avp handle security attributes (acl's).
612 devfs_getsecattr(struct vnode
*vp
, struct vsecattr
*vsap
, int flags
,
613 struct cred
*cr
, caller_context_t
*ct
)
615 dvnode_t
*dv
= VTODV(vp
);
619 dcmn_err2(("devfs_getsecattr %s\n", dv
->dv_name
));
620 ASSERT(vp
->v_type
== VDIR
|| vp
->v_type
== VCHR
|| vp
->v_type
== VBLK
);
622 rw_enter(&dv
->dv_contents
, RW_READER
);
626 /* fabricate the acl */
628 error
= fs_fab_acl(vp
, vsap
, flags
, cr
, ct
);
629 rw_exit(&dv
->dv_contents
);
633 error
= fop_getsecattr(avp
, vsap
, flags
, cr
, ct
);
634 dsysdebug(error
, ("vop_getsecattr %s %d\n", VTODV(vp
)->dv_name
, error
));
635 rw_exit(&dv
->dv_contents
);
640 * Set security attributes (acl's)
642 * Note that the dv_contents lock has already been acquired
643 * by the caller's fop_rwlock.
646 devfs_setsecattr(struct vnode
*vp
, struct vsecattr
*vsap
, int flags
,
647 struct cred
*cr
, caller_context_t
*ct
)
649 dvnode_t
*dv
= VTODV(vp
);
653 dcmn_err2(("devfs_setsecattr %s\n", dv
->dv_name
));
654 ASSERT(vp
->v_type
== VDIR
|| vp
->v_type
== VCHR
|| vp
->v_type
== VBLK
);
655 ASSERT(RW_LOCK_HELD(&dv
->dv_contents
));
658 * Not a supported operation on drivers not providing
659 * file system based permissions.
661 if (dv
->dv_flags
& DV_NO_FSPERM
)
665 * To complete, the setsecattr requires an underlying attribute node.
667 if (dv
->dv_attrvp
== NULL
) {
668 ASSERT(vp
->v_type
== VCHR
|| vp
->v_type
== VBLK
);
669 dv_shadow_node(DVTOV(dv
->dv_dotdot
), dv
->dv_name
, vp
,
670 NULL
, NULLVP
, cr
, DV_SHADOW_CREATE
| DV_SHADOW_WRITE_HELD
);
673 if ((avp
= dv
->dv_attrvp
) == NULL
) {
674 dcmn_err2(("devfs_setsecattr %s: "
675 "cannot construct attribute node\n", dv
->dv_name
));
680 * The acl(2) system call issues a fop_rwlock before setting an ACL.
681 * Since backing file systems expect the lock to be held before seeing
682 * a fop_setsecattr ACL, we need to issue the fop_rwlock to the backing
683 * store before forwarding the ACL.
685 (void) fop_rwlock(avp
, V_WRITELOCK_TRUE
, NULL
);
686 error
= fop_setsecattr(avp
, vsap
, flags
, cr
, ct
);
687 dsysdebug(error
, ("vop_setsecattr %s %d\n", VTODV(vp
)->dv_name
, error
));
688 fop_rwunlock(avp
, V_WRITELOCK_TRUE
, NULL
);
691 * Set DV_ACL if we have a non-trivial set of ACLs. It is not
692 * necessary to hold fop_rwlock since fs_acl_nontrivial only does
693 * fop_getsecattr calls.
695 if (fs_acl_nontrivial(avp
, cr
))
696 dv
->dv_flags
|= DV_ACL
;
701 * This function is used for secpolicy_setattr(). It must call an
702 * access() like function while it is already holding the
703 * dv_contents lock. We only care about this when dv_attr != NULL;
704 * so the unlocked access call only concerns itself with that
705 * particular branch of devfs_access().
708 devfs_unlocked_access(void *vdv
, int mode
, struct cred
*cr
)
710 struct dv_node
*dv
= vdv
;
712 uid_t owner
= dv
->dv_attr
->va_uid
;
714 /* Check access based on owner, group and public permissions. */
715 if (crgetuid(cr
) != owner
) {
717 if (groupmember(dv
->dv_attr
->va_gid
, cr
) == 0)
721 return (secpolicy_vnode_access2(cr
, DVTOV(dv
), owner
,
722 dv
->dv_attr
->va_mode
<< shift
, mode
));
726 devfs_access(struct vnode
*vp
, int mode
, int flags
, struct cred
*cr
,
727 caller_context_t
*ct
)
729 struct dv_node
*dv
= VTODV(vp
);
732 dcmn_err2(("devfs_access %s\n", dv
->dv_name
));
733 ASSERT(dv
->dv_attr
|| dv
->dv_attrvp
);
735 /* restrict console access to privileged processes */
736 if ((vp
->v_rdev
== rconsdev
) && secpolicy_console(cr
) != 0) {
740 rw_enter(&dv
->dv_contents
, RW_READER
);
741 if (dv
->dv_attr
&& ((dv
->dv_flags
& DV_ACL
) == 0)) {
742 res
= devfs_unlocked_access(dv
, mode
, cr
);
744 res
= fop_access(dv
->dv_attrvp
, mode
, flags
, cr
, ct
);
746 rw_exit(&dv
->dv_contents
);
753 * Given the directory vnode and the name of the component, return
754 * the corresponding held vnode for that component.
756 * Of course in these fictional filesystems, nothing's ever quite
759 * devfs name type shadow (fs attributes) type comments
760 * -------------------------------------------------------------------------
761 * drv[@addr] VDIR drv[@addr] VDIR nexus driver
762 * drv[@addr]:m VCHR/VBLK drv[@addr]:m VREG leaf driver
763 * drv[@addr] VCHR/VBLK drv[@addr]:.default VREG leaf driver
764 * -------------------------------------------------------------------------
766 * The following names are reserved for the attribute filesystem (which
767 * could easily be another layer on top of this one - we simply need to
768 * hold the vnode of the thing we're looking at)
770 * attr name type shadow (fs attributes) type comments
771 * -------------------------------------------------------------------------
772 * drv[@addr] VDIR - - attribute dir
773 * minorname VDIR - - minorname
774 * attribute VREG - - attribute
775 * -------------------------------------------------------------------------
779 * devfs:/devices/.../mm@0:zero VCHR
780 * shadow:/.devices/.../mm@0:zero VREG, fs attrs
781 * devfs:/devices/.../mm@0:/zero/attr VREG, driver attribute
783 * devfs:/devices/.../sd@0,0:a VBLK
784 * shadow:/.devices/.../sd@0,0:a VREG, fs attrs
785 * devfs:/devices/.../sd@0,0:/a/.type VREG, "ddi_block:chan"
787 * devfs:/devices/.../mm@0 VCHR
788 * shadow:/.devices/.../mm@0:.default VREG, fs attrs
789 * devfs:/devices/.../mm@0:/.default/attr VREG, driver attribute
790 * devfs:/devices/.../mm@0:/.default/.type VREG, "ddi_pseudo"
792 * devfs:/devices/.../obio VDIR
793 * shadow:/devices/.../obio VDIR, needed for fs attrs.
794 * devfs:/devices/.../obio:/.default/attr VDIR, driver attribute
796 * We also need to be able to deal with "old" devices that have gone away,
797 * though I think that provided we return them with readdir, they can
798 * be removed (i.e. they don't have to respond to lookup, though it might
799 * be weird if they didn't ;-)
801 * Lookup has side-effects.
803 * - It will create directories and fs attribute files in the shadow hierarchy.
804 * - It should cause non-SID devices to be probed (ask the parent nexi).
808 devfs_lookup(struct vnode
*dvp
, char *nm
, struct vnode
**vpp
,
809 struct pathname
*pnp
, int flags
, struct vnode
*rdir
, struct cred
*cred
,
810 caller_context_t
*ct
, int *direntflags
, pathname_t
*realpnp
)
812 ASSERT(dvp
->v_type
== VDIR
);
813 dcmn_err2(("devfs_lookup: %s\n", nm
));
814 return (dv_find(VTODV(dvp
), nm
, vpp
, pnp
, rdir
, cred
, 0));
818 * devfs nodes can't really be created directly by userland - however,
819 * we do allow creates to find existing nodes:
821 * - any create fails if the node doesn't exist - EROFS.
822 * - creating an existing directory read-only succeeds, otherwise EISDIR.
823 * - exclusive creates fail if the node already exists - EEXIST.
824 * - failure to create the snode for an existing device - ENOSYS.
828 devfs_create(struct vnode
*dvp
, char *nm
, struct vattr
*vap
, vcexcl_t excl
,
829 int mode
, struct vnode
**vpp
, struct cred
*cred
, int flag
,
830 caller_context_t
*ct
, vsecattr_t
*vsecp
)
835 dcmn_err2(("devfs_create %s\n", nm
));
836 error
= dv_find(VTODV(dvp
), nm
, &vp
, NULL
, NULLVP
, cred
, 0);
840 else if (vp
->v_type
== VDIR
&& (mode
& VWRITE
))
843 error
= fop_access(vp
, mode
, 0, cred
, ct
);
849 } else if (error
== ENOENT
)
856 * If DV_BUILD is set, we call into nexus driver to do a BUS_CONFIG_ALL.
857 * Otherwise, simply return cached dv_nodes. Hotplug code always calls
858 * devfs_clean() to invalidate the dv_node cache.
862 devfs_readdir(struct vnode
*dvp
, struct uio
*uiop
, struct cred
*cred
, int *eofp
,
863 caller_context_t
*ct
, int flags
)
865 struct dv_node
*ddv
, *dv
;
866 struct dirent64
*de
, *bufp
;
869 size_t reclen
, movesz
;
875 dcmn_err2(("devfs_readdir %s: offset %lld len %ld\n",
876 ddv
->dv_name
, uiop
->uio_loffset
, uiop
->uio_iov
->iov_len
));
877 ASSERT(ddv
->dv_attr
|| ddv
->dv_attrvp
);
878 ASSERT(RW_READ_HELD(&ddv
->dv_contents
));
880 if (uiop
->uio_loffset
>= MAXOFF_T
) {
886 if (uiop
->uio_iovcnt
!= 1)
889 if (dvp
->v_type
!= VDIR
)
892 /* Load the initial contents */
893 if (ddv
->dv_flags
& DV_BUILD
) {
894 if (!rw_tryupgrade(&ddv
->dv_contents
)) {
895 rw_exit(&ddv
->dv_contents
);
896 rw_enter(&ddv
->dv_contents
, RW_WRITER
);
899 /* recheck and fill */
900 if (ddv
->dv_flags
& DV_BUILD
)
903 rw_downgrade(&ddv
->dv_contents
);
906 soff
= uiop
->uio_loffset
;
907 bufsz
= uiop
->uio_iov
->iov_len
;
908 de
= bufp
= kmem_alloc(bufsz
, KM_SLEEP
);
910 dv
= (struct dv_node
*)-1;
913 * Move as many entries into the uio structure as it will take.
914 * Special case "." and "..".
917 if (soff
== 0) { /* . */
918 reclen
= DIRENT64_RECLEN(strlen("."));
919 if ((movesz
+ reclen
) > bufsz
)
921 de
->d_ino
= (ino64_t
)ddv
->dv_ino
;
922 de
->d_off
= (off64_t
)diroff
+ 1;
923 de
->d_reclen
= (ushort_t
)reclen
;
925 /* use strncpy(9f) to zero out uninitialized bytes */
927 (void) strncpy(de
->d_name
, ".", DIRENT64_NAMELEN(reclen
));
929 de
= (dirent64_t
*)(intptr_t)((char *)de
+ reclen
);
930 dcmn_err3(("devfs_readdir: A: diroff %lld, soff %lld: '%s' "
931 "reclen %lu\n", diroff
, soff
, ".", reclen
));
935 if (soff
<= 1) { /* .. */
936 reclen
= DIRENT64_RECLEN(strlen(".."));
937 if ((movesz
+ reclen
) > bufsz
)
939 de
->d_ino
= (ino64_t
)ddv
->dv_dotdot
->dv_ino
;
940 de
->d_off
= (off64_t
)diroff
+ 1;
941 de
->d_reclen
= (ushort_t
)reclen
;
943 /* use strncpy(9f) to zero out uninitialized bytes */
945 (void) strncpy(de
->d_name
, "..", DIRENT64_NAMELEN(reclen
));
947 de
= (dirent64_t
*)(intptr_t)((char *)de
+ reclen
);
948 dcmn_err3(("devfs_readdir: B: diroff %lld, soff %lld: '%s' "
949 "reclen %lu\n", diroff
, soff
, "..", reclen
));
953 for (dv
= DV_FIRST_ENTRY(ddv
); dv
;
954 dv
= DV_NEXT_ENTRY(ddv
, dv
), diroff
++) {
955 /* skip entries until at correct directory offset */
960 * hidden nodes are skipped (but they still occupy a
963 if (dv
->dv_devi
&& ndi_dev_is_hidden_node(dv
->dv_devi
))
967 * DDM_INTERNAL_PATH minor nodes are skipped for readdirs
968 * outside the kernel (but they still occupy a directory
971 if ((dv
->dv_flags
& DV_INTERNAL
) && (cred
!= kcred
))
974 reclen
= DIRENT64_RECLEN(strlen(dv
->dv_name
));
975 if ((movesz
+ reclen
) > bufsz
) {
976 dcmn_err3(("devfs_readdir: C: diroff "
977 "%lld, soff %lld: '%s' reclen %lu\n",
978 diroff
, soff
, dv
->dv_name
, reclen
));
981 de
->d_ino
= (ino64_t
)dv
->dv_ino
;
982 de
->d_off
= (off64_t
)diroff
+ 1;
983 de
->d_reclen
= (ushort_t
)reclen
;
985 /* use strncpy(9f) to zero out uninitialized bytes */
987 ASSERT(strlen(dv
->dv_name
) + 1 <=
988 DIRENT64_NAMELEN(reclen
));
989 (void) strncpy(de
->d_name
, dv
->dv_name
,
990 DIRENT64_NAMELEN(reclen
));
993 de
= (dirent64_t
*)(intptr_t)((char *)de
+ reclen
);
994 dcmn_err4(("devfs_readdir: D: diroff "
995 "%lld, soff %lld: '%s' reclen %lu\n", diroff
, soff
,
996 dv
->dv_name
, reclen
));
999 /* the buffer is full, or we exhausted everything */
1000 full
: dcmn_err3(("devfs_readdir: moving %lu bytes: "
1001 "diroff %lld, soff %lld, dv %p\n",
1002 movesz
, diroff
, soff
, (void *)dv
));
1004 if ((movesz
== 0) && dv
)
1005 error
= EINVAL
; /* cannot be represented */
1007 error
= uiomove(bufp
, movesz
, UIO_READ
, uiop
);
1011 uiop
->uio_loffset
= diroff
;
1014 va
.va_mask
= AT_ATIME
;
1015 gethrestime(&va
.va_atime
);
1016 rw_exit(&ddv
->dv_contents
);
1017 (void) devfs_setattr(dvp
, &va
, 0, cred
, ct
);
1018 rw_enter(&ddv
->dv_contents
, RW_READER
);
1021 kmem_free(bufp
, bufsz
);
1027 devfs_fsync(struct vnode
*vp
, int syncflag
, struct cred
*cred
,
1028 caller_context_t
*ct
)
1031 * Message goes to console only. Otherwise, the message
1032 * causes devfs_fsync to be invoked again... infinite loop
1034 dcmn_err2(("devfs_fsync %s\n", VTODV(vp
)->dv_name
));
1039 * Normally, we leave the dv_node here at count of 0.
1040 * The node will be destroyed when dv_cleandir() is called.
1042 * Stale dv_node's are already unlinked from the fs tree,
1043 * so dv_cleandir() won't find them. We destroy such nodes
1048 devfs_inactive(struct vnode
*vp
, struct cred
*cred
, caller_context_t
*ct
)
1051 struct dv_node
*dv
= VTODV(vp
);
1053 dcmn_err2(("devfs_inactive: %s\n", dv
->dv_name
));
1054 mutex_enter(&vp
->v_lock
);
1055 ASSERT(vp
->v_count
>= 1);
1057 destroy
= (DV_STALE(dv
) && vp
->v_count
== 0);
1058 mutex_exit(&vp
->v_lock
);
1060 /* stale nodes cannot be rediscovered, destroy it here */
1066 * XXX Why do we need this? NFS mounted /dev directories?
1067 * XXX Talk to peter staubach about this.
1071 devfs_fid(struct vnode
*vp
, struct fid
*fidp
, caller_context_t
*ct
)
1073 struct dv_node
*dv
= VTODV(vp
);
1074 struct dv_fid
*dv_fid
;
1076 if (fidp
->fid_len
< (sizeof (struct dv_fid
) - sizeof (ushort_t
))) {
1077 fidp
->fid_len
= sizeof (struct dv_fid
) - sizeof (ushort_t
);
1081 dv_fid
= (struct dv_fid
*)fidp
;
1082 bzero(dv_fid
, sizeof (struct dv_fid
));
1083 dv_fid
->dvfid_len
= (int)sizeof (struct dv_fid
) - sizeof (ushort_t
);
1084 dv_fid
->dvfid_ino
= dv
->dv_ino
;
1085 /* dv_fid->dvfid_gen = dv->tn_gen; XXX ? */
1091 * This pair of routines bracket all fop_read, fop_write
1092 * and fop_readdir requests. The contents lock stops things
1093 * moving around while we're looking at them.
1095 * Also used by file and record locking.
1099 devfs_rwlock(struct vnode
*vp
, int write_flag
, caller_context_t
*ct
)
1101 dcmn_err2(("devfs_rwlock %s\n", VTODV(vp
)->dv_name
));
1102 rw_enter(&VTODV(vp
)->dv_contents
, write_flag
? RW_WRITER
: RW_READER
);
1103 return (write_flag
);
1108 devfs_rwunlock(struct vnode
*vp
, int write_flag
, caller_context_t
*ct
)
1110 dcmn_err2(("devfs_rwunlock %s\n", VTODV(vp
)->dv_name
));
1111 rw_exit(&VTODV(vp
)->dv_contents
);
1115 * XXX Should probably do a better job of computing the maximum
1116 * offset available in the directory.
1120 devfs_seek(struct vnode
*vp
, offset_t ooff
, offset_t
*noffp
,
1121 caller_context_t
*ct
)
1123 ASSERT(vp
->v_type
== VDIR
);
1124 dcmn_err2(("devfs_seek %s\n", VTODV(vp
)->dv_name
));
1125 return ((*noffp
< 0 || *noffp
> MAXOFFSET_T
) ? EINVAL
: 0);
1128 const struct vnodeops dv_vnodeops
= {
1129 .vnop_name
= "devfs",
1130 .vop_open
= devfs_open
,
1131 .vop_close
= devfs_close
,
1132 .vop_read
= devfs_read
,
1133 .vop_write
= devfs_write
,
1134 .vop_ioctl
= devfs_ioctl
,
1135 .vop_getattr
= devfs_getattr
,
1136 .vop_setattr
= devfs_setattr
,
1137 .vop_access
= devfs_access
,
1138 .vop_lookup
= devfs_lookup
,
1139 .vop_create
= devfs_create
,
1140 .vop_readdir
= devfs_readdir
,
1141 .vop_fsync
= devfs_fsync
,
1142 .vop_inactive
= devfs_inactive
,
1143 .vop_fid
= devfs_fid
,
1144 .vop_rwlock
= devfs_rwlock
,
1145 .vop_rwunlock
= devfs_rwunlock
,
1146 .vop_seek
= devfs_seek
,
1147 .vop_pathconf
= devfs_pathconf
,
1148 .vop_dispose
= fs_nodispose
,
1149 .vop_setsecattr
= devfs_setsecattr
,
1150 .vop_getsecattr
= devfs_getsecattr
,