Merge commit 'dfc115332c94a2f62058ac7f2bce7631fbd20b3d'
[unleashed/tickless.git] / kernel / fs / devfs / devfs_vnops.c
blob6a4f63e4bd5af98f7308e104c2367264fd0b47bd
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
22 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2017 by Delphix. All rights reserved.
27 * vnode ops for the devfs
29 * For leaf vnode special files (VCHR|VBLK) specfs will always see the VOP
30 * first because dv_find always performs leaf vnode substitution, returning
31 * a specfs vnode with an s_realvp pointing to the devfs leaf vnode. This
32 * means that the only leaf special file VOP operations that devfs will see
33 * after fop_lookup are the ones that specfs forwards.
36 #include <sys/types.h>
37 #include <sys/param.h>
38 #include <sys/t_lock.h>
39 #include <sys/systm.h>
40 #include <sys/sysmacros.h>
41 #include <sys/user.h>
42 #include <sys/time.h>
43 #include <sys/vfs.h>
44 #include <sys/vnode.h>
45 #include <sys/file.h>
46 #include <sys/fcntl.h>
47 #include <sys/flock.h>
48 #include <sys/kmem.h>
49 #include <sys/uio.h>
50 #include <sys/errno.h>
51 #include <sys/stat.h>
52 #include <sys/cred.h>
53 #include <sys/dirent.h>
54 #include <sys/pathname.h>
55 #include <sys/cmn_err.h>
56 #include <sys/debug.h>
57 #include <sys/policy.h>
58 #include <sys/modctl.h>
59 #include <sys/sunndi.h>
60 #include <sys/fs_subr.h>
61 #include <sys/fs/dv_node.h>
/* Attribute templates used to seed directory and device-file attributes. */
extern struct vattr dv_vattr_dir;
extern struct vattr dv_vattr_file;

/* Device number compared against v_rdev to recognize the console node. */
extern dev_t rconsdev;
67 * Open of devices (leaf nodes) is handled by specfs.
68 * There is nothing to do to open a directory
70 /*ARGSUSED*/
71 static int
72 devfs_open(struct vnode **vpp, int flag, struct cred *cred,
73 caller_context_t *ct)
75 struct dv_node *dv = VTODV(*vpp);
77 dcmn_err2(("devfs_open %s\n", dv->dv_name));
78 ASSERT((*vpp)->v_type == VDIR);
79 return (0);
83 * Close of devices (leaf nodes) is handled by specfs.
84 * There is nothing much to do inorder to close a directory.
86 /*ARGSUSED1*/
87 static int
88 devfs_close(struct vnode *vp, int flag, int count,
89 offset_t offset, struct cred *cred, caller_context_t *ct)
91 struct dv_node *dv = VTODV(vp);
93 dcmn_err2(("devfs_close %s\n", dv->dv_name));
94 ASSERT(vp->v_type == VDIR);
96 cleanlocks(vp, ttoproc(curthread)->p_pid, 0);
97 cleanshares(vp, ttoproc(curthread)->p_pid);
98 return (0);
102 * Read of devices (leaf nodes) is handled by specfs.
103 * Read of directories is not supported.
105 /*ARGSUSED*/
106 static int
107 devfs_read(struct vnode *vp, struct uio *uiop, int ioflag, struct cred *cred,
108 struct caller_context *ct)
110 dcmn_err2(("devfs_read %s\n", VTODV(vp)->dv_name));
111 ASSERT(vp->v_type == VDIR);
112 ASSERT(RW_READ_HELD(&VTODV(vp)->dv_contents));
113 return (EISDIR);
117 * Write of devices (leaf nodes) is handled by specfs.
118 * Write of directories is not supported.
120 /*ARGSUSED*/
121 static int
122 devfs_write(struct vnode *vp, struct uio *uiop, int ioflag, struct cred *cred,
123 struct caller_context *ct)
125 dcmn_err2(("devfs_write %s\n", VTODV(vp)->dv_name));
126 ASSERT(vp->v_type == VDIR);
127 ASSERT(RW_WRITE_HELD(&VTODV(vp)->dv_contents));
128 return (EISDIR);
132 * Ioctls to device (leaf nodes) is handled by specfs.
133 * Ioctl to directories is not supported.
135 /*ARGSUSED*/
136 static int
137 devfs_ioctl(struct vnode *vp, int cmd, intptr_t arg, int flag,
138 struct cred *cred, int *rvalp, caller_context_t *ct)
140 dcmn_err2(("devfs_ioctl %s\n", VTODV(vp)->dv_name));
141 ASSERT(vp->v_type == VDIR);
143 return (ENOTTY); /* no ioctls supported */
147 * We can be asked directly about the attributes of directories, or
148 * (via sp->s_realvp) about the filesystem attributes of special files.
150 * For directories, we just believe the attribute store
151 * though we mangle the nodeid, fsid, and rdev to convince userland we
152 * really are a different filesystem.
154 * For special files, a little more fakery is required.
156 * If the attribute store is not there (read only root), we believe our
157 * memory based attributes.
159 static int
160 devfs_getattr(struct vnode *vp, struct vattr *vap, int flags, struct cred *cr,
161 caller_context_t *ct)
163 struct dv_node *dv = VTODV(vp);
164 int error = 0;
165 uint_t mask;
168 * Message goes to console only. Otherwise, the message
169 * causes devfs_getattr to be invoked again... infinite loop
171 dcmn_err2(("?devfs_getattr %s\n", dv->dv_name));
172 ASSERT(dv->dv_attr || dv->dv_attrvp);
174 if (!(vp->v_type == VDIR || vp->v_type == VCHR || vp->v_type == VBLK)) {
175 cmn_err(CE_WARN, /* panic ? */
176 "?%s: getattr on vnode type %d", dvnm, vp->v_type);
177 return (ENOENT);
180 rw_enter(&dv->dv_contents, RW_READER);
181 if (dv->dv_attr) {
183 * obtain from the memory version of attribute.
184 * preserve mask for those that optimize.
185 * devfs specific fields are already merged on creation.
187 mask = vap->va_mask;
188 *vap = *dv->dv_attr;
189 vap->va_mask = mask;
190 } else {
191 /* obtain from attribute store and merge */
192 error = fop_getattr(dv->dv_attrvp, vap, flags, cr, ct);
193 dsysdebug(error, ("vop_getattr %s %d\n", dv->dv_name, error));
194 dv_vattr_merge(dv, vap);
196 rw_exit(&dv->dv_contents);
199 * Restrict the permissions of the node fronting the console
200 * to 0600 with root as the owner. This prevents a non-root
201 * user from gaining access to a serial terminal (like /dev/term/a)
202 * which is in reality serving as the console device (/dev/console).
204 if (vp->v_rdev == rconsdev) {
205 mode_t rconsmask = S_IXUSR|S_IRWXG|S_IRWXO;
206 vap->va_mode &= (~rconsmask);
207 vap->va_uid = 0;
210 return (error);
213 static int devfs_unlocked_access(void *, int, struct cred *);
215 /*ARGSUSED4*/
216 static int
217 devfs_setattr_dir(
218 struct dv_node *dv,
219 struct vnode *vp,
220 struct vattr *vap,
221 int flags,
222 struct cred *cr)
224 struct vattr *map;
225 uint_t mask;
226 int error = 0;
227 struct vattr vattr;
229 ASSERT(dv->dv_attr || dv->dv_attrvp);
231 ASSERT(vp->v_type == VDIR);
232 ASSERT((dv->dv_flags & DV_NO_FSPERM) == 0);
234 if (vap->va_mask & AT_NOSET)
235 return (EINVAL);
237 /* to ensure consistency, single thread setting of attributes */
238 rw_enter(&dv->dv_contents, RW_WRITER);
240 again: if (dv->dv_attr) {
242 error = secpolicy_vnode_setattr(cr, vp, vap,
243 dv->dv_attr, flags, devfs_unlocked_access, dv);
245 if (error)
246 goto out;
249 * Apply changes to the memory based attribute. This code
250 * is modeled after the tmpfs implementation of memory
251 * based vnodes
253 map = dv->dv_attr;
254 mask = vap->va_mask;
256 /* Change file access modes. */
257 if (mask & AT_MODE) {
258 map->va_mode &= S_IFMT;
259 map->va_mode |= vap->va_mode & ~S_IFMT;
261 if (mask & AT_UID)
262 map->va_uid = vap->va_uid;
263 if (mask & AT_GID)
264 map->va_gid = vap->va_gid;
265 if (mask & AT_ATIME)
266 map->va_atime = vap->va_atime;
267 if (mask & AT_MTIME)
268 map->va_mtime = vap->va_mtime;
270 if (mask & (AT_MODE | AT_UID | AT_GID | AT_MTIME))
271 gethrestime(&map->va_ctime);
272 } else {
273 /* use the backing attribute store */
274 ASSERT(dv->dv_attrvp);
277 * See if we are changing something we care about
278 * the persistence of - return success if we don't care.
280 if (vap->va_mask & (AT_MODE|AT_UID|AT_GID|AT_ATIME|AT_MTIME)) {
281 /* Set the attributes */
282 error = fop_setattr(dv->dv_attrvp,
283 vap, flags, cr, NULL);
284 dsysdebug(error,
285 ("vop_setattr %s %d\n", dv->dv_name, error));
288 * Some file systems may return EROFS for a setattr
289 * on a readonly file system. In this case we create
290 * our own memory based attribute.
292 if (error == EROFS) {
294 * obtain attributes from existing file
295 * that we will modify and switch to memory
296 * based attribute until attribute store is
297 * read/write.
299 vattr = dv_vattr_dir;
300 if (fop_getattr(dv->dv_attrvp,
301 &vattr, flags, cr, NULL) == 0) {
302 dv->dv_attr = kmem_alloc(
303 sizeof (struct vattr), KM_SLEEP);
304 *dv->dv_attr = vattr;
305 dv_vattr_merge(dv, dv->dv_attr);
306 goto again;
311 out:
312 rw_exit(&dv->dv_contents);
313 return (error);
318 * Compare the uid/gid/mode changes requested for a setattr
319 * operation with the same details of a node's default minor
320 * perm information. Return 0 if identical.
322 static int
323 dv_setattr_cmp(struct vattr *map, mperm_t *mp)
325 if ((map->va_mode & S_IAMB) != (mp->mp_mode & S_IAMB))
326 return (1);
327 if (map->va_uid != mp->mp_uid)
328 return (1);
329 if (map->va_gid != mp->mp_gid)
330 return (1);
331 return (0);
335 /*ARGSUSED4*/
336 static int
337 devfs_setattr(
338 struct vnode *vp,
339 struct vattr *vap,
340 int flags,
341 struct cred *cr,
342 caller_context_t *ct)
344 struct dv_node *dv = VTODV(vp);
345 struct dv_node *ddv;
346 struct vnode *dvp;
347 struct vattr *map;
348 uint_t mask;
349 int error = 0;
350 struct vattr *free_vattr = NULL;
351 struct vattr *vattrp = NULL;
352 mperm_t mp;
353 int persist;
356 * Message goes to console only. Otherwise, the message
357 * causes devfs_getattr to be invoked again... infinite loop
359 dcmn_err2(("?devfs_setattr %s\n", dv->dv_name));
360 ASSERT(dv->dv_attr || dv->dv_attrvp);
362 if (!(vp->v_type == VDIR || vp->v_type == VCHR || vp->v_type == VBLK)) {
363 cmn_err(CE_WARN, /* panic ? */
364 "?%s: getattr on vnode type %d", dvnm, vp->v_type);
365 return (ENOENT);
368 if (vap->va_mask & AT_NOSET)
369 return (EINVAL);
372 * If we are changing something we don't care about
373 * the persistence of, return success.
375 if ((vap->va_mask &
376 (AT_MODE|AT_UID|AT_GID|AT_ATIME|AT_MTIME)) == 0)
377 return (0);
380 * If driver overrides fs perm, disallow chmod
381 * and do not create attribute nodes.
383 if (dv->dv_flags & DV_NO_FSPERM) {
384 ASSERT(dv->dv_attr);
385 if (vap->va_mask & (AT_MODE | AT_UID | AT_GID))
386 return (EPERM);
387 if ((vap->va_mask & (AT_ATIME|AT_MTIME)) == 0)
388 return (0);
389 rw_enter(&dv->dv_contents, RW_WRITER);
390 if (vap->va_mask & AT_ATIME)
391 dv->dv_attr->va_atime = vap->va_atime;
392 if (vap->va_mask & AT_MTIME)
393 dv->dv_attr->va_mtime = vap->va_mtime;
394 rw_exit(&dv->dv_contents);
395 return (0);
399 * Directories are always created but device nodes are
400 * only used to persist non-default permissions.
402 if (vp->v_type == VDIR) {
403 ASSERT(dv->dv_attr || dv->dv_attrvp);
404 return (devfs_setattr_dir(dv, vp, vap, flags, cr));
408 * Allocate now before we take any locks
410 vattrp = kmem_zalloc(sizeof (*vattrp), KM_SLEEP);
412 /* to ensure consistency, single thread setting of attributes */
413 rw_enter(&dv->dv_contents, RW_WRITER);
416 * We don't need to create an attribute node
417 * to persist access or modification times.
419 persist = (vap->va_mask & (AT_MODE | AT_UID | AT_GID));
422 * If persisting something, get the default permissions
423 * for this minor to compare against what the attributes
424 * are now being set to. Default ordering is:
425 * - minor_perm match for this minor
426 * - mode supplied by ddi_create_priv_minor_node
427 * - devfs defaults
429 if (persist) {
430 if (dev_minorperm(dv->dv_devi, dv->dv_name, &mp) != 0) {
431 mp.mp_uid = dv_vattr_file.va_uid;
432 mp.mp_gid = dv_vattr_file.va_gid;
433 mp.mp_mode = dv_vattr_file.va_mode;
434 if (dv->dv_flags & DV_DFLT_MODE) {
435 ASSERT((dv->dv_dflt_mode & ~S_IAMB) == 0);
436 mp.mp_mode &= ~S_IAMB;
437 mp.mp_mode |= dv->dv_dflt_mode;
438 dcmn_err5(("%s: setattr priv default 0%o\n",
439 dv->dv_name, mp.mp_mode));
440 } else {
441 dcmn_err5(("%s: setattr devfs default 0%o\n",
442 dv->dv_name, mp.mp_mode));
444 } else {
445 dcmn_err5(("%s: setattr minor perm default 0%o\n",
446 dv->dv_name, mp.mp_mode));
451 * If we don't have a vattr for this node, construct one.
453 if (dv->dv_attr) {
454 free_vattr = vattrp;
455 vattrp = NULL;
456 } else {
457 ASSERT(dv->dv_attrvp);
458 ASSERT(vp->v_type != VDIR);
459 *vattrp = dv_vattr_file;
460 error = fop_getattr(dv->dv_attrvp, vattrp, 0, cr, ct);
461 dsysdebug(error, ("vop_getattr %s %d\n", dv->dv_name, error));
462 if (error)
463 goto out;
464 dv->dv_attr = vattrp;
465 dv_vattr_merge(dv, dv->dv_attr);
466 vattrp = NULL;
469 error = secpolicy_vnode_setattr(cr, vp, vap, dv->dv_attr,
470 flags, devfs_unlocked_access, dv);
471 if (error) {
472 dsysdebug(error, ("devfs_setattr %s secpolicy error %d\n",
473 dv->dv_name, error));
474 goto out;
478 * Apply changes to the memory based attribute. This code
479 * is modeled after the tmpfs implementation of memory
480 * based vnodes
482 map = dv->dv_attr;
483 mask = vap->va_mask;
485 /* Change file access modes. */
486 if (mask & AT_MODE) {
487 map->va_mode &= S_IFMT;
488 map->va_mode |= vap->va_mode & ~S_IFMT;
490 if (mask & AT_UID)
491 map->va_uid = vap->va_uid;
492 if (mask & AT_GID)
493 map->va_gid = vap->va_gid;
494 if (mask & AT_ATIME)
495 map->va_atime = vap->va_atime;
496 if (mask & AT_MTIME)
497 map->va_mtime = vap->va_mtime;
499 if (mask & (AT_MODE | AT_UID | AT_GID | AT_MTIME)) {
500 gethrestime(&map->va_ctime);
504 * A setattr to defaults means we no longer need the
505 * shadow node as a persistent store, unless there
506 * are ACLs. Otherwise create a shadow node if one
507 * doesn't exist yet.
509 if (persist) {
510 if ((dv_setattr_cmp(map, &mp) == 0) &&
511 ((dv->dv_flags & DV_ACL) == 0)) {
513 if (dv->dv_attrvp) {
514 ddv = dv->dv_dotdot;
515 ASSERT(ddv->dv_attrvp);
516 error = fop_remove(ddv->dv_attrvp,
517 dv->dv_name, cr, ct, 0);
518 dsysdebug(error,
519 ("vop_remove %s %s %d\n",
520 ddv->dv_name, dv->dv_name, error));
522 if (error == EROFS)
523 error = 0;
524 VN_RELE(dv->dv_attrvp);
525 dv->dv_attrvp = NULL;
527 ASSERT(dv->dv_attr);
528 } else {
529 if (mask & AT_MODE)
530 dcmn_err5(("%s persisting mode 0%o\n",
531 dv->dv_name, vap->va_mode));
532 if (mask & AT_UID)
533 dcmn_err5(("%s persisting uid %d\n",
534 dv->dv_name, vap->va_uid));
535 if (mask & AT_GID)
536 dcmn_err5(("%s persisting gid %d\n",
537 dv->dv_name, vap->va_gid));
539 if (dv->dv_attrvp == NULL) {
540 dvp = DVTOV(dv->dv_dotdot);
541 dv_shadow_node(dvp, dv->dv_name, vp,
542 NULL, NULLVP, cr,
543 DV_SHADOW_CREATE | DV_SHADOW_WRITE_HELD);
545 if (dv->dv_attrvp) {
546 /* If map still valid do TIME for free. */
547 if (dv->dv_attr == map) {
548 mask = map->va_mask;
549 map->va_mask =
550 vap->va_mask | AT_ATIME | AT_MTIME;
551 error = fop_setattr(dv->dv_attrvp, map,
552 flags, cr, NULL);
553 map->va_mask = mask;
554 } else {
555 error = fop_setattr(dv->dv_attrvp,
556 vap, flags, cr, NULL);
558 dsysdebug(error, ("vop_setattr %s %d\n",
559 dv->dv_name, error));
562 * Some file systems may return EROFS for a setattr
563 * on a readonly file system. In this case save
564 * as our own memory based attribute.
565 * NOTE: ufs is NOT one of these (see ufs_iupdat).
567 if (dv->dv_attr && dv->dv_attrvp && error == 0) {
568 vattrp = dv->dv_attr;
569 dv->dv_attr = NULL;
570 } else if (error == EROFS)
571 error = 0;
575 out:
576 rw_exit(&dv->dv_contents);
578 if (vattrp)
579 kmem_free(vattrp, sizeof (*vattrp));
580 if (free_vattr)
581 kmem_free(free_vattr, sizeof (*free_vattr));
582 return (error);
585 static int
586 devfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
587 caller_context_t *ct)
589 switch (cmd) {
590 case _PC_ACL_ENABLED:
592 * We rely on the underlying filesystem for ACLs,
593 * so direct the query for ACL support there.
594 * ACL support isn't relative to the file
595 * and we can't guarantee that the dv node
596 * has an attribute node, so any valid
597 * attribute node will suffice.
599 ASSERT(dvroot);
600 ASSERT(dvroot->dv_attrvp);
601 return (fop_pathconf(dvroot->dv_attrvp, cmd, valp, cr, ct));
602 /*NOTREACHED*/
605 return (fs_pathconf(vp, cmd, valp, cr, ct));
609 * Let avp handle security attributes (acl's).
611 static int
612 devfs_getsecattr(struct vnode *vp, struct vsecattr *vsap, int flags,
613 struct cred *cr, caller_context_t *ct)
615 dvnode_t *dv = VTODV(vp);
616 struct vnode *avp;
617 int error;
619 dcmn_err2(("devfs_getsecattr %s\n", dv->dv_name));
620 ASSERT(vp->v_type == VDIR || vp->v_type == VCHR || vp->v_type == VBLK);
622 rw_enter(&dv->dv_contents, RW_READER);
624 avp = dv->dv_attrvp;
626 /* fabricate the acl */
627 if (avp == NULL) {
628 error = fs_fab_acl(vp, vsap, flags, cr, ct);
629 rw_exit(&dv->dv_contents);
630 return (error);
633 error = fop_getsecattr(avp, vsap, flags, cr, ct);
634 dsysdebug(error, ("vop_getsecattr %s %d\n", VTODV(vp)->dv_name, error));
635 rw_exit(&dv->dv_contents);
636 return (error);
640 * Set security attributes (acl's)
642 * Note that the dv_contents lock has already been acquired
643 * by the caller's fop_rwlock.
645 static int
646 devfs_setsecattr(struct vnode *vp, struct vsecattr *vsap, int flags,
647 struct cred *cr, caller_context_t *ct)
649 dvnode_t *dv = VTODV(vp);
650 struct vnode *avp;
651 int error;
653 dcmn_err2(("devfs_setsecattr %s\n", dv->dv_name));
654 ASSERT(vp->v_type == VDIR || vp->v_type == VCHR || vp->v_type == VBLK);
655 ASSERT(RW_LOCK_HELD(&dv->dv_contents));
658 * Not a supported operation on drivers not providing
659 * file system based permissions.
661 if (dv->dv_flags & DV_NO_FSPERM)
662 return (ENOTSUP);
665 * To complete, the setsecattr requires an underlying attribute node.
667 if (dv->dv_attrvp == NULL) {
668 ASSERT(vp->v_type == VCHR || vp->v_type == VBLK);
669 dv_shadow_node(DVTOV(dv->dv_dotdot), dv->dv_name, vp,
670 NULL, NULLVP, cr, DV_SHADOW_CREATE | DV_SHADOW_WRITE_HELD);
673 if ((avp = dv->dv_attrvp) == NULL) {
674 dcmn_err2(("devfs_setsecattr %s: "
675 "cannot construct attribute node\n", dv->dv_name));
676 return (fs_nosys());
680 * The acl(2) system call issues a fop_rwlock before setting an ACL.
681 * Since backing file systems expect the lock to be held before seeing
682 * a fop_setsecattr ACL, we need to issue the fop_rwlock to the backing
683 * store before forwarding the ACL.
685 (void) fop_rwlock(avp, V_WRITELOCK_TRUE, NULL);
686 error = fop_setsecattr(avp, vsap, flags, cr, ct);
687 dsysdebug(error, ("vop_setsecattr %s %d\n", VTODV(vp)->dv_name, error));
688 fop_rwunlock(avp, V_WRITELOCK_TRUE, NULL);
691 * Set DV_ACL if we have a non-trivial set of ACLs. It is not
692 * necessary to hold fop_rwlock since fs_acl_nontrivial only does
693 * fop_getsecattr calls.
695 if (fs_acl_nontrivial(avp, cr))
696 dv->dv_flags |= DV_ACL;
697 return (error);
701 * This function is used for secpolicy_setattr(). It must call an
702 * access() like function while it is already holding the
703 * dv_contents lock. We only care about this when dv_attr != NULL;
704 * so the unlocked access call only concerns itself with that
705 * particular branch of devfs_access().
707 static int
708 devfs_unlocked_access(void *vdv, int mode, struct cred *cr)
710 struct dv_node *dv = vdv;
711 int shift = 0;
712 uid_t owner = dv->dv_attr->va_uid;
714 /* Check access based on owner, group and public permissions. */
715 if (crgetuid(cr) != owner) {
716 shift += 3;
717 if (groupmember(dv->dv_attr->va_gid, cr) == 0)
718 shift += 3;
721 return (secpolicy_vnode_access2(cr, DVTOV(dv), owner,
722 dv->dv_attr->va_mode << shift, mode));
725 static int
726 devfs_access(struct vnode *vp, int mode, int flags, struct cred *cr,
727 caller_context_t *ct)
729 struct dv_node *dv = VTODV(vp);
730 int res;
732 dcmn_err2(("devfs_access %s\n", dv->dv_name));
733 ASSERT(dv->dv_attr || dv->dv_attrvp);
735 /* restrict console access to privileged processes */
736 if ((vp->v_rdev == rconsdev) && secpolicy_console(cr) != 0) {
737 return (EACCES);
740 rw_enter(&dv->dv_contents, RW_READER);
741 if (dv->dv_attr && ((dv->dv_flags & DV_ACL) == 0)) {
742 res = devfs_unlocked_access(dv, mode, cr);
743 } else {
744 res = fop_access(dv->dv_attrvp, mode, flags, cr, ct);
746 rw_exit(&dv->dv_contents);
747 return (res);
751 * Lookup
753 * Given the directory vnode and the name of the component, return
754 * the corresponding held vnode for that component.
756 * Of course in these fictional filesystems, nothing's ever quite
757 * -that- simple.
759 * devfs name type shadow (fs attributes) type comments
760 * -------------------------------------------------------------------------
761 * drv[@addr] VDIR drv[@addr] VDIR nexus driver
762 * drv[@addr]:m VCHR/VBLK drv[@addr]:m VREG leaf driver
763 * drv[@addr] VCHR/VBLK drv[@addr]:.default VREG leaf driver
764 * -------------------------------------------------------------------------
766 * The following names are reserved for the attribute filesystem (which
767 * could easily be another layer on top of this one - we simply need to
768 * hold the vnode of the thing we're looking at)
770 * attr name type shadow (fs attributes) type comments
771 * -------------------------------------------------------------------------
772 * drv[@addr] VDIR - - attribute dir
773 * minorname VDIR - - minorname
774 * attribute VREG - - attribute
775 * -------------------------------------------------------------------------
777 * Examples:
779 * devfs:/devices/.../mm@0:zero VCHR
780 * shadow:/.devices/.../mm@0:zero VREG, fs attrs
781 * devfs:/devices/.../mm@0:/zero/attr VREG, driver attribute
783 * devfs:/devices/.../sd@0,0:a VBLK
784 * shadow:/.devices/.../sd@0,0:a VREG, fs attrs
785 * devfs:/devices/.../sd@0,0:/a/.type VREG, "ddi_block:chan"
787 * devfs:/devices/.../mm@0 VCHR
788 * shadow:/.devices/.../mm@0:.default VREG, fs attrs
789 * devfs:/devices/.../mm@0:/.default/attr VREG, driver attribute
790 * devfs:/devices/.../mm@0:/.default/.type VREG, "ddi_pseudo"
792 * devfs:/devices/.../obio VDIR
793 * shadow:/devices/.../obio VDIR, needed for fs attrs.
794 * devfs:/devices/.../obio:/.default/attr VDIR, driver attribute
796 * We also need to be able deal with "old" devices that have gone away,
797 * though I think that provided we return them with readdir, they can
798 * be removed (i.e. they don't have to respond to lookup, though it might
799 * be weird if they didn't ;-)
801 * Lookup has side-effects.
803 * - It will create directories and fs attribute files in the shadow hierarchy.
804 * - It should cause non-SID devices to be probed (ask the parent nexi).
806 /*ARGSUSED3*/
807 static int
808 devfs_lookup(struct vnode *dvp, char *nm, struct vnode **vpp,
809 struct pathname *pnp, int flags, struct vnode *rdir, struct cred *cred,
810 caller_context_t *ct, int *direntflags, pathname_t *realpnp)
812 ASSERT(dvp->v_type == VDIR);
813 dcmn_err2(("devfs_lookup: %s\n", nm));
814 return (dv_find(VTODV(dvp), nm, vpp, pnp, rdir, cred, 0));
818 * devfs nodes can't really be created directly by userland - however,
819 * we do allow creates to find existing nodes:
821 * - any create fails if the node doesn't exist - EROFS.
822 * - creating an existing directory read-only succeeds, otherwise EISDIR.
823 * - exclusive creates fail if the node already exists - EEXIST.
824 * - failure to create the snode for an existing device - ENOSYS.
826 /*ARGSUSED2*/
827 static int
828 devfs_create(struct vnode *dvp, char *nm, struct vattr *vap, vcexcl_t excl,
829 int mode, struct vnode **vpp, struct cred *cred, int flag,
830 caller_context_t *ct, vsecattr_t *vsecp)
832 int error;
833 struct vnode *vp;
835 dcmn_err2(("devfs_create %s\n", nm));
836 error = dv_find(VTODV(dvp), nm, &vp, NULL, NULLVP, cred, 0);
837 if (error == 0) {
838 if (excl == EXCL)
839 error = EEXIST;
840 else if (vp->v_type == VDIR && (mode & VWRITE))
841 error = EISDIR;
842 else
843 error = fop_access(vp, mode, 0, cred, ct);
845 if (error) {
846 VN_RELE(vp);
847 } else
848 *vpp = vp;
849 } else if (error == ENOENT)
850 error = EROFS;
852 return (error);
856 * If DV_BUILD is set, we call into nexus driver to do a BUS_CONFIG_ALL.
857 * Otherwise, simply return cached dv_node's. Hotplug code always call
858 * devfs_clean() to invalid the dv_node cache.
860 /*ARGSUSED5*/
861 static int
862 devfs_readdir(struct vnode *dvp, struct uio *uiop, struct cred *cred, int *eofp,
863 caller_context_t *ct, int flags)
865 struct dv_node *ddv, *dv;
866 struct dirent64 *de, *bufp;
867 offset_t diroff;
868 offset_t soff;
869 size_t reclen, movesz;
870 int error;
871 struct vattr va;
872 size_t bufsz;
874 ddv = VTODV(dvp);
875 dcmn_err2(("devfs_readdir %s: offset %lld len %ld\n",
876 ddv->dv_name, uiop->uio_loffset, uiop->uio_iov->iov_len));
877 ASSERT(ddv->dv_attr || ddv->dv_attrvp);
878 ASSERT(RW_READ_HELD(&ddv->dv_contents));
880 if (uiop->uio_loffset >= MAXOFF_T) {
881 if (eofp)
882 *eofp = 1;
883 return (0);
886 if (uiop->uio_iovcnt != 1)
887 return (EINVAL);
889 if (dvp->v_type != VDIR)
890 return (ENOTDIR);
892 /* Load the initial contents */
893 if (ddv->dv_flags & DV_BUILD) {
894 if (!rw_tryupgrade(&ddv->dv_contents)) {
895 rw_exit(&ddv->dv_contents);
896 rw_enter(&ddv->dv_contents, RW_WRITER);
899 /* recheck and fill */
900 if (ddv->dv_flags & DV_BUILD)
901 dv_filldir(ddv);
903 rw_downgrade(&ddv->dv_contents);
906 soff = uiop->uio_loffset;
907 bufsz = uiop->uio_iov->iov_len;
908 de = bufp = kmem_alloc(bufsz, KM_SLEEP);
909 movesz = 0;
910 dv = (struct dv_node *)-1;
913 * Move as many entries into the uio structure as it will take.
914 * Special case "." and "..".
916 diroff = 0;
917 if (soff == 0) { /* . */
918 reclen = DIRENT64_RECLEN(strlen("."));
919 if ((movesz + reclen) > bufsz)
920 goto full;
921 de->d_ino = (ino64_t)ddv->dv_ino;
922 de->d_off = (off64_t)diroff + 1;
923 de->d_reclen = (ushort_t)reclen;
925 /* use strncpy(9f) to zero out uninitialized bytes */
927 (void) strncpy(de->d_name, ".", DIRENT64_NAMELEN(reclen));
928 movesz += reclen;
929 de = (dirent64_t *)(intptr_t)((char *)de + reclen);
930 dcmn_err3(("devfs_readdir: A: diroff %lld, soff %lld: '%s' "
931 "reclen %lu\n", diroff, soff, ".", reclen));
934 diroff++;
935 if (soff <= 1) { /* .. */
936 reclen = DIRENT64_RECLEN(strlen(".."));
937 if ((movesz + reclen) > bufsz)
938 goto full;
939 de->d_ino = (ino64_t)ddv->dv_dotdot->dv_ino;
940 de->d_off = (off64_t)diroff + 1;
941 de->d_reclen = (ushort_t)reclen;
943 /* use strncpy(9f) to zero out uninitialized bytes */
945 (void) strncpy(de->d_name, "..", DIRENT64_NAMELEN(reclen));
946 movesz += reclen;
947 de = (dirent64_t *)(intptr_t)((char *)de + reclen);
948 dcmn_err3(("devfs_readdir: B: diroff %lld, soff %lld: '%s' "
949 "reclen %lu\n", diroff, soff, "..", reclen));
952 diroff++;
953 for (dv = DV_FIRST_ENTRY(ddv); dv;
954 dv = DV_NEXT_ENTRY(ddv, dv), diroff++) {
955 /* skip entries until at correct directory offset */
956 if (diroff < soff)
957 continue;
960 * hidden nodes are skipped (but they still occupy a
961 * directory offset).
963 if (dv->dv_devi && ndi_dev_is_hidden_node(dv->dv_devi))
964 continue;
967 * DDM_INTERNAL_PATH minor nodes are skipped for readdirs
968 * outside the kernel (but they still occupy a directory
969 * offset).
971 if ((dv->dv_flags & DV_INTERNAL) && (cred != kcred))
972 continue;
974 reclen = DIRENT64_RECLEN(strlen(dv->dv_name));
975 if ((movesz + reclen) > bufsz) {
976 dcmn_err3(("devfs_readdir: C: diroff "
977 "%lld, soff %lld: '%s' reclen %lu\n",
978 diroff, soff, dv->dv_name, reclen));
979 goto full;
981 de->d_ino = (ino64_t)dv->dv_ino;
982 de->d_off = (off64_t)diroff + 1;
983 de->d_reclen = (ushort_t)reclen;
985 /* use strncpy(9f) to zero out uninitialized bytes */
987 ASSERT(strlen(dv->dv_name) + 1 <=
988 DIRENT64_NAMELEN(reclen));
989 (void) strncpy(de->d_name, dv->dv_name,
990 DIRENT64_NAMELEN(reclen));
992 movesz += reclen;
993 de = (dirent64_t *)(intptr_t)((char *)de + reclen);
994 dcmn_err4(("devfs_readdir: D: diroff "
995 "%lld, soff %lld: '%s' reclen %lu\n", diroff, soff,
996 dv->dv_name, reclen));
999 /* the buffer is full, or we exhausted everything */
1000 full: dcmn_err3(("devfs_readdir: moving %lu bytes: "
1001 "diroff %lld, soff %lld, dv %p\n",
1002 movesz, diroff, soff, (void *)dv));
1004 if ((movesz == 0) && dv)
1005 error = EINVAL; /* cannot be represented */
1006 else {
1007 error = uiomove(bufp, movesz, UIO_READ, uiop);
1008 if (error == 0) {
1009 if (eofp)
1010 *eofp = dv ? 0 : 1;
1011 uiop->uio_loffset = diroff;
1014 va.va_mask = AT_ATIME;
1015 gethrestime(&va.va_atime);
1016 rw_exit(&ddv->dv_contents);
1017 (void) devfs_setattr(dvp, &va, 0, cred, ct);
1018 rw_enter(&ddv->dv_contents, RW_READER);
1021 kmem_free(bufp, bufsz);
1022 return (error);
1025 /*ARGSUSED*/
1026 static int
1027 devfs_fsync(struct vnode *vp, int syncflag, struct cred *cred,
1028 caller_context_t *ct)
1031 * Message goes to console only. Otherwise, the message
1032 * causes devfs_fsync to be invoked again... infinite loop
1034 dcmn_err2(("devfs_fsync %s\n", VTODV(vp)->dv_name));
1035 return (0);
1039 * Normally, we leave the dv_node here at count of 0.
1040 * The node will be destroyed when dv_cleandir() is called.
1042 * Stale dv_node's are already unlinked from the fs tree,
1043 * so dv_cleandir() won't find them. We destroy such nodes
1044 * immediately.
1046 /*ARGSUSED1*/
1047 static void
1048 devfs_inactive(struct vnode *vp, struct cred *cred, caller_context_t *ct)
1050 int destroy;
1051 struct dv_node *dv = VTODV(vp);
1053 dcmn_err2(("devfs_inactive: %s\n", dv->dv_name));
1054 mutex_enter(&vp->v_lock);
1055 ASSERT(vp->v_count >= 1);
1056 VN_RELE_LOCKED(vp);
1057 destroy = (DV_STALE(dv) && vp->v_count == 0);
1058 mutex_exit(&vp->v_lock);
1060 /* stale nodes cannot be rediscovered, destroy it here */
1061 if (destroy)
1062 dv_destroy(dv, 0);
1066 * XXX Why do we need this? NFS mounted /dev directories?
1067 * XXX Talk to peter staubach about this.
1069 /*ARGSUSED2*/
1070 static int
1071 devfs_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct)
1073 struct dv_node *dv = VTODV(vp);
1074 struct dv_fid *dv_fid;
1076 if (fidp->fid_len < (sizeof (struct dv_fid) - sizeof (ushort_t))) {
1077 fidp->fid_len = sizeof (struct dv_fid) - sizeof (ushort_t);
1078 return (ENOSPC);
1081 dv_fid = (struct dv_fid *)fidp;
1082 bzero(dv_fid, sizeof (struct dv_fid));
1083 dv_fid->dvfid_len = (int)sizeof (struct dv_fid) - sizeof (ushort_t);
1084 dv_fid->dvfid_ino = dv->dv_ino;
1085 /* dv_fid->dvfid_gen = dv->tn_gen; XXX ? */
1087 return (0);
1091 * This pair of routines bracket all fop_read, fop_write
1092 * and fop_readdir requests. The contents lock stops things
1093 * moving around while we're looking at them.
1095 * Also used by file and record locking.
1097 /*ARGSUSED2*/
1098 static int
1099 devfs_rwlock(struct vnode *vp, int write_flag, caller_context_t *ct)
1101 dcmn_err2(("devfs_rwlock %s\n", VTODV(vp)->dv_name));
1102 rw_enter(&VTODV(vp)->dv_contents, write_flag ? RW_WRITER : RW_READER);
1103 return (write_flag);
1106 /*ARGSUSED1*/
1107 static void
1108 devfs_rwunlock(struct vnode *vp, int write_flag, caller_context_t *ct)
1110 dcmn_err2(("devfs_rwunlock %s\n", VTODV(vp)->dv_name));
1111 rw_exit(&VTODV(vp)->dv_contents);
1115 * XXX Should probably do a better job of computing the maximum
1116 * offset available in the directory.
1118 /*ARGSUSED1*/
1119 static int
1120 devfs_seek(struct vnode *vp, offset_t ooff, offset_t *noffp,
1121 caller_context_t *ct)
1123 ASSERT(vp->v_type == VDIR);
1124 dcmn_err2(("devfs_seek %s\n", VTODV(vp)->dv_name));
1125 return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0);
1128 const struct vnodeops dv_vnodeops = {
1129 .vnop_name = "devfs",
1130 .vop_open = devfs_open,
1131 .vop_close = devfs_close,
1132 .vop_read = devfs_read,
1133 .vop_write = devfs_write,
1134 .vop_ioctl = devfs_ioctl,
1135 .vop_getattr = devfs_getattr,
1136 .vop_setattr = devfs_setattr,
1137 .vop_access = devfs_access,
1138 .vop_lookup = devfs_lookup,
1139 .vop_create = devfs_create,
1140 .vop_readdir = devfs_readdir,
1141 .vop_fsync = devfs_fsync,
1142 .vop_inactive = devfs_inactive,
1143 .vop_fid = devfs_fid,
1144 .vop_rwlock = devfs_rwlock,
1145 .vop_rwunlock = devfs_rwunlock,
1146 .vop_seek = devfs_seek,
1147 .vop_pathconf = devfs_pathconf,
1148 .vop_dispose = fs_nodispose,
1149 .vop_setsecattr = devfs_setsecattr,
1150 .vop_getsecattr = devfs_getsecattr,