4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
27 * This is the device filesystem.
29 * It is a combination of a namer to drive autoconfiguration,
30 * plus the access methods for the device drivers of the system.
32 * The prototype is fairly dependent on specfs for the latter part
33 * of its implementation, though a final version would integrate the two.
35 #include <sys/types.h>
36 #include <sys/param.h>
37 #include <sys/sysmacros.h>
38 #include <sys/systm.h>
41 #include <sys/pathname.h>
43 #include <sys/vnode.h>
47 #include <sys/errno.h>
48 #include <sys/cmn_err.h>
50 #include <sys/statvfs.h>
51 #include <sys/mount.h>
52 #include <sys/debug.h>
53 #include <sys/modctl.h>
54 #include <sys/fs_subr.h>
55 #include <sys/fs/dv_node.h>
56 #include <sys/fs/snode.h>
57 #include <sys/sunndi.h>
58 #include <sys/policy.h>
59 #include <sys/sunmdi.h>
62 * devfs vfs operations.
64 static int devfs_mount(struct vfs
*, struct vnode
*, struct mounta
*,
66 static int devfs_unmount(struct vfs
*, int, struct cred
*);
67 static int devfs_root(struct vfs
*, struct vnode
**);
68 static int devfs_statvfs(struct vfs
*, struct statvfs64
*);
69 static int devfs_mountroot(struct vfs
*, enum whymountroot
);
71 static int devfsinit(int, char *);
73 static vfsdef_t devfs_vfssw
= {
75 "devfs", /* type name string */
76 devfsinit
, /* init routine */
78 NULL
/* mount options table prototype */
81 static kmutex_t devfs_lock
; /* protects global data */
82 static int devfstype
; /* fstype */
83 static dev_t devfsdev
; /* the fictitious 'device' we live on */
84 static struct devfs_data
*devfs_mntinfo
; /* linked list of instances */
87 * Module linkage information
89 static struct modlfs modlfs
= {
90 &mod_fsops
, "devices filesystem", &devfs_vfssw
93 static struct modlinkage modlinkage
= {
94 MODREV_1
, (void *)&modlfs
, NULL
102 mutex_init(&devfs_lock
, "devfs lock", MUTEX_DEFAULT
, NULL
);
103 dv_node_cache_init();
104 if ((e
= mod_install(&modlinkage
)) != 0) {
105 dv_node_cache_fini();
106 mutex_destroy(&devfs_lock
);
109 dcmn_err(("devfs loaded\n"));
120 _info(struct modinfo
*modinfop
)
122 return (mod_info(&modlinkage
, modinfop
));
125 static const struct vfsops devfs_vfsops
= {
126 .vfs_mount
= devfs_mount
,
127 .vfs_unmount
= devfs_unmount
,
128 .vfs_root
= devfs_root
,
129 .vfs_statvfs
= devfs_statvfs
,
130 .vfs_mountroot
= devfs_mountroot
,
135 devfsinit(int fstype
, char *name
)
139 extern major_t
getudev(void); /* gack - what a function */
143 * Associate VFS ops vector with this fstype
145 error
= vfs_setfsops(fstype
, &devfs_vfsops
);
147 cmn_err(CE_WARN
, "devfsinit: bad fstype");
152 * Invent a dev_t (sigh).
154 if ((dev
= getudev()) == DDI_MAJOR_T_NONE
) {
155 cmn_err(CE_NOTE
, "%s: can't get unique dev", devfs_vfssw
.name
);
158 devfsdev
= makedevice(dev
, 0);
164 * The name of the mount point and the name of the attribute
165 * filesystem are passed down from userland for now.
168 devfs_mount(struct vfs
*vfsp
, struct vnode
*mvp
, struct mounta
*uap
,
171 struct devfs_data
*devfs_data
;
176 dcmn_err(("devfs_mount\n"));
178 if (secpolicy_fs_mount(cr
, mvp
, vfsp
) != 0)
182 * check that the mount point is sane
184 if (mvp
->v_type
!= VDIR
)
187 ASSERT(uap
->flags
& MS_SYSSPACE
);
189 * Devfs can only be mounted from kernel during boot.
190 * avp is the existing /devices, the same as the mount point.
195 * Create and initialize the vfs-private data.
196 * This includes a hand-crafted root vnode (we build
197 * this here mostly so that traverse() doesn't sleep
200 mutex_enter(&devfs_lock
);
201 ASSERT(devfs_mntinfo
== NULL
);
202 dv
= dv_mkroot(vfsp
, devfsdev
);
203 dv
->dv_attrvp
= avp
; /* attribute root vp */
205 ASSERT(dv
== dv
->dv_dotdot
);
207 devfs_data
= kmem_zalloc(sizeof (struct devfs_data
), KM_SLEEP
);
208 devfs_data
->devfs_vfsp
= vfsp
;
209 devfs_data
->devfs_root
= dv
;
211 vfsp
->vfs_data
= (caddr_t
)devfs_data
;
212 vfsp
->vfs_fstype
= devfstype
;
213 vfsp
->vfs_dev
= devfsdev
;
214 vfsp
->vfs_bsize
= DEV_BSIZE
;
215 vfsp
->vfs_mtime
= ddi_get_time();
216 vfs_make_fsid(&vfsp
->vfs_fsid
, vfsp
->vfs_dev
, devfstype
);
219 devfs_mntinfo
= devfs_data
;
220 mutex_exit(&devfs_lock
);
222 va
.va_mask
= AT_ATIME
|AT_MTIME
;
223 gethrestime(&va
.va_atime
);
224 gethrestime(&va
.va_mtime
);
225 (void) fop_setattr(DVTOV(dv
), &va
, 0, cr
, NULL
);
231 * We never unmount devfs in a real production system.
235 devfs_unmount(struct vfs
*vfsp
, int flag
, struct cred
*cr
)
241 * return root vnode for given vfs
244 devfs_root(struct vfs
*vfsp
, struct vnode
**vpp
)
246 dcmn_err(("devfs_root\n"));
247 *vpp
= DVTOV(VFSTODVFS(vfsp
)->devfs_root
);
253 * return 'generic superblock' information to userland.
255 * not much that we can usefully admit to here
258 devfs_statvfs(struct vfs
*vfsp
, struct statvfs64
*sbp
)
260 extern kmem_cache_t
*dv_node_cache
;
264 dcmn_err(("devfs_statvfs\n"));
265 bzero(sbp
, sizeof (*sbp
));
266 sbp
->f_frsize
= sbp
->f_bsize
= vfsp
->vfs_bsize
;
268 * We could compute the number of devfsnodes here .. but since
269 * it's dynamic anyway, it's not clear how useful this is.
271 sbp
->f_files
= kmem_cache_stat(dv_node_cache
, "alloc");
273 /* no illusions that free/avail files is relevant to devfs */
277 /* no illusions that blocks are relevant to devfs */
282 (void) cmpldev(&d32
, vfsp
->vfs_dev
);
284 (void) strcpy(sbp
->f_basetype
, vfssw
[devfstype
].vsw_name
);
285 sbp
->f_flag
= vf_to_stf(vfsp
->vfs_flag
);
286 sbp
->f_namemax
= MAXNAMELEN
- 1;
287 (void) strcpy(sbp
->f_fstr
, "devices");
293 * devfs is always mounted after root is mounted, so this should never
298 devfs_mountroot(struct vfs
*vfsp
, enum whymountroot why
)
300 dcmn_err(("devfs_mountroot\n"));
306 devfs_dip_to_dvnode(dev_info_t
*dip
)
313 /* no-op if devfs not mounted yet */
314 if (devfs_mntinfo
== NULL
)
318 * The lookupname below only looks up cached dv_nodes
319 * because devfs_clean_key is set in thread specific data.
321 dirpath
= kmem_alloc(MAXPATHLEN
, KM_SLEEP
);
322 (void) ddi_pathname(dip
, dirpath
);
323 if (devfs_lookupname(dirpath
, NULLVPP
, &dirvp
)) {
324 dcmn_err(("directory %s not found\n", dirpath
));
325 kmem_free(dirpath
, MAXPATHLEN
);
329 kmem_free(dirpath
, MAXPATHLEN
);
330 return (VTODV(dirvp
));
334 * If DV_CLEAN_FORCE devfs_clean is issued with a dip that is not the root
335 * and not a vHCI we also need to clean any vHCI branches because they
336 * may contain pHCI nodes. A detach_node() of a pHCI will fail if its
337 * mdi_devi_offline() fails, and the mdi_devi_offline() of the last
338 * pHCI will fail unless an ndi_devi_offline() of the Client nodes under
339 * the vHCI is successful - which requires a clean vHCI branch to remove
340 * the devi_refs associated with devfs vnodes.
343 devfs_clean_vhci(dev_info_t
*dip
, void *args
)
346 uint_t flags
= (uint_t
)(uintptr_t)args
;
348 (void) tsd_set(devfs_clean_key
, (void *)1);
349 dvp
= devfs_dip_to_dvnode(dip
);
351 (void) dv_cleandir(dvp
, NULL
, flags
);
354 (void) tsd_set(devfs_clean_key
, NULL
);
355 return (DDI_WALK_CONTINUE
);
361 * Destroy unreferenced dv_node's and detach devices.
363 * devfs_clean will try its best to clean up unused nodes. It is
364 * no longer valid to assume that just because devfs_clean fails,
365 * the device is not removable. This is because device contracts
366 * can result in userland processes releasing a device during the
367 * device offline process in the kernel. Thus it is no longer
368 * correct to fail an offline just because devfs_clean finds
369 * referenced dv_nodes. To enforce this, devfs_clean() always
370 * returns success i.e. 0.
372 * devfs_clean() may return before removing all possible nodes if
373 * we cannot acquire locks in areas of the code where potential for
374 * deadlock exists (see comments in dv_find() and dv_cleandir() for
377 * devfs caches unreferenced dv_nodes to speed up the performance
378 * of ls, find, etc. devfs_clean() is invoked to clean up cached
379 * dv_nodes to reclaim memory as well as to facilitate device
380 * removal (dv_nodes reference devinfo nodes, which prevents driver
383 * If a shell parks in a /devices directory, the dv_node will be
384 * held, preventing the corresponding device from being detached.
385 * This would be a denial of service against DR. To prevent this,
386 * DR code calls devfs_clean() with the DV_CLEAN_FORCE flag.
387 * The dv_cleandir() implementation does the right thing to ensure
391 devfs_clean(dev_info_t
*dip
, char *devnm
, uint_t flags
)
395 dcmn_err(("devfs_unconfigure: dip = 0x%p, flags = 0x%x",
396 (void *)dip
, flags
));
398 /* avoid recursion back into the device tree */
399 (void) tsd_set(devfs_clean_key
, (void *)1);
400 dvp
= devfs_dip_to_dvnode(dip
);
402 (void) tsd_set(devfs_clean_key
, NULL
);
406 (void) dv_cleandir(dvp
, devnm
, flags
);
407 (void) tsd_set(devfs_clean_key
, NULL
);
411 * If we are doing a DV_CLEAN_FORCE, and we did not start at the
412 * root, and we did not start at a vHCI node then clean vHCI
413 * branches too. Failure to clean vHCI branch does not cause EBUSY.
415 * Also, to accommodate nexus callers that clean 'self' to DR 'child'
416 * (like pcihp) we clean vHCIs even when dv_cleandir() of dip branch
417 * above fails - this prevents a busy DR 'child' sibling from causing
418 * the DR of 'child' to fail because a vHCI branch was not cleaned.
420 if ((flags
& DV_CLEAN_FORCE
) && (dip
!= ddi_root_node()) &&
421 (mdi_component_is_vhci(dip
, NULL
) != MDI_SUCCESS
)) {
423 * NOTE: for backport the following is recommended
424 * (void) devfs_clean_vhci(scsi_vhci_dip,
425 * (void *)(uintptr_t)flags);
427 mdi_walk_vhcis(devfs_clean_vhci
, (void *)(uintptr_t)flags
);
434 * lookup a devfs relative pathname, returning held vnodes for the final
435 * component and the containing directory (if requested).
437 * NOTE: We can't use lookupname because this would use the current
438 * process's credentials (CRED) in the call to lookuppnvp instead
439 * of kcred. It also does not give you the flexibility to
440 * specify the directory to start the resolution in (devicesdir).
444 char *pathname
, /* user pathname */
445 vnode_t
**dirvpp
, /* ret for ptr to parent dir vnode */
446 vnode_t
**compvpp
) /* ret for ptr to component vnode */
451 ASSERT(devicesdir
); /* devfs must be initialized */
452 ASSERT(pathname
); /* must have some path */
454 if (error
= pn_get(pathname
, UIO_SYSSPACE
, &pn
))
457 /* make the path relative to /devices. */
459 if (pn_pathleft(&pn
) == 0) {
460 /* all we had was "\0" or "/" (which skipslash skipped) */
465 *compvpp
= devicesdir
;
469 * Use devfs lookup to resolve pathname to the vnode for
470 * the device via relative lookup in devfs. Extra holds for
471 * using devicesdir as directory we are searching and for
472 * being our root without being == rootdir.
476 error
= lookuppnvp(&pn
, NULL
, FOLLOW
, dirvpp
, compvpp
,
477 devicesdir
, devicesdir
, kcred
);
485 * Given a devfs path (without the /devices prefix), walk
486 * the dv_node sub-tree rooted at the path.
491 void (*callback
)(struct dv_node
*, void *),
494 char *dirpath
, *devnm
;
497 ASSERT(path
&& callback
);
499 if (*path
!= '/' || devfs_mntinfo
== NULL
)
502 dcmn_err(("devfs_walk: path = %s", path
));
504 dirpath
= kmem_alloc(MAXPATHLEN
, KM_SLEEP
);
506 (void) snprintf(dirpath
, MAXPATHLEN
, "/devices%s", path
);
508 devnm
= strrchr(dirpath
, '/');
514 if (lookupname(dirpath
, UIO_SYSSPACE
, 0, NULL
, &dirvp
)) {
515 dcmn_err(("directory %s not found\n", dirpath
));
516 kmem_free(dirpath
, MAXPATHLEN
);
521 * if path == "/", visit the root dv_node
523 if (*devnm
== '\0') {
524 callback(VTODV(dirvp
), arg
);
528 dv_walk(VTODV(dirvp
), devnm
, callback
, arg
);
532 kmem_free(dirpath
, MAXPATHLEN
);
538 devfs_devpolicy(vnode_t
*vp
, devplcy_t
**dpp
)
544 /* fail if devfs not mounted yet */
545 if (devfs_mntinfo
== NULL
)
548 if (fop_realvp(vp
, &rvp
, NULL
) == 0 &&
549 vn_matchops(rvp
, &dv_vnodeops
)) {
551 rw_enter(&dvp
->dv_contents
, RW_READER
);
553 dphold(dvp
->dv_priv
);
557 rw_exit(&dvp
->dv_contents
);