dmake: do not set MAKEFLAGS=k
[unleashed/tickless.git] / kernel / fs / devfs / devfs_vfsops.c
blob371e1032bb973cce086373e4925b43974a07b91e
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
27 * This is the device filesystem.
29 * It is a combination of a namer to drive autoconfiguration,
30 * plus the access methods for the device drivers of the system.
32 * The prototype is fairly dependent on specfs for the latter part
33 * of its implementation, though a final version would integrate the two.
35 #include <sys/types.h>
36 #include <sys/param.h>
37 #include <sys/sysmacros.h>
38 #include <sys/systm.h>
39 #include <sys/kmem.h>
40 #include <sys/time.h>
41 #include <sys/pathname.h>
42 #include <sys/vfs.h>
43 #include <sys/vnode.h>
44 #include <sys/stat.h>
45 #include <sys/uio.h>
46 #include <sys/stat.h>
47 #include <sys/errno.h>
48 #include <sys/cmn_err.h>
49 #include <sys/cred.h>
50 #include <sys/statvfs.h>
51 #include <sys/mount.h>
52 #include <sys/debug.h>
53 #include <sys/modctl.h>
54 #include <sys/fs_subr.h>
55 #include <sys/fs/dv_node.h>
56 #include <sys/fs/snode.h>
57 #include <sys/sunndi.h>
58 #include <sys/policy.h>
59 #include <sys/sunmdi.h>
/*
 * devfs vfs operations.
 *
 * Forward declarations for the handlers installed in devfs_vfsops and
 * for the init routine referenced from devfs_vfssw.
 */
static int devfs_mount(struct vfs *vfsp, struct vnode *mvp,
    struct mounta *uap, struct cred *cr);
static int devfs_unmount(struct vfs *vfsp, int flag, struct cred *cr);
static int devfs_root(struct vfs *vfsp, struct vnode **vpp);
static int devfs_statvfs(struct vfs *vfsp, struct statvfs64 *sbp);
static int devfs_mountroot(struct vfs *vfsp, enum whymountroot why);

static int devfsinit(int fstype, char *name);
73 static vfsdef_t devfs_vfssw = {
74 VFSDEF_VERSION,
75 "devfs", /* type name string */
76 devfsinit, /* init routine */
77 0, /* flags */
78 NULL /* mount options table prototype */
81 static kmutex_t devfs_lock; /* protects global data */
82 static int devfstype; /* fstype */
83 static dev_t devfsdev; /* the fictious 'device' we live on */
84 static struct devfs_data *devfs_mntinfo; /* linked list of instances */
87 * Module linkage information
89 static struct modlfs modlfs = {
90 &mod_fsops, "devices filesystem", &devfs_vfssw
93 static struct modlinkage modlinkage = {
94 MODREV_1, (void *)&modlfs, NULL
97 int
98 _init(void)
100 int e;
102 mutex_init(&devfs_lock, "devfs lock", MUTEX_DEFAULT, NULL);
103 dv_node_cache_init();
104 if ((e = mod_install(&modlinkage)) != 0) {
105 dv_node_cache_fini();
106 mutex_destroy(&devfs_lock);
107 return (e);
109 dcmn_err(("devfs loaded\n"));
110 return (0);
/*
 * Module unload entry point.  Always returns EBUSY.
 */
int
_fini(void)
{
	return (EBUSY);
}
120 _info(struct modinfo *modinfop)
122 return (mod_info(&modlinkage, modinfop));
125 static const struct vfsops devfs_vfsops = {
126 .vfs_mount = devfs_mount,
127 .vfs_unmount = devfs_unmount,
128 .vfs_root = devfs_root,
129 .vfs_statvfs = devfs_statvfs,
130 .vfs_mountroot = devfs_mountroot,
133 /*ARGSUSED1*/
134 static int
135 devfsinit(int fstype, char *name)
137 int error;
138 int dev;
139 extern major_t getudev(void); /* gack - what a function */
141 devfstype = fstype;
143 * Associate VFS ops vector with this fstype
145 error = vfs_setfsops(fstype, &devfs_vfsops);
146 if (error != 0) {
147 cmn_err(CE_WARN, "devfsinit: bad fstype");
148 return (error);
152 * Invent a dev_t (sigh).
154 if ((dev = getudev()) == DDI_MAJOR_T_NONE) {
155 cmn_err(CE_NOTE, "%s: can't get unique dev", devfs_vfssw.name);
156 dev = 0;
158 devfsdev = makedevice(dev, 0);
160 return (0);
164 * The name of the mount point and the name of the attribute
165 * filesystem are passed down from userland for now.
167 static int
168 devfs_mount(struct vfs *vfsp, struct vnode *mvp, struct mounta *uap,
169 struct cred *cr)
171 struct devfs_data *devfs_data;
172 struct vnode *avp;
173 struct dv_node *dv;
174 struct vattr va;
176 dcmn_err(("devfs_mount\n"));
178 if (secpolicy_fs_mount(cr, mvp, vfsp) != 0)
179 return (EPERM);
182 * check that the mount point is sane
184 if (mvp->v_type != VDIR)
185 return (ENOTDIR);
187 ASSERT(uap->flags & MS_SYSSPACE);
189 * Devfs can only be mounted from kernel during boot.
190 * avp is the existing /devices, the same as the mount point.
192 avp = mvp;
195 * Create and initialize the vfs-private data.
196 * This includes a hand-crafted root vnode (we build
197 * this here mostly so that traverse() doesn't sleep
198 * in VFS_ROOT()).
200 mutex_enter(&devfs_lock);
201 ASSERT(devfs_mntinfo == NULL);
202 dv = dv_mkroot(vfsp, devfsdev);
203 dv->dv_attrvp = avp; /* attribute root vp */
205 ASSERT(dv == dv->dv_dotdot);
207 devfs_data = kmem_zalloc(sizeof (struct devfs_data), KM_SLEEP);
208 devfs_data->devfs_vfsp = vfsp;
209 devfs_data->devfs_root = dv;
211 vfsp->vfs_data = (caddr_t)devfs_data;
212 vfsp->vfs_fstype = devfstype;
213 vfsp->vfs_dev = devfsdev;
214 vfsp->vfs_bsize = DEV_BSIZE;
215 vfsp->vfs_mtime = ddi_get_time();
216 vfs_make_fsid(&vfsp->vfs_fsid, vfsp->vfs_dev, devfstype);
218 /* We're there. */
219 devfs_mntinfo = devfs_data;
220 mutex_exit(&devfs_lock);
222 va.va_mask = AT_ATIME|AT_MTIME;
223 gethrestime(&va.va_atime);
224 gethrestime(&va.va_mtime);
225 (void) fop_setattr(DVTOV(dv), &va, 0, cr, NULL);
226 return (0);
/*
 * We never unmount devfs in a real production system, so every
 * unmount request is refused with EBUSY.  All arguments are unused.
 */
/*ARGSUSED*/
static int
devfs_unmount(struct vfs *vfsp, int flag, struct cred *cr)
{
	return (EBUSY);
}
241 * return root vnode for given vfs
243 static int
244 devfs_root(struct vfs *vfsp, struct vnode **vpp)
246 dcmn_err(("devfs_root\n"));
247 *vpp = DVTOV(VFSTODVFS(vfsp)->devfs_root);
248 VN_HOLD(*vpp);
249 return (0);
253 * return 'generic superblock' information to userland.
255 * not much that we can usefully admit to here
257 static int
258 devfs_statvfs(struct vfs *vfsp, struct statvfs64 *sbp)
260 extern kmem_cache_t *dv_node_cache;
262 dev32_t d32;
264 dcmn_err(("devfs_statvfs\n"));
265 bzero(sbp, sizeof (*sbp));
266 sbp->f_frsize = sbp->f_bsize = vfsp->vfs_bsize;
268 * We could compute the number of devfsnodes here .. but since
269 * it's dynamic anyway, it's not clear how useful this is.
271 sbp->f_files = kmem_cache_stat(dv_node_cache, "alloc");
273 /* no illusions that free/avail files is relevant to devfs */
274 sbp->f_ffree = 0;
275 sbp->f_favail = 0;
277 /* no illusions that blocks are relevant to devfs */
278 sbp->f_bfree = 0;
279 sbp->f_bavail = 0;
280 sbp->f_blocks = 0;
282 (void) cmpldev(&d32, vfsp->vfs_dev);
283 sbp->f_fsid = d32;
284 (void) strcpy(sbp->f_basetype, vfssw[devfstype].vsw_name);
285 sbp->f_flag = vf_to_stf(vfsp->vfs_flag);
286 sbp->f_namemax = MAXNAMELEN - 1;
287 (void) strcpy(sbp->f_fstr, "devices");
289 return (0);
293 * devfs always mount after root is mounted, so this should never
294 * be invoked.
296 /*ARGSUSED*/
297 static int
298 devfs_mountroot(struct vfs *vfsp, enum whymountroot why)
300 dcmn_err(("devfs_mountroot\n"));
302 return (EINVAL);
305 struct dv_node *
306 devfs_dip_to_dvnode(dev_info_t *dip)
308 char *dirpath;
309 struct vnode *dirvp;
311 ASSERT(dip != NULL);
313 /* no-op if devfs not mounted yet */
314 if (devfs_mntinfo == NULL)
315 return (NULL);
318 * The lookupname below only looks up cached dv_nodes
319 * because devfs_clean_key is set in thread specific data.
321 dirpath = kmem_alloc(MAXPATHLEN, KM_SLEEP);
322 (void) ddi_pathname(dip, dirpath);
323 if (devfs_lookupname(dirpath, NULLVPP, &dirvp)) {
324 dcmn_err(("directory %s not found\n", dirpath));
325 kmem_free(dirpath, MAXPATHLEN);
326 return (NULL);
329 kmem_free(dirpath, MAXPATHLEN);
330 return (VTODV(dirvp));
334 * If DV_CLEAN_FORCE devfs_clean is issued with a dip that is not the root
335 * and not a vHCI we also need to clean any vHCI branches because they
336 * may contain pHCI nodes. A detach_node() of a pHCI will fail if its
337 * mdi_devi_offline() fails, and the mdi_devi_offline() of the last
338 * pHCI will fail unless an ndi_devi_offline() of the Client nodes under
339 * the vHCI is successful - which requires a clean vHCI branch to removed
340 * the devi_refs associated with devfs vnodes.
342 static int
343 devfs_clean_vhci(dev_info_t *dip, void *args)
345 struct dv_node *dvp;
346 uint_t flags = (uint_t)(uintptr_t)args;
348 (void) tsd_set(devfs_clean_key, (void *)1);
349 dvp = devfs_dip_to_dvnode(dip);
350 if (dvp) {
351 (void) dv_cleandir(dvp, NULL, flags);
352 VN_RELE(DVTOV(dvp));
354 (void) tsd_set(devfs_clean_key, NULL);
355 return (DDI_WALK_CONTINUE);
359 * devfs_clean()
361 * Destroy unreferenced dv_node's and detach devices.
363 * devfs_clean will try its best to clean up unused nodes. It is
364 * no longer valid to assume that just because devfs_clean fails,
365 * the device is not removable. This is because device contracts
366 * can result in userland processes releasing a device during the
367 * device offline process in the kernel. Thus it is no longer
368 * correct to fail an offline just because devfs_clean finds
369 * referenced dv_nodes. To enforce this, devfs_clean() always
370 * returns success i.e. 0.
372 * devfs_clean() may return before removing all possible nodes if
373 * we cannot acquire locks in areas of the code where potential for
374 * deadlock exists (see comments in dv_find() and dv_cleandir() for
375 * examples of this).
377 * devfs caches unreferenced dv_node to speed by the performance
378 * of ls, find, etc. devfs_clean() is invoked to cleanup cached
379 * dv_nodes to reclaim memory as well as to facilitate device
380 * removal (dv_node reference devinfo nodes, which prevents driver
381 * detach).
383 * If a shell parks in a /devices directory, the dv_node will be
384 * held, preventing the corresponding device to be detached.
385 * This would be a denial of service against DR. To prevent this,
386 * DR code calls devfs_clean() with the DV_CLEAN_FORCE flag.
387 * The dv_cleandir() implementation does the right thing to ensure
388 * successful DR.
391 devfs_clean(dev_info_t *dip, char *devnm, uint_t flags)
393 struct dv_node *dvp;
395 dcmn_err(("devfs_unconfigure: dip = 0x%p, flags = 0x%x",
396 (void *)dip, flags));
398 /* avoid recursion back into the device tree */
399 (void) tsd_set(devfs_clean_key, (void *)1);
400 dvp = devfs_dip_to_dvnode(dip);
401 if (dvp == NULL) {
402 (void) tsd_set(devfs_clean_key, NULL);
403 return (0);
406 (void) dv_cleandir(dvp, devnm, flags);
407 (void) tsd_set(devfs_clean_key, NULL);
408 VN_RELE(DVTOV(dvp));
411 * If we are doing a DV_CLEAN_FORCE, and we did not start at the
412 * root, and we did not start at a vHCI node then clean vHCI
413 * branches too. Failure to clean vHCI branch does not cause EBUSY.
415 * Also, to accommodate nexus callers that clean 'self' to DR 'child'
416 * (like pcihp) we clean vHCIs even when dv_cleandir() of dip branch
417 * above fails - this prevents a busy DR 'child' sibling from causing
418 * the DR of 'child' to fail because a vHCI branch was not cleaned.
420 if ((flags & DV_CLEAN_FORCE) && (dip != ddi_root_node()) &&
421 (mdi_component_is_vhci(dip, NULL) != MDI_SUCCESS)) {
423 * NOTE: for backport the following is recommended
424 * (void) devfs_clean_vhci(scsi_vhci_dip,
425 * (void *)(uintptr_t)flags);
427 mdi_walk_vhcis(devfs_clean_vhci, (void *)(uintptr_t)flags);
430 return (0);
434 * lookup a devfs relative pathname, returning held vnodes for the final
435 * component and the containing directory (if requested).
437 * NOTE: We can't use lookupname because this would use the current
438 * processes credentials (CRED) in the call lookuppnvp instead
439 * of kcred. It also does not give you the flexibility so
440 * specify the directory to start the resolution in (devicesdir).
443 devfs_lookupname(
444 char *pathname, /* user pathname */
445 vnode_t **dirvpp, /* ret for ptr to parent dir vnode */
446 vnode_t **compvpp) /* ret for ptr to component vnode */
448 struct pathname pn;
449 int error;
451 ASSERT(devicesdir); /* devfs must be initialized */
452 ASSERT(pathname); /* must have some path */
454 if (error = pn_get(pathname, UIO_SYSSPACE, &pn))
455 return (error);
457 /* make the path relative to /devices. */
458 pn_skipslash(&pn);
459 if (pn_pathleft(&pn) == 0) {
460 /* all we had was "\0" or "/" (which skipslash skiped) */
461 if (dirvpp)
462 *dirvpp = NULL;
463 if (compvpp) {
464 VN_HOLD(devicesdir);
465 *compvpp = devicesdir;
467 } else {
469 * Use devfs lookup to resolve pathname to the vnode for
470 * the device via relative lookup in devfs. Extra holds for
471 * using devicesdir as directory we are searching and for
472 * being our root without being == rootdir.
474 VN_HOLD(devicesdir);
475 VN_HOLD(devicesdir);
476 error = lookuppnvp(&pn, NULL, FOLLOW, dirvpp, compvpp,
477 devicesdir, devicesdir, kcred);
479 pn_free(&pn);
481 return (error);
485 * Given a devfs path (without the /devices prefix), walk
486 * the dv_node sub-tree rooted at the path.
489 devfs_walk(
490 char *path,
491 void (*callback)(struct dv_node *, void *),
492 void *arg)
494 char *dirpath, *devnm;
495 struct vnode *dirvp;
497 ASSERT(path && callback);
499 if (*path != '/' || devfs_mntinfo == NULL)
500 return (ENXIO);
502 dcmn_err(("devfs_walk: path = %s", path));
504 dirpath = kmem_alloc(MAXPATHLEN, KM_SLEEP);
506 (void) snprintf(dirpath, MAXPATHLEN, "/devices%s", path);
508 devnm = strrchr(dirpath, '/');
510 ASSERT(devnm);
512 *devnm++ = '\0';
514 if (lookupname(dirpath, UIO_SYSSPACE, 0, NULL, &dirvp)) {
515 dcmn_err(("directory %s not found\n", dirpath));
516 kmem_free(dirpath, MAXPATHLEN);
517 return (ENXIO);
521 * if path == "/", visit the root dv_node
523 if (*devnm == '\0') {
524 callback(VTODV(dirvp), arg);
525 devnm = NULL;
528 dv_walk(VTODV(dirvp), devnm, callback, arg);
530 VN_RELE(dirvp);
532 kmem_free(dirpath, MAXPATHLEN);
534 return (0);
538 devfs_devpolicy(vnode_t *vp, devplcy_t **dpp)
540 struct vnode *rvp;
541 struct dv_node *dvp;
542 int rval = -1;
544 /* fail if devfs not mounted yet */
545 if (devfs_mntinfo == NULL)
546 return (rval);
548 if (fop_realvp(vp, &rvp, NULL) == 0 &&
549 vn_matchops(rvp, &dv_vnodeops)) {
550 dvp = VTODV(rvp);
551 rw_enter(&dvp->dv_contents, RW_READER);
552 if (dvp->dv_priv) {
553 dphold(dvp->dv_priv);
554 *dpp = dvp->dv_priv;
555 rval = 0;
557 rw_exit(&dvp->dv_contents);
559 return (rval);