dmake: do not set MAKEFLAGS=k
[unleashed/tickless.git] / kernel / fs / fdfs / fdops.c
blob877bf8297269b95488ee2f9579c8350935f2cb39
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
22 * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2017 by Delphix. All rights reserved.
26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
27 /* All rights reserved. */
30 #include <sys/types.h>
31 #include <sys/param.h>
32 #include <sys/cmn_err.h>
33 #include <sys/debug.h>
34 #include <sys/dirent.h>
35 #include <sys/errno.h>
36 #include <sys/file.h>
37 #include <sys/inline.h>
38 #include <sys/kmem.h>
39 #include <sys/pathname.h>
40 #include <sys/resource.h>
41 #include <sys/statvfs.h>
42 #include <sys/mount.h>
43 #include <sys/sysmacros.h>
44 #include <sys/systm.h>
45 #include <sys/uio.h>
46 #include <sys/vfs.h>
47 #include <sys/vnode.h>
48 #include <sys/cred.h>
49 #include <sys/mntent.h>
50 #include <sys/mount.h>
51 #include <sys/user.h>
52 #include <sys/t_lock.h>
53 #include <sys/modctl.h>
54 #include <sys/policy.h>
55 #include <sys/fs_subr.h>
56 #include <sys/atomic.h>
57 #include <sys/mkdev.h>
/* Round r up to the next multiple of sizeof (int). */
#define	round(r)	(((r) + sizeof (int) - 1) & ~(sizeof (int) - 1))

/* Inode number reported for file descriptor n. */
#define	fdtoi(n)	((n) + 100)
/*
 * Fixed-size directory record produced by fdread(): an inode number
 * followed by a NUL-padded name of at most FDDIRSIZE bytes.
 */
#define	FDDIRSIZE	14
struct fddirect {
	short	d_ino;
	char	d_name[FDDIRSIZE];
};

/* Inode number reported for the directory itself ("." and ".."). */
#define	FDROOTINO	2
/* Size of one record; directory offsets advance in units of this. */
#define	FDSDSIZE	sizeof (struct fddirect)
/* Name length used to size the dirent buffer and reported as f_namemax. */
#define	FDNSIZE		10
72 static int fdfstype = 0;
73 static major_t fdfsmaj;
74 static minor_t fdfsmin;
75 static major_t fdrmaj;
76 static kmutex_t fd_minor_lock;
78 static int fdget(vnode_t *, char *, vnode_t **);
80 /* ARGSUSED */
81 static int
82 fdopen(vnode_t **vpp, int mode, cred_t *cr, caller_context_t *ct)
84 if ((*vpp)->v_type != VDIR) {
85 mutex_enter(&(*vpp)->v_lock);
86 (*vpp)->v_flag |= VDUP;
87 mutex_exit(&(*vpp)->v_lock);
89 return (0);
92 /* ARGSUSED */
93 static int
94 fdclose(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr,
95 caller_context_t *ct)
97 return (0);
100 /* ARGSUSED */
101 static int
102 fdread(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr, caller_context_t *ct)
104 static struct fddirect dotbuf[] = {
105 { FDROOTINO, "." },
106 { FDROOTINO, ".." }
108 struct fddirect dirbuf;
109 int i, n;
110 int minfd, maxfd, modoff, error = 0;
111 int nentries;
112 rctl_qty_t fdno_ctl;
113 int endoff;
115 if (vp->v_type != VDIR)
116 return (ENOSYS);
118 mutex_enter(&curproc->p_lock);
119 fdno_ctl = rctl_enforced_value(rctlproc_legacy[RLIMIT_NOFILE],
120 curproc->p_rctls, curproc);
121 nentries = MIN(P_FINFO(curproc)->fi_nfiles, (int)fdno_ctl);
122 mutex_exit(&curproc->p_lock);
124 endoff = (nentries + 2) * FDSDSIZE;
127 * Fake up ".", "..", and the /dev/fd directory entries.
129 if (uiop->uio_loffset < (offset_t)0 ||
130 uiop->uio_loffset >= (offset_t)endoff ||
131 uiop->uio_resid <= 0)
132 return (0);
133 ASSERT(uiop->uio_loffset <= MAXOFF_T);
134 if (uiop->uio_offset < 2*FDSDSIZE) {
135 error = uiomove((caddr_t)dotbuf + uiop->uio_offset,
136 MIN(uiop->uio_resid, 2*FDSDSIZE - uiop->uio_offset),
137 UIO_READ, uiop);
138 if (uiop->uio_resid <= 0 || error)
139 return (error);
141 minfd = (uiop->uio_offset - 2*FDSDSIZE)/FDSDSIZE;
142 maxfd = (uiop->uio_offset + uiop->uio_resid - 1)/FDSDSIZE;
143 modoff = uiop->uio_offset % FDSDSIZE;
145 for (i = 0; i < FDDIRSIZE; i++)
146 dirbuf.d_name[i] = '\0';
147 for (i = minfd; i < MIN(maxfd, nentries); i++) {
148 n = i;
149 dirbuf.d_ino = fdtoi(n);
150 numtos((ulong_t)n, dirbuf.d_name);
151 error = uiomove((caddr_t)&dirbuf + modoff,
152 MIN(uiop->uio_resid, FDSDSIZE - modoff),
153 UIO_READ, uiop);
154 if (uiop->uio_resid <= 0 || error)
155 return (error);
156 modoff = 0;
159 return (error);
162 /* ARGSUSED */
163 static int
164 fdgetattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
165 caller_context_t *ct)
167 vfs_t *vfsp = vp->v_vfsp;
168 timestruc_t now;
170 if (vp->v_type == VDIR) {
171 vap->va_nlink = 2;
172 vap->va_size = (uoff_t)
173 ((P_FINFO(curproc)->fi_nfiles + 2) * FDSDSIZE);
174 vap->va_mode = 0555;
175 vap->va_nodeid = (ino64_t)FDROOTINO;
176 } else {
177 vap->va_nlink = 1;
178 vap->va_size = 0;
179 vap->va_mode = 0666;
180 vap->va_nodeid = (ino64_t)fdtoi(getminor(vp->v_rdev));
182 vap->va_type = vp->v_type;
183 vap->va_rdev = vp->v_rdev;
184 vap->va_blksize = vfsp->vfs_bsize;
185 vap->va_nblocks = (fsblkcnt64_t)0;
186 gethrestime(&now);
187 vap->va_atime = vap->va_mtime = vap->va_ctime = now;
188 vap->va_uid = 0;
189 vap->va_gid = 0;
190 vap->va_fsid = vfsp->vfs_dev;
191 vap->va_seq = 0;
192 return (0);
195 /* ARGSUSED */
196 static int
197 fdaccess(vnode_t *vp, int mode, int flags, cred_t *cr, caller_context_t *ct)
199 return (0);
202 /* ARGSUSED */
203 static int
204 fdlookup(vnode_t *dp, char *comp, vnode_t **vpp, pathname_t *pnp, int flags,
205 vnode_t *rdir, cred_t *cr, caller_context_t *ct, int *direntflags,
206 pathname_t *realpnp)
208 if (comp[0] == 0 || strcmp(comp, ".") == 0 || strcmp(comp, "..") == 0) {
209 VN_HOLD(dp);
210 *vpp = dp;
211 return (0);
213 return (fdget(dp, comp, vpp));
216 /* ARGSUSED */
217 static int
218 fdcreate(vnode_t *dvp, char *comp, vattr_t *vap, enum vcexcl excl, int mode,
219 vnode_t **vpp, cred_t *cr, int flag, caller_context_t *ct,
220 vsecattr_t *vsecp)
222 return (fdget(dvp, comp, vpp));
225 /* ARGSUSED */
226 static int
227 fdreaddir(vnode_t *vp, uio_t *uiop, cred_t *cr, int *eofp, caller_context_t *ct,
228 int flags)
230 /* bp holds one dirent structure */
231 uoff_t bp[DIRENT64_RECLEN(FDNSIZE) / sizeof (uoff_t)];
232 struct dirent64 *dirent = (struct dirent64 *)bp;
233 int reclen, nentries;
234 rctl_qty_t fdno_ctl;
235 int n;
236 int oresid;
237 off_t off;
239 if (uiop->uio_offset < 0 || uiop->uio_resid <= 0 ||
240 (uiop->uio_offset % FDSDSIZE) != 0)
241 return (ENOENT);
243 ASSERT(uiop->uio_loffset <= MAXOFF_T);
244 oresid = uiop->uio_resid;
245 bzero(bp, sizeof (bp));
247 mutex_enter(&curproc->p_lock);
248 fdno_ctl = rctl_enforced_value(rctlproc_legacy[RLIMIT_NOFILE],
249 curproc->p_rctls, curproc);
250 nentries = MIN(P_FINFO(curproc)->fi_nfiles, (int)fdno_ctl);
251 mutex_exit(&curproc->p_lock);
253 while (uiop->uio_resid > 0) {
254 if ((off = uiop->uio_offset) == 0) { /* "." */
255 dirent->d_ino = (ino64_t)FDROOTINO;
256 dirent->d_name[0] = '.';
257 dirent->d_name[1] = '\0';
258 reclen = DIRENT64_RECLEN(1);
259 } else if (off == FDSDSIZE) { /* ".." */
260 dirent->d_ino = (ino64_t)FDROOTINO;
261 dirent->d_name[0] = '.';
262 dirent->d_name[1] = '.';
263 dirent->d_name[2] = '\0';
264 reclen = DIRENT64_RECLEN(2);
265 } else {
267 * Return entries corresponding to the allowable
268 * number of file descriptors for this process.
270 if ((n = (off-2*FDSDSIZE)/FDSDSIZE) >= nentries)
271 break;
272 dirent->d_ino = (ino64_t)fdtoi(n);
273 numtos((ulong_t)n, dirent->d_name);
274 reclen = DIRENT64_RECLEN(strlen(dirent->d_name));
276 dirent->d_off = (offset_t)(uiop->uio_offset + FDSDSIZE);
277 dirent->d_reclen = (ushort_t)reclen;
279 if (reclen > uiop->uio_resid) {
281 * Error if no entries have been returned yet.
283 if (uiop->uio_resid == oresid)
284 return (EINVAL);
285 break;
288 * uiomove() updates both resid and offset by the same
289 * amount. But we want offset to change in increments
290 * of FDSDSIZE, which is different from the number of bytes
291 * being returned to the user. So we set uio_offset
292 * separately, ignoring what uiomove() does.
294 if (uiomove((caddr_t)dirent, reclen, UIO_READ, uiop))
295 return (EFAULT);
296 uiop->uio_offset = off + FDSDSIZE;
298 if (eofp)
299 *eofp = ((uiop->uio_offset-2*FDSDSIZE)/FDSDSIZE >= nentries);
300 return (0);
303 /* ARGSUSED */
304 static void
305 fdinactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
307 mutex_enter(&vp->v_lock);
308 ASSERT(vp->v_count >= 1);
309 VN_RELE_LOCKED(vp);
310 if (vp->v_count != 0) {
311 mutex_exit(&vp->v_lock);
312 return;
314 mutex_exit(&vp->v_lock);
315 vn_invalid(vp);
316 vn_free(vp);
319 static const struct vnodeops fd_vnodeops = {
320 .vnop_name = "fdfs",
321 .vop_open = fdopen,
322 .vop_close = fdclose,
323 .vop_read = fdread,
324 .vop_getattr = fdgetattr,
325 .vop_access = fdaccess,
326 .vop_lookup = fdlookup,
327 .vop_create = fdcreate,
328 .vop_readdir = fdreaddir,
329 .vop_inactive = fdinactive,
330 .vop_frlock = fs_nosys,
331 .vop_poll = fs_nosys_poll,
332 .vop_dispose = fs_nodispose,
335 static int
336 fdget(struct vnode *dvp, char *comp, struct vnode **vpp)
338 int n = 0;
339 struct vnode *vp;
341 while (*comp) {
342 if (*comp < '0' || *comp > '9')
343 return (ENOENT);
344 n = 10 * n + *comp++ - '0';
346 vp = vn_alloc(KM_SLEEP);
347 vp->v_type = VCHR;
348 vp->v_vfsp = dvp->v_vfsp;
349 vn_setops(vp, &fd_vnodeops);
350 vp->v_data = NULL;
351 vp->v_flag = VNOMAP;
352 vp->v_rdev = makedevice(fdrmaj, n);
353 vn_exists(vp);
354 *vpp = vp;
355 return (0);
359 * fdfs is mounted on /dev/fd, however, there are two interesting
360 * possibilities - two threads racing to do the same mount (protected
361 * by vfs locking), and two threads mounting fdfs in different places.
363 /*ARGSUSED*/
364 static int
365 fdmount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
367 struct vnode *vp;
369 if (secpolicy_fs_mount(cr, mvp, vfsp) != 0)
370 return (EPERM);
371 if (mvp->v_type != VDIR)
372 return (ENOTDIR);
374 mutex_enter(&mvp->v_lock);
375 if ((uap->flags & MS_OVERLAY) == 0 &&
376 (mvp->v_count > 1 || (mvp->v_flag & VROOT))) {
377 mutex_exit(&mvp->v_lock);
378 return (EBUSY);
380 mutex_exit(&mvp->v_lock);
383 * Having the resource be anything but "fd" doesn't make sense
385 vfs_setresource(vfsp, "fd", 0);
387 vp = vn_alloc(KM_SLEEP);
388 vp->v_vfsp = vfsp;
389 vn_setops(vp, &fd_vnodeops);
390 vp->v_type = VDIR;
391 vp->v_data = NULL;
392 vp->v_flag |= VROOT;
393 vfsp->vfs_fstype = fdfstype;
394 vfsp->vfs_data = (char *)vp;
395 mutex_enter(&fd_minor_lock);
396 do {
397 fdfsmin = (fdfsmin + 1) & L_MAXMIN32;
398 vfsp->vfs_dev = makedevice(fdfsmaj, fdfsmin);
399 } while (vfs_devismounted(vfsp->vfs_dev));
400 mutex_exit(&fd_minor_lock);
401 vfs_make_fsid(&vfsp->vfs_fsid, vfsp->vfs_dev, fdfstype);
402 vfsp->vfs_bsize = 1024;
403 return (0);
406 /* ARGSUSED */
407 static int
408 fdunmount(vfs_t *vfsp, int flag, cred_t *cr)
410 vnode_t *rvp;
412 if (secpolicy_fs_unmount(cr, vfsp) != 0)
413 return (EPERM);
416 * forced unmount is not supported by this file system
417 * and thus, ENOTSUP, is being returned.
419 if (flag & MS_FORCE)
420 return (ENOTSUP);
422 rvp = (vnode_t *)vfsp->vfs_data;
423 if (rvp->v_count > 1)
424 return (EBUSY);
426 VN_RELE(rvp);
427 return (0);
430 /* ARGSUSED */
431 static int
432 fdroot(vfs_t *vfsp, vnode_t **vpp)
434 vnode_t *vp = (vnode_t *)vfsp->vfs_data;
436 VN_HOLD(vp);
437 *vpp = vp;
438 return (0);
442 * No locking required because I held the root vnode before calling this
443 * function so the vfs won't disappear on me. To be more explicit:
444 * fdvrootp->v_count will be greater than 1 so fdunmount will just return.
446 static int
447 fdstatvfs(struct vfs *vfsp, struct statvfs64 *sp)
449 dev32_t d32;
450 rctl_qty_t fdno_ctl;
452 mutex_enter(&curproc->p_lock);
453 fdno_ctl = rctl_enforced_value(rctlproc_legacy[RLIMIT_NOFILE],
454 curproc->p_rctls, curproc);
455 mutex_exit(&curproc->p_lock);
457 bzero(sp, sizeof (*sp));
458 sp->f_bsize = 1024;
459 sp->f_frsize = 1024;
460 sp->f_blocks = (fsblkcnt64_t)0;
461 sp->f_bfree = (fsblkcnt64_t)0;
462 sp->f_bavail = (fsblkcnt64_t)0;
463 sp->f_files = (fsfilcnt64_t)
464 (MIN(P_FINFO(curproc)->fi_nfiles, fdno_ctl + 2));
465 sp->f_ffree = (fsfilcnt64_t)0;
466 sp->f_favail = (fsfilcnt64_t)0;
467 (void) cmpldev(&d32, vfsp->vfs_dev);
468 sp->f_fsid = d32;
469 (void) strcpy(sp->f_basetype, vfssw[fdfstype].vsw_name);
470 sp->f_flag = vf_to_stf(vfsp->vfs_flag);
471 sp->f_namemax = FDNSIZE;
472 (void) strcpy(sp->f_fstr, "/dev/fd");
473 (void) strcpy(&sp->f_fstr[8], "/dev/fd");
474 return (0);
477 static const struct vfsops fd_vfsops = {
478 .vfs_mount = fdmount,
479 .vfs_unmount = fdunmount,
480 .vfs_root = fdroot,
481 .vfs_statvfs = fdstatvfs,
485 fdinit(int fstype, char *name)
487 int error;
489 fdfstype = fstype;
490 ASSERT(fdfstype != 0);
493 * Associate VFS ops vector with this fstype.
495 error = vfs_setfsops(fstype, &fd_vfsops);
496 if (error != 0) {
497 cmn_err(CE_WARN, "fdinit: bad fstype");
498 return (error);
502 * Assign unique "device" numbers (reported by stat(2)).
504 fdfsmaj = getudev();
505 fdrmaj = getudev();
506 if (fdfsmaj == (major_t)-1 || fdrmaj == (major_t)-1) {
507 cmn_err(CE_WARN, "fdinit: can't get unique device numbers");
508 if (fdfsmaj == (major_t)-1)
509 fdfsmaj = 0;
510 if (fdrmaj == (major_t)-1)
511 fdrmaj = 0;
513 mutex_init(&fd_minor_lock, NULL, MUTEX_DEFAULT, NULL);
514 return (0);
518 * FDFS Mount options table
520 static char *rw_cancel[] = { MNTOPT_RO, NULL };
522 static mntopt_t mntopts[] = {
524 * option name cancel option default arg flags
526 { MNTOPT_RW, rw_cancel, NULL, MO_DEFAULT,
527 (void *)MNTOPT_NOINTR },
528 { MNTOPT_IGNORE, NULL, NULL, 0,
529 NULL },
532 static mntopts_t fdfs_mntopts = {
533 sizeof (mntopts) / sizeof (mntopt_t),
534 mntopts
537 static vfsdef_t vfw = {
538 VFSDEF_VERSION,
539 "fd",
540 fdinit,
541 VSW_HASPROTO | VSW_ZMOUNT,
542 &fdfs_mntopts
545 static struct modlfs modlfs = {
546 &mod_fsops,
547 "filesystem for fd",
548 &vfw
551 static struct modlinkage modlinkage = {
552 MODREV_1,
553 &modlfs,
554 NULL
558 _init(void)
560 return (mod_install(&modlinkage));
564 _info(struct modinfo *modinfop)
566 return (mod_info(&modlinkage, modinfop));