4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2017 by Delphix. All rights reserved.
26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
27 /* All rights reserved. */
30 #include <sys/types.h>
31 #include <sys/param.h>
32 #include <sys/cmn_err.h>
33 #include <sys/debug.h>
34 #include <sys/dirent.h>
35 #include <sys/errno.h>
37 #include <sys/inline.h>
39 #include <sys/pathname.h>
40 #include <sys/resource.h>
41 #include <sys/statvfs.h>
42 #include <sys/mount.h>
43 #include <sys/sysmacros.h>
44 #include <sys/systm.h>
47 #include <sys/vnode.h>
49 #include <sys/mntent.h>
50 #include <sys/mount.h>
52 #include <sys/t_lock.h>
53 #include <sys/modctl.h>
54 #include <sys/policy.h>
55 #include <sys/fs_subr.h>
56 #include <sys/atomic.h>
57 #include <sys/mkdev.h>
59 #define round(r) (((r)+sizeof (int)-1)&(~(sizeof (int)-1)))
60 #define fdtoi(n) ((n)+100)
65 char d_name
[FDDIRSIZE
];
69 #define FDSDSIZE sizeof (struct fddirect)
72 static int fdfstype
= 0;
73 static major_t fdfsmaj
;
74 static minor_t fdfsmin
;
75 static major_t fdrmaj
;
76 static kmutex_t fd_minor_lock
;
78 static int fdget(vnode_t
*, char *, vnode_t
**);
82 fdopen(vnode_t
**vpp
, int mode
, cred_t
*cr
, caller_context_t
*ct
)
84 if ((*vpp
)->v_type
!= VDIR
) {
85 mutex_enter(&(*vpp
)->v_lock
);
86 (*vpp
)->v_flag
|= VDUP
;
87 mutex_exit(&(*vpp
)->v_lock
);
94 fdclose(vnode_t
*vp
, int flag
, int count
, offset_t offset
, cred_t
*cr
,
102 fdread(vnode_t
*vp
, uio_t
*uiop
, int ioflag
, cred_t
*cr
, caller_context_t
*ct
)
104 static struct fddirect dotbuf
[] = {
108 struct fddirect dirbuf
;
110 int minfd
, maxfd
, modoff
, error
= 0;
115 if (vp
->v_type
!= VDIR
)
118 mutex_enter(&curproc
->p_lock
);
119 fdno_ctl
= rctl_enforced_value(rctlproc_legacy
[RLIMIT_NOFILE
],
120 curproc
->p_rctls
, curproc
);
121 nentries
= MIN(P_FINFO(curproc
)->fi_nfiles
, (int)fdno_ctl
);
122 mutex_exit(&curproc
->p_lock
);
124 endoff
= (nentries
+ 2) * FDSDSIZE
;
127 * Fake up ".", "..", and the /dev/fd directory entries.
129 if (uiop
->uio_loffset
< (offset_t
)0 ||
130 uiop
->uio_loffset
>= (offset_t
)endoff
||
131 uiop
->uio_resid
<= 0)
133 ASSERT(uiop
->uio_loffset
<= MAXOFF_T
);
134 if (uiop
->uio_offset
< 2*FDSDSIZE
) {
135 error
= uiomove((caddr_t
)dotbuf
+ uiop
->uio_offset
,
136 MIN(uiop
->uio_resid
, 2*FDSDSIZE
- uiop
->uio_offset
),
138 if (uiop
->uio_resid
<= 0 || error
)
141 minfd
= (uiop
->uio_offset
- 2*FDSDSIZE
)/FDSDSIZE
;
142 maxfd
= (uiop
->uio_offset
+ uiop
->uio_resid
- 1)/FDSDSIZE
;
143 modoff
= uiop
->uio_offset
% FDSDSIZE
;
145 for (i
= 0; i
< FDDIRSIZE
; i
++)
146 dirbuf
.d_name
[i
] = '\0';
147 for (i
= minfd
; i
< MIN(maxfd
, nentries
); i
++) {
149 dirbuf
.d_ino
= fdtoi(n
);
150 numtos((ulong_t
)n
, dirbuf
.d_name
);
151 error
= uiomove((caddr_t
)&dirbuf
+ modoff
,
152 MIN(uiop
->uio_resid
, FDSDSIZE
- modoff
),
154 if (uiop
->uio_resid
<= 0 || error
)
164 fdgetattr(vnode_t
*vp
, vattr_t
*vap
, int flags
, cred_t
*cr
,
165 caller_context_t
*ct
)
167 vfs_t
*vfsp
= vp
->v_vfsp
;
170 if (vp
->v_type
== VDIR
) {
172 vap
->va_size
= (uoff_t
)
173 ((P_FINFO(curproc
)->fi_nfiles
+ 2) * FDSDSIZE
);
175 vap
->va_nodeid
= (ino64_t
)FDROOTINO
;
180 vap
->va_nodeid
= (ino64_t
)fdtoi(getminor(vp
->v_rdev
));
182 vap
->va_type
= vp
->v_type
;
183 vap
->va_rdev
= vp
->v_rdev
;
184 vap
->va_blksize
= vfsp
->vfs_bsize
;
185 vap
->va_nblocks
= (fsblkcnt64_t
)0;
187 vap
->va_atime
= vap
->va_mtime
= vap
->va_ctime
= now
;
190 vap
->va_fsid
= vfsp
->vfs_dev
;
197 fdaccess(vnode_t
*vp
, int mode
, int flags
, cred_t
*cr
, caller_context_t
*ct
)
204 fdlookup(vnode_t
*dp
, char *comp
, vnode_t
**vpp
, pathname_t
*pnp
, int flags
,
205 vnode_t
*rdir
, cred_t
*cr
, caller_context_t
*ct
, int *direntflags
,
208 if (comp
[0] == 0 || strcmp(comp
, ".") == 0 || strcmp(comp
, "..") == 0) {
213 return (fdget(dp
, comp
, vpp
));
218 fdcreate(vnode_t
*dvp
, char *comp
, vattr_t
*vap
, enum vcexcl excl
, int mode
,
219 vnode_t
**vpp
, cred_t
*cr
, int flag
, caller_context_t
*ct
,
222 return (fdget(dvp
, comp
, vpp
));
227 fdreaddir(vnode_t
*vp
, uio_t
*uiop
, cred_t
*cr
, int *eofp
, caller_context_t
*ct
,
230 /* bp holds one dirent structure */
231 uoff_t bp
[DIRENT64_RECLEN(FDNSIZE
) / sizeof (uoff_t
)];
232 struct dirent64
*dirent
= (struct dirent64
*)bp
;
233 int reclen
, nentries
;
239 if (uiop
->uio_offset
< 0 || uiop
->uio_resid
<= 0 ||
240 (uiop
->uio_offset
% FDSDSIZE
) != 0)
243 ASSERT(uiop
->uio_loffset
<= MAXOFF_T
);
244 oresid
= uiop
->uio_resid
;
245 bzero(bp
, sizeof (bp
));
247 mutex_enter(&curproc
->p_lock
);
248 fdno_ctl
= rctl_enforced_value(rctlproc_legacy
[RLIMIT_NOFILE
],
249 curproc
->p_rctls
, curproc
);
250 nentries
= MIN(P_FINFO(curproc
)->fi_nfiles
, (int)fdno_ctl
);
251 mutex_exit(&curproc
->p_lock
);
253 while (uiop
->uio_resid
> 0) {
254 if ((off
= uiop
->uio_offset
) == 0) { /* "." */
255 dirent
->d_ino
= (ino64_t
)FDROOTINO
;
256 dirent
->d_name
[0] = '.';
257 dirent
->d_name
[1] = '\0';
258 reclen
= DIRENT64_RECLEN(1);
259 } else if (off
== FDSDSIZE
) { /* ".." */
260 dirent
->d_ino
= (ino64_t
)FDROOTINO
;
261 dirent
->d_name
[0] = '.';
262 dirent
->d_name
[1] = '.';
263 dirent
->d_name
[2] = '\0';
264 reclen
= DIRENT64_RECLEN(2);
267 * Return entries corresponding to the allowable
268 * number of file descriptors for this process.
270 if ((n
= (off
-2*FDSDSIZE
)/FDSDSIZE
) >= nentries
)
272 dirent
->d_ino
= (ino64_t
)fdtoi(n
);
273 numtos((ulong_t
)n
, dirent
->d_name
);
274 reclen
= DIRENT64_RECLEN(strlen(dirent
->d_name
));
276 dirent
->d_off
= (offset_t
)(uiop
->uio_offset
+ FDSDSIZE
);
277 dirent
->d_reclen
= (ushort_t
)reclen
;
279 if (reclen
> uiop
->uio_resid
) {
281 * Error if no entries have been returned yet.
283 if (uiop
->uio_resid
== oresid
)
288 * uiomove() updates both resid and offset by the same
289 * amount. But we want offset to change in increments
290 * of FDSDSIZE, which is different from the number of bytes
291 * being returned to the user. So we set uio_offset
292 * separately, ignoring what uiomove() does.
294 if (uiomove((caddr_t
)dirent
, reclen
, UIO_READ
, uiop
))
296 uiop
->uio_offset
= off
+ FDSDSIZE
;
299 *eofp
= ((uiop
->uio_offset
-2*FDSDSIZE
)/FDSDSIZE
>= nentries
);
305 fdinactive(vnode_t
*vp
, cred_t
*cr
, caller_context_t
*ct
)
307 mutex_enter(&vp
->v_lock
);
308 ASSERT(vp
->v_count
>= 1);
310 if (vp
->v_count
!= 0) {
311 mutex_exit(&vp
->v_lock
);
314 mutex_exit(&vp
->v_lock
);
319 static const struct vnodeops fd_vnodeops
= {
322 .vop_close
= fdclose
,
324 .vop_getattr
= fdgetattr
,
325 .vop_access
= fdaccess
,
326 .vop_lookup
= fdlookup
,
327 .vop_create
= fdcreate
,
328 .vop_readdir
= fdreaddir
,
329 .vop_inactive
= fdinactive
,
330 .vop_frlock
= fs_nosys
,
331 .vop_poll
= fs_nosys_poll
,
332 .vop_dispose
= fs_nodispose
,
336 fdget(struct vnode
*dvp
, char *comp
, struct vnode
**vpp
)
342 if (*comp
< '0' || *comp
> '9')
344 n
= 10 * n
+ *comp
++ - '0';
346 vp
= vn_alloc(KM_SLEEP
);
348 vp
->v_vfsp
= dvp
->v_vfsp
;
349 vn_setops(vp
, &fd_vnodeops
);
352 vp
->v_rdev
= makedevice(fdrmaj
, n
);
359 * fdfs is mounted on /dev/fd, however, there are two interesting
360 * possibilities - two threads racing to do the same mount (protected
361 * by vfs locking), and two threads mounting fdfs in different places.
365 fdmount(vfs_t
*vfsp
, vnode_t
*mvp
, struct mounta
*uap
, cred_t
*cr
)
369 if (secpolicy_fs_mount(cr
, mvp
, vfsp
) != 0)
371 if (mvp
->v_type
!= VDIR
)
374 mutex_enter(&mvp
->v_lock
);
375 if ((uap
->flags
& MS_OVERLAY
) == 0 &&
376 (mvp
->v_count
> 1 || (mvp
->v_flag
& VROOT
))) {
377 mutex_exit(&mvp
->v_lock
);
380 mutex_exit(&mvp
->v_lock
);
383 * Having the resource be anything but "fd" doesn't make sense
385 vfs_setresource(vfsp
, "fd", 0);
387 vp
= vn_alloc(KM_SLEEP
);
389 vn_setops(vp
, &fd_vnodeops
);
393 vfsp
->vfs_fstype
= fdfstype
;
394 vfsp
->vfs_data
= (char *)vp
;
395 mutex_enter(&fd_minor_lock
);
397 fdfsmin
= (fdfsmin
+ 1) & L_MAXMIN32
;
398 vfsp
->vfs_dev
= makedevice(fdfsmaj
, fdfsmin
);
399 } while (vfs_devismounted(vfsp
->vfs_dev
));
400 mutex_exit(&fd_minor_lock
);
401 vfs_make_fsid(&vfsp
->vfs_fsid
, vfsp
->vfs_dev
, fdfstype
);
402 vfsp
->vfs_bsize
= 1024;
408 fdunmount(vfs_t
*vfsp
, int flag
, cred_t
*cr
)
412 if (secpolicy_fs_unmount(cr
, vfsp
) != 0)
416 * forced unmount is not supported by this file system
417 * and thus, ENOTSUP, is being returned.
422 rvp
= (vnode_t
*)vfsp
->vfs_data
;
423 if (rvp
->v_count
> 1)
432 fdroot(vfs_t
*vfsp
, vnode_t
**vpp
)
434 vnode_t
*vp
= (vnode_t
*)vfsp
->vfs_data
;
442 * No locking required because I held the root vnode before calling this
443 * function so the vfs won't disappear on me. To be more explicit:
444 * fdvrootp->v_count will be greater than 1 so fdunmount will just return.
447 fdstatvfs(struct vfs
*vfsp
, struct statvfs64
*sp
)
452 mutex_enter(&curproc
->p_lock
);
453 fdno_ctl
= rctl_enforced_value(rctlproc_legacy
[RLIMIT_NOFILE
],
454 curproc
->p_rctls
, curproc
);
455 mutex_exit(&curproc
->p_lock
);
457 bzero(sp
, sizeof (*sp
));
460 sp
->f_blocks
= (fsblkcnt64_t
)0;
461 sp
->f_bfree
= (fsblkcnt64_t
)0;
462 sp
->f_bavail
= (fsblkcnt64_t
)0;
463 sp
->f_files
= (fsfilcnt64_t
)
464 (MIN(P_FINFO(curproc
)->fi_nfiles
, fdno_ctl
+ 2));
465 sp
->f_ffree
= (fsfilcnt64_t
)0;
466 sp
->f_favail
= (fsfilcnt64_t
)0;
467 (void) cmpldev(&d32
, vfsp
->vfs_dev
);
469 (void) strcpy(sp
->f_basetype
, vfssw
[fdfstype
].vsw_name
);
470 sp
->f_flag
= vf_to_stf(vfsp
->vfs_flag
);
471 sp
->f_namemax
= FDNSIZE
;
472 (void) strcpy(sp
->f_fstr
, "/dev/fd");
473 (void) strcpy(&sp
->f_fstr
[8], "/dev/fd");
477 static const struct vfsops fd_vfsops
= {
478 .vfs_mount
= fdmount
,
479 .vfs_unmount
= fdunmount
,
481 .vfs_statvfs
= fdstatvfs
,
485 fdinit(int fstype
, char *name
)
490 ASSERT(fdfstype
!= 0);
493 * Associate VFS ops vector with this fstype.
495 error
= vfs_setfsops(fstype
, &fd_vfsops
);
497 cmn_err(CE_WARN
, "fdinit: bad fstype");
502 * Assign unique "device" numbers (reported by stat(2)).
506 if (fdfsmaj
== (major_t
)-1 || fdrmaj
== (major_t
)-1) {
507 cmn_err(CE_WARN
, "fdinit: can't get unique device numbers");
508 if (fdfsmaj
== (major_t
)-1)
510 if (fdrmaj
== (major_t
)-1)
513 mutex_init(&fd_minor_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
518 * FDFS Mount options table
520 static char *rw_cancel
[] = { MNTOPT_RO
, NULL
};
522 static mntopt_t mntopts
[] = {
524 * option name cancel option default arg flags
526 { MNTOPT_RW
, rw_cancel
, NULL
, MO_DEFAULT
,
527 (void *)MNTOPT_NOINTR
},
528 { MNTOPT_IGNORE
, NULL
, NULL
, 0,
532 static mntopts_t fdfs_mntopts
= {
533 sizeof (mntopts
) / sizeof (mntopt_t
),
537 static vfsdef_t vfw
= {
541 VSW_HASPROTO
| VSW_ZMOUNT
,
545 static struct modlfs modlfs
= {
551 static struct modlinkage modlinkage
= {
560 return (mod_install(&modlinkage
));
564 _info(struct modinfo
*modinfop
)
566 return (mod_info(&modlinkage
, modinfop
));