Merge commit 'dfc115332c94a2f62058ac7f2bce7631fbd20b3d'
[unleashed/tickless.git] / kernel / fs / namefs / namevno.c
blob2c0b78dedb425ca26c3b28e05e5032b65b9762e4
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
21 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
22 /* All Rights Reserved */
26 * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
27 * Copyright (c) 2016, 2017 by Delphix. All rights reserved.
31 * This file defines the vnode operations for mounted file descriptors.
32 * The routines in this file act as a layer between the NAMEFS file
33 * system and SPECFS/FIFOFS. With the exception of nm_open(), nm_setattr(),
34 * nm_getattr() and nm_access(), the routines simply apply the VOP operation
35 * to the vnode representing the file descriptor. This switches control
36 * to the underlying file system to which the file descriptor belongs.
38 #include <sys/types.h>
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/cred.h>
42 #include <sys/errno.h>
43 #include <sys/time.h>
44 #include <sys/file.h>
45 #include <sys/fcntl.h>
46 #include <sys/flock.h>
47 #include <sys/kmem.h>
48 #include <sys/uio.h>
49 #include <sys/vfs.h>
50 #include <sys/vnode.h>
51 #include <sys/pcb.h>
52 #include <sys/signal.h>
53 #include <sys/user.h>
54 #include <sys/proc.h>
55 #include <sys/conf.h>
56 #include <sys/debug.h>
57 #include <vm/seg.h>
58 #include <sys/fs/namenode.h>
59 #include <sys/stream.h>
60 #include <sys/fs_subr.h>
61 #include <sys/policy.h>
64 * Create a reference to the vnode representing the file descriptor.
65 * Then, apply the fop_open operation to that vnode.
67 * The vnode for the file descriptor may be switched under you.
68 * If it is, search the hash list for an nodep - nodep->nm_filevp
69 * pair. If it exists, return that nodep to the user.
70 * If it does not exist, create a new namenode to attach
71 * to the nodep->nm_filevp then place the pair on the hash list.
73 * Newly created objects are like children/nodes in the mounted
74 * file system, with the parent being the initial mount.
76 int
77 nm_open(vnode_t **vpp, int flag, cred_t *crp, caller_context_t *ct)
79 struct namenode *nodep = VTONM(*vpp);
80 int error = 0;
81 struct namenode *newnamep;
82 struct vnode *newvp;
83 struct vnode *infilevp;
84 struct vnode *outfilevp;
87 * If the vnode is switched under us, the corresponding
88 * VN_RELE for this VN_HOLD will be done by the file system
89 * performing the switch. Otherwise, the corresponding
90 * VN_RELE will be done by nm_close().
92 infilevp = outfilevp = nodep->nm_filevp;
93 VN_HOLD(outfilevp);
95 if ((error = fop_open(&outfilevp, flag, crp, ct)) != 0) {
96 VN_RELE(outfilevp);
97 return (error);
99 if (infilevp != outfilevp) {
101 * See if the new filevp (outfilevp) is already associated
102 * with the mount point. If it is, then it already has a
103 * namenode associated with it.
105 mutex_enter(&ntable_lock);
106 if ((newnamep =
107 namefind(outfilevp, nodep->nm_mountpt)) != NULL) {
108 struct vnode *vp = NMTOV(newnamep);
110 VN_HOLD(vp);
111 goto gotit;
114 newnamep = kmem_zalloc(sizeof (struct namenode), KM_SLEEP);
115 newvp = vn_alloc(KM_SLEEP);
116 newnamep->nm_vnode = newvp;
118 mutex_init(&newnamep->nm_lock, NULL, MUTEX_DEFAULT, NULL);
120 mutex_enter(&nodep->nm_lock);
121 newvp->v_flag = ((*vpp)->v_flag | VNOMAP | VNOSWAP) & ~VROOT;
122 vn_setops(newvp, vn_getops(*vpp));
123 newvp->v_vfsp = &namevfs;
124 newvp->v_stream = outfilevp->v_stream;
125 newvp->v_type = outfilevp->v_type;
126 newvp->v_rdev = outfilevp->v_rdev;
127 newvp->v_data = (caddr_t)newnamep;
128 vn_exists(newvp);
129 bcopy(&nodep->nm_vattr, &newnamep->nm_vattr, sizeof (vattr_t));
130 newnamep->nm_vattr.va_type = outfilevp->v_type;
131 newnamep->nm_vattr.va_nodeid = namenodeno_alloc();
132 newnamep->nm_vattr.va_size = 0;
133 newnamep->nm_vattr.va_rdev = outfilevp->v_rdev;
134 newnamep->nm_flag = NMNMNT;
135 newnamep->nm_filevp = outfilevp;
136 newnamep->nm_filep = nodep->nm_filep;
137 newnamep->nm_mountpt = nodep->nm_mountpt;
138 mutex_exit(&nodep->nm_lock);
141 * Insert the new namenode into the hash list.
143 nameinsert(newnamep);
144 gotit:
145 mutex_exit(&ntable_lock);
147 * Release the above reference to the infilevp, the reference
148 * to the NAMEFS vnode, create a reference to the new vnode
149 * and return the new vnode to the user.
151 VN_RELE(*vpp);
152 *vpp = NMTOV(newnamep);
154 return (0);
158 * Close a mounted file descriptor.
159 * Remove any locks and apply the fop_close operation to the vnode for
160 * the file descriptor.
162 static int
163 nm_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *crp,
164 caller_context_t *ct)
166 struct namenode *nodep = VTONM(vp);
167 int error = 0;
169 (void) cleanlocks(vp, ttoproc(curthread)->p_pid, 0);
170 cleanshares(vp, ttoproc(curthread)->p_pid);
171 error = fop_close(nodep->nm_filevp, flag, count, offset, crp, ct);
172 if (count == 1) {
173 (void) fop_fsync(nodep->nm_filevp, FSYNC, crp, ct);
175 * Before VN_RELE() we need to remove the vnode from
176 * the hash table. We should only do so in the NMNMNT case.
177 * In other cases, nodep->nm_filep keeps a reference
178 * to nm_filevp and the entry in the hash table doesn't
179 * hurt.
181 if ((nodep->nm_flag & NMNMNT) != 0) {
182 mutex_enter(&ntable_lock);
183 nameremove(nodep);
184 mutex_exit(&ntable_lock);
186 VN_RELE(nodep->nm_filevp);
188 return (error);
191 static int
192 nm_read(vnode_t *vp, struct uio *uiop, int ioflag, cred_t *crp,
193 caller_context_t *ct)
195 return (fop_read(VTONM(vp)->nm_filevp, uiop, ioflag, crp, ct));
198 static int
199 nm_write(vnode_t *vp, struct uio *uiop, int ioflag, cred_t *crp,
200 caller_context_t *ct)
202 return (fop_write(VTONM(vp)->nm_filevp, uiop, ioflag, crp, ct));
205 static int
206 nm_ioctl(vnode_t *vp, int cmd, intptr_t arg, int mode, cred_t *cr, int *rvalp,
207 caller_context_t *ct)
209 return (fop_ioctl(VTONM(vp)->nm_filevp, cmd, arg, mode, cr, rvalp, ct));
213 * Return in vap the attributes that are stored in the namenode
214 * structure. Only the size is taken from the mounted object.
216 /* ARGSUSED */
217 static int
218 nm_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *crp,
219 caller_context_t *ct)
221 struct namenode *nodep = VTONM(vp);
222 struct vattr va;
223 int error;
225 mutex_enter(&nodep->nm_lock);
226 bcopy(&nodep->nm_vattr, vap, sizeof (vattr_t));
227 mutex_exit(&nodep->nm_lock);
229 if ((va.va_mask = vap->va_mask & AT_SIZE) != 0) {
230 if (error = fop_getattr(nodep->nm_filevp, &va, flags, crp, ct))
231 return (error);
232 vap->va_size = va.va_size;
235 return (0);
239 * Standard access() like check. Figure out which mode bits apply
240 * to the caller then pass the missing mode bits to the secpolicy function.
242 static int
243 nm_access_unlocked(void *vnp, int mode, cred_t *crp)
245 struct namenode *nodep = vnp;
246 int shift = 0;
248 if (crgetuid(crp) != nodep->nm_vattr.va_uid) {
249 shift += 3;
250 if (!groupmember(nodep->nm_vattr.va_gid, crp))
251 shift += 3;
254 return (secpolicy_vnode_access2(crp, NMTOV(nodep),
255 nodep->nm_vattr.va_uid, nodep->nm_vattr.va_mode << shift,
256 mode));
259 * Set the attributes of the namenode from the attributes in vap.
261 /* ARGSUSED */
262 static int
263 nm_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *crp,
264 caller_context_t *ctp)
266 struct namenode *nodep = VTONM(vp);
267 struct vattr *nmvap = &nodep->nm_vattr;
268 long mask = vap->va_mask;
269 int error = 0;
272 * Cannot set these attributes.
274 if (mask & (AT_NOSET|AT_SIZE))
275 return (EINVAL);
277 (void) fop_rwlock(nodep->nm_filevp, V_WRITELOCK_TRUE, ctp);
278 mutex_enter(&nodep->nm_lock);
281 * Change ownership/group/time/access mode of mounted file
282 * descriptor.
285 error = secpolicy_vnode_setattr(crp, vp, vap, nmvap, flags,
286 nm_access_unlocked, nodep);
287 if (error)
288 goto out;
290 mask = vap->va_mask;
292 * If request to change mode, copy new
293 * mode into existing attribute structure.
295 if (mask & AT_MODE)
296 nmvap->va_mode = vap->va_mode & ~VSVTX;
299 * If request was to change user or group, turn off suid and sgid
300 * bits.
301 * If the system was configured with the "rstchown" option, the
302 * owner is not permitted to give away the file, and can change
303 * the group id only to a group of which they are a member.
305 if (mask & AT_UID)
306 nmvap->va_uid = vap->va_uid;
307 if (mask & AT_GID)
308 nmvap->va_gid = vap->va_gid;
310 * If request is to modify times, make sure user has write
311 * permissions on the file.
313 if (mask & AT_ATIME)
314 nmvap->va_atime = vap->va_atime;
315 if (mask & AT_MTIME) {
316 nmvap->va_mtime = vap->va_mtime;
317 gethrestime(&nmvap->va_ctime);
319 out:
320 mutex_exit(&nodep->nm_lock);
321 fop_rwunlock(nodep->nm_filevp, V_WRITELOCK_TRUE, ctp);
322 return (error);
326 * Check mode permission on the namenode. First nm_access_unlocked()
327 * checks the bits on the name node, then an access check is performed
328 * on the underlying file.
330 /* ARGSUSED */
331 static int
332 nm_access(vnode_t *vp, int mode, int flags, cred_t *crp, caller_context_t *ct)
334 struct namenode *nodep = VTONM(vp);
335 int error;
337 mutex_enter(&nodep->nm_lock);
338 error = nm_access_unlocked(nodep, mode, crp);
339 mutex_exit(&nodep->nm_lock);
340 if (error == 0)
341 return (fop_access(nodep->nm_filevp, mode, flags, crp, ct));
342 else
343 return (error);
347 * We can get here if a creat or open with O_CREAT is done on a namefs
348 * mount point, for example, as the object of a shell output redirection to
349 * the mount point.
351 /*ARGSUSED*/
352 static int
353 nm_create(vnode_t *dvp, char *name, vattr_t *vap, enum vcexcl excl, int mode,
354 vnode_t **vpp, cred_t *cr, int flag, caller_context_t *ct,
355 vsecattr_t *vsecp)
357 int error;
359 ASSERT(dvp && *name == '\0');
360 if (excl == NONEXCL) {
361 if (mode && (error = nm_access(dvp, mode, 0, cr, ct)) != 0)
362 return (error);
363 VN_HOLD(dvp);
364 return (0);
366 return (EEXIST);
370 * Links are not allowed on mounted file descriptors.
372 /*ARGSUSED*/
373 static int
374 nm_link(vnode_t *tdvp, vnode_t *vp, char *tnm, cred_t *crp,
375 caller_context_t *ct, int flags)
377 return (EXDEV);
380 static int
381 nm_fsync(vnode_t *vp, int syncflag, cred_t *crp, caller_context_t *ct)
383 return (fop_fsync(VTONM(vp)->nm_filevp, syncflag, crp, ct));
386 /* Free the namenode */
387 /* ARGSUSED */
388 static void
389 nm_inactive(vnode_t *vp, cred_t *crp, caller_context_t *ct)
391 struct namenode *nodep = VTONM(vp);
392 vfs_t *vfsp = vp->v_vfsp;
394 mutex_enter(&vp->v_lock);
395 ASSERT(vp->v_count >= 1);
396 VN_RELE_LOCKED(vp);
397 if (vp->v_count != 0) {
398 mutex_exit(&vp->v_lock);
399 return;
401 mutex_exit(&vp->v_lock);
402 if (!(nodep->nm_flag & NMNMNT)) {
403 ASSERT(nodep->nm_filep->f_vnode == nodep->nm_filevp);
404 (void) closef(nodep->nm_filep);
406 vn_invalid(vp);
407 vn_free(vp);
408 if (vfsp != &namevfs)
409 VFS_RELE(vfsp);
410 namenodeno_free(nodep->nm_vattr.va_nodeid);
411 kmem_free(nodep, sizeof (struct namenode));
414 static int
415 nm_fid(vnode_t *vp, struct fid *fidnodep, caller_context_t *ct)
417 return (fop_fid(VTONM(vp)->nm_filevp, fidnodep, ct));
420 static int
421 nm_rwlock(vnode_t *vp, int write, caller_context_t *ctp)
423 return (fop_rwlock(VTONM(vp)->nm_filevp, write, ctp));
426 static void
427 nm_rwunlock(vnode_t *vp, int write, caller_context_t *ctp)
429 fop_rwunlock(VTONM(vp)->nm_filevp, write, ctp);
432 static int
433 nm_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, caller_context_t *ct)
435 return (fop_seek(VTONM(vp)->nm_filevp, ooff, noffp, ct));
439 * Return the vnode representing the file descriptor in vpp.
441 static int
442 nm_realvp(vnode_t *vp, vnode_t **vpp, caller_context_t *ct)
444 struct vnode *rvp;
446 vp = VTONM(vp)->nm_filevp;
447 if (fop_realvp(vp, &rvp, ct) == 0)
448 vp = rvp;
449 *vpp = vp;
450 return (0);
453 static int
454 nm_poll(vnode_t *vp, short events, int anyyet, short *reventsp,
455 pollhead_t **phpp, caller_context_t *ct)
457 return (fop_poll(VTONM(vp)->nm_filevp, events, anyyet, reventsp,
458 phpp, ct));
461 const struct vnodeops nm_vnodeops = {
462 .vnop_name = "namefs",
463 .vop_open = nm_open,
464 .vop_close = nm_close,
465 .vop_read = nm_read,
466 .vop_write = nm_write,
467 .vop_ioctl = nm_ioctl,
468 .vop_getattr = nm_getattr,
469 .vop_setattr = nm_setattr,
470 .vop_access = nm_access,
471 .vop_create = nm_create,
472 .vop_link = nm_link,
473 .vop_fsync = nm_fsync,
474 .vop_inactive = nm_inactive,
475 .vop_fid = nm_fid,
476 .vop_rwlock = nm_rwlock,
477 .vop_rwunlock = nm_rwunlock,
478 .vop_seek = nm_seek,
479 .vop_realvp = nm_realvp,
480 .vop_poll = nm_poll,
481 .vop_dispose = fs_nodispose,