1 /* $NetBSD: vfs_syscalls.c,v 1.401 2009/12/23 01:09:24 pooka Exp $ */
4 * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
7 * This code is derived from software contributed to The NetBSD Foundation
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
33 * Copyright (c) 1989, 1993
34 * The Regents of the University of California. All rights reserved.
35 * (c) UNIX System Laboratories, Inc.
36 * All or some portions of this file are derived from material licensed
37 * to the University of California by American Telephone and Telegraph
38 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
39 * the permission of UNIX System Laboratories, Inc.
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution.
49 * 3. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
65 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95
68 #include <sys/cdefs.h>
69 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.401 2009/12/23 01:09:24 pooka Exp $");
72 #include "opt_fileassoc.h"
76 #include <sys/param.h>
77 #include <sys/systm.h>
78 #include <sys/namei.h>
79 #include <sys/filedesc.h>
80 #include <sys/kernel.h>
83 #include <sys/vnode.h>
84 #include <sys/mount.h>
88 #include <sys/dirent.h>
89 #include <sys/sysctl.h>
90 #include <sys/syscallargs.h>
91 #include <sys/vfs_syscalls.h>
92 #include <sys/ktrace.h>
94 #include <sys/fileassoc.h>
95 #endif /* FILEASSOC */
96 #include <sys/verified_exec.h>
97 #include <sys/kauth.h>
98 #include <sys/atomic.h>
99 #include <sys/module.h>
102 #include <miscfs/genfs/genfs.h>
103 #include <miscfs/syncfs/syncfs.h>
104 #include <miscfs/specfs/specdev.h>
106 #include <nfs/rpcv2.h>
107 #include <nfs/nfsproto.h>
109 #include <nfs/nfs_var.h>
/* Define the M_MOUNT malloc type; its strings are "mount" and "vfs mount struct". */
111 MALLOC_DEFINE(M_MOUNT
, "mount", "vfs mount struct");
/*
 * Prototypes.  change_flags(), change_mode() and change_owner() are
 * file-local (static) helpers that take a vnode and the calling lwp;
 * checkdirs() has external linkage.
 */
113 static int change_flags(struct vnode
*, u_long
, struct lwp
*);
114 static int change_mode(struct vnode
*, int, struct lwp
*l
);
115 static int change_owner(struct vnode
*, uid_t
, gid_t
, struct lwp
*, int);
117 void checkdirs(struct vnode
*);
120 * Virtual File System System Calls
124 * Mount a file system.
128 * This table is used to maintain compatibility with 4.3BSD
129 * and NetBSD 0.9 mount syscalls - and possibly other systems.
130 * Note, the order is important!
132 * Do not modify this table. It should only contain filesystems
133 * supported by NetBSD 0.9 and 4.3BSD.
/*
 * mountcompatnames: file-system type names indexed by the historic
 * numeric mount type (the per-entry comments give the old index).
 * A NULL entry is a slot without a usable name; mount_get_vfsops()
 * tests for NULL before copying a name out of this table.
 * NOTE(review): not every slot of the table appears in this listing.
 */
135 const char * const mountcompatnames
[] = {
136 NULL
, /* 0 = MOUNT_NONE */
137 MOUNT_FFS
, /* 1 = MOUNT_UFS */
141 MOUNT_CD9660
, /* 5 = MOUNT_ISOFS */
143 MOUNT_KERNFS
, /* 7 */
144 NULL
, /* 8 = MOUNT_DEVFS */
/* Number of entries in mountcompatnames[], computed from the array size. */
147 const int nmountcompatnames
= sizeof(mountcompatnames
) /
148 sizeof(mountcompatnames
[0]);
/*
 * mount_update: apply an update request to an existing mount.
 *
 *	l		calling lwp; l->l_cred is used for authorization
 *	vp		vnode named by the caller; must have VV_ROOT set
 *	path		passed through to VFS_MOUNT()
 *	flags		requested MNT_* flags
 *	data, data_len	file-system specific arguments for VFS_MOUNT()
 *
 * Visible steps: save the current mnt_flag, validate the request,
 * authorize it with kauth (KAUTH_REQ_SYSTEM_MOUNT_UPDATE), busy the
 * mount and take mnt_updating, rewrite the operation and mount-level
 * flags, call VFS_MOUNT(), fall back to vfs_hooks_reexport() when that
 * fails with data present, then fix up the flags and the syncer vnode
 * before dropping mnt_updating and the busy reference.
 *
 * NOTE(review): the declaration/assignment of mp, the error exits and
 * some guarding conditions are not present in this listing; mp is
 * presumably vp->v_mount -- confirm against the full source.
 */
151 mount_update(struct lwp
*l
, struct vnode
*vp
, const char *path
, int flags
,
152 void *data
, size_t *data_len
)
155 int error
= 0, saved_flags
;
/* Remember the flags so they can be put back later (see the assignment from saved_flags below). */
158 saved_flags
= mp
->mnt_flag
;
160 /* We can operate only on VV_ROOT nodes. */
161 if ((vp
->v_vflag
& VV_ROOT
) == 0) {
167 * We only allow the filesystem to be reloaded if it
168 * is currently mounted read-only. Additionally, we
169 * prevent read-write to read-only downgrades.
171 if ((flags
& (MNT_RELOAD
| MNT_RDONLY
)) != 0 &&
172 (mp
->mnt_flag
& MNT_RDONLY
) == 0) {
173 error
= EOPNOTSUPP
; /* Needs translation */
177 error
= kauth_authorize_system(l
->l_cred
, KAUTH_SYSTEM_MOUNT
,
178 KAUTH_REQ_SYSTEM_MOUNT_UPDATE
, mp
, KAUTH_ARG(flags
), data
);
182 if (vfs_busy(mp
, NULL
)) {
187 mutex_enter(&mp
->mnt_updating
);
/* Replace the operation flags with the subset requested by the caller. */
189 mp
->mnt_flag
&= ~MNT_OP_FLAGS
;
190 mp
->mnt_flag
|= flags
& (MNT_RELOAD
| MNT_FORCE
| MNT_UPDATE
);
193 * Set the mount level flags.
/*
 * A mount that is read-only now but was not asked to stay read-only
 * is tagged IMNT_WANTRDWR; MNT_RDONLY is cleared further down when
 * that tag is found set.
 */
195 if (flags
& MNT_RDONLY
)
196 mp
->mnt_flag
|= MNT_RDONLY
;
197 else if (mp
->mnt_flag
& MNT_RDONLY
)
198 mp
->mnt_iflag
|= IMNT_WANTRDWR
;
200 ~(MNT_NOSUID
| MNT_NOEXEC
| MNT_NODEV
|
201 MNT_SYNCHRONOUS
| MNT_UNION
| MNT_ASYNC
| MNT_NOCOREDUMP
|
202 MNT_NOATIME
| MNT_NODEVMTIME
| MNT_SYMPERM
| MNT_SOFTDEP
|
204 mp
->mnt_flag
|= flags
&
205 (MNT_NOSUID
| MNT_NOEXEC
| MNT_NODEV
|
206 MNT_SYNCHRONOUS
| MNT_UNION
| MNT_ASYNC
| MNT_NOCOREDUMP
|
207 MNT_NOATIME
| MNT_NODEVMTIME
| MNT_SYMPERM
| MNT_SOFTDEP
|
208 MNT_LOG
| MNT_IGNORE
);
210 error
= VFS_MOUNT(mp
, path
, data
, data_len
);
212 if (error
&& data
!= NULL
) {
216 * Update failed; let's try and see if it was an
217 * export request. For compat with 3.0 and earlier.
219 error2
= vfs_hooks_reexport(mp
, path
, data
);
222 * Only update error code if the export request was
223 * understood but some problem occurred while
226 if (error2
!= EJUSTRETURN
)
230 if (mp
->mnt_iflag
& IMNT_WANTRDWR
)
231 mp
->mnt_flag
&= ~MNT_RDONLY
;
/* NOTE(review): the condition guarding this restore is not shown; presumably it runs only when the update failed. */
233 mp
->mnt_flag
= saved_flags
;
234 mp
->mnt_flag
&= ~MNT_OP_FLAGS
;
235 mp
->mnt_iflag
&= ~IMNT_WANTRDWR
;
/*
 * Syncer vnode: allocated if missing when the mount is neither
 * read-only nor async; the deallocation below presumably belongs to
 * the opposite case (the else line is not shown).
 */
236 if ((mp
->mnt_flag
& (MNT_RDONLY
| MNT_ASYNC
)) == 0) {
237 if (mp
->mnt_syncer
== NULL
)
238 error
= vfs_allocate_syncvnode(mp
);
240 if (mp
->mnt_syncer
!= NULL
)
241 vfs_deallocate_syncvnode(mp
);
243 mutex_exit(&mp
->mnt_updating
);
244 vfs_unbusy(mp
, false, NULL
);
/*
 * mount_get_vfsops: find the vfsops for a file-system type supplied
 * by userspace.
 *
 *	fstype	user address of the type name, copied in with copyinstr();
 *		the pointer value itself is also tried as a historic
 *		numeric type (index into mountcompatnames[])
 *	vfsops	out: receives the result of vfs_getopsbyname()
 *
 * If the first vfs_getopsbyname() lookup does not find the type,
 * module_autoload() is attempted under module_lock and the lookup is
 * repeated.
 *
 * NOTE(review): the condition selecting the numeric fallback, the
 * action taken for the "ufs" alias and all return statements are not
 * present in this listing.
 */
251 mount_get_vfsops(const char *fstype
, struct vfsops
**vfsops
)
/* Local buffer sized to match struct statvfs's f_fstypename field. */
253 char fstypename
[sizeof(((struct statvfs
*)NULL
)->f_fstypename
)];
256 /* Copy file-system type from userspace. */
257 error
= copyinstr(fstype
, fstypename
, sizeof(fstypename
), NULL
);
260 * Historically, filesystem types were identified by numbers.
261 * If we get an integer for the filesystem type instead of a
262 * string, we check to see if it matches one of the historic
265 u_long fsindex
= (u_long
)fstype
;
/* Reject indexes outside the table or pointing at an empty (NULL) slot. */
266 if (fsindex
>= nmountcompatnames
||
267 mountcompatnames
[fsindex
] == NULL
)
269 strlcpy(fstypename
, mountcompatnames
[fsindex
],
273 /* Accept `ufs' as an alias for `ffs', for compatibility. */
274 if (strcmp(fstypename
, "ufs") == 0)
277 if ((*vfsops
= vfs_getopsbyname(fstypename
)) != NULL
)
280 /* If we can autoload a vfs module, try again */
281 mutex_enter(&module_lock
);
282 (void)module_autoload(fstypename
, MODULE_CLASS_VFS
);
283 mutex_exit(&module_lock
);
285 if ((*vfsops
= vfs_getopsbyname(fstypename
)) != NULL
)
/*
 * mount_domount: create a new mount of type vfsops on the vnode *vpp.
 *
 *	l		calling lwp; l->l_cred is used for all checks
 *	vpp		in: pointer to the vnode to be covered
 *	vfsops		operations vector of the file system to mount
 *	path		passed through to VFS_MOUNT()
 *	flags		requested MNT_* flags
 *	data, data_len	file-system specific arguments for VFS_MOUNT()
 *	recurse		value handed back to vn_restorerecurse()
 *
 * Visible steps: kauth authorization (KAUTH_REQ_SYSTEM_MOUNT_NEW),
 * directory and ownership checks, vinvalbuf() on the covered vnode,
 * "already mounted" check, vfs_mountalloc(), flag setup, VFS_MOUNT()
 * under mnt_updating, insertion at the tail of mountlist, optional
 * syncer vnode allocation, then VFS_STATVFS() and VFS_START().
 *
 * NOTE(review): declarations, the error returns and the branch
 * structure around the VFS_MOUNT() failure path are not present in
 * this listing.
 */
292 mount_domount(struct lwp
*l
, struct vnode
**vpp
, struct vfsops
*vfsops
,
293 const char *path
, int flags
, void *data
, size_t *data_len
, u_int recurse
)
296 struct vnode
*vp
= *vpp
;
300 error
= kauth_authorize_system(l
->l_cred
, KAUTH_SYSTEM_MOUNT
,
301 KAUTH_REQ_SYSTEM_MOUNT_NEW
, vp
, KAUTH_ARG(flags
), data
);
305 /* Can't make a non-dir a mount-point (from here anyway). */
306 if (vp
->v_type
!= VDIR
)
310 * If the user is not root, ensure that they own the directory
311 * onto which we are attempting to mount.
/*
 * The generic superuser check is only evaluated when the directory's
 * owner differs from the caller's effective uid.
 */
313 if ((error
= VOP_GETATTR(vp
, &va
, l
->l_cred
)) != 0 ||
314 (va
.va_uid
!= kauth_cred_geteuid(l
->l_cred
) &&
315 (error
= kauth_authorize_generic(l
->l_cred
,
316 KAUTH_GENERIC_ISSUSER
, NULL
)) != 0)) {
/* NOTE(review): the action taken when MNT_EXPORTED is set is not shown. */
320 if (flags
& MNT_EXPORTED
)
323 if ((error
= vinvalbuf(vp
, V_SAVE
, l
->l_cred
, l
, 0, 0)) != 0)
327 * Check if a file-system is not already mounted on this vnode.
329 if (vp
->v_mountedhere
!= NULL
)
332 if ((mp
= vfs_mountalloc(vfsops
, vp
)) == NULL
)
/* Record the caller's effective uid as the owner of the mount. */
335 mp
->mnt_stat
.f_owner
= kauth_cred_geteuid(l
->l_cred
);
338 * The underlying file system may refuse the mount for
339 * various reasons. Allow the user to force it to happen.
341 * Set the mount level flags.
343 mp
->mnt_flag
= flags
&
344 (MNT_FORCE
| MNT_NOSUID
| MNT_NOEXEC
| MNT_NODEV
|
345 MNT_SYNCHRONOUS
| MNT_UNION
| MNT_ASYNC
| MNT_NOCOREDUMP
|
346 MNT_NOATIME
| MNT_NODEVMTIME
| MNT_SYMPERM
| MNT_SOFTDEP
|
347 MNT_LOG
| MNT_IGNORE
| MNT_RDONLY
);
349 mutex_enter(&mp
->mnt_updating
);
350 error
= VFS_MOUNT(mp
, path
, data
, data_len
);
351 mp
->mnt_flag
&= ~MNT_OP_FLAGS
;
354 * Put the new filesystem on the mount list after root.
/* NOTE(review): the next three statements look like the VFS_MOUNT() failure path; its guarding condition is not shown. */
358 vp
->v_mountedhere
= NULL
;
359 mutex_exit(&mp
->mnt_updating
);
360 vfs_unbusy(mp
, false, NULL
);
365 mp
->mnt_iflag
&= ~IMNT_WANTRDWR
;
/* Publish the mount: link it to the covered vnode and append it to mountlist under mountlist_lock. */
366 mutex_enter(&mountlist_lock
);
367 vp
->v_mountedhere
= mp
;
368 CIRCLEQ_INSERT_TAIL(&mountlist
, mp
, mnt_list
);
369 mutex_exit(&mountlist_lock
);
370 vn_restorerecurse(vp
, recurse
);
373 if ((mp
->mnt_flag
& (MNT_RDONLY
| MNT_ASYNC
)) == 0)
374 error
= vfs_allocate_syncvnode(mp
);
375 /* Hold an additional reference to the mount across VFS_START(). */
376 mutex_exit(&mp
->mnt_updating
);
377 vfs_unbusy(mp
, true, NULL
);
378 (void) VFS_STATVFS(mp
, &mp
->mnt_stat
);
379 error
= VFS_START(mp
, 0);
382 /* Drop reference held for VFS_START(). */
/*
 * mount_getargs: fetch the mount arguments of an existing mount by
 * calling VFS_MOUNT() with MNT_GETARGS set in the mount's flags.
 *
 *	l		calling lwp; l->l_cred is used for authorization
 *	vp		vnode named by the caller; tested for VV_ROOT
 *	path		passed through to VFS_MOUNT()
 *	flags		tested for any bit other than MNT_GETARGS
 *	data, data_len	buffer handed to VFS_MOUNT()
 *
 * The operation flags in mnt_flag are cleared both before and after
 * the call, and the whole sequence runs with the mount busied and
 * mnt_updating held.
 *
 * NOTE(review): the assignment of mp and the error returns are not
 * present in this listing.
 */
389 mount_getargs(struct lwp
*l
, struct vnode
*vp
, const char *path
, int flags
,
390 void *data
, size_t *data_len
)
395 /* If MNT_GETARGS is specified, it should be the only flag. */
396 if (flags
& ~MNT_GETARGS
)
401 /* XXX: probably some notion of "can see" here if we want isolation. */
402 error
= kauth_authorize_system(l
->l_cred
, KAUTH_SYSTEM_MOUNT
,
403 KAUTH_REQ_SYSTEM_MOUNT_GET
, mp
, data
, NULL
);
407 if ((vp
->v_vflag
& VV_ROOT
) == 0)
410 if (vfs_busy(mp
, NULL
))
413 mutex_enter(&mp
->mnt_updating
);
414 mp
->mnt_flag
&= ~MNT_OP_FLAGS
;
415 mp
->mnt_flag
|= MNT_GETARGS
;
416 error
= VFS_MOUNT(mp
, path
, data
, data_len
);
417 mp
->mnt_flag
&= ~MNT_OP_FLAGS
;
418 mutex_exit(&mp
->mnt_updating
);
420 vfs_unbusy(mp
, false, NULL
);
/*
 * sys___mount50: system call entry point for mounting.
 *
 * Unpacks type, path, flags, data and data_len from uap and forwards
 * them to do_sys_mount() with a NULL vfsops (so the type is resolved
 * there) and with the data pointer tagged as UIO_USERSPACE.
 */
425 sys___mount50(struct lwp
*l
, const struct sys___mount50_args
*uap
, register_t
*retval
)
428 syscallarg(const char *) type;
429 syscallarg(const char *) path;
430 syscallarg(int) flags;
431 syscallarg(void *) data;
432 syscallarg(size_t) data_len;
435 return do_sys_mount(l
, NULL
, SCARG(uap
, type
), SCARG(uap
, path
),
436 SCARG(uap
, flags
), SCARG(uap
, data
), UIO_USERSPACE
,
437 SCARG(uap
, data_len
), retval
);
/*
 * do_sys_mount: common implementation behind the mount system call.
 *
 *	l		calling lwp
 *	vfsops		file-system operations, or NULL to derive them
 *			(from the covered vnode's mount for MNT_GETARGS /
 *			MNT_UPDATE, otherwise from `type')
 *	type		userspace pointer naming the file-system type
 *	path		userspace path of the vnode to be covered
 *	flags		MNT_* flags; select getargs / update / new mount
 *	data		file-system specific arguments
 *	data_seg	UIO_USERSPACE when data must be copied in/out
 *	data_len	length of data
 *	retval		system call return value slot
 *
 * Visible steps: look up the vnode, lock it exclusively and mark the
 * lock recursive, copy the argument data into a kernel buffer when it
 * comes from userspace, dispatch to mount_getargs(), mount_update() or
 * mount_domount(), then restore the recursion state and free the
 * buffer if one was allocated.
 *
 * NOTE(review): declarations, error exits and some conditions are not
 * present in this listing.
 */
441 do_sys_mount(struct lwp
*l
, struct vfsops
*vfsops
, const char *type
,
442 const char *path
, int flags
, void *data
, enum uio_seg data_seg
,
443 size_t data_len
, register_t
*retval
)
/* data_buf starts out aliasing data; it is compared against data at the end to decide whether to free it. */
446 void *data_buf
= data
;
451 * Get vnode to be covered
453 error
= namei_simple_user(path
, NSM_FOLLOW_TRYEMULROOT
, &vp
);
458 * A lookup in VFS_MOUNT might result in an attempt to
459 * lock this vnode again, so make the lock recursive.
461 if (vfsops
== NULL
) {
462 if (flags
& (MNT_GETARGS
| MNT_UPDATE
)) {
463 vn_lock(vp
, LK_EXCLUSIVE
| LK_RETRY
);
464 recurse
= vn_setrecurse(vp
);
465 vfsops
= vp
->v_mount
->mnt_op
;
467 /* 'type' is userspace */
468 error
= mount_get_vfsops(type
, &vfsops
);
469 vn_lock(vp
, LK_EXCLUSIVE
| LK_RETRY
);
470 recurse
= vn_setrecurse(vp
);
475 vn_lock(vp
, LK_EXCLUSIVE
| LK_RETRY
);
476 recurse
= vn_setrecurse(vp
);
479 if (data
!= NULL
&& data_seg
== UIO_USERSPACE
) {
481 /* No length supplied, use default for filesystem */
482 data_len
= vfsops
->vfs_min_mount_data
;
483 if (data_len
> VFS_MAX_MOUNT_DATA
) {
488 * Hopefully a longer buffer won't make copyin() fail.
489 * For compatibility with 3.0 and earlier.
491 if (flags
& MNT_UPDATE
492 && data_len
< sizeof (struct mnt_export_args30
))
493 data_len
= sizeof (struct mnt_export_args30
);
495 data_buf
= kmem_alloc(data_len
, KM_SLEEP
);
497 /* NFS needs the buffer even for mnt_getargs .... */
498 error
= copyin(data
, data_buf
, data_len
);
503 if (flags
& MNT_GETARGS
) {
508 error
= mount_getargs(l
, vp
, path
, flags
, data_buf
, &data_len
);
/* Results are copied back only when the caller's buffer is in userspace. */
511 if (data_seg
== UIO_USERSPACE
)
512 error
= copyout(data_buf
, data
, data_len
);
514 } else if (flags
& MNT_UPDATE
) {
515 error
= mount_update(l
, vp
, path
, flags
, data_buf
, &data_len
);
517 /* Locking is handled internally in mount_domount(). */
518 error
= mount_domount(l
, &vp
, vfsops
, path
, flags
, data_buf
,
524 vn_restorerecurse(vp
, recurse
);
527 if (data_buf
!= data
)
528 kmem_free(data_buf
, data_len
);
533 * Scan all active processes to see if any of them have a current
534 * or root directory onto which the new filesystem has just been
535 * mounted. If so, replace them with the new mount point.
/*
 * checkdirs: after a mount on olddp, repoint processes that use olddp.
 *
 *	olddp	the vnode that has just been covered by a mount
 *
 * Obtains the root vnode (newdp) of the file system mounted on olddp
 * via VFS_ROOT() and panics if that fails.  It then walks allproc
 * under proc_lock; for each cwdinfo whose cwdi_cdir or cwdi_rdir is
 * olddp it takes a reference on the cwdinfo, drops proc_lock, and
 * swaps the matching pointer(s) to newdp under cwdi_lock held as
 * writer.  rootvnode is compared against olddp at the end.
 *
 * NOTE(review): the early return, reference counting of the vnodes,
 * release of rele1/rele2 and the rootvnode update are not present in
 * this listing.
 */
538 checkdirs(struct vnode
*olddp
)
540 struct cwdinfo
*cwdi
;
541 struct vnode
*newdp
, *rele1
, *rele2
;
545 if (olddp
->v_usecount
== 1)
547 if (VFS_ROOT(olddp
->v_mountedhere
, &newdp
))
548 panic("mount: lost mount");
552 mutex_enter(proc_lock
);
553 PROCLIST_FOREACH(p
, &allproc
) {
554 if ((p
->p_flag
& PK_MARKER
) != 0)
556 if ((cwdi
= p
->p_cwdi
) == NULL
)
559 * Can't change to the old directory any more,
560 * so even if we see a stale value it's not a
563 if (cwdi
->cwdi_cdir
!= olddp
&&
564 cwdi
->cwdi_rdir
!= olddp
)
/* Pin the cwdinfo before dropping proc_lock so cwdi_lock can be taken. */
569 atomic_inc_uint(&cwdi
->cwdi_refcnt
);
570 mutex_exit(proc_lock
);
571 rw_enter(&cwdi
->cwdi_lock
, RW_WRITER
);
572 if (cwdi
->cwdi_cdir
== olddp
) {
573 rele1
= cwdi
->cwdi_cdir
;
575 cwdi
->cwdi_cdir
= newdp
;
577 if (cwdi
->cwdi_rdir
== olddp
) {
578 rele2
= cwdi
->cwdi_rdir
;
580 cwdi
->cwdi_rdir
= newdp
;
582 rw_exit(&cwdi
->cwdi_lock
);
588 mutex_enter(proc_lock
);
591 mutex_exit(proc_lock
);
594 if (rootvnode
== olddp
) {
603 * Unmount a file system.
605 * Note: unmount takes a path to the vnode mounted on as argument,
606 * not special file (as before).
/*
 * sys_unmount: system call entry point for unmounting.
 *
 * Looks up the path from uap (following symlinks, leaf locked), takes
 * a reference on the mount (mnt_refcnt), authorizes the request with
 * kauth (KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT), tests for the root file
 * system (MNT_ROOTFS) and for a vnode that is not the file-system root
 * (VV_ROOT clear), and finally calls dounmount() with the caller's
 * flags.
 *
 * NOTE(review): declarations, the assignment of vp/mp and the error
 * exits are not present in this listing.
 */
610 sys_unmount(struct lwp
*l
, const struct sys_unmount_args
*uap
, register_t
*retval
)
613 syscallarg(const char *) path;
614 syscallarg(int) flags;
621 NDINIT(&nd
, LOOKUP
, FOLLOW
| LOCKLEAF
| TRYEMULROOT
, UIO_USERSPACE
,
623 if ((error
= namei(&nd
)) != 0)
627 atomic_inc_uint(&mp
->mnt_refcnt
);
630 error
= kauth_authorize_system(l
->l_cred
, KAUTH_SYSTEM_MOUNT
,
631 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT
, mp
, NULL
, NULL
);
639 * Don't allow unmounting the root file system.
641 if (mp
->mnt_flag
& MNT_ROOTFS
) {
648 * Must be the root of the filesystem
650 if ((vp
->v_vflag
& VV_ROOT
) == 0) {
657 error
= dounmount(mp
, SCARG(uap
, flags
), l
);
663 * Do the actual file system unmount. File system is assumed to have
664 * been locked by the caller.
666 * => Caller holds a reference to the mount, explicitly for dounmount().
/*
 * dounmount: perform the unmount of mp.
 *
 *	mp	mount to remove
 *	flags	unmount flags; MNT_FORCE lets VFS_UNMOUNT() run even if
 *		the preceding VFS_SYNC() failed
 *	l	calling lwp; l->l_cred is used for VFS_SYNC()
 *
 * Visible steps: veriexec check, take syncer_mutex and then
 * mnt_unmounting as writer, test IMNT_GONE, release syncer_mutex
 * early if the mount has no syncer vnode, mark IMNT_UNMOUNT, clear
 * MNT_ASYNC (remembering it), purge the name cache, drop the syncer
 * vnode, sync if not read-only, call VFS_UNMOUNT().  One path puts
 * the flags and syncer vnode back; the other detaches the covered
 * vnode, removes mp from mountlist, marks IMNT_GONE, runs the unmount
 * hooks and calls vfs_destroy().
 *
 * NOTE(review): declarations, the conditions separating the failure
 * and success paths, and the return statements are not present in
 * this listing.
 */
669 dounmount(struct mount
*mp
, int flags
, struct lwp
*l
)
671 struct vnode
*coveredvp
;
677 error
= veriexec_unmountchk(mp
);
680 #endif /* NVERIEXEC > 0 */
683 * XXX Freeze syncer. Must do this before locking the
684 * mount point. See dounmount() for details.
686 mutex_enter(&syncer_mutex
);
687 rw_enter(&mp
->mnt_unmounting
, RW_WRITER
);
/* IMNT_GONE is set further down once the mount has been taken off mountlist. */
688 if ((mp
->mnt_iflag
& IMNT_GONE
) != 0) {
689 rw_exit(&mp
->mnt_unmounting
);
690 mutex_exit(&syncer_mutex
);
694 used_syncer
= (mp
->mnt_syncer
!= NULL
);
697 * XXX Syncer must be frozen when we get here. This should really
698 * be done on a per-mountpoint basis, but the syncer doesn't work
701 * The caller of dounmount() must acquire syncer_mutex because
702 * the syncer itself acquires locks in syncer_mutex -> vfs_busy
703 * order, and we must preserve that order to avoid deadlock.
705 * So, if the file system did not use the syncer, now is
706 * the time to release the syncer_mutex.
708 if (used_syncer
== 0)
709 mutex_exit(&syncer_mutex
);
711 mp
->mnt_iflag
|= IMNT_UNMOUNT
;
/* Remember MNT_ASYNC so it can be OR-ed back in below. */
712 async
= mp
->mnt_flag
& MNT_ASYNC
;
713 mp
->mnt_flag
&= ~MNT_ASYNC
;
714 cache_purgevfs(mp
); /* remove cache entries for this file sys */
715 if (mp
->mnt_syncer
!= NULL
)
716 vfs_deallocate_syncvnode(mp
);
718 if ((mp
->mnt_flag
& MNT_RDONLY
) == 0) {
719 error
= VFS_SYNC(mp
, MNT_WAIT
, l
->l_cred
);
722 if (error
== 0 || (flags
& MNT_FORCE
))
723 error
= VFS_UNMOUNT(mp
, flags
);
/* NOTE(review): the statements up to the next mountlist_lock acquisition look like the failure path; its guard is not shown. */
725 if ((mp
->mnt_flag
& (MNT_RDONLY
| MNT_ASYNC
)) == 0)
726 (void) vfs_allocate_syncvnode(mp
);
727 mp
->mnt_iflag
&= ~IMNT_UNMOUNT
;
728 mp
->mnt_flag
|= async
;
729 rw_exit(&mp
->mnt_unmounting
);
731 mutex_exit(&syncer_mutex
);
/* Detach from the covered vnode and from mountlist under mountlist_lock. */
735 mutex_enter(&mountlist_lock
);
736 if ((coveredvp
= mp
->mnt_vnodecovered
) != NULLVP
)
737 coveredvp
->v_mountedhere
= NULL
;
738 CIRCLEQ_REMOVE(&mountlist
, mp
, mnt_list
);
739 mp
->mnt_iflag
|= IMNT_GONE
;
740 mutex_exit(&mountlist_lock
);
741 if (TAILQ_FIRST(&mp
->mnt_vnodelist
) != NULL
)
742 panic("unmount: dangling vnode");
744 mutex_exit(&syncer_mutex
);
745 vfs_hooks_unmount(mp
);
746 rw_exit(&mp
->mnt_unmounting
);
747 vfs_destroy(mp
); /* reference from mount() */
748 if (coveredvp
!= NULLVP
)
754 * Sync each mounted filesystem.
/* ctldebug entry pairing the name "syncprt" with the address of the syncprt variable. */
758 struct ctldebug debug0
= { "syncprt", &syncprt
};
/*
 * sys_sync: system call that starts a sync of every mounted file system.
 *
 * Walks mountlist under mountlist_lock.  Each mount is busied with
 * vfs_busy() and, with mnt_updating held, synced with
 * VFS_SYNC(MNT_NOWAIT) unless it is read-only.  MNT_ASYNC is cleared
 * around the sync after saving its previous state in asyncflag.
 *
 * NOTE(review): the loop increment, the vfs_busy() failure handling
 * and the condition under which MNT_ASYNC is set again are not
 * present in this listing.
 */
763 sys_sync(struct lwp
*l
, const void *v
, register_t
*retval
)
765 struct mount
*mp
, *nmp
;
771 mutex_enter(&mountlist_lock
);
772 for (mp
= CIRCLEQ_FIRST(&mountlist
); mp
!= (void *)&mountlist
;
774 if (vfs_busy(mp
, &nmp
)) {
777 mutex_enter(&mp
->mnt_updating
);
778 if ((mp
->mnt_flag
& MNT_RDONLY
) == 0) {
779 asyncflag
= mp
->mnt_flag
& MNT_ASYNC
;
780 mp
->mnt_flag
&= ~MNT_ASYNC
;
781 VFS_SYNC(mp
, MNT_NOWAIT
, l
->l_cred
);
783 mp
->mnt_flag
|= MNT_ASYNC
;
785 mutex_exit(&mp
->mnt_updating
);
786 vfs_unbusy(mp
, false, &nmp
);
788 mutex_exit(&mountlist_lock
);
797 * Change filesystem quotas.
/*
 * sys_quotactl: system call for file-system quota control.
 *
 * Resolves the path from uap with namei_simple_user() and passes the
 * cmd and uid arguments (and further arguments not shown here) to
 * VFS_QUOTACTL() for the mount mp.
 *
 * NOTE(review): the assignment of mp, the remaining arguments and the
 * cleanup/return are not present in this listing.
 */
801 sys_quotactl(struct lwp
*l
, const struct sys_quotactl_args
*uap
, register_t
*retval
)
804 syscallarg(const char *) path;
807 syscallarg(void *) arg;
813 error
= namei_simple_user(SCARG(uap
, path
),
814 NSM_FOLLOW_TRYEMULROOT
, &vp
);
818 error
= VFS_QUOTACTL(mp
, SCARG(uap
, cmd
), SCARG(uap
, uid
),
/*
 * dostatvfs: fill *sp with statistics for mount mp.
 *
 *	mp	mount to report on
 *	sp	out: statvfs structure to fill
 *	l	calling lwp; its process's cwdinfo supplies the chroot
 *	flags	MNT_WAIT or 0 forces a fresh VFS_STATVFS(); any other
 *		value copies the cached mp->mnt_stat instead
 *	(a further parameter exists -- callers pass five arguments --
 *	but its declaration is not shown in this listing)
 *
 * A fresh result is written back to mp->mnt_stat only when the
 * process has no root directory override (cwdi_rdir == NULL).  For a
 * chrooted process, getcwd_common() gives the chroot's path relative
 * to rootvnode and f_mntonname is rewritten relative to it, or set to
 * "/".  f_flag is finally set from mnt_flag masked with
 * MNT_VISFLAGMASK.
 *
 * NOTE(review): declarations (bp, len, c), several branches and the
 * return statements are not present in this listing.
 */
825 dostatvfs(struct mount
*mp
, struct statvfs
*sp
, struct lwp
*l
, int flags
,
828 struct cwdinfo
*cwdi
= l
->l_proc
->p_cwdi
;
832 * If MNT_NOWAIT or MNT_LAZY is specified, do not
833 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY
834 * overrides MNT_NOWAIT.
836 if (flags
== MNT_NOWAIT
|| flags
== MNT_LAZY
||
837 (flags
!= MNT_WAIT
&& flags
!= 0)) {
838 memcpy(sp
, &mp
->mnt_stat
, sizeof(*sp
));
842 /* Get the filesystem stats now */
843 memset(sp
, 0, sizeof(*sp
));
844 if ((error
= VFS_STATVFS(mp
, sp
)) != 0) {
848 if (cwdi
->cwdi_rdir
== NULL
)
849 (void)memcpy(&mp
->mnt_stat
, sp
, sizeof(mp
->mnt_stat
));
851 if (cwdi
->cwdi_rdir
!= NULL
) {
855 char *path
= PNBUF_GET();
/* bp starts at the end of the buffer (path + MAXPATHLEN) and is passed by address to getcwd_common(). */
857 bp
= path
+ MAXPATHLEN
;
859 rw_enter(&cwdi
->cwdi_lock
, RW_READER
);
860 error
= getcwd_common(cwdi
->cwdi_rdir
, rootvnode
, &bp
, path
,
861 MAXPATHLEN
/ 2, 0, l
);
862 rw_exit(&cwdi
->cwdi_lock
);
870 * for mount points that are below our root, we can see
871 * them, so we fix up the pathname and return them. The
872 * rest we cannot see, so we don't allow viewing the
/*
 * Match only when the chroot path is a whole-component prefix of the
 * mount point: the character after the prefix must be '/' or NUL.
 */
875 if (strncmp(bp
, sp
->f_mntonname
, len
) == 0 &&
876 ((c
= sp
->f_mntonname
[len
]) == '/' || c
== '\0')) {
877 (void)strlcpy(sp
->f_mntonname
,
878 c
== '\0' ? "/" : &sp
->f_mntonname
[len
],
879 sizeof(sp
->f_mntonname
));
882 (void)strlcpy(sp
->f_mntonname
, "/",
883 sizeof(sp
->f_mntonname
));
890 sp
->f_flag
= mp
->mnt_flag
& MNT_VISFLAGMASK
;
895 * Get filesystem statistics by path.
/*
 * do_sys_pstatvfs: statvfs by path.
 *
 *	l	calling lwp
 *	path	userspace path, resolved with namei_simple_user()
 *	flags	passed through to dostatvfs()
 *	sb	out: kernel statvfs buffer to fill
 *
 * NOTE(review): the assignment of mp from the looked-up vnode, the
 * vnode release and the return are not present in this listing.
 */
898 do_sys_pstatvfs(struct lwp
*l
, const char *path
, int flags
, struct statvfs
*sb
)
904 error
= namei_simple_user(path
, NSM_FOLLOW_TRYEMULROOT
, &vp
);
908 error
= dostatvfs(mp
, sb
, l
, flags
, 1);
/*
 * sys_statvfs1: system call wrapper around do_sys_pstatvfs().
 *
 * Gets a temporary statvfs buffer with STATVFSBUF_GET(), fills it for
 * the path and flags in uap, and copies it out to the user's buf.
 *
 * NOTE(review): the condition guarding the copyout, the release of
 * the buffer and the return are not present in this listing.
 */
915 sys_statvfs1(struct lwp
*l
, const struct sys_statvfs1_args
*uap
, register_t
*retval
)
918 syscallarg(const char *) path;
919 syscallarg(struct statvfs *) buf;
920 syscallarg(int) flags;
925 sb
= STATVFSBUF_GET();
926 error
= do_sys_pstatvfs(l
, SCARG(uap
, path
), SCARG(uap
, flags
), sb
);
928 error
= copyout(sb
, SCARG(uap
, buf
), sizeof(*sb
));
934 * Get filesystem statistics by fd.
/*
 * do_sys_fstatvfs: statvfs by file descriptor.
 *
 *	l	calling lwp (note: dostatvfs() is handed curlwp here)
 *	fd	descriptor, resolved with fd_getvnode()
 *	flags	passed through to dostatvfs()
 *	sb	out: kernel statvfs buffer to fill
 *
 * The mount is taken from the vnode stored in fp->f_data.
 *
 * NOTE(review): the release of the descriptor and the return are not
 * present in this listing.
 */
937 do_sys_fstatvfs(struct lwp
*l
, int fd
, int flags
, struct statvfs
*sb
)
943 /* fd_getvnode() will use the descriptor for us */
944 if ((error
= fd_getvnode(fd
, &fp
)) != 0)
946 mp
= ((struct vnode
*)fp
->f_data
)->v_mount
;
947 error
= dostatvfs(mp
, sb
, curlwp
, flags
, 1);
/*
 * sys_fstatvfs1: system call wrapper around do_sys_fstatvfs().
 *
 * Gets a temporary statvfs buffer with STATVFSBUF_GET(), fills it for
 * the fd and flags in uap, and copies it out to the user's buf.
 *
 * NOTE(review): the condition guarding the copyout, the release of
 * the buffer and the return are not present in this listing.
 */
954 sys_fstatvfs1(struct lwp
*l
, const struct sys_fstatvfs1_args
*uap
, register_t
*retval
)
958 syscallarg(struct statvfs *) buf;
959 syscallarg(int) flags;
964 sb
= STATVFSBUF_GET();
965 error
= do_sys_fstatvfs(l
, SCARG(uap
, fd
), SCARG(uap
, flags
), sb
);
967 error
= copyout(sb
, SCARG(uap
, buf
), sizeof(*sb
));
974 * Get statistics on all filesystems.
/*
 * do_sys_getvfsstat: report statistics for all mounted file systems.
 *
 *	l		calling lwp
 *	sfsp		destination array, or NULL (the per-entry work is
 *			skipped when it is NULL)
 *	bufsize		size of the destination in bytes
 *	flags		passed through to dostatvfs()
 *	copyfn		copy routine used to deliver each entry
 *	entry_sz	size of one destination entry
 *	(a further parameter follows; it is not shown in this listing)
 *
 * At most bufsize / entry_sz entries are written.  The walk over
 * mountlist is done under mountlist_lock with each mount busied.  The
 * code notes whether an entry mounted on "/" was seen; if none was
 * and the process has a root directory override, the mount containing
 * that directory is reported as well.
 *
 * NOTE(review): declarations, the loop increment, the lock drop/retake
 * around the copy, error exits and the computation of the return value
 * are not present in this listing.
 */
977 do_sys_getvfsstat(struct lwp
*l
, void *sfsp
, size_t bufsize
, int flags
,
978 int (*copyfn
)(const void *, void *, size_t), size_t entry_sz
,
982 struct proc
*p
= l
->l_proc
;
983 struct mount
*mp
, *nmp
;
985 size_t count
, maxcount
;
988 sb
= STATVFSBUF_GET();
989 maxcount
= bufsize
/ entry_sz
;
990 mutex_enter(&mountlist_lock
);
992 for (mp
= CIRCLEQ_FIRST(&mountlist
); mp
!= (void *)&mountlist
;
994 if (vfs_busy(mp
, &nmp
)) {
997 if (sfsp
&& count
< maxcount
) {
998 error
= dostatvfs(mp
, sb
, l
, flags
, 0);
1000 vfs_unbusy(mp
, false, &nmp
);
1004 error
= copyfn(sb
, sfsp
, entry_sz
);
1006 vfs_unbusy(mp
, false, NULL
);
/* Advance the destination by one entry and remember whether "/" has been reported. */
1009 sfsp
= (char *)sfsp
+ entry_sz
;
1010 root
|= strcmp(sb
->f_mntonname
, "/") == 0;
1013 vfs_unbusy(mp
, false, &nmp
);
1015 mutex_exit(&mountlist_lock
);
1017 if (root
== 0 && p
->p_cwdi
->cwdi_rdir
) {
1021 error
= dostatvfs(p
->p_cwdi
->cwdi_rdir
->v_mount
,
1026 error
= copyfn(sb
, sfsp
, entry_sz
);
1032 if (sfsp
&& count
> maxcount
)
/*
 * sys_getvfsstat: system call wrapper around do_sys_getvfsstat().
 *
 * Passes buf, bufsize and flags from uap, uses copyout() as the copy
 * routine and sizeof(struct statvfs) as the entry size.
 */
1042 sys_getvfsstat(struct lwp
*l
, const struct sys_getvfsstat_args
*uap
, register_t
*retval
)
1045 syscallarg(struct statvfs *) buf;
1046 syscallarg(size_t) bufsize;
1047 syscallarg(int) flags;
1050 return do_sys_getvfsstat(l
, SCARG(uap
, buf
), SCARG(uap
, bufsize
),
1051 SCARG(uap
, flags
), copyout
, sizeof (struct statvfs
), retval
);
1055 * Change current working directory to a given file descriptor.
/*
 * sys_fchdir: change the working directory to the directory open on
 * the descriptor given in uap.
 *
 * Visible steps: resolve fd with fd_getvnode(), lock the vnode, test
 * that it is a directory, check VEXEC access with the caller's
 * credentials, then follow v_mountedhere through any file systems
 * mounted on it via VFS_ROOT() (each mount busied for the call).
 * Under cwdi_lock held as writer, a chrooted process is refused with
 * EPERM when vn_isunder() says the vnode is not under its root;
 * otherwise the old cwdi_cdir is released and replaced by vp.
 *
 * NOTE(review): declarations, the assignment of vp/cwdi, reference
 * handling and the error exits are not present in this listing.
 */
1059 sys_fchdir(struct lwp
*l
, const struct sys_fchdir_args
*uap
, register_t
*retval
)
1064 struct proc
*p
= l
->l_proc
;
1065 struct cwdinfo
*cwdi
;
1066 struct vnode
*vp
, *tdp
;
1071 /* fd_getvnode() will use the descriptor for us */
1072 fd
= SCARG(uap
, fd
);
1073 if ((error
= fd_getvnode(fd
, &fp
)) != 0)
1078 vn_lock(vp
, LK_EXCLUSIVE
| LK_RETRY
);
1079 if (vp
->v_type
!= VDIR
)
1082 error
= VOP_ACCESS(vp
, VEXEC
, l
->l_cred
);
1087 while ((mp
= vp
->v_mountedhere
) != NULL
) {
1088 error
= vfs_busy(mp
, NULL
);
1092 error
= VFS_ROOT(mp
, &tdp
);
1093 vfs_unbusy(mp
, false, NULL
);
1101 * Disallow changing to a directory not under the process's
1102 * current root directory (if there is one).
1105 rw_enter(&cwdi
->cwdi_lock
, RW_WRITER
);
1106 if (cwdi
->cwdi_rdir
&& !vn_isunder(vp
, NULL
, l
)) {
1108 error
= EPERM
; /* operation not permitted */
1110 vrele(cwdi
->cwdi_cdir
);
1111 cwdi
->cwdi_cdir
= vp
;
1113 rw_exit(&cwdi
->cwdi_lock
);
1121 * Change this process's notion of the root directory to a given file
/*
 * sys_fchroot: change the process's root directory to the directory
 * open on the descriptor given in uap.
 *
 * Visible steps: kauth authorization
 * (KAUTH_REQ_SYSTEM_CHROOT_FCHROOT), fd_getvnode(), lock the vnode,
 * test that it is a directory, check VEXEC access, then hand the
 * vnode to change_root() for the process's cwdinfo.
 *
 * NOTE(review): declarations, the assignment of vp, reference
 * handling and the error exits are not present in this listing.
 */
1125 sys_fchroot(struct lwp
*l
, const struct sys_fchroot_args
*uap
, register_t
*retval
)
1127 struct proc
*p
= l
->l_proc
;
1130 int error
, fd
= SCARG(uap
, fd
);
1132 if ((error
= kauth_authorize_system(l
->l_cred
, KAUTH_SYSTEM_CHROOT
,
1133 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT
, NULL
, NULL
, NULL
)) != 0)
1135 /* fd_getvnode() will use the descriptor for us */
1136 if ((error
= fd_getvnode(fd
, &fp
)) != 0)
1139 vn_lock(vp
, LK_EXCLUSIVE
| LK_RETRY
);
1140 if (vp
->v_type
!= VDIR
)
1143 error
= VOP_ACCESS(vp
, VEXEC
, l
->l_cred
);
1149 change_root(p
->p_cwdi
, vp
, l
);
1157 * Change current working directory (``.'').
/*
 * sys_chdir: change the working directory to the path given in uap.
 *
 * The path is resolved and validated by chdir_lookup(); the old
 * cwdi_cdir is then released and replaced by the new vnode under
 * cwdi_lock held as writer.
 *
 * NOTE(review): the remaining chdir_lookup() arguments, the
 * assignment of cwdi and the returns are not present in this listing.
 */
1161 sys_chdir(struct lwp
*l
, const struct sys_chdir_args
*uap
, register_t
*retval
)
1164 syscallarg(const char *) path;
1166 struct proc
*p
= l
->l_proc
;
1167 struct cwdinfo
*cwdi
;
1171 if ((error
= chdir_lookup(SCARG(uap
, path
), UIO_USERSPACE
,
1175 rw_enter(&cwdi
->cwdi_lock
, RW_WRITER
);
1176 vrele(cwdi
->cwdi_cdir
);
1177 cwdi
->cwdi_cdir
= vp
;
1178 rw_exit(&cwdi
->cwdi_lock
);
1183 * Change notion of root (``/'') directory.
/*
 * sys_chroot: change the process's root directory to the path given
 * in uap.
 *
 * Authorizes the request with kauth (KAUTH_REQ_SYSTEM_CHROOT_CHROOT),
 * resolves the path with chdir_lookup() and installs the result with
 * change_root().
 *
 * NOTE(review): the remaining chdir_lookup() arguments and the
 * returns are not present in this listing.
 */
1187 sys_chroot(struct lwp
*l
, const struct sys_chroot_args
*uap
, register_t
*retval
)
1190 syscallarg(const char *) path;
1192 struct proc
*p
= l
->l_proc
;
1196 if ((error
= kauth_authorize_system(l
->l_cred
, KAUTH_SYSTEM_CHROOT
,
1197 KAUTH_REQ_SYSTEM_CHROOT_CHROOT
, NULL
, NULL
, NULL
)) != 0)
1199 if ((error
= chdir_lookup(SCARG(uap
, path
), UIO_USERSPACE
,
1203 change_root(p
->p_cwdi
, vp
, l
);
1209 * Common routine for chroot and fchroot.
1210 * NB: callers need to properly authorize the change root operation.
/*
 * change_root: install vp as the root directory in cwdi.
 *
 *	cwdi	cwdinfo to modify
 *	vp	new root directory vnode
 *	l	lwp passed to vn_isunder()
 *
 * With cwdi_lock held as writer: release the previous cwdi_rdir if
 * there was one, store vp, and if the current working directory is
 * not under the new root (per vn_isunder()) release it and make vp
 * the working directory too.
 *
 * NOTE(review): at least one line inside the vn_isunder() branch is
 * missing from this listing (presumably a reference is taken on vp
 * before it is stored as cwdi_cdir -- confirm).
 */
1213 change_root(struct cwdinfo
*cwdi
, struct vnode
*vp
, struct lwp
*l
)
1216 rw_enter(&cwdi
->cwdi_lock
, RW_WRITER
);
1217 if (cwdi
->cwdi_rdir
!= NULL
)
1218 vrele(cwdi
->cwdi_rdir
);
1219 cwdi
->cwdi_rdir
= vp
;
1222 * Prevent escaping from chroot by putting the root under
1223 * the working directory. Silently chdir to / if we aren't
1226 if (!vn_isunder(cwdi
->cwdi_cdir
, vp
, l
)) {
1228 * XXX would be more failsafe to change directory to a
1229 * deadfs node here instead
1231 vrele(cwdi
->cwdi_cdir
);
1233 cwdi
->cwdi_cdir
= vp
;
1235 rw_exit(&cwdi
->cwdi_lock
);
1239 * Common routine for chroot and chdir.
/*
 * chdir_lookup: resolve a path to a directory vnode the caller may
 * search.
 *
 *	path	path name to look up
 *	where	address-space selector handed to NDINIT()
 *	vpp	out: the resulting vnode
 *	l	calling lwp; l->l_cred is used for the access check
 *
 * The lookup follows symlinks and returns the leaf locked; the vnode
 * must be a directory and pass a VEXEC access check.  The vnode is
 * unlocked before the function finishes.
 *
 * NOTE(review): the assignment to *vpp, the error values and the
 * failure-path cleanup are not present in this listing.
 */
1242 chdir_lookup(const char *path
, int where
, struct vnode
**vpp
, struct lwp
*l
)
1244 struct nameidata nd
;
1247 NDINIT(&nd
, LOOKUP
, FOLLOW
| LOCKLEAF
| TRYEMULROOT
, where
,
1249 if ((error
= namei(&nd
)) != 0)
1252 if ((*vpp
)->v_type
!= VDIR
)
1255 error
= VOP_ACCESS(*vpp
, VEXEC
, l
->l_cred
);
1260 VOP_UNLOCK(*vpp
, 0);
1265 * Check permissions, allocate an open file structure,
1266 * and call the device open routine if any.
/*
 * sys_open: open (and possibly create) the file named in uap.
 *
 * Visible steps: convert the open flags with FFLAGS() and test that
 * at least one of FREAD/FWRITE is set; allocate a file structure and
 * descriptor with fd_allocfile(); compute the creation mode from the
 * requested mode, the process's cmask, ALLPERMS and S_ISTXT; call
 * vn_open().  On failure the descriptor is aborted, with special
 * handling of EDUPFD/EMOVEFD through fd_dupopen() and of ERESTART.
 * On success the file structure is filled in, an advisory lock is
 * taken with VOP_ADVLOCK() if O_EXLOCK or O_SHLOCK was requested
 * (closing the vnode and aborting the descriptor if that fails), and
 * the descriptor is published with fd_affix().
 *
 * NOTE(review): declarations, several assignments (vp, lf fields,
 * type, *retval) and the return statements are not present in this
 * listing.
 */
1269 sys_open(struct lwp
*l
, const struct sys_open_args
*uap
, register_t
*retval
)
1272 syscallarg(const char *) path;
1273 syscallarg(int) flags;
1274 syscallarg(int) mode;
1276 struct proc
*p
= l
->l_proc
;
1277 struct cwdinfo
*cwdi
= p
->p_cwdi
;
1281 int type
, indx
, error
;
1283 struct nameidata nd
;
1285 flags
= FFLAGS(SCARG(uap
, flags
));
1286 if ((flags
& (FREAD
| FWRITE
)) == 0)
1288 if ((error
= fd_allocfile(&fp
, &indx
)) != 0)
1290 /* We're going to read cwdi->cwdi_cmask unlocked here. */
1291 cmode
= ((SCARG(uap
, mode
) &~ cwdi
->cwdi_cmask
) & ALLPERMS
) &~ S_ISTXT
;
1292 NDINIT(&nd
, LOOKUP
, FOLLOW
| TRYEMULROOT
, UIO_USERSPACE
,
/* Preset l_dupfd to a negative value derived from indx; the error path below tests l_dupfd >= 0. */
1294 l
->l_dupfd
= -indx
- 1; /* XXX check for fdopen */
1295 if ((error
= vn_open(&nd
, flags
, cmode
)) != 0) {
1296 fd_abort(p
, fp
, indx
);
1297 if ((error
== EDUPFD
|| error
== EMOVEFD
) &&
1298 l
->l_dupfd
>= 0 && /* XXX from fdopen */
1300 fd_dupopen(l
->l_dupfd
, &indx
, flags
, error
)) == 0) {
1304 if (error
== ERESTART
)
1311 fp
->f_flag
= flags
& FMASK
;
1312 fp
->f_type
= DTYPE_VNODE
;
1315 if (flags
& (O_EXLOCK
| O_SHLOCK
)) {
1316 lf
.l_whence
= SEEK_SET
;
1319 if (flags
& O_EXLOCK
)
1320 lf
.l_type
= F_WRLCK
;
1322 lf
.l_type
= F_RDLCK
;
1324 if ((flags
& FNONBLOCK
) == 0)
1327 error
= VOP_ADVLOCK(vp
, fp
, F_SETLK
, &lf
, type
);
1329 (void) vn_close(vp
, fp
->f_flag
, fp
->f_cred
);
1330 fd_abort(p
, fp
, indx
);
1333 vn_lock(vp
, LK_EXCLUSIVE
| LK_RETRY
);
1334 atomic_or_uint(&fp
->f_flag
, FHASLOCK
);
1338 fd_affix(p
, fp
, indx
);
/*
 * vfs__fhfree: free a file handle with kmem_free(), using the size
 * the handle reports about itself via FHANDLE_SIZE().
 *
 * NOTE(review): any guard before these statements (e.g. a NULL check)
 * is not present in this listing.
 */
1343 vfs__fhfree(fhandle_t
*fhp
)
1350 fhsize
= FHANDLE_SIZE(fhp
);
1351 kmem_free(fhp
, fhsize
);
1355 * vfs_composefh: compose a filehandle.
/*
 * vfs_composefh: build a file handle for vp in a caller-supplied
 * buffer.
 *
 *	vp	vnode to describe
 *	fhp	buffer receiving the handle
 *	fh_size	in: size of the buffer; out: set to the size needed
 *		for the handle (needfhsize)
 *
 * The buffer is zeroed, the file-system id is taken from the mount's
 * f_fsidx, and VFS_VPTOFH() fills the file id part; the space offered
 * to it is *fh_size minus the offset of fh_fid within fhandle_t.
 *
 * NOTE(review): the assignment of mp, the handling of a too-small
 * buffer and the returns are not present in this listing.
 */
1359 vfs_composefh(struct vnode
*vp
, fhandle_t
*fhp
, size_t *fh_size
)
1369 if (*fh_size
< FHANDLE_SIZE_MIN
) {
1372 fidsize
= *fh_size
- offsetof(fhandle_t
, fh_fid
);
1374 memset(fhp
, 0, *fh_size
);
1375 fhp
->fh_fsid
= mp
->mnt_stat
.f_fsidx
;
1376 fidp
= &fhp
->fh_fid
;
1379 error
= VFS_VPTOFH(vp
, fidp
, &fidsize
);
1380 needfhsize
= FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize
);
1381 if (error
== 0 && *fh_size
< needfhsize
) {
1384 *fh_size
= needfhsize
;
/*
 * vfs_composefh_alloc: build a file handle for vp in freshly
 * allocated memory.
 *
 *	vp	vnode to describe
 *	fhpp	out: the allocated handle
 *
 * VFS_VPTOFH() is first called with a NULL file id; the code asserts
 * that this call fails and tests the error against E2BIG.  The size
 * it reports is used to allocate a zeroed handle, into which the
 * file-system id and the file id are then written.  The handle is
 * freed again on a path whose condition is not shown.
 *
 * NOTE(review): the initial value of fidsize, the assignment of mp
 * and *fhpp, and the returns are not present in this listing.
 */
1389 vfs_composefh_alloc(struct vnode
*vp
, fhandle_t
**fhpp
)
1400 error
= VFS_VPTOFH(vp
, NULL
, &fidsize
);
1401 KASSERT(error
!= 0);
1402 if (error
!= E2BIG
) {
1405 fhsize
= FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize
);
1406 fhp
= kmem_zalloc(fhsize
, KM_SLEEP
);
1411 fhp
->fh_fsid
= mp
->mnt_stat
.f_fsidx
;
1412 error
= VFS_VPTOFH(vp
, &fhp
->fh_fid
, &fidsize
);
/* Sanity check: the handle's self-reported size and file id length match what was allocated. */
1414 KASSERT((FHANDLE_SIZE(fhp
) == fhsize
&&
1415 FHANDLE_FILEID(fhp
)->fid_len
== fidsize
));
1418 kmem_free(fhp
, fhsize
);
/*
 * vfs_composefh_free: release a file handle.
 * NOTE(review): the body is not present in this listing; presumably
 * it frees a handle obtained from vfs_composefh_alloc() -- confirm.
 */
1425 vfs_composefh_free(fhandle_t
*fhp
)
1432 * vfs_fhtovp: lookup a vnode by a filehandle.
/*
 * vfs_fhtovp: translate a file handle into a vnode.
 *
 *	fhp	file handle to resolve
 *	vpp	out: the vnode, as produced by VFS_FHTOVP()
 *
 * The mount is found from the handle's file-system id with
 * vfs_getvfs(); the file system's vfs_fhtovp operation is tested for
 * NULL before VFS_FHTOVP() is called on the handle's file id.
 *
 * NOTE(review): the error values for an unknown file system or a
 * missing operation, and the returns, are not present in this listing.
 */
1436 vfs_fhtovp(fhandle_t
*fhp
, struct vnode
**vpp
)
1442 mp
= vfs_getvfs(FHANDLE_FSID(fhp
));
1447 if (mp
->mnt_op
->vfs_fhtovp
== NULL
) {
1451 error
= VFS_FHTOVP(mp
, FHANDLE_FILEID(fhp
), vpp
);
1457 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given
/*
 * vfs_copyinfh_alloc: copy a file handle in from userspace into newly
 * allocated kernel memory.
 *
 *	ufhp	user address of the handle
 *	fhsize	size claimed by the caller; tested against
 *		FHANDLE_SIZE_MAX and FHANDLE_SIZE_MIN
 *	fhpp	out: the kernel copy
 *
 * After copyin(), the size stored in the handle is compared with
 * fhsize.  An exact match is one case; a handle of NFSX_V2FH bytes
 * whose stored size is smaller is treated as a padded NFSv2 handle;
 * a further path is commented "userland told us wrong size".  The
 * buffer is freed with kmem_free() on two visible paths.
 *
 * NOTE(review): the error values, the reallocation in the NFSv2 case
 * and the assignment to *fhpp are not present in this listing.
 */
1462 vfs_copyinfh_alloc(const void *ufhp
, size_t fhsize
, fhandle_t
**fhpp
)
1468 if (fhsize
> FHANDLE_SIZE_MAX
) {
1471 if (fhsize
< FHANDLE_SIZE_MIN
) {
1475 fhp
= kmem_alloc(fhsize
, KM_SLEEP
);
1479 error
= copyin(ufhp
, fhp
, fhsize
);
1481 /* XXX this check shouldn't be here */
1482 if (FHANDLE_SIZE(fhp
) == fhsize
) {
1485 } else if (fhsize
== NFSX_V2FH
&& FHANDLE_SIZE(fhp
) < fhsize
) {
1487 * a kludge for nfsv2 padded handles.
1491 sz
= FHANDLE_SIZE(fhp
);
1492 kmem_free(fhp
, fhsize
);
1497 * userland told us wrong size.
1502 kmem_free(fhp
, fhsize
);
/*
 * vfs_copyinfh_free: release a file handle.
 * NOTE(review): the body is not present in this listing; presumably
 * it frees a handle obtained from vfs_copyinfh_alloc() -- confirm.
 */
1507 vfs_copyinfh_free(fhandle_t
*fhp
)
1514 * Get file handle system call
/*
 * sys___getfh30: system call returning the file handle for a path.
 *
 * Visible steps: kauth authorization (KAUTH_SYSTEM_FILEHANDLE), path
 * lookup (following symlinks, leaf locked), vfs_composefh_alloc() on
 * the resulting vnode, copyin of the caller's buffer size from
 * fh_size, copyout of the actual handle size to fh_size, copyout of
 * the handle itself to fhp, and release of the kernel copy with
 * vfs_composefh_free().
 *
 * NOTE(review): the namei() call, the comparison of the user's size
 * with the handle size, and the error exits are not present in this
 * listing.
 */
1517 sys___getfh30(struct lwp
*l
, const struct sys___getfh30_args
*uap
, register_t
*retval
)
1520 syscallarg(char *) fname;
1521 syscallarg(fhandle_t *) fhp;
1522 syscallarg(size_t *) fh_size;
1527 struct nameidata nd
;
1532 * Must be super user
1534 error
= kauth_authorize_system(l
->l_cred
, KAUTH_SYSTEM_FILEHANDLE
,
1535 0, NULL
, NULL
, NULL
);
1538 NDINIT(&nd
, LOOKUP
, FOLLOW
| LOCKLEAF
| TRYEMULROOT
, UIO_USERSPACE
,
1544 error
= vfs_composefh_alloc(vp
, &fh
);
1549 error
= copyin(SCARG(uap
, fh_size
), &usz
, sizeof(size_t));
1553 sz
= FHANDLE_SIZE(fh
);
1554 error
= copyout(&sz
, SCARG(uap
, fh_size
), sizeof(size_t));
1559 error
= copyout(fh
, SCARG(uap
, fhp
), sz
);
1564 vfs_composefh_free(fh
);
1569 * Open a file given a file handle.
1571 * Check permissions, allocate an open file structure,
1572 * and call the device open routine if any.
/*
 * dofhopen: open a file identified by a file handle.
 *
 *	l	calling lwp; l->l_cred is used for all checks
 *	ufhp	user address of the file handle
 *	fhsize	size of the handle as claimed by the caller
 *	oflags	open(2)-style flags, converted with FFLAGS()
 *	(a further parameter follows; it is not shown in this listing)
 *
 * Visible steps: kauth authorization (KAUTH_SYSTEM_FILEHANDLE); the
 * flags must include FREAD or FWRITE, and O_CREAT is tested for; a
 * file structure is allocated; the handle is copied in and
 * translated to a vnode with vfs_fhtovp(); VSOCK vnodes are singled
 * out; vn_openchk(), optional truncation via VOP_SETATTR() and
 * VOP_OPEN() follow.  The rest mirrors sys_open(): fill in the file
 * structure, take an advisory lock if O_EXLOCK/O_SHLOCK was given,
 * and publish the descriptor with fd_affix().  The kernel copy of the
 * handle is released on both the success and the failure path.
 *
 * NOTE(review): declarations, several assignments (fp, va, lf fields,
 * type, *retval), the error values, labels and returns are not
 * present in this listing.
 */
1576 dofhopen(struct lwp
*l
, const void *ufhp
, size_t fhsize
, int oflags
,
1580 struct vnode
*vp
= NULL
;
1581 kauth_cred_t cred
= l
->l_cred
;
1583 int type
, indx
, error
=0;
1593 * Must be super user
1595 if ((error
= kauth_authorize_system(l
->l_cred
, KAUTH_SYSTEM_FILEHANDLE
,
1596 0, NULL
, NULL
, NULL
)))
1599 flags
= FFLAGS(oflags
);
1600 if ((flags
& (FREAD
| FWRITE
)) == 0)
1602 if ((flags
& O_CREAT
))
1604 if ((error
= fd_allocfile(&nfp
, &indx
)) != 0)
1607 error
= vfs_copyinfh_alloc(ufhp
, fhsize
, &fh
);
1611 error
= vfs_fhtovp(fh
, &vp
);
1616 /* Now do an effective vn_open */
1618 if (vp
->v_type
== VSOCK
) {
1622 error
= vn_openchk(vp
, cred
, flags
);
1625 if (flags
& O_TRUNC
) {
1626 VOP_UNLOCK(vp
, 0); /* XXX */
1627 vn_lock(vp
, LK_EXCLUSIVE
| LK_RETRY
); /* XXX */
1630 error
= VOP_SETATTR(vp
, &va
, cred
);
1634 if ((error
= VOP_OPEN(vp
, flags
, cred
)) != 0)
1636 if (flags
& FWRITE
) {
1637 mutex_enter(&vp
->v_interlock
);
1639 mutex_exit(&vp
->v_interlock
);
1642 /* done with modified vn_open, now finish what sys_open does. */
1644 fp
->f_flag
= flags
& FMASK
;
1645 fp
->f_type
= DTYPE_VNODE
;
1648 if (flags
& (O_EXLOCK
| O_SHLOCK
)) {
1649 lf
.l_whence
= SEEK_SET
;
1652 if (flags
& O_EXLOCK
)
1653 lf
.l_type
= F_WRLCK
;
1655 lf
.l_type
= F_RDLCK
;
1657 if ((flags
& FNONBLOCK
) == 0)
1660 error
= VOP_ADVLOCK(vp
, fp
, F_SETLK
, &lf
, type
);
1662 (void) vn_close(vp
, fp
->f_flag
, fp
->f_cred
);
1663 fd_abort(p
, fp
, indx
);
1666 vn_lock(vp
, LK_EXCLUSIVE
| LK_RETRY
);
1667 atomic_or_uint(&fp
->f_flag
, FHASLOCK
);
1671 fd_affix(p
, fp
, indx
);
1672 vfs_copyinfh_free(fh
);
1676 fd_abort(p
, fp
, indx
);
1679 vfs_copyinfh_free(fh
);
1684 sys___fhopen40(struct lwp
*l
, const struct sys___fhopen40_args
*uap
, register_t
*retval
)
1687 syscallarg(const void *) fhp;
1688 syscallarg(size_t) fh_size;
1689 syscallarg(int) flags;
1692 return dofhopen(l
, SCARG(uap
, fhp
), SCARG(uap
, fh_size
),
1693 SCARG(uap
, flags
), retval
);
1697 do_fhstat(struct lwp
*l
, const void *ufhp
, size_t fhsize
, struct stat
*sb
)
1704 * Must be super user
1706 if ((error
= kauth_authorize_system(l
->l_cred
, KAUTH_SYSTEM_FILEHANDLE
,
1707 0, NULL
, NULL
, NULL
)))
1710 error
= vfs_copyinfh_alloc(ufhp
, fhsize
, &fh
);
1714 error
= vfs_fhtovp(fh
, &vp
);
1715 vfs_copyinfh_free(fh
);
1719 error
= vn_stat(vp
, sb
);
1727 sys___fhstat50(struct lwp
*l
, const struct sys___fhstat50_args
*uap
, register_t
*retval
)
1730 syscallarg(const void *) fhp;
1731 syscallarg(size_t) fh_size;
1732 syscallarg(struct stat *) sb;
1737 error
= do_fhstat(l
, SCARG(uap
, fhp
), SCARG(uap
, fh_size
), &sb
);
1740 return copyout(&sb
, SCARG(uap
, sb
), sizeof(sb
));
1744 do_fhstatvfs(struct lwp
*l
, const void *ufhp
, size_t fhsize
, struct statvfs
*sb
,
1753 * Must be super user
1755 if ((error
= kauth_authorize_system(l
->l_cred
, KAUTH_SYSTEM_FILEHANDLE
,
1756 0, NULL
, NULL
, NULL
)))
1759 error
= vfs_copyinfh_alloc(ufhp
, fhsize
, &fh
);
1763 error
= vfs_fhtovp(fh
, &vp
);
1764 vfs_copyinfh_free(fh
);
1769 error
= dostatvfs(mp
, sb
, l
, flags
, 1);
1776 sys___fhstatvfs140(struct lwp
*l
, const struct sys___fhstatvfs140_args
*uap
, register_t
*retval
)
1779 syscallarg(const void *) fhp;
1780 syscallarg(size_t) fh_size;
1781 syscallarg(struct statvfs *) buf;
1782 syscallarg(int) flags;
1784 struct statvfs
*sb
= STATVFSBUF_GET();
1787 error
= do_fhstatvfs(l
, SCARG(uap
, fhp
), SCARG(uap
, fh_size
), sb
,
1790 error
= copyout(sb
, SCARG(uap
, buf
), sizeof(*sb
));
1796 * Create a special file.
1800 sys___mknod50(struct lwp
*l
, const struct sys___mknod50_args
*uap
,
1804 syscallarg(const char *) path;
1805 syscallarg(mode_t) mode;
1806 syscallarg(dev_t) dev;
1808 return do_sys_mknod(l
, SCARG(uap
, path
), SCARG(uap
, mode
),
1809 SCARG(uap
, dev
), retval
, UIO_USERSPACE
);
1813 do_sys_mknod(struct lwp
*l
, const char *pathname
, mode_t mode
, dev_t dev
,
1814 register_t
*retval
, enum uio_seg seg
)
1816 struct proc
*p
= l
->l_proc
;
1820 struct nameidata nd
;
1824 if ((error
= kauth_authorize_system(l
->l_cred
, KAUTH_SYSTEM_MKNOD
,
1825 0, NULL
, NULL
, NULL
)) != 0)
1828 optype
= VOP_MKNOD_DESCOFFSET
;
1830 VERIEXEC_PATH_GET(pathname
, seg
, cpath
, path
);
1831 NDINIT(&nd
, CREATE
, LOCKPARENT
| TRYEMULROOT
, seg
, cpath
);
1833 if ((error
= namei(&nd
)) != 0)
1840 /* We will read cwdi->cwdi_cmask unlocked. */
1841 vattr
.va_mode
= (mode
& ALLPERMS
) &~ p
->p_cwdi
->cwdi_cmask
;
1842 vattr
.va_rdev
= dev
;
1844 switch (mode
& S_IFMT
) {
1845 case S_IFMT
: /* used by badsect to flag bad sectors */
1846 vattr
.va_type
= VBAD
;
1849 vattr
.va_type
= VCHR
;
1852 vattr
.va_type
= VBLK
;
1855 optype
= VOP_WHITEOUT_DESCOFFSET
;
1859 error
= veriexec_openchk(l
, nd
.ni_vp
, nd
.ni_dirp
,
1861 #endif /* NVERIEXEC > 0 */
1862 vattr
.va_type
= VREG
;
1863 vattr
.va_rdev
= VNOVAL
;
1864 optype
= VOP_CREATE_DESCOFFSET
;
1873 case VOP_WHITEOUT_DESCOFFSET
:
1874 error
= VOP_WHITEOUT(nd
.ni_dvp
, &nd
.ni_cnd
, CREATE
);
1876 VOP_ABORTOP(nd
.ni_dvp
, &nd
.ni_cnd
);
1880 case VOP_MKNOD_DESCOFFSET
:
1881 error
= VOP_MKNOD(nd
.ni_dvp
, &nd
.ni_vp
,
1882 &nd
.ni_cnd
, &vattr
);
1887 case VOP_CREATE_DESCOFFSET
:
1888 error
= VOP_CREATE(nd
.ni_dvp
, &nd
.ni_vp
,
1889 &nd
.ni_cnd
, &vattr
);
1895 VOP_ABORTOP(nd
.ni_dvp
, &nd
.ni_cnd
);
1896 if (nd
.ni_dvp
== vp
)
1904 VERIEXEC_PATH_PUT(path
);
1909 * Create a named pipe.
1913 sys_mkfifo(struct lwp
*l
, const struct sys_mkfifo_args
*uap
, register_t
*retval
)
1916 syscallarg(const char *) path;
1917 syscallarg(int) mode;
1919 struct proc
*p
= l
->l_proc
;
1922 struct nameidata nd
;
1924 NDINIT(&nd
, CREATE
, LOCKPARENT
| TRYEMULROOT
, UIO_USERSPACE
,
1926 if ((error
= namei(&nd
)) != 0)
1928 if (nd
.ni_vp
!= NULL
) {
1929 VOP_ABORTOP(nd
.ni_dvp
, &nd
.ni_cnd
);
1930 if (nd
.ni_dvp
== nd
.ni_vp
)
1938 vattr
.va_type
= VFIFO
;
1939 /* We will read cwdi->cwdi_cmask unlocked. */
1940 vattr
.va_mode
= (SCARG(uap
, mode
) & ALLPERMS
) &~ p
->p_cwdi
->cwdi_cmask
;
1941 error
= VOP_MKNOD(nd
.ni_dvp
, &nd
.ni_vp
, &nd
.ni_cnd
, &vattr
);
1948 * Make a hard file link.
1952 sys_link(struct lwp
*l
, const struct sys_link_args
*uap
, register_t
*retval
)
1955 syscallarg(const char *) path;
1956 syscallarg(const char *) link;
1959 struct nameidata nd
;
1962 error
= namei_simple_user(SCARG(uap
, path
),
1963 NSM_FOLLOW_TRYEMULROOT
, &vp
);
1966 NDINIT(&nd
, CREATE
, LOCKPARENT
| TRYEMULROOT
, UIO_USERSPACE
,
1968 if ((error
= namei(&nd
)) != 0)
1971 VOP_ABORTOP(nd
.ni_dvp
, &nd
.ni_cnd
);
1972 if (nd
.ni_dvp
== nd
.ni_vp
)
1980 error
= VOP_LINK(nd
.ni_dvp
, vp
, &nd
.ni_cnd
);
1987 * Make a symbolic link.
1991 sys_symlink(struct lwp
*l
, const struct sys_symlink_args
*uap
, register_t
*retval
)
1994 syscallarg(const char *) path;
1995 syscallarg(const char *) link;
1997 struct proc
*p
= l
->l_proc
;
2001 struct nameidata nd
;
2004 error
= copyinstr(SCARG(uap
, path
), path
, MAXPATHLEN
, NULL
);
2007 NDINIT(&nd
, CREATE
, LOCKPARENT
| TRYEMULROOT
, UIO_USERSPACE
,
2009 if ((error
= namei(&nd
)) != 0)
2012 VOP_ABORTOP(nd
.ni_dvp
, &nd
.ni_cnd
);
2013 if (nd
.ni_dvp
== nd
.ni_vp
)
2022 vattr
.va_type
= VLNK
;
2023 /* We will read cwdi->cwdi_cmask unlocked. */
2024 vattr
.va_mode
= ACCESSPERMS
&~ p
->p_cwdi
->cwdi_cmask
;
2025 error
= VOP_SYMLINK(nd
.ni_dvp
, &nd
.ni_vp
, &nd
.ni_cnd
, &vattr
, path
);
2034 * Delete a whiteout from the filesystem.
2038 sys_undelete(struct lwp
*l
, const struct sys_undelete_args
*uap
, register_t
*retval
)
2041 syscallarg(const char *) path;
2044 struct nameidata nd
;
2046 NDINIT(&nd
, DELETE
, LOCKPARENT
| DOWHITEOUT
| TRYEMULROOT
,
2047 UIO_USERSPACE
, SCARG(uap
, path
));
2052 if (nd
.ni_vp
!= NULLVP
|| !(nd
.ni_cnd
.cn_flags
& ISWHITEOUT
)) {
2053 VOP_ABORTOP(nd
.ni_dvp
, &nd
.ni_cnd
);
2054 if (nd
.ni_dvp
== nd
.ni_vp
)
2062 if ((error
= VOP_WHITEOUT(nd
.ni_dvp
, &nd
.ni_cnd
, DELETE
)) != 0)
2063 VOP_ABORTOP(nd
.ni_dvp
, &nd
.ni_cnd
);
2069 * Delete a name from the filesystem.
2073 sys_unlink(struct lwp
*l
, const struct sys_unlink_args
*uap
, register_t
*retval
)
2076 syscallarg(const char *) path;
2079 return do_sys_unlink(SCARG(uap
, path
), UIO_USERSPACE
);
2083 do_sys_unlink(const char *arg
, enum uio_seg seg
)
2087 struct nameidata nd
;
2091 VERIEXEC_PATH_GET(arg
, seg
, cpath
, path
);
2092 NDINIT(&nd
, DELETE
, LOCKPARENT
| LOCKLEAF
| TRYEMULROOT
, seg
, cpath
);
2094 if ((error
= namei(&nd
)) != 0)
2099 * The root of a mounted filesystem cannot be deleted.
2101 if (vp
->v_vflag
& VV_ROOT
) {
2102 VOP_ABORTOP(nd
.ni_dvp
, &nd
.ni_cnd
);
2103 if (nd
.ni_dvp
== vp
)
2113 /* Handle remove requests for veriexec entries. */
2114 if ((error
= veriexec_removechk(curlwp
, nd
.ni_vp
, nd
.ni_dirp
)) != 0) {
2115 VOP_ABORTOP(nd
.ni_dvp
, &nd
.ni_cnd
);
2116 if (nd
.ni_dvp
== vp
)
2123 #endif /* NVERIEXEC > 0 */
2126 (void)fileassoc_file_delete(vp
);
2127 #endif /* FILEASSOC */
2128 error
= VOP_REMOVE(nd
.ni_dvp
, nd
.ni_vp
, &nd
.ni_cnd
);
2130 VERIEXEC_PATH_PUT(path
);
2135 * Reposition read/write file offset.
2138 sys_lseek(struct lwp
*l
, const struct sys_lseek_args
*uap
, register_t
*retval
)
2142 syscallarg(int) pad;
2143 syscallarg(off_t) offset;
2144 syscallarg(int) whence;
2146 kauth_cred_t cred
= l
->l_cred
;
2153 fd
= SCARG(uap
, fd
);
2155 if ((fp
= fd_getfile(fd
)) == NULL
)
2159 if (fp
->f_type
!= DTYPE_VNODE
|| vp
->v_type
== VFIFO
) {
2164 switch (SCARG(uap
, whence
)) {
2166 newoff
= fp
->f_offset
+ SCARG(uap
, offset
);
2169 error
= VOP_GETATTR(vp
, &vattr
, cred
);
2173 newoff
= SCARG(uap
, offset
) + vattr
.va_size
;
2176 newoff
= SCARG(uap
, offset
);
2182 if ((error
= VOP_SEEK(vp
, fp
->f_offset
, newoff
, cred
)) == 0) {
2183 *(off_t
*)retval
= fp
->f_offset
= newoff
;
2191 * Positional read system call.
2194 sys_pread(struct lwp
*l
, const struct sys_pread_args
*uap
, register_t
*retval
)
2198 syscallarg(void *) buf;
2199 syscallarg(size_t) nbyte;
2200 syscallarg(off_t) offset;
2205 int error
, fd
= SCARG(uap
, fd
);
2207 if ((fp
= fd_getfile(fd
)) == NULL
)
2210 if ((fp
->f_flag
& FREAD
) == 0) {
2216 if (fp
->f_type
!= DTYPE_VNODE
|| vp
->v_type
== VFIFO
) {
2221 offset
= SCARG(uap
, offset
);
2224 * XXX This works because no file systems actually
2225 * XXX take any action on the seek operation.
2227 if ((error
= VOP_SEEK(vp
, fp
->f_offset
, offset
, fp
->f_cred
)) != 0)
2230 /* dofileread() will unuse the descriptor for us */
2231 return (dofileread(fd
, fp
, SCARG(uap
, buf
), SCARG(uap
, nbyte
),
2232 &offset
, 0, retval
));
2240 * Positional scatter read system call.
2243 sys_preadv(struct lwp
*l
, const struct sys_preadv_args
*uap
, register_t
*retval
)
2247 syscallarg(const struct iovec *) iovp;
2248 syscallarg(int) iovcnt;
2249 syscallarg(off_t) offset;
2251 off_t offset
= SCARG(uap
, offset
);
2253 return do_filereadv(SCARG(uap
, fd
), SCARG(uap
, iovp
),
2254 SCARG(uap
, iovcnt
), &offset
, 0, retval
);
2258 * Positional write system call.
2261 sys_pwrite(struct lwp
*l
, const struct sys_pwrite_args
*uap
, register_t
*retval
)
2265 syscallarg(const void *) buf;
2266 syscallarg(size_t) nbyte;
2267 syscallarg(off_t) offset;
2272 int error
, fd
= SCARG(uap
, fd
);
2274 if ((fp
= fd_getfile(fd
)) == NULL
)
2277 if ((fp
->f_flag
& FWRITE
) == 0) {
2283 if (fp
->f_type
!= DTYPE_VNODE
|| vp
->v_type
== VFIFO
) {
2288 offset
= SCARG(uap
, offset
);
2291 * XXX This works because no file systems actually
2292 * XXX take any action on the seek operation.
2294 if ((error
= VOP_SEEK(vp
, fp
->f_offset
, offset
, fp
->f_cred
)) != 0)
2297 /* dofilewrite() will unuse the descriptor for us */
2298 return (dofilewrite(fd
, fp
, SCARG(uap
, buf
), SCARG(uap
, nbyte
),
2299 &offset
, 0, retval
));
2307 * Positional gather write system call.
2310 sys_pwritev(struct lwp
*l
, const struct sys_pwritev_args
*uap
, register_t
*retval
)
2314 syscallarg(const struct iovec *) iovp;
2315 syscallarg(int) iovcnt;
2316 syscallarg(off_t) offset;
2318 off_t offset
= SCARG(uap
, offset
);
2320 return do_filewritev(SCARG(uap
, fd
), SCARG(uap
, iovp
),
2321 SCARG(uap
, iovcnt
), &offset
, 0, retval
);
2325 * Check access permissions.
2328 sys_access(struct lwp
*l
, const struct sys_access_args
*uap
, register_t
*retval
)
2331 syscallarg(const char *) path;
2332 syscallarg(int) flags;
2337 struct nameidata nd
;
2339 cred
= kauth_cred_dup(l
->l_cred
);
2340 kauth_cred_seteuid(cred
, kauth_cred_getuid(l
->l_cred
));
2341 kauth_cred_setegid(cred
, kauth_cred_getgid(l
->l_cred
));
2342 NDINIT(&nd
, LOOKUP
, FOLLOW
| LOCKLEAF
| TRYEMULROOT
, UIO_USERSPACE
,
2344 /* Override default credentials */
2345 nd
.ni_cnd
.cn_cred
= cred
;
2346 if ((error
= namei(&nd
)) != 0)
2350 /* Flags == 0 means only check for existence. */
2351 if (SCARG(uap
, flags
)) {
2353 if (SCARG(uap
, flags
) & R_OK
)
2355 if (SCARG(uap
, flags
) & W_OK
)
2357 if (SCARG(uap
, flags
) & X_OK
)
2360 error
= VOP_ACCESS(vp
, flags
, cred
);
2361 if (!error
&& (flags
& VWRITE
))
2362 error
= vn_writechk(vp
);
2366 kauth_cred_free(cred
);
2371 * Common code for all sys_stat functions, including compat versions.
2374 do_sys_stat(const char *path
, unsigned int nd_flags
, struct stat
*sb
)
2377 struct nameidata nd
;
2379 NDINIT(&nd
, LOOKUP
, nd_flags
| LOCKLEAF
| TRYEMULROOT
,
2380 UIO_USERSPACE
, path
);
2384 error
= vn_stat(nd
.ni_vp
, sb
);
2390 * Get file status; this version follows links.
2394 sys___stat50(struct lwp
*l
, const struct sys___stat50_args
*uap
, register_t
*retval
)
2397 syscallarg(const char *) path;
2398 syscallarg(struct stat *) ub;
2403 error
= do_sys_stat(SCARG(uap
, path
), FOLLOW
, &sb
);
2406 return copyout(&sb
, SCARG(uap
, ub
), sizeof(sb
));
2410 * Get file status; this version does not follow links.
2414 sys___lstat50(struct lwp
*l
, const struct sys___lstat50_args
*uap
, register_t
*retval
)
2417 syscallarg(const char *) path;
2418 syscallarg(struct stat *) ub;
2423 error
= do_sys_stat(SCARG(uap
, path
), NOFOLLOW
, &sb
);
2426 return copyout(&sb
, SCARG(uap
, ub
), sizeof(sb
));
2430 * Get configurable pathname variables.
2434 sys_pathconf(struct lwp
*l
, const struct sys_pathconf_args
*uap
, register_t
*retval
)
2437 syscallarg(const char *) path;
2438 syscallarg(int) name;
2441 struct nameidata nd
;
2443 NDINIT(&nd
, LOOKUP
, FOLLOW
| LOCKLEAF
| TRYEMULROOT
, UIO_USERSPACE
,
2445 if ((error
= namei(&nd
)) != 0)
2447 error
= VOP_PATHCONF(nd
.ni_vp
, SCARG(uap
, name
), retval
);
2453 * Return target name of a symbolic link.
2457 sys_readlink(struct lwp
*l
, const struct sys_readlink_args
*uap
, register_t
*retval
)
2460 syscallarg(const char *) path;
2461 syscallarg(char *) buf;
2462 syscallarg(size_t) count;
2468 struct nameidata nd
;
2470 NDINIT(&nd
, LOOKUP
, NOFOLLOW
| LOCKLEAF
| TRYEMULROOT
, UIO_USERSPACE
,
2472 if ((error
= namei(&nd
)) != 0)
2475 if (vp
->v_type
!= VLNK
)
2477 else if (!(vp
->v_mount
->mnt_flag
& MNT_SYMPERM
) ||
2478 (error
= VOP_ACCESS(vp
, VREAD
, l
->l_cred
)) == 0) {
2479 aiov
.iov_base
= SCARG(uap
, buf
);
2480 aiov
.iov_len
= SCARG(uap
, count
);
2481 auio
.uio_iov
= &aiov
;
2482 auio
.uio_iovcnt
= 1;
2483 auio
.uio_offset
= 0;
2484 auio
.uio_rw
= UIO_READ
;
2485 KASSERT(l
== curlwp
);
2486 auio
.uio_vmspace
= l
->l_proc
->p_vmspace
;
2487 auio
.uio_resid
= SCARG(uap
, count
);
2488 error
= VOP_READLINK(vp
, &auio
, l
->l_cred
);
2491 *retval
= SCARG(uap
, count
) - auio
.uio_resid
;
2496 * Change flags of a file given a path name.
2500 sys_chflags(struct lwp
*l
, const struct sys_chflags_args
*uap
, register_t
*retval
)
2503 syscallarg(const char *) path;
2504 syscallarg(u_long) flags;
2509 error
= namei_simple_user(SCARG(uap
, path
),
2510 NSM_FOLLOW_TRYEMULROOT
, &vp
);
2513 error
= change_flags(vp
, SCARG(uap
, flags
), l
);
2519 * Change flags of a file given a file descriptor.
2523 sys_fchflags(struct lwp
*l
, const struct sys_fchflags_args
*uap
, register_t
*retval
)
2527 syscallarg(u_long) flags;
2533 /* fd_getvnode() will use the descriptor for us */
2534 if ((error
= fd_getvnode(SCARG(uap
, fd
), &fp
)) != 0)
2537 error
= change_flags(vp
, SCARG(uap
, flags
), l
);
2539 fd_putfile(SCARG(uap
, fd
));
2544 * Change flags of a file given a path name; this version does
2548 sys_lchflags(struct lwp
*l
, const struct sys_lchflags_args
*uap
, register_t
*retval
)
2551 syscallarg(const char *) path;
2552 syscallarg(u_long) flags;
2557 error
= namei_simple_user(SCARG(uap
, path
),
2558 NSM_NOFOLLOW_TRYEMULROOT
, &vp
);
2561 error
= change_flags(vp
, SCARG(uap
, flags
), l
);
2567 * Common routine to change flags of a file.
2570 change_flags(struct vnode
*vp
, u_long flags
, struct lwp
*l
)
2575 vn_lock(vp
, LK_EXCLUSIVE
| LK_RETRY
);
2577 * Non-superusers cannot change the flags on devices, even if they
2580 if (kauth_authorize_generic(l
->l_cred
, KAUTH_GENERIC_ISSUSER
, NULL
)) {
2581 if ((error
= VOP_GETATTR(vp
, &vattr
, l
->l_cred
)) != 0)
2583 if (vattr
.va_type
== VCHR
|| vattr
.va_type
== VBLK
) {
2589 vattr
.va_flags
= flags
;
2590 error
= VOP_SETATTR(vp
, &vattr
, l
->l_cred
);
2596 * Change mode of a file given path name; this version follows links.
2600 sys_chmod(struct lwp
*l
, const struct sys_chmod_args
*uap
, register_t
*retval
)
2603 syscallarg(const char *) path;
2604 syscallarg(int) mode;
2609 error
= namei_simple_user(SCARG(uap
, path
),
2610 NSM_FOLLOW_TRYEMULROOT
, &vp
);
2614 error
= change_mode(vp
, SCARG(uap
, mode
), l
);
2621 * Change mode of a file given a file descriptor.
2625 sys_fchmod(struct lwp
*l
, const struct sys_fchmod_args
*uap
, register_t
*retval
)
2629 syscallarg(int) mode;
2634 /* fd_getvnode() will use the descriptor for us */
2635 if ((error
= fd_getvnode(SCARG(uap
, fd
), &fp
)) != 0)
2637 error
= change_mode(fp
->f_data
, SCARG(uap
, mode
), l
);
2638 fd_putfile(SCARG(uap
, fd
));
2643 * Change mode of a file given path name; this version does not follow links.
2647 sys_lchmod(struct lwp
*l
, const struct sys_lchmod_args
*uap
, register_t
*retval
)
2650 syscallarg(const char *) path;
2651 syscallarg(int) mode;
2656 error
= namei_simple_user(SCARG(uap
, path
),
2657 NSM_NOFOLLOW_TRYEMULROOT
, &vp
);
2661 error
= change_mode(vp
, SCARG(uap
, mode
), l
);
2668 * Common routine to set mode given a vnode.
2671 change_mode(struct vnode
*vp
, int mode
, struct lwp
*l
)
2676 vn_lock(vp
, LK_EXCLUSIVE
| LK_RETRY
);
2678 vattr
.va_mode
= mode
& ALLPERMS
;
2679 error
= VOP_SETATTR(vp
, &vattr
, l
->l_cred
);
2685 * Set ownership given a path name; this version follows links.
2689 sys_chown(struct lwp
*l
, const struct sys_chown_args
*uap
, register_t
*retval
)
2692 syscallarg(const char *) path;
2693 syscallarg(uid_t) uid;
2694 syscallarg(gid_t) gid;
2699 error
= namei_simple_user(SCARG(uap
, path
),
2700 NSM_FOLLOW_TRYEMULROOT
, &vp
);
2704 error
= change_owner(vp
, SCARG(uap
, uid
), SCARG(uap
, gid
), l
, 0);
2711 * Set ownership given a path name; this version follows links.
2712 * Provides POSIX semantics.
2716 sys___posix_chown(struct lwp
*l
, const struct sys___posix_chown_args
*uap
, register_t
*retval
)
2719 syscallarg(const char *) path;
2720 syscallarg(uid_t) uid;
2721 syscallarg(gid_t) gid;
2726 error
= namei_simple_user(SCARG(uap
, path
),
2727 NSM_FOLLOW_TRYEMULROOT
, &vp
);
2731 error
= change_owner(vp
, SCARG(uap
, uid
), SCARG(uap
, gid
), l
, 1);
2738 * Set ownership given a file descriptor.
2742 sys_fchown(struct lwp
*l
, const struct sys_fchown_args
*uap
, register_t
*retval
)
2746 syscallarg(uid_t) uid;
2747 syscallarg(gid_t) gid;
2752 /* fd_getvnode() will use the descriptor for us */
2753 if ((error
= fd_getvnode(SCARG(uap
, fd
), &fp
)) != 0)
2755 error
= change_owner(fp
->f_data
, SCARG(uap
, uid
), SCARG(uap
, gid
),
2757 fd_putfile(SCARG(uap
, fd
));
2762 * Set ownership given a file descriptor, providing POSIX/XPG semantics.
2766 sys___posix_fchown(struct lwp
*l
, const struct sys___posix_fchown_args
*uap
, register_t
*retval
)
2770 syscallarg(uid_t) uid;
2771 syscallarg(gid_t) gid;
2776 /* fd_getvnode() will use the descriptor for us */
2777 if ((error
= fd_getvnode(SCARG(uap
, fd
), &fp
)) != 0)
2779 error
= change_owner(fp
->f_data
, SCARG(uap
, uid
), SCARG(uap
, gid
),
2781 fd_putfile(SCARG(uap
, fd
));
2786 * Set ownership given a path name; this version does not follow links.
2790 sys_lchown(struct lwp
*l
, const struct sys_lchown_args
*uap
, register_t
*retval
)
2793 syscallarg(const char *) path;
2794 syscallarg(uid_t) uid;
2795 syscallarg(gid_t) gid;
2800 error
= namei_simple_user(SCARG(uap
, path
),
2801 NSM_NOFOLLOW_TRYEMULROOT
, &vp
);
2805 error
= change_owner(vp
, SCARG(uap
, uid
), SCARG(uap
, gid
), l
, 0);
2812 * Set ownership given a path name; this version does not follow links.
2813 * Provides POSIX/XPG semantics.
2817 sys___posix_lchown(struct lwp
*l
, const struct sys___posix_lchown_args
*uap
, register_t
*retval
)
2820 syscallarg(const char *) path;
2821 syscallarg(uid_t) uid;
2822 syscallarg(gid_t) gid;
2827 error
= namei_simple_user(SCARG(uap
, path
),
2828 NSM_NOFOLLOW_TRYEMULROOT
, &vp
);
2832 error
= change_owner(vp
, SCARG(uap
, uid
), SCARG(uap
, gid
), l
, 1);
2839 * Common routine to set ownership given a vnode.
2842 change_owner(struct vnode
*vp
, uid_t uid
, gid_t gid
, struct lwp
*l
,
2843 int posix_semantics
)
2849 vn_lock(vp
, LK_EXCLUSIVE
| LK_RETRY
);
2850 if ((error
= VOP_GETATTR(vp
, &vattr
, l
->l_cred
)) != 0)
2853 #define CHANGED(x) ((int)(x) != -1)
2854 newmode
= vattr
.va_mode
;
2855 if (posix_semantics
) {
2857 * POSIX/XPG semantics: if the caller is not the super-user,
2858 * clear set-user-id and set-group-id bits. Both POSIX and
2859 * the XPG consider the behaviour for calls by the super-user
2860 * implementation-defined; we leave the set-user-id and set-
2861 * group-id settings intact in that case.
2863 if (kauth_authorize_generic(l
->l_cred
, KAUTH_GENERIC_ISSUSER
,
2865 newmode
&= ~(S_ISUID
| S_ISGID
);
2868 * NetBSD semantics: when changing owner and/or group,
2869 * clear the respective bit(s).
2872 newmode
&= ~S_ISUID
;
2874 newmode
&= ~S_ISGID
;
2876 /* Update va_mode iff altered. */
2877 if (vattr
.va_mode
== newmode
)
2881 vattr
.va_uid
= CHANGED(uid
) ? uid
: (uid_t
)VNOVAL
;
2882 vattr
.va_gid
= CHANGED(gid
) ? gid
: (gid_t
)VNOVAL
;
2883 vattr
.va_mode
= newmode
;
2884 error
= VOP_SETATTR(vp
, &vattr
, l
->l_cred
);
2893 * Set the access and modification times given a path name; this
2894 * version follows links.
2898 sys___utimes50(struct lwp
*l
, const struct sys___utimes50_args
*uap
,
2902 syscallarg(const char *) path;
2903 syscallarg(const struct timeval *) tptr;
2906 return do_sys_utimes(l
, NULL
, SCARG(uap
, path
), FOLLOW
,
2907 SCARG(uap
, tptr
), UIO_USERSPACE
);
2911 * Set the access and modification times given a file descriptor.
2915 sys___futimes50(struct lwp
*l
, const struct sys___futimes50_args
*uap
,
2920 syscallarg(const struct timeval *) tptr;
2925 /* fd_getvnode() will use the descriptor for us */
2926 if ((error
= fd_getvnode(SCARG(uap
, fd
), &fp
)) != 0)
2928 error
= do_sys_utimes(l
, fp
->f_data
, NULL
, 0, SCARG(uap
, tptr
),
2930 fd_putfile(SCARG(uap
, fd
));
2935 * Set the access and modification times given a path name; this
2936 * version does not follow links.
2939 sys___lutimes50(struct lwp
*l
, const struct sys___lutimes50_args
*uap
,
2943 syscallarg(const char *) path;
2944 syscallarg(const struct timeval *) tptr;
2947 return do_sys_utimes(l
, NULL
, SCARG(uap
, path
), NOFOLLOW
,
2948 SCARG(uap
, tptr
), UIO_USERSPACE
);
2952 * Common routine to set access and modification times given a vnode.
2955 do_sys_utimes(struct lwp
*l
, struct vnode
*vp
, const char *path
, int flag
,
2956 const struct timeval
*tptr
, enum uio_seg seg
)
2959 int error
, dorele
= 0;
2960 namei_simple_flags_t sflags
;
2962 bool vanull
, setbirthtime
;
2963 struct timespec ts
[2];
2966 * I have checked all callers and they pass either FOLLOW,
2967 * NOFOLLOW, or 0 (when they don't pass a path), and NOFOLLOW
2968 * is 0. More to the point, they don't pass anything else.
2969 * Let's keep it that way at least until the namei interfaces
2970 * are fully sanitized.
2972 KASSERT(flag
== NOFOLLOW
|| flag
== FOLLOW
);
2973 sflags
= (flag
== FOLLOW
) ?
2974 NSM_FOLLOW_TRYEMULROOT
: NSM_NOFOLLOW_TRYEMULROOT
;
2981 struct timeval tv
[2];
2984 if (seg
!= UIO_SYSSPACE
) {
2985 error
= copyin(tptr
, tv
, sizeof (tv
));
2990 TIMEVAL_TO_TIMESPEC(&tptr
[0], &ts
[0]);
2991 TIMEVAL_TO_TIMESPEC(&tptr
[1], &ts
[1]);
2995 /* note: SEG describes TPTR, not PATH; PATH is always user */
2996 error
= namei_simple_user(path
, sflags
, &vp
);
3002 vn_lock(vp
, LK_EXCLUSIVE
| LK_RETRY
);
3003 setbirthtime
= (VOP_GETATTR(vp
, &vattr
, l
->l_cred
) == 0 &&
3004 timespeccmp(&ts
[1], &vattr
.va_birthtime
, <));
3006 vattr
.va_atime
= ts
[0];
3007 vattr
.va_mtime
= ts
[1];
3009 vattr
.va_birthtime
= ts
[1];
3011 vattr
.va_vaflags
|= VA_UTIMES_NULL
;
3012 error
= VOP_SETATTR(vp
, &vattr
, l
->l_cred
);
3022 * Truncate a file given its path name.
3026 sys_truncate(struct lwp
*l
, const struct sys_truncate_args
*uap
, register_t
*retval
)
3029 syscallarg(const char *) path;
3030 syscallarg(int) pad;
3031 syscallarg(off_t) length;
3037 error
= namei_simple_user(SCARG(uap
, path
),
3038 NSM_FOLLOW_TRYEMULROOT
, &vp
);
3041 vn_lock(vp
, LK_EXCLUSIVE
| LK_RETRY
);
3042 if (vp
->v_type
== VDIR
)
3044 else if ((error
= vn_writechk(vp
)) == 0 &&
3045 (error
= VOP_ACCESS(vp
, VWRITE
, l
->l_cred
)) == 0) {
3047 vattr
.va_size
= SCARG(uap
, length
);
3048 error
= VOP_SETATTR(vp
, &vattr
, l
->l_cred
);
3055 * Truncate a file given a file descriptor.
3059 sys_ftruncate(struct lwp
*l
, const struct sys_ftruncate_args
*uap
, register_t
*retval
)
3063 syscallarg(int) pad;
3064 syscallarg(off_t) length;
3071 /* fd_getvnode() will use the descriptor for us */
3072 if ((error
= fd_getvnode(SCARG(uap
, fd
), &fp
)) != 0)
3074 if ((fp
->f_flag
& FWRITE
) == 0) {
3079 vn_lock(vp
, LK_EXCLUSIVE
| LK_RETRY
);
3080 if (vp
->v_type
== VDIR
)
3082 else if ((error
= vn_writechk(vp
)) == 0) {
3084 vattr
.va_size
= SCARG(uap
, length
);
3085 error
= VOP_SETATTR(vp
, &vattr
, fp
->f_cred
);
3089 fd_putfile(SCARG(uap
, fd
));
3094 * Sync an open file.
3098 sys_fsync(struct lwp
*l
, const struct sys_fsync_args
*uap
, register_t
*retval
)
3107 /* fd_getvnode() will use the descriptor for us */
3108 if ((error
= fd_getvnode(SCARG(uap
, fd
), &fp
)) != 0)
3111 vn_lock(vp
, LK_EXCLUSIVE
| LK_RETRY
);
3112 error
= VOP_FSYNC(vp
, fp
->f_cred
, FSYNC_WAIT
, 0, 0);
3114 fd_putfile(SCARG(uap
, fd
));
3119 * Sync a range of file data. API modeled after that found in AIX.
3121 * FDATASYNC indicates that we need only save enough metadata to be able
3122 * to re-read the written data. Note we duplicate AIX's requirement that
3123 * the file be open for writing.
3127 sys_fsync_range(struct lwp
*l
, const struct sys_fsync_range_args
*uap
, register_t
*retval
)
3131 syscallarg(int) flags;
3132 syscallarg(off_t) start;
3133 syscallarg(off_t) length;
3141 /* fd_getvnode() will use the descriptor for us */
3142 if ((error
= fd_getvnode(SCARG(uap
, fd
), &fp
)) != 0)
3145 if ((fp
->f_flag
& FWRITE
) == 0) {
3150 flags
= SCARG(uap
, flags
);
3151 if (((flags
& (FDATASYNC
| FFILESYNC
)) == 0) ||
3152 ((~flags
& (FDATASYNC
| FFILESYNC
)) == 0)) {
3156 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */
3157 if (flags
& FDATASYNC
)
3158 nflags
= FSYNC_DATAONLY
| FSYNC_WAIT
;
3160 nflags
= FSYNC_WAIT
;
3161 if (flags
& FDISKSYNC
)
3162 nflags
|= FSYNC_CACHE
;
3164 len
= SCARG(uap
, length
);
3165 /* If length == 0, we do the whole file, and s = l = 0 will do that */
3167 s
= SCARG(uap
, start
);
3179 vn_lock(vp
, LK_EXCLUSIVE
| LK_RETRY
);
3180 error
= VOP_FSYNC(vp
, fp
->f_cred
, nflags
, s
, e
);
3183 fd_putfile(SCARG(uap
, fd
));
3188 * Sync the data of an open file.
3192 sys_fdatasync(struct lwp
*l
, const struct sys_fdatasync_args
*uap
, register_t
*retval
)
3201 /* fd_getvnode() will use the descriptor for us */
3202 if ((error
= fd_getvnode(SCARG(uap
, fd
), &fp
)) != 0)
3204 if ((fp
->f_flag
& FWRITE
) == 0) {
3205 fd_putfile(SCARG(uap
, fd
));
3209 vn_lock(vp
, LK_EXCLUSIVE
| LK_RETRY
);
3210 error
= VOP_FSYNC(vp
, fp
->f_cred
, FSYNC_WAIT
|FSYNC_DATAONLY
, 0, 0);
3212 fd_putfile(SCARG(uap
, fd
));
3217 * Rename files, (standard) BSD semantics frontend.
3221 sys_rename(struct lwp
*l
, const struct sys_rename_args
*uap
, register_t
*retval
)
3224 syscallarg(const char *) from;
3225 syscallarg(const char *) to;
3228 return (do_sys_rename(SCARG(uap
, from
), SCARG(uap
, to
), UIO_USERSPACE
, 0));
3232 * Rename files, POSIX semantics frontend.
3236 sys___posix_rename(struct lwp
*l
, const struct sys___posix_rename_args
*uap
, register_t
*retval
)
3239 syscallarg(const char *) from;
3240 syscallarg(const char *) to;
3243 return (do_sys_rename(SCARG(uap
, from
), SCARG(uap
, to
), UIO_USERSPACE
, 1));
3247 * Rename files. Source and destination must either both be directories,
3248 * or both not be directories. If target is a directory, it must be empty.
3249 * If `from' and `to' refer to the same object, the value of the `retain'
3250 * argument is used to determine whether `from' will be
3252 * (retain == 0) deleted unless `from' and `to' refer to the same
3253 * object in the file system's name space (BSD).
3254 * (retain == 1) always retained (POSIX).
3257 do_sys_rename(const char *from
, const char *to
, enum uio_seg seg
, int retain
)
3259 struct vnode
*tvp
, *fvp
, *tdvp
;
3260 struct nameidata fromnd
, tond
;
3262 struct lwp
*l
= curlwp
;
3267 NDINIT(&fromnd
, DELETE
, LOCKPARENT
| SAVESTART
| TRYEMULROOT
| INRENAME
,
3269 if ((error
= namei(&fromnd
)) != 0)
3271 if (fromnd
.ni_dvp
!= fromnd
.ni_vp
)
3272 VOP_UNLOCK(fromnd
.ni_dvp
, 0);
3276 error
= VFS_RENAMELOCK_ENTER(fs
);
3278 VOP_ABORTOP(fromnd
.ni_dvp
, &fromnd
.ni_cnd
);
3279 vrele(fromnd
.ni_dvp
);
3285 * close, partially, yet another race - ideally we should only
3286 * go as far as getting fromnd.ni_dvp before getting the per-fs
3287 * lock, and then continue to get fromnd.ni_vp, but we can't do
3288 * that with namei as it stands.
3290 * This still won't prevent rmdir from nuking fromnd.ni_vp
3291 * under us. The real fix is to get the locks in the right
3292 * order and do the lookups in the right places, but that's a
3295 * Preserve the SAVESTART in cn_flags, because who knows what
3296 * might happen if we don't.
3298 * Note: this logic (as well as this whole function) is cloned
3299 * in nfs_serv.c. Proceed accordingly.
3302 if ((fromnd
.ni_cnd
.cn_namelen
== 1 &&
3303 fromnd
.ni_cnd
.cn_nameptr
[0] == '.') ||
3304 (fromnd
.ni_cnd
.cn_namelen
== 2 &&
3305 fromnd
.ni_cnd
.cn_nameptr
[0] == '.' &&
3306 fromnd
.ni_cnd
.cn_nameptr
[1] == '.')) {
3308 VFS_RENAMELOCK_EXIT(fs
);
3309 VOP_ABORTOP(fromnd
.ni_dvp
, &fromnd
.ni_cnd
);
3310 vrele(fromnd
.ni_dvp
);
3313 saveflag
= fromnd
.ni_cnd
.cn_flags
& SAVESTART
;
3314 fromnd
.ni_cnd
.cn_flags
&= ~SAVESTART
;
3315 vn_lock(fromnd
.ni_dvp
, LK_EXCLUSIVE
| LK_RETRY
);
3316 error
= relookup(fromnd
.ni_dvp
, &fromnd
.ni_vp
, &fromnd
.ni_cnd
);
3317 fromnd
.ni_cnd
.cn_flags
|= saveflag
;
3319 VOP_UNLOCK(fromnd
.ni_dvp
, 0);
3320 VFS_RENAMELOCK_EXIT(fs
);
3321 VOP_ABORTOP(fromnd
.ni_dvp
, &fromnd
.ni_cnd
);
3322 vrele(fromnd
.ni_dvp
);
3325 VOP_UNLOCK(fromnd
.ni_vp
, 0);
3326 if (fromnd
.ni_dvp
!= fromnd
.ni_vp
)
3327 VOP_UNLOCK(fromnd
.ni_dvp
, 0);
3330 NDINIT(&tond
, RENAME
,
3331 LOCKPARENT
| LOCKLEAF
| NOCACHE
| SAVESTART
| TRYEMULROOT
3332 | INRENAME
| (fvp
->v_type
== VDIR
? CREATEDIR
: 0),
3334 if ((error
= namei(&tond
)) != 0) {
3335 VFS_RENAMELOCK_EXIT(fs
);
3336 VOP_ABORTOP(fromnd
.ni_dvp
, &fromnd
.ni_cnd
);
3337 vrele(fromnd
.ni_dvp
);
3345 if (fvp
->v_type
== VDIR
&& tvp
->v_type
!= VDIR
) {
3348 } else if (fvp
->v_type
!= VDIR
&& tvp
->v_type
== VDIR
) {
3358 * Source and destination refer to the same object.
3363 else if (fromnd
.ni_dvp
== tdvp
&&
3364 fromnd
.ni_cnd
.cn_namelen
== tond
.ni_cnd
.cn_namelen
&&
3365 !memcmp(fromnd
.ni_cnd
.cn_nameptr
,
3366 tond
.ni_cnd
.cn_nameptr
,
3367 fromnd
.ni_cnd
.cn_namelen
))
3377 f1_len
= fromnd
.ni_cnd
.cn_namelen
+ 1;
3378 f1
= kmem_alloc(f1_len
, KM_SLEEP
);
3379 strlcpy(f1
, fromnd
.ni_cnd
.cn_nameptr
, f1_len
);
3381 f2_len
= tond
.ni_cnd
.cn_namelen
+ 1;
3382 f2
= kmem_alloc(f2_len
, KM_SLEEP
);
3383 strlcpy(f2
, tond
.ni_cnd
.cn_nameptr
, f2_len
);
3385 error
= veriexec_renamechk(l
, fvp
, f1
, tvp
, f2
);
3387 kmem_free(f1
, f1_len
);
3388 kmem_free(f2
, f2_len
);
3390 #endif /* NVERIEXEC > 0 */
3395 error
= VOP_RENAME(fromnd
.ni_dvp
, fromnd
.ni_vp
, &fromnd
.ni_cnd
,
3396 tond
.ni_dvp
, tond
.ni_vp
, &tond
.ni_cnd
);
3397 VFS_RENAMELOCK_EXIT(fs
);
3399 VOP_ABORTOP(tond
.ni_dvp
, &tond
.ni_cnd
);
3406 VFS_RENAMELOCK_EXIT(fs
);
3407 VOP_ABORTOP(fromnd
.ni_dvp
, &fromnd
.ni_cnd
);
3408 vrele(fromnd
.ni_dvp
);
3411 vrele(tond
.ni_startdir
);
3412 PNBUF_PUT(tond
.ni_cnd
.cn_pnbuf
);
3414 if (fromnd
.ni_startdir
)
3415 vrele(fromnd
.ni_startdir
);
3416 PNBUF_PUT(fromnd
.ni_cnd
.cn_pnbuf
);
3417 return (error
== -1 ? 0 : error
);
3421 * Make a directory file.
/*
 * sys_mkdir: system call entry point for making a directory.
 *
 * Pulls the `path' and `mode' arguments out of uap with SCARG() and
 * forwards them to do_sys_mkdir(), passing UIO_USERSPACE as the
 * segment argument.  Whatever do_sys_mkdir() returns is returned
 * unchanged.  Neither `l' nor `retval' is referenced in the lines
 * visible here.
 */
3425 sys_mkdir(struct lwp
*l
, const struct sys_mkdir_args
*uap
, register_t
*retval
)
3428 syscallarg(const char *) path;
3429 syscallarg(int) mode;
3432 return do_sys_mkdir(SCARG(uap
, path
), SCARG(uap
, mode
), UIO_USERSPACE
);
/*
 * do_sys_mkdir: create a directory named by `path'.
 *
 *	path	pathname of the directory to create
 *	mode	requested mode; only the ACCESSPERMS bits are used, and
 *		the bits set in the process's cwdi_cmask are cleared
 *	seg	address-space selector for `path'
 *		NOTE(review): presumably handed to NDINIT(); the
 *		continuation line of that call is not visible here.
 *
 * The name is looked up with namei() for a CREATE operation with
 * LOCKPARENT | CREATEDIR | TRYEMULROOT.  On success the directory is
 * made with VOP_MKDIR() using a vattr whose type is VDIR.
 *
 * NOTE(review): several statements of this function (local
 * declarations, the test that selects the VOP_ABORTOP() path, and the
 * return statements) are missing from this excerpt.
 */
3436 do_sys_mkdir(const char *path
, mode_t mode
, enum uio_seg seg
)
3438 struct proc
*p
= curlwp
->l_proc
;
3442 struct nameidata nd
;
3444 NDINIT(&nd
, CREATE
, LOCKPARENT
| CREATEDIR
| TRYEMULROOT
,
/* A non-zero namei() result is treated as the failure case. */
3446 if ((error
= namei(&nd
)) != 0)
/*
 * Abandon the pending create on the parent directory.
 * NOTE(review): the condition guarding this path is not visible;
 * presumably it is taken when the target already exists -- confirm.
 */
3450 VOP_ABORTOP(nd
.ni_dvp
, &nd
.ni_cnd
);
/*
 * Parent and target are the same vnode.
 * NOTE(review): the statements selected by this test are not visible.
 */
3451 if (nd
.ni_dvp
== vp
)
/*
 * Describe the new node: a directory whose mode is the requested
 * permission bits with the creation mask bits removed.
 */
3459 vattr
.va_type
= VDIR
;
3460 /* We will read cwdi->cwdi_cmask unlocked. */
3461 vattr
.va_mode
= (mode
& ACCESSPERMS
) &~ p
->p_cwdi
->cwdi_cmask
;
3462 error
= VOP_MKDIR(nd
.ni_dvp
, &nd
.ni_vp
, &nd
.ni_cnd
, &vattr
);
3469 * Remove a directory file.
/*
 * sys_rmdir: system call entry point for removing a directory.
 *
 * The path is looked up with namei() for a DELETE operation with
 * LOCKPARENT | LOCKLEAF | TRYEMULROOT, reading the path from user
 * space (UIO_USERSPACE).  Three conditions are tested before the
 * removal is attempted:
 *
 *	- the looked-up vnode is not of type VDIR;
 *	- the parent and the target are the same vnode (rmdir of ".");
 *	- the vnode is the root of a mounted file system (VV_ROOT) or
 *	  has a file system mounted on it (v_mountedhere != NULL).
 *
 * Otherwise the directory is removed with VOP_RMDIR().
 *
 * NOTE(review): the error values assigned for the rejected cases, the
 * jumps between the tests and the VOP_ABORTOP() cleanup, the vnode
 * release calls, and the return statement are missing from this
 * excerpt.
 */
3473 sys_rmdir(struct lwp
*l
, const struct sys_rmdir_args
*uap
, register_t
*retval
)
3476 syscallarg(const char *) path;
3480 struct nameidata nd
;
3482 NDINIT(&nd
, DELETE
, LOCKPARENT
| LOCKLEAF
| TRYEMULROOT
, UIO_USERSPACE
,
/* A non-zero namei() result is treated as the failure case. */
3484 if ((error
= namei(&nd
)) != 0)
/* Only directories may be removed through this call. */
3487 if (vp
->v_type
!= VDIR
) {
3492 * No rmdir "." please.
3494 if (nd
.ni_dvp
== vp
) {
3499 * The root of a mounted filesystem cannot be deleted.
3501 if ((vp
->v_vflag
& VV_ROOT
) != 0 || vp
->v_mountedhere
!= NULL
) {
3505 error
= VOP_RMDIR(nd
.ni_dvp
, nd
.ni_vp
, &nd
.ni_cnd
);
/*
 * Cleanup path: abandon the pending delete on the parent.
 * NOTE(review): presumably reached only from the rejected cases
 * above; the control flow leading here is not visible.
 */
3509 VOP_ABORTOP(nd
.ni_dvp
, &nd
.ni_cnd
);
/*
 * Parent and target are the same vnode.
 * NOTE(review): the statements selected by this test are not visible.
 */
3510 if (nd
.ni_dvp
== vp
)
3519 * Read a block of directory entries in a file system independent format.
/*
 * sys___getdents30: system call entry point that reads directory
 * entries from an open file descriptor into a user buffer.
 *
 * Arguments taken from uap via SCARG(): `fd' (the descriptor), `buf'
 * (destination buffer, passed with UIO_USERSPACE) and `count' (passed
 * to vn_readdir() right after the segment argument).
 *
 * The descriptor is resolved to a file with fd_getvnode(), the file
 * is checked for the FREAD flag, the entries are read with
 * vn_readdir(), the transfer is reported to ktrgenio(), and the
 * descriptor is released with fd_putfile().
 *
 * NOTE(review): the local declarations, the error value used when
 * FREAD is not set, how `done' reaches the caller through `retval',
 * and the return statement are missing from this excerpt.
 */
3522 sys___getdents30(struct lwp
*l
, const struct sys___getdents30_args
*uap
, register_t
*retval
)
3526 syscallarg(char *) buf;
3527 syscallarg(size_t) count;
3532 /* fd_getvnode() will use the descriptor for us */
3533 if ((error
= fd_getvnode(SCARG(uap
, fd
), &fp
)) != 0)
/* The file must have been opened for reading. */
3535 if ((fp
->f_flag
& FREAD
) == 0) {
/* vn_readdir() stores its result count through &done. */
3539 error
= vn_readdir(fp
, SCARG(uap
, buf
), UIO_USERSPACE
,
3540 SCARG(uap
, count
), &done
, l
, 0, 0);
/* Report the read, with its byte count and error, to ktrace. */
3541 ktrgenio(SCARG(uap
, fd
), UIO_READ
, SCARG(uap
, buf
), done
, error
);
/* Drop the descriptor reference taken by fd_getvnode(). */
3544 fd_putfile(SCARG(uap
, fd
));
3549 * Set the mode mask for creation of filesystem nodes.
/*
 * sys_umask: system call entry point that replaces the file mode
 * creation mask and hands back the previous one.
 *
 * With cwdi_lock held as a writer, the current cwdi_cmask is stored
 * into *retval and then overwritten with the `newmask' argument from
 * uap, restricted to the ALLPERMS bits.
 *
 * NOTE(review): the statement that assigns `cwdi' (presumably from
 * p, the calling lwp's process) and the return statement are missing
 * from this excerpt.
 */
3552 sys_umask(struct lwp
*l
, const struct sys_umask_args
*uap
, register_t
*retval
)
3555 syscallarg(mode_t) newmask;
3557 struct proc
*p
= l
->l_proc
;
3558 struct cwdinfo
*cwdi
;
3561 * cwdi->cwdi_cmask will be read unlocked elsewhere. What's
3562 * important is that we serialize changes to the mask. The
3563 * rw_exit() will issue a write memory barrier on our behalf,
3564 * and force the changes out to other CPUs (as it must use an
3565 * atomic operation, draining the local CPU's store buffers).
3568 rw_enter(&cwdi
->cwdi_lock
, RW_WRITER
);
/* Hand the old mask back before it is overwritten. */
3569 *retval
= cwdi
->cwdi_cmask
;
3570 cwdi
->cwdi_cmask
= SCARG(uap
, newmask
) & ALLPERMS
;
3571 rw_exit(&cwdi
->cwdi_lock
);
/*
 * dorevoke: revoke access to vnode `vp' on behalf of credential
 * `cred', if that credential is allowed to.
 *
 * The vnode's attributes are fetched with VOP_GETATTR().  The revoke
 * (VOP_REVOKE() with REVOKEALL) is performed when either the
 * credential's effective uid equals the vnode's owner (va_uid), or
 * kauth_authorize_generic() with KAUTH_GENERIC_ISSUSER returns 0.
 *
 * NOTE(review): the local declarations and the return statements are
 * missing from this excerpt.
 */
3577 dorevoke(struct vnode
*vp
, kauth_cred_t cred
)
/* A non-zero VOP_GETATTR() result is treated as the failure case. */
3582 if ((error
= VOP_GETATTR(vp
, &vattr
, cred
)) != 0)
/*
 * Because of || short-circuiting, the authorization call runs, and
 * `error' is overwritten with its result, only when the caller is
 * not the owner.
 */
3584 if (kauth_cred_geteuid(cred
) == vattr
.va_uid
||
3585 (error
= kauth_authorize_generic(cred
,
3586 KAUTH_GENERIC_ISSUSER
, NULL
)) == 0)
3587 VOP_REVOKE(vp
, REVOKEALL
);
3592 * Void all references to file by ripping underlying filesystem
3597 sys_revoke(struct lwp
*l
, const struct sys_revoke_args
*uap
, register_t
*retval
)
3600 syscallarg(const char *) path;
3605 error
= namei_simple_user(SCARG(uap
, path
),
3606 NSM_FOLLOW_TRYEMULROOT
, &vp
);
3609 error
= dorevoke(vp
, l
->l_cred
);