2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
13 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
16 #include <sys/types.h>
17 #include <sys/param.h>
18 #include <sys/t_lock.h>
19 #include <sys/errno.h>
24 #include <sys/pathname.h>
26 #include <sys/vnode.h>
30 #include <sys/debug.h>
31 #include <sys/atomic.h>
33 #include <sys/flock.h>
34 #include <sys/nbmlock.h>
35 #include <sys/fcntl.h>
/*
 * Mask covering the write-capable access-mode bits (O_WRONLY, O_RDWR) of
 * an open(2) flags word.  Code later in this file tests
 * (omode & O_RWMASK) == O_RDWR to recognize a read/write open mode.
 */
45 #define O_RWMASK (O_WRONLY | O_RDWR) /* == 3 */
/*
 * Switch tested by the share-lock code later in this file
 * (if (!fop_shrlock_enable)), ahead of its fcntl() call.  Defaults to 0.
 * NOTE(review): the statement guarded by that test is not visible here;
 * presumably share reservations are bypassed while this is 0 -- confirm.
 */
47 int fop_shrlock_enable
= 0;
/*
 * Forward declarations for helpers used by the attribute code below.
 *
 * stat_to_vattr() is defined later in this file; it copies the fields
 * selected by va_mask from a struct stat into a vattr_t.
 *
 * fop__getxvattr() and fop__setxvattr() are called with the vattr pointer
 * cast to xvattr_t * when AT_XVATTR is set in va_mask.
 * NOTE(review): their definitions are not visible here -- presumably
 * provided elsewhere; confirm.
 */
49 int stat_to_vattr(const struct stat
*, vattr_t
*);
50 int fop__getxvattr(vnode_t
*, xvattr_t
*);
51 int fop__setxvattr(vnode_t
*, xvattr_t
*);
63 if ((*vpp
)->v_type
== VREG
) {
65 atomic_add_32(&((*vpp
)->v_rdcnt
), 1);
67 atomic_add_32(&((*vpp
)->v_wrcnt
), 1);
70 /* call to ->vop_open was here */
86 /* call to ->vop_close was here */
89 * Check the passed-in count to handle possible dups. Vnode counts are only
90 * kept on regular files.
92 if ((vp
->v_type
== VREG
) && (count
== 1)) {
94 ASSERT(vp
->v_rdcnt
> 0);
95 atomic_add_32(&(vp
->v_rdcnt
), -1);
98 ASSERT(vp
->v_wrcnt
> 0);
99 atomic_add_32(&(vp
->v_wrcnt
), -1);
112 caller_context_t
*ct
)
121 * If the caller asks to read beyond the end of the file,
122 * that causes the pread call to block. (Ugh!)
123 * Get the file size and return what we can.
125 (void) fstat(vp
->v_fd
, &st
);
126 resid
= uio
->uio_resid
;
127 if ((uio
->uio_loffset
+ resid
) > st
.st_size
)
128 resid
= st
.st_size
- uio
->uio_loffset
;
132 ASSERT(uio
->uio_iovcnt
> 0);
135 if (iov
->iov_len
== 0) {
144 n
= pread(vp
->v_fd
, iov
->iov_base
, cnt
, uio
->uio_loffset
);
152 uio
->uio_loffset
+= n
;
167 caller_context_t
*ct
)
173 while (uio
->uio_resid
> 0) {
175 ASSERT(uio
->uio_iovcnt
> 0);
178 if (iov
->iov_len
== 0) {
184 if (cnt
> uio
->uio_resid
)
185 cnt
= uio
->uio_resid
;
187 n
= pwrite(vp
->v_fd
, iov
->iov_base
, iov
->iov_len
,
196 uio
->uio_loffset
+= n
;
199 if (ioflag
== FSYNC
) {
200 (void) fsync(vp
->v_fd
);
215 caller_context_t
*ct
)
227 caller_context_t
*ct
)
229 /* allow any flags? See fs_setfl */
240 caller_context_t
*ct
)
245 if (fstat(vp
->v_fd
, &st
) == -1)
247 error
= stat_to_vattr(&st
, vap
);
249 if (vap
->va_mask
& AT_XVATTR
)
250 (void) fop__getxvattr(vp
, (xvattr_t
*)vap
);
262 caller_context_t
*ct
)
266 if (vap
->va_mask
& AT_SIZE
) {
267 if (ftruncate(vp
->v_fd
, vap
->va_size
) == -1)
271 /* AT_MODE or anything else? */
273 if (vap
->va_mask
& AT_XVATTR
)
274 (void) fop__setxvattr(vp
, (xvattr_t
*)vap
);
276 if (vap
->va_mask
& (AT_ATIME
| AT_MTIME
)) {
277 if (vap
->va_mask
& AT_ATIME
) {
278 times
[0] = vap
->va_atime
;
281 times
[0].tv_nsec
= UTIME_OMIT
;
283 if (vap
->va_mask
& AT_MTIME
) {
284 times
[1] = vap
->va_mtime
;
287 times
[1].tv_nsec
= UTIME_OMIT
;
290 (void) futimens(vp
->v_fd
, times
);
303 caller_context_t
*ct
)
318 caller_context_t
*ct
,
319 int *deflags
, /* Returned per-dirent flags */
320 pathname_t
*ppnp
) /* Returned case-preserved name in directory */
323 int omode
= O_RDWR
| O_NOFOLLOW
;
327 if (flags
& LOOKUP_XATTR
)
331 * If lookup is for "", just return dvp.
333 if (name
[0] == '\0') {
339 if (fstatat(dvp
->v_fd
, name
, &st
, AT_SYMLINK_NOFOLLOW
) == -1)
342 vp
= vncache_lookup(&st
);
344 /* lookup gave us a hold */
349 if (S_ISDIR(st
.st_mode
))
350 omode
= O_RDONLY
| O_NOFOLLOW
;
353 fd
= openat(dvp
->v_fd
, name
, omode
, 0);
355 if ((omode
& O_RWMASK
) == O_RDWR
) {
363 if (fstat(fd
, &st
) == -1) {
368 vp
= vncache_enter(&st
, dvp
, name
, fd
);
385 caller_context_t
*ct
,
386 vsecattr_t
*vsecp
) /* ACL to set during create */
393 * If creating "", just return dvp.
395 if (name
[0] == '\0') {
401 err
= fstatat(dvp
->v_fd
, name
, &st
, AT_SYMLINK_NOFOLLOW
);
407 /* The file already exists. */
411 vp
= vncache_lookup(&st
);
412 /* vp gained a hold */
417 * Open it. (may or may not exist)
419 omode
= O_RDWR
| O_CREAT
| O_NOFOLLOW
;
423 fd
= openat(dvp
->v_fd
, name
, omode
, mode
);
425 if ((omode
& O_RWMASK
) == O_RDWR
) {
432 (void) fstat(fd
, &st
);
434 vp
= vncache_enter(&st
, dvp
, name
, fd
);
435 /* vp has its initial hold */
438 /* Should have the vp now. */
442 if (vp
->v_type
== VDIR
&& vap
->va_type
!= VDIR
) {
446 if (vp
->v_type
!= VDIR
&& vap
->va_type
== VDIR
) {
452 * Might need to set attributes.
454 (void) fop_setattr(vp
, vap
, 0, cr
, ct
);
466 caller_context_t
*ct
,
470 if (unlinkat(dvp
->v_fd
, name
, 0))
483 caller_context_t
*ct
,
489 * Would prefer to specify "from" as the combination:
490 * (fr_vp->v_fd, NULL) but linkat does not permit it.
492 err
= linkat(AT_FDCWD
, fr_vp
->v_path
, to_dvp
->v_fd
, to_name
,
508 caller_context_t
*ct
,
515 if (fstatat(from_dvp
->v_fd
, from_name
, &st
,
516 AT_SYMLINK_NOFOLLOW
) == -1)
519 vp
= vncache_lookup(&st
);
523 err
= renameat(from_dvp
->v_fd
, from_name
, to_dvp
->v_fd
, to_name
);
527 vncache_renamed(vp
, to_dvp
, to_name
);
542 caller_context_t
*ct
,
544 vsecattr_t
*vsecp
) /* ACL to set during create */
549 mode_t mode
= vap
->va_mode
& 0777;
551 if (mkdirat(dvp
->v_fd
, name
, mode
) == -1)
554 if ((fd
= openat(dvp
->v_fd
, name
, O_RDONLY
)) == -1)
556 if (fstat(fd
, &st
) == -1) {
562 *vpp
= vncache_enter(&st
, dvp
, name
, fd
);
565 * Might need to set attributes.
567 (void) fop_setattr(*vpp
, vap
, 0, cr
, ct
);
579 caller_context_t
*ct
,
583 if (unlinkat(dvp
->v_fd
, name
, AT_REMOVEDIR
) == -1)
596 caller_context_t
*ct
,
608 error
= lseek(fd
, uiop
->uio_loffset
, SEEK_SET
);
612 ASSERT(uiop
->uio_iovcnt
> 0);
614 if (iov
->iov_len
< sizeof (struct dirent
))
617 /* LINTED E_BAD_PTR_CAST_ALIGN */
618 cnt
= getdents(fd
, (struct dirent
*)(uiop
->uio_iov
->iov_base
),
629 iov
->iov_base
+= cnt
;
631 uiop
->uio_resid
-= cnt
;
632 uiop
->uio_loffset
= lseek(fd
, 0LL, SEEK_CUR
);
645 caller_context_t
*ct
,
657 caller_context_t
*ct
)
668 caller_context_t
*ct
)
671 if (fsync(vp
->v_fd
) == -1)
682 caller_context_t
*ct
)
684 vncache_inactive(vp
);
692 caller_context_t
*ct
)
702 caller_context_t
*ct
)
713 caller_context_t
*ct
)
715 /* See: fs_rwunlock */
724 caller_context_t
*ct
)
734 caller_context_t
*ct
)
737 return (vncache_cmp(vp1
, vp2
));
748 struct flk_callback
*flk_cbp
,
750 caller_context_t
*ct
)
764 if (fcntl(vp
->v_fd
, cmd
, bfp
) == -1)
779 caller_context_t
*ct
)
791 if (fcntl(vp
->v_fd
, cmd
, bfp
) == -1)
802 caller_context_t
*ct
)
820 caller_context_t
*ct
)
833 caller_context_t
*ct
)
850 caller_context_t
*ct
)
867 caller_context_t
*ct
)
884 caller_context_t
*ct
)
896 struct pollhead
**phpp
,
897 caller_context_t
*ct
)
902 if (events
& POLLRDNORM
)
903 *reventsp
|= POLLRDNORM
;
904 if (events
& POLLRDBAND
)
905 *reventsp
|= POLLRDBAND
;
906 if (events
& POLLOUT
)
907 *reventsp
|= POLLOUT
;
908 if (events
& POLLWRBAND
)
909 *reventsp
|= POLLWRBAND
;
910 *phpp
= NULL
; /* or fake_pollhead? */
922 caller_context_t
*ct
)
937 caller_context_t
*ct
)
939 register ulong_t val
;
940 register int error
= 0;
961 case _PC_SYMLINK_MAX
:
974 val
= _POSIX_VDISABLE
;
977 case _PC_CHOWN_RESTRICTED
:
978 val
= 1; /* chown restricted enabled */
981 case _PC_FILESIZEBITS
:
982 val
= (ulong_t
)-1; /* large file support */
985 case _PC_ACL_ENABLED
:
989 case _PC_CASE_BEHAVIOR
:
990 val
= _CASE_SENSITIVE
;
993 case _PC_SATTR_ENABLED
:
994 case _PC_SATTR_EXISTS
:
998 case _PC_ACCESS_FILTERING
:
1021 caller_context_t
*ct
)
1032 caller_context_t
*ct
)
1045 caller_context_t
*ct
)
1056 caller_context_t
*ct
)
1062 * Fake up just enough of this so we can test get/set SDs.
1068 vsecattr_t
*vsecattr
,
1071 caller_context_t
*ct
)
1074 vsecattr
->vsa_aclcnt
= 0;
1075 vsecattr
->vsa_aclentsz
= 0;
1076 vsecattr
->vsa_aclentp
= NULL
;
1077 vsecattr
->vsa_dfaclcnt
= 0; /* Default ACLs are not fabricated */
1078 vsecattr
->vsa_dfaclentp
= NULL
;
1080 if (vsecattr
->vsa_mask
& (VSA_ACLCNT
| VSA_ACL
)) {
1084 aclsize
= sizeof (aclent_t
);
1085 vsecattr
->vsa_aclcnt
= 1;
1086 vsecattr
->vsa_aclentp
= kmem_zalloc(aclsize
, KM_SLEEP
);
1087 aclentp
= vsecattr
->vsa_aclentp
;
1089 aclentp
->a_type
= OTHER_OBJ
;
1090 aclentp
->a_perm
= 0777;
1091 aclentp
->a_id
= (gid_t
)-1;
1093 } else if (vsecattr
->vsa_mask
& (VSA_ACECNT
| VSA_ACE
)) {
1096 acl
= kmem_alloc(sizeof (ace_t
), KM_SLEEP
);
1097 acl
->a_who
= (uint32_t)-1;
1098 acl
->a_type
= ACE_ACCESS_ALLOWED_ACE_TYPE
;
1099 acl
->a_flags
= ACE_EVERYONE
;
1100 acl
->a_access_mask
= ACE_MODIFY_PERMS
;
1102 vsecattr
->vsa_aclentp
= (void *)acl
;
1103 vsecattr
->vsa_aclcnt
= 1;
1104 vsecattr
->vsa_aclentsz
= sizeof (ace_t
);
1115 struct shrlock
*shr
,
1118 caller_context_t
*ct
)
1123 case F_SHARE_NBMAND
:
1130 if (!fop_shrlock_enable
)
1133 if (fcntl(vp
->v_fd
, cmd
, shr
) == -1)
1141 fop_vnevent(vnode_t
*vp
, vnevent_t vnevent
, vnode_t
*dvp
, char *fnm
,
1142 caller_context_t
*ct
)
1149 fop_reqzcbuf(vnode_t
*vp
, enum uio_rw ioflag
, xuio_t
*uiop
, cred_t
*cr
,
1150 caller_context_t
*ct
)
1157 fop_retzcbuf(vnode_t
*vp
, xuio_t
*uiop
, cred_t
*cr
, caller_context_t
*ct
)
1164 * ***************************************************************
1169 * Convert stat(2) formats to vnode types and vice versa. (Knows about
1170 * numerical order of S_IFMT and vnode types.)
1172 enum vtype iftovt_tab
[] = {
1173 VNON
, VFIFO
, VCHR
, VNON
, VDIR
, VNON
, VBLK
, VNON
,
1174 VREG
, VNON
, VLNK
, VNON
, VSOCK
, VNON
, VNON
, VNON
1177 ushort_t vttoif_tab
[] = {
1178 0, S_IFREG
, S_IFDIR
, S_IFBLK
, S_IFCHR
, S_IFLNK
, S_IFIFO
,
1179 S_IFDOOR
, 0, S_IFSOCK
, S_IFPORT
, 0
1185 * Convert from a stat structure to a vattr structure
1186 * Note: only set fields according to va_mask
1190 stat_to_vattr(const struct stat
*st
, vattr_t
*vap
)
1193 if (vap
->va_mask
& AT_TYPE
)
1194 vap
->va_type
= IFTOVT(st
->st_mode
);
1196 if (vap
->va_mask
& AT_MODE
)
1197 vap
->va_mode
= st
->st_mode
;
1199 if (vap
->va_mask
& AT_UID
)
1200 vap
->va_uid
= st
->st_uid
;
1202 if (vap
->va_mask
& AT_GID
)
1203 vap
->va_gid
= st
->st_gid
;
1205 if (vap
->va_mask
& AT_FSID
)
1206 vap
->va_fsid
= st
->st_dev
;
1208 if (vap
->va_mask
& AT_NODEID
)
1209 vap
->va_nodeid
= st
->st_ino
;
1211 if (vap
->va_mask
& AT_NLINK
)
1212 vap
->va_nlink
= st
->st_nlink
;
1214 if (vap
->va_mask
& AT_SIZE
)
1215 vap
->va_size
= (u_offset_t
)st
->st_size
;
1217 if (vap
->va_mask
& AT_ATIME
) {
1218 vap
->va_atime
.tv_sec
= st
->st_atim
.tv_sec
;
1219 vap
->va_atime
.tv_nsec
= st
->st_atim
.tv_nsec
;
1222 if (vap
->va_mask
& AT_MTIME
) {
1223 vap
->va_mtime
.tv_sec
= st
->st_mtim
.tv_sec
;
1224 vap
->va_mtime
.tv_nsec
= st
->st_mtim
.tv_nsec
;
1227 if (vap
->va_mask
& AT_CTIME
) {
1228 vap
->va_ctime
.tv_sec
= st
->st_ctim
.tv_sec
;
1229 vap
->va_ctime
.tv_nsec
= st
->st_ctim
.tv_nsec
;
1232 if (vap
->va_mask
& AT_RDEV
)
1233 vap
->va_rdev
= st
->st_rdev
;
1235 if (vap
->va_mask
& AT_BLKSIZE
)
1236 vap
->va_blksize
= (uint_t
)st
->st_blksize
;
1239 if (vap
->va_mask
& AT_NBLOCKS
)
1240 vap
->va_nblocks
= (u_longlong_t
)st
->st_blocks
;
1242 if (vap
->va_mask
& AT_SEQ
)
1250 flk_init_callback(flk_callback_t
*flk_cb
,
1251 callb_cpr_t
*(*cb_fcn
)(flk_cb_when_t
, void *), void *cbdata
)
1256 vn_hold(vnode_t
*vp
)
1258 mutex_enter(&vp
->v_lock
);
1260 mutex_exit(&vp
->v_lock
);
1264 vn_rele(vnode_t
*vp
)
1266 VERIFY3U(vp
->v_count
, !=, 0);
1267 mutex_enter(&vp
->v_lock
);
1268 if (vp
->v_count
== 1) {
1269 mutex_exit(&vp
->v_lock
);
1270 vncache_inactive(vp
);
1273 mutex_exit(&vp
->v_lock
);
1285 if (vp
->v_wrcnt
> 1)
1289 if ((vp
->v_rdcnt
> 1) || (vp
->v_wrcnt
> 1))
1293 if ((vp
->v_rdcnt
> 1) && (vp
->v_wrcnt
> 1))
1297 if (vp
->v_rdcnt
> 1)
1306 * vn_is_opened() checks whether a particular file is opened and
1307 * whether the open is for read and/or write.
1309 * Vnode counts are only kept on regular files (v_type=VREG).
1325 if (vp
->v_rdcnt
&& vp
->v_wrcnt
)
1329 if (vp
->v_rdcnt
|| vp
->v_wrcnt
)
1342 * vn_is_mapped() checks whether a particular file is mapped and whether
1343 * the file is mapped read and/or write.