4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
24 * Copyright (c) 2016 by Delphix. All rights reserved.
28 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
29 * All rights reserved.
32 #include <sys/param.h>
33 #include <sys/types.h>
34 #include <sys/systm.h>
38 #include <sys/vnode.h>
41 #include <sys/errno.h>
42 #include <sys/sysmacros.h>
43 #include <sys/statvfs.h>
45 #include <sys/kstat.h>
46 #include <sys/dirent.h>
47 #include <sys/cmn_err.h>
48 #include <sys/debug.h>
49 #include <sys/vtrace.h>
52 #include <sys/nbmlock.h>
53 #include <sys/policy.h>
56 #include <rpc/types.h>
61 #include <nfs/export.h>
62 #include <nfs/nfs_cmd.h>
67 #include <vm/seg_map.h>
68 #include <vm/seg_kmem.h>
70 #include <sys/strsubr.h>
73 * These are the interface routines for the server side of the
74 * Network File System. See the NFS version 2 protocol specification
75 * for a description of this interface.
78 static int sattr_to_vattr(struct nfssattr
*, struct vattr
*);
79 static void acl_perm(struct vnode
*, struct exportinfo
*, struct vattr
*,
83 * Some "over the wire" UNIX file types. These are encoded
84 * into the mode. This needs to be fixed in the next rev.
86 #define IFMT 0170000 /* type of file */
87 #define IFCHR 0020000 /* character special */
88 #define IFBLK 0060000 /* block special */
89 #define IFSOCK 0140000 /* socket */
91 u_longlong_t nfs2_srv_caller_id
;
94 * Get file attributes.
95 * Returns the current attributes of the file with the given fhandle.
/*
 * NFS version 2 "get file attributes" service routine.
 *
 * Steps visible here: the file handle is turned into a vnode with
 * nfs_fhtovp(), every attribute (AT_ALL) is fetched through
 * rfs4_delegated_getattr(), acl_perm() is applied to the result, and
 * vattr_to_nattr() converts it into ns->ns_attr.  The outcome is reported
 * to the caller through ns->ns_status.
 *
 * NOTE(review): this listing has dropped lines (local declarations,
 * braces and the conditions guarding each step), so confirm the exact
 * control flow against the complete source before relying on it.
 */
99 rfs_getattr(fhandle_t
*fhp
, struct nfsattrstat
*ns
, struct exportinfo
*exi
,
100 struct svc_req
*req
, cred_t
*cr
, bool_t ro
)
106 vp
= nfs_fhtovp(fhp
, exi
);
/*
 * Stale-handle status; presumably stored only when nfs_fhtovp() produced
 * no vnode -- the test itself is not part of this listing (TODO confirm).
 */
108 ns
->ns_status
= NFSERR_STALE
;
115 va
.va_mask
= AT_ALL
; /* we want all the attributes */
117 error
= rfs4_delegated_getattr(vp
, &va
, 0, cr
);
119 /* check for overflows */
121 /* Lie about the object type for a referral */
122 if (vn_is_nfs_reparse(vp
, cr
))
125 acl_perm(vp
, exi
, &va
, cr
);
126 error
= vattr_to_nattr(&va
, &ns
->ns_attr
);
/* Map the errno-style value in error onto the NFS status of the result. */
131 ns
->ns_status
= puterrno(error
);
134 rfs_getattr_getfh(fhandle_t
*fhp
)
140 * Set file attributes.
141 * Sets the attributes of the file with the given fhandle. Returns
142 * the new attributes.
146 rfs_setattr(struct nfssaargs
*args
, struct nfsattrstat
*ns
,
147 struct exportinfo
*exi
, struct svc_req
*req
, cred_t
*cr
, bool_t ro
)
159 vp
= nfs_fhtovp(&args
->saa_fh
, exi
);
161 ns
->ns_status
= NFSERR_STALE
;
165 if (rdonly(ro
, vp
)) {
167 ns
->ns_status
= NFSERR_ROFS
;
171 error
= sattr_to_vattr(&args
->saa_sa
, &va
);
174 ns
->ns_status
= puterrno(error
);
179 * If the client is requesting a change to the mtime,
180 * but the nanosecond field is set to 1 billion, then
181 * this is a flag to the server that it should set the
182 * atime and mtime fields to the server's current time.
183 * The 1 billion number actually came from the client
184 * as 1 million, but the units in the over the wire
185 * request are microseconds instead of nanoseconds.
187 * This is an overload of the protocol and should be
188 * documented in the NFS Version 2 protocol specification.
190 if (va
.va_mask
& AT_MTIME
) {
191 if (va
.va_mtime
.tv_nsec
== 1000000000) {
192 gethrestime(&va
.va_mtime
);
193 va
.va_atime
= va
.va_mtime
;
194 va
.va_mask
|= AT_ATIME
;
202 * If the filesystem is exported with nosuid, then mask off
203 * the setuid and setgid bits.
205 if ((va
.va_mask
& AT_MODE
) && vp
->v_type
== VREG
&&
206 (exi
->exi_export
.ex_flags
& EX_NOSUID
))
207 va
.va_mode
&= ~(VSUID
| VSGID
);
211 ct
.cc_caller_id
= nfs2_srv_caller_id
;
212 ct
.cc_flags
= CC_DONTBLOCK
;
215 * We need to specially handle size changes because it is
216 * possible for the client to create a file with modes
217 * which indicate read-only, but with the file opened for
218 * writing. If the client then tries to set the size of
219 * the file, then the normal access checking done in
220 * fop_setattr would prevent the client from doing so,
221 * although it should be legal for it to do so. To get
222 * around this, we do the access checking for ourselves
223 * and then use fop_space which doesn't do the access
224 * checking which fop_setattr does. fop_space can only
225 * operate on VREG files, let fop_setattr handle the other
226 * extremely rare cases.
227 * Also the client should not be allowed to change the
228 * size of the file if there is a conflicting non-blocking
229 * mandatory lock in the region of change.
231 if (vp
->v_type
== VREG
&& va
.va_mask
& AT_SIZE
) {
232 if (nbl_need_check(vp
)) {
233 nbl_start_crit(vp
, RW_READER
);
237 bva
.va_mask
= AT_UID
| AT_SIZE
;
239 error
= fop_getattr(vp
, &bva
, 0, cr
, &ct
);
245 ns
->ns_status
= puterrno(error
);
253 if (va
.va_size
< bva
.va_size
) {
255 length
= bva
.va_size
- va
.va_size
;
257 offset
= bva
.va_size
;
258 length
= va
.va_size
- bva
.va_size
;
260 if (nbl_conflict(vp
, NBL_WRITE
, offset
, length
, 0,
266 if (crgetuid(cr
) == bva
.va_uid
&& !error
&&
267 va
.va_size
!= bva
.va_size
) {
268 va
.va_mask
&= ~AT_SIZE
;
271 bf
.l_start
= (off64_t
)va
.va_size
;
276 error
= fop_space(vp
, F_FREESP
, &bf
, FWRITE
,
277 (offset_t
)va
.va_size
, cr
, &ct
);
287 if (!error
&& va
.va_mask
) {
288 error
= fop_setattr(vp
, &va
, flag
, cr
, &ct
);
292 * Check if the monitor on either fop_space or fop_setattr detected
293 * a delegation conflict and if so, mark the thread flag as
294 * wouldblock so that the response is dropped and the client will
297 if (error
== EAGAIN
&& (ct
.cc_flags
& CC_WOULDBLOCK
)) {
299 curthread
->t_flag
|= T_WOULDBLOCK
;
304 va
.va_mask
= AT_ALL
; /* get everything */
306 error
= rfs4_delegated_getattr(vp
, &va
, 0, cr
);
308 /* check for overflows */
310 acl_perm(vp
, exi
, &va
, cr
);
311 error
= vattr_to_nattr(&va
, &ns
->ns_attr
);
318 * Force modified metadata out to stable storage.
320 (void) fop_fsync(vp
, FNODSYNC
, cr
, &ct
);
324 ns
->ns_status
= puterrno(error
);
/*
 * Return the address of the file handle embedded in the set-attributes
 * arguments (args->saa_fh), i.e. the same handle rfs_setattr() passes to
 * nfs_fhtovp().
 */
327 rfs_setattr_getfh(struct nfssaargs
*args
)
329 return (&args
->saa_fh
);
334 * Returns an fhandle and file attributes for file name in a directory.
338 rfs_lookup(struct nfsdiropargs
*da
, struct nfsdiropres
*dr
,
339 struct exportinfo
*exi
, struct svc_req
*req
, cred_t
*cr
, bool_t ro
)
345 fhandle_t
*fhp
= da
->da_fhandle
;
346 struct sec_ol sec
= {0, 0};
347 bool_t publicfh_flag
= FALSE
, auth_weak
= FALSE
;
352 * Disallow NULL paths
354 if (da
->da_name
== NULL
|| *da
->da_name
== '\0') {
355 dr
->dr_status
= NFSERR_ACCES
;
360 * Allow lookups from the root - the default
361 * location of the public filehandle.
363 if (exi
!= NULL
&& (exi
->exi_export
.ex_flags
& EX_PUBLIC
)) {
367 dvp
= nfs_fhtovp(fhp
, exi
);
369 dr
->dr_status
= NFSERR_STALE
;
375 * Do not allow lookups beyond the root.
376 * If the filehandle matches a filehandle of the exi,
377 * then the ".." refers beyond the root of an exported filesystem.
379 if (strcmp(da
->da_name
, "..") == 0 &&
380 EQFID(&exi
->exi_fid
, (fid_t
*)&fhp
->fh_len
)) {
382 dr
->dr_status
= NFSERR_NOENT
;
386 ca
= (struct sockaddr
*)svc_getrpccaller(req
->rq_xprt
)->buf
;
387 name
= nfscmd_convname(ca
, exi
, da
->da_name
, NFSCMD_CONV_INBOUND
,
391 dr
->dr_status
= NFSERR_ACCES
;
396 * If the public filehandle is used then allow
397 * a multi-component lookup, i.e. evaluate
398 * a pathname and follow symbolic links if
401 * This may result in a vnode in another filesystem
402 * which is OK as long as the filesystem is exported.
404 if (PUBLIC_FH2(fhp
)) {
405 publicfh_flag
= TRUE
;
406 error
= rfs_publicfh_mclookup(name
, dvp
, cr
, &vp
, &exi
,
410 * Do a normal single component lookup.
412 error
= fop_lookup(dvp
, name
, &vp
, NULL
, 0, NULL
, cr
,
416 if (name
!= da
->da_name
)
417 kmem_free(name
, MAXPATHLEN
);
421 va
.va_mask
= AT_ALL
; /* we want everything */
423 error
= rfs4_delegated_getattr(vp
, &va
, 0, cr
);
425 /* check for overflows */
427 acl_perm(vp
, exi
, &va
, cr
);
428 error
= vattr_to_nattr(&va
, &dr
->dr_attr
);
430 if (sec
.sec_flags
& SEC_QUERY
)
431 error
= makefh_ol(&dr
->dr_fhandle
, exi
,
434 error
= makefh(&dr
->dr_fhandle
, vp
,
436 if (!error
&& publicfh_flag
&&
437 !chk_clnt_sec(exi
, req
))
448 * If publicfh_flag is true then we have called rfs_publicfh_mclookup
449 * and have obtained a new exportinfo in exi which needs to be
450 * released. Note that the original exportinfo pointed to by exi
451 * will be released by the caller, common_dispatch.
453 if (publicfh_flag
&& exi
!= NULL
)
457 * If it's public fh, no 0x81, and client's flavor is
458 * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
459 * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
462 dr
->dr_status
= (enum nfsstat
)WNFSERR_CLNT_FLAVOR
;
464 dr
->dr_status
= puterrno(error
);
/*
 * Return the file handle pointer stored in the lookup arguments
 * (da->da_fhandle); unlike the other *_getfh routines no address-of is
 * needed because the member is returned as-is.  rfs_lookup() uses the same
 * member as the handle of the directory being searched.
 */
467 rfs_lookup_getfh(struct nfsdiropargs
*da
)
469 return (da
->da_fhandle
);
473 * Read symbolic link.
474 * Returns the string in the symbolic link at the given fhandle.
478 rfs_readlink(fhandle_t
*fhp
, struct nfsrdlnres
*rl
, struct exportinfo
*exi
,
479 struct svc_req
*req
, cred_t
*cr
, bool_t ro
)
490 vp
= nfs_fhtovp(fhp
, exi
);
493 rl
->rl_status
= NFSERR_STALE
;
497 va
.va_mask
= AT_MODE
;
499 error
= fop_getattr(vp
, &va
, 0, cr
, NULL
);
504 rl
->rl_status
= puterrno(error
);
508 if (MANDLOCK(vp
, va
.va_mode
)) {
511 rl
->rl_status
= NFSERR_ACCES
;
515 /* We lied about the object type for a referral */
516 if (vn_is_nfs_reparse(vp
, cr
))
520 * XNFS and RFC1094 require us to return ENXIO if argument
521 * is not a link. BUGID 1138002.
523 if (vp
->v_type
!= VLNK
&& !is_referral
) {
526 rl
->rl_status
= NFSERR_NXIO
;
531 * Allocate data for pathname. This will be freed by rfs_rlfree.
533 rl
->rl_data
= kmem_alloc(NFS_MAXPATHLEN
, KM_SLEEP
);
539 /* Get an artificial symlink based on a referral */
540 s
= build_symlink(vp
, cr
, &strsz
);
541 global_svstat_ptr
[2][NFS_REFERLINKS
].value
.ui64
++;
542 DTRACE_PROBE2(nfs2serv__func__referral__reflink
,
543 vnode_t
*, vp
, char *, s
);
548 (void) strlcpy(rl
->rl_data
, s
, NFS_MAXPATHLEN
);
549 rl
->rl_count
= (uint32_t)MIN(strsz
, NFS_MAXPATHLEN
);
556 * Set up io vector to read sym link data
558 iov
.iov_base
= rl
->rl_data
;
559 iov
.iov_len
= NFS_MAXPATHLEN
;
562 uio
.uio_segflg
= UIO_SYSSPACE
;
563 uio
.uio_extflg
= UIO_COPY_CACHED
;
564 uio
.uio_loffset
= (offset_t
)0;
565 uio
.uio_resid
= NFS_MAXPATHLEN
;
570 error
= fop_readlink(vp
, &uio
, cr
, NULL
);
572 rl
->rl_count
= (uint32_t)(NFS_MAXPATHLEN
- uio
.uio_resid
);
575 rl
->rl_data
[rl
->rl_count
] = '\0';
582 ca
= (struct sockaddr
*)svc_getrpccaller(req
->rq_xprt
)->buf
;
583 name
= nfscmd_convname(ca
, exi
, rl
->rl_data
,
584 NFSCMD_CONV_OUTBOUND
, MAXPATHLEN
);
586 if (name
!= NULL
&& name
!= rl
->rl_data
) {
587 kmem_free(rl
->rl_data
, NFS_MAXPATHLEN
);
592 * XNFS and RFC1094 require us to return ENXIO if argument
593 * is not a link. UFS returns EINVAL if this is the case,
594 * so we do the mapping here. BUGID 1138002.
597 rl
->rl_status
= NFSERR_NXIO
;
599 rl
->rl_status
= puterrno(error
);
603 rfs_readlink_getfh(fhandle_t
*fhp
)
608 * Free data allocated by rfs_readlink
/*
 * Release rl->rl_data when it is non-NULL.  The size handed to kmem_free()
 * is NFS_MAXPATHLEN, matching the kmem_alloc(NFS_MAXPATHLEN, KM_SLEEP) that
 * rfs_readlink() uses to set rl->rl_data.
 */
611 rfs_rlfree(struct nfsrdlnres
*rl
)
613 if (rl
->rl_data
!= NULL
)
614 kmem_free(rl
->rl_data
, NFS_MAXPATHLEN
);
617 static int rdma_setup_read_data2(struct nfsreadargs
*, struct nfsrdresult
*);
621 * Returns some data read from the file at the given fhandle.
625 rfs_read(struct nfsreadargs
*ra
, struct nfsrdresult
*rr
,
626 struct exportinfo
*exi
, struct svc_req
*req
, cred_t
*cr
, bool_t ro
)
638 vp
= nfs_fhtovp(&ra
->ra_fhandle
, exi
);
641 rr
->rr_status
= NFSERR_STALE
;
645 if (vp
->v_type
!= VREG
) {
648 rr
->rr_status
= NFSERR_ISDIR
;
654 ct
.cc_caller_id
= nfs2_srv_caller_id
;
655 ct
.cc_flags
= CC_DONTBLOCK
;
658 * Enter the critical region before calling fop_rwlock
659 * to avoid a deadlock with write requests.
661 if (nbl_need_check(vp
)) {
662 nbl_start_crit(vp
, RW_READER
);
663 if (nbl_conflict(vp
, NBL_READ
, ra
->ra_offset
, ra
->ra_count
,
668 rr
->rr_status
= NFSERR_ACCES
;
674 error
= fop_rwlock(vp
, V_WRITELOCK_FALSE
, &ct
);
676 /* check if a monitor detected a delegation conflict */
677 if (error
== EAGAIN
&& (ct
.cc_flags
& CC_WOULDBLOCK
)) {
679 /* mark as wouldblock so response is dropped */
680 curthread
->t_flag
|= T_WOULDBLOCK
;
688 error
= fop_getattr(vp
, &va
, 0, cr
, &ct
);
691 fop_rwunlock(vp
, V_WRITELOCK_FALSE
, &ct
);
697 rr
->rr_status
= puterrno(error
);
703 * This is a kludge to allow reading of files created
704 * with no read permission. The owner of the file
705 * is always allowed to read it.
707 if (crgetuid(cr
) != va
.va_uid
) {
708 error
= fop_access(vp
, VREAD
, 0, cr
, &ct
);
712 * Exec is the same as read over the net because
715 error
= fop_access(vp
, VEXEC
, 0, cr
, &ct
);
718 fop_rwunlock(vp
, V_WRITELOCK_FALSE
, &ct
);
723 rr
->rr_status
= puterrno(error
);
729 if (MANDLOCK(vp
, va
.va_mode
)) {
730 fop_rwunlock(vp
, V_WRITELOCK_FALSE
, &ct
);
736 rr
->rr_status
= NFSERR_ACCES
;
741 rr
->rr_ok
.rrok_wlist_len
= 0;
742 rr
->rr_ok
.rrok_wlist
= NULL
;
744 if ((uoff_t
)ra
->ra_offset
>= va
.va_size
) {
748 * In this case, status is NFS_OK, but there is no data
749 * to encode. So set rr_mp to NULL.
752 rr
->rr_ok
.rrok_wlist
= ra
->ra_wlist
;
753 if (rr
->rr_ok
.rrok_wlist
)
754 clist_zero_len(rr
->rr_ok
.rrok_wlist
);
761 (void) rdma_get_wchunk(req
, &iov
, ra
->ra_wlist
);
762 if (ra
->ra_count
> iov
.iov_len
) {
764 rr
->rr_status
= NFSERR_INVAL
;
769 * mp will contain the data to be sent out in the read reply.
770 * This will be freed after the reply has been sent out (by the
772 * Let's roundup the data to a BYTES_PER_XDR_UNIT multiple, so
773 * that the call to xdrmblk_putmblk() never fails.
775 mp
= allocb_wait(RNDUP(ra
->ra_count
), BPRI_MED
, STR_NOSIG
,
778 ASSERT(alloc_err
== 0);
785 iov
.iov_base
= (caddr_t
)mp
->b_datap
->db_base
;
786 iov
.iov_len
= ra
->ra_count
;
791 uio
.uio_segflg
= UIO_SYSSPACE
;
792 uio
.uio_extflg
= UIO_COPY_CACHED
;
793 uio
.uio_loffset
= (offset_t
)ra
->ra_offset
;
794 uio
.uio_resid
= ra
->ra_count
;
796 error
= fop_read(vp
, &uio
, 0, cr
, &ct
);
803 * check if a monitor detected a delegation conflict and
804 * mark as wouldblock so response is dropped
806 if (error
== EAGAIN
&& (ct
.cc_flags
& CC_WOULDBLOCK
))
807 curthread
->t_flag
|= T_WOULDBLOCK
;
809 rr
->rr_status
= puterrno(error
);
811 fop_rwunlock(vp
, V_WRITELOCK_FALSE
, &ct
);
822 * Get attributes again so we can send the latest access
823 * time to the client side for its cache.
827 error
= fop_getattr(vp
, &va
, 0, cr
, &ct
);
833 fop_rwunlock(vp
, V_WRITELOCK_FALSE
, &ct
);
839 rr
->rr_status
= puterrno(error
);
844 rr
->rr_count
= (uint32_t)(ra
->ra_count
- uio
.uio_resid
);
847 rr
->rr_data
= (char *)mp
->b_datap
->db_base
;
850 rr
->rr_data
= (caddr_t
)iov
.iov_base
;
851 if (!rdma_setup_read_data2(ra
, rr
)) {
853 rr
->rr_status
= puterrno(NFSERR_INVAL
);
858 fop_rwunlock(vp
, V_WRITELOCK_FALSE
, &ct
);
862 acl_perm(vp
, exi
, &va
, cr
);
864 /* check for overflows */
865 error
= vattr_to_nattr(&va
, &rr
->rr_attr
);
869 rr
->rr_status
= puterrno(error
);
873 * Free data allocated by rfs_read
876 rfs_rdfree(struct nfsrdresult
*rr
)
880 if (rr
->rr_status
== NFS_OK
) {
/*
 * Return the address of the file handle embedded in the read arguments
 * (ra->ra_fhandle), the same handle rfs_read() passes to nfs_fhtovp().
 */
888 rfs_read_getfh(struct nfsreadargs
*ra
)
890 return (&ra
->ra_fhandle
);
893 #define MAX_IOVECS 12
896 static int rfs_write_sync_hits
= 0;
897 static int rfs_write_sync_misses
= 0;
901 * Write data to file.
902 * Returns attributes of a file after writing some data to it.
904 * Any changes made here, especially in error handling might have
905 * to also be done in rfs_write (which clusters write requests).
909 rfs_write_sync(struct nfswriteargs
*wa
, struct nfsattrstat
*ns
,
910 struct exportinfo
*exi
, struct svc_req
*req
, cred_t
*cr
, bool_t ro
)
917 struct iovec iov
[MAX_IOVECS
];
925 vp
= nfs_fhtovp(&wa
->wa_fhandle
, exi
);
927 ns
->ns_status
= NFSERR_STALE
;
931 if (rdonly(ro
, vp
)) {
933 ns
->ns_status
= NFSERR_ROFS
;
937 if (vp
->v_type
!= VREG
) {
939 ns
->ns_status
= NFSERR_ISDIR
;
945 ct
.cc_caller_id
= nfs2_srv_caller_id
;
946 ct
.cc_flags
= CC_DONTBLOCK
;
948 va
.va_mask
= AT_UID
|AT_MODE
;
950 error
= fop_getattr(vp
, &va
, 0, cr
, &ct
);
954 ns
->ns_status
= puterrno(error
);
959 if (crgetuid(cr
) != va
.va_uid
) {
961 * This is a kludge to allow writes of files created
962 * with read only permission. The owner of the file
963 * is always allowed to write it.
965 error
= fop_access(vp
, VWRITE
, 0, cr
, &ct
);
969 ns
->ns_status
= puterrno(error
);
975 * Can't access a mandatory lock file. This might cause
976 * the NFS service thread to block forever waiting for a
977 * lock to be released that will never be released.
979 if (MANDLOCK(vp
, va
.va_mode
)) {
981 ns
->ns_status
= NFSERR_ACCES
;
986 * We have to enter the critical region before calling fop_rwlock
987 * to avoid a deadlock with ufs.
989 if (nbl_need_check(vp
)) {
990 nbl_start_crit(vp
, RW_READER
);
992 if (nbl_conflict(vp
, NBL_WRITE
, wa
->wa_offset
,
993 wa
->wa_count
, 0, NULL
)) {
999 error
= fop_rwlock(vp
, V_WRITELOCK_TRUE
, &ct
);
1001 /* check if a monitor detected a delegation conflict */
1002 if (error
== EAGAIN
&& (ct
.cc_flags
& CC_WOULDBLOCK
)) {
1004 /* mark as wouldblock so response is dropped */
1005 curthread
->t_flag
|= T_WOULDBLOCK
;
1009 if (wa
->wa_data
|| wa
->wa_rlist
) {
1010 /* Do the RDMA thing if necessary */
1012 iov
[0].iov_base
= (char *)((wa
->wa_rlist
)->u
.c_daddr3
);
1013 iov
[0].iov_len
= wa
->wa_count
;
1015 iov
[0].iov_base
= wa
->wa_data
;
1016 iov
[0].iov_len
= wa
->wa_count
;
1020 uio
.uio_segflg
= UIO_SYSSPACE
;
1021 uio
.uio_extflg
= UIO_COPY_DEFAULT
;
1022 uio
.uio_loffset
= (offset_t
)wa
->wa_offset
;
1023 uio
.uio_resid
= wa
->wa_count
;
1025 * The limit is checked on the client. We
1026 * should allow any size writes here.
1028 uio
.uio_llimit
= curproc
->p_fsz_ctl
;
1029 rlimit
= uio
.uio_llimit
- wa
->wa_offset
;
1030 if (rlimit
< (rlim64_t
)uio
.uio_resid
)
1031 uio
.uio_resid
= (uint_t
)rlimit
;
1034 * for now we assume no append mode
1037 * We're changing creds because VM may fault and we need
1038 * the cred of the current thread to be used if quota
1039 * checking is enabled.
1041 savecred
= curthread
->t_cred
;
1042 curthread
->t_cred
= cr
;
1043 error
= fop_write(vp
, &uio
, FSYNC
, cr
, &ct
);
1044 curthread
->t_cred
= savecred
;
1047 for (m
= wa
->wa_mblk
; m
!= NULL
; m
= m
->b_cont
)
1049 if (iovcnt
<= MAX_IOVECS
) {
1051 rfs_write_sync_hits
++;
1056 rfs_write_sync_misses
++;
1058 iovp
= kmem_alloc(sizeof (*iovp
) * iovcnt
, KM_SLEEP
);
1060 mblk_to_iov(wa
->wa_mblk
, iovcnt
, iovp
);
1062 uio
.uio_iovcnt
= iovcnt
;
1063 uio
.uio_segflg
= UIO_SYSSPACE
;
1064 uio
.uio_extflg
= UIO_COPY_DEFAULT
;
1065 uio
.uio_loffset
= (offset_t
)wa
->wa_offset
;
1066 uio
.uio_resid
= wa
->wa_count
;
1068 * The limit is checked on the client. We
1069 * should allow any size writes here.
1071 uio
.uio_llimit
= curproc
->p_fsz_ctl
;
1072 rlimit
= uio
.uio_llimit
- wa
->wa_offset
;
1073 if (rlimit
< (rlim64_t
)uio
.uio_resid
)
1074 uio
.uio_resid
= (uint_t
)rlimit
;
1077 * For now we assume no append mode.
1080 * We're changing creds because VM may fault and we need
1081 * the cred of the current thread to be used if quota
1082 * checking is enabled.
1084 savecred
= curthread
->t_cred
;
1085 curthread
->t_cred
= cr
;
1086 error
= fop_write(vp
, &uio
, FSYNC
, cr
, &ct
);
1087 curthread
->t_cred
= savecred
;
1090 kmem_free(iovp
, sizeof (*iovp
) * iovcnt
);
1093 fop_rwunlock(vp
, V_WRITELOCK_TRUE
, &ct
);
1097 * Get attributes again so we send the latest mod
1098 * time to the client side for its cache.
1100 va
.va_mask
= AT_ALL
; /* now we want everything */
1102 error
= fop_getattr(vp
, &va
, 0, cr
, &ct
);
1104 /* check for overflows */
1106 acl_perm(vp
, exi
, &va
, cr
);
1107 error
= vattr_to_nattr(&va
, &ns
->ns_attr
);
1116 /* check if a monitor detected a delegation conflict */
1117 if (error
== EAGAIN
&& (ct
.cc_flags
& CC_WOULDBLOCK
))
1118 /* mark as wouldblock so response is dropped */
1119 curthread
->t_flag
|= T_WOULDBLOCK
;
1121 ns
->ns_status
= puterrno(error
);
/*
 * One write request queued in a write cluster; rfs_write() links these
 * together through the list member.
 *
 * NOTE(review): source lines 1129-1131 are absent from this listing.
 * rfs_write() also dereferences rp->cr, rp->ro and rp->thread, so those
 * members are presumably declared there -- confirm against the full file.
 */
1125 struct rfs_async_write
{
/* Arguments of this write; rfs_write() reads wa_offset, wa_count and the data pointers from it. */
1126 struct nfswriteargs
*wa
;
/* Result for this request; rfs_write() treats ns_status == RFSWRITE_INITVAL as "not yet resolved". */
1127 struct nfsattrstat
*ns
;
/* NOTE(review): presumably the RPC request that carried this write; not referenced in the visible code. */
1128 struct svc_req
*req
;
/* Next request in the same cluster (walked with rp = rp->list); NULL ends the chain. */
1132 struct rfs_async_write
*list
;
/*
 * One write cluster.  Clusters are chained from rfs_async_write_head via
 * next, and that chain is manipulated with rfs_async_write_lock held.
 *
 * NOTE(review): source lines 1136-1137 are absent from this listing.
 * rfs_write() also uses lp->fhp (compared against the request's file
 * handle) and lp->cv (waited on and broadcast), so those members are
 * presumably declared there -- confirm against the full file.
 */
1135 struct rfs_async_write_list
{
/* First request of this cluster; the rest follow through each request's own list pointer. */
1138 struct rfs_async_write
*list
;
/* Next cluster on the rfs_async_write_head chain; NULL ends the chain. */
1139 struct rfs_async_write_list
*next
;
1142 static struct rfs_async_write_list
*rfs_async_write_head
= NULL
;
1143 static kmutex_t rfs_async_write_lock
;
1144 static int rfs_write_async
= 1; /* enables write clustering if == 1 */
1146 #define MAXCLIOVECS 42
1147 #define RFSWRITE_INITVAL (enum nfsstat) -1
1150 static int rfs_write_hits
= 0;
1151 static int rfs_write_misses
= 0;
1155 * Write data to file.
1156 * Returns attributes of a file after writing some data to it.
1159 rfs_write(struct nfswriteargs
*wa
, struct nfsattrstat
*ns
,
1160 struct exportinfo
*exi
, struct svc_req
*req
, cred_t
*cr
, bool_t ro
)
1167 struct rfs_async_write_list
*lp
;
1168 struct rfs_async_write_list
*nlp
;
1169 struct rfs_async_write
*rp
;
1170 struct rfs_async_write
*nrp
;
1171 struct rfs_async_write
*trp
;
1172 struct rfs_async_write
*lrp
;
1177 struct iovec
*niovp
;
1178 struct iovec iov
[MAXCLIOVECS
];
1183 struct rfs_async_write nrpsp
;
1184 struct rfs_async_write_list nlpsp
;
1188 caller_context_t ct
;
1190 if (!rfs_write_async
) {
1191 rfs_write_sync(wa
, ns
, exi
, req
, cr
, ro
);
1196 * Initialize status to RFSWRITE_INITVAL instead of 0, since value of 0
1197 * is considered an OK.
1199 ns
->ns_status
= RFSWRITE_INITVAL
;
1207 nrp
->thread
= curthread
;
1210 * Look to see if there is already a cluster started
1213 mutex_enter(&rfs_async_write_lock
);
1214 for (lp
= rfs_async_write_head
; lp
!= NULL
; lp
= lp
->next
) {
1215 if (bcmp(&wa
->wa_fhandle
, lp
->fhp
,
1216 sizeof (fhandle_t
)) == 0)
1221 * If lp is non-NULL, then there is already a cluster
1222 * started. We need to place ourselves in the cluster
1223 * list in the right place as determined by starting
1224 * offset. Conflicts with non-blocking mandatory locked
1225 * regions will be checked when the cluster is processed.
1230 while (rp
!= NULL
&& rp
->wa
->wa_offset
< wa
->wa_offset
) {
1239 while (nrp
->ns
->ns_status
== RFSWRITE_INITVAL
)
1240 cv_wait(&lp
->cv
, &rfs_async_write_lock
);
1241 mutex_exit(&rfs_async_write_lock
);
1247 * No cluster started yet, start one and add ourselves
1248 * to the list of clusters.
1253 nlp
->fhp
= &wa
->wa_fhandle
;
1254 cv_init(&nlp
->cv
, NULL
, CV_DEFAULT
, NULL
);
1258 if (rfs_async_write_head
== NULL
) {
1259 rfs_async_write_head
= nlp
;
1261 lp
= rfs_async_write_head
;
1262 while (lp
->next
!= NULL
)
1266 mutex_exit(&rfs_async_write_lock
);
1269 * Convert the file handle common to all of the requests
1270 * in this cluster to a vnode.
1272 vp
= nfs_fhtovp(&wa
->wa_fhandle
, exi
);
1274 mutex_enter(&rfs_async_write_lock
);
1275 if (rfs_async_write_head
== nlp
)
1276 rfs_async_write_head
= nlp
->next
;
1278 lp
= rfs_async_write_head
;
1279 while (lp
->next
!= nlp
)
1281 lp
->next
= nlp
->next
;
1283 t_flag
= curthread
->t_flag
& T_WOULDBLOCK
;
1284 for (rp
= nlp
->list
; rp
!= NULL
; rp
= rp
->list
) {
1285 rp
->ns
->ns_status
= NFSERR_STALE
;
1286 rp
->thread
->t_flag
|= t_flag
;
1288 cv_broadcast(&nlp
->cv
);
1289 mutex_exit(&rfs_async_write_lock
);
1295 * Can only write regular files. Attempts to write any
1296 * other file types fail with EISDIR.
1298 if (vp
->v_type
!= VREG
) {
1300 mutex_enter(&rfs_async_write_lock
);
1301 if (rfs_async_write_head
== nlp
)
1302 rfs_async_write_head
= nlp
->next
;
1304 lp
= rfs_async_write_head
;
1305 while (lp
->next
!= nlp
)
1307 lp
->next
= nlp
->next
;
1309 t_flag
= curthread
->t_flag
& T_WOULDBLOCK
;
1310 for (rp
= nlp
->list
; rp
!= NULL
; rp
= rp
->list
) {
1311 rp
->ns
->ns_status
= NFSERR_ISDIR
;
1312 rp
->thread
->t_flag
|= t_flag
;
1314 cv_broadcast(&nlp
->cv
);
1315 mutex_exit(&rfs_async_write_lock
);
1321 * Enter the critical region before calling fop_rwlock, to avoid a
1322 * deadlock with ufs.
1324 if (nbl_need_check(vp
)) {
1325 nbl_start_crit(vp
, RW_READER
);
1331 ct
.cc_caller_id
= nfs2_srv_caller_id
;
1332 ct
.cc_flags
= CC_DONTBLOCK
;
1335 * Lock the file for writing. This operation provides
1336 * the delay which allows clusters to grow.
1338 error
= fop_rwlock(vp
, V_WRITELOCK_TRUE
, &ct
);
1340 /* check if a monitor detected a delegation conflict */
1341 if (error
== EAGAIN
&& (ct
.cc_flags
& CC_WOULDBLOCK
)) {
1345 /* mark as wouldblock so response is dropped */
1346 curthread
->t_flag
|= T_WOULDBLOCK
;
1347 mutex_enter(&rfs_async_write_lock
);
1348 if (rfs_async_write_head
== nlp
)
1349 rfs_async_write_head
= nlp
->next
;
1351 lp
= rfs_async_write_head
;
1352 while (lp
->next
!= nlp
)
1354 lp
->next
= nlp
->next
;
1356 for (rp
= nlp
->list
; rp
!= NULL
; rp
= rp
->list
) {
1357 if (rp
->ns
->ns_status
== RFSWRITE_INITVAL
) {
1358 rp
->ns
->ns_status
= puterrno(error
);
1359 rp
->thread
->t_flag
|= T_WOULDBLOCK
;
1362 cv_broadcast(&nlp
->cv
);
1363 mutex_exit(&rfs_async_write_lock
);
1369 * Disconnect this cluster from the list of clusters.
1370 * The cluster that is being dealt with must be fixed
1371 * in size after this point, so there is no reason
1372 * to leave it on the list so that new requests can
1375 * The algorithm is that the first write request will
1376 * create a cluster, convert the file handle to a
1377 * vnode pointer, and then lock the file for writing.
1378 * This request is not likely to be clustered with
1379 * any others. However, the next request will create
1380 * a new cluster and be blocked in fop_rwlock while
1381 * the first request is being processed. This delay
1382 * will allow more requests to be clustered in this
1385 mutex_enter(&rfs_async_write_lock
);
1386 if (rfs_async_write_head
== nlp
)
1387 rfs_async_write_head
= nlp
->next
;
1389 lp
= rfs_async_write_head
;
1390 while (lp
->next
!= nlp
)
1392 lp
->next
= nlp
->next
;
1394 mutex_exit(&rfs_async_write_lock
);
1397 * Step through the list of requests in this cluster.
1398 * We need to check permissions to make sure that all
1399 * of the requests have sufficient permission to write
1400 * the file. A cluster can be composed of requests
1401 * from different clients and different users on each
1404 * As a side effect, we also calculate the size of the
1405 * byte range that this cluster encompasses.
1408 off
= rp
->wa
->wa_offset
;
1411 if (rdonly(rp
->ro
, vp
)) {
1412 rp
->ns
->ns_status
= NFSERR_ROFS
;
1413 t_flag
= curthread
->t_flag
& T_WOULDBLOCK
;
1414 rp
->thread
->t_flag
|= t_flag
;
1418 va
.va_mask
= AT_UID
|AT_MODE
;
1420 error
= fop_getattr(vp
, &va
, 0, rp
->cr
, &ct
);
1423 if (crgetuid(rp
->cr
) != va
.va_uid
) {
1425 * This is a kludge to allow writes of files
1426 * created with read only permission. The
1427 * owner of the file is always allowed to
1430 error
= fop_access(vp
, VWRITE
, 0, rp
->cr
, &ct
);
1432 if (!error
&& MANDLOCK(vp
, va
.va_mode
))
1437 * Check for a conflict with a nbmand-locked region.
1439 if (in_crit
&& nbl_conflict(vp
, NBL_WRITE
, rp
->wa
->wa_offset
,
1440 rp
->wa
->wa_count
, 0, NULL
)) {
1445 rp
->ns
->ns_status
= puterrno(error
);
1446 t_flag
= curthread
->t_flag
& T_WOULDBLOCK
;
1447 rp
->thread
->t_flag
|= t_flag
;
1450 if (len
< rp
->wa
->wa_offset
+ rp
->wa
->wa_count
- off
)
1451 len
= rp
->wa
->wa_offset
+ rp
->wa
->wa_count
- off
;
1452 } while ((rp
= rp
->list
) != NULL
);
1455 * Step through the cluster attempting to gather as many
1456 * requests which are contiguous as possible. These
1457 * contiguous requests are handled via one call to fop_write
1458 * instead of different calls to fop_write. We also keep
1459 * track of the fact that any data was written.
1465 * Skip any requests which are already marked as having an
1468 if (rp
->ns
->ns_status
!= RFSWRITE_INITVAL
) {
1474 * Count the number of iovec's which are required
1475 * to handle this set of requests. One iovec is
1476 * needed for each data buffer, whether addressed
1477 * by wa_data or by the b_rptr pointers in the
1483 if (lrp
->wa
->wa_data
|| lrp
->wa
->wa_rlist
)
1486 m
= lrp
->wa
->wa_mblk
;
1492 if (lrp
->list
== NULL
||
1493 lrp
->list
->ns
->ns_status
!= RFSWRITE_INITVAL
||
1494 lrp
->wa
->wa_offset
+ lrp
->wa
->wa_count
!=
1495 lrp
->list
->wa
->wa_offset
) {
1502 if (iovcnt
<= MAXCLIOVECS
) {
1511 niovp
= kmem_alloc(sizeof (*niovp
) * iovcnt
, KM_SLEEP
);
1514 * Put together the scatter/gather iovecs.
1520 if (trp
->wa
->wa_data
|| trp
->wa
->wa_rlist
) {
1521 if (trp
->wa
->wa_rlist
) {
1523 (char *)((trp
->wa
->wa_rlist
)->
1525 iovp
->iov_len
= trp
->wa
->wa_count
;
1527 iovp
->iov_base
= trp
->wa
->wa_data
;
1528 iovp
->iov_len
= trp
->wa
->wa_count
;
1532 m
= trp
->wa
->wa_mblk
;
1533 rcount
= trp
->wa
->wa_count
;
1535 iovp
->iov_base
= (caddr_t
)m
->b_rptr
;
1536 iovp
->iov_len
= (m
->b_wptr
- m
->b_rptr
);
1537 rcount
-= iovp
->iov_len
;
1539 iovp
->iov_len
+= rcount
;
1546 count
+= trp
->wa
->wa_count
;
1548 } while (trp
!= lrp
);
1550 uio
.uio_iov
= niovp
;
1551 uio
.uio_iovcnt
= iovcnt
;
1552 uio
.uio_segflg
= UIO_SYSSPACE
;
1553 uio
.uio_extflg
= UIO_COPY_DEFAULT
;
1554 uio
.uio_loffset
= (offset_t
)rp
->wa
->wa_offset
;
1555 uio
.uio_resid
= count
;
1557 * The limit is checked on the client. We
1558 * should allow any size writes here.
1560 uio
.uio_llimit
= curproc
->p_fsz_ctl
;
1561 rlimit
= uio
.uio_llimit
- rp
->wa
->wa_offset
;
1562 if (rlimit
< (rlim64_t
)uio
.uio_resid
)
1563 uio
.uio_resid
= (uint_t
)rlimit
;
1566 * For now we assume no append mode.
1570 * We're changing creds because VM may fault
1571 * and we need the cred of the current
1572 * thread to be used if quota checking is
1575 savecred
= curthread
->t_cred
;
1576 curthread
->t_cred
= cr
;
1577 error
= fop_write(vp
, &uio
, 0, rp
->cr
, &ct
);
1578 curthread
->t_cred
= savecred
;
1580 /* check if a monitor detected a delegation conflict */
1581 if (error
== EAGAIN
&& (ct
.cc_flags
& CC_WOULDBLOCK
))
1582 /* mark as wouldblock so response is dropped */
1583 curthread
->t_flag
|= T_WOULDBLOCK
;
1586 kmem_free(niovp
, sizeof (*niovp
) * iovcnt
);
1591 * Get attributes again so we send the latest mod
1592 * time to the client side for its cache.
1594 va
.va_mask
= AT_ALL
; /* now we want everything */
1596 error
= fop_getattr(vp
, &va
, 0, rp
->cr
, &ct
);
1599 acl_perm(vp
, exi
, &va
, rp
->cr
);
1603 * Fill in the status responses for each request
1604 * which was just handled. Also, copy the latest
1605 * attributes in to the attribute responses if
1608 t_flag
= curthread
->t_flag
& T_WOULDBLOCK
;
1610 rp
->thread
->t_flag
|= t_flag
;
1611 /* check for overflows */
1613 error
= vattr_to_nattr(&va
, &rp
->ns
->ns_attr
);
1615 rp
->ns
->ns_status
= puterrno(error
);
1617 } while (rp
!= lrp
);
1618 } while (rp
!= NULL
);
1621 * If any data was written at all, then we need to flush
1622 * the data and metadata to stable storage.
1625 error
= fop_putpage(vp
, (uoff_t
)off
, len
, 0, cr
, &ct
);
1628 error
= fop_fsync(vp
, FNODSYNC
, cr
, &ct
);
1632 fop_rwunlock(vp
, V_WRITELOCK_TRUE
, &ct
);
1638 t_flag
= curthread
->t_flag
& T_WOULDBLOCK
;
1639 mutex_enter(&rfs_async_write_lock
);
1640 for (rp
= nlp
->list
; rp
!= NULL
; rp
= rp
->list
) {
1641 if (rp
->ns
->ns_status
== RFSWRITE_INITVAL
) {
1642 rp
->ns
->ns_status
= puterrno(error
);
1643 rp
->thread
->t_flag
|= t_flag
;
1646 cv_broadcast(&nlp
->cv
);
1647 mutex_exit(&rfs_async_write_lock
);
1652 rfs_write_getfh(struct nfswriteargs
*wa
)
1654 return (&wa
->wa_fhandle
);
1659 * Creates a file with given attributes and returns those attributes
1660 * and an fhandle for the new file.
1663 rfs_create(struct nfscreatargs
*args
, struct nfsdiropres
*dr
,
1664 struct exportinfo
*exi
, struct svc_req
*req
, cred_t
*cr
, bool_t ro
)
1673 char *name
= args
->ca_da
.da_name
;
1674 vnode_t
*tvp
= NULL
;
1678 struct sockaddr
*ca
;
1681 * Disallow NULL paths
1683 if (name
== NULL
|| *name
== '\0') {
1684 dr
->dr_status
= NFSERR_ACCES
;
1688 dvp
= nfs_fhtovp(args
->ca_da
.da_fhandle
, exi
);
1690 dr
->dr_status
= NFSERR_STALE
;
1694 error
= sattr_to_vattr(args
->ca_sa
, &va
);
1696 dr
->dr_status
= puterrno(error
);
1701 * Must specify the mode.
1703 if (!(va
.va_mask
& AT_MODE
)) {
1705 dr
->dr_status
= NFSERR_INVAL
;
1710 * This is a completely gross hack to make mknod
1711 * work over the wire until we can whack the protocol
1713 if ((va
.va_mode
& IFMT
) == IFCHR
) {
1714 if (args
->ca_sa
->sa_size
== (uint_t
)NFS_FIFO_DEV
)
1715 va
.va_type
= VFIFO
; /* xtra kludge for named pipe */
1719 * uncompress the received dev_t
1720 * if the top half is zero indicating a request
1721 * from an `older style' OS.
1723 if ((va
.va_size
& 0xffff0000) == 0)
1724 va
.va_rdev
= nfsv2_expdev(va
.va_size
);
1726 va
.va_rdev
= (dev_t
)va
.va_size
;
1728 va
.va_mask
&= ~AT_SIZE
;
1729 } else if ((va
.va_mode
& IFMT
) == IFBLK
) {
1732 * uncompress the received dev_t
1733 * if the top half is zero indicating a request
1734 * from an `older style' OS.
1736 if ((va
.va_size
& 0xffff0000) == 0)
1737 va
.va_rdev
= nfsv2_expdev(va
.va_size
);
1739 va
.va_rdev
= (dev_t
)va
.va_size
;
1740 va
.va_mask
&= ~AT_SIZE
;
1741 } else if ((va
.va_mode
& IFMT
) == IFSOCK
) {
1746 va
.va_mode
&= ~IFMT
;
1747 va
.va_mask
|= AT_TYPE
;
1749 ca
= (struct sockaddr
*)svc_getrpccaller(req
->rq_xprt
)->buf
;
1750 name
= nfscmd_convname(ca
, exi
, name
, NFSCMD_CONV_INBOUND
,
1753 dr
->dr_status
= puterrno(EINVAL
);
1758 * Why was the choice made to use VWRITE as the mode to the
1759 * call to fop_create ? This results in a bug. When a client
1760 * opens a file that already exists and is RDONLY, the second
1761 * open fails with an EACCES because of the mode.
1766 if (!(va
.va_mask
& AT_SIZE
) || va
.va_type
!= VREG
) {
1767 error
= fop_lookup(dvp
, name
, &tvp
, NULL
, 0, NULL
, cr
,
1773 at
.va_mask
= AT_MODE
;
1774 error
= fop_getattr(tvp
, &at
, 0, cr
, NULL
);
1776 mode
= (at
.va_mode
& S_IWUSR
) ? VWRITE
: VREAD
;
1783 if (rdonly(ro
, dvp
)) {
1785 } else if (va
.va_type
!= VREG
&& va
.va_type
!= VFIFO
&&
1786 va
.va_type
!= VSOCK
&& secpolicy_sys_devices(cr
) != 0) {
1794 * If file size is being modified on an already existing file
1795 * make sure that there are no conflicting non-blocking mandatory
1796 * locks in the region being manipulated. Return EACCES if there
1797 * are conflicting locks.
1799 if (!error
&& (va
.va_type
== VREG
) && (va
.va_mask
& AT_SIZE
)) {
1800 lookuperr
= fop_lookup(dvp
, name
, &tvp
, NULL
, 0, NULL
, cr
,
1804 rfs4_check_delegated(FWRITE
, tvp
, va
.va_size
== 0)) {
1806 curthread
->t_flag
|= T_WOULDBLOCK
;
1810 if (!lookuperr
&& nbl_need_check(tvp
)) {
1812 * The file exists. Now check if it has any
1813 * conflicting non-blocking mandatory locks
1814 * in the region being changed.
1820 nbl_start_crit(tvp
, RW_READER
);
1823 bva
.va_mask
= AT_SIZE
;
1824 error
= fop_getattr(tvp
, &bva
, 0, cr
, NULL
);
1826 if (va
.va_size
< bva
.va_size
) {
1827 offset
= va
.va_size
;
1828 length
= bva
.va_size
- va
.va_size
;
1830 offset
= bva
.va_size
;
1831 length
= va
.va_size
- bva
.va_size
;
1834 if (nbl_conflict(tvp
, NBL_WRITE
,
1835 offset
, length
, 0, NULL
)) {
1845 } else if (tvp
!= NULL
) {
1852 * If filesystem is shared with nosuid then remove any
1853 * setuid/setgid bits on create.
1855 if (va
.va_type
== VREG
&&
1856 exi
->exi_export
.ex_flags
& EX_NOSUID
)
1857 va
.va_mode
&= ~(VSUID
| VSGID
);
1859 error
= fop_create(dvp
, name
, &va
, NONEXCL
, mode
, &vp
, cr
, 0,
1864 if ((va
.va_mask
& AT_SIZE
) && (va
.va_size
== 0))
1869 if (rfs4_check_delegated(FWRITE
, vp
, trunc
)) {
1871 curthread
->t_flag
|= T_WOULDBLOCK
;
1874 va
.va_mask
= AT_ALL
;
1876 error
= fop_getattr(vp
, &va
, 0, cr
, NULL
);
1878 /* check for overflows */
1880 acl_perm(vp
, exi
, &va
, cr
);
1881 error
= vattr_to_nattr(&va
, &dr
->dr_attr
);
1883 error
= makefh(&dr
->dr_fhandle
, vp
,
1888 * Force modified metadata out to stable storage.
1890 * If an underlying vp exists, pass it to fop_fsync
1892 if (fop_realvp(vp
, &realvp
, NULL
) == 0)
1893 (void) fop_fsync(realvp
, FNODSYNC
, cr
, NULL
);
1895 (void) fop_fsync(vp
, FNODSYNC
, cr
, NULL
);
1906 * Force modified data and metadata out to stable storage.
1908 (void) fop_fsync(dvp
, 0, cr
, NULL
);
1914 dr
->dr_status
= puterrno(error
);
1916 if (name
!= args
->ca_da
.da_name
)
1917 kmem_free(name
, MAXPATHLEN
);
1920 rfs_create_getfh(struct nfscreatargs
*args
)
1922 return (args
->ca_da
.da_fhandle
);
1927 * Remove named file from parent directory.
1931 rfs_remove(struct nfsdiropargs
*da
, enum nfsstat
*status
,
1932 struct exportinfo
*exi
, struct svc_req
*req
, cred_t
*cr
, bool_t ro
)
1940 * Disallow NULL paths
1942 if (da
->da_name
== NULL
|| *da
->da_name
== '\0') {
1943 *status
= NFSERR_ACCES
;
1947 vp
= nfs_fhtovp(da
->da_fhandle
, exi
);
1949 *status
= NFSERR_STALE
;
1953 if (rdonly(ro
, vp
)) {
1955 *status
= NFSERR_ROFS
;
1960 * Check for a conflict with a non-blocking mandatory share reservation.
1962 error
= fop_lookup(vp
, da
->da_name
, &targvp
, NULL
, 0,
1963 NULL
, cr
, NULL
, NULL
, NULL
);
1966 *status
= puterrno(error
);
1971 * If the file is delegated to a v4 client, then initiate
1972 * recall and drop this request (by setting T_WOULDBLOCK).
1973 * The client will eventually re-transmit the request and
1974 * (hopefully), by then, the v4 client will have returned
1978 if (rfs4_check_delegated(FWRITE
, targvp
, TRUE
)) {
1981 curthread
->t_flag
|= T_WOULDBLOCK
;
1985 if (nbl_need_check(targvp
)) {
1986 nbl_start_crit(targvp
, RW_READER
);
1988 if (nbl_conflict(targvp
, NBL_REMOVE
, 0, 0, 0, NULL
)) {
1994 error
= fop_remove(vp
, da
->da_name
, cr
, NULL
, 0);
1997 * Force modified data and metadata out to stable storage.
1999 (void) fop_fsync(vp
, 0, cr
, NULL
);
2003 nbl_end_crit(targvp
);
2007 *status
= puterrno(error
);
2012 rfs_remove_getfh(struct nfsdiropargs
*da
)
2014 return (da
->da_fhandle
);
2019 * Give a file (from) a new name (to).
2023 rfs_rename(struct nfsrnmargs
*args
, enum nfsstat
*status
,
2024 struct exportinfo
*exi
, struct svc_req
*req
, cred_t
*cr
, bool_t ro
)
2029 struct exportinfo
*to_exi
;
2035 fromvp
= nfs_fhtovp(args
->rna_from
.da_fhandle
, exi
);
2036 if (fromvp
== NULL
) {
2037 *status
= NFSERR_STALE
;
2041 fh
= args
->rna_to
.da_fhandle
;
2042 to_exi
= checkexport(&fh
->fh_fsid
, (fid_t
*)&fh
->fh_xlen
);
2043 if (to_exi
== NULL
) {
2045 *status
= NFSERR_ACCES
;
2050 if (to_exi
!= exi
) {
2052 *status
= NFSERR_XDEV
;
2056 tovp
= nfs_fhtovp(args
->rna_to
.da_fhandle
, exi
);
2059 *status
= NFSERR_STALE
;
2063 if (fromvp
->v_type
!= VDIR
|| tovp
->v_type
!= VDIR
) {
2066 *status
= NFSERR_NOTDIR
;
2071 * Disallow NULL paths
2073 if (args
->rna_from
.da_name
== NULL
|| *args
->rna_from
.da_name
== '\0' ||
2074 args
->rna_to
.da_name
== NULL
|| *args
->rna_to
.da_name
== '\0') {
2077 *status
= NFSERR_ACCES
;
2081 if (rdonly(ro
, tovp
)) {
2084 *status
= NFSERR_ROFS
;
2089 * Check for a conflict with a non-blocking mandatory share reservation.
2091 error
= fop_lookup(fromvp
, args
->rna_from
.da_name
, &srcvp
, NULL
, 0,
2092 NULL
, cr
, NULL
, NULL
, NULL
);
2096 *status
= puterrno(error
);
2100 /* Check for delegations on the source file */
2102 if (rfs4_check_delegated(FWRITE
, srcvp
, FALSE
)) {
2106 curthread
->t_flag
|= T_WOULDBLOCK
;
2110 /* Check for delegation on the file being renamed over, if it exists */
2112 if (rfs4_deleg_policy
!= SRV_NEVER_DELEGATE
&&
2113 fop_lookup(tovp
, args
->rna_to
.da_name
, &targvp
, NULL
, 0, NULL
, cr
,
2114 NULL
, NULL
, NULL
) == 0) {
2116 if (rfs4_check_delegated(FWRITE
, targvp
, TRUE
)) {
2121 curthread
->t_flag
|= T_WOULDBLOCK
;
2128 if (nbl_need_check(srcvp
)) {
2129 nbl_start_crit(srcvp
, RW_READER
);
2131 if (nbl_conflict(srcvp
, NBL_RENAME
, 0, 0, 0, NULL
)) {
2137 error
= fop_rename(fromvp
, args
->rna_from
.da_name
,
2138 tovp
, args
->rna_to
.da_name
, cr
, NULL
, 0);
2141 vn_renamepath(tovp
, srcvp
, args
->rna_to
.da_name
,
2142 strlen(args
->rna_to
.da_name
));
2145 * Force modified data and metadata out to stable storage.
2147 (void) fop_fsync(tovp
, 0, cr
, NULL
);
2148 (void) fop_fsync(fromvp
, 0, cr
, NULL
);
2152 nbl_end_crit(srcvp
);
2157 *status
= puterrno(error
);
2161 rfs_rename_getfh(struct nfsrnmargs
*args
)
2163 return (args
->rna_from
.da_fhandle
);
2168 * Create a file (to) which is a hard link to the given file (from).
2172 rfs_link(struct nfslinkargs
*args
, enum nfsstat
*status
,
2173 struct exportinfo
*exi
, struct svc_req
*req
, cred_t
*cr
, bool_t ro
)
2178 struct exportinfo
*to_exi
;
2181 fromvp
= nfs_fhtovp(args
->la_from
, exi
);
2182 if (fromvp
== NULL
) {
2183 *status
= NFSERR_STALE
;
2187 fh
= args
->la_to
.da_fhandle
;
2188 to_exi
= checkexport(&fh
->fh_fsid
, (fid_t
*)&fh
->fh_xlen
);
2189 if (to_exi
== NULL
) {
2191 *status
= NFSERR_ACCES
;
2196 if (to_exi
!= exi
) {
2198 *status
= NFSERR_XDEV
;
2202 tovp
= nfs_fhtovp(args
->la_to
.da_fhandle
, exi
);
2205 *status
= NFSERR_STALE
;
2209 if (tovp
->v_type
!= VDIR
) {
2212 *status
= NFSERR_NOTDIR
;
2216 * Disallow NULL paths
2218 if (args
->la_to
.da_name
== NULL
|| *args
->la_to
.da_name
== '\0') {
2221 *status
= NFSERR_ACCES
;
2225 if (rdonly(ro
, tovp
)) {
2228 *status
= NFSERR_ROFS
;
2232 error
= fop_link(tovp
, fromvp
, args
->la_to
.da_name
, cr
, NULL
, 0);
2235 * Force modified data and metadata out to stable storage.
2237 (void) fop_fsync(tovp
, 0, cr
, NULL
);
2238 (void) fop_fsync(fromvp
, FNODSYNC
, cr
, NULL
);
2243 *status
= puterrno(error
);
2247 rfs_link_getfh(struct nfslinkargs
*args
)
2249 return (args
->la_from
);
2253 * Symbolically link to a file.
2254 * Create a file (sla_from) with the given attributes which is a symbolic link
2255 * to the given path name (sla_tnm).
2258 rfs_symlink(struct nfsslargs
*args
, enum nfsstat
*status
,
2259 struct exportinfo
*exi
, struct svc_req
*req
, cred_t
*cr
, bool_t ro
)
2266 struct sockaddr
*ca
;
2270 * Disallow NULL paths
2272 if (args
->sla_from
.da_name
== NULL
|| *args
->sla_from
.da_name
== '\0') {
2273 *status
= NFSERR_ACCES
;
2277 vp
= nfs_fhtovp(args
->sla_from
.da_fhandle
, exi
);
2279 *status
= NFSERR_STALE
;
2283 if (rdonly(ro
, vp
)) {
2285 *status
= NFSERR_ROFS
;
2289 error
= sattr_to_vattr(args
->sla_sa
, &va
);
2292 *status
= puterrno(error
);
2296 if (!(va
.va_mask
& AT_MODE
)) {
2298 *status
= NFSERR_INVAL
;
2302 ca
= (struct sockaddr
*)svc_getrpccaller(req
->rq_xprt
)->buf
;
2303 name
= nfscmd_convname(ca
, exi
, args
->sla_tnm
,
2304 NFSCMD_CONV_INBOUND
, MAXPATHLEN
);
2307 *status
= NFSERR_ACCES
;
2312 va
.va_mask
|= AT_TYPE
;
2314 error
= fop_symlink(vp
, args
->sla_from
.da_name
, &va
, name
, cr
, NULL
, 0);
2317 * Force new data and metadata out to stable storage.
2319 lerror
= fop_lookup(vp
, args
->sla_from
.da_name
, &svp
, NULL
, 0,
2320 NULL
, cr
, NULL
, NULL
, NULL
);
2323 (void) fop_fsync(svp
, 0, cr
, NULL
);
2328 * Force modified data and metadata out to stable storage.
2330 (void) fop_fsync(vp
, 0, cr
, NULL
);
2334 *status
= puterrno(error
);
2335 if (name
!= args
->sla_tnm
)
2336 kmem_free(name
, MAXPATHLEN
);
2340 rfs_symlink_getfh(struct nfsslargs
*args
)
2342 return (args
->sla_from
.da_fhandle
);
2347 * Create a directory with the given name, parent directory, and attributes.
2348 * Returns a file handle and attributes for the new directory.
2352 rfs_mkdir(struct nfscreatargs
*args
, struct nfsdiropres
*dr
,
2353 struct exportinfo
*exi
, struct svc_req
*req
, cred_t
*cr
, bool_t ro
)
2357 vnode_t
*dvp
= NULL
;
2359 char *name
= args
->ca_da
.da_name
;
2362 * Disallow NULL paths
2364 if (name
== NULL
|| *name
== '\0') {
2365 dr
->dr_status
= NFSERR_ACCES
;
2369 vp
= nfs_fhtovp(args
->ca_da
.da_fhandle
, exi
);
2371 dr
->dr_status
= NFSERR_STALE
;
2375 if (rdonly(ro
, vp
)) {
2377 dr
->dr_status
= NFSERR_ROFS
;
2381 error
= sattr_to_vattr(args
->ca_sa
, &va
);
2384 dr
->dr_status
= puterrno(error
);
2388 if (!(va
.va_mask
& AT_MODE
)) {
2390 dr
->dr_status
= NFSERR_INVAL
;
2395 va
.va_mask
|= AT_TYPE
;
2397 error
= fop_mkdir(vp
, name
, &va
, &dvp
, cr
, NULL
, 0, NULL
);
2401 * Attributes of the newly created directory should
2402 * be returned to the client.
2404 va
.va_mask
= AT_ALL
; /* We want everything */
2405 error
= fop_getattr(dvp
, &va
, 0, cr
, NULL
);
2407 /* check for overflows */
2409 acl_perm(vp
, exi
, &va
, cr
);
2410 error
= vattr_to_nattr(&va
, &dr
->dr_attr
);
2412 error
= makefh(&dr
->dr_fhandle
, dvp
, exi
);
2416 * Force new data and metadata out to stable storage.
2418 (void) fop_fsync(dvp
, 0, cr
, NULL
);
2423 * Force modified data and metadata out to stable storage.
2425 (void) fop_fsync(vp
, 0, cr
, NULL
);
2429 dr
->dr_status
= puterrno(error
);
2433 rfs_mkdir_getfh(struct nfscreatargs
*args
)
2435 return (args
->ca_da
.da_fhandle
);
2439 * Remove a directory.
2440 * Remove the given directory name from the given parent directory.
2444 rfs_rmdir(struct nfsdiropargs
*da
, enum nfsstat
*status
,
2445 struct exportinfo
*exi
, struct svc_req
*req
, cred_t
*cr
, bool_t ro
)
2451 * Disallow NULL paths
2453 if (da
->da_name
== NULL
|| *da
->da_name
== '\0') {
2454 *status
= NFSERR_ACCES
;
2458 vp
= nfs_fhtovp(da
->da_fhandle
, exi
);
2460 *status
= NFSERR_STALE
;
2464 if (rdonly(ro
, vp
)) {
2466 *status
= NFSERR_ROFS
;
2471 * fop_rmdir takes a third argument (the current
2472 * directory of the process). That's because someone
2473 * wants to return EINVAL if one tries to remove ".".
2474 * Of course, NFS servers have no idea what their
2475 * clients' current directories are. We fake it by
2476 * supplying a vnode known to exist and illegal to
2479 error
= fop_rmdir(vp
, da
->da_name
, rootdir
, cr
, NULL
, 0);
2482 * Force modified data and metadata out to stable storage.
2484 (void) fop_fsync(vp
, 0, cr
, NULL
);
2489 * System V defines rmdir to return EEXIST, not ENOTEMPTY,
2490 * if the directory is not empty. A System V NFS server
2491 * needs to map NFSERR_EXIST to NFSERR_NOTEMPTY to transmit
2494 if (error
== EEXIST
)
2495 *status
= NFSERR_NOTEMPTY
;
2497 *status
= puterrno(error
);
2501 rfs_rmdir_getfh(struct nfsdiropargs
*da
)
2503 return (da
->da_fhandle
);
2508 rfs_readdir(struct nfsrddirargs
*rda
, struct nfsrddirres
*rd
,
2509 struct exportinfo
*exi
, struct svc_req
*req
, cred_t
*cr
, bool_t ro
)
2517 struct sockaddr
*ca
;
2521 vp
= nfs_fhtovp(&rda
->rda_fh
, exi
);
2523 rd
->rd_entries
= NULL
;
2524 rd
->rd_status
= NFSERR_STALE
;
2528 if (vp
->v_type
!= VDIR
) {
2530 rd
->rd_entries
= NULL
;
2531 rd
->rd_status
= NFSERR_NOTDIR
;
2535 (void) fop_rwlock(vp
, V_WRITELOCK_FALSE
, NULL
);
2537 error
= fop_access(vp
, VREAD
, 0, cr
, NULL
);
2540 rd
->rd_entries
= NULL
;
2544 if (rda
->rda_count
== 0) {
2545 rd
->rd_entries
= NULL
;
2551 rda
->rda_count
= MIN(rda
->rda_count
, NFS_MAXDATA
);
2554 * Allocate data for entries. This will be freed by rfs_rddirfree.
2556 rd
->rd_bufsize
= (uint_t
)rda
->rda_count
;
2557 rd
->rd_entries
= kmem_alloc(rd
->rd_bufsize
, KM_SLEEP
);
2560 * Set up io vector to read directory data
2562 iov
.iov_base
= (caddr_t
)rd
->rd_entries
;
2563 iov
.iov_len
= rda
->rda_count
;
2566 uio
.uio_segflg
= UIO_SYSSPACE
;
2567 uio
.uio_extflg
= UIO_COPY_CACHED
;
2568 uio
.uio_loffset
= (offset_t
)rda
->rda_offset
;
2569 uio
.uio_resid
= rda
->rda_count
;
2574 error
= fop_readdir(vp
, &uio
, cr
, &iseof
, NULL
, 0);
2583 if (uio
.uio_resid
== rda
->rda_count
) {
2587 rd
->rd_size
= (uint32_t)(rda
->rda_count
-
2589 rd
->rd_eof
= iseof
? TRUE
: FALSE
;
2593 ca
= (struct sockaddr
*)svc_getrpccaller(req
->rq_xprt
)->buf
;
2594 nents
= nfscmd_countents((char *)rd
->rd_entries
, rd
->rd_size
);
2595 ret
= nfscmd_convdirplus(ca
, exi
, (char *)rd
->rd_entries
, nents
,
2596 rda
->rda_count
, &ndata
);
2601 * We had to drop one or more entries in order to fit
2602 * during the character conversion. We need to patch
2603 * up the size and eof info.
2607 dropbytes
= nfscmd_dropped_entrysize(
2608 (struct dirent64
*)rd
->rd_entries
, nents
, ret
);
2609 rd
->rd_size
-= dropbytes
;
2611 if (ndata
== NULL
) {
2612 ndata
= (char *)rd
->rd_entries
;
2613 } else if (ndata
!= (char *)rd
->rd_entries
) {
2614 kmem_free(rd
->rd_entries
, rd
->rd_bufsize
);
2615 rd
->rd_entries
= (void *)ndata
;
2616 rd
->rd_bufsize
= rda
->rda_count
;
2620 fop_rwunlock(vp
, V_WRITELOCK_FALSE
, NULL
);
2624 * Don't do this. It causes local disk writes when just
2625 * reading the file and the overhead is deemed larger
2629 * Force modified metadata out to stable storage.
2631 (void) fop_fsync(vp
, FNODSYNC
, cr
, NULL
);
2636 rd
->rd_status
= puterrno(error
);
2640 rfs_readdir_getfh(struct nfsrddirargs
*rda
)
2642 return (&rda
->rda_fh
);
2645 rfs_rddirfree(struct nfsrddirres
*rd
)
2647 if (rd
->rd_entries
!= NULL
)
2648 kmem_free(rd
->rd_entries
, rd
->rd_bufsize
);
2653 rfs_statfs(fhandle_t
*fh
, struct nfsstatfs
*fs
, struct exportinfo
*exi
,
2654 struct svc_req
*req
, cred_t
*cr
, bool_t ro
)
2657 struct statvfs64 sb
;
2660 vp
= nfs_fhtovp(fh
, exi
);
2662 fs
->fs_status
= NFSERR_STALE
;
2666 error
= VFS_STATVFS(vp
->v_vfsp
, &sb
);
2669 fs
->fs_tsize
= nfstsize();
2670 fs
->fs_bsize
= sb
.f_frsize
;
2671 fs
->fs_blocks
= sb
.f_blocks
;
2672 fs
->fs_bfree
= sb
.f_bfree
;
2673 fs
->fs_bavail
= sb
.f_bavail
;
2678 fs
->fs_status
= puterrno(error
);
2682 rfs_statfs_getfh(fhandle_t
*fh
)
2688 sattr_to_vattr(struct nfssattr
*sa
, struct vattr
*vap
)
2693 * There was a sign extension bug in some VFS based systems
2694 * which stored the mode as a short. When it would get
2695 * assigned to a u_long, no sign extension would occur.
2696 * It needed to, but this wasn't noticed because sa_mode
2697 * would then get assigned back to the short, thus ignoring
2698 * the upper 16 bits of sa_mode.
2700 * To make this implementation work for both broken
2701 * clients and good clients, we check for both versions
2704 if (sa
->sa_mode
!= (uint32_t)((ushort_t
)-1) &&
2705 sa
->sa_mode
!= (uint32_t)-1) {
2706 vap
->va_mask
|= AT_MODE
;
2707 vap
->va_mode
= sa
->sa_mode
;
2709 if (sa
->sa_uid
!= (uint32_t)-1) {
2710 vap
->va_mask
|= AT_UID
;
2711 vap
->va_uid
= sa
->sa_uid
;
2713 if (sa
->sa_gid
!= (uint32_t)-1) {
2714 vap
->va_mask
|= AT_GID
;
2715 vap
->va_gid
= sa
->sa_gid
;
2717 if (sa
->sa_size
!= (uint32_t)-1) {
2718 vap
->va_mask
|= AT_SIZE
;
2719 vap
->va_size
= sa
->sa_size
;
2721 if (sa
->sa_atime
.tv_sec
!= (int32_t)-1 &&
2722 sa
->sa_atime
.tv_usec
!= (int32_t)-1) {
2724 /* return error if time overflow */
2725 if (!NFS2_TIME_OK(sa
->sa_atime
.tv_sec
))
2728 vap
->va_mask
|= AT_ATIME
;
2730 * nfs protocol defines times as unsigned so don't extend sign,
2731 * unless sysadmin set nfs_allow_preepoch_time.
2733 NFS_TIME_T_CONVERT(vap
->va_atime
.tv_sec
, sa
->sa_atime
.tv_sec
);
2734 vap
->va_atime
.tv_nsec
= (uint32_t)(sa
->sa_atime
.tv_usec
* 1000);
2736 if (sa
->sa_mtime
.tv_sec
!= (int32_t)-1 &&
2737 sa
->sa_mtime
.tv_usec
!= (int32_t)-1) {
2739 /* return error if time overflow */
2740 if (!NFS2_TIME_OK(sa
->sa_mtime
.tv_sec
))
2743 vap
->va_mask
|= AT_MTIME
;
2745 * nfs protocol defines times as unsigned so don't extend sign,
2746 * unless sysadmin set nfs_allow_preepoch_time.
2748 NFS_TIME_T_CONVERT(vap
->va_mtime
.tv_sec
, sa
->sa_mtime
.tv_sec
);
2749 vap
->va_mtime
.tv_nsec
= (uint32_t)(sa
->sa_mtime
.tv_usec
* 1000);
2754 static enum nfsftype vt_to_nf
[] = {
2755 0, NFREG
, NFDIR
, NFBLK
, NFCHR
, NFLNK
, 0, 0, 0, NFSOC
, 0
2759 * check the following fields for overflow: nodeid, size, and time.
2760 * There could be a problem when converting 64-bit LP64 fields
2761 * into 32-bit ones. Return an error if there is an overflow.
2764 vattr_to_nattr(struct vattr
*vap
, struct nfsfattr
*na
)
2766 ASSERT(vap
->va_type
>= VNON
&& vap
->va_type
<= VBAD
);
2767 na
->na_type
= vt_to_nf
[vap
->va_type
];
2769 if (vap
->va_mode
== (unsigned short) -1)
2770 na
->na_mode
= (uint32_t)-1;
2772 na
->na_mode
= VTTOIF(vap
->va_type
) | vap
->va_mode
;
2774 if (vap
->va_uid
== (unsigned short)(-1))
2775 na
->na_uid
= (uint32_t)(-1);
2776 else if (vap
->va_uid
== UID_NOBODY
)
2777 na
->na_uid
= (uint32_t)NFS_UID_NOBODY
;
2779 na
->na_uid
= vap
->va_uid
;
2781 if (vap
->va_gid
== (unsigned short)(-1))
2782 na
->na_gid
= (uint32_t)-1;
2783 else if (vap
->va_gid
== GID_NOBODY
)
2784 na
->na_gid
= (uint32_t)NFS_GID_NOBODY
;
2786 na
->na_gid
= vap
->va_gid
;
2789 * Do we need to check fsid for overflow? It is 64-bit in the
2790 * vattr, but are bigger than 32 bit values supported?
2792 na
->na_fsid
= vap
->va_fsid
;
2794 na
->na_nodeid
= vap
->va_nodeid
;
2797 * Check to make sure that the nodeid is representable over the
2798 * wire without losing bits.
2800 if (vap
->va_nodeid
!= (u_longlong_t
)na
->na_nodeid
)
2802 na
->na_nlink
= vap
->va_nlink
;
2805 * Check for big files here, instead of at the caller. See
2806 * comments in cstat for large special file explanation.
2808 if (vap
->va_size
> (u_longlong_t
)MAXOFF32_T
) {
2809 if ((vap
->va_type
== VREG
) || (vap
->va_type
== VDIR
))
2811 if ((vap
->va_type
== VBLK
) || (vap
->va_type
== VCHR
)) {
2812 /* UNKNOWN_SIZE | OVERFLOW */
2813 na
->na_size
= MAXOFF32_T
;
2815 na
->na_size
= vap
->va_size
;
2817 na
->na_size
= vap
->va_size
;
2820 * If the vnode times overflow the 32-bit times that NFS2
2821 * uses on the wire then return an error.
2823 if (!NFS_VAP_TIME_OK(vap
)) {
2826 na
->na_atime
.tv_sec
= vap
->va_atime
.tv_sec
;
2827 na
->na_atime
.tv_usec
= vap
->va_atime
.tv_nsec
/ 1000;
2829 na
->na_mtime
.tv_sec
= vap
->va_mtime
.tv_sec
;
2830 na
->na_mtime
.tv_usec
= vap
->va_mtime
.tv_nsec
/ 1000;
2832 na
->na_ctime
.tv_sec
= vap
->va_ctime
.tv_sec
;
2833 na
->na_ctime
.tv_usec
= vap
->va_ctime
.tv_nsec
/ 1000;
2836 * If the dev_t will fit into 16 bits then compress
2837 * it, otherwise leave it alone. See comments in
2840 if (getminor(vap
->va_rdev
) <= SO4_MAXMIN
&&
2841 getmajor(vap
->va_rdev
) <= SO4_MAXMAJ
)
2842 na
->na_rdev
= nfsv2_cmpdev(vap
->va_rdev
);
2844 (void) cmpldev(&na
->na_rdev
, vap
->va_rdev
);
2846 na
->na_blocks
= vap
->va_nblocks
;
2847 na
->na_blocksize
= vap
->va_blksize
;
2850 * This bit of ugliness is a *TEMPORARY* hack to preserve the
2851 * over-the-wire protocols for named-pipe vnodes. It remaps the
2852 * VFIFO type to the special over-the-wire type. (see note in nfs.h)
2855 * If you are porting the NFS to a non-Sun server, you probably
2856 * don't want to include the following block of code. The
2857 * over-the-wire special file types will be changing with the
2858 * NFS Protocol Revision.
2860 if (vap
->va_type
== VFIFO
)
2866 * acl v2 support: returns approximate permission.
2867 * default: returns minimal permission (more restrictive)
2868 * aclok: returns maximal permission (less restrictive)
2869 * This routine changes the permissions that are already in *va.
2870 * If a file has minimal ACL, i.e. aclcnt == MIN_ACL_ENTRIES,
2871 * CLASS_OBJ is always the same as GROUP_OBJ entry.
2874 acl_perm(struct vnode
*vp
, struct exportinfo
*exi
, struct vattr
*va
, cred_t
*cr
)
2885 /* don't care about the default ACL */
2886 vsa
.vsa_mask
= (VSA_ACL
| VSA_ACLCNT
);
2887 error
= fop_getsecattr(vp
, &vsa
, 0, cr
, NULL
);
2890 aclcnt
= vsa
.vsa_aclcnt
;
2891 if (aclcnt
> MIN_ACL_ENTRIES
) {
2892 /* non-trivial ACL */
2893 aclentp
= vsa
.vsa_aclentp
;
2894 if (exi
->exi_export
.ex_flags
& EX_ACLOK
) {
2895 /* maximal permissions */
2898 for (; aclcnt
> 0; aclcnt
--, aclentp
++) {
2899 switch (aclentp
->a_type
) {
2904 aclentp
->a_perm
<< 3;
2905 other_perm
|= aclentp
->a_perm
;
2909 aclentp
->a_perm
<< 3;
2912 other_perm
|= aclentp
->a_perm
;
2915 other_orig
= aclentp
->a_perm
;
2918 mask_perm
= aclentp
->a_perm
;
2924 grp_perm
&= mask_perm
<< 3;
2925 other_perm
&= mask_perm
;
2926 other_perm
|= other_orig
;
2929 /* minimal permissions */
2932 for (; aclcnt
> 0; aclcnt
--, aclentp
++) {
2933 switch (aclentp
->a_type
) {
2939 aclentp
->a_perm
<< 3;
2945 aclentp
->a_perm
<< 3;
2961 va
->va_mode
&= ~077;
2962 va
->va_mode
|= grp_perm
| other_perm
;
2965 kmem_free(vsa
.vsa_aclentp
,
2966 vsa
.vsa_aclcnt
* sizeof (aclent_t
));
2973 mutex_init(&rfs_async_write_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
2974 nfs2_srv_caller_id
= fs_new_caller_id();
2980 mutex_destroy(&rfs_async_write_lock
);
2984 rdma_setup_read_data2(struct nfsreadargs
*ra
, struct nfsrdresult
*rr
)
2988 uint32_t count
= rr
->rr_count
;
2992 if (rdma_setup_read_chunks(wcl
, count
, &wlist_len
) == FALSE
) {
2997 rr
->rr_ok
.rrok_wlist_len
= wlist_len
;
2998 rr
->rr_ok
.rrok_wlist
= wcl
;