4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
24 * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
25 * Copyright (c) 2013 by Delphix. All rights reserved.
28 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
29 /* All Rights Reserved */
31 #include <sys/param.h>
32 #include <sys/types.h>
33 #include <sys/systm.h>
37 #include <sys/vnode.h>
39 #include <sys/errno.h>
40 #include <sys/sysmacros.h>
41 #include <sys/statvfs.h>
43 #include <sys/dirent.h>
44 #include <sys/cmn_err.h>
45 #include <sys/debug.h>
46 #include <sys/systeminfo.h>
47 #include <sys/flock.h>
48 #include <sys/nbmlock.h>
49 #include <sys/policy.h>
52 #include <rpc/types.h>
55 #include <rpc/rpc_rdma.h>
58 #include <nfs/export.h>
59 #include <nfs/nfs_cmd.h>
61 #include <sys/strsubr.h>
69 * These are the interface routines for the server side of the
70 * Network File System. See the NFS version 3 protocol specification
71 * for a description of this interface.
/*
 * Write verifier. rfs3_write() copies this value into the verf field of
 * every successful WRITE3 reply. NOTE(review): where it is initialized is
 * not visible in this part of the file.
 */
74 static writeverf3 write3verf
;
/*
 * Converts a client-supplied sattr3 into a struct vattr. Callers store the
 * return value in their error variable.
 */
76 static int sattr3_to_vattr(sattr3
*, struct vattr
*);
/*
 * Converts a struct vattr into the fattr3 wire form. Per the call site in
 * rfs3_getattr(), an error is returned if a time or size is out of range.
 */
77 static int vattr_to_fattr3(struct vattr
*, fattr3
*);
/*
 * NOTE(review): no call site is visible in this part of the file; presumably
 * fills a wcc_attr from a struct vattr -- confirm against the definition.
 */
78 static int vattr_to_wcc_attr(struct vattr
*, wcc_attr
*);
/*
 * NOTE(review): no call site is visible in this part of the file; presumably
 * fills a pre_op_attr from a struct vattr -- confirm against the definition.
 */
79 static void vattr_to_pre_op_attr(struct vattr
*, pre_op_attr
*);
/*
 * Builds a wcc_data from the "before" and "after" attributes of an object.
 * Callers pass the before pointer first and the after pointer second, and
 * either pointer may be NULL when the corresponding getattr failed.
 */
80 static void vattr_to_wcc_data(struct vattr
*, struct vattr
*, wcc_data
*);
/*
 * Sets up the RDMA portion of a READ3 reply. rfs3_read() treats a zero
 * return as failure and replies with NFS3ERR_INVAL.
 */
81 static int rdma_setup_read_data3(READ3args
*, READ3resok
*);
/*
 * Defined elsewhere. When non-zero and the request is not using RDMA,
 * rfs3_read() attempts the loaned-buffer read path.
 */
83 extern int nfs_loaned_buffers
;
/*
 * Caller id placed in caller_context_t.cc_caller_id by the NFSv3 server
 * routines (setattr, read, write) before they call into the file system.
 */
85 u_longlong_t nfs3_srv_caller_id
;
89 rfs3_getattr(GETATTR3args
*args
, GETATTR3res
*resp
, struct exportinfo
*exi
,
90 struct svc_req
*req
, cred_t
*cr
, bool_t ro
)
96 vp
= nfs3_fhtovp(&args
->object
, exi
);
98 DTRACE_NFSV3_4(op__getattr__start
, struct svc_req
*, req
,
99 cred_t
*, cr
, vnode_t
*, vp
, GETATTR3args
*, args
);
107 error
= rfs4_delegated_getattr(vp
, &va
, 0, cr
);
110 /* Lie about the object type for a referral */
111 if (vn_is_nfs_reparse(vp
, cr
))
114 /* overflow error if time or size is out of range */
115 error
= vattr_to_fattr3(&va
, &resp
->resok
.obj_attributes
);
118 resp
->status
= NFS3_OK
;
120 DTRACE_NFSV3_4(op__getattr__done
, struct svc_req
*, req
,
121 cred_t
*, cr
, vnode_t
*, vp
, GETATTR3res
*, resp
);
129 if (curthread
->t_flag
& T_WOULDBLOCK
) {
130 curthread
->t_flag
&= ~T_WOULDBLOCK
;
131 resp
->status
= NFS3ERR_JUKEBOX
;
133 resp
->status
= puterrno3(error
);
135 DTRACE_NFSV3_4(op__getattr__done
, struct svc_req
*, req
,
136 cred_t
*, cr
, vnode_t
*, vp
, GETATTR3res
*, resp
);
143 rfs3_getattr_getfh(GETATTR3args
*args
)
146 return (&args
->object
);
150 rfs3_setattr(SETATTR3args
*args
, SETATTR3res
*resp
, struct exportinfo
*exi
,
151 struct svc_req
*req
, cred_t
*cr
, bool_t ro
)
167 vp
= nfs3_fhtovp(&args
->object
, exi
);
169 DTRACE_NFSV3_4(op__setattr__start
, struct svc_req
*, req
,
170 cred_t
*, cr
, vnode_t
*, vp
, SETATTR3args
*, args
);
177 error
= sattr3_to_vattr(&args
->new_attributes
, &ava
);
182 * We need to specially handle size changes because of
183 * possible conflicting NBMAND locks. Get into critical
184 * region before fop_getattr, so the size attribute is
185 * valid when checking conflicts.
187 * Also, check to see if the v4 side of the server has
188 * delegated this file. If so, then we return JUKEBOX to
189 * allow the client to retransmit its request.
191 if (vp
->v_type
== VREG
&& (ava
.va_mask
& AT_SIZE
)) {
192 if (nbl_need_check(vp
)) {
193 nbl_start_crit(vp
, RW_READER
);
198 bva
.va_mask
= AT_ALL
;
199 error
= rfs4_delegated_getattr(vp
, &bva
, 0, cr
);
202 * If we can't get the attributes, then we can't do the
203 * right access checking. So, we'll fail the request.
210 if (rdonly(ro
, vp
)) {
211 resp
->status
= NFS3ERR_ROFS
;
215 if (args
->guard
.check
&&
216 (args
->guard
.obj_ctime
.seconds
!= bva
.va_ctime
.tv_sec
||
217 args
->guard
.obj_ctime
.nseconds
!= bva
.va_ctime
.tv_nsec
)) {
218 resp
->status
= NFS3ERR_NOT_SYNC
;
222 if (args
->new_attributes
.mtime
.set_it
== SET_TO_CLIENT_TIME
)
228 * If the filesystem is exported with nosuid, then mask off
229 * the setuid and setgid bits.
231 if ((ava
.va_mask
& AT_MODE
) && vp
->v_type
== VREG
&&
232 (exi
->exi_export
.ex_flags
& EX_NOSUID
))
233 ava
.va_mode
&= ~(VSUID
| VSGID
);
237 ct
.cc_caller_id
= nfs3_srv_caller_id
;
238 ct
.cc_flags
= CC_DONTBLOCK
;
241 * We need to specially handle size changes because it is
242 * possible for the client to create a file with modes
243 * which indicate read-only, but with the file opened for
244 * writing. If the client then tries to set the size of
245 * the file, then the normal access checking done in
246 * fop_setattr would prevent the client from doing so,
247 * although it should be legal for it to do so. To get
248 * around this, we do the access checking for ourselves
249 * and then use fop_space which doesn't do the access
250 * checking which fop_setattr does. fop_space can only
251 * operate on VREG files, let fop_setattr handle the other
252 * extremely rare cases.
253 * Also the client should not be allowed to change the
254 * size of the file if there is a conflicting non-blocking
255 * mandatory lock in the region of the change.
257 if (vp
->v_type
== VREG
&& (ava
.va_mask
& AT_SIZE
)) {
262 if (ava
.va_size
< bva
.va_size
) {
263 offset
= ava
.va_size
;
264 length
= bva
.va_size
- ava
.va_size
;
266 offset
= bva
.va_size
;
267 length
= ava
.va_size
- bva
.va_size
;
269 if (nbl_conflict(vp
, NBL_WRITE
, offset
, length
, 0,
276 if (crgetuid(cr
) == bva
.va_uid
&& ava
.va_size
!= bva
.va_size
) {
277 ava
.va_mask
&= ~AT_SIZE
;
280 bf
.l_start
= (off64_t
)ava
.va_size
;
284 error
= fop_space(vp
, F_FREESP
, &bf
, FWRITE
,
285 (offset_t
)ava
.va_size
, cr
, &ct
);
289 if (!error
&& ava
.va_mask
)
290 error
= fop_setattr(vp
, &ava
, flag
, cr
, &ct
);
292 /* check if a monitor detected a delegation conflict */
293 if (error
== EAGAIN
&& (ct
.cc_flags
& CC_WOULDBLOCK
)) {
294 resp
->status
= NFS3ERR_JUKEBOX
;
298 ava
.va_mask
= AT_ALL
;
299 avap
= rfs4_delegated_getattr(vp
, &ava
, 0, cr
) ? NULL
: &ava
;
302 * Force modified metadata out to stable storage.
304 (void) fop_fsync(vp
, FNODSYNC
, cr
, &ct
);
312 resp
->status
= NFS3_OK
;
313 vattr_to_wcc_data(bvap
, avap
, &resp
->resok
.obj_wcc
);
315 DTRACE_NFSV3_4(op__setattr__done
, struct svc_req
*, req
,
316 cred_t
*, cr
, vnode_t
*, vp
, SETATTR3res
*, resp
);
323 if (curthread
->t_flag
& T_WOULDBLOCK
) {
324 curthread
->t_flag
&= ~T_WOULDBLOCK
;
325 resp
->status
= NFS3ERR_JUKEBOX
;
327 resp
->status
= puterrno3(error
);
329 DTRACE_NFSV3_4(op__setattr__done
, struct svc_req
*, req
,
330 cred_t
*, cr
, vnode_t
*, vp
, SETATTR3res
*, resp
);
337 vattr_to_wcc_data(bvap
, avap
, &resp
->resfail
.obj_wcc
);
341 rfs3_setattr_getfh(SETATTR3args
*args
)
344 return (&args
->object
);
349 rfs3_lookup(LOOKUP3args
*args
, LOOKUP3res
*resp
, struct exportinfo
*exi
,
350 struct svc_req
*req
, cred_t
*cr
, bool_t ro
)
360 struct sec_ol sec
= {0, 0};
361 bool_t publicfh_flag
= FALSE
, auth_weak
= FALSE
;
368 * Allow lookups from the root - the default
369 * location of the public filehandle.
371 if (exi
!= NULL
&& (exi
->exi_export
.ex_flags
& EX_PUBLIC
)) {
375 DTRACE_NFSV3_4(op__lookup__start
, struct svc_req
*, req
,
376 cred_t
*, cr
, vnode_t
*, dvp
, LOOKUP3args
*, args
);
378 dvp
= nfs3_fhtovp(&args
->what
.dir
, exi
);
380 DTRACE_NFSV3_4(op__lookup__start
, struct svc_req
*, req
,
381 cred_t
*, cr
, vnode_t
*, dvp
, LOOKUP3args
*, args
);
389 dva
.va_mask
= AT_ALL
;
390 dvap
= fop_getattr(dvp
, &dva
, 0, cr
, NULL
) ? NULL
: &dva
;
392 if (args
->what
.name
== nfs3nametoolong
) {
393 resp
->status
= NFS3ERR_NAMETOOLONG
;
397 if (args
->what
.name
== NULL
|| *(args
->what
.name
) == '\0') {
398 resp
->status
= NFS3ERR_ACCES
;
402 fhp
= &args
->what
.dir
;
403 if (strcmp(args
->what
.name
, "..") == 0 &&
404 EQFID(&exi
->exi_fid
, FH3TOFIDP(fhp
))) {
405 resp
->status
= NFS3ERR_NOENT
;
409 ca
= (struct sockaddr
*)svc_getrpccaller(req
->rq_xprt
)->buf
;
410 name
= nfscmd_convname(ca
, exi
, args
->what
.name
,
411 NFSCMD_CONV_INBOUND
, MAXPATHLEN
+ 1);
414 resp
->status
= NFS3ERR_ACCES
;
419 * If the public filehandle is used then allow
420 * a multi-component lookup
422 if (PUBLIC_FH3(&args
->what
.dir
)) {
423 publicfh_flag
= TRUE
;
424 error
= rfs_publicfh_mclookup(name
, dvp
, cr
, &vp
,
426 if (error
&& exi
!= NULL
)
427 exi_rele(exi
); /* See comment below Re: publicfh_flag */
429 error
= fop_lookup(dvp
, name
, &vp
,
430 NULL
, 0, NULL
, cr
, NULL
, NULL
, NULL
);
433 if (name
!= args
->what
.name
)
434 kmem_free(name
, MAXPATHLEN
+ 1);
436 dva
.va_mask
= AT_ALL
;
437 dvap
= fop_getattr(dvp
, &dva
, 0, cr
, NULL
) ? NULL
: &dva
;
442 if (sec
.sec_flags
& SEC_QUERY
) {
443 error
= makefh3_ol(&resp
->resok
.object
, exi
, sec
.sec_index
);
445 error
= makefh3(&resp
->resok
.object
, vp
, exi
);
446 if (!error
&& publicfh_flag
&& !chk_clnt_sec(exi
, req
))
451 * If publicfh_flag is true then we have called rfs_publicfh_mclookup
452 * and have obtained a new exportinfo in exi which needs to be
453 * released. Note that the original exportinfo pointed to by exi
454 * will be released by the caller, common_dispatch.
465 vap
= rfs4_delegated_getattr(vp
, &va
, 0, cr
) ? NULL
: &va
;
469 resp
->status
= NFS3_OK
;
470 vattr_to_post_op_attr(vap
, &resp
->resok
.obj_attributes
);
471 vattr_to_post_op_attr(dvap
, &resp
->resok
.dir_attributes
);
474 * If it's public fh, no 0x81, and client's flavor is
475 * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
476 * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
479 resp
->status
= (enum nfsstat3
)WNFSERR_CLNT_FLAVOR
;
481 DTRACE_NFSV3_4(op__lookup__done
, struct svc_req
*, req
,
482 cred_t
*, cr
, vnode_t
*, dvp
, LOOKUP3res
*, resp
);
488 if (curthread
->t_flag
& T_WOULDBLOCK
) {
489 curthread
->t_flag
&= ~T_WOULDBLOCK
;
490 resp
->status
= NFS3ERR_JUKEBOX
;
492 resp
->status
= puterrno3(error
);
494 DTRACE_NFSV3_4(op__lookup__done
, struct svc_req
*, req
,
495 cred_t
*, cr
, vnode_t
*, dvp
, LOOKUP3res
*, resp
);
499 vattr_to_post_op_attr(dvap
, &resp
->resfail
.dir_attributes
);
504 rfs3_lookup_getfh(LOOKUP3args
*args
)
507 return (&args
->what
.dir
);
512 rfs3_access(ACCESS3args
*args
, ACCESS3res
*resp
, struct exportinfo
*exi
,
513 struct svc_req
*req
, cred_t
*cr
, bool_t ro
)
523 vp
= nfs3_fhtovp(&args
->object
, exi
);
525 DTRACE_NFSV3_4(op__access__start
, struct svc_req
*, req
,
526 cred_t
*, cr
, vnode_t
*, vp
, ACCESS3args
*, args
);
534 * If the file system is exported read only, it is not appropriate
535 * to check write permissions for regular files and directories.
536 * Special files are interpreted by the client, so the underlying
537 * permissions are sent back to the client for interpretation.
539 if (rdonly(ro
, vp
) && (vp
->v_type
== VREG
|| vp
->v_type
== VDIR
))
545 * We need the mode so that we can correctly determine access
546 * permissions relative to a mandatory lock file. Access to
547 * mandatory lock files is denied on the server, so it might
548 * as well be reflected to the client during the open.
550 va
.va_mask
= AT_MODE
;
551 error
= fop_getattr(vp
, &va
, 0, cr
, NULL
);
557 resp
->resok
.access
= 0;
560 if (args
->access
& ACCESS3_READ
) {
561 error
= fop_access(vp
, VREAD
, 0, cr
, NULL
);
563 if (curthread
->t_flag
& T_WOULDBLOCK
)
565 } else if (!MANDLOCK(vp
, va
.va_mode
))
566 resp
->resok
.access
|= ACCESS3_READ
;
568 if ((args
->access
& ACCESS3_LOOKUP
) && vp
->v_type
== VDIR
) {
569 error
= fop_access(vp
, VEXEC
, 0, cr
, NULL
);
571 if (curthread
->t_flag
& T_WOULDBLOCK
)
574 resp
->resok
.access
|= ACCESS3_LOOKUP
;
576 if (checkwriteperm
&&
577 (args
->access
& (ACCESS3_MODIFY
|ACCESS3_EXTEND
))) {
578 error
= fop_access(vp
, VWRITE
, 0, cr
, NULL
);
580 if (curthread
->t_flag
& T_WOULDBLOCK
)
582 } else if (!MANDLOCK(vp
, va
.va_mode
))
583 resp
->resok
.access
|=
584 (args
->access
& (ACCESS3_MODIFY
|ACCESS3_EXTEND
));
586 if (checkwriteperm
&&
587 (args
->access
& ACCESS3_DELETE
) && vp
->v_type
== VDIR
) {
588 error
= fop_access(vp
, VWRITE
, 0, cr
, NULL
);
590 if (curthread
->t_flag
& T_WOULDBLOCK
)
593 resp
->resok
.access
|= ACCESS3_DELETE
;
595 if (args
->access
& ACCESS3_EXECUTE
) {
596 error
= fop_access(vp
, VEXEC
, 0, cr
, NULL
);
598 if (curthread
->t_flag
& T_WOULDBLOCK
)
600 } else if (!MANDLOCK(vp
, va
.va_mode
))
601 resp
->resok
.access
|= ACCESS3_EXECUTE
;
605 vap
= rfs4_delegated_getattr(vp
, &va
, 0, cr
) ? NULL
: &va
;
607 resp
->status
= NFS3_OK
;
608 vattr_to_post_op_attr(vap
, &resp
->resok
.obj_attributes
);
610 DTRACE_NFSV3_4(op__access__done
, struct svc_req
*, req
,
611 cred_t
*, cr
, vnode_t
*, vp
, ACCESS3res
*, resp
);
618 if (curthread
->t_flag
& T_WOULDBLOCK
) {
619 curthread
->t_flag
&= ~T_WOULDBLOCK
;
620 resp
->status
= NFS3ERR_JUKEBOX
;
622 resp
->status
= puterrno3(error
);
623 DTRACE_NFSV3_4(op__access__done
, struct svc_req
*, req
,
624 cred_t
*, cr
, vnode_t
*, vp
, ACCESS3res
*, resp
);
627 vattr_to_post_op_attr(vap
, &resp
->resfail
.obj_attributes
);
631 rfs3_access_getfh(ACCESS3args
*args
)
634 return (&args
->object
);
639 rfs3_readlink(READLINK3args
*args
, READLINK3res
*resp
, struct exportinfo
*exi
,
640 struct svc_req
*req
, cred_t
*cr
, bool_t ro
)
655 vp
= nfs3_fhtovp(&args
->symlink
, exi
);
657 DTRACE_NFSV3_4(op__readlink__start
, struct svc_req
*, req
,
658 cred_t
*, cr
, vnode_t
*, vp
, READLINK3args
*, args
);
666 error
= fop_getattr(vp
, &va
, 0, cr
, NULL
);
672 /* We lied about the object type for a referral */
673 if (vn_is_nfs_reparse(vp
, cr
))
676 if (vp
->v_type
!= VLNK
&& !is_referral
) {
677 resp
->status
= NFS3ERR_INVAL
;
681 if (MANDLOCK(vp
, va
.va_mode
)) {
682 resp
->status
= NFS3ERR_ACCES
;
686 data
= kmem_alloc(MAXPATHLEN
+ 1, KM_SLEEP
);
692 /* Get an artificial symlink based on a referral */
693 s
= build_symlink(vp
, cr
, &strsz
);
694 global_svstat_ptr
[3][NFS_REFERLINKS
].value
.ui64
++;
695 DTRACE_PROBE2(nfs3serv__func__referral__reflink
,
696 vnode_t
*, vp
, char *, s
);
701 (void) strlcpy(data
, s
, MAXPATHLEN
+ 1);
708 iov
.iov_len
= MAXPATHLEN
;
711 uio
.uio_segflg
= UIO_SYSSPACE
;
712 uio
.uio_extflg
= UIO_COPY_CACHED
;
714 uio
.uio_resid
= MAXPATHLEN
;
716 error
= fop_readlink(vp
, &uio
, cr
, NULL
);
719 *(data
+ MAXPATHLEN
- uio
.uio_resid
) = '\0';
723 vap
= fop_getattr(vp
, &va
, 0, cr
, NULL
) ? NULL
: &va
;
725 /* Lie about object type again just to be consistent */
726 if (is_referral
&& vap
!= NULL
)
731 * Don't do this. It causes local disk writes when just
732 * reading the file and the overhead is deemed larger
736 * Force modified metadata out to stable storage.
738 (void) fop_fsync(vp
, FNODSYNC
, cr
, NULL
);
742 kmem_free(data
, MAXPATHLEN
+ 1);
746 ca
= (struct sockaddr
*)svc_getrpccaller(req
->rq_xprt
)->buf
;
747 name
= nfscmd_convname(ca
, exi
, data
, NFSCMD_CONV_OUTBOUND
,
752 * Even though the conversion failed, we return
753 * something. We just don't translate it.
758 resp
->status
= NFS3_OK
;
759 vattr_to_post_op_attr(vap
, &resp
->resok
.symlink_attributes
);
760 resp
->resok
.data
= name
;
762 DTRACE_NFSV3_4(op__readlink__done
, struct svc_req
*, req
,
763 cred_t
*, cr
, vnode_t
*, vp
, READLINK3res
*, resp
);
767 kmem_free(data
, MAXPATHLEN
+ 1);
772 if (curthread
->t_flag
& T_WOULDBLOCK
) {
773 curthread
->t_flag
&= ~T_WOULDBLOCK
;
774 resp
->status
= NFS3ERR_JUKEBOX
;
776 resp
->status
= puterrno3(error
);
778 DTRACE_NFSV3_4(op__readlink__done
, struct svc_req
*, req
,
779 cred_t
*, cr
, vnode_t
*, vp
, READLINK3res
*, resp
);
782 vattr_to_post_op_attr(vap
, &resp
->resfail
.symlink_attributes
);
786 rfs3_readlink_getfh(READLINK3args
*args
)
789 return (&args
->symlink
);
793 rfs3_readlink_free(READLINK3res
*resp
)
796 if (resp
->status
== NFS3_OK
)
797 kmem_free(resp
->resok
.data
, MAXPATHLEN
+ 1);
801 * Server routine to handle read
802 * May handle RDMA data as well as mblks
806 rfs3_read(READ3args
*args
, READ3res
*resp
, struct exportinfo
*exi
,
807 struct svc_req
*req
, cred_t
*cr
, bool_t ro
)
813 struct iovec iov
, *iovp
= NULL
;
819 int need_rwunlock
= 0;
827 vp
= nfs3_fhtovp(&args
->file
, exi
);
829 DTRACE_NFSV3_4(op__read__start
, struct svc_req
*, req
,
830 cred_t
*, cr
, vnode_t
*, vp
, READ3args
*, args
);
838 if (args
->count
> clist_len(args
->wlist
)) {
845 /* use loaned buffers for TCP */
846 loaned_buffers
= (nfs_loaned_buffers
&& !rdma_used
) ? 1 : 0;
850 ct
.cc_caller_id
= nfs3_srv_caller_id
;
851 ct
.cc_flags
= CC_DONTBLOCK
;
854 * Enter the critical region before calling fop_rwlock
855 * to avoid a deadlock with write requests.
857 if (nbl_need_check(vp
)) {
858 nbl_start_crit(vp
, RW_READER
);
860 if (nbl_conflict(vp
, NBL_READ
, args
->offset
, args
->count
, 0,
867 error
= fop_rwlock(vp
, V_WRITELOCK_FALSE
, &ct
);
869 /* check if a monitor detected a delegation conflict */
870 if (error
== EAGAIN
&& (ct
.cc_flags
& CC_WOULDBLOCK
)) {
871 resp
->status
= NFS3ERR_JUKEBOX
;
878 error
= fop_getattr(vp
, &va
, 0, cr
, &ct
);
881 * If we can't get the attributes, then we can't do the
882 * right access checking. So, we'll fail the request.
889 if (vp
->v_type
!= VREG
) {
890 resp
->status
= NFS3ERR_INVAL
;
894 if (crgetuid(cr
) != va
.va_uid
) {
895 error
= fop_access(vp
, VREAD
, 0, cr
, &ct
);
897 if (curthread
->t_flag
& T_WOULDBLOCK
)
899 error
= fop_access(vp
, VEXEC
, 0, cr
, &ct
);
905 if (MANDLOCK(vp
, va
.va_mode
)) {
906 resp
->status
= NFS3ERR_ACCES
;
910 offset
= args
->offset
;
911 if (offset
>= va
.va_size
) {
912 fop_rwunlock(vp
, V_WRITELOCK_FALSE
, &ct
);
915 resp
->status
= NFS3_OK
;
916 vattr_to_post_op_attr(vap
, &resp
->resok
.file_attributes
);
917 resp
->resok
.count
= 0;
918 resp
->resok
.eof
= TRUE
;
919 resp
->resok
.data
.data_len
= 0;
920 resp
->resok
.data
.data_val
= NULL
;
921 resp
->resok
.data
.mp
= NULL
;
923 resp
->resok
.wlist
= args
->wlist
;
924 resp
->resok
.wlist_len
= resp
->resok
.count
;
925 if (resp
->resok
.wlist
)
926 clist_zero_len(resp
->resok
.wlist
);
930 if (args
->count
== 0) {
931 fop_rwunlock(vp
, V_WRITELOCK_FALSE
, &ct
);
934 resp
->status
= NFS3_OK
;
935 vattr_to_post_op_attr(vap
, &resp
->resok
.file_attributes
);
936 resp
->resok
.count
= 0;
937 resp
->resok
.eof
= FALSE
;
938 resp
->resok
.data
.data_len
= 0;
939 resp
->resok
.data
.data_val
= NULL
;
940 resp
->resok
.data
.mp
= NULL
;
942 resp
->resok
.wlist
= args
->wlist
;
943 resp
->resok
.wlist_len
= resp
->resok
.count
;
944 if (resp
->resok
.wlist
)
945 clist_zero_len(resp
->resok
.wlist
);
950 * do not allocate more memory than the max. allowed
953 if (args
->count
> rfs3_tsize(req
))
954 args
->count
= rfs3_tsize(req
);
956 if (loaned_buffers
) {
957 uiop
= (uio_t
*)rfs_setup_xuio(vp
);
958 ASSERT(uiop
!= NULL
);
959 uiop
->uio_segflg
= UIO_SYSSPACE
;
960 uiop
->uio_loffset
= args
->offset
;
961 uiop
->uio_resid
= args
->count
;
963 /* Jump to do the read if successful */
964 if (fop_reqzcbuf(vp
, UIO_READ
, (xuio_t
*)uiop
, cr
, &ct
) == 0) {
966 * Need to hold the vnode until after fop_retzcbuf()
973 DTRACE_PROBE2(nfss__i__reqzcbuf_failed
, int,
974 uiop
->uio_loffset
, int, uiop
->uio_resid
);
976 uiop
->uio_extflg
= 0;
977 /* failure to setup for zero copy */
978 rfs_free_xuio((void *)uiop
);
983 * If returning data via RDMA Write, then grab the chunk list.
984 * If we aren't returning READ data w/RDMA_WRITE, then grab
988 (void) rdma_get_wchunk(req
, &iov
, args
->wlist
);
993 * mp will contain the data to be sent out in the read reply.
994 * For UDP, this will be freed after the reply has been sent
995 * out by the driver. For TCP, it will be freed after the last
996 * segment associated with the reply has been ACKed by the
999 mp
= rfs_read_alloc(args
->count
, &iovp
, &iovcnt
);
1001 uio
.uio_iovcnt
= iovcnt
;
1004 uio
.uio_segflg
= UIO_SYSSPACE
;
1005 uio
.uio_extflg
= UIO_COPY_CACHED
;
1006 uio
.uio_loffset
= args
->offset
;
1007 uio
.uio_resid
= args
->count
;
1011 error
= fop_read(vp
, uiop
, 0, cr
, &ct
);
1016 /* check if a monitor detected a delegation conflict */
1017 if (error
== EAGAIN
&& (ct
.cc_flags
& CC_WOULDBLOCK
)) {
1018 resp
->status
= NFS3ERR_JUKEBOX
;
1024 /* make mblk using zc buffers */
1025 if (loaned_buffers
) {
1026 mp
= uio_to_mblk(uiop
);
1030 va
.va_mask
= AT_ALL
;
1031 error
= fop_getattr(vp
, &va
, 0, cr
, &ct
);
1038 fop_rwunlock(vp
, V_WRITELOCK_FALSE
, &ct
);
1043 resp
->status
= NFS3_OK
;
1044 vattr_to_post_op_attr(vap
, &resp
->resok
.file_attributes
);
1045 resp
->resok
.count
= args
->count
- uiop
->uio_resid
;
1046 if (!error
&& offset
+ resp
->resok
.count
== va
.va_size
)
1047 resp
->resok
.eof
= TRUE
;
1049 resp
->resok
.eof
= FALSE
;
1050 resp
->resok
.data
.data_len
= resp
->resok
.count
;
1053 rfs_rndup_mblks(mp
, resp
->resok
.count
, loaned_buffers
);
1055 resp
->resok
.data
.mp
= mp
;
1056 resp
->resok
.size
= (uint_t
)args
->count
;
1059 resp
->resok
.data
.data_val
= (caddr_t
)iov
.iov_base
;
1060 if (!rdma_setup_read_data3(args
, &(resp
->resok
))) {
1061 resp
->status
= NFS3ERR_INVAL
;
1064 resp
->resok
.data
.data_val
= (caddr_t
)mp
->b_datap
->db_base
;
1065 (resp
->resok
).wlist
= NULL
;
1069 DTRACE_NFSV3_4(op__read__done
, struct svc_req
*, req
,
1070 cred_t
*, cr
, vnode_t
*, vp
, READ3res
*, resp
);
1075 kmem_free(iovp
, iovcnt
* sizeof (struct iovec
));
1080 if (curthread
->t_flag
& T_WOULDBLOCK
) {
1081 curthread
->t_flag
&= ~T_WOULDBLOCK
;
1082 resp
->status
= NFS3ERR_JUKEBOX
;
1084 resp
->status
= puterrno3(error
);
1086 DTRACE_NFSV3_4(op__read__done
, struct svc_req
*, req
,
1087 cred_t
*, cr
, vnode_t
*, vp
, READ3res
*, resp
);
1091 fop_rwunlock(vp
, V_WRITELOCK_FALSE
, &ct
);
1096 vattr_to_post_op_attr(vap
, &resp
->resfail
.file_attributes
);
1099 kmem_free(iovp
, iovcnt
* sizeof (struct iovec
));
1103 rfs3_read_free(READ3res
*resp
)
1107 if (resp
->status
== NFS3_OK
) {
1108 mp
= resp
->resok
.data
.mp
;
1115 rfs3_read_getfh(READ3args
*args
)
1118 return (&args
->file
);
/*
 * Size of the on-stack iovec array declared in rfs3_write(). When the
 * request's mblk chain needs more entries than this, rfs3_write() allocates
 * the iovec array with kmem_alloc() instead.
 */
1121 #define MAX_IOVECS 12
/*
 * Counters for the rfs3_write() iovec setup. rfs3_write_misses is bumped on
 * the path that has to kmem_alloc() the iovec array. NOTE(review): the
 * increment of rfs3_write_hits is not visible in this part of the file;
 * presumably it counts the cases where the on-stack array sufficed.
 */
1124 static int rfs3_write_hits
= 0;
1125 static int rfs3_write_misses
= 0;
1129 rfs3_write(WRITE3args
*args
, WRITE3res
*resp
, struct exportinfo
*exi
,
1130 struct svc_req
*req
, cred_t
*cr
, bool_t ro
)
1134 struct vattr
*bvap
= NULL
;
1136 struct vattr
*avap
= NULL
;
1140 struct iovec iov
[MAX_IOVECS
];
1147 int rwlock_ret
= -1;
1148 caller_context_t ct
;
1150 vp
= nfs3_fhtovp(&args
->file
, exi
);
1152 DTRACE_NFSV3_4(op__write__start
, struct svc_req
*, req
,
1153 cred_t
*, cr
, vnode_t
*, vp
, WRITE3args
*, args
);
1162 ct
.cc_caller_id
= nfs3_srv_caller_id
;
1163 ct
.cc_flags
= CC_DONTBLOCK
;
1166 * We have to enter the critical region before calling fop_rwlock
1167 * to avoid a deadlock with ufs.
1169 if (nbl_need_check(vp
)) {
1170 nbl_start_crit(vp
, RW_READER
);
1172 if (nbl_conflict(vp
, NBL_WRITE
, args
->offset
, args
->count
, 0,
1179 rwlock_ret
= fop_rwlock(vp
, V_WRITELOCK_TRUE
, &ct
);
1181 /* check if a monitor detected a delegation conflict */
1182 if (rwlock_ret
== EAGAIN
&& (ct
.cc_flags
& CC_WOULDBLOCK
)) {
1183 resp
->status
= NFS3ERR_JUKEBOX
;
1189 bva
.va_mask
= AT_ALL
;
1190 error
= fop_getattr(vp
, &bva
, 0, cr
, &ct
);
1193 * If we can't get the attributes, then we can't do the
1194 * right access checking. So, we'll fail the request.
1202 if (args
->count
!= args
->data
.data_len
) {
1203 resp
->status
= NFS3ERR_INVAL
;
1207 if (rdonly(ro
, vp
)) {
1208 resp
->status
= NFS3ERR_ROFS
;
1212 if (vp
->v_type
!= VREG
) {
1213 resp
->status
= NFS3ERR_INVAL
;
1217 if (crgetuid(cr
) != bva
.va_uid
&&
1218 (error
= fop_access(vp
, VWRITE
, 0, cr
, &ct
)))
1221 if (MANDLOCK(vp
, bva
.va_mode
)) {
1222 resp
->status
= NFS3ERR_ACCES
;
1226 if (args
->count
== 0) {
1227 resp
->status
= NFS3_OK
;
1228 vattr_to_wcc_data(bvap
, avap
, &resp
->resok
.file_wcc
);
1229 resp
->resok
.count
= 0;
1230 resp
->resok
.committed
= args
->stable
;
1231 resp
->resok
.verf
= write3verf
;
1235 if (args
->mblk
!= NULL
) {
1237 for (m
= args
->mblk
; m
!= NULL
; m
= m
->b_cont
)
1239 if (iovcnt
<= MAX_IOVECS
) {
1246 rfs3_write_misses
++;
1248 iovp
= kmem_alloc(sizeof (*iovp
) * iovcnt
, KM_SLEEP
);
1250 mblk_to_iov(args
->mblk
, iovcnt
, iovp
);
1252 } else if (args
->rlist
!= NULL
) {
1255 iovp
->iov_base
= (char *)((args
->rlist
)->u
.c_daddr3
);
1256 iovp
->iov_len
= args
->count
;
1260 iovp
->iov_base
= args
->data
.data_val
;
1261 iovp
->iov_len
= args
->count
;
1265 uio
.uio_iovcnt
= iovcnt
;
1267 uio
.uio_segflg
= UIO_SYSSPACE
;
1268 uio
.uio_extflg
= UIO_COPY_DEFAULT
;
1269 uio
.uio_loffset
= args
->offset
;
1270 uio
.uio_resid
= args
->count
;
1271 uio
.uio_llimit
= curproc
->p_fsz_ctl
;
1272 rlimit
= uio
.uio_llimit
- args
->offset
;
1273 if (rlimit
< (uoff_t
)uio
.uio_resid
)
1274 uio
.uio_resid
= (int)rlimit
;
1276 if (args
->stable
== UNSTABLE
)
1278 else if (args
->stable
== FILE_SYNC
)
1280 else if (args
->stable
== DATA_SYNC
)
1284 kmem_free(iovp
, sizeof (*iovp
) * iovcnt
);
1285 resp
->status
= NFS3ERR_INVAL
;
1290 * We're changing creds because VM may fault and we need
1291 * the cred of the current thread to be used if quota
1292 * checking is enabled.
1294 savecred
= curthread
->t_cred
;
1295 curthread
->t_cred
= cr
;
1296 error
= fop_write(vp
, &uio
, ioflag
, cr
, &ct
);
1297 curthread
->t_cred
= savecred
;
1300 kmem_free(iovp
, sizeof (*iovp
) * iovcnt
);
1302 /* check if a monitor detected a delegation conflict */
1303 if (error
== EAGAIN
&& (ct
.cc_flags
& CC_WOULDBLOCK
)) {
1304 resp
->status
= NFS3ERR_JUKEBOX
;
1308 ava
.va_mask
= AT_ALL
;
1309 avap
= fop_getattr(vp
, &ava
, 0, cr
, &ct
) ? NULL
: &ava
;
1315 * If we were unable to get the V_WRITELOCK_TRUE, then we
1316 * may not have accurate after attrs, so check if
1317 * we have both attributes, they have a non-zero va_seq, and
1318 * va_seq has changed by exactly one,
1319 * if not, turn off the before attr.
1321 if (rwlock_ret
!= V_WRITELOCK_TRUE
) {
1322 if (bvap
== NULL
|| avap
== NULL
||
1323 bvap
->va_seq
== 0 || avap
->va_seq
== 0 ||
1324 avap
->va_seq
!= (bvap
->va_seq
+ 1)) {
1329 resp
->status
= NFS3_OK
;
1330 vattr_to_wcc_data(bvap
, avap
, &resp
->resok
.file_wcc
);
1331 resp
->resok
.count
= args
->count
- uio
.uio_resid
;
1332 resp
->resok
.committed
= args
->stable
;
1333 resp
->resok
.verf
= write3verf
;
1337 if (curthread
->t_flag
& T_WOULDBLOCK
) {
1338 curthread
->t_flag
&= ~T_WOULDBLOCK
;
1339 resp
->status
= NFS3ERR_JUKEBOX
;
1341 resp
->status
= puterrno3(error
);
1343 vattr_to_wcc_data(bvap
, avap
, &resp
->resfail
.file_wcc
);
1345 DTRACE_NFSV3_4(op__write__done
, struct svc_req
*, req
,
1346 cred_t
*, cr
, vnode_t
*, vp
, WRITE3res
*, resp
);
1349 if (rwlock_ret
!= -1)
1350 fop_rwunlock(vp
, V_WRITELOCK_TRUE
, &ct
);
1358 rfs3_write_getfh(WRITE3args
*args
)
1361 return (&args
->file
);
1365 rfs3_create(CREATE3args
*args
, CREATE3res
*resp
, struct exportinfo
*exi
,
1366 struct svc_req
*req
, cred_t
*cr
, bool_t ro
)
1371 vnode_t
*tvp
= NULL
;
1375 struct vattr
*dbvap
;
1377 struct vattr
*davap
;
1383 struct sockaddr
*ca
;
1389 dvp
= nfs3_fhtovp(&args
->where
.dir
, exi
);
1391 DTRACE_NFSV3_4(op__create__start
, struct svc_req
*, req
,
1392 cred_t
*, cr
, vnode_t
*, dvp
, CREATE3args
*, args
);
1399 dbva
.va_mask
= AT_ALL
;
1400 dbvap
= fop_getattr(dvp
, &dbva
, 0, cr
, NULL
) ? NULL
: &dbva
;
1403 if (args
->where
.name
== nfs3nametoolong
) {
1404 resp
->status
= NFS3ERR_NAMETOOLONG
;
1408 if (args
->where
.name
== NULL
|| *(args
->where
.name
) == '\0') {
1409 resp
->status
= NFS3ERR_ACCES
;
1413 if (rdonly(ro
, dvp
)) {
1414 resp
->status
= NFS3ERR_ROFS
;
1418 ca
= (struct sockaddr
*)svc_getrpccaller(req
->rq_xprt
)->buf
;
1419 name
= nfscmd_convname(ca
, exi
, args
->where
.name
,
1420 NFSCMD_CONV_INBOUND
, MAXPATHLEN
+ 1);
1423 /* This is really a Solaris EILSEQ */
1424 resp
->status
= NFS3ERR_INVAL
;
1428 if (args
->how
.mode
== EXCLUSIVE
) {
1429 va
.va_mask
= AT_TYPE
| AT_MODE
| AT_MTIME
;
1431 va
.va_mode
= (mode_t
)0;
1433 * Ensure no time overflows and that types match
1435 mtime
= (nfstime3
*)&args
->how
.createhow3_u
.verf
;
1436 va
.va_mtime
.tv_sec
= mtime
->seconds
% INT32_MAX
;
1437 va
.va_mtime
.tv_nsec
= mtime
->nseconds
;
1440 error
= sattr3_to_vattr(&args
->how
.createhow3_u
.obj_attributes
,
1444 va
.va_mask
|= AT_TYPE
;
1446 if (args
->how
.mode
== GUARDED
)
1452 * During creation of file in non-exclusive mode
1453 * if size of file is being set then make sure
1454 * that if the file already exists that no conflicting
1455 * non-blocking mandatory locks exist in the region
1456 * being modified. If there are conflicting locks fail
1457 * the operation with EACCES.
1459 if (va
.va_mask
& AT_SIZE
) {
1463 * Does file already exist?
1465 error
= fop_lookup(dvp
, name
, &tvp
,
1466 NULL
, 0, NULL
, cr
, NULL
, NULL
, NULL
);
1469 * Check to see if the file has been delegated
1470 * to a v4 client. If so, then begin recall of
1471 * the delegation and return JUKEBOX to allow
1472 * the client to retransmit its request.
1475 trunc
= va
.va_size
== 0;
1477 rfs4_check_delegated(FWRITE
, tvp
, trunc
)) {
1478 resp
->status
= NFS3ERR_JUKEBOX
;
1483 * Check for NBMAND lock conflicts
1485 if (!error
&& nbl_need_check(tvp
)) {
1489 nbl_start_crit(tvp
, RW_READER
);
1492 tva
.va_mask
= AT_SIZE
;
1493 error
= fop_getattr(tvp
, &tva
, 0, cr
,
1496 * Can't check for conflicts, so return
1502 offset
= tva
.va_size
< va
.va_size
?
1503 tva
.va_size
: va
.va_size
;
1504 len
= tva
.va_size
< va
.va_size
?
1505 va
.va_size
- tva
.va_size
:
1506 tva
.va_size
- va
.va_size
;
1507 if (nbl_conflict(tvp
, NBL_WRITE
,
1508 offset
, len
, 0, NULL
)) {
1518 if (va
.va_mask
& AT_SIZE
)
1519 reqsize
= va
.va_size
;
1523 * Must specify the mode.
1525 if (!(va
.va_mask
& AT_MODE
)) {
1526 resp
->status
= NFS3ERR_INVAL
;
1531 * If the filesystem is exported with nosuid, then mask off
1532 * the setuid and setgid bits.
1534 if (va
.va_type
== VREG
&& (exi
->exi_export
.ex_flags
& EX_NOSUID
))
1535 va
.va_mode
&= ~(VSUID
| VSGID
);
1539 * The file open mode used is VWRITE. If the client needs
1540 * some other semantic, then it should do the access checking
1541 * itself. It would have been nice to have the file open mode
1542 * passed as part of the arguments.
1544 error
= fop_create(dvp
, name
, &va
, excl
, VWRITE
,
1545 &vp
, cr
, 0, NULL
, NULL
);
1547 dava
.va_mask
= AT_ALL
;
1548 davap
= fop_getattr(dvp
, &dava
, 0, cr
, NULL
) ? NULL
: &dava
;
1552 * If we got something other than file already exists
1553 * then just return this error. Otherwise, we got
1554 * EEXIST. If we were doing a GUARDED create, then
1555 * just return this error. Otherwise, we need to
1556 * make sure that this wasn't a duplicate of an
1557 * exclusive create request.
1559 * The assumption is made that a non-exclusive create
1560 * request will never return EEXIST.
1562 if (error
!= EEXIST
|| args
->how
.mode
== GUARDED
)
1565 * Lookup the file so that we can get a vnode for it.
1567 error
= fop_lookup(dvp
, name
, &vp
, NULL
, 0,
1568 NULL
, cr
, NULL
, NULL
, NULL
);
1571 * We couldn't find the file that we thought that
1572 * we just created. So, we'll just try creating
1575 if (error
== ENOENT
)
1581 * If the file is delegated to a v4 client, go ahead
1582 * and initiate recall, this create is a hint that a
1583 * conflicting v3 open has occurred.
1586 if (rfs4_check_delegated(FWRITE
, vp
, FALSE
)) {
1588 resp
->status
= NFS3ERR_JUKEBOX
;
1592 va
.va_mask
= AT_ALL
;
1593 vap
= fop_getattr(vp
, &va
, 0, cr
, NULL
) ? NULL
: &va
;
1595 mtime
= (nfstime3
*)&args
->how
.createhow3_u
.verf
;
1596 /* % with INT32_MAX to prevent overflows */
1597 if (args
->how
.mode
== EXCLUSIVE
&& (vap
== NULL
||
1598 vap
->va_mtime
.tv_sec
!=
1599 (mtime
->seconds
% INT32_MAX
) ||
1600 vap
->va_mtime
.tv_nsec
!= mtime
->nseconds
)) {
1607 if ((args
->how
.mode
== UNCHECKED
||
1608 args
->how
.mode
== GUARDED
) &&
1609 args
->how
.createhow3_u
.obj_attributes
.size
.set_it
&&
1615 if (rfs4_check_delegated(FWRITE
, vp
, trunc
)) {
1617 resp
->status
= NFS3ERR_JUKEBOX
;
1621 va
.va_mask
= AT_ALL
;
1622 vap
= fop_getattr(vp
, &va
, 0, cr
, NULL
) ? NULL
: &va
;
1625 * We need to check to make sure that the file got
1626 * created to the indicated size. If not, we do a
1627 * setattr to try to change the size, but we don't
1628 * try too hard. This shouldn't be a problem as most
1629 * clients will only specify a size of zero which
1630 * local file systems handle. However, even if
1631 * the client does specify a non-zero size, it can
1632 * still recover by checking the size of the file
1633 * after it has created it and then issue a setattr
1634 * request of its own to set the size of the file.
1637 (args
->how
.mode
== UNCHECKED
||
1638 args
->how
.mode
== GUARDED
) &&
1639 args
->how
.createhow3_u
.obj_attributes
.size
.set_it
&&
1640 vap
->va_size
!= reqsize
) {
1641 va
.va_mask
= AT_SIZE
;
1642 va
.va_size
= reqsize
;
1643 (void) fop_setattr(vp
, &va
, 0, cr
, NULL
);
1644 va
.va_mask
= AT_ALL
;
1645 vap
= fop_getattr(vp
, &va
, 0, cr
, NULL
) ? NULL
: &va
;
1649 if (name
!= args
->where
.name
)
1650 kmem_free(name
, MAXPATHLEN
+ 1);
1652 error
= makefh3(&resp
->resok
.obj
.handle
, vp
, exi
);
1654 resp
->resok
.obj
.handle_follows
= FALSE
;
1656 resp
->resok
.obj
.handle_follows
= TRUE
;
1659 * Force modified data and metadata out to stable storage.
1661 (void) fop_fsync(vp
, FNODSYNC
, cr
, NULL
);
1662 (void) fop_fsync(dvp
, 0, cr
, NULL
);
1671 resp
->status
= NFS3_OK
;
1672 vattr_to_post_op_attr(vap
, &resp
->resok
.obj_attributes
);
1673 vattr_to_wcc_data(dbvap
, davap
, &resp
->resok
.dir_wcc
);
1675 DTRACE_NFSV3_4(op__create__done
, struct svc_req
*, req
,
1676 cred_t
*, cr
, vnode_t
*, dvp
, CREATE3res
*, resp
);
1682 if (curthread
->t_flag
& T_WOULDBLOCK
) {
1683 curthread
->t_flag
&= ~T_WOULDBLOCK
;
1684 resp
->status
= NFS3ERR_JUKEBOX
;
1686 resp
->status
= puterrno3(error
);
1688 DTRACE_NFSV3_4(op__create__done
, struct svc_req
*, req
,
1689 cred_t
*, cr
, vnode_t
*, dvp
, CREATE3res
*, resp
);
1691 if (name
!= NULL
&& name
!= args
->where
.name
)
1692 kmem_free(name
, MAXPATHLEN
+ 1);
1701 vattr_to_wcc_data(dbvap
, davap
, &resp
->resfail
.dir_wcc
);
/*
 * Hands back the address of the directory file handle embedded in the
 * CREATE3 arguments (args->where.dir).  Nothing is copied or allocated.
 */
1705 rfs3_create_getfh(CREATE3args
*args
)
1708 return (&args
->where
.dir
);
1712 rfs3_mkdir(MKDIR3args
*args
, MKDIR3res
*resp
, struct exportinfo
*exi
,
1713 struct svc_req
*req
, cred_t
*cr
, bool_t ro
)
1720 struct vattr
*dbvap
;
1722 struct vattr
*davap
;
1724 struct sockaddr
*ca
;
1730 dvp
= nfs3_fhtovp(&args
->where
.dir
, exi
);
1732 DTRACE_NFSV3_4(op__mkdir__start
, struct svc_req
*, req
,
1733 cred_t
*, cr
, vnode_t
*, dvp
, MKDIR3args
*, args
);
1740 dbva
.va_mask
= AT_ALL
;
1741 dbvap
= fop_getattr(dvp
, &dbva
, 0, cr
, NULL
) ? NULL
: &dbva
;
1744 if (args
->where
.name
== nfs3nametoolong
) {
1745 resp
->status
= NFS3ERR_NAMETOOLONG
;
1749 if (args
->where
.name
== NULL
|| *(args
->where
.name
) == '\0') {
1750 resp
->status
= NFS3ERR_ACCES
;
1754 if (rdonly(ro
, dvp
)) {
1755 resp
->status
= NFS3ERR_ROFS
;
1759 error
= sattr3_to_vattr(&args
->attributes
, &va
);
1763 if (!(va
.va_mask
& AT_MODE
)) {
1764 resp
->status
= NFS3ERR_INVAL
;
1768 ca
= (struct sockaddr
*)svc_getrpccaller(req
->rq_xprt
)->buf
;
1769 name
= nfscmd_convname(ca
, exi
, args
->where
.name
,
1770 NFSCMD_CONV_INBOUND
, MAXPATHLEN
+ 1);
1773 resp
->status
= NFS3ERR_INVAL
;
1777 va
.va_mask
|= AT_TYPE
;
1780 error
= fop_mkdir(dvp
, name
, &va
, &vp
, cr
, NULL
, 0, NULL
);
1782 if (name
!= args
->where
.name
)
1783 kmem_free(name
, MAXPATHLEN
+ 1);
1785 dava
.va_mask
= AT_ALL
;
1786 davap
= fop_getattr(dvp
, &dava
, 0, cr
, NULL
) ? NULL
: &dava
;
1789 * Force modified data and metadata out to stable storage.
1791 (void) fop_fsync(dvp
, 0, cr
, NULL
);
1796 error
= makefh3(&resp
->resok
.obj
.handle
, vp
, exi
);
1798 resp
->resok
.obj
.handle_follows
= FALSE
;
1800 resp
->resok
.obj
.handle_follows
= TRUE
;
1802 va
.va_mask
= AT_ALL
;
1803 vap
= fop_getattr(vp
, &va
, 0, cr
, NULL
) ? NULL
: &va
;
1806 * Force modified data and metadata out to stable storage.
1808 (void) fop_fsync(vp
, 0, cr
, NULL
);
1812 resp
->status
= NFS3_OK
;
1813 vattr_to_post_op_attr(vap
, &resp
->resok
.obj_attributes
);
1814 vattr_to_wcc_data(dbvap
, davap
, &resp
->resok
.dir_wcc
);
1816 DTRACE_NFSV3_4(op__mkdir__done
, struct svc_req
*, req
,
1817 cred_t
*, cr
, vnode_t
*, dvp
, MKDIR3res
*, resp
);
1823 if (curthread
->t_flag
& T_WOULDBLOCK
) {
1824 curthread
->t_flag
&= ~T_WOULDBLOCK
;
1825 resp
->status
= NFS3ERR_JUKEBOX
;
1827 resp
->status
= puterrno3(error
);
1829 DTRACE_NFSV3_4(op__mkdir__done
, struct svc_req
*, req
,
1830 cred_t
*, cr
, vnode_t
*, dvp
, MKDIR3res
*, resp
);
1833 vattr_to_wcc_data(dbvap
, davap
, &resp
->resfail
.dir_wcc
);
/*
 * Hands back the address of the parent-directory file handle embedded in
 * the MKDIR3 arguments (args->where.dir).  Nothing is copied or allocated.
 */
1837 rfs3_mkdir_getfh(MKDIR3args
*args
)
1840 return (&args
->where
.dir
);
1844 rfs3_symlink(SYMLINK3args
*args
, SYMLINK3res
*resp
, struct exportinfo
*exi
,
1845 struct svc_req
*req
, cred_t
*cr
, bool_t ro
)
1852 struct vattr
*dbvap
;
1854 struct vattr
*davap
;
1856 struct sockaddr
*ca
;
1858 char *symdata
= NULL
;
1863 dvp
= nfs3_fhtovp(&args
->where
.dir
, exi
);
1865 DTRACE_NFSV3_4(op__symlink__start
, struct svc_req
*, req
,
1866 cred_t
*, cr
, vnode_t
*, dvp
, SYMLINK3args
*, args
);
1873 dbva
.va_mask
= AT_ALL
;
1874 dbvap
= fop_getattr(dvp
, &dbva
, 0, cr
, NULL
) ? NULL
: &dbva
;
1877 if (args
->where
.name
== nfs3nametoolong
) {
1878 resp
->status
= NFS3ERR_NAMETOOLONG
;
1882 if (args
->where
.name
== NULL
|| *(args
->where
.name
) == '\0') {
1883 resp
->status
= NFS3ERR_ACCES
;
1887 if (rdonly(ro
, dvp
)) {
1888 resp
->status
= NFS3ERR_ROFS
;
1892 error
= sattr3_to_vattr(&args
->symlink
.symlink_attributes
, &va
);
1896 if (!(va
.va_mask
& AT_MODE
)) {
1897 resp
->status
= NFS3ERR_INVAL
;
1901 if (args
->symlink
.symlink_data
== nfs3nametoolong
) {
1902 resp
->status
= NFS3ERR_NAMETOOLONG
;
1906 ca
= (struct sockaddr
*)svc_getrpccaller(req
->rq_xprt
)->buf
;
1907 name
= nfscmd_convname(ca
, exi
, args
->where
.name
,
1908 NFSCMD_CONV_INBOUND
, MAXPATHLEN
+ 1);
1911 /* This is really a Solaris EILSEQ */
1912 resp
->status
= NFS3ERR_INVAL
;
1916 symdata
= nfscmd_convname(ca
, exi
, args
->symlink
.symlink_data
,
1917 NFSCMD_CONV_INBOUND
, MAXPATHLEN
+ 1);
1918 if (symdata
== NULL
) {
1919 /* This is really a Solaris EILSEQ */
1920 resp
->status
= NFS3ERR_INVAL
;
1925 va
.va_mask
|= AT_TYPE
;
1928 error
= fop_symlink(dvp
, name
, &va
, symdata
, cr
, NULL
, 0);
1930 dava
.va_mask
= AT_ALL
;
1931 davap
= fop_getattr(dvp
, &dava
, 0, cr
, NULL
) ? NULL
: &dava
;
1936 error
= fop_lookup(dvp
, name
, &vp
, NULL
, 0, NULL
, cr
,
1940 * Force modified data and metadata out to stable storage.
1942 (void) fop_fsync(dvp
, 0, cr
, NULL
);
1945 resp
->status
= NFS3_OK
;
1947 resp
->resok
.obj
.handle_follows
= FALSE
;
1948 vattr_to_post_op_attr(NULL
, &resp
->resok
.obj_attributes
);
1949 vattr_to_wcc_data(dbvap
, davap
, &resp
->resok
.dir_wcc
);
1953 error
= makefh3(&resp
->resok
.obj
.handle
, vp
, exi
);
1955 resp
->resok
.obj
.handle_follows
= FALSE
;
1957 resp
->resok
.obj
.handle_follows
= TRUE
;
1959 va
.va_mask
= AT_ALL
;
1960 vap
= fop_getattr(vp
, &va
, 0, cr
, NULL
) ? NULL
: &va
;
1963 * Force modified data and metadata out to stable storage.
1965 (void) fop_fsync(vp
, 0, cr
, NULL
);
1969 vattr_to_post_op_attr(vap
, &resp
->resok
.obj_attributes
);
1970 vattr_to_wcc_data(dbvap
, davap
, &resp
->resok
.dir_wcc
);
1974 if (curthread
->t_flag
& T_WOULDBLOCK
) {
1975 curthread
->t_flag
&= ~T_WOULDBLOCK
;
1976 resp
->status
= NFS3ERR_JUKEBOX
;
1978 resp
->status
= puterrno3(error
);
1980 vattr_to_wcc_data(dbvap
, davap
, &resp
->resfail
.dir_wcc
);
1982 if (name
!= NULL
&& name
!= args
->where
.name
)
1983 kmem_free(name
, MAXPATHLEN
+ 1);
1984 if (symdata
!= NULL
&& symdata
!= args
->symlink
.symlink_data
)
1985 kmem_free(symdata
, MAXPATHLEN
+ 1);
1987 DTRACE_NFSV3_4(op__symlink__done
, struct svc_req
*, req
,
1988 cred_t
*, cr
, vnode_t
*, dvp
, SYMLINK3res
*, resp
);
/*
 * Hands back the address of the directory file handle embedded in the
 * SYMLINK3 arguments (args->where.dir).  Nothing is copied or allocated.
 */
1995 rfs3_symlink_getfh(SYMLINK3args
*args
)
1998 return (&args
->where
.dir
);
2002 rfs3_mknod(MKNOD3args
*args
, MKNOD3res
*resp
, struct exportinfo
*exi
,
2003 struct svc_req
*req
, cred_t
*cr
, bool_t ro
)
2011 struct vattr
*dbvap
;
2013 struct vattr
*davap
;
2017 struct sockaddr
*ca
;
2023 dvp
= nfs3_fhtovp(&args
->where
.dir
, exi
);
2025 DTRACE_NFSV3_4(op__mknod__start
, struct svc_req
*, req
,
2026 cred_t
*, cr
, vnode_t
*, dvp
, MKNOD3args
*, args
);
2033 dbva
.va_mask
= AT_ALL
;
2034 dbvap
= fop_getattr(dvp
, &dbva
, 0, cr
, NULL
) ? NULL
: &dbva
;
2037 if (args
->where
.name
== nfs3nametoolong
) {
2038 resp
->status
= NFS3ERR_NAMETOOLONG
;
2042 if (args
->where
.name
== NULL
|| *(args
->where
.name
) == '\0') {
2043 resp
->status
= NFS3ERR_ACCES
;
2047 if (rdonly(ro
, dvp
)) {
2048 resp
->status
= NFS3ERR_ROFS
;
2052 switch (args
->what
.type
) {
2055 error
= sattr3_to_vattr(
2056 &args
->what
.mknoddata3_u
.device
.dev_attributes
, &va
);
2059 if (secpolicy_sys_devices(cr
) != 0) {
2060 resp
->status
= NFS3ERR_PERM
;
2063 if (args
->what
.type
== NF3CHR
)
2067 va
.va_rdev
= makedevice(
2068 args
->what
.mknoddata3_u
.device
.spec
.specdata1
,
2069 args
->what
.mknoddata3_u
.device
.spec
.specdata2
);
2070 va
.va_mask
|= AT_TYPE
| AT_RDEV
;
2073 error
= sattr3_to_vattr(
2074 &args
->what
.mknoddata3_u
.pipe_attributes
, &va
);
2078 va
.va_mask
|= AT_TYPE
;
2081 error
= sattr3_to_vattr(
2082 &args
->what
.mknoddata3_u
.pipe_attributes
, &va
);
2086 va
.va_mask
|= AT_TYPE
;
2089 resp
->status
= NFS3ERR_BADTYPE
;
2094 * Must specify the mode.
2096 if (!(va
.va_mask
& AT_MODE
)) {
2097 resp
->status
= NFS3ERR_INVAL
;
2101 ca
= (struct sockaddr
*)svc_getrpccaller(req
->rq_xprt
)->buf
;
2102 name
= nfscmd_convname(ca
, exi
, args
->where
.name
,
2103 NFSCMD_CONV_INBOUND
, MAXPATHLEN
+ 1);
2106 resp
->status
= NFS3ERR_INVAL
;
2114 error
= fop_create(dvp
, name
, &va
, excl
, mode
,
2115 &vp
, cr
, 0, NULL
, NULL
);
2117 if (name
!= args
->where
.name
)
2118 kmem_free(name
, MAXPATHLEN
+ 1);
2120 dava
.va_mask
= AT_ALL
;
2121 davap
= fop_getattr(dvp
, &dava
, 0, cr
, NULL
) ? NULL
: &dava
;
2124 * Force modified data and metadata out to stable storage.
2126 (void) fop_fsync(dvp
, 0, cr
, NULL
);
2131 resp
->status
= NFS3_OK
;
2133 error
= makefh3(&resp
->resok
.obj
.handle
, vp
, exi
);
2135 resp
->resok
.obj
.handle_follows
= FALSE
;
2137 resp
->resok
.obj
.handle_follows
= TRUE
;
2139 va
.va_mask
= AT_ALL
;
2140 vap
= fop_getattr(vp
, &va
, 0, cr
, NULL
) ? NULL
: &va
;
2143 * Force modified metadata out to stable storage.
2145 * If an underlying vp exists, pass it to fop_fsync
2147 if (fop_realvp(vp
, &realvp
, NULL
) == 0)
2148 (void) fop_fsync(realvp
, FNODSYNC
, cr
, NULL
);
2150 (void) fop_fsync(vp
, FNODSYNC
, cr
, NULL
);
2154 vattr_to_post_op_attr(vap
, &resp
->resok
.obj_attributes
);
2155 vattr_to_wcc_data(dbvap
, davap
, &resp
->resok
.dir_wcc
);
2156 DTRACE_NFSV3_4(op__mknod__done
, struct svc_req
*, req
,
2157 cred_t
*, cr
, vnode_t
*, dvp
, MKNOD3res
*, resp
);
2162 if (curthread
->t_flag
& T_WOULDBLOCK
) {
2163 curthread
->t_flag
&= ~T_WOULDBLOCK
;
2164 resp
->status
= NFS3ERR_JUKEBOX
;
2166 resp
->status
= puterrno3(error
);
2168 DTRACE_NFSV3_4(op__mknod__done
, struct svc_req
*, req
,
2169 cred_t
*, cr
, vnode_t
*, dvp
, MKNOD3res
*, resp
);
2172 vattr_to_wcc_data(dbvap
, davap
, &resp
->resfail
.dir_wcc
);
/*
 * Hands back the address of the directory file handle embedded in the
 * MKNOD3 arguments (args->where.dir).  Nothing is copied or allocated.
 */
2176 rfs3_mknod_getfh(MKNOD3args
*args
)
2179 return (&args
->where
.dir
);
2183 rfs3_remove(REMOVE3args
*args
, REMOVE3res
*resp
, struct exportinfo
*exi
,
2184 struct svc_req
*req
, cred_t
*cr
, bool_t ro
)
2192 vnode_t
*targvp
= NULL
;
2193 struct sockaddr
*ca
;
2199 vp
= nfs3_fhtovp(&args
->object
.dir
, exi
);
2201 DTRACE_NFSV3_4(op__remove__start
, struct svc_req
*, req
,
2202 cred_t
*, cr
, vnode_t
*, vp
, REMOVE3args
*, args
);
2209 bva
.va_mask
= AT_ALL
;
2210 bvap
= fop_getattr(vp
, &bva
, 0, cr
, NULL
) ? NULL
: &bva
;
2213 if (vp
->v_type
!= VDIR
) {
2214 resp
->status
= NFS3ERR_NOTDIR
;
2218 if (args
->object
.name
== nfs3nametoolong
) {
2219 resp
->status
= NFS3ERR_NAMETOOLONG
;
2223 if (args
->object
.name
== NULL
|| *(args
->object
.name
) == '\0') {
2224 resp
->status
= NFS3ERR_ACCES
;
2228 if (rdonly(ro
, vp
)) {
2229 resp
->status
= NFS3ERR_ROFS
;
2233 ca
= (struct sockaddr
*)svc_getrpccaller(req
->rq_xprt
)->buf
;
2234 name
= nfscmd_convname(ca
, exi
, args
->object
.name
,
2235 NFSCMD_CONV_INBOUND
, MAXPATHLEN
+ 1);
2238 resp
->status
= NFS3ERR_INVAL
;
2243 * Check for a conflict with a non-blocking mandatory share
2244 * reservation and V4 delegations
2246 error
= fop_lookup(vp
, name
, &targvp
, NULL
, 0,
2247 NULL
, cr
, NULL
, NULL
, NULL
);
2251 if (rfs4_check_delegated(FWRITE
, targvp
, TRUE
)) {
2252 resp
->status
= NFS3ERR_JUKEBOX
;
2256 if (!nbl_need_check(targvp
)) {
2257 error
= fop_remove(vp
, name
, cr
, NULL
, 0);
2259 nbl_start_crit(targvp
, RW_READER
);
2260 if (nbl_conflict(targvp
, NBL_REMOVE
, 0, 0, 0, NULL
)) {
2263 error
= fop_remove(vp
, name
, cr
, NULL
, 0);
2265 nbl_end_crit(targvp
);
2270 ava
.va_mask
= AT_ALL
;
2271 avap
= fop_getattr(vp
, &ava
, 0, cr
, NULL
) ? NULL
: &ava
;
2274 * Force modified data and metadata out to stable storage.
2276 (void) fop_fsync(vp
, 0, cr
, NULL
);
2281 resp
->status
= NFS3_OK
;
2282 vattr_to_wcc_data(bvap
, avap
, &resp
->resok
.dir_wcc
);
2286 if (curthread
->t_flag
& T_WOULDBLOCK
) {
2287 curthread
->t_flag
&= ~T_WOULDBLOCK
;
2288 resp
->status
= NFS3ERR_JUKEBOX
;
2290 resp
->status
= puterrno3(error
);
2292 vattr_to_wcc_data(bvap
, avap
, &resp
->resfail
.dir_wcc
);
2294 DTRACE_NFSV3_4(op__remove__done
, struct svc_req
*, req
,
2295 cred_t
*, cr
, vnode_t
*, vp
, REMOVE3res
*, resp
);
2297 if (name
!= NULL
&& name
!= args
->object
.name
)
2298 kmem_free(name
, MAXPATHLEN
+ 1);
/*
 * Hands back the address of the directory file handle embedded in the
 * REMOVE3 arguments (args->object.dir).  Nothing is copied or allocated.
 */
2305 rfs3_remove_getfh(REMOVE3args
*args
)
2308 return (&args
->object
.dir
);
2312 rfs3_rmdir(RMDIR3args
*args
, RMDIR3res
*resp
, struct exportinfo
*exi
,
2313 struct svc_req
*req
, cred_t
*cr
, bool_t ro
)
2321 struct sockaddr
*ca
;
2327 vp
= nfs3_fhtovp(&args
->object
.dir
, exi
);
2329 DTRACE_NFSV3_4(op__rmdir__start
, struct svc_req
*, req
,
2330 cred_t
*, cr
, vnode_t
*, vp
, RMDIR3args
*, args
);
2337 bva
.va_mask
= AT_ALL
;
2338 bvap
= fop_getattr(vp
, &bva
, 0, cr
, NULL
) ? NULL
: &bva
;
2341 if (vp
->v_type
!= VDIR
) {
2342 resp
->status
= NFS3ERR_NOTDIR
;
2346 if (args
->object
.name
== nfs3nametoolong
) {
2347 resp
->status
= NFS3ERR_NAMETOOLONG
;
2351 if (args
->object
.name
== NULL
|| *(args
->object
.name
) == '\0') {
2352 resp
->status
= NFS3ERR_ACCES
;
2356 if (rdonly(ro
, vp
)) {
2357 resp
->status
= NFS3ERR_ROFS
;
2361 ca
= (struct sockaddr
*)svc_getrpccaller(req
->rq_xprt
)->buf
;
2362 name
= nfscmd_convname(ca
, exi
, args
->object
.name
,
2363 NFSCMD_CONV_INBOUND
, MAXPATHLEN
+ 1);
2366 resp
->status
= NFS3ERR_INVAL
;
2370 error
= fop_rmdir(vp
, name
, rootdir
, cr
, NULL
, 0);
2372 if (name
!= args
->object
.name
)
2373 kmem_free(name
, MAXPATHLEN
+ 1);
2375 ava
.va_mask
= AT_ALL
;
2376 avap
= fop_getattr(vp
, &ava
, 0, cr
, NULL
) ? NULL
: &ava
;
2379 * Force modified data and metadata out to stable storage.
2381 (void) fop_fsync(vp
, 0, cr
, NULL
);
2385 * System V defines rmdir to return EEXIST, not ENOTEMPTY,
2386 * if the directory is not empty. A System V NFS server
2387 * needs to map NFS3ERR_EXIST to NFS3ERR_NOTEMPTY to transmit
2390 if (error
== EEXIST
)
2395 resp
->status
= NFS3_OK
;
2396 vattr_to_wcc_data(bvap
, avap
, &resp
->resok
.dir_wcc
);
2400 if (curthread
->t_flag
& T_WOULDBLOCK
) {
2401 curthread
->t_flag
&= ~T_WOULDBLOCK
;
2402 resp
->status
= NFS3ERR_JUKEBOX
;
2404 resp
->status
= puterrno3(error
);
2406 vattr_to_wcc_data(bvap
, avap
, &resp
->resfail
.dir_wcc
);
2408 DTRACE_NFSV3_4(op__rmdir__done
, struct svc_req
*, req
,
2409 cred_t
*, cr
, vnode_t
*, vp
, RMDIR3res
*, resp
);
/*
 * Hands back the address of the directory file handle embedded in the
 * RMDIR3 arguments (args->object.dir).  Nothing is copied or allocated.
 */
2416 rfs3_rmdir_getfh(RMDIR3args
*args
)
2419 return (&args
->object
.dir
);
2423 rfs3_rename(RENAME3args
*args
, RENAME3res
*resp
, struct exportinfo
*exi
,
2424 struct svc_req
*req
, cred_t
*cr
, bool_t ro
)
2430 struct vattr
*fbvap
;
2432 struct vattr
*favap
;
2434 struct vattr
*tbvap
;
2436 struct vattr
*tavap
;
2439 struct exportinfo
*to_exi
;
2440 vnode_t
*srcvp
= NULL
;
2441 struct sockaddr
*ca
;
2443 char *toname
= NULL
;
2451 fvp
= nfs3_fhtovp(&args
->from
.dir
, exi
);
2453 DTRACE_NFSV3_4(op__rename__start
, struct svc_req
*, req
,
2454 cred_t
*, cr
, vnode_t
*, fvp
, RENAME3args
*, args
);
2461 fbva
.va_mask
= AT_ALL
;
2462 fbvap
= fop_getattr(fvp
, &fbva
, 0, cr
, NULL
) ? NULL
: &fbva
;
2465 fh3
= &args
->to
.dir
;
2466 to_exi
= checkexport(&fh3
->fh3_fsid
, FH3TOXFIDP(fh3
));
2467 if (to_exi
== NULL
) {
2468 resp
->status
= NFS3ERR_ACCES
;
2473 if (to_exi
!= exi
) {
2474 resp
->status
= NFS3ERR_XDEV
;
2478 tvp
= nfs3_fhtovp(&args
->to
.dir
, exi
);
2484 tbva
.va_mask
= AT_ALL
;
2485 tbvap
= fop_getattr(tvp
, &tbva
, 0, cr
, NULL
) ? NULL
: &tbva
;
2488 if (fvp
->v_type
!= VDIR
|| tvp
->v_type
!= VDIR
) {
2489 resp
->status
= NFS3ERR_NOTDIR
;
2493 if (args
->from
.name
== nfs3nametoolong
||
2494 args
->to
.name
== nfs3nametoolong
) {
2495 resp
->status
= NFS3ERR_NAMETOOLONG
;
2498 if (args
->from
.name
== NULL
|| *(args
->from
.name
) == '\0' ||
2499 args
->to
.name
== NULL
|| *(args
->to
.name
) == '\0') {
2500 resp
->status
= NFS3ERR_ACCES
;
2504 if (rdonly(ro
, tvp
)) {
2505 resp
->status
= NFS3ERR_ROFS
;
2509 ca
= (struct sockaddr
*)svc_getrpccaller(req
->rq_xprt
)->buf
;
2510 name
= nfscmd_convname(ca
, exi
, args
->from
.name
,
2511 NFSCMD_CONV_INBOUND
, MAXPATHLEN
+ 1);
2514 resp
->status
= NFS3ERR_INVAL
;
2518 toname
= nfscmd_convname(ca
, exi
, args
->to
.name
,
2519 NFSCMD_CONV_INBOUND
, MAXPATHLEN
+ 1);
2521 if (toname
== NULL
) {
2522 resp
->status
= NFS3ERR_INVAL
;
2527 * Check for a conflict with a non-blocking mandatory share
2528 * reservation or V4 delegations.
2530 error
= fop_lookup(fvp
, name
, &srcvp
, NULL
, 0,
2531 NULL
, cr
, NULL
, NULL
, NULL
);
2536 * If we rename a delegated file we should recall the
2537 * delegation, since future opens should fail or would
2538 * refer to a new file.
2540 if (rfs4_check_delegated(FWRITE
, srcvp
, FALSE
)) {
2541 resp
->status
= NFS3ERR_JUKEBOX
;
2546 * Check for renaming over a delegated file. Check rfs4_deleg_policy
2547 * first to avoid fop_lookup if possible.
2549 if (rfs4_deleg_policy
!= SRV_NEVER_DELEGATE
&&
2550 fop_lookup(tvp
, toname
, &targvp
, NULL
, 0, NULL
, cr
,
2551 NULL
, NULL
, NULL
) == 0) {
2553 if (rfs4_check_delegated(FWRITE
, targvp
, TRUE
)) {
2555 resp
->status
= NFS3ERR_JUKEBOX
;
2561 if (!nbl_need_check(srcvp
)) {
2562 error
= fop_rename(fvp
, name
, tvp
, toname
, cr
, NULL
, 0);
2564 nbl_start_crit(srcvp
, RW_READER
);
2565 if (nbl_conflict(srcvp
, NBL_RENAME
, 0, 0, 0, NULL
))
2568 error
= fop_rename(fvp
, name
, tvp
, toname
, cr
, NULL
, 0);
2569 nbl_end_crit(srcvp
);
2572 vn_renamepath(tvp
, srcvp
, args
->to
.name
,
2573 strlen(args
->to
.name
));
2577 fava
.va_mask
= AT_ALL
;
2578 favap
= fop_getattr(fvp
, &fava
, 0, cr
, NULL
) ? NULL
: &fava
;
2579 tava
.va_mask
= AT_ALL
;
2580 tavap
= fop_getattr(tvp
, &tava
, 0, cr
, NULL
) ? NULL
: &tava
;
2583 * Force modified data and metadata out to stable storage.
2585 (void) fop_fsync(fvp
, 0, cr
, NULL
);
2586 (void) fop_fsync(tvp
, 0, cr
, NULL
);
2591 resp
->status
= NFS3_OK
;
2592 vattr_to_wcc_data(fbvap
, favap
, &resp
->resok
.fromdir_wcc
);
2593 vattr_to_wcc_data(tbvap
, tavap
, &resp
->resok
.todir_wcc
);
2597 if (curthread
->t_flag
& T_WOULDBLOCK
) {
2598 curthread
->t_flag
&= ~T_WOULDBLOCK
;
2599 resp
->status
= NFS3ERR_JUKEBOX
;
2601 resp
->status
= puterrno3(error
);
2604 vattr_to_wcc_data(fbvap
, favap
, &resp
->resfail
.fromdir_wcc
);
2605 vattr_to_wcc_data(tbvap
, tavap
, &resp
->resfail
.todir_wcc
);
2608 if (name
!= NULL
&& name
!= args
->from
.name
)
2609 kmem_free(name
, MAXPATHLEN
+ 1);
2610 if (toname
!= NULL
&& toname
!= args
->to
.name
)
2611 kmem_free(toname
, MAXPATHLEN
+ 1);
2613 DTRACE_NFSV3_4(op__rename__done
, struct svc_req
*, req
,
2614 cred_t
*, cr
, vnode_t
*, fvp
, RENAME3res
*, resp
);
/*
 * Hands back the address of the source ("from") directory file handle
 * embedded in the RENAME3 arguments (args->from.dir).  The "to" directory
 * handle is not the one returned here.
 */
2622 rfs3_rename_getfh(RENAME3args
*args
)
2625 return (&args
->from
.dir
);
2629 rfs3_link(LINK3args
*args
, LINK3res
*resp
, struct exportinfo
*exi
,
2630 struct svc_req
*req
, cred_t
*cr
, bool_t ro
)
2642 struct exportinfo
*to_exi
;
2643 struct sockaddr
*ca
;
2651 vp
= nfs3_fhtovp(&args
->file
, exi
);
2653 DTRACE_NFSV3_4(op__link__start
, struct svc_req
*, req
,
2654 cred_t
*, cr
, vnode_t
*, vp
, LINK3args
*, args
);
2661 va
.va_mask
= AT_ALL
;
2662 vap
= fop_getattr(vp
, &va
, 0, cr
, NULL
) ? NULL
: &va
;
2664 fh3
= &args
->link
.dir
;
2665 to_exi
= checkexport(&fh3
->fh3_fsid
, FH3TOXFIDP(fh3
));
2666 if (to_exi
== NULL
) {
2667 resp
->status
= NFS3ERR_ACCES
;
2672 if (to_exi
!= exi
) {
2673 resp
->status
= NFS3ERR_XDEV
;
2677 dvp
= nfs3_fhtovp(&args
->link
.dir
, exi
);
2683 bva
.va_mask
= AT_ALL
;
2684 bvap
= fop_getattr(dvp
, &bva
, 0, cr
, NULL
) ? NULL
: &bva
;
2686 if (dvp
->v_type
!= VDIR
) {
2687 resp
->status
= NFS3ERR_NOTDIR
;
2691 if (args
->link
.name
== nfs3nametoolong
) {
2692 resp
->status
= NFS3ERR_NAMETOOLONG
;
2696 if (args
->link
.name
== NULL
|| *(args
->link
.name
) == '\0') {
2697 resp
->status
= NFS3ERR_ACCES
;
2701 if (rdonly(ro
, dvp
)) {
2702 resp
->status
= NFS3ERR_ROFS
;
2706 ca
= (struct sockaddr
*)svc_getrpccaller(req
->rq_xprt
)->buf
;
2707 name
= nfscmd_convname(ca
, exi
, args
->link
.name
,
2708 NFSCMD_CONV_INBOUND
, MAXPATHLEN
+ 1);
2711 resp
->status
= NFS3ERR_SERVERFAULT
;
2715 error
= fop_link(dvp
, vp
, name
, cr
, NULL
, 0);
2717 va
.va_mask
= AT_ALL
;
2718 vap
= fop_getattr(vp
, &va
, 0, cr
, NULL
) ? NULL
: &va
;
2719 ava
.va_mask
= AT_ALL
;
2720 avap
= fop_getattr(dvp
, &ava
, 0, cr
, NULL
) ? NULL
: &ava
;
2723 * Force modified data and metadata out to stable storage.
2725 (void) fop_fsync(vp
, FNODSYNC
, cr
, NULL
);
2726 (void) fop_fsync(dvp
, 0, cr
, NULL
);
2733 resp
->status
= NFS3_OK
;
2734 vattr_to_post_op_attr(vap
, &resp
->resok
.file_attributes
);
2735 vattr_to_wcc_data(bvap
, avap
, &resp
->resok
.linkdir_wcc
);
2737 DTRACE_NFSV3_4(op__link__done
, struct svc_req
*, req
,
2738 cred_t
*, cr
, vnode_t
*, vp
, LINK3res
*, resp
);
2745 if (curthread
->t_flag
& T_WOULDBLOCK
) {
2746 curthread
->t_flag
&= ~T_WOULDBLOCK
;
2747 resp
->status
= NFS3ERR_JUKEBOX
;
2749 resp
->status
= puterrno3(error
);
2751 if (name
!= NULL
&& name
!= args
->link
.name
)
2752 kmem_free(name
, MAXPATHLEN
+ 1);
2754 DTRACE_NFSV3_4(op__link__done
, struct svc_req
*, req
,
2755 cred_t
*, cr
, vnode_t
*, vp
, LINK3res
*, resp
);
2761 vattr_to_post_op_attr(vap
, &resp
->resfail
.file_attributes
);
2762 vattr_to_wcc_data(bvap
, avap
, &resp
->resfail
.linkdir_wcc
);
/*
 * Hands back the address of the `file' handle member of the LINK3
 * arguments (args->file), i.e. not the link directory handle
 * (args->link.dir).  Nothing is copied or allocated.
 */
2766 rfs3_link_getfh(LINK3args
*args
)
2769 return (&args
->file
);
2773 * This macro defines the size of a response which contains attribute
2774 * information and one directory entry (whose length is specified by
2775 * the macro parameter). If the incoming request is larger than this,
2776 * then we are guaranteed to be able to return at least one directory entry
2777 * if one exists. Therefore, we do not need to check for
2778 * NFS3ERR_TOOSMALL if the requested size is larger than this. If it
2779 * is not, then we need to check to make sure that this error does not
2780 * need to be returned.
2782 * NFS3_READDIR_MIN_COUNT is comprised of following :
2784 * status - 1 * BYTES_PER_XDR_UNIT
2785 * attr. flag - 1 * BYTES_PER_XDR_UNIT
2786 * cookie verifier - 2 * BYTES_PER_XDR_UNIT
2787 * attributes - NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT
2788 * boolean - 1 * BYTES_PER_XDR_UNIT
2789 * file id - 2 * BYTES_PER_XDR_UNIT
2790 * directory name length - 1 * BYTES_PER_XDR_UNIT
2791 * cookie - 2 * BYTES_PER_XDR_UNIT
2792 * end of list - 1 * BYTES_PER_XDR_UNIT
2793 * end of file - 1 * BYTES_PER_XDR_UNIT
2794 * Name length of the directory entry, rounded up to a multiple of BYTES_PER_XDR_UNIT
/*
 * (length) is the entry name length in bytes.  The fixed part is a count
 * of XDR units multiplied by BYTES_PER_XDR_UNIT; the name length is then
 * added after being rounded up to a multiple of BYTES_PER_XDR_UNIT.
 */
2797 #define NFS3_READDIR_MIN_COUNT(length) \
2798 ((1 + 1 + 2 + NFS3_SIZEOF_FATTR3 + 1 + 2 + 1 + 2 + 1 + 1) * \
2799 BYTES_PER_XDR_UNIT + roundup((length), BYTES_PER_XDR_UNIT))
2803 rfs3_readdir(READDIR3args
*args
, READDIR3res
*resp
, struct exportinfo
*exi
,
2804 struct svc_req
*req
, cred_t
*cr
, bool_t ro
)
2817 struct sockaddr
*ca
;
2821 vp
= nfs3_fhtovp(&args
->dir
, exi
);
2823 DTRACE_NFSV3_4(op__readdir__start
, struct svc_req
*, req
,
2824 cred_t
*, cr
, vnode_t
*, vp
, READDIR3args
*, args
);
2831 (void) fop_rwlock(vp
, V_WRITELOCK_FALSE
, NULL
);
2833 va
.va_mask
= AT_ALL
;
2834 vap
= fop_getattr(vp
, &va
, 0, cr
, NULL
) ? NULL
: &va
;
2836 if (vp
->v_type
!= VDIR
) {
2837 resp
->status
= NFS3ERR_NOTDIR
;
2841 error
= fop_access(vp
, VREAD
, 0, cr
, NULL
);
2846 * Now don't allow arbitrary count to alloc;
2847 * allow the maximum not to exceed rfs3_tsize()
2849 if (args
->count
> rfs3_tsize(req
))
2850 args
->count
= rfs3_tsize(req
);
2853 * Make sure that there is room to read at least one entry
2854 * if any are available.
2856 if (args
->count
< DIRENT64_RECLEN(MAXNAMELEN
))
2857 count
= DIRENT64_RECLEN(MAXNAMELEN
);
2859 count
= args
->count
;
2861 data
= kmem_alloc(count
, KM_SLEEP
);
2863 iov
.iov_base
= data
;
2864 iov
.iov_len
= count
;
2867 uio
.uio_segflg
= UIO_SYSSPACE
;
2868 uio
.uio_extflg
= UIO_COPY_CACHED
;
2869 uio
.uio_loffset
= (offset_t
)args
->cookie
;
2870 uio
.uio_resid
= count
;
2872 error
= fop_readdir(vp
, &uio
, cr
, &iseof
, NULL
, 0);
2874 va
.va_mask
= AT_ALL
;
2875 vap
= fop_getattr(vp
, &va
, 0, cr
, NULL
) ? NULL
: &va
;
2878 kmem_free(data
, count
);
2883 * If the count was not large enough to be able to guarantee
2884 * to be able to return at least one entry, then need to
2885 * check to see if NFS3ERR_TOOSMALL should be returned.
2887 if (args
->count
< NFS3_READDIR_MIN_COUNT(MAXNAMELEN
)) {
2889 * bufsize is used to keep track of the size of the response.
2890 * It is primed with:
2891 * 1 for the status +
2892 * 1 for the dir_attributes.attributes boolean +
2893 * 2 for the cookie verifier
2894 * all times BYTES_PER_XDR_UNIT to convert from XDR units
2895 * to bytes. If there are directory attributes to be
2897 * NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
2898 * times BYTES_PER_XDR_UNIT is added to account for them.
2900 bufsize
= (1 + 1 + 2) * BYTES_PER_XDR_UNIT
;
2902 bufsize
+= NFS3_SIZEOF_FATTR3
* BYTES_PER_XDR_UNIT
;
2904 * An entry is composed of:
2905 * 1 for the true/false list indicator +
2906 * 2 for the fileid +
2907 * 1 for the length of the name +
2908 * 2 for the cookie +
2909 * all times BYTES_PER_XDR_UNIT to convert from
2910 * XDR units to bytes, plus the length of the name
2911 * rounded up to the nearest BYTES_PER_XDR_UNIT.
2913 if (count
!= uio
.uio_resid
) {
2914 namlen
= strlen(((struct dirent64
*)data
)->d_name
);
2915 bufsize
+= (1 + 2 + 1 + 2) * BYTES_PER_XDR_UNIT
+
2916 roundup(namlen
, BYTES_PER_XDR_UNIT
);
2919 * We need to check to see if the number of bytes left
2920 * to go into the buffer will actually fit into the
2921 * buffer. This is calculated as the size of this
2923 * 1 for the true/false list indicator +
2924 * 1 for the eof indicator
2925 * times BYTES_PER_XDR_UNIT to convert from
2926 * XDR units to bytes.
2928 bufsize
+= (1 + 1) * BYTES_PER_XDR_UNIT
;
2929 if (bufsize
> args
->count
) {
2930 kmem_free(data
, count
);
2931 resp
->status
= NFS3ERR_TOOSMALL
;
2937 * Have a valid readdir buffer for the native character
2938 * set. Need to check if a conversion is necessary and
2939 * potentially rewrite the whole buffer. Note that if the
2940 * conversion expands names enough, the structure may not
2941 * fit. In this case, we need to drop entries until it fits
2942 * and patch the counts in order that the next readdir will
2943 * get the correct entries.
2945 ca
= (struct sockaddr
*)svc_getrpccaller(req
->rq_xprt
)->buf
;
2946 data
= nfscmd_convdirent(ca
, exi
, data
, count
, &resp
->status
);
2949 fop_rwunlock(vp
, V_WRITELOCK_FALSE
, NULL
);
2953 * Don't do this. It causes local disk writes when just
2954 * reading the file and the overhead is deemed larger
2958 * Force modified metadata out to stable storage.
2960 (void) fop_fsync(vp
, FNODSYNC
, cr
, NULL
);
2963 resp
->status
= NFS3_OK
;
2964 vattr_to_post_op_attr(vap
, &resp
->resok
.dir_attributes
);
2965 resp
->resok
.cookieverf
= 0;
2966 resp
->resok
.reply
.entries
= (entry3
*)data
;
2967 resp
->resok
.reply
.eof
= iseof
;
2968 resp
->resok
.size
= count
- uio
.uio_resid
;
2969 resp
->resok
.count
= args
->count
;
2970 resp
->resok
.freecount
= count
;
2972 DTRACE_NFSV3_4(op__readdir__done
, struct svc_req
*, req
,
2973 cred_t
*, cr
, vnode_t
*, vp
, READDIR3res
*, resp
);
2980 if (curthread
->t_flag
& T_WOULDBLOCK
) {
2981 curthread
->t_flag
&= ~T_WOULDBLOCK
;
2982 resp
->status
= NFS3ERR_JUKEBOX
;
2984 resp
->status
= puterrno3(error
);
2986 DTRACE_NFSV3_4(op__readdir__done
, struct svc_req
*, req
,
2987 cred_t
*, cr
, vnode_t
*, vp
, READDIR3res
*, resp
);
2990 fop_rwunlock(vp
, V_WRITELOCK_FALSE
, NULL
);
2993 vattr_to_post_op_attr(vap
, &resp
->resfail
.dir_attributes
);
/*
 * Hands back the address of the directory file handle embedded in the
 * READDIR3 arguments (args->dir).  Nothing is copied or allocated.
 */
2997 rfs3_readdir_getfh(READDIR3args
*args
)
3000 return (&args
->dir
);
/*
 * Releases the directory-entry buffer attached to a READDIR3 reply.
 *
 * Only a reply whose status is NFS3_OK is touched; for any other status
 * nothing is freed.  The size handed to kmem_free() is resok.freecount,
 * which rfs3_readdir() sets to the byte count it allocated for the
 * buffer it stores in resok.reply.entries.
 */
3004 rfs3_readdir_free(READDIR3res
*resp
)
3007 if (resp
->status
== NFS3_OK
)
/* freecount, not resok.size, is the allocation size */
3008 kmem_free(resp
->resok
.reply
.entries
, resp
->resok
.freecount
);
/*
 * Steps from one struct dirent64 record to the one that follows it by
 * advancing (dp)->d_reclen bytes from the start of dp.  Note that (dp) is
 * evaluated twice.
 */
3014 #define nextdp(dp) ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
3017 * This macro computes the size of a response which contains
3018 * one directory entry including the attributes as well as file handle.
3019 * If the incoming request is larger than this, then we are guaranteed to be
3020 * able to return at least one more directory entry if one exists.
3022 * NFS3_READDIRPLUS_ENTRY is made up of the following:
3024 * boolean - 1 * BYTES_PER_XDR_UNIT
3025 * file id - 2 * BYTES_PER_XDR_UNIT
3026 * directory name length - 1 * BYTES_PER_XDR_UNIT
3027 * cookie - 2 * BYTES_PER_XDR_UNIT
3028 * attribute flag - 1 * BYTES_PER_XDR_UNIT
3029 * attributes - NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT
3030 * status byte for file handle - 1 * BYTES_PER_XDR_UNIT
3031 * length of a file handle - 1 * BYTES_PER_XDR_UNIT
3032 * Maximum length of a file handle (NFS3_MAXFHSIZE)
3033 * name length of the entry, rounded up to a multiple of BYTES_PER_XDR_UNIT
/*
 * namelen is the entry name length in bytes.  The result is the fixed
 * XDR-unit count times BYTES_PER_XDR_UNIT, plus NFS3_MAXFHSIZE bytes for
 * the file handle, plus namelen rounded up to a multiple of
 * BYTES_PER_XDR_UNIT.  Unlike NFS3_READDIR_MIN_COUNT, the argument is
 * passed to roundup() without its own parentheses.
 */
3035 #define NFS3_READDIRPLUS_ENTRY(namelen) \
3036 ((1 + 2 + 1 + 2 + 1 + NFS3_SIZEOF_FATTR3 + 1 + 1) * \
3037 BYTES_PER_XDR_UNIT + \
3038 NFS3_MAXFHSIZE + roundup(namelen, BYTES_PER_XDR_UNIT))
/*
 * File-scope read unit, initialised to MAXBSIZE.  rfs3_readdirplus()
 * copies it into its local rd_unit and then clamps that copy to the
 * space left in its buffer before each fop_readdir() call.
 */
3040 static int rfs3_readdir_unit
= MAXBSIZE
;
/*
 * rfs3_readdirplus: server side of the NFSv3 READDIRPLUS procedure.
 *
 * NOTE(review): this listing omits many original lines (local
 * declarations, braces, labels and most error branches).  The summary
 * below describes only the statements that are visible.
 *
 * Visible flow:
 *  - args->dir is translated to a vnode with nfs3_fhtovp() and the
 *    op__readdirplus__start probe fires.
 *  - The vnode is locked with fop_rwlock(V_WRITELOCK_FALSE), its
 *    attributes are fetched (vap is NULL if fop_getattr() fails), the
 *    vnode type is compared against VDIR and VREAD access is checked.
 *  - args->maxcount is capped at rfs3_tsize(req); args->dircount is
 *    capped at maxcount and then raised to at least
 *    DIRENT64_RECLEN(MAXNAMELEN).
 *  - Two buffers of args->dircount bytes are allocated: namlen (name
 *    lengths, one uint_t per entry) and data (raw dirent64 records).
 *  - Entries are read with fop_readdir() in chunks of at most rd_unit
 *    bytes.  bufsize tracks the estimated reply size, using
 *    NFS3_READDIRPLUS_ENTRY() per entry, and is compared against
 *    args->maxcount.  NFS3ERR_TOOSMALL is set on the visible paths where
 *    nothing could be read or the first entry does not fit.
 *  - Directory attributes are re-fetched and the lock is dropped.
 *  - An entryplus3_info array of nents elements is allocated; for each
 *    entry the name is looked up with fop_lookup(), attributes come from
 *    rfs4_delegated_getattr(), and a file handle is built with makefh3().
 *  - nfscmd_convdirplus() post-processes the dirent buffer into ndata and
 *    the resok fields are filled in.
 *  - On the failure path the status is NFS3ERR_JUKEBOX when T_WOULDBLOCK
 *    is set on curthread (the flag is cleared), otherwise
 *    puterrno3(error).
 *
 * resp->resok.reply.entries and resp->resok.infop are released later by
 * rfs3_readdirplus_free().
 */
3044 rfs3_readdirplus(READDIRPLUS3args
*args
, READDIRPLUS3res
*resp
,
3045 struct exportinfo
*exi
, struct svc_req
*req
, cred_t
*cr
, bool_t ro
)
3055 struct dirent64
*dp
;
3059 entryplus3_info
*infop
= NULL
;
3065 int rd_unit
= rfs3_readdir_unit
;
3069 uint_t
*namlen
= NULL
;
3071 struct sockaddr
*ca
;
3076 vp
= nfs3_fhtovp(&args
->dir
, exi
);
3078 DTRACE_NFSV3_4(op__readdirplus__start
, struct svc_req
*, req
,
3079 cred_t
*, cr
, vnode_t
*, vp
, READDIRPLUS3args
*, args
);
3086 (void) fop_rwlock(vp
, V_WRITELOCK_FALSE
, NULL
);
3088 va
.va_mask
= AT_ALL
;
3089 vap
= fop_getattr(vp
, &va
, 0, cr
, NULL
) ? NULL
: &va
;
3091 if (vp
->v_type
!= VDIR
) {
3096 error
= fop_access(vp
, VREAD
, 0, cr
, NULL
);
3101 * Don't allow arbitrary counts for allocation
3103 if (args
->maxcount
> rfs3_tsize(req
))
3104 args
->maxcount
= rfs3_tsize(req
);
3107 * Make sure that there is room to read at least one entry
3108 * if any are available
3110 args
->dircount
= MIN(args
->dircount
, args
->maxcount
);
3112 if (args
->dircount
< DIRENT64_RECLEN(MAXNAMELEN
))
3113 args
->dircount
= DIRENT64_RECLEN(MAXNAMELEN
);
3116 * This allocation relies on a minimum directory entry
3117 * being roughly 24 bytes. Therefore, the namlen array
3118 * will have enough space based on the maximum number of
3121 namlen
= kmem_alloc(args
->dircount
, KM_SLEEP
);
3123 space_left
= args
->dircount
;
3124 data
= kmem_alloc(args
->dircount
, KM_SLEEP
);
3125 dp
= (struct dirent64
*)data
;
3128 uio
.uio_segflg
= UIO_SYSSPACE
;
3129 uio
.uio_extflg
= UIO_COPY_CACHED
;
3130 uio
.uio_loffset
= (offset_t
)args
->cookie
;
3133 * bufsize is used to keep track of the size of the response as we
3134 * get post op attributes and filehandles for each entry. This is
3135 * an optimization as the server may have read more entries than will
3136 * fit in the buffer specified by maxcount. We stop calculating
3137 * post op attributes and filehandles once we have exceeded maxcount.
3138 * This will minimize the effect of truncation.
3140 * It is primed with:
3141 * 1 for the status +
3142 * 1 for the dir_attributes.attributes boolean +
3143 * 2 for the cookie verifier
3144 * all times BYTES_PER_XDR_UNIT to convert from XDR units
3145 * to bytes. If there are directory attributes to be
3147 * NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3148 * time BYTES_PER_XDR_UNIT is added to account for them.
3150 bufsize
= (1 + 1 + 2) * BYTES_PER_XDR_UNIT
;
3152 bufsize
+= NFS3_SIZEOF_FATTR3
* BYTES_PER_XDR_UNIT
;
3156 * Here we make a check so that our read unit is not larger than
3157 * the space left in the buffer.
3159 rd_unit
= MIN(rd_unit
, space_left
);
3160 iov
.iov_base
= (char *)dp
;
3161 iov
.iov_len
= rd_unit
;
3162 uio
.uio_resid
= rd_unit
;
3165 error
= fop_readdir(vp
, &uio
, cr
, &iseof
, NULL
, 0);
3168 kmem_free(data
, args
->dircount
);
3172 if (uio
.uio_resid
== prev_len
&& !iseof
) {
3174 kmem_free(data
, args
->dircount
);
3175 resp
->status
= NFS3ERR_TOOSMALL
;
3180 * We could not get any more entries, so get the attributes
3181 * and filehandle for the entries already obtained.
3187 * We estimate the size of the response by assuming the
3188 * entry exists and attributes and filehandle are also valid
3190 for (size
= prev_len
- uio
.uio_resid
;
3192 size
-= dp
->d_reclen
, dp
= nextdp(dp
)) {
3194 if (dp
->d_ino
== 0) {
3199 namlen
[nents
] = strlen(dp
->d_name
);
3200 entrysize
= NFS3_READDIRPLUS_ENTRY(namlen
[nents
]);
3203 * We need to check to see if the number of bytes left
3204 * to go into the buffer will actually fit into the
3205 * buffer. This is calculated as the size of this
3207 * 1 for the true/false list indicator +
3208 * 1 for the eof indicator
3209 * times BYTES_PER_XDR_UNIT to convert from XDR units
3212 * Also check the dircount limit against the first entry read
3215 tofit
= entrysize
+ (1 + 1) * BYTES_PER_XDR_UNIT
;
3216 if (bufsize
+ tofit
> args
->maxcount
) {
3218 * We make a check here to see if this was the
3219 * first entry being measured. If so, then maxcount
3220 * was too small to begin with and so we need to
3221 * return with NFS3ERR_TOOSMALL.
3224 kmem_free(data
, args
->dircount
);
3225 resp
->status
= NFS3ERR_TOOSMALL
;
3231 bufsize
+= entrysize
;
3236 * If there is enough room to fit at least 1 more entry including
3237 * post op attributes and filehandle in the buffer AND that we haven't
3238 * exceeded dircount then go back and get some more.
3241 (args
->maxcount
- bufsize
) >= NFS3_READDIRPLUS_ENTRY(MAXNAMELEN
)) {
3242 space_left
-= (prev_len
- uio
.uio_resid
);
3243 if (space_left
>= DIRENT64_RECLEN(MAXNAMELEN
))
3246 /* else, fall through */
3249 va
.va_mask
= AT_ALL
;
3250 vap
= fop_getattr(vp
, &va
, 0, cr
, NULL
) ? NULL
: &va
;
3252 fop_rwunlock(vp
, V_WRITELOCK_FALSE
, NULL
);
3254 infop
= kmem_alloc(nents
* sizeof (struct entryplus3_info
), KM_SLEEP
);
3255 resp
->resok
.infop
= infop
;
3257 dp
= (struct dirent64
*)data
;
3258 for (i
= 0; i
< nents
; i
++) {
3260 if (dp
->d_ino
== 0) {
3261 infop
[i
].attr
.attributes
= FALSE
;
3262 infop
[i
].fh
.handle_follows
= FALSE
;
3267 infop
[i
].namelen
= namlen
[i
];
3269 error
= fop_lookup(vp
, dp
->d_name
, &nvp
, NULL
, 0, NULL
, cr
,
3272 infop
[i
].attr
.attributes
= FALSE
;
3273 infop
[i
].fh
.handle_follows
= FALSE
;
3278 nva
.va_mask
= AT_ALL
;
3279 nvap
= rfs4_delegated_getattr(nvp
, &nva
, 0, cr
) ? NULL
: &nva
;
3281 /* Lie about the object type for a referral */
/*
 * NOTE(review): nvap is NULL when rfs4_delegated_getattr() fails; no NULL
 * check is visible before the nvap->va_type store below -- confirm that
 * vn_is_nfs_reparse() cannot return true in that case.
 */
3282 if (vn_is_nfs_reparse(nvp
, cr
))
3283 nvap
->va_type
= VLNK
;
3285 vattr_to_post_op_attr(nvap
, &infop
[i
].attr
);
3287 error
= makefh3(&infop
[i
].fh
.handle
, nvp
, exi
);
3289 infop
[i
].fh
.handle_follows
= TRUE
;
3291 infop
[i
].fh
.handle_follows
= FALSE
;
3297 ca
= (struct sockaddr
*)svc_getrpccaller(req
->rq_xprt
)->buf
;
3298 ret
= nfscmd_convdirplus(ca
, exi
, data
, nents
, args
->dircount
, &ndata
);
3304 * We had to drop one or more entries in order to fit
3305 * during the character conversion. We need to patch
3306 * up the size and eof info.
3311 ret
= nfscmd_dropped_entrysize((struct dirent64
*)data
,
3318 * Don't do this. It causes local disk writes when just
3319 * reading the file and the overhead is deemed larger
3323 * Force modified metadata out to stable storage.
3325 (void) fop_fsync(vp
, FNODSYNC
, cr
, NULL
);
3328 kmem_free(namlen
, args
->dircount
);
3330 resp
->status
= NFS3_OK
;
3331 vattr_to_post_op_attr(vap
, &resp
->resok
.dir_attributes
);
3332 resp
->resok
.cookieverf
= 0;
3333 resp
->resok
.reply
.entries
= (entryplus3
*)ndata
;
3334 resp
->resok
.reply
.eof
= iseof
;
3335 resp
->resok
.size
= nents
;
3336 resp
->resok
.count
= args
->dircount
- ret
;
3337 resp
->resok
.maxcount
= args
->maxcount
;
3339 DTRACE_NFSV3_4(op__readdirplus__done
, struct svc_req
*, req
,
3340 cred_t
*, cr
, vnode_t
*, vp
, READDIRPLUS3res
*, resp
);
3342 kmem_free(data
, args
->dircount
);
3350 if (curthread
->t_flag
& T_WOULDBLOCK
) {
3351 curthread
->t_flag
&= ~T_WOULDBLOCK
;
3352 resp
->status
= NFS3ERR_JUKEBOX
;
3354 resp
->status
= puterrno3(error
);
3357 DTRACE_NFSV3_4(op__readdirplus__done
, struct svc_req
*, req
,
3358 cred_t
*, cr
, vnode_t
*, vp
, READDIRPLUS3res
*, resp
);
3361 fop_rwunlock(vp
, V_WRITELOCK_FALSE
, NULL
);
3366 kmem_free(namlen
, args
->dircount
);
3368 vattr_to_post_op_attr(vap
, &resp
->resfail
.dir_attributes
);
/*
 * Return the address of the directory file handle (args->dir) carried in
 * the READDIRPLUS3 arguments.
 */
3372 rfs3_readdirplus_getfh(READDIRPLUS3args
*args
)
3375 return (&args
->dir
);
/*
 * Release the memory attached to a successful READDIRPLUS3 result.
 *
 * Only acts when resp->status is NFS3_OK: frees resok.reply.entries
 * (resok.count bytes) and the resok.infop array (resok.size elements of
 * struct entryplus3_info).  Both sizes are the values rfs3_readdirplus()
 * stored in the result.
 */
3379 rfs3_readdirplus_free(READDIRPLUS3res
*resp
)
3382 if (resp
->status
== NFS3_OK
) {
3383 kmem_free(resp
->resok
.reply
.entries
, resp
->resok
.count
);
3384 kmem_free(resp
->resok
.infop
,
3385 resp
->resok
.size
* sizeof (struct entryplus3_info
));
/*
 * rfs3_fsstat: server side of the NFSv3 FSSTAT procedure.
 *
 * NOTE(review): the listing omits declarations, braces and branch lines;
 * only visible statements are described.
 *
 * The vnode for args->fsroot is obtained with nfs3_fhtovp(), the file
 * system is queried with VFS_STATVFS() and the vnode attributes are
 * fetched (vap is NULL if fop_getattr() fails).
 *
 * On success, tbytes/fbytes/abytes are computed from f_blocks, f_bfree
 * and f_bavail.  When the field is not (fsblkcnt64_t)-1 it is multiplied
 * by f_frsize; the second visible assignment for each field stores the
 * raw value unchanged (presumably the else branch -- not visible here).
 * tfiles/ffiles/afiles are copied from f_files/f_ffree/f_favail and
 * invarsec is set to 0.
 *
 * On failure the status is NFS3ERR_JUKEBOX when T_WOULDBLOCK is set on
 * curthread (the flag is cleared), otherwise puterrno3(error).
 */
3391 rfs3_fsstat(FSSTAT3args
*args
, FSSTAT3res
*resp
, struct exportinfo
*exi
,
3392 struct svc_req
*req
, cred_t
*cr
, bool_t ro
)
3398 struct statvfs64 sb
;
3402 vp
= nfs3_fhtovp(&args
->fsroot
, exi
);
3404 DTRACE_NFSV3_4(op__fsstat__start
, struct svc_req
*, req
,
3405 cred_t
*, cr
, vnode_t
*, vp
, FSSTAT3args
*, args
);
3412 error
= VFS_STATVFS(vp
->v_vfsp
, &sb
);
3414 va
.va_mask
= AT_ALL
;
3415 vap
= fop_getattr(vp
, &va
, 0, cr
, NULL
) ? NULL
: &va
;
3420 resp
->status
= NFS3_OK
;
3421 vattr_to_post_op_attr(vap
, &resp
->resok
.obj_attributes
);
3422 if (sb
.f_blocks
!= (fsblkcnt64_t
)-1)
3423 resp
->resok
.tbytes
= (size3
)sb
.f_frsize
* (size3
)sb
.f_blocks
;
3425 resp
->resok
.tbytes
= (size3
)sb
.f_blocks
;
3426 if (sb
.f_bfree
!= (fsblkcnt64_t
)-1)
3427 resp
->resok
.fbytes
= (size3
)sb
.f_frsize
* (size3
)sb
.f_bfree
;
3429 resp
->resok
.fbytes
= (size3
)sb
.f_bfree
;
3430 if (sb
.f_bavail
!= (fsblkcnt64_t
)-1)
3431 resp
->resok
.abytes
= (size3
)sb
.f_frsize
* (size3
)sb
.f_bavail
;
3433 resp
->resok
.abytes
= (size3
)sb
.f_bavail
;
3434 resp
->resok
.tfiles
= (size3
)sb
.f_files
;
3435 resp
->resok
.ffiles
= (size3
)sb
.f_ffree
;
3436 resp
->resok
.afiles
= (size3
)sb
.f_favail
;
3437 resp
->resok
.invarsec
= 0;
3439 DTRACE_NFSV3_4(op__fsstat__done
, struct svc_req
*, req
,
3440 cred_t
*, cr
, vnode_t
*, vp
, FSSTAT3res
*, resp
);
3446 if (curthread
->t_flag
& T_WOULDBLOCK
) {
3447 curthread
->t_flag
&= ~T_WOULDBLOCK
;
3448 resp
->status
= NFS3ERR_JUKEBOX
;
3450 resp
->status
= puterrno3(error
);
3452 DTRACE_NFSV3_4(op__fsstat__done
, struct svc_req
*, req
,
3453 cred_t
*, cr
, vnode_t
*, vp
, FSSTAT3res
*, resp
);
3457 vattr_to_post_op_attr(vap
, &resp
->resfail
.obj_attributes
);
/*
 * Return the address of the file system root file handle (args->fsroot)
 * carried in the FSSTAT3 arguments.
 */
3461 rfs3_fsstat_getfh(FSSTAT3args
*args
)
3464 return (&args
->fsroot
);
/*
 * rfs3_fsinfo: server side of the NFSv3 FSINFO procedure.
 *
 * NOTE(review): the listing omits declarations, braces and branch lines;
 * only visible statements are described.
 *
 * After nfs3_fhtovp() on args->fsroot, an early failure path sets the
 * status to NFS3ERR_JUKEBOX (when T_WOULDBLOCK is set on curthread) or
 * NFS3ERR_STALE, with no attributes; presumably this is the "vp is NULL"
 * case, but the guarding condition is not visible.
 *
 * On success:
 *  - rtmax, rtpref, wtmax and wtpref are all rfs3_tsize(req);
 *  - rtmult and wtmult are DEV_BSIZE, dtpref is MAXBSIZE;
 *  - maxfilesize is derived from fop_pathconf(_PC_FILESIZEBITS):
 *    MAXOFF32_T when the value is (ulong_t)-1, INT64_MAX when it is at
 *    least 64, and (1LL << (l - 1)) - 1 in the remaining visible
 *    assignment;
 *  - time_delta is 0 seconds / 1000 nanoseconds;
 *  - properties is FSF3_LINK | FSF3_SYMLINK | FSF3_HOMOGENEOUS |
 *    FSF3_CANSETTIME.
 *
 * NOTE(review): if l could be 0 the shift count (l - 1) would wrap;
 * confirm fop_pathconf() never reports 0 bits.
 *
 * The final op__fsinfo__done probe passes NULL as the vnode.
 */
3469 rfs3_fsinfo(FSINFO3args
*args
, FSINFO3res
*resp
, struct exportinfo
*exi
,
3470 struct svc_req
*req
, cred_t
*cr
, bool_t ro
)
3479 vp
= nfs3_fhtovp(&args
->fsroot
, exi
);
3481 DTRACE_NFSV3_4(op__fsinfo__start
, struct svc_req
*, req
,
3482 cred_t
*, cr
, vnode_t
*, vp
, FSINFO3args
*, args
);
3485 if (curthread
->t_flag
& T_WOULDBLOCK
) {
3486 curthread
->t_flag
&= ~T_WOULDBLOCK
;
3487 resp
->status
= NFS3ERR_JUKEBOX
;
3489 resp
->status
= NFS3ERR_STALE
;
3490 vattr_to_post_op_attr(NULL
, &resp
->resfail
.obj_attributes
);
3494 va
.va_mask
= AT_ALL
;
3495 vap
= fop_getattr(vp
, &va
, 0, cr
, NULL
) ? NULL
: &va
;
3497 resp
->status
= NFS3_OK
;
3498 vattr_to_post_op_attr(vap
, &resp
->resok
.obj_attributes
);
3499 xfer_size
= rfs3_tsize(req
);
3500 resp
->resok
.rtmax
= xfer_size
;
3501 resp
->resok
.rtpref
= xfer_size
;
3502 resp
->resok
.rtmult
= DEV_BSIZE
;
3503 resp
->resok
.wtmax
= xfer_size
;
3504 resp
->resok
.wtpref
= xfer_size
;
3505 resp
->resok
.wtmult
= DEV_BSIZE
;
3506 resp
->resok
.dtpref
= MAXBSIZE
;
3509 * Large file spec: want maxfilesize based on limit of
3510 * underlying filesystem. We can guess 2^31-1 if need be.
3512 error
= fop_pathconf(vp
, _PC_FILESIZEBITS
, &l
, cr
, NULL
);
3514 resp
->status
= puterrno3(error
);
3519 * If the underlying file system does not support _PC_FILESIZEBITS,
3520 * return a reasonable default. Note that error code on fop_pathconf
3521 * will be 0, even if the underlying file system does not support
3524 if (l
== (ulong_t
)-1) {
3525 resp
->resok
.maxfilesize
= MAXOFF32_T
;
3527 if (l
>= (sizeof (uint64_t) * 8))
3528 resp
->resok
.maxfilesize
= INT64_MAX
;
3530 resp
->resok
.maxfilesize
= (1LL << (l
-1)) - 1;
3533 resp
->resok
.time_delta
.seconds
= 0;
3534 resp
->resok
.time_delta
.nseconds
= 1000;
3535 resp
->resok
.properties
= FSF3_LINK
| FSF3_SYMLINK
|
3536 FSF3_HOMOGENEOUS
| FSF3_CANSETTIME
;
3538 DTRACE_NFSV3_4(op__fsinfo__done
, struct svc_req
*, req
,
3539 cred_t
*, cr
, vnode_t
*, vp
, FSINFO3res
*, resp
);
3546 DTRACE_NFSV3_4(op__fsinfo__done
, struct svc_req
*, req
,
3547 cred_t
*, cr
, vnode_t
*, NULL
, FSINFO3res
*, resp
);
/*
 * Return the address of the file system root file handle (args->fsroot)
 * carried in the FSINFO3 arguments.
 */
3553 rfs3_fsinfo_getfh(FSINFO3args
*args
)
3555 return (&args
->fsroot
);
/*
 * rfs3_pathconf: server side of the NFSv3 PATHCONF procedure.
 *
 * NOTE(review): the listing omits declarations, braces and the error
 * checks between calls; only visible statements are described.
 *
 * The vnode for args->object is obtained with nfs3_fhtovp() and its
 * attributes fetched (vap is NULL if fop_getattr() fails).  Four
 * fop_pathconf() queries follow, in this order:
 *   _PC_LINK_MAX          -> info.link_max (cast to uint32)
 *   _PC_NAME_MAX          -> info.name_max (cast to uint32)
 *   _PC_NO_TRUNC          -> info.no_trunc (TRUE or FALSE)
 *   _PC_CHOWN_RESTRICTED  -> info.chown_restricted (TRUE or FALSE)
 * The conditions selecting TRUE versus FALSE are not visible here.
 * case_insensitive is always FALSE and case_preserving always TRUE.
 *
 * On failure the status is NFS3ERR_JUKEBOX when T_WOULDBLOCK is set on
 * curthread (the flag is cleared), otherwise puterrno3(error).
 */
3560 rfs3_pathconf(PATHCONF3args
*args
, PATHCONF3res
*resp
, struct exportinfo
*exi
,
3561 struct svc_req
*req
, cred_t
*cr
, bool_t ro
)
3571 vp
= nfs3_fhtovp(&args
->object
, exi
);
3573 DTRACE_NFSV3_4(op__pathconf__start
, struct svc_req
*, req
,
3574 cred_t
*, cr
, vnode_t
*, vp
, PATHCONF3args
*, args
);
3581 va
.va_mask
= AT_ALL
;
3582 vap
= fop_getattr(vp
, &va
, 0, cr
, NULL
) ? NULL
: &va
;
3584 error
= fop_pathconf(vp
, _PC_LINK_MAX
, &val
, cr
, NULL
);
3587 resp
->resok
.info
.link_max
= (uint32
)val
;
3589 error
= fop_pathconf(vp
, _PC_NAME_MAX
, &val
, cr
, NULL
);
3592 resp
->resok
.info
.name_max
= (uint32
)val
;
3594 error
= fop_pathconf(vp
, _PC_NO_TRUNC
, &val
, cr
, NULL
);
3598 resp
->resok
.info
.no_trunc
= TRUE
;
3600 resp
->resok
.info
.no_trunc
= FALSE
;
3602 error
= fop_pathconf(vp
, _PC_CHOWN_RESTRICTED
, &val
, cr
, NULL
);
3606 resp
->resok
.info
.chown_restricted
= TRUE
;
3608 resp
->resok
.info
.chown_restricted
= FALSE
;
3610 resp
->status
= NFS3_OK
;
3611 vattr_to_post_op_attr(vap
, &resp
->resok
.obj_attributes
);
3612 resp
->resok
.info
.case_insensitive
= FALSE
;
3613 resp
->resok
.info
.case_preserving
= TRUE
;
3614 DTRACE_NFSV3_4(op__pathconf__done
, struct svc_req
*, req
,
3615 cred_t
*, cr
, vnode_t
*, vp
, PATHCONF3res
*, resp
);
3620 if (curthread
->t_flag
& T_WOULDBLOCK
) {
3621 curthread
->t_flag
&= ~T_WOULDBLOCK
;
3622 resp
->status
= NFS3ERR_JUKEBOX
;
3624 resp
->status
= puterrno3(error
);
3626 DTRACE_NFSV3_4(op__pathconf__done
, struct svc_req
*, req
,
3627 cred_t
*, cr
, vnode_t
*, vp
, PATHCONF3res
*, resp
);
3630 vattr_to_post_op_attr(vap
, &resp
->resfail
.obj_attributes
);
/*
 * Return the address of the object file handle (args->object) carried in
 * the PATHCONF3 arguments.
 */
3634 rfs3_pathconf_getfh(PATHCONF3args
*args
)
3637 return (&args
->object
);
/*
 * rfs3_commit: server side of the NFSv3 COMMIT procedure.
 *
 * NOTE(review): the listing omits declarations, braces and several
 * branch lines (including where bvap and avap are first assigned); only
 * visible statements are described.
 *
 * Visible flow:
 *  - args->file is translated to a vnode with nfs3_fhtovp();
 *  - the "before" attributes are read into bva; per the existing
 *    comment the request fails if they cannot be obtained;
 *  - rdonly(ro, vp) yields NFS3ERR_ROFS, a vnode that is not VREG
 *    yields NFS3ERR_INVAL;
 *  - when the caller's uid differs from the file owner (bva.va_uid),
 *    VWRITE access is checked with fop_access();
 *  - the file is flushed with fop_fsync(vp, FSYNC, ...); no offset or
 *    count is passed in the visible call;
 *  - the "after" attributes are read (avap is NULL on failure), the
 *    wcc data is filled in and resok.verf is set to write3verf.
 *
 * On failure the status is NFS3ERR_JUKEBOX when T_WOULDBLOCK is set on
 * curthread (the flag is cleared), otherwise puterrno3(error), and the
 * wcc data goes into resfail.file_wcc.
 */
3641 rfs3_commit(COMMIT3args
*args
, COMMIT3res
*resp
, struct exportinfo
*exi
,
3642 struct svc_req
*req
, cred_t
*cr
, bool_t ro
)
3654 vp
= nfs3_fhtovp(&args
->file
, exi
);
3656 DTRACE_NFSV3_4(op__commit__start
, struct svc_req
*, req
,
3657 cred_t
*, cr
, vnode_t
*, vp
, COMMIT3args
*, args
);
3664 bva
.va_mask
= AT_ALL
;
3665 error
= fop_getattr(vp
, &bva
, 0, cr
, NULL
);
3668 * If we can't get the attributes, then we can't do the
3669 * right access checking. So, we'll fail the request.
3676 if (rdonly(ro
, vp
)) {
3677 resp
->status
= NFS3ERR_ROFS
;
3681 if (vp
->v_type
!= VREG
) {
3682 resp
->status
= NFS3ERR_INVAL
;
3686 if (crgetuid(cr
) != bva
.va_uid
&&
3687 (error
= fop_access(vp
, VWRITE
, 0, cr
, NULL
)))
3690 error
= fop_fsync(vp
, FSYNC
, cr
, NULL
);
3692 ava
.va_mask
= AT_ALL
;
3693 avap
= fop_getattr(vp
, &ava
, 0, cr
, NULL
) ? NULL
: &ava
;
3698 resp
->status
= NFS3_OK
;
3699 vattr_to_wcc_data(bvap
, avap
, &resp
->resok
.file_wcc
);
3700 resp
->resok
.verf
= write3verf
;
3702 DTRACE_NFSV3_4(op__commit__done
, struct svc_req
*, req
,
3703 cred_t
*, cr
, vnode_t
*, vp
, COMMIT3res
*, resp
);
3710 if (curthread
->t_flag
& T_WOULDBLOCK
) {
3711 curthread
->t_flag
&= ~T_WOULDBLOCK
;
3712 resp
->status
= NFS3ERR_JUKEBOX
;
3714 resp
->status
= puterrno3(error
);
3716 DTRACE_NFSV3_4(op__commit__done
, struct svc_req
*, req
,
3717 cred_t
*, cr
, vnode_t
*, vp
, COMMIT3res
*, resp
);
3721 vattr_to_wcc_data(bvap
, avap
, &resp
->resfail
.file_wcc
);
/*
 * Return the address of the file handle (args->file) carried in the
 * COMMIT3 arguments.
 */
3725 rfs3_commit_getfh(COMMIT3args
*args
)
3728 return (&args
->file
);
/*
 * sattr3_to_vattr: copy the fields a client asked to set (sattr3) into a
 * struct vattr, OR-ing the matching AT_* bit into vap->va_mask for each.
 *
 * NOTE(review): the listing omits the return statements and any
 * initialisation of va_mask; only visible statements are described.
 *
 *  - mode, uid, gid and size are copied when their set_it flag is set
 *    (AT_MODE, AT_UID, AT_GID, AT_SIZE).  The size is first compared
 *    against (size3)((u_longlong_t)-1); the action taken is not visible.
 *    NOTE(review): if size3 is a 64-bit unsigned type that comparison
 *    can never be true -- confirm.
 *  - atime and mtime: with SET_TO_CLIENT_TIME the seconds value is
 *    checked with NFS3_TIME_OK(), converted with NFS_TIME_T_CONVERT()
 *    and the nanoseconds copied; with SET_TO_SERVER_TIME the current
 *    time is taken from gethrestime().  Either way AT_ATIME / AT_MTIME
 *    is set.
 */
3732 sattr3_to_vattr(sattr3
*sap
, struct vattr
*vap
)
3737 if (sap
->mode
.set_it
) {
3738 vap
->va_mode
= (mode_t
)sap
->mode
.mode
;
3739 vap
->va_mask
|= AT_MODE
;
3741 if (sap
->uid
.set_it
) {
3742 vap
->va_uid
= (uid_t
)sap
->uid
.uid
;
3743 vap
->va_mask
|= AT_UID
;
3745 if (sap
->gid
.set_it
) {
3746 vap
->va_gid
= (gid_t
)sap
->gid
.gid
;
3747 vap
->va_mask
|= AT_GID
;
3749 if (sap
->size
.set_it
) {
3750 if (sap
->size
.size
> (size3
)((u_longlong_t
)-1))
3752 vap
->va_size
= sap
->size
.size
;
3753 vap
->va_mask
|= AT_SIZE
;
3755 if (sap
->atime
.set_it
== SET_TO_CLIENT_TIME
) {
3757 /* check time validity */
3758 if (!NFS3_TIME_OK(sap
->atime
.atime
.seconds
))
3762 * nfs protocol defines times as unsigned so don't extend sign,
3763 * unless sysadmin set nfs_allow_preepoch_time.
3765 NFS_TIME_T_CONVERT(vap
->va_atime
.tv_sec
,
3766 sap
->atime
.atime
.seconds
);
3767 vap
->va_atime
.tv_nsec
= (uint32_t)sap
->atime
.atime
.nseconds
;
3768 vap
->va_mask
|= AT_ATIME
;
3769 } else if (sap
->atime
.set_it
== SET_TO_SERVER_TIME
) {
3770 gethrestime(&vap
->va_atime
);
3771 vap
->va_mask
|= AT_ATIME
;
3773 if (sap
->mtime
.set_it
== SET_TO_CLIENT_TIME
) {
3775 /* check time validity */
3776 if (!NFS3_TIME_OK(sap
->mtime
.mtime
.seconds
))
3780 * nfs protocol defines times as unsigned so don't extend sign,
3781 * unless sysadmin set nfs_allow_preepoch_time.
3783 NFS_TIME_T_CONVERT(vap
->va_mtime
.tv_sec
,
3784 sap
->mtime
.mtime
.seconds
);
3785 vap
->va_mtime
.tv_nsec
= (uint32_t)sap
->mtime
.mtime
.nseconds
;
3786 vap
->va_mask
|= AT_MTIME
;
3787 } else if (sap
->mtime
.set_it
== SET_TO_SERVER_TIME
) {
3788 gethrestime(&vap
->va_mtime
);
3789 vap
->va_mask
|= AT_MTIME
;
/*
 * Lookup table from a vnode type to the NFSv3 ftype3 value;
 * vattr_to_fattr3() indexes it with vap->va_type.  Entries are 0 where
 * no NF3* constant is listed for that position.
 */
3795 static ftype3 vt_to_nf3
[] = {
3796 0, NF3REG
, NF3DIR
, NF3BLK
, NF3CHR
, NF3LNK
, NF3FIFO
, 0, 0, NF3SOCK
, 0
/*
 * vattr_to_fattr3: convert a struct vattr into an NFSv3 fattr3.
 *
 * NOTE(review): the return statements are not visible in this listing.
 * vattr_to_post_op_attr() treats a zero return as success.
 *
 * The conversion is refused when NFS_VAP_TIME_OK(vap) or
 * NFS3_SIZE_OK(vap->va_size) fails.  Otherwise:
 *  - type comes from vt_to_nf3[va_type]; mode is masked with MODEMASK;
 *  - UID_NOBODY / GID_NOBODY are mapped to NFS_UID_NOBODY /
 *    NFS_GID_NOBODY, other ids are copied;
 *  - used is DEV_BSIZE * va_nblocks;
 *  - rdev is split with getmajor() / getminor();
 *  - fsid, fileid and the three timestamps are copied across.
 *
 * NOTE(review): the ASSERT accepts va_type up to VBAD while only 11
 * vt_to_nf3 entries are visible -- confirm the table covers every
 * permitted index.
 */
3800 vattr_to_fattr3(struct vattr
*vap
, fattr3
*fap
)
3803 ASSERT(vap
->va_type
>= VNON
&& vap
->va_type
<= VBAD
);
3804 /* Return error if time or size overflow */
3805 if (! (NFS_VAP_TIME_OK(vap
) && NFS3_SIZE_OK(vap
->va_size
))) {
3808 fap
->type
= vt_to_nf3
[vap
->va_type
];
3809 fap
->mode
= (mode3
)(vap
->va_mode
& MODEMASK
);
3810 fap
->nlink
= (uint32
)vap
->va_nlink
;
3811 if (vap
->va_uid
== UID_NOBODY
)
3812 fap
->uid
= (uid3
)NFS_UID_NOBODY
;
3814 fap
->uid
= (uid3
)vap
->va_uid
;
3815 if (vap
->va_gid
== GID_NOBODY
)
3816 fap
->gid
= (gid3
)NFS_GID_NOBODY
;
3818 fap
->gid
= (gid3
)vap
->va_gid
;
3819 fap
->size
= (size3
)vap
->va_size
;
3820 fap
->used
= (size3
)DEV_BSIZE
* (size3
)vap
->va_nblocks
;
3821 fap
->rdev
.specdata1
= (uint32
)getmajor(vap
->va_rdev
);
3822 fap
->rdev
.specdata2
= (uint32
)getminor(vap
->va_rdev
);
3823 fap
->fsid
= (uint64
)vap
->va_fsid
;
3824 fap
->fileid
= (fileid3
)vap
->va_nodeid
;
3825 fap
->atime
.seconds
= vap
->va_atime
.tv_sec
;
3826 fap
->atime
.nseconds
= vap
->va_atime
.tv_nsec
;
3827 fap
->mtime
.seconds
= vap
->va_mtime
.tv_sec
;
3828 fap
->mtime
.nseconds
= vap
->va_mtime
.tv_nsec
;
3829 fap
->ctime
.seconds
= vap
->va_ctime
.tv_sec
;
3830 fap
->ctime
.nseconds
= vap
->va_ctime
.tv_nsec
;
/*
 * vattr_to_wcc_attr: fill an NFSv3 wcc_attr (size, mtime, ctime) from a
 * struct vattr.
 *
 * The conversion is refused when NFS_TIME_T_OK() fails for the mtime or
 * ctime seconds, or NFS3_SIZE_OK() fails for va_size.  The return
 * statements are not visible in this listing; vattr_to_pre_op_attr()
 * treats a zero return as success.
 */
3835 vattr_to_wcc_attr(struct vattr
*vap
, wcc_attr
*wccap
)
3838 /* Return error if time or size overflow */
3839 if (!(NFS_TIME_T_OK(vap
->va_mtime
.tv_sec
) &&
3840 NFS_TIME_T_OK(vap
->va_ctime
.tv_sec
) &&
3841 NFS3_SIZE_OK(vap
->va_size
))) {
3844 wccap
->size
= (size3
)vap
->va_size
;
3845 wccap
->mtime
.seconds
= vap
->va_mtime
.tv_sec
;
3846 wccap
->mtime
.nseconds
= vap
->va_mtime
.tv_nsec
;
3847 wccap
->ctime
.seconds
= vap
->va_ctime
.tv_sec
;
3848 wccap
->ctime
.nseconds
= vap
->va_ctime
.tv_nsec
;
/*
 * vattr_to_pre_op_attr: build an NFSv3 pre_op_attr.
 *
 * poap->attributes is set to TRUE when vap is non-NULL and
 * vattr_to_wcc_attr() returns 0 (poap->attr has then been filled in).
 * The other visible assignment sets it to FALSE; the line joining the
 * two branches is not visible in this listing.
 */
3853 vattr_to_pre_op_attr(struct vattr
*vap
, pre_op_attr
*poap
)
3856 /* don't return attrs if time overflow */
3857 if ((vap
!= NULL
) && !vattr_to_wcc_attr(vap
, &poap
->attr
)) {
3858 poap
->attributes
= TRUE
;
3860 poap
->attributes
= FALSE
;
/*
 * vattr_to_post_op_attr: build an NFSv3 post_op_attr.
 *
 * poap->attributes is set to TRUE when vap is non-NULL and
 * vattr_to_fattr3() returns 0 (poap->attr has then been filled in).
 * The other visible assignment sets it to FALSE; the line joining the
 * two branches is not visible in this listing.  Callers in this file
 * pass NULL for vap when the attributes could not be fetched.
 */
3864 vattr_to_post_op_attr(struct vattr
*vap
, post_op_attr
*poap
)
3867 /* don't return attrs if time overflow */
3868 if ((vap
!= NULL
) && !vattr_to_fattr3(vap
, &poap
->attr
)) {
3869 poap
->attributes
= TRUE
;
3871 poap
->attributes
= FALSE
;
/*
 * vattr_to_wcc_data: fill an NFSv3 wcc_data from a pair of attribute
 * snapshots.  bvap feeds wccp->before through vattr_to_pre_op_attr() and
 * avap feeds wccp->after through vattr_to_post_op_attr(); both helpers
 * check for a NULL pointer before converting.
 */
3875 vattr_to_wcc_data(struct vattr
*bvap
, struct vattr
*avap
, wcc_data
*wccp
)
3878 vattr_to_pre_op_attr(bvap
, &wccp
->before
);
3879 vattr_to_post_op_attr(avap
, &wccp
->after
);
/*
 * Write-verifier initialisation.
 *
 * NOTE(review): the header of the enclosing function and the
 * declarations of verfp and now are not visible in this listing.
 *
 * struct rfs3_verf_overlay is laid over the file-scope write3verf so
 * that the verifier can be written as two fields: ts receives
 * (int)now.tv_sec and id receives zone_get_hostid(NULL).  A second
 * visible assignment stores (uint_t)now.tv_nsec into id; its guarding
 * condition is not visible, but the existing comment says the high
 * resolution time is used when the hostid has not been set.  Finally
 * nfs3_srv_caller_id is set from fs_new_caller_id().
 */
3885 struct rfs3_verf_overlay
{
3886 uint_t id
; /* a "unique" identifier */
3887 int ts
; /* a unique timestamp */
3892 * The following algorithm attempts to find a unique verifier
3893 * to be used as the write verifier returned from the server
3894 * to the client. It is important that this verifier change
3895 * whenever the server reboots. Of secondary importance, it
3896 * is important for the verifier to be unique between two
3897 * different servers.
3899 * Thus, an attempt is made to use the system hostid and the
3900 * current time in seconds when the nfssrv kernel module is
3901 * loaded. It is assumed that an NFS server will not be able
3902 * to boot and then to reboot in less than a second. If the
3903 * hostid has not been set, then the current high resolution
3904 * time is used. This will ensure different verifiers each
3905 * time the server reboots and minimize the chances that two
3906 * different servers will have the same verifier.
3910 * We ASSERT that this constant logic expression is
3911 * always true because in the past, it wasn't.
3913 ASSERT(sizeof (*verfp
) <= sizeof (write3verf
));
3916 verfp
= (struct rfs3_verf_overlay
*)&write3verf
;
3917 verfp
->ts
= (int)now
.tv_sec
;
3918 verfp
->id
= zone_get_hostid(NULL
);
3921 verfp
->id
= (uint_t
)now
.tv_nsec
;
3923 nfs3_srv_caller_id
= fs_new_caller_id();
3928 rdma_setup_read_data3(READ3args
*args
, READ3resok
*rok
)
3932 count3 count
= rok
->count
;
3935 if (rdma_setup_read_chunks(wcl
, count
, &wlist_len
) == FALSE
) {
3940 rok
->wlist_len
= wlist_len
;