dmake: do not set MAKEFLAGS=k
[unleashed/tickless.git] / kernel / fs / nfs / nfs3_srv.c
blob63fd80a7c4e1462c17fe551c9da4ee79962e0878
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
 * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2013 by Delphix. All rights reserved.
 */

/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */

31 #include <sys/param.h>
32 #include <sys/types.h>
33 #include <sys/systm.h>
34 #include <sys/cred.h>
35 #include <sys/buf.h>
36 #include <sys/vfs.h>
37 #include <sys/vnode.h>
38 #include <sys/uio.h>
39 #include <sys/errno.h>
40 #include <sys/sysmacros.h>
41 #include <sys/statvfs.h>
42 #include <sys/kmem.h>
43 #include <sys/dirent.h>
44 #include <sys/cmn_err.h>
45 #include <sys/debug.h>
46 #include <sys/systeminfo.h>
47 #include <sys/flock.h>
48 #include <sys/nbmlock.h>
49 #include <sys/policy.h>
50 #include <sys/sdt.h>
52 #include <rpc/types.h>
53 #include <rpc/auth.h>
54 #include <rpc/svc.h>
55 #include <rpc/rpc_rdma.h>
57 #include <nfs/nfs.h>
58 #include <nfs/export.h>
59 #include <nfs/nfs_cmd.h>
61 #include <sys/strsubr.h>
63 #include <sys/zone.h>
65 #include <inet/ip.h>
66 #include <inet/ip6.h>

/*
 * These are the interface routines for the server side of the
 * Network File System. See the NFS version 3 protocol specification
 * for a description of this interface.
 */

74 static writeverf3 write3verf;
76 static int sattr3_to_vattr(sattr3 *, struct vattr *);
77 static int vattr_to_fattr3(struct vattr *, fattr3 *);
78 static int vattr_to_wcc_attr(struct vattr *, wcc_attr *);
79 static void vattr_to_pre_op_attr(struct vattr *, pre_op_attr *);
80 static void vattr_to_wcc_data(struct vattr *, struct vattr *, wcc_data *);
81 static int rdma_setup_read_data3(READ3args *, READ3resok *);
83 extern int nfs_loaned_buffers;
85 u_longlong_t nfs3_srv_caller_id;
87 /* ARGSUSED */
88 void
89 rfs3_getattr(GETATTR3args *args, GETATTR3res *resp, struct exportinfo *exi,
90 struct svc_req *req, cred_t *cr, bool_t ro)
92 int error;
93 vnode_t *vp;
94 struct vattr va;
96 vp = nfs3_fhtovp(&args->object, exi);
98 DTRACE_NFSV3_4(op__getattr__start, struct svc_req *, req,
99 cred_t *, cr, vnode_t *, vp, GETATTR3args *, args);
101 if (vp == NULL) {
102 error = ESTALE;
103 goto out;
106 va.va_mask = AT_ALL;
107 error = rfs4_delegated_getattr(vp, &va, 0, cr);
109 if (!error) {
110 /* Lie about the object type for a referral */
111 if (vn_is_nfs_reparse(vp, cr))
112 va.va_type = VLNK;
114 /* overflow error if time or size is out of range */
115 error = vattr_to_fattr3(&va, &resp->resok.obj_attributes);
116 if (error)
117 goto out;
118 resp->status = NFS3_OK;
120 DTRACE_NFSV3_4(op__getattr__done, struct svc_req *, req,
121 cred_t *, cr, vnode_t *, vp, GETATTR3res *, resp);
123 VN_RELE(vp);
125 return;
128 out:
129 if (curthread->t_flag & T_WOULDBLOCK) {
130 curthread->t_flag &= ~T_WOULDBLOCK;
131 resp->status = NFS3ERR_JUKEBOX;
132 } else
133 resp->status = puterrno3(error);
135 DTRACE_NFSV3_4(op__getattr__done, struct svc_req *, req,
136 cred_t *, cr, vnode_t *, vp, GETATTR3res *, resp);
138 if (vp != NULL)
139 VN_RELE(vp);
142 void *
143 rfs3_getattr_getfh(GETATTR3args *args)
146 return (&args->object);
149 void
150 rfs3_setattr(SETATTR3args *args, SETATTR3res *resp, struct exportinfo *exi,
151 struct svc_req *req, cred_t *cr, bool_t ro)
153 int error;
154 vnode_t *vp;
155 struct vattr *bvap;
156 struct vattr bva;
157 struct vattr *avap;
158 struct vattr ava;
159 int flag;
160 int in_crit = 0;
161 struct flock64 bf;
162 caller_context_t ct;
164 bvap = NULL;
165 avap = NULL;
167 vp = nfs3_fhtovp(&args->object, exi);
169 DTRACE_NFSV3_4(op__setattr__start, struct svc_req *, req,
170 cred_t *, cr, vnode_t *, vp, SETATTR3args *, args);
172 if (vp == NULL) {
173 error = ESTALE;
174 goto out;
177 error = sattr3_to_vattr(&args->new_attributes, &ava);
178 if (error)
179 goto out;
182 * We need to specially handle size changes because of
183 * possible conflicting NBMAND locks. Get into critical
184 * region before fop_getattr, so the size attribute is
185 * valid when checking conflicts.
187 * Also, check to see if the v4 side of the server has
188 * delegated this file. If so, then we return JUKEBOX to
189 * allow the client to retrasmit its request.
191 if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
192 if (nbl_need_check(vp)) {
193 nbl_start_crit(vp, RW_READER);
194 in_crit = 1;
198 bva.va_mask = AT_ALL;
199 error = rfs4_delegated_getattr(vp, &bva, 0, cr);
202 * If we can't get the attributes, then we can't do the
203 * right access checking. So, we'll fail the request.
205 if (error)
206 goto out;
208 bvap = &bva;
210 if (rdonly(ro, vp)) {
211 resp->status = NFS3ERR_ROFS;
212 goto out1;
215 if (args->guard.check &&
216 (args->guard.obj_ctime.seconds != bva.va_ctime.tv_sec ||
217 args->guard.obj_ctime.nseconds != bva.va_ctime.tv_nsec)) {
218 resp->status = NFS3ERR_NOT_SYNC;
219 goto out1;
222 if (args->new_attributes.mtime.set_it == SET_TO_CLIENT_TIME)
223 flag = ATTR_UTIME;
224 else
225 flag = 0;
228 * If the filesystem is exported with nosuid, then mask off
229 * the setuid and setgid bits.
231 if ((ava.va_mask & AT_MODE) && vp->v_type == VREG &&
232 (exi->exi_export.ex_flags & EX_NOSUID))
233 ava.va_mode &= ~(VSUID | VSGID);
235 ct.cc_sysid = 0;
236 ct.cc_pid = 0;
237 ct.cc_caller_id = nfs3_srv_caller_id;
238 ct.cc_flags = CC_DONTBLOCK;
241 * We need to specially handle size changes because it is
242 * possible for the client to create a file with modes
243 * which indicate read-only, but with the file opened for
244 * writing. If the client then tries to set the size of
245 * the file, then the normal access checking done in
246 * fop_setattr would prevent the client from doing so,
247 * although it should be legal for it to do so. To get
248 * around this, we do the access checking for ourselves
249 * and then use fop_space which doesn't do the access
250 * checking which fop_setattr does. fop_space can only
251 * operate on VREG files, let fop_setattr handle the other
252 * extremely rare cases.
253 * Also the client should not be allowed to change the
254 * size of the file if there is a conflicting non-blocking
255 * mandatory lock in the region the change.
257 if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
258 if (in_crit) {
259 uoff_t offset;
260 ssize_t length;
262 if (ava.va_size < bva.va_size) {
263 offset = ava.va_size;
264 length = bva.va_size - ava.va_size;
265 } else {
266 offset = bva.va_size;
267 length = ava.va_size - bva.va_size;
269 if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
270 NULL)) {
271 error = EACCES;
272 goto out;
276 if (crgetuid(cr) == bva.va_uid && ava.va_size != bva.va_size) {
277 ava.va_mask &= ~AT_SIZE;
278 bf.l_type = F_WRLCK;
279 bf.l_whence = 0;
280 bf.l_start = (off64_t)ava.va_size;
281 bf.l_len = 0;
282 bf.l_sysid = 0;
283 bf.l_pid = 0;
284 error = fop_space(vp, F_FREESP, &bf, FWRITE,
285 (offset_t)ava.va_size, cr, &ct);
289 if (!error && ava.va_mask)
290 error = fop_setattr(vp, &ava, flag, cr, &ct);
292 /* check if a monitor detected a delegation conflict */
293 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
294 resp->status = NFS3ERR_JUKEBOX;
295 goto out1;
298 ava.va_mask = AT_ALL;
299 avap = rfs4_delegated_getattr(vp, &ava, 0, cr) ? NULL : &ava;
302 * Force modified metadata out to stable storage.
304 (void) fop_fsync(vp, FNODSYNC, cr, &ct);
306 if (error)
307 goto out;
309 if (in_crit)
310 nbl_end_crit(vp);
312 resp->status = NFS3_OK;
313 vattr_to_wcc_data(bvap, avap, &resp->resok.obj_wcc);
315 DTRACE_NFSV3_4(op__setattr__done, struct svc_req *, req,
316 cred_t *, cr, vnode_t *, vp, SETATTR3res *, resp);
318 VN_RELE(vp);
320 return;
322 out:
323 if (curthread->t_flag & T_WOULDBLOCK) {
324 curthread->t_flag &= ~T_WOULDBLOCK;
325 resp->status = NFS3ERR_JUKEBOX;
326 } else
327 resp->status = puterrno3(error);
328 out1:
329 DTRACE_NFSV3_4(op__setattr__done, struct svc_req *, req,
330 cred_t *, cr, vnode_t *, vp, SETATTR3res *, resp);
332 if (vp != NULL) {
333 if (in_crit)
334 nbl_end_crit(vp);
335 VN_RELE(vp);
337 vattr_to_wcc_data(bvap, avap, &resp->resfail.obj_wcc);
340 void *
341 rfs3_setattr_getfh(SETATTR3args *args)
344 return (&args->object);
347 /* ARGSUSED */
348 void
349 rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi,
350 struct svc_req *req, cred_t *cr, bool_t ro)
352 int error;
353 vnode_t *vp;
354 vnode_t *dvp;
355 struct vattr *vap;
356 struct vattr va;
357 struct vattr *dvap;
358 struct vattr dva;
359 nfs_fh3 *fhp;
360 struct sec_ol sec = {0, 0};
361 bool_t publicfh_flag = FALSE, auth_weak = FALSE;
362 struct sockaddr *ca;
363 char *name = NULL;
365 dvap = NULL;
368 * Allow lookups from the root - the default
369 * location of the public filehandle.
371 if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
372 dvp = rootdir;
373 VN_HOLD(dvp);
375 DTRACE_NFSV3_4(op__lookup__start, struct svc_req *, req,
376 cred_t *, cr, vnode_t *, dvp, LOOKUP3args *, args);
377 } else {
378 dvp = nfs3_fhtovp(&args->what.dir, exi);
380 DTRACE_NFSV3_4(op__lookup__start, struct svc_req *, req,
381 cred_t *, cr, vnode_t *, dvp, LOOKUP3args *, args);
383 if (dvp == NULL) {
384 error = ESTALE;
385 goto out;
389 dva.va_mask = AT_ALL;
390 dvap = fop_getattr(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
392 if (args->what.name == nfs3nametoolong) {
393 resp->status = NFS3ERR_NAMETOOLONG;
394 goto out1;
397 if (args->what.name == NULL || *(args->what.name) == '\0') {
398 resp->status = NFS3ERR_ACCES;
399 goto out1;
402 fhp = &args->what.dir;
403 if (strcmp(args->what.name, "..") == 0 &&
404 EQFID(&exi->exi_fid, FH3TOFIDP(fhp))) {
405 resp->status = NFS3ERR_NOENT;
406 goto out1;
409 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
410 name = nfscmd_convname(ca, exi, args->what.name,
411 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
413 if (name == NULL) {
414 resp->status = NFS3ERR_ACCES;
415 goto out1;
419 * If the public filehandle is used then allow
420 * a multi-component lookup
422 if (PUBLIC_FH3(&args->what.dir)) {
423 publicfh_flag = TRUE;
424 error = rfs_publicfh_mclookup(name, dvp, cr, &vp,
425 &exi, &sec);
426 if (error && exi != NULL)
427 exi_rele(exi); /* See comment below Re: publicfh_flag */
428 } else {
429 error = fop_lookup(dvp, name, &vp,
430 NULL, 0, NULL, cr, NULL, NULL, NULL);
433 if (name != args->what.name)
434 kmem_free(name, MAXPATHLEN + 1);
436 dva.va_mask = AT_ALL;
437 dvap = fop_getattr(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
439 if (error)
440 goto out;
442 if (sec.sec_flags & SEC_QUERY) {
443 error = makefh3_ol(&resp->resok.object, exi, sec.sec_index);
444 } else {
445 error = makefh3(&resp->resok.object, vp, exi);
446 if (!error && publicfh_flag && !chk_clnt_sec(exi, req))
447 auth_weak = TRUE;
451 * If publicfh_flag is true then we have called rfs_publicfh_mclookup
452 * and have obtained a new exportinfo in exi which needs to be
453 * released. Note that the original exportinfo pointed to by exi
454 * will be released by the caller, common_dispatch.
456 if (publicfh_flag)
457 exi_rele(exi);
459 if (error) {
460 VN_RELE(vp);
461 goto out;
464 va.va_mask = AT_ALL;
465 vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
467 VN_RELE(vp);
469 resp->status = NFS3_OK;
470 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
471 vattr_to_post_op_attr(dvap, &resp->resok.dir_attributes);
474 * If it's public fh, no 0x81, and client's flavor is
475 * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
476 * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
478 if (auth_weak)
479 resp->status = (enum nfsstat3)WNFSERR_CLNT_FLAVOR;
481 DTRACE_NFSV3_4(op__lookup__done, struct svc_req *, req,
482 cred_t *, cr, vnode_t *, dvp, LOOKUP3res *, resp);
483 VN_RELE(dvp);
485 return;
487 out:
488 if (curthread->t_flag & T_WOULDBLOCK) {
489 curthread->t_flag &= ~T_WOULDBLOCK;
490 resp->status = NFS3ERR_JUKEBOX;
491 } else
492 resp->status = puterrno3(error);
493 out1:
494 DTRACE_NFSV3_4(op__lookup__done, struct svc_req *, req,
495 cred_t *, cr, vnode_t *, dvp, LOOKUP3res *, resp);
497 if (dvp != NULL)
498 VN_RELE(dvp);
499 vattr_to_post_op_attr(dvap, &resp->resfail.dir_attributes);
503 void *
504 rfs3_lookup_getfh(LOOKUP3args *args)
507 return (&args->what.dir);
510 /* ARGSUSED */
511 void
512 rfs3_access(ACCESS3args *args, ACCESS3res *resp, struct exportinfo *exi,
513 struct svc_req *req, cred_t *cr, bool_t ro)
515 int error;
516 vnode_t *vp;
517 struct vattr *vap;
518 struct vattr va;
519 int checkwriteperm;
521 vap = NULL;
523 vp = nfs3_fhtovp(&args->object, exi);
525 DTRACE_NFSV3_4(op__access__start, struct svc_req *, req,
526 cred_t *, cr, vnode_t *, vp, ACCESS3args *, args);
528 if (vp == NULL) {
529 error = ESTALE;
530 goto out;
534 * If the file system is exported read only, it is not appropriate
535 * to check write permissions for regular files and directories.
536 * Special files are interpreted by the client, so the underlying
537 * permissions are sent back to the client for interpretation.
539 if (rdonly(ro, vp) && (vp->v_type == VREG || vp->v_type == VDIR))
540 checkwriteperm = 0;
541 else
542 checkwriteperm = 1;
545 * We need the mode so that we can correctly determine access
546 * permissions relative to a mandatory lock file. Access to
547 * mandatory lock files is denied on the server, so it might
548 * as well be reflected to the server during the open.
550 va.va_mask = AT_MODE;
551 error = fop_getattr(vp, &va, 0, cr, NULL);
552 if (error)
553 goto out;
555 vap = &va;
557 resp->resok.access = 0;
560 if (args->access & ACCESS3_READ) {
561 error = fop_access(vp, VREAD, 0, cr, NULL);
562 if (error) {
563 if (curthread->t_flag & T_WOULDBLOCK)
564 goto out;
565 } else if (!MANDLOCK(vp, va.va_mode))
566 resp->resok.access |= ACCESS3_READ;
568 if ((args->access & ACCESS3_LOOKUP) && vp->v_type == VDIR) {
569 error = fop_access(vp, VEXEC, 0, cr, NULL);
570 if (error) {
571 if (curthread->t_flag & T_WOULDBLOCK)
572 goto out;
573 } else
574 resp->resok.access |= ACCESS3_LOOKUP;
576 if (checkwriteperm &&
577 (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND))) {
578 error = fop_access(vp, VWRITE, 0, cr, NULL);
579 if (error) {
580 if (curthread->t_flag & T_WOULDBLOCK)
581 goto out;
582 } else if (!MANDLOCK(vp, va.va_mode))
583 resp->resok.access |=
584 (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND));
586 if (checkwriteperm &&
587 (args->access & ACCESS3_DELETE) && vp->v_type == VDIR) {
588 error = fop_access(vp, VWRITE, 0, cr, NULL);
589 if (error) {
590 if (curthread->t_flag & T_WOULDBLOCK)
591 goto out;
592 } else
593 resp->resok.access |= ACCESS3_DELETE;
595 if (args->access & ACCESS3_EXECUTE) {
596 error = fop_access(vp, VEXEC, 0, cr, NULL);
597 if (error) {
598 if (curthread->t_flag & T_WOULDBLOCK)
599 goto out;
600 } else if (!MANDLOCK(vp, va.va_mode))
601 resp->resok.access |= ACCESS3_EXECUTE;
604 va.va_mask = AT_ALL;
605 vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
607 resp->status = NFS3_OK;
608 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
610 DTRACE_NFSV3_4(op__access__done, struct svc_req *, req,
611 cred_t *, cr, vnode_t *, vp, ACCESS3res *, resp);
613 VN_RELE(vp);
615 return;
617 out:
618 if (curthread->t_flag & T_WOULDBLOCK) {
619 curthread->t_flag &= ~T_WOULDBLOCK;
620 resp->status = NFS3ERR_JUKEBOX;
621 } else
622 resp->status = puterrno3(error);
623 DTRACE_NFSV3_4(op__access__done, struct svc_req *, req,
624 cred_t *, cr, vnode_t *, vp, ACCESS3res *, resp);
625 if (vp != NULL)
626 VN_RELE(vp);
627 vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
630 void *
631 rfs3_access_getfh(ACCESS3args *args)
634 return (&args->object);
637 /* ARGSUSED */
638 void
639 rfs3_readlink(READLINK3args *args, READLINK3res *resp, struct exportinfo *exi,
640 struct svc_req *req, cred_t *cr, bool_t ro)
642 int error;
643 vnode_t *vp;
644 struct vattr *vap;
645 struct vattr va;
646 struct iovec iov;
647 struct uio uio;
648 char *data;
649 struct sockaddr *ca;
650 char *name = NULL;
651 int is_referral = 0;
653 vap = NULL;
655 vp = nfs3_fhtovp(&args->symlink, exi);
657 DTRACE_NFSV3_4(op__readlink__start, struct svc_req *, req,
658 cred_t *, cr, vnode_t *, vp, READLINK3args *, args);
660 if (vp == NULL) {
661 error = ESTALE;
662 goto out;
665 va.va_mask = AT_ALL;
666 error = fop_getattr(vp, &va, 0, cr, NULL);
667 if (error)
668 goto out;
670 vap = &va;
672 /* We lied about the object type for a referral */
673 if (vn_is_nfs_reparse(vp, cr))
674 is_referral = 1;
676 if (vp->v_type != VLNK && !is_referral) {
677 resp->status = NFS3ERR_INVAL;
678 goto out1;
681 if (MANDLOCK(vp, va.va_mode)) {
682 resp->status = NFS3ERR_ACCES;
683 goto out1;
686 data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
688 if (is_referral) {
689 char *s;
690 size_t strsz;
692 /* Get an artificial symlink based on a referral */
693 s = build_symlink(vp, cr, &strsz);
694 global_svstat_ptr[3][NFS_REFERLINKS].value.ui64++;
695 DTRACE_PROBE2(nfs3serv__func__referral__reflink,
696 vnode_t *, vp, char *, s);
697 if (s == NULL)
698 error = EINVAL;
699 else {
700 error = 0;
701 (void) strlcpy(data, s, MAXPATHLEN + 1);
702 kmem_free(s, strsz);
705 } else {
707 iov.iov_base = data;
708 iov.iov_len = MAXPATHLEN;
709 uio.uio_iov = &iov;
710 uio.uio_iovcnt = 1;
711 uio.uio_segflg = UIO_SYSSPACE;
712 uio.uio_extflg = UIO_COPY_CACHED;
713 uio.uio_loffset = 0;
714 uio.uio_resid = MAXPATHLEN;
716 error = fop_readlink(vp, &uio, cr, NULL);
718 if (!error)
719 *(data + MAXPATHLEN - uio.uio_resid) = '\0';
722 va.va_mask = AT_ALL;
723 vap = fop_getattr(vp, &va, 0, cr, NULL) ? NULL : &va;
725 /* Lie about object type again just to be consistent */
726 if (is_referral && vap != NULL)
727 vap->va_type = VLNK;
729 #if 0 /* notyet */
731 * Don't do this. It causes local disk writes when just
732 * reading the file and the overhead is deemed larger
733 * than the benefit.
736 * Force modified metadata out to stable storage.
738 (void) fop_fsync(vp, FNODSYNC, cr, NULL);
739 #endif
741 if (error) {
742 kmem_free(data, MAXPATHLEN + 1);
743 goto out;
746 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
747 name = nfscmd_convname(ca, exi, data, NFSCMD_CONV_OUTBOUND,
748 MAXPATHLEN + 1);
750 if (name == NULL) {
752 * Even though the conversion failed, we return
753 * something. We just don't translate it.
755 name = data;
758 resp->status = NFS3_OK;
759 vattr_to_post_op_attr(vap, &resp->resok.symlink_attributes);
760 resp->resok.data = name;
762 DTRACE_NFSV3_4(op__readlink__done, struct svc_req *, req,
763 cred_t *, cr, vnode_t *, vp, READLINK3res *, resp);
764 VN_RELE(vp);
766 if (name != data)
767 kmem_free(data, MAXPATHLEN + 1);
769 return;
771 out:
772 if (curthread->t_flag & T_WOULDBLOCK) {
773 curthread->t_flag &= ~T_WOULDBLOCK;
774 resp->status = NFS3ERR_JUKEBOX;
775 } else
776 resp->status = puterrno3(error);
777 out1:
778 DTRACE_NFSV3_4(op__readlink__done, struct svc_req *, req,
779 cred_t *, cr, vnode_t *, vp, READLINK3res *, resp);
780 if (vp != NULL)
781 VN_RELE(vp);
782 vattr_to_post_op_attr(vap, &resp->resfail.symlink_attributes);
785 void *
786 rfs3_readlink_getfh(READLINK3args *args)
789 return (&args->symlink);
792 void
793 rfs3_readlink_free(READLINK3res *resp)
796 if (resp->status == NFS3_OK)
797 kmem_free(resp->resok.data, MAXPATHLEN + 1);
801 * Server routine to handle read
802 * May handle RDMA data as well as mblks
804 /* ARGSUSED */
805 void
806 rfs3_read(READ3args *args, READ3res *resp, struct exportinfo *exi,
807 struct svc_req *req, cred_t *cr, bool_t ro)
809 int error;
810 vnode_t *vp;
811 struct vattr *vap;
812 struct vattr va;
813 struct iovec iov, *iovp = NULL;
814 int iovcnt;
815 struct uio uio;
816 uoff_t offset;
817 mblk_t *mp = NULL;
818 int in_crit = 0;
819 int need_rwunlock = 0;
820 caller_context_t ct;
821 int rdma_used = 0;
822 int loaned_buffers;
823 struct uio *uiop;
825 vap = NULL;
827 vp = nfs3_fhtovp(&args->file, exi);
829 DTRACE_NFSV3_4(op__read__start, struct svc_req *, req,
830 cred_t *, cr, vnode_t *, vp, READ3args *, args);
832 if (vp == NULL) {
833 error = ESTALE;
834 goto out;
837 if (args->wlist) {
838 if (args->count > clist_len(args->wlist)) {
839 error = EINVAL;
840 goto out;
842 rdma_used = 1;
845 /* use loaned buffers for TCP */
846 loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
848 ct.cc_sysid = 0;
849 ct.cc_pid = 0;
850 ct.cc_caller_id = nfs3_srv_caller_id;
851 ct.cc_flags = CC_DONTBLOCK;
854 * Enter the critical region before calling fop_rwlock
855 * to avoid a deadlock with write requests.
857 if (nbl_need_check(vp)) {
858 nbl_start_crit(vp, RW_READER);
859 in_crit = 1;
860 if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
861 NULL)) {
862 error = EACCES;
863 goto out;
867 error = fop_rwlock(vp, V_WRITELOCK_FALSE, &ct);
869 /* check if a monitor detected a delegation conflict */
870 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
871 resp->status = NFS3ERR_JUKEBOX;
872 goto out1;
875 need_rwunlock = 1;
877 va.va_mask = AT_ALL;
878 error = fop_getattr(vp, &va, 0, cr, &ct);
881 * If we can't get the attributes, then we can't do the
882 * right access checking. So, we'll fail the request.
884 if (error)
885 goto out;
887 vap = &va;
889 if (vp->v_type != VREG) {
890 resp->status = NFS3ERR_INVAL;
891 goto out1;
894 if (crgetuid(cr) != va.va_uid) {
895 error = fop_access(vp, VREAD, 0, cr, &ct);
896 if (error) {
897 if (curthread->t_flag & T_WOULDBLOCK)
898 goto out;
899 error = fop_access(vp, VEXEC, 0, cr, &ct);
900 if (error)
901 goto out;
905 if (MANDLOCK(vp, va.va_mode)) {
906 resp->status = NFS3ERR_ACCES;
907 goto out1;
910 offset = args->offset;
911 if (offset >= va.va_size) {
912 fop_rwunlock(vp, V_WRITELOCK_FALSE, &ct);
913 if (in_crit)
914 nbl_end_crit(vp);
915 resp->status = NFS3_OK;
916 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
917 resp->resok.count = 0;
918 resp->resok.eof = TRUE;
919 resp->resok.data.data_len = 0;
920 resp->resok.data.data_val = NULL;
921 resp->resok.data.mp = NULL;
922 /* RDMA */
923 resp->resok.wlist = args->wlist;
924 resp->resok.wlist_len = resp->resok.count;
925 if (resp->resok.wlist)
926 clist_zero_len(resp->resok.wlist);
927 goto done;
930 if (args->count == 0) {
931 fop_rwunlock(vp, V_WRITELOCK_FALSE, &ct);
932 if (in_crit)
933 nbl_end_crit(vp);
934 resp->status = NFS3_OK;
935 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
936 resp->resok.count = 0;
937 resp->resok.eof = FALSE;
938 resp->resok.data.data_len = 0;
939 resp->resok.data.data_val = NULL;
940 resp->resok.data.mp = NULL;
941 /* RDMA */
942 resp->resok.wlist = args->wlist;
943 resp->resok.wlist_len = resp->resok.count;
944 if (resp->resok.wlist)
945 clist_zero_len(resp->resok.wlist);
946 goto done;
950 * do not allocate memory more the max. allowed
951 * transfer size
953 if (args->count > rfs3_tsize(req))
954 args->count = rfs3_tsize(req);
956 if (loaned_buffers) {
957 uiop = (uio_t *)rfs_setup_xuio(vp);
958 ASSERT(uiop != NULL);
959 uiop->uio_segflg = UIO_SYSSPACE;
960 uiop->uio_loffset = args->offset;
961 uiop->uio_resid = args->count;
963 /* Jump to do the read if successful */
964 if (fop_reqzcbuf(vp, UIO_READ, (xuio_t *)uiop, cr, &ct) == 0) {
966 * Need to hold the vnode until after fop_retzcbuf()
967 * is called.
969 VN_HOLD(vp);
970 goto doio_read;
973 DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
974 uiop->uio_loffset, int, uiop->uio_resid);
976 uiop->uio_extflg = 0;
977 /* failure to setup for zero copy */
978 rfs_free_xuio((void *)uiop);
979 loaned_buffers = 0;
983 * If returning data via RDMA Write, then grab the chunk list.
984 * If we aren't returning READ data w/RDMA_WRITE, then grab
985 * a mblk.
987 if (rdma_used) {
988 (void) rdma_get_wchunk(req, &iov, args->wlist);
989 uio.uio_iov = &iov;
990 uio.uio_iovcnt = 1;
991 } else {
993 * mp will contain the data to be sent out in the read reply.
994 * For UDP, this will be freed after the reply has been sent
995 * out by the driver. For TCP, it will be freed after the last
996 * segment associated with the reply has been ACKed by the
997 * client.
999 mp = rfs_read_alloc(args->count, &iovp, &iovcnt);
1000 uio.uio_iov = iovp;
1001 uio.uio_iovcnt = iovcnt;
1004 uio.uio_segflg = UIO_SYSSPACE;
1005 uio.uio_extflg = UIO_COPY_CACHED;
1006 uio.uio_loffset = args->offset;
1007 uio.uio_resid = args->count;
1008 uiop = &uio;
1010 doio_read:
1011 error = fop_read(vp, uiop, 0, cr, &ct);
1013 if (error) {
1014 if (mp)
1015 freemsg(mp);
1016 /* check if a monitor detected a delegation conflict */
1017 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1018 resp->status = NFS3ERR_JUKEBOX;
1019 goto out1;
1021 goto out;
1024 /* make mblk using zc buffers */
1025 if (loaned_buffers) {
1026 mp = uio_to_mblk(uiop);
1027 ASSERT(mp != NULL);
1030 va.va_mask = AT_ALL;
1031 error = fop_getattr(vp, &va, 0, cr, &ct);
1033 if (error)
1034 vap = NULL;
1035 else
1036 vap = &va;
1038 fop_rwunlock(vp, V_WRITELOCK_FALSE, &ct);
1040 if (in_crit)
1041 nbl_end_crit(vp);
1043 resp->status = NFS3_OK;
1044 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1045 resp->resok.count = args->count - uiop->uio_resid;
1046 if (!error && offset + resp->resok.count == va.va_size)
1047 resp->resok.eof = TRUE;
1048 else
1049 resp->resok.eof = FALSE;
1050 resp->resok.data.data_len = resp->resok.count;
1052 if (mp)
1053 rfs_rndup_mblks(mp, resp->resok.count, loaned_buffers);
1055 resp->resok.data.mp = mp;
1056 resp->resok.size = (uint_t)args->count;
1058 if (rdma_used) {
1059 resp->resok.data.data_val = (caddr_t)iov.iov_base;
1060 if (!rdma_setup_read_data3(args, &(resp->resok))) {
1061 resp->status = NFS3ERR_INVAL;
1063 } else {
1064 resp->resok.data.data_val = (caddr_t)mp->b_datap->db_base;
1065 (resp->resok).wlist = NULL;
1068 done:
1069 DTRACE_NFSV3_4(op__read__done, struct svc_req *, req,
1070 cred_t *, cr, vnode_t *, vp, READ3res *, resp);
1072 VN_RELE(vp);
1074 if (iovp != NULL)
1075 kmem_free(iovp, iovcnt * sizeof (struct iovec));
1077 return;
1079 out:
1080 if (curthread->t_flag & T_WOULDBLOCK) {
1081 curthread->t_flag &= ~T_WOULDBLOCK;
1082 resp->status = NFS3ERR_JUKEBOX;
1083 } else
1084 resp->status = puterrno3(error);
1085 out1:
1086 DTRACE_NFSV3_4(op__read__done, struct svc_req *, req,
1087 cred_t *, cr, vnode_t *, vp, READ3res *, resp);
1089 if (vp != NULL) {
1090 if (need_rwunlock)
1091 fop_rwunlock(vp, V_WRITELOCK_FALSE, &ct);
1092 if (in_crit)
1093 nbl_end_crit(vp);
1094 VN_RELE(vp);
1096 vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
1098 if (iovp != NULL)
1099 kmem_free(iovp, iovcnt * sizeof (struct iovec));
1102 void
1103 rfs3_read_free(READ3res *resp)
1105 mblk_t *mp;
1107 if (resp->status == NFS3_OK) {
1108 mp = resp->resok.data.mp;
1109 if (mp != NULL)
1110 freemsg(mp);
1114 void *
1115 rfs3_read_getfh(READ3args *args)
1118 return (&args->file);
/*
 * Size of the on-stack iovec array in rfs3_write().  Requests whose mblk
 * chain has more segments than this use a kmem_alloc()ed array instead.
 */
#define	MAX_IOVECS	12

#ifdef DEBUG
/* Counts of writes that did / did not fit in the on-stack iovec array. */
static int rfs3_write_hits = 0;
static int rfs3_write_misses = 0;
#endif

1128 void
1129 rfs3_write(WRITE3args *args, WRITE3res *resp, struct exportinfo *exi,
1130 struct svc_req *req, cred_t *cr, bool_t ro)
1132 int error;
1133 vnode_t *vp;
1134 struct vattr *bvap = NULL;
1135 struct vattr bva;
1136 struct vattr *avap = NULL;
1137 struct vattr ava;
1138 uoff_t rlimit;
1139 struct uio uio;
1140 struct iovec iov[MAX_IOVECS];
1141 mblk_t *m;
1142 struct iovec *iovp;
1143 int iovcnt;
1144 int ioflag;
1145 cred_t *savecred;
1146 int in_crit = 0;
1147 int rwlock_ret = -1;
1148 caller_context_t ct;
1150 vp = nfs3_fhtovp(&args->file, exi);
1152 DTRACE_NFSV3_4(op__write__start, struct svc_req *, req,
1153 cred_t *, cr, vnode_t *, vp, WRITE3args *, args);
1155 if (vp == NULL) {
1156 error = ESTALE;
1157 goto err;
1160 ct.cc_sysid = 0;
1161 ct.cc_pid = 0;
1162 ct.cc_caller_id = nfs3_srv_caller_id;
1163 ct.cc_flags = CC_DONTBLOCK;
1166 * We have to enter the critical region before calling fop_rwlock
1167 * to avoid a deadlock with ufs.
1169 if (nbl_need_check(vp)) {
1170 nbl_start_crit(vp, RW_READER);
1171 in_crit = 1;
1172 if (nbl_conflict(vp, NBL_WRITE, args->offset, args->count, 0,
1173 NULL)) {
1174 error = EACCES;
1175 goto err;
1179 rwlock_ret = fop_rwlock(vp, V_WRITELOCK_TRUE, &ct);
1181 /* check if a monitor detected a delegation conflict */
1182 if (rwlock_ret == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1183 resp->status = NFS3ERR_JUKEBOX;
1184 rwlock_ret = -1;
1185 goto err1;
1189 bva.va_mask = AT_ALL;
1190 error = fop_getattr(vp, &bva, 0, cr, &ct);
1193 * If we can't get the attributes, then we can't do the
1194 * right access checking. So, we'll fail the request.
1196 if (error)
1197 goto err;
1199 bvap = &bva;
1200 avap = bvap;
1202 if (args->count != args->data.data_len) {
1203 resp->status = NFS3ERR_INVAL;
1204 goto err1;
1207 if (rdonly(ro, vp)) {
1208 resp->status = NFS3ERR_ROFS;
1209 goto err1;
1212 if (vp->v_type != VREG) {
1213 resp->status = NFS3ERR_INVAL;
1214 goto err1;
1217 if (crgetuid(cr) != bva.va_uid &&
1218 (error = fop_access(vp, VWRITE, 0, cr, &ct)))
1219 goto err;
1221 if (MANDLOCK(vp, bva.va_mode)) {
1222 resp->status = NFS3ERR_ACCES;
1223 goto err1;
1226 if (args->count == 0) {
1227 resp->status = NFS3_OK;
1228 vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1229 resp->resok.count = 0;
1230 resp->resok.committed = args->stable;
1231 resp->resok.verf = write3verf;
1232 goto out;
1235 if (args->mblk != NULL) {
1236 iovcnt = 0;
1237 for (m = args->mblk; m != NULL; m = m->b_cont)
1238 iovcnt++;
1239 if (iovcnt <= MAX_IOVECS) {
1240 #ifdef DEBUG
1241 rfs3_write_hits++;
1242 #endif
1243 iovp = iov;
1244 } else {
1245 #ifdef DEBUG
1246 rfs3_write_misses++;
1247 #endif
1248 iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
1250 mblk_to_iov(args->mblk, iovcnt, iovp);
1252 } else if (args->rlist != NULL) {
1253 iovcnt = 1;
1254 iovp = iov;
1255 iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
1256 iovp->iov_len = args->count;
1257 } else {
1258 iovcnt = 1;
1259 iovp = iov;
1260 iovp->iov_base = args->data.data_val;
1261 iovp->iov_len = args->count;
1264 uio.uio_iov = iovp;
1265 uio.uio_iovcnt = iovcnt;
1267 uio.uio_segflg = UIO_SYSSPACE;
1268 uio.uio_extflg = UIO_COPY_DEFAULT;
1269 uio.uio_loffset = args->offset;
1270 uio.uio_resid = args->count;
1271 uio.uio_llimit = curproc->p_fsz_ctl;
1272 rlimit = uio.uio_llimit - args->offset;
1273 if (rlimit < (uoff_t)uio.uio_resid)
1274 uio.uio_resid = (int)rlimit;
1276 if (args->stable == UNSTABLE)
1277 ioflag = 0;
1278 else if (args->stable == FILE_SYNC)
1279 ioflag = FSYNC;
1280 else if (args->stable == DATA_SYNC)
1281 ioflag = FDSYNC;
1282 else {
1283 if (iovp != iov)
1284 kmem_free(iovp, sizeof (*iovp) * iovcnt);
1285 resp->status = NFS3ERR_INVAL;
1286 goto err1;
1290 * We're changing creds because VM may fault and we need
1291 * the cred of the current thread to be used if quota
1292 * checking is enabled.
1294 savecred = curthread->t_cred;
1295 curthread->t_cred = cr;
1296 error = fop_write(vp, &uio, ioflag, cr, &ct);
1297 curthread->t_cred = savecred;
1299 if (iovp != iov)
1300 kmem_free(iovp, sizeof (*iovp) * iovcnt);
1302 /* check if a monitor detected a delegation conflict */
1303 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1304 resp->status = NFS3ERR_JUKEBOX;
1305 goto err1;
1308 ava.va_mask = AT_ALL;
1309 avap = fop_getattr(vp, &ava, 0, cr, &ct) ? NULL : &ava;
1311 if (error)
1312 goto err;
1315 * If we were unable to get the V_WRITELOCK_TRUE, then we
1316 * may not have accurate after attrs, so check if
1317 * we have both attributes, they have a non-zero va_seq, and
1318 * va_seq has changed by exactly one,
1319 * if not, turn off the before attr.
1321 if (rwlock_ret != V_WRITELOCK_TRUE) {
1322 if (bvap == NULL || avap == NULL ||
1323 bvap->va_seq == 0 || avap->va_seq == 0 ||
1324 avap->va_seq != (bvap->va_seq + 1)) {
1325 bvap = NULL;
1329 resp->status = NFS3_OK;
1330 vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1331 resp->resok.count = args->count - uio.uio_resid;
1332 resp->resok.committed = args->stable;
1333 resp->resok.verf = write3verf;
1334 goto out;
1336 err:
1337 if (curthread->t_flag & T_WOULDBLOCK) {
1338 curthread->t_flag &= ~T_WOULDBLOCK;
1339 resp->status = NFS3ERR_JUKEBOX;
1340 } else
1341 resp->status = puterrno3(error);
1342 err1:
1343 vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
1344 out:
1345 DTRACE_NFSV3_4(op__write__done, struct svc_req *, req,
1346 cred_t *, cr, vnode_t *, vp, WRITE3res *, resp);
1348 if (vp != NULL) {
1349 if (rwlock_ret != -1)
1350 fop_rwunlock(vp, V_WRITELOCK_TRUE, &ct);
1351 if (in_crit)
1352 nbl_end_crit(vp);
1353 VN_RELE(vp);
1357 void *
1358 rfs3_write_getfh(WRITE3args *args)
1361 return (&args->file);
1364 void
1365 rfs3_create(CREATE3args *args, CREATE3res *resp, struct exportinfo *exi,
1366 struct svc_req *req, cred_t *cr, bool_t ro)
1368 int error;
1369 int in_crit = 0;
1370 vnode_t *vp;
1371 vnode_t *tvp = NULL;
1372 vnode_t *dvp;
1373 struct vattr *vap;
1374 struct vattr va;
1375 struct vattr *dbvap;
1376 struct vattr dbva;
1377 struct vattr *davap;
1378 struct vattr dava;
1379 enum vcexcl excl;
1380 nfstime3 *mtime;
1381 len_t reqsize;
1382 bool_t trunc;
1383 struct sockaddr *ca;
1384 char *name = NULL;
1386 dbvap = NULL;
1387 davap = NULL;
1389 dvp = nfs3_fhtovp(&args->where.dir, exi);
1391 DTRACE_NFSV3_4(op__create__start, struct svc_req *, req,
1392 cred_t *, cr, vnode_t *, dvp, CREATE3args *, args);
1394 if (dvp == NULL) {
1395 error = ESTALE;
1396 goto out;
1399 dbva.va_mask = AT_ALL;
1400 dbvap = fop_getattr(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1401 davap = dbvap;
1403 if (args->where.name == nfs3nametoolong) {
1404 resp->status = NFS3ERR_NAMETOOLONG;
1405 goto out1;
1408 if (args->where.name == NULL || *(args->where.name) == '\0') {
1409 resp->status = NFS3ERR_ACCES;
1410 goto out1;
1413 if (rdonly(ro, dvp)) {
1414 resp->status = NFS3ERR_ROFS;
1415 goto out1;
1418 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1419 name = nfscmd_convname(ca, exi, args->where.name,
1420 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
1422 if (name == NULL) {
1423 /* This is really a Solaris EILSEQ */
1424 resp->status = NFS3ERR_INVAL;
1425 goto out1;
1428 if (args->how.mode == EXCLUSIVE) {
1429 va.va_mask = AT_TYPE | AT_MODE | AT_MTIME;
1430 va.va_type = VREG;
1431 va.va_mode = (mode_t)0;
1433 * Ensure no time overflows and that types match
1435 mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1436 va.va_mtime.tv_sec = mtime->seconds % INT32_MAX;
1437 va.va_mtime.tv_nsec = mtime->nseconds;
1438 excl = EXCL;
1439 } else {
1440 error = sattr3_to_vattr(&args->how.createhow3_u.obj_attributes,
1441 &va);
1442 if (error)
1443 goto out;
1444 va.va_mask |= AT_TYPE;
1445 va.va_type = VREG;
1446 if (args->how.mode == GUARDED)
1447 excl = EXCL;
1448 else {
1449 excl = NONEXCL;
1452 * During creation of file in non-exclusive mode
1453 * if size of file is being set then make sure
1454 * that if the file already exists that no conflicting
1455 * non-blocking mandatory locks exists in the region
1456 * being modified. If there are conflicting locks fail
1457 * the operation with EACCES.
1459 if (va.va_mask & AT_SIZE) {
1460 struct vattr tva;
1463 * Does file already exist?
1465 error = fop_lookup(dvp, name, &tvp,
1466 NULL, 0, NULL, cr, NULL, NULL, NULL);
1469 * Check to see if the file has been delegated
1470 * to a v4 client. If so, then begin recall of
1471 * the delegation and return JUKEBOX to allow
1472 * the client to retrasmit its request.
1475 trunc = va.va_size == 0;
1476 if (!error &&
1477 rfs4_check_delegated(FWRITE, tvp, trunc)) {
1478 resp->status = NFS3ERR_JUKEBOX;
1479 goto out1;
1483 * Check for NBMAND lock conflicts
1485 if (!error && nbl_need_check(tvp)) {
1486 uoff_t offset;
1487 ssize_t len;
1489 nbl_start_crit(tvp, RW_READER);
1490 in_crit = 1;
1492 tva.va_mask = AT_SIZE;
1493 error = fop_getattr(tvp, &tva, 0, cr,
1494 NULL);
1496 * Can't check for conflicts, so return
1497 * error.
1499 if (error)
1500 goto out;
1502 offset = tva.va_size < va.va_size ?
1503 tva.va_size : va.va_size;
1504 len = tva.va_size < va.va_size ?
1505 va.va_size - tva.va_size :
1506 tva.va_size - va.va_size;
1507 if (nbl_conflict(tvp, NBL_WRITE,
1508 offset, len, 0, NULL)) {
1509 error = EACCES;
1510 goto out;
1512 } else if (tvp) {
1513 VN_RELE(tvp);
1514 tvp = NULL;
1518 if (va.va_mask & AT_SIZE)
1519 reqsize = va.va_size;
1523 * Must specify the mode.
1525 if (!(va.va_mask & AT_MODE)) {
1526 resp->status = NFS3ERR_INVAL;
1527 goto out1;
1531 * If the filesystem is exported with nosuid, then mask off
1532 * the setuid and setgid bits.
1534 if (va.va_type == VREG && (exi->exi_export.ex_flags & EX_NOSUID))
1535 va.va_mode &= ~(VSUID | VSGID);
1537 tryagain:
1539 * The file open mode used is VWRITE. If the client needs
1540 * some other semantic, then it should do the access checking
1541 * itself. It would have been nice to have the file open mode
1542 * passed as part of the arguments.
1544 error = fop_create(dvp, name, &va, excl, VWRITE,
1545 &vp, cr, 0, NULL, NULL);
1547 dava.va_mask = AT_ALL;
1548 davap = fop_getattr(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
1550 if (error) {
1552 * If we got something other than file already exists
1553 * then just return this error. Otherwise, we got
1554 * EEXIST. If we were doing a GUARDED create, then
1555 * just return this error. Otherwise, we need to
1556 * make sure that this wasn't a duplicate of an
1557 * exclusive create request.
1559 * The assumption is made that a non-exclusive create
1560 * request will never return EEXIST.
1562 if (error != EEXIST || args->how.mode == GUARDED)
1563 goto out;
1565 * Lookup the file so that we can get a vnode for it.
1567 error = fop_lookup(dvp, name, &vp, NULL, 0,
1568 NULL, cr, NULL, NULL, NULL);
1569 if (error) {
1571 * We couldn't find the file that we thought that
1572 * we just created. So, we'll just try creating
1573 * it again.
1575 if (error == ENOENT)
1576 goto tryagain;
1577 goto out;
1581 * If the file is delegated to a v4 client, go ahead
1582 * and initiate recall, this create is a hint that a
1583 * conflicting v3 open has occurred.
1586 if (rfs4_check_delegated(FWRITE, vp, FALSE)) {
1587 VN_RELE(vp);
1588 resp->status = NFS3ERR_JUKEBOX;
1589 goto out1;
1592 va.va_mask = AT_ALL;
1593 vap = fop_getattr(vp, &va, 0, cr, NULL) ? NULL : &va;
1595 mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1596 /* % with INT32_MAX to prevent overflows */
1597 if (args->how.mode == EXCLUSIVE && (vap == NULL ||
1598 vap->va_mtime.tv_sec !=
1599 (mtime->seconds % INT32_MAX) ||
1600 vap->va_mtime.tv_nsec != mtime->nseconds)) {
1601 VN_RELE(vp);
1602 error = EEXIST;
1603 goto out;
1605 } else {
1607 if ((args->how.mode == UNCHECKED ||
1608 args->how.mode == GUARDED) &&
1609 args->how.createhow3_u.obj_attributes.size.set_it &&
1610 va.va_size == 0)
1611 trunc = TRUE;
1612 else
1613 trunc = FALSE;
1615 if (rfs4_check_delegated(FWRITE, vp, trunc)) {
1616 VN_RELE(vp);
1617 resp->status = NFS3ERR_JUKEBOX;
1618 goto out1;
1621 va.va_mask = AT_ALL;
1622 vap = fop_getattr(vp, &va, 0, cr, NULL) ? NULL : &va;
1625 * We need to check to make sure that the file got
1626 * created to the indicated size. If not, we do a
1627 * setattr to try to change the size, but we don't
1628 * try too hard. This shouldn't a problem as most
1629 * clients will only specifiy a size of zero which
1630 * local file systems handle. However, even if
1631 * the client does specify a non-zero size, it can
1632 * still recover by checking the size of the file
1633 * after it has created it and then issue a setattr
1634 * request of its own to set the size of the file.
1636 if (vap != NULL &&
1637 (args->how.mode == UNCHECKED ||
1638 args->how.mode == GUARDED) &&
1639 args->how.createhow3_u.obj_attributes.size.set_it &&
1640 vap->va_size != reqsize) {
1641 va.va_mask = AT_SIZE;
1642 va.va_size = reqsize;
1643 (void) fop_setattr(vp, &va, 0, cr, NULL);
1644 va.va_mask = AT_ALL;
1645 vap = fop_getattr(vp, &va, 0, cr, NULL) ? NULL : &va;
1649 if (name != args->where.name)
1650 kmem_free(name, MAXPATHLEN + 1);
1652 error = makefh3(&resp->resok.obj.handle, vp, exi);
1653 if (error)
1654 resp->resok.obj.handle_follows = FALSE;
1655 else
1656 resp->resok.obj.handle_follows = TRUE;
1659 * Force modified data and metadata out to stable storage.
1661 (void) fop_fsync(vp, FNODSYNC, cr, NULL);
1662 (void) fop_fsync(dvp, 0, cr, NULL);
1664 VN_RELE(vp);
1665 if (tvp != NULL) {
1666 if (in_crit)
1667 nbl_end_crit(tvp);
1668 VN_RELE(tvp);
1671 resp->status = NFS3_OK;
1672 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1673 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1675 DTRACE_NFSV3_4(op__create__done, struct svc_req *, req,
1676 cred_t *, cr, vnode_t *, dvp, CREATE3res *, resp);
1678 VN_RELE(dvp);
1679 return;
1681 out:
1682 if (curthread->t_flag & T_WOULDBLOCK) {
1683 curthread->t_flag &= ~T_WOULDBLOCK;
1684 resp->status = NFS3ERR_JUKEBOX;
1685 } else
1686 resp->status = puterrno3(error);
1687 out1:
1688 DTRACE_NFSV3_4(op__create__done, struct svc_req *, req,
1689 cred_t *, cr, vnode_t *, dvp, CREATE3res *, resp);
1691 if (name != NULL && name != args->where.name)
1692 kmem_free(name, MAXPATHLEN + 1);
1694 if (tvp != NULL) {
1695 if (in_crit)
1696 nbl_end_crit(tvp);
1697 VN_RELE(tvp);
1699 if (dvp != NULL)
1700 VN_RELE(dvp);
1701 vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
1704 void *
1705 rfs3_create_getfh(CREATE3args *args)
1708 return (&args->where.dir);
1711 void
1712 rfs3_mkdir(MKDIR3args *args, MKDIR3res *resp, struct exportinfo *exi,
1713 struct svc_req *req, cred_t *cr, bool_t ro)
1715 int error;
1716 vnode_t *vp = NULL;
1717 vnode_t *dvp;
1718 struct vattr *vap;
1719 struct vattr va;
1720 struct vattr *dbvap;
1721 struct vattr dbva;
1722 struct vattr *davap;
1723 struct vattr dava;
1724 struct sockaddr *ca;
1725 char *name = NULL;
1727 dbvap = NULL;
1728 davap = NULL;
1730 dvp = nfs3_fhtovp(&args->where.dir, exi);
1732 DTRACE_NFSV3_4(op__mkdir__start, struct svc_req *, req,
1733 cred_t *, cr, vnode_t *, dvp, MKDIR3args *, args);
1735 if (dvp == NULL) {
1736 error = ESTALE;
1737 goto out;
1740 dbva.va_mask = AT_ALL;
1741 dbvap = fop_getattr(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1742 davap = dbvap;
1744 if (args->where.name == nfs3nametoolong) {
1745 resp->status = NFS3ERR_NAMETOOLONG;
1746 goto out1;
1749 if (args->where.name == NULL || *(args->where.name) == '\0') {
1750 resp->status = NFS3ERR_ACCES;
1751 goto out1;
1754 if (rdonly(ro, dvp)) {
1755 resp->status = NFS3ERR_ROFS;
1756 goto out1;
1759 error = sattr3_to_vattr(&args->attributes, &va);
1760 if (error)
1761 goto out;
1763 if (!(va.va_mask & AT_MODE)) {
1764 resp->status = NFS3ERR_INVAL;
1765 goto out1;
1768 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1769 name = nfscmd_convname(ca, exi, args->where.name,
1770 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
1772 if (name == NULL) {
1773 resp->status = NFS3ERR_INVAL;
1774 goto out1;
1777 va.va_mask |= AT_TYPE;
1778 va.va_type = VDIR;
1780 error = fop_mkdir(dvp, name, &va, &vp, cr, NULL, 0, NULL);
1782 if (name != args->where.name)
1783 kmem_free(name, MAXPATHLEN + 1);
1785 dava.va_mask = AT_ALL;
1786 davap = fop_getattr(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
1789 * Force modified data and metadata out to stable storage.
1791 (void) fop_fsync(dvp, 0, cr, NULL);
1793 if (error)
1794 goto out;
1796 error = makefh3(&resp->resok.obj.handle, vp, exi);
1797 if (error)
1798 resp->resok.obj.handle_follows = FALSE;
1799 else
1800 resp->resok.obj.handle_follows = TRUE;
1802 va.va_mask = AT_ALL;
1803 vap = fop_getattr(vp, &va, 0, cr, NULL) ? NULL : &va;
1806 * Force modified data and metadata out to stable storage.
1808 (void) fop_fsync(vp, 0, cr, NULL);
1810 VN_RELE(vp);
1812 resp->status = NFS3_OK;
1813 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1814 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1816 DTRACE_NFSV3_4(op__mkdir__done, struct svc_req *, req,
1817 cred_t *, cr, vnode_t *, dvp, MKDIR3res *, resp);
1818 VN_RELE(dvp);
1820 return;
1822 out:
1823 if (curthread->t_flag & T_WOULDBLOCK) {
1824 curthread->t_flag &= ~T_WOULDBLOCK;
1825 resp->status = NFS3ERR_JUKEBOX;
1826 } else
1827 resp->status = puterrno3(error);
1828 out1:
1829 DTRACE_NFSV3_4(op__mkdir__done, struct svc_req *, req,
1830 cred_t *, cr, vnode_t *, dvp, MKDIR3res *, resp);
1831 if (dvp != NULL)
1832 VN_RELE(dvp);
1833 vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
1836 void *
1837 rfs3_mkdir_getfh(MKDIR3args *args)
1840 return (&args->where.dir);
1843 void
1844 rfs3_symlink(SYMLINK3args *args, SYMLINK3res *resp, struct exportinfo *exi,
1845 struct svc_req *req, cred_t *cr, bool_t ro)
1847 int error;
1848 vnode_t *vp;
1849 vnode_t *dvp;
1850 struct vattr *vap;
1851 struct vattr va;
1852 struct vattr *dbvap;
1853 struct vattr dbva;
1854 struct vattr *davap;
1855 struct vattr dava;
1856 struct sockaddr *ca;
1857 char *name = NULL;
1858 char *symdata = NULL;
1860 dbvap = NULL;
1861 davap = NULL;
1863 dvp = nfs3_fhtovp(&args->where.dir, exi);
1865 DTRACE_NFSV3_4(op__symlink__start, struct svc_req *, req,
1866 cred_t *, cr, vnode_t *, dvp, SYMLINK3args *, args);
1868 if (dvp == NULL) {
1869 error = ESTALE;
1870 goto err;
1873 dbva.va_mask = AT_ALL;
1874 dbvap = fop_getattr(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1875 davap = dbvap;
1877 if (args->where.name == nfs3nametoolong) {
1878 resp->status = NFS3ERR_NAMETOOLONG;
1879 goto err1;
1882 if (args->where.name == NULL || *(args->where.name) == '\0') {
1883 resp->status = NFS3ERR_ACCES;
1884 goto err1;
1887 if (rdonly(ro, dvp)) {
1888 resp->status = NFS3ERR_ROFS;
1889 goto err1;
1892 error = sattr3_to_vattr(&args->symlink.symlink_attributes, &va);
1893 if (error)
1894 goto err;
1896 if (!(va.va_mask & AT_MODE)) {
1897 resp->status = NFS3ERR_INVAL;
1898 goto err1;
1901 if (args->symlink.symlink_data == nfs3nametoolong) {
1902 resp->status = NFS3ERR_NAMETOOLONG;
1903 goto err1;
1906 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1907 name = nfscmd_convname(ca, exi, args->where.name,
1908 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
1910 if (name == NULL) {
1911 /* This is really a Solaris EILSEQ */
1912 resp->status = NFS3ERR_INVAL;
1913 goto err1;
1916 symdata = nfscmd_convname(ca, exi, args->symlink.symlink_data,
1917 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
1918 if (symdata == NULL) {
1919 /* This is really a Solaris EILSEQ */
1920 resp->status = NFS3ERR_INVAL;
1921 goto err1;
1925 va.va_mask |= AT_TYPE;
1926 va.va_type = VLNK;
1928 error = fop_symlink(dvp, name, &va, symdata, cr, NULL, 0);
1930 dava.va_mask = AT_ALL;
1931 davap = fop_getattr(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
1933 if (error)
1934 goto err;
1936 error = fop_lookup(dvp, name, &vp, NULL, 0, NULL, cr,
1937 NULL, NULL, NULL);
1940 * Force modified data and metadata out to stable storage.
1942 (void) fop_fsync(dvp, 0, cr, NULL);
1945 resp->status = NFS3_OK;
1946 if (error) {
1947 resp->resok.obj.handle_follows = FALSE;
1948 vattr_to_post_op_attr(NULL, &resp->resok.obj_attributes);
1949 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1950 goto out;
1953 error = makefh3(&resp->resok.obj.handle, vp, exi);
1954 if (error)
1955 resp->resok.obj.handle_follows = FALSE;
1956 else
1957 resp->resok.obj.handle_follows = TRUE;
1959 va.va_mask = AT_ALL;
1960 vap = fop_getattr(vp, &va, 0, cr, NULL) ? NULL : &va;
1963 * Force modified data and metadata out to stable storage.
1965 (void) fop_fsync(vp, 0, cr, NULL);
1967 VN_RELE(vp);
1969 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1970 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1971 goto out;
1973 err:
1974 if (curthread->t_flag & T_WOULDBLOCK) {
1975 curthread->t_flag &= ~T_WOULDBLOCK;
1976 resp->status = NFS3ERR_JUKEBOX;
1977 } else
1978 resp->status = puterrno3(error);
1979 err1:
1980 vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
1981 out:
1982 if (name != NULL && name != args->where.name)
1983 kmem_free(name, MAXPATHLEN + 1);
1984 if (symdata != NULL && symdata != args->symlink.symlink_data)
1985 kmem_free(symdata, MAXPATHLEN + 1);
1987 DTRACE_NFSV3_4(op__symlink__done, struct svc_req *, req,
1988 cred_t *, cr, vnode_t *, dvp, SYMLINK3res *, resp);
1990 if (dvp != NULL)
1991 VN_RELE(dvp);
1994 void *
1995 rfs3_symlink_getfh(SYMLINK3args *args)
1998 return (&args->where.dir);
2001 void
2002 rfs3_mknod(MKNOD3args *args, MKNOD3res *resp, struct exportinfo *exi,
2003 struct svc_req *req, cred_t *cr, bool_t ro)
2005 int error;
2006 vnode_t *vp;
2007 vnode_t *realvp;
2008 vnode_t *dvp;
2009 struct vattr *vap;
2010 struct vattr va;
2011 struct vattr *dbvap;
2012 struct vattr dbva;
2013 struct vattr *davap;
2014 struct vattr dava;
2015 int mode;
2016 enum vcexcl excl;
2017 struct sockaddr *ca;
2018 char *name = NULL;
2020 dbvap = NULL;
2021 davap = NULL;
2023 dvp = nfs3_fhtovp(&args->where.dir, exi);
2025 DTRACE_NFSV3_4(op__mknod__start, struct svc_req *, req,
2026 cred_t *, cr, vnode_t *, dvp, MKNOD3args *, args);
2028 if (dvp == NULL) {
2029 error = ESTALE;
2030 goto out;
2033 dbva.va_mask = AT_ALL;
2034 dbvap = fop_getattr(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2035 davap = dbvap;
2037 if (args->where.name == nfs3nametoolong) {
2038 resp->status = NFS3ERR_NAMETOOLONG;
2039 goto out1;
2042 if (args->where.name == NULL || *(args->where.name) == '\0') {
2043 resp->status = NFS3ERR_ACCES;
2044 goto out1;
2047 if (rdonly(ro, dvp)) {
2048 resp->status = NFS3ERR_ROFS;
2049 goto out1;
2052 switch (args->what.type) {
2053 case NF3CHR:
2054 case NF3BLK:
2055 error = sattr3_to_vattr(
2056 &args->what.mknoddata3_u.device.dev_attributes, &va);
2057 if (error)
2058 goto out;
2059 if (secpolicy_sys_devices(cr) != 0) {
2060 resp->status = NFS3ERR_PERM;
2061 goto out1;
2063 if (args->what.type == NF3CHR)
2064 va.va_type = VCHR;
2065 else
2066 va.va_type = VBLK;
2067 va.va_rdev = makedevice(
2068 args->what.mknoddata3_u.device.spec.specdata1,
2069 args->what.mknoddata3_u.device.spec.specdata2);
2070 va.va_mask |= AT_TYPE | AT_RDEV;
2071 break;
2072 case NF3SOCK:
2073 error = sattr3_to_vattr(
2074 &args->what.mknoddata3_u.pipe_attributes, &va);
2075 if (error)
2076 goto out;
2077 va.va_type = VSOCK;
2078 va.va_mask |= AT_TYPE;
2079 break;
2080 case NF3FIFO:
2081 error = sattr3_to_vattr(
2082 &args->what.mknoddata3_u.pipe_attributes, &va);
2083 if (error)
2084 goto out;
2085 va.va_type = VFIFO;
2086 va.va_mask |= AT_TYPE;
2087 break;
2088 default:
2089 resp->status = NFS3ERR_BADTYPE;
2090 goto out1;
2094 * Must specify the mode.
2096 if (!(va.va_mask & AT_MODE)) {
2097 resp->status = NFS3ERR_INVAL;
2098 goto out1;
2101 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2102 name = nfscmd_convname(ca, exi, args->where.name,
2103 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2105 if (name == NULL) {
2106 resp->status = NFS3ERR_INVAL;
2107 goto out1;
2110 excl = EXCL;
2112 mode = 0;
2114 error = fop_create(dvp, name, &va, excl, mode,
2115 &vp, cr, 0, NULL, NULL);
2117 if (name != args->where.name)
2118 kmem_free(name, MAXPATHLEN + 1);
2120 dava.va_mask = AT_ALL;
2121 davap = fop_getattr(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2124 * Force modified data and metadata out to stable storage.
2126 (void) fop_fsync(dvp, 0, cr, NULL);
2128 if (error)
2129 goto out;
2131 resp->status = NFS3_OK;
2133 error = makefh3(&resp->resok.obj.handle, vp, exi);
2134 if (error)
2135 resp->resok.obj.handle_follows = FALSE;
2136 else
2137 resp->resok.obj.handle_follows = TRUE;
2139 va.va_mask = AT_ALL;
2140 vap = fop_getattr(vp, &va, 0, cr, NULL) ? NULL : &va;
2143 * Force modified metadata out to stable storage.
2145 * if a underlying vp exists, pass it to fop_fsync
2147 if (fop_realvp(vp, &realvp, NULL) == 0)
2148 (void) fop_fsync(realvp, FNODSYNC, cr, NULL);
2149 else
2150 (void) fop_fsync(vp, FNODSYNC, cr, NULL);
2152 VN_RELE(vp);
2154 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2155 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2156 DTRACE_NFSV3_4(op__mknod__done, struct svc_req *, req,
2157 cred_t *, cr, vnode_t *, dvp, MKNOD3res *, resp);
2158 VN_RELE(dvp);
2159 return;
2161 out:
2162 if (curthread->t_flag & T_WOULDBLOCK) {
2163 curthread->t_flag &= ~T_WOULDBLOCK;
2164 resp->status = NFS3ERR_JUKEBOX;
2165 } else
2166 resp->status = puterrno3(error);
2167 out1:
2168 DTRACE_NFSV3_4(op__mknod__done, struct svc_req *, req,
2169 cred_t *, cr, vnode_t *, dvp, MKNOD3res *, resp);
2170 if (dvp != NULL)
2171 VN_RELE(dvp);
2172 vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2175 void *
2176 rfs3_mknod_getfh(MKNOD3args *args)
2179 return (&args->where.dir);
2182 void
2183 rfs3_remove(REMOVE3args *args, REMOVE3res *resp, struct exportinfo *exi,
2184 struct svc_req *req, cred_t *cr, bool_t ro)
2186 int error = 0;
2187 vnode_t *vp;
2188 struct vattr *bvap;
2189 struct vattr bva;
2190 struct vattr *avap;
2191 struct vattr ava;
2192 vnode_t *targvp = NULL;
2193 struct sockaddr *ca;
2194 char *name = NULL;
2196 bvap = NULL;
2197 avap = NULL;
2199 vp = nfs3_fhtovp(&args->object.dir, exi);
2201 DTRACE_NFSV3_4(op__remove__start, struct svc_req *, req,
2202 cred_t *, cr, vnode_t *, vp, REMOVE3args *, args);
2204 if (vp == NULL) {
2205 error = ESTALE;
2206 goto err;
2209 bva.va_mask = AT_ALL;
2210 bvap = fop_getattr(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2211 avap = bvap;
2213 if (vp->v_type != VDIR) {
2214 resp->status = NFS3ERR_NOTDIR;
2215 goto err1;
2218 if (args->object.name == nfs3nametoolong) {
2219 resp->status = NFS3ERR_NAMETOOLONG;
2220 goto err1;
2223 if (args->object.name == NULL || *(args->object.name) == '\0') {
2224 resp->status = NFS3ERR_ACCES;
2225 goto err1;
2228 if (rdonly(ro, vp)) {
2229 resp->status = NFS3ERR_ROFS;
2230 goto err1;
2233 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2234 name = nfscmd_convname(ca, exi, args->object.name,
2235 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2237 if (name == NULL) {
2238 resp->status = NFS3ERR_INVAL;
2239 goto err1;
2243 * Check for a conflict with a non-blocking mandatory share
2244 * reservation and V4 delegations
2246 error = fop_lookup(vp, name, &targvp, NULL, 0,
2247 NULL, cr, NULL, NULL, NULL);
2248 if (error != 0)
2249 goto err;
2251 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2252 resp->status = NFS3ERR_JUKEBOX;
2253 goto err1;
2256 if (!nbl_need_check(targvp)) {
2257 error = fop_remove(vp, name, cr, NULL, 0);
2258 } else {
2259 nbl_start_crit(targvp, RW_READER);
2260 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
2261 error = EACCES;
2262 } else {
2263 error = fop_remove(vp, name, cr, NULL, 0);
2265 nbl_end_crit(targvp);
2267 VN_RELE(targvp);
2268 targvp = NULL;
2270 ava.va_mask = AT_ALL;
2271 avap = fop_getattr(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2274 * Force modified data and metadata out to stable storage.
2276 (void) fop_fsync(vp, 0, cr, NULL);
2278 if (error)
2279 goto err;
2281 resp->status = NFS3_OK;
2282 vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2283 goto out;
2285 err:
2286 if (curthread->t_flag & T_WOULDBLOCK) {
2287 curthread->t_flag &= ~T_WOULDBLOCK;
2288 resp->status = NFS3ERR_JUKEBOX;
2289 } else
2290 resp->status = puterrno3(error);
2291 err1:
2292 vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2293 out:
2294 DTRACE_NFSV3_4(op__remove__done, struct svc_req *, req,
2295 cred_t *, cr, vnode_t *, vp, REMOVE3res *, resp);
2297 if (name != NULL && name != args->object.name)
2298 kmem_free(name, MAXPATHLEN + 1);
2300 if (vp != NULL)
2301 VN_RELE(vp);
2304 void *
2305 rfs3_remove_getfh(REMOVE3args *args)
2308 return (&args->object.dir);
2311 void
2312 rfs3_rmdir(RMDIR3args *args, RMDIR3res *resp, struct exportinfo *exi,
2313 struct svc_req *req, cred_t *cr, bool_t ro)
2315 int error;
2316 vnode_t *vp;
2317 struct vattr *bvap;
2318 struct vattr bva;
2319 struct vattr *avap;
2320 struct vattr ava;
2321 struct sockaddr *ca;
2322 char *name = NULL;
2324 bvap = NULL;
2325 avap = NULL;
2327 vp = nfs3_fhtovp(&args->object.dir, exi);
2329 DTRACE_NFSV3_4(op__rmdir__start, struct svc_req *, req,
2330 cred_t *, cr, vnode_t *, vp, RMDIR3args *, args);
2332 if (vp == NULL) {
2333 error = ESTALE;
2334 goto err;
2337 bva.va_mask = AT_ALL;
2338 bvap = fop_getattr(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2339 avap = bvap;
2341 if (vp->v_type != VDIR) {
2342 resp->status = NFS3ERR_NOTDIR;
2343 goto err1;
2346 if (args->object.name == nfs3nametoolong) {
2347 resp->status = NFS3ERR_NAMETOOLONG;
2348 goto err1;
2351 if (args->object.name == NULL || *(args->object.name) == '\0') {
2352 resp->status = NFS3ERR_ACCES;
2353 goto err1;
2356 if (rdonly(ro, vp)) {
2357 resp->status = NFS3ERR_ROFS;
2358 goto err1;
2361 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2362 name = nfscmd_convname(ca, exi, args->object.name,
2363 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2365 if (name == NULL) {
2366 resp->status = NFS3ERR_INVAL;
2367 goto err1;
2370 error = fop_rmdir(vp, name, rootdir, cr, NULL, 0);
2372 if (name != args->object.name)
2373 kmem_free(name, MAXPATHLEN + 1);
2375 ava.va_mask = AT_ALL;
2376 avap = fop_getattr(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2379 * Force modified data and metadata out to stable storage.
2381 (void) fop_fsync(vp, 0, cr, NULL);
2383 if (error) {
2385 * System V defines rmdir to return EEXIST, not ENOTEMPTY,
2386 * if the directory is not empty. A System V NFS server
2387 * needs to map NFS3ERR_EXIST to NFS3ERR_NOTEMPTY to transmit
2388 * over the wire.
2390 if (error == EEXIST)
2391 error = ENOTEMPTY;
2392 goto err;
2395 resp->status = NFS3_OK;
2396 vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2397 goto out;
2399 err:
2400 if (curthread->t_flag & T_WOULDBLOCK) {
2401 curthread->t_flag &= ~T_WOULDBLOCK;
2402 resp->status = NFS3ERR_JUKEBOX;
2403 } else
2404 resp->status = puterrno3(error);
2405 err1:
2406 vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2407 out:
2408 DTRACE_NFSV3_4(op__rmdir__done, struct svc_req *, req,
2409 cred_t *, cr, vnode_t *, vp, RMDIR3res *, resp);
2410 if (vp != NULL)
2411 VN_RELE(vp);
2415 void *
2416 rfs3_rmdir_getfh(RMDIR3args *args)
2419 return (&args->object.dir);
2422 void
2423 rfs3_rename(RENAME3args *args, RENAME3res *resp, struct exportinfo *exi,
2424 struct svc_req *req, cred_t *cr, bool_t ro)
2426 int error = 0;
2427 vnode_t *fvp;
2428 vnode_t *tvp;
2429 vnode_t *targvp;
2430 struct vattr *fbvap;
2431 struct vattr fbva;
2432 struct vattr *favap;
2433 struct vattr fava;
2434 struct vattr *tbvap;
2435 struct vattr tbva;
2436 struct vattr *tavap;
2437 struct vattr tava;
2438 nfs_fh3 *fh3;
2439 struct exportinfo *to_exi;
2440 vnode_t *srcvp = NULL;
2441 struct sockaddr *ca;
2442 char *name = NULL;
2443 char *toname = NULL;
2445 fbvap = NULL;
2446 favap = NULL;
2447 tbvap = NULL;
2448 tavap = NULL;
2449 tvp = NULL;
2451 fvp = nfs3_fhtovp(&args->from.dir, exi);
2453 DTRACE_NFSV3_4(op__rename__start, struct svc_req *, req,
2454 cred_t *, cr, vnode_t *, fvp, RENAME3args *, args);
2456 if (fvp == NULL) {
2457 error = ESTALE;
2458 goto err;
2461 fbva.va_mask = AT_ALL;
2462 fbvap = fop_getattr(fvp, &fbva, 0, cr, NULL) ? NULL : &fbva;
2463 favap = fbvap;
2465 fh3 = &args->to.dir;
2466 to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
2467 if (to_exi == NULL) {
2468 resp->status = NFS3ERR_ACCES;
2469 goto err1;
2471 exi_rele(to_exi);
2473 if (to_exi != exi) {
2474 resp->status = NFS3ERR_XDEV;
2475 goto err1;
2478 tvp = nfs3_fhtovp(&args->to.dir, exi);
2479 if (tvp == NULL) {
2480 error = ESTALE;
2481 goto err;
2484 tbva.va_mask = AT_ALL;
2485 tbvap = fop_getattr(tvp, &tbva, 0, cr, NULL) ? NULL : &tbva;
2486 tavap = tbvap;
2488 if (fvp->v_type != VDIR || tvp->v_type != VDIR) {
2489 resp->status = NFS3ERR_NOTDIR;
2490 goto err1;
2493 if (args->from.name == nfs3nametoolong ||
2494 args->to.name == nfs3nametoolong) {
2495 resp->status = NFS3ERR_NAMETOOLONG;
2496 goto err1;
2498 if (args->from.name == NULL || *(args->from.name) == '\0' ||
2499 args->to.name == NULL || *(args->to.name) == '\0') {
2500 resp->status = NFS3ERR_ACCES;
2501 goto err1;
2504 if (rdonly(ro, tvp)) {
2505 resp->status = NFS3ERR_ROFS;
2506 goto err1;
2509 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2510 name = nfscmd_convname(ca, exi, args->from.name,
2511 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2513 if (name == NULL) {
2514 resp->status = NFS3ERR_INVAL;
2515 goto err1;
2518 toname = nfscmd_convname(ca, exi, args->to.name,
2519 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2521 if (toname == NULL) {
2522 resp->status = NFS3ERR_INVAL;
2523 goto err1;
2527 * Check for a conflict with a non-blocking mandatory share
2528 * reservation or V4 delegations.
2530 error = fop_lookup(fvp, name, &srcvp, NULL, 0,
2531 NULL, cr, NULL, NULL, NULL);
2532 if (error != 0)
2533 goto err;
2536 * If we rename a delegated file we should recall the
2537 * delegation, since future opens should fail or would
2538 * refer to a new file.
2540 if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
2541 resp->status = NFS3ERR_JUKEBOX;
2542 goto err1;
2546 * Check for renaming over a delegated file. Check rfs4_deleg_policy
2547 * first to avoid fop_lookup if possible.
2549 if (rfs4_deleg_policy != SRV_NEVER_DELEGATE &&
2550 fop_lookup(tvp, toname, &targvp, NULL, 0, NULL, cr,
2551 NULL, NULL, NULL) == 0) {
2553 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2554 VN_RELE(targvp);
2555 resp->status = NFS3ERR_JUKEBOX;
2556 goto err1;
2558 VN_RELE(targvp);
2561 if (!nbl_need_check(srcvp)) {
2562 error = fop_rename(fvp, name, tvp, toname, cr, NULL, 0);
2563 } else {
2564 nbl_start_crit(srcvp, RW_READER);
2565 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL))
2566 error = EACCES;
2567 else
2568 error = fop_rename(fvp, name, tvp, toname, cr, NULL, 0);
2569 nbl_end_crit(srcvp);
2571 if (error == 0)
2572 vn_renamepath(tvp, srcvp, args->to.name,
2573 strlen(args->to.name));
2574 VN_RELE(srcvp);
2575 srcvp = NULL;
2577 fava.va_mask = AT_ALL;
2578 favap = fop_getattr(fvp, &fava, 0, cr, NULL) ? NULL : &fava;
2579 tava.va_mask = AT_ALL;
2580 tavap = fop_getattr(tvp, &tava, 0, cr, NULL) ? NULL : &tava;
2583 * Force modified data and metadata out to stable storage.
2585 (void) fop_fsync(fvp, 0, cr, NULL);
2586 (void) fop_fsync(tvp, 0, cr, NULL);
2588 if (error)
2589 goto err;
2591 resp->status = NFS3_OK;
2592 vattr_to_wcc_data(fbvap, favap, &resp->resok.fromdir_wcc);
2593 vattr_to_wcc_data(tbvap, tavap, &resp->resok.todir_wcc);
2594 goto out;
2596 err:
2597 if (curthread->t_flag & T_WOULDBLOCK) {
2598 curthread->t_flag &= ~T_WOULDBLOCK;
2599 resp->status = NFS3ERR_JUKEBOX;
2600 } else {
2601 resp->status = puterrno3(error);
2603 err1:
2604 vattr_to_wcc_data(fbvap, favap, &resp->resfail.fromdir_wcc);
2605 vattr_to_wcc_data(tbvap, tavap, &resp->resfail.todir_wcc);
2607 out:
2608 if (name != NULL && name != args->from.name)
2609 kmem_free(name, MAXPATHLEN + 1);
2610 if (toname != NULL && toname != args->to.name)
2611 kmem_free(toname, MAXPATHLEN + 1);
2613 DTRACE_NFSV3_4(op__rename__done, struct svc_req *, req,
2614 cred_t *, cr, vnode_t *, fvp, RENAME3res *, resp);
2615 if (fvp != NULL)
2616 VN_RELE(fvp);
2617 if (tvp != NULL)
2618 VN_RELE(tvp);
2621 void *
2622 rfs3_rename_getfh(RENAME3args *args)
2625 return (&args->from.dir);
2628 void
2629 rfs3_link(LINK3args *args, LINK3res *resp, struct exportinfo *exi,
2630 struct svc_req *req, cred_t *cr, bool_t ro)
2632 int error;
2633 vnode_t *vp;
2634 vnode_t *dvp;
2635 struct vattr *vap;
2636 struct vattr va;
2637 struct vattr *bvap;
2638 struct vattr bva;
2639 struct vattr *avap;
2640 struct vattr ava;
2641 nfs_fh3 *fh3;
2642 struct exportinfo *to_exi;
2643 struct sockaddr *ca;
2644 char *name = NULL;
2646 vap = NULL;
2647 bvap = NULL;
2648 avap = NULL;
2649 dvp = NULL;
2651 vp = nfs3_fhtovp(&args->file, exi);
2653 DTRACE_NFSV3_4(op__link__start, struct svc_req *, req,
2654 cred_t *, cr, vnode_t *, vp, LINK3args *, args);
2656 if (vp == NULL) {
2657 error = ESTALE;
2658 goto out;
2661 va.va_mask = AT_ALL;
2662 vap = fop_getattr(vp, &va, 0, cr, NULL) ? NULL : &va;
2664 fh3 = &args->link.dir;
2665 to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
2666 if (to_exi == NULL) {
2667 resp->status = NFS3ERR_ACCES;
2668 goto out1;
2670 exi_rele(to_exi);
2672 if (to_exi != exi) {
2673 resp->status = NFS3ERR_XDEV;
2674 goto out1;
2677 dvp = nfs3_fhtovp(&args->link.dir, exi);
2678 if (dvp == NULL) {
2679 error = ESTALE;
2680 goto out;
2683 bva.va_mask = AT_ALL;
2684 bvap = fop_getattr(dvp, &bva, 0, cr, NULL) ? NULL : &bva;
2686 if (dvp->v_type != VDIR) {
2687 resp->status = NFS3ERR_NOTDIR;
2688 goto out1;
2691 if (args->link.name == nfs3nametoolong) {
2692 resp->status = NFS3ERR_NAMETOOLONG;
2693 goto out1;
2696 if (args->link.name == NULL || *(args->link.name) == '\0') {
2697 resp->status = NFS3ERR_ACCES;
2698 goto out1;
2701 if (rdonly(ro, dvp)) {
2702 resp->status = NFS3ERR_ROFS;
2703 goto out1;
2706 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2707 name = nfscmd_convname(ca, exi, args->link.name,
2708 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2710 if (name == NULL) {
2711 resp->status = NFS3ERR_SERVERFAULT;
2712 goto out1;
2715 error = fop_link(dvp, vp, name, cr, NULL, 0);
2717 va.va_mask = AT_ALL;
2718 vap = fop_getattr(vp, &va, 0, cr, NULL) ? NULL : &va;
2719 ava.va_mask = AT_ALL;
2720 avap = fop_getattr(dvp, &ava, 0, cr, NULL) ? NULL : &ava;
2723 * Force modified data and metadata out to stable storage.
2725 (void) fop_fsync(vp, FNODSYNC, cr, NULL);
2726 (void) fop_fsync(dvp, 0, cr, NULL);
2728 if (error)
2729 goto out;
2731 VN_RELE(dvp);
2733 resp->status = NFS3_OK;
2734 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
2735 vattr_to_wcc_data(bvap, avap, &resp->resok.linkdir_wcc);
2737 DTRACE_NFSV3_4(op__link__done, struct svc_req *, req,
2738 cred_t *, cr, vnode_t *, vp, LINK3res *, resp);
2740 VN_RELE(vp);
2742 return;
2744 out:
2745 if (curthread->t_flag & T_WOULDBLOCK) {
2746 curthread->t_flag &= ~T_WOULDBLOCK;
2747 resp->status = NFS3ERR_JUKEBOX;
2748 } else
2749 resp->status = puterrno3(error);
2750 out1:
2751 if (name != NULL && name != args->link.name)
2752 kmem_free(name, MAXPATHLEN + 1);
2754 DTRACE_NFSV3_4(op__link__done, struct svc_req *, req,
2755 cred_t *, cr, vnode_t *, vp, LINK3res *, resp);
2757 if (vp != NULL)
2758 VN_RELE(vp);
2759 if (dvp != NULL)
2760 VN_RELE(dvp);
2761 vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
2762 vattr_to_wcc_data(bvap, avap, &resp->resfail.linkdir_wcc);
2765 void *
2766 rfs3_link_getfh(LINK3args *args)
2769 return (&args->file);
2773 * This macro defines the size of a response which contains attribute
2774 * information and one directory entry (whose length is specified by
2775 * the macro parameter). If the incoming request is larger than this,
2776 * then we are guaranteed to be able to return at least one directory entry
2777 * if one exists. Therefore, we do not need to check for
2778 * NFS3ERR_TOOSMALL if the requested size is larger than this. If it
2779 * is not, then we need to check to make sure that this error does not
2780 * need to be returned.
2782 * NFS3_READDIR_MIN_COUNT is comprised of following :
2784 * status - 1 * BYTES_PER_XDR_UNIT
2785 * attr. flag - 1 * BYTES_PER_XDR_UNIT
2786 * cookie verifier - 2 * BYTES_PER_XDR_UNIT
2787 * attributes - NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT
2788 * boolean - 1 * BYTES_PER_XDR_UNIT
2789 * file id - 2 * BYTES_PER_XDR_UNIT
2790 * directory name length - 1 * BYTES_PER_XDR_UNIT
2791 * cookie - 2 * BYTES_PER_XDR_UNIT
2792 * end of list - 1 * BYTES_PER_XDR_UNIT
2793 * end of file - 1 * BYTES_PER_XDR_UNIT
2794 * Name length of directory to the nearest byte
/*
 * Bytes needed for a READDIR reply that carries directory attributes and
 * a single entry whose name is `length' bytes long: 33 fixed XDR units
 * (including the fattr3) plus the name rounded up to a whole XDR unit.
 */
#define	NFS3_READDIR_MIN_COUNT(length)	\
	(roundup((length), BYTES_PER_XDR_UNIT) + \
	BYTES_PER_XDR_UNIT * \
	(1 + 1 + 2 + NFS3_SIZEOF_FATTR3 + 1 + 2 + 1 + 2 + 1 + 1))
2801 /* ARGSUSED */
2802 void
2803 rfs3_readdir(READDIR3args *args, READDIR3res *resp, struct exportinfo *exi,
2804 struct svc_req *req, cred_t *cr, bool_t ro)
2806 int error;
2807 vnode_t *vp;
2808 struct vattr *vap;
2809 struct vattr va;
2810 struct iovec iov;
2811 struct uio uio;
2812 char *data;
2813 int iseof;
2814 int bufsize;
2815 int namlen;
2816 uint_t count;
2817 struct sockaddr *ca;
2819 vap = NULL;
2821 vp = nfs3_fhtovp(&args->dir, exi);
2823 DTRACE_NFSV3_4(op__readdir__start, struct svc_req *, req,
2824 cred_t *, cr, vnode_t *, vp, READDIR3args *, args);
2826 if (vp == NULL) {
2827 error = ESTALE;
2828 goto out;
2831 (void) fop_rwlock(vp, V_WRITELOCK_FALSE, NULL);
2833 va.va_mask = AT_ALL;
2834 vap = fop_getattr(vp, &va, 0, cr, NULL) ? NULL : &va;
2836 if (vp->v_type != VDIR) {
2837 resp->status = NFS3ERR_NOTDIR;
2838 goto out1;
2841 error = fop_access(vp, VREAD, 0, cr, NULL);
2842 if (error)
2843 goto out;
2846 * Now don't allow arbitrary count to alloc;
2847 * allow the maximum not to exceed rfs3_tsize()
2849 if (args->count > rfs3_tsize(req))
2850 args->count = rfs3_tsize(req);
2853 * Make sure that there is room to read at least one entry
2854 * if any are available.
2856 if (args->count < DIRENT64_RECLEN(MAXNAMELEN))
2857 count = DIRENT64_RECLEN(MAXNAMELEN);
2858 else
2859 count = args->count;
2861 data = kmem_alloc(count, KM_SLEEP);
2863 iov.iov_base = data;
2864 iov.iov_len = count;
2865 uio.uio_iov = &iov;
2866 uio.uio_iovcnt = 1;
2867 uio.uio_segflg = UIO_SYSSPACE;
2868 uio.uio_extflg = UIO_COPY_CACHED;
2869 uio.uio_loffset = (offset_t)args->cookie;
2870 uio.uio_resid = count;
2872 error = fop_readdir(vp, &uio, cr, &iseof, NULL, 0);
2874 va.va_mask = AT_ALL;
2875 vap = fop_getattr(vp, &va, 0, cr, NULL) ? NULL : &va;
2877 if (error) {
2878 kmem_free(data, count);
2879 goto out;
2883 * If the count was not large enough to be able to guarantee
2884 * to be able to return at least one entry, then need to
2885 * check to see if NFS3ERR_TOOSMALL should be returned.
2887 if (args->count < NFS3_READDIR_MIN_COUNT(MAXNAMELEN)) {
2889 * bufsize is used to keep track of the size of the response.
2890 * It is primed with:
2891 * 1 for the status +
2892 * 1 for the dir_attributes.attributes boolean +
2893 * 2 for the cookie verifier
2894 * all times BYTES_PER_XDR_UNIT to convert from XDR units
2895 * to bytes. If there are directory attributes to be
2896 * returned, then:
2897 * NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
2898 * time BYTES_PER_XDR_UNIT is added to account for them.
2900 bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
2901 if (vap != NULL)
2902 bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
2904 * An entry is composed of:
2905 * 1 for the true/false list indicator +
2906 * 2 for the fileid +
2907 * 1 for the length of the name +
2908 * 2 for the cookie +
2909 * all times BYTES_PER_XDR_UNIT to convert from
2910 * XDR units to bytes, plus the length of the name
2911 * rounded up to the nearest BYTES_PER_XDR_UNIT.
2913 if (count != uio.uio_resid) {
2914 namlen = strlen(((struct dirent64 *)data)->d_name);
2915 bufsize += (1 + 2 + 1 + 2) * BYTES_PER_XDR_UNIT +
2916 roundup(namlen, BYTES_PER_XDR_UNIT);
2919 * We need to check to see if the number of bytes left
2920 * to go into the buffer will actually fit into the
2921 * buffer. This is calculated as the size of this
2922 * entry plus:
2923 * 1 for the true/false list indicator +
2924 * 1 for the eof indicator
2925 * times BYTES_PER_XDR_UNIT to convert from from
2926 * XDR units to bytes.
2928 bufsize += (1 + 1) * BYTES_PER_XDR_UNIT;
2929 if (bufsize > args->count) {
2930 kmem_free(data, count);
2931 resp->status = NFS3ERR_TOOSMALL;
2932 goto out1;
2937 * Have a valid readir buffer for the native character
2938 * set. Need to check if a conversion is necessary and
2939 * potentially rewrite the whole buffer. Note that if the
2940 * conversion expands names enough, the structure may not
2941 * fit. In this case, we need to drop entries until if fits
2942 * and patch the counts in order that the next readdir will
2943 * get the correct entries.
2945 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2946 data = nfscmd_convdirent(ca, exi, data, count, &resp->status);
2949 fop_rwunlock(vp, V_WRITELOCK_FALSE, NULL);
2951 #if 0 /* notyet */
2953 * Don't do this. It causes local disk writes when just
2954 * reading the file and the overhead is deemed larger
2955 * than the benefit.
2958 * Force modified metadata out to stable storage.
2960 (void) fop_fsync(vp, FNODSYNC, cr, NULL);
2961 #endif
2963 resp->status = NFS3_OK;
2964 vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
2965 resp->resok.cookieverf = 0;
2966 resp->resok.reply.entries = (entry3 *)data;
2967 resp->resok.reply.eof = iseof;
2968 resp->resok.size = count - uio.uio_resid;
2969 resp->resok.count = args->count;
2970 resp->resok.freecount = count;
2972 DTRACE_NFSV3_4(op__readdir__done, struct svc_req *, req,
2973 cred_t *, cr, vnode_t *, vp, READDIR3res *, resp);
2975 VN_RELE(vp);
2977 return;
2979 out:
2980 if (curthread->t_flag & T_WOULDBLOCK) {
2981 curthread->t_flag &= ~T_WOULDBLOCK;
2982 resp->status = NFS3ERR_JUKEBOX;
2983 } else
2984 resp->status = puterrno3(error);
2985 out1:
2986 DTRACE_NFSV3_4(op__readdir__done, struct svc_req *, req,
2987 cred_t *, cr, vnode_t *, vp, READDIR3res *, resp);
2989 if (vp != NULL) {
2990 fop_rwunlock(vp, V_WRITELOCK_FALSE, NULL);
2991 VN_RELE(vp);
2993 vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
2996 void *
2997 rfs3_readdir_getfh(READDIR3args *args)
3000 return (&args->dir);
3003 void
3004 rfs3_readdir_free(READDIR3res *resp)
3007 if (resp->status == NFS3_OK)
3008 kmem_free(resp->resok.reply.entries, resp->resok.freecount);
/*
 * Step from one dirent64 record to the one that follows it in the same
 * buffer, using the record length stored in the current entry.  Any
 * earlier definition of nextdp is discarded first.
 */
#if defined(nextdp)
#undef nextdp
#endif
#define	nextdp(dp)	\
	((struct dirent64 *)(&((char *)(dp))[(dp)->d_reclen]))
3017 * This macro computes the size of a response which contains
3018 * one directory entry including the attributes as well as file handle.
3019 * If the incoming request is larger than this, then we are guaranteed to be
3020 * able to return at least one more directory entry if one exists.
3022 * NFS3_READDIRPLUS_ENTRY is made up of the following:
3024 * boolean - 1 * BYTES_PER_XDR_UNIT
3025 * file id - 2 * BYTES_PER_XDR_UNIT
3026 * directory name length - 1 * BYTES_PER_XDR_UNIT
3027 * cookie - 2 * BYTES_PER_XDR_UNIT
3028 * attribute flag - 1 * BYTES_PER_XDR_UNIT
3029 * attributes - NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT
3030 * status byte for file handle - 1 * BYTES_PER_XDR_UNIT
3031 * length of a file handle - 1 * BYTES_PER_XDR_UNIT
3032 * Maximum length of a file handle (NFS3_MAXFHSIZE)
3033 * name length of the entry to the nearest bytes
/*
 * Bytes one READDIRPLUS entry can occupy in the reply for a name of
 * `namelen' bytes: 30 fixed XDR units (including the fattr3), a
 * maximum-size file handle, and the name rounded up to an XDR unit.
 */
#define	NFS3_READDIRPLUS_ENTRY(namelen)	\
	(roundup(namelen, BYTES_PER_XDR_UNIT) + NFS3_MAXFHSIZE + \
	BYTES_PER_XDR_UNIT * \
	(1 + 2 + 1 + 2 + 1 + NFS3_SIZEOF_FATTR3 + 1 + 1))
3040 static int rfs3_readdir_unit = MAXBSIZE;
3042 /* ARGSUSED */
3043 void
3044 rfs3_readdirplus(READDIRPLUS3args *args, READDIRPLUS3res *resp,
3045 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
3047 int error;
3048 vnode_t *vp;
3049 struct vattr *vap;
3050 struct vattr va;
3051 struct iovec iov;
3052 struct uio uio;
3053 char *data;
3054 int iseof;
3055 struct dirent64 *dp;
3056 vnode_t *nvp;
3057 struct vattr *nvap;
3058 struct vattr nva;
3059 entryplus3_info *infop = NULL;
3060 int size = 0;
3061 int nents = 0;
3062 int bufsize = 0;
3063 int entrysize = 0;
3064 int tofit = 0;
3065 int rd_unit = rfs3_readdir_unit;
3066 int prev_len;
3067 int space_left;
3068 int i;
3069 uint_t *namlen = NULL;
3070 char *ndata = NULL;
3071 struct sockaddr *ca;
3072 size_t ret;
3074 vap = NULL;
3076 vp = nfs3_fhtovp(&args->dir, exi);
3078 DTRACE_NFSV3_4(op__readdirplus__start, struct svc_req *, req,
3079 cred_t *, cr, vnode_t *, vp, READDIRPLUS3args *, args);
3081 if (vp == NULL) {
3082 error = ESTALE;
3083 goto out;
3086 (void) fop_rwlock(vp, V_WRITELOCK_FALSE, NULL);
3088 va.va_mask = AT_ALL;
3089 vap = fop_getattr(vp, &va, 0, cr, NULL) ? NULL : &va;
3091 if (vp->v_type != VDIR) {
3092 error = ENOTDIR;
3093 goto out;
3096 error = fop_access(vp, VREAD, 0, cr, NULL);
3097 if (error)
3098 goto out;
3101 * Don't allow arbitrary counts for allocation
3103 if (args->maxcount > rfs3_tsize(req))
3104 args->maxcount = rfs3_tsize(req);
3107 * Make sure that there is room to read at least one entry
3108 * if any are available
3110 args->dircount = MIN(args->dircount, args->maxcount);
3112 if (args->dircount < DIRENT64_RECLEN(MAXNAMELEN))
3113 args->dircount = DIRENT64_RECLEN(MAXNAMELEN);
3116 * This allocation relies on a minimum directory entry
3117 * being roughly 24 bytes. Therefore, the namlen array
3118 * will have enough space based on the maximum number of
3119 * entries to read.
3121 namlen = kmem_alloc(args->dircount, KM_SLEEP);
3123 space_left = args->dircount;
3124 data = kmem_alloc(args->dircount, KM_SLEEP);
3125 dp = (struct dirent64 *)data;
3126 uio.uio_iov = &iov;
3127 uio.uio_iovcnt = 1;
3128 uio.uio_segflg = UIO_SYSSPACE;
3129 uio.uio_extflg = UIO_COPY_CACHED;
3130 uio.uio_loffset = (offset_t)args->cookie;
3133 * bufsize is used to keep track of the size of the response as we
3134 * get post op attributes and filehandles for each entry. This is
3135 * an optimization as the server may have read more entries than will
3136 * fit in the buffer specified by maxcount. We stop calculating
3137 * post op attributes and filehandles once we have exceeded maxcount.
3138 * This will minimize the effect of truncation.
3140 * It is primed with:
3141 * 1 for the status +
3142 * 1 for the dir_attributes.attributes boolean +
3143 * 2 for the cookie verifier
3144 * all times BYTES_PER_XDR_UNIT to convert from XDR units
3145 * to bytes. If there are directory attributes to be
3146 * returned, then:
3147 * NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3148 * time BYTES_PER_XDR_UNIT is added to account for them.
3150 bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3151 if (vap != NULL)
3152 bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3154 getmoredents:
3156 * Here we make a check so that our read unit is not larger than
3157 * the space left in the buffer.
3159 rd_unit = MIN(rd_unit, space_left);
3160 iov.iov_base = (char *)dp;
3161 iov.iov_len = rd_unit;
3162 uio.uio_resid = rd_unit;
3163 prev_len = rd_unit;
3165 error = fop_readdir(vp, &uio, cr, &iseof, NULL, 0);
3167 if (error) {
3168 kmem_free(data, args->dircount);
3169 goto out;
3172 if (uio.uio_resid == prev_len && !iseof) {
3173 if (nents == 0) {
3174 kmem_free(data, args->dircount);
3175 resp->status = NFS3ERR_TOOSMALL;
3176 goto out1;
3180 * We could not get any more entries, so get the attributes
3181 * and filehandle for the entries already obtained.
3183 goto good;
3187 * We estimate the size of the response by assuming the
3188 * entry exists and attributes and filehandle are also valid
3190 for (size = prev_len - uio.uio_resid;
3191 size > 0;
3192 size -= dp->d_reclen, dp = nextdp(dp)) {
3194 if (dp->d_ino == 0) {
3195 nents++;
3196 continue;
3199 namlen[nents] = strlen(dp->d_name);
3200 entrysize = NFS3_READDIRPLUS_ENTRY(namlen[nents]);
3203 * We need to check to see if the number of bytes left
3204 * to go into the buffer will actually fit into the
3205 * buffer. This is calculated as the size of this
3206 * entry plus:
3207 * 1 for the true/false list indicator +
3208 * 1 for the eof indicator
3209 * times BYTES_PER_XDR_UNIT to convert from XDR units
3210 * to bytes.
3212 * Also check the dircount limit against the first entry read
3215 tofit = entrysize + (1 + 1) * BYTES_PER_XDR_UNIT;
3216 if (bufsize + tofit > args->maxcount) {
3218 * We make a check here to see if this was the
3219 * first entry being measured. If so, then maxcount
3220 * was too small to begin with and so we need to
3221 * return with NFS3ERR_TOOSMALL.
3223 if (nents == 0) {
3224 kmem_free(data, args->dircount);
3225 resp->status = NFS3ERR_TOOSMALL;
3226 goto out1;
3228 iseof = FALSE;
3229 goto good;
3231 bufsize += entrysize;
3232 nents++;
3236 * If there is enough room to fit at least 1 more entry including
3237 * post op attributes and filehandle in the buffer AND that we haven't
3238 * exceeded dircount then go back and get some more.
3240 if (!iseof &&
3241 (args->maxcount - bufsize) >= NFS3_READDIRPLUS_ENTRY(MAXNAMELEN)) {
3242 space_left -= (prev_len - uio.uio_resid);
3243 if (space_left >= DIRENT64_RECLEN(MAXNAMELEN))
3244 goto getmoredents;
3246 /* else, fall through */
3248 good:
3249 va.va_mask = AT_ALL;
3250 vap = fop_getattr(vp, &va, 0, cr, NULL) ? NULL : &va;
3252 fop_rwunlock(vp, V_WRITELOCK_FALSE, NULL);
3254 infop = kmem_alloc(nents * sizeof (struct entryplus3_info), KM_SLEEP);
3255 resp->resok.infop = infop;
3257 dp = (struct dirent64 *)data;
3258 for (i = 0; i < nents; i++) {
3260 if (dp->d_ino == 0) {
3261 infop[i].attr.attributes = FALSE;
3262 infop[i].fh.handle_follows = FALSE;
3263 dp = nextdp(dp);
3264 continue;
3267 infop[i].namelen = namlen[i];
3269 error = fop_lookup(vp, dp->d_name, &nvp, NULL, 0, NULL, cr,
3270 NULL, NULL, NULL);
3271 if (error) {
3272 infop[i].attr.attributes = FALSE;
3273 infop[i].fh.handle_follows = FALSE;
3274 dp = nextdp(dp);
3275 continue;
3278 nva.va_mask = AT_ALL;
3279 nvap = rfs4_delegated_getattr(nvp, &nva, 0, cr) ? NULL : &nva;
3281 /* Lie about the object type for a referral */
3282 if (vn_is_nfs_reparse(nvp, cr))
3283 nvap->va_type = VLNK;
3285 vattr_to_post_op_attr(nvap, &infop[i].attr);
3287 error = makefh3(&infop[i].fh.handle, nvp, exi);
3288 if (!error)
3289 infop[i].fh.handle_follows = TRUE;
3290 else
3291 infop[i].fh.handle_follows = FALSE;
3293 VN_RELE(nvp);
3294 dp = nextdp(dp);
3297 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3298 ret = nfscmd_convdirplus(ca, exi, data, nents, args->dircount, &ndata);
3299 if (ndata == NULL)
3300 ndata = data;
3302 if (ret > 0) {
3304 * We had to drop one or more entries in order to fit
3305 * during the character conversion. We need to patch
3306 * up the size and eof info.
3308 if (iseof)
3309 iseof = FALSE;
3311 ret = nfscmd_dropped_entrysize((struct dirent64 *)data,
3312 nents, ret);
3316 #if 0 /* notyet */
3318 * Don't do this. It causes local disk writes when just
3319 * reading the file and the overhead is deemed larger
3320 * than the benefit.
3323 * Force modified metadata out to stable storage.
3325 (void) fop_fsync(vp, FNODSYNC, cr, NULL);
3326 #endif
3328 kmem_free(namlen, args->dircount);
3330 resp->status = NFS3_OK;
3331 vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3332 resp->resok.cookieverf = 0;
3333 resp->resok.reply.entries = (entryplus3 *)ndata;
3334 resp->resok.reply.eof = iseof;
3335 resp->resok.size = nents;
3336 resp->resok.count = args->dircount - ret;
3337 resp->resok.maxcount = args->maxcount;
3339 DTRACE_NFSV3_4(op__readdirplus__done, struct svc_req *, req,
3340 cred_t *, cr, vnode_t *, vp, READDIRPLUS3res *, resp);
3341 if (ndata != data)
3342 kmem_free(data, args->dircount);
3345 VN_RELE(vp);
3347 return;
3349 out:
3350 if (curthread->t_flag & T_WOULDBLOCK) {
3351 curthread->t_flag &= ~T_WOULDBLOCK;
3352 resp->status = NFS3ERR_JUKEBOX;
3353 } else {
3354 resp->status = puterrno3(error);
3356 out1:
3357 DTRACE_NFSV3_4(op__readdirplus__done, struct svc_req *, req,
3358 cred_t *, cr, vnode_t *, vp, READDIRPLUS3res *, resp);
3360 if (vp != NULL) {
3361 fop_rwunlock(vp, V_WRITELOCK_FALSE, NULL);
3362 VN_RELE(vp);
3365 if (namlen != NULL)
3366 kmem_free(namlen, args->dircount);
3368 vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3371 void *
3372 rfs3_readdirplus_getfh(READDIRPLUS3args *args)
3375 return (&args->dir);
3378 void
3379 rfs3_readdirplus_free(READDIRPLUS3res *resp)
3382 if (resp->status == NFS3_OK) {
3383 kmem_free(resp->resok.reply.entries, resp->resok.count);
3384 kmem_free(resp->resok.infop,
3385 resp->resok.size * sizeof (struct entryplus3_info));
3389 /* ARGSUSED */
3390 void
3391 rfs3_fsstat(FSSTAT3args *args, FSSTAT3res *resp, struct exportinfo *exi,
3392 struct svc_req *req, cred_t *cr, bool_t ro)
3394 int error;
3395 vnode_t *vp;
3396 struct vattr *vap;
3397 struct vattr va;
3398 struct statvfs64 sb;
3400 vap = NULL;
3402 vp = nfs3_fhtovp(&args->fsroot, exi);
3404 DTRACE_NFSV3_4(op__fsstat__start, struct svc_req *, req,
3405 cred_t *, cr, vnode_t *, vp, FSSTAT3args *, args);
3407 if (vp == NULL) {
3408 error = ESTALE;
3409 goto out;
3412 error = VFS_STATVFS(vp->v_vfsp, &sb);
3414 va.va_mask = AT_ALL;
3415 vap = fop_getattr(vp, &va, 0, cr, NULL) ? NULL : &va;
3417 if (error)
3418 goto out;
3420 resp->status = NFS3_OK;
3421 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3422 if (sb.f_blocks != (fsblkcnt64_t)-1)
3423 resp->resok.tbytes = (size3)sb.f_frsize * (size3)sb.f_blocks;
3424 else
3425 resp->resok.tbytes = (size3)sb.f_blocks;
3426 if (sb.f_bfree != (fsblkcnt64_t)-1)
3427 resp->resok.fbytes = (size3)sb.f_frsize * (size3)sb.f_bfree;
3428 else
3429 resp->resok.fbytes = (size3)sb.f_bfree;
3430 if (sb.f_bavail != (fsblkcnt64_t)-1)
3431 resp->resok.abytes = (size3)sb.f_frsize * (size3)sb.f_bavail;
3432 else
3433 resp->resok.abytes = (size3)sb.f_bavail;
3434 resp->resok.tfiles = (size3)sb.f_files;
3435 resp->resok.ffiles = (size3)sb.f_ffree;
3436 resp->resok.afiles = (size3)sb.f_favail;
3437 resp->resok.invarsec = 0;
3439 DTRACE_NFSV3_4(op__fsstat__done, struct svc_req *, req,
3440 cred_t *, cr, vnode_t *, vp, FSSTAT3res *, resp);
3441 VN_RELE(vp);
3443 return;
3445 out:
3446 if (curthread->t_flag & T_WOULDBLOCK) {
3447 curthread->t_flag &= ~T_WOULDBLOCK;
3448 resp->status = NFS3ERR_JUKEBOX;
3449 } else
3450 resp->status = puterrno3(error);
3451 out1:
3452 DTRACE_NFSV3_4(op__fsstat__done, struct svc_req *, req,
3453 cred_t *, cr, vnode_t *, vp, FSSTAT3res *, resp);
3455 if (vp != NULL)
3456 VN_RELE(vp);
3457 vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
3460 void *
3461 rfs3_fsstat_getfh(FSSTAT3args *args)
3464 return (&args->fsroot);
3467 /* ARGSUSED */
3468 void
3469 rfs3_fsinfo(FSINFO3args *args, FSINFO3res *resp, struct exportinfo *exi,
3470 struct svc_req *req, cred_t *cr, bool_t ro)
3472 vnode_t *vp;
3473 struct vattr *vap;
3474 struct vattr va;
3475 uint32_t xfer_size;
3476 ulong_t l = 0;
3477 int error;
3479 vp = nfs3_fhtovp(&args->fsroot, exi);
3481 DTRACE_NFSV3_4(op__fsinfo__start, struct svc_req *, req,
3482 cred_t *, cr, vnode_t *, vp, FSINFO3args *, args);
3484 if (vp == NULL) {
3485 if (curthread->t_flag & T_WOULDBLOCK) {
3486 curthread->t_flag &= ~T_WOULDBLOCK;
3487 resp->status = NFS3ERR_JUKEBOX;
3488 } else
3489 resp->status = NFS3ERR_STALE;
3490 vattr_to_post_op_attr(NULL, &resp->resfail.obj_attributes);
3491 goto out;
3494 va.va_mask = AT_ALL;
3495 vap = fop_getattr(vp, &va, 0, cr, NULL) ? NULL : &va;
3497 resp->status = NFS3_OK;
3498 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3499 xfer_size = rfs3_tsize(req);
3500 resp->resok.rtmax = xfer_size;
3501 resp->resok.rtpref = xfer_size;
3502 resp->resok.rtmult = DEV_BSIZE;
3503 resp->resok.wtmax = xfer_size;
3504 resp->resok.wtpref = xfer_size;
3505 resp->resok.wtmult = DEV_BSIZE;
3506 resp->resok.dtpref = MAXBSIZE;
3509 * Large file spec: want maxfilesize based on limit of
3510 * underlying filesystem. We can guess 2^31-1 if need be.
3512 error = fop_pathconf(vp, _PC_FILESIZEBITS, &l, cr, NULL);
3513 if (error) {
3514 resp->status = puterrno3(error);
3515 goto out;
3519 * If the underlying file system does not support _PC_FILESIZEBITS,
3520 * return a reasonable default. Note that error code on fop_pathconf
3521 * will be 0, even if the underlying file system does not support
3522 * _PC_FILESIZEBITS.
3524 if (l == (ulong_t)-1) {
3525 resp->resok.maxfilesize = MAXOFF32_T;
3526 } else {
3527 if (l >= (sizeof (uint64_t) * 8))
3528 resp->resok.maxfilesize = INT64_MAX;
3529 else
3530 resp->resok.maxfilesize = (1LL << (l-1)) - 1;
3533 resp->resok.time_delta.seconds = 0;
3534 resp->resok.time_delta.nseconds = 1000;
3535 resp->resok.properties = FSF3_LINK | FSF3_SYMLINK |
3536 FSF3_HOMOGENEOUS | FSF3_CANSETTIME;
3538 DTRACE_NFSV3_4(op__fsinfo__done, struct svc_req *, req,
3539 cred_t *, cr, vnode_t *, vp, FSINFO3res *, resp);
3541 VN_RELE(vp);
3543 return;
3545 out:
3546 DTRACE_NFSV3_4(op__fsinfo__done, struct svc_req *, req,
3547 cred_t *, cr, vnode_t *, NULL, FSINFO3res *, resp);
3548 if (vp != NULL)
3549 VN_RELE(vp);
3552 void *
3553 rfs3_fsinfo_getfh(FSINFO3args *args)
3555 return (&args->fsroot);
3558 /* ARGSUSED */
3559 void
3560 rfs3_pathconf(PATHCONF3args *args, PATHCONF3res *resp, struct exportinfo *exi,
3561 struct svc_req *req, cred_t *cr, bool_t ro)
3563 int error;
3564 vnode_t *vp;
3565 struct vattr *vap;
3566 struct vattr va;
3567 ulong_t val;
3569 vap = NULL;
3571 vp = nfs3_fhtovp(&args->object, exi);
3573 DTRACE_NFSV3_4(op__pathconf__start, struct svc_req *, req,
3574 cred_t *, cr, vnode_t *, vp, PATHCONF3args *, args);
3576 if (vp == NULL) {
3577 error = ESTALE;
3578 goto out;
3581 va.va_mask = AT_ALL;
3582 vap = fop_getattr(vp, &va, 0, cr, NULL) ? NULL : &va;
3584 error = fop_pathconf(vp, _PC_LINK_MAX, &val, cr, NULL);
3585 if (error)
3586 goto out;
3587 resp->resok.info.link_max = (uint32)val;
3589 error = fop_pathconf(vp, _PC_NAME_MAX, &val, cr, NULL);
3590 if (error)
3591 goto out;
3592 resp->resok.info.name_max = (uint32)val;
3594 error = fop_pathconf(vp, _PC_NO_TRUNC, &val, cr, NULL);
3595 if (error)
3596 goto out;
3597 if (val == 1)
3598 resp->resok.info.no_trunc = TRUE;
3599 else
3600 resp->resok.info.no_trunc = FALSE;
3602 error = fop_pathconf(vp, _PC_CHOWN_RESTRICTED, &val, cr, NULL);
3603 if (error)
3604 goto out;
3605 if (val == 1)
3606 resp->resok.info.chown_restricted = TRUE;
3607 else
3608 resp->resok.info.chown_restricted = FALSE;
3610 resp->status = NFS3_OK;
3611 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3612 resp->resok.info.case_insensitive = FALSE;
3613 resp->resok.info.case_preserving = TRUE;
3614 DTRACE_NFSV3_4(op__pathconf__done, struct svc_req *, req,
3615 cred_t *, cr, vnode_t *, vp, PATHCONF3res *, resp);
3616 VN_RELE(vp);
3617 return;
3619 out:
3620 if (curthread->t_flag & T_WOULDBLOCK) {
3621 curthread->t_flag &= ~T_WOULDBLOCK;
3622 resp->status = NFS3ERR_JUKEBOX;
3623 } else
3624 resp->status = puterrno3(error);
3625 out1:
3626 DTRACE_NFSV3_4(op__pathconf__done, struct svc_req *, req,
3627 cred_t *, cr, vnode_t *, vp, PATHCONF3res *, resp);
3628 if (vp != NULL)
3629 VN_RELE(vp);
3630 vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
3633 void *
3634 rfs3_pathconf_getfh(PATHCONF3args *args)
3637 return (&args->object);
3640 void
3641 rfs3_commit(COMMIT3args *args, COMMIT3res *resp, struct exportinfo *exi,
3642 struct svc_req *req, cred_t *cr, bool_t ro)
3644 int error;
3645 vnode_t *vp;
3646 struct vattr *bvap;
3647 struct vattr bva;
3648 struct vattr *avap;
3649 struct vattr ava;
3651 bvap = NULL;
3652 avap = NULL;
3654 vp = nfs3_fhtovp(&args->file, exi);
3656 DTRACE_NFSV3_4(op__commit__start, struct svc_req *, req,
3657 cred_t *, cr, vnode_t *, vp, COMMIT3args *, args);
3659 if (vp == NULL) {
3660 error = ESTALE;
3661 goto out;
3664 bva.va_mask = AT_ALL;
3665 error = fop_getattr(vp, &bva, 0, cr, NULL);
3668 * If we can't get the attributes, then we can't do the
3669 * right access checking. So, we'll fail the request.
3671 if (error)
3672 goto out;
3674 bvap = &bva;
3676 if (rdonly(ro, vp)) {
3677 resp->status = NFS3ERR_ROFS;
3678 goto out1;
3681 if (vp->v_type != VREG) {
3682 resp->status = NFS3ERR_INVAL;
3683 goto out1;
3686 if (crgetuid(cr) != bva.va_uid &&
3687 (error = fop_access(vp, VWRITE, 0, cr, NULL)))
3688 goto out;
3690 error = fop_fsync(vp, FSYNC, cr, NULL);
3692 ava.va_mask = AT_ALL;
3693 avap = fop_getattr(vp, &ava, 0, cr, NULL) ? NULL : &ava;
3695 if (error)
3696 goto out;
3698 resp->status = NFS3_OK;
3699 vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
3700 resp->resok.verf = write3verf;
3702 DTRACE_NFSV3_4(op__commit__done, struct svc_req *, req,
3703 cred_t *, cr, vnode_t *, vp, COMMIT3res *, resp);
3705 VN_RELE(vp);
3707 return;
3709 out:
3710 if (curthread->t_flag & T_WOULDBLOCK) {
3711 curthread->t_flag &= ~T_WOULDBLOCK;
3712 resp->status = NFS3ERR_JUKEBOX;
3713 } else
3714 resp->status = puterrno3(error);
3715 out1:
3716 DTRACE_NFSV3_4(op__commit__done, struct svc_req *, req,
3717 cred_t *, cr, vnode_t *, vp, COMMIT3res *, resp);
3719 if (vp != NULL)
3720 VN_RELE(vp);
3721 vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
3724 void *
3725 rfs3_commit_getfh(COMMIT3args *args)
3728 return (&args->file);
3731 static int
3732 sattr3_to_vattr(sattr3 *sap, struct vattr *vap)
3735 vap->va_mask = 0;
3737 if (sap->mode.set_it) {
3738 vap->va_mode = (mode_t)sap->mode.mode;
3739 vap->va_mask |= AT_MODE;
3741 if (sap->uid.set_it) {
3742 vap->va_uid = (uid_t)sap->uid.uid;
3743 vap->va_mask |= AT_UID;
3745 if (sap->gid.set_it) {
3746 vap->va_gid = (gid_t)sap->gid.gid;
3747 vap->va_mask |= AT_GID;
3749 if (sap->size.set_it) {
3750 if (sap->size.size > (size3)((u_longlong_t)-1))
3751 return (EINVAL);
3752 vap->va_size = sap->size.size;
3753 vap->va_mask |= AT_SIZE;
3755 if (sap->atime.set_it == SET_TO_CLIENT_TIME) {
3756 #ifndef _LP64
3757 /* check time validity */
3758 if (!NFS3_TIME_OK(sap->atime.atime.seconds))
3759 return (EOVERFLOW);
3760 #endif
3762 * nfs protocol defines times as unsigned so don't extend sign,
3763 * unless sysadmin set nfs_allow_preepoch_time.
3765 NFS_TIME_T_CONVERT(vap->va_atime.tv_sec,
3766 sap->atime.atime.seconds);
3767 vap->va_atime.tv_nsec = (uint32_t)sap->atime.atime.nseconds;
3768 vap->va_mask |= AT_ATIME;
3769 } else if (sap->atime.set_it == SET_TO_SERVER_TIME) {
3770 gethrestime(&vap->va_atime);
3771 vap->va_mask |= AT_ATIME;
3773 if (sap->mtime.set_it == SET_TO_CLIENT_TIME) {
3774 #ifndef _LP64
3775 /* check time validity */
3776 if (!NFS3_TIME_OK(sap->mtime.mtime.seconds))
3777 return (EOVERFLOW);
3778 #endif
3780 * nfs protocol defines times as unsigned so don't extend sign,
3781 * unless sysadmin set nfs_allow_preepoch_time.
3783 NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec,
3784 sap->mtime.mtime.seconds);
3785 vap->va_mtime.tv_nsec = (uint32_t)sap->mtime.mtime.nseconds;
3786 vap->va_mask |= AT_MTIME;
3787 } else if (sap->mtime.set_it == SET_TO_SERVER_TIME) {
3788 gethrestime(&vap->va_mtime);
3789 vap->va_mask |= AT_MTIME;
3792 return (0);
3795 static ftype3 vt_to_nf3[] = {
3796 0, NF3REG, NF3DIR, NF3BLK, NF3CHR, NF3LNK, NF3FIFO, 0, 0, NF3SOCK, 0
3799 static int
3800 vattr_to_fattr3(struct vattr *vap, fattr3 *fap)
3803 ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
3804 /* Return error if time or size overflow */
3805 if (! (NFS_VAP_TIME_OK(vap) && NFS3_SIZE_OK(vap->va_size))) {
3806 return (EOVERFLOW);
3808 fap->type = vt_to_nf3[vap->va_type];
3809 fap->mode = (mode3)(vap->va_mode & MODEMASK);
3810 fap->nlink = (uint32)vap->va_nlink;
3811 if (vap->va_uid == UID_NOBODY)
3812 fap->uid = (uid3)NFS_UID_NOBODY;
3813 else
3814 fap->uid = (uid3)vap->va_uid;
3815 if (vap->va_gid == GID_NOBODY)
3816 fap->gid = (gid3)NFS_GID_NOBODY;
3817 else
3818 fap->gid = (gid3)vap->va_gid;
3819 fap->size = (size3)vap->va_size;
3820 fap->used = (size3)DEV_BSIZE * (size3)vap->va_nblocks;
3821 fap->rdev.specdata1 = (uint32)getmajor(vap->va_rdev);
3822 fap->rdev.specdata2 = (uint32)getminor(vap->va_rdev);
3823 fap->fsid = (uint64)vap->va_fsid;
3824 fap->fileid = (fileid3)vap->va_nodeid;
3825 fap->atime.seconds = vap->va_atime.tv_sec;
3826 fap->atime.nseconds = vap->va_atime.tv_nsec;
3827 fap->mtime.seconds = vap->va_mtime.tv_sec;
3828 fap->mtime.nseconds = vap->va_mtime.tv_nsec;
3829 fap->ctime.seconds = vap->va_ctime.tv_sec;
3830 fap->ctime.nseconds = vap->va_ctime.tv_nsec;
3831 return (0);
3834 static int
3835 vattr_to_wcc_attr(struct vattr *vap, wcc_attr *wccap)
3838 /* Return error if time or size overflow */
3839 if (!(NFS_TIME_T_OK(vap->va_mtime.tv_sec) &&
3840 NFS_TIME_T_OK(vap->va_ctime.tv_sec) &&
3841 NFS3_SIZE_OK(vap->va_size))) {
3842 return (EOVERFLOW);
3844 wccap->size = (size3)vap->va_size;
3845 wccap->mtime.seconds = vap->va_mtime.tv_sec;
3846 wccap->mtime.nseconds = vap->va_mtime.tv_nsec;
3847 wccap->ctime.seconds = vap->va_ctime.tv_sec;
3848 wccap->ctime.nseconds = vap->va_ctime.tv_nsec;
3849 return (0);
3852 static void
3853 vattr_to_pre_op_attr(struct vattr *vap, pre_op_attr *poap)
3856 /* don't return attrs if time overflow */
3857 if ((vap != NULL) && !vattr_to_wcc_attr(vap, &poap->attr)) {
3858 poap->attributes = TRUE;
3859 } else
3860 poap->attributes = FALSE;
3863 void
3864 vattr_to_post_op_attr(struct vattr *vap, post_op_attr *poap)
3867 /* don't return attrs if time overflow */
3868 if ((vap != NULL) && !vattr_to_fattr3(vap, &poap->attr)) {
3869 poap->attributes = TRUE;
3870 } else
3871 poap->attributes = FALSE;
3874 static void
3875 vattr_to_wcc_data(struct vattr *bvap, struct vattr *avap, wcc_data *wccp)
3878 vattr_to_pre_op_attr(bvap, &wccp->before);
3879 vattr_to_post_op_attr(avap, &wccp->after);
3882 void
3883 rfs3_srvrinit(void)
3885 struct rfs3_verf_overlay {
3886 uint_t id; /* a "unique" identifier */
3887 int ts; /* a unique timestamp */
3888 } *verfp;
3889 timestruc_t now;
3892 * The following algorithm attempts to find a unique verifier
3893 * to be used as the write verifier returned from the server
3894 * to the client. It is important that this verifier change
3895 * whenever the server reboots. Of secondary importance, it
3896 * is important for the verifier to be unique between two
3897 * different servers.
3899 * Thus, an attempt is made to use the system hostid and the
3900 * current time in seconds when the nfssrv kernel module is
3901 * loaded. It is assumed that an NFS server will not be able
3902 * to boot and then to reboot in less than a second. If the
3903 * hostid has not been set, then the current high resolution
3904 * time is used. This will ensure different verifiers each
3905 * time the server reboots and minimize the chances that two
3906 * different servers will have the same verifier.
3910 * We ASSERT that this constant logic expression is
3911 * always true because in the past, it wasn't.
3913 ASSERT(sizeof (*verfp) <= sizeof (write3verf));
3915 gethrestime(&now);
3916 verfp = (struct rfs3_verf_overlay *)&write3verf;
3917 verfp->ts = (int)now.tv_sec;
3918 verfp->id = zone_get_hostid(NULL);
3920 if (verfp->id == 0)
3921 verfp->id = (uint_t)now.tv_nsec;
3923 nfs3_srv_caller_id = fs_new_caller_id();
3927 static int
3928 rdma_setup_read_data3(READ3args *args, READ3resok *rok)
3930 struct clist *wcl;
3931 int wlist_len;
3932 count3 count = rok->count;
3934 wcl = args->wlist;
3935 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
3936 return (FALSE);
3939 wcl = args->wlist;
3940 rok->wlist_len = wlist_len;
3941 rok->wlist = wcl;
3942 return (TRUE);
/*
 * NFSv3 server teardown hook, the counterpart of rfs3_srvrinit().
 * Currently there is no state to release.
 */
void
rfs3_srvrfini(void)
{
	/* Nothing to do */
}