dmake: do not set MAKEFLAGS=k
[unleashed/tickless.git] / kernel / fs / nfs / nfs4_srv_deleg.c
blobeacb7f4d045c80a64f6a1571be351ae1248b1470
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
28 #include <sys/systm.h>
29 #include <rpc/auth.h>
30 #include <rpc/clnt.h>
31 #include <nfs/nfs4_kprot.h>
32 #include <nfs/nfs4.h>
33 #include <nfs/lm.h>
34 #include <sys/cmn_err.h>
35 #include <sys/disp.h>
36 #include <sys/sdt.h>
38 #include <sys/pathname.h>
40 #include <sys/strsubr.h>
41 #include <sys/ddi.h>
43 #include <sys/vnode.h>
44 #include <sys/sdt.h>
45 #include <inet/common.h>
46 #include <inet/ip.h>
47 #include <inet/ip6.h>
49 #define MAX_READ_DELEGATIONS 5
51 krwlock_t rfs4_deleg_policy_lock;
52 srv_deleg_policy_t rfs4_deleg_policy = SRV_NEVER_DELEGATE;
53 static int rfs4_deleg_wlp = 5;
54 kmutex_t rfs4_deleg_lock;
55 static int rfs4_deleg_disabled;
56 static int rfs4_max_setup_cb_tries = 5;
58 #ifdef DEBUG
60 static int rfs4_test_cbgetattr_fail = 0;
61 int rfs4_cb_null;
62 int rfs4_cb_debug;
63 int rfs4_deleg_debug;
65 #endif
67 static void rfs4_recall_file(rfs4_file_t *,
68 void (*recall)(rfs4_deleg_state_t *, bool_t),
69 bool_t, rfs4_client_t *);
70 static void rfs4_revoke_file(rfs4_file_t *);
71 static void rfs4_cb_chflush(rfs4_cbinfo_t *);
72 static CLIENT *rfs4_cb_getch(rfs4_cbinfo_t *);
73 static void rfs4_cb_freech(rfs4_cbinfo_t *, CLIENT *, bool_t);
74 static rfs4_deleg_state_t *rfs4_deleg_state(rfs4_state_t *,
75 open_delegation_type4, int *);
/*
 * Convert a universal address into a transport specific address using
 * inet_pton().
 *
 * The universal address has the form "<host>.<p1>.<p2>" where <host> is
 * the textual host address for address family "af" and p1/p2 are the
 * high and low order bytes of the port, written in decimal.
 *
 *	af	address family handed to inet_pton()
 *	ua	NUL terminated universal address; it is modified temporarily
 *		while parsing but is always restored before returning
 *	ap	where inet_pton() stores the binary host address
 *	pp	where the port is stored, in network byte order
 *
 * Returns 0 on success.  Returns EINVAL if the string does not contain
 * two '.' separators, if the host part is rejected by inet_pton(), if
 * the port part contains anything other than digits and one '.', or if
 * either port byte is larger than 255.
 */
static int
uaddr2sockaddr(int af, char *ua, void *ap, in_port_t *pp)
{
	int len = (int)strlen(ua);
	int dots = 0;
	int split, j, rc;
	unsigned int octet = 0;
	in_port_t port = 0;

	/* Locate the second '.' from the end; it separates host and port. */
	for (split = len - 1; split >= 0; split--) {
		if (ua[split] == '.' && ++dots == 2)
			break;
	}
	if (split < 0)
		return (EINVAL);

	/*
	 * Terminate the host part just long enough for inet_pton() to
	 * parse it, then put the '.' back so the caller's string (and
	 * its allocated length) is unchanged.
	 */
	ua[split] = '\0';
	rc = inet_pton(af, ua, ap);
	ua[split] = '.';
	if (rc != 1)
		return (EINVAL);

	for (j = split + 1; j < len; j++) {
		if (ua[j] == '.') {
			/* End of the high order byte. */
			port = (in_port_t)(octet << 8);
			octet = 0;
		} else if (ua[j] >= '0' && ua[j] <= '9') {
			octet = octet * 10 + (unsigned int)(ua[j] - '0');
			/*
			 * Each port component is a single byte; reject
			 * larger values instead of letting them wrap.
			 */
			if (octet > 255)
				return (EINVAL);
		} else {
			return (EINVAL);
		}
	}
	port += (in_port_t)octet;

	*pp = htons(port);
	return (0);
}
137 * Update the delegation policy with the
138 * value of "new_policy"
140 void
141 rfs4_set_deleg_policy(srv_deleg_policy_t new_policy)
143 rw_enter(&rfs4_deleg_policy_lock, RW_WRITER);
144 rfs4_deleg_policy = new_policy;
145 rw_exit(&rfs4_deleg_policy_lock);
148 void
149 rfs4_hold_deleg_policy(void)
151 rw_enter(&rfs4_deleg_policy_lock, RW_READER);
154 void
155 rfs4_rele_deleg_policy(void)
157 rw_exit(&rfs4_deleg_policy_lock);
162 * This free function is to be used when the client struct is being
163 * released and nothing at all is needed of the callback info any
164 * longer.
166 void
167 rfs4_cbinfo_free(rfs4_cbinfo_t *cbp)
169 char *addr = cbp->cb_callback.cb_location.r_addr;
170 char *netid = cbp->cb_callback.cb_location.r_netid;
172 /* Free old address if any */
174 if (addr)
175 kmem_free(addr, strlen(addr) + 1);
176 if (netid)
177 kmem_free(netid, strlen(netid) + 1);
179 addr = cbp->cb_newer.cb_callback.cb_location.r_addr;
180 netid = cbp->cb_newer.cb_callback.cb_location.r_netid;
182 if (addr)
183 kmem_free(addr, strlen(addr) + 1);
184 if (netid)
185 kmem_free(netid, strlen(netid) + 1);
187 if (cbp->cb_chc_free) {
188 rfs4_cb_chflush(cbp);
193 * The server uses this to check the callback path supplied by the
194 * client. The callback connection is marked "in progress" while this
195 * work is going on and then eventually marked either OK or FAILED.
196 * This work can be done as part of a separate thread and at the end
197 * of this the thread will exit or it may be done such that the caller
198 * will continue with other work.
200 static void
201 rfs4_do_cb_null(rfs4_client_t *cp)
203 struct timeval tv;
204 CLIENT *ch;
205 rfs4_cbstate_t newstate;
206 rfs4_cbinfo_t *cbp = &cp->rc_cbinfo;
208 mutex_enter(cbp->cb_lock);
209 /* If another thread is doing CB_NULL RPC then return */
210 if (cbp->cb_nullcaller == TRUE) {
211 mutex_exit(cbp->cb_lock);
212 rfs4_client_rele(cp);
213 return;
216 /* Mark the cbinfo as having a thread in the NULL callback */
217 cbp->cb_nullcaller = TRUE;
220 * Are there other threads still using the cbinfo client
221 * handles? If so, this thread must wait before going and
222 * mucking aroiund with the callback information
224 while (cbp->cb_refcnt != 0)
225 cv_wait(cbp->cb_cv_nullcaller, cbp->cb_lock);
228 * This thread itself may find that new callback info has
229 * arrived and is set up to handle this case and redrive the
230 * call to the client's callback server.
232 retry:
233 if (cbp->cb_newer.cb_new == TRUE &&
234 cbp->cb_newer.cb_confirmed == TRUE) {
235 char *addr = cbp->cb_callback.cb_location.r_addr;
236 char *netid = cbp->cb_callback.cb_location.r_netid;
239 * Free the old stuff if it exists; may be the first
240 * time through this path
242 if (addr)
243 kmem_free(addr, strlen(addr) + 1);
244 if (netid)
245 kmem_free(netid, strlen(netid) + 1);
247 /* Move over the addr/netid */
248 cbp->cb_callback.cb_location.r_addr =
249 cbp->cb_newer.cb_callback.cb_location.r_addr;
250 cbp->cb_newer.cb_callback.cb_location.r_addr = NULL;
251 cbp->cb_callback.cb_location.r_netid =
252 cbp->cb_newer.cb_callback.cb_location.r_netid;
253 cbp->cb_newer.cb_callback.cb_location.r_netid = NULL;
255 /* Get the program number */
256 cbp->cb_callback.cb_program =
257 cbp->cb_newer.cb_callback.cb_program;
258 cbp->cb_newer.cb_callback.cb_program = 0;
260 /* Don't forget the protocol's "cb_ident" field */
261 cbp->cb_ident = cbp->cb_newer.cb_ident;
262 cbp->cb_newer.cb_ident = 0;
264 /* no longer new */
265 cbp->cb_newer.cb_new = FALSE;
266 cbp->cb_newer.cb_confirmed = FALSE;
268 /* get rid of the old client handles that may exist */
269 rfs4_cb_chflush(cbp);
271 cbp->cb_state = CB_NONE;
272 cbp->cb_timefailed = 0; /* reset the clock */
273 cbp->cb_notified_of_cb_path_down = TRUE;
276 if (cbp->cb_state != CB_NONE) {
277 cv_broadcast(cbp->cb_cv); /* let the others know */
278 cbp->cb_nullcaller = FALSE;
279 mutex_exit(cbp->cb_lock);
280 rfs4_client_rele(cp);
281 return;
284 /* mark rfs4_client_t as CALLBACK NULL in progress */
285 cbp->cb_state = CB_INPROG;
286 mutex_exit(cbp->cb_lock);
288 /* get/generate a client handle */
289 if ((ch = rfs4_cb_getch(cbp)) == NULL) {
290 mutex_enter(cbp->cb_lock);
291 cbp->cb_state = CB_BAD;
292 cbp->cb_timefailed = gethrestime_sec(); /* observability */
293 goto retry;
297 tv.tv_sec = 30;
298 tv.tv_usec = 0;
299 if (clnt_call(ch, CB_NULL, xdr_void, NULL, xdr_void, NULL, tv) != 0) {
300 newstate = CB_BAD;
301 } else {
302 newstate = CB_OK;
303 #ifdef DEBUG
304 rfs4_cb_null++;
305 #endif
308 /* Check to see if the client has specified new callback info */
309 mutex_enter(cbp->cb_lock);
310 rfs4_cb_freech(cbp, ch, TRUE);
311 if (cbp->cb_newer.cb_new == TRUE &&
312 cbp->cb_newer.cb_confirmed == TRUE) {
313 goto retry; /* give the CB_NULL another chance */
316 cbp->cb_state = newstate;
317 if (cbp->cb_state == CB_BAD)
318 cbp->cb_timefailed = gethrestime_sec(); /* observability */
320 cv_broadcast(cbp->cb_cv); /* start up the other threads */
321 cbp->cb_nullcaller = FALSE;
322 mutex_exit(cbp->cb_lock);
324 rfs4_client_rele(cp);
328 * Given a client struct, inspect the callback info to see if the
329 * callback path is up and available.
331 * If new callback path is available and no one has set it up then
332 * try to set it up. If setup is not successful after 5 tries (5 secs)
333 * then gives up and returns NULL.
335 * If callback path is being initialized, then wait for the CB_NULL RPC
336 * call to occur.
338 static rfs4_cbinfo_t *
339 rfs4_cbinfo_hold(rfs4_client_t *cp)
341 rfs4_cbinfo_t *cbp = &cp->rc_cbinfo;
342 int retries = 0;
344 mutex_enter(cbp->cb_lock);
346 while (cbp->cb_newer.cb_new == TRUE && cbp->cb_nullcaller == FALSE) {
348 * Looks like a new callback path may be available and
349 * noone has set it up.
351 mutex_exit(cbp->cb_lock);
352 rfs4_dbe_hold(cp->rc_dbe);
353 rfs4_do_cb_null(cp); /* caller will release client hold */
355 mutex_enter(cbp->cb_lock);
357 * If callback path is no longer new, or it's being setup
358 * then stop and wait for it to be done.
360 if (cbp->cb_newer.cb_new == FALSE || cbp->cb_nullcaller == TRUE)
361 break;
362 mutex_exit(cbp->cb_lock);
364 if (++retries >= rfs4_max_setup_cb_tries)
365 return (NULL);
366 ddi_sleep(1);
367 mutex_enter(cbp->cb_lock);
370 /* Is there a thread working on doing the CB_NULL RPC? */
371 if (cbp->cb_nullcaller == TRUE)
372 cv_wait(cbp->cb_cv, cbp->cb_lock); /* if so, wait on it */
374 /* If the callback path is not okay (up and running), just quit */
375 if (cbp->cb_state != CB_OK) {
376 mutex_exit(cbp->cb_lock);
377 return (NULL);
380 /* Let someone know we are using the current callback info */
381 cbp->cb_refcnt++;
382 mutex_exit(cbp->cb_lock);
383 return (cbp);
387 * The caller is done with the callback info. It may be that the
388 * caller's RPC failed and the NFSv4 client has actually provided new
389 * callback information. If so, let the caller know so they can
390 * advantage of this and maybe retry the RPC that originally failed.
392 static int
393 rfs4_cbinfo_rele(rfs4_cbinfo_t *cbp, rfs4_cbstate_t newstate)
395 int cb_new = FALSE;
397 mutex_enter(cbp->cb_lock);
399 /* The caller gets a chance to mark the callback info as bad */
400 if (newstate != CB_NOCHANGE)
401 cbp->cb_state = newstate;
402 if (newstate == CB_FAILED) {
403 cbp->cb_timefailed = gethrestime_sec(); /* observability */
404 cbp->cb_notified_of_cb_path_down = FALSE;
407 cbp->cb_refcnt--; /* no longer using the information */
410 * A thread may be waiting on this one to finish and if so,
411 * let it know that it is okay to do the CB_NULL to the
412 * client's callback server.
414 if (cbp->cb_refcnt == 0 && cbp->cb_nullcaller)
415 cv_broadcast(cbp->cb_cv_nullcaller);
418 * If this is the last thread to use the callback info and
419 * there is new callback information to try and no thread is
420 * there ready to do the CB_NULL, then return true to teh
421 * caller so they can do the CB_NULL
423 if (cbp->cb_refcnt == 0 &&
424 cbp->cb_nullcaller == FALSE &&
425 cbp->cb_newer.cb_new == TRUE &&
426 cbp->cb_newer.cb_confirmed == TRUE)
427 cb_new = TRUE;
429 mutex_exit(cbp->cb_lock);
431 return (cb_new);
435 * Given the information in the callback info struct, create a client
436 * handle that can be used by the server for its callback path.
438 static CLIENT *
439 rfs4_cbch_init(rfs4_cbinfo_t *cbp)
441 struct knetconfig knc;
442 vnode_t *vp;
443 struct sockaddr_in addr4;
444 struct sockaddr_in6 addr6;
445 void *addr, *taddr;
446 in_port_t *pp;
447 int af;
448 char *devnam;
449 struct netbuf nb;
450 int size;
451 CLIENT *ch = NULL;
452 int useresvport = 0;
454 mutex_enter(cbp->cb_lock);
456 if (cbp->cb_callback.cb_location.r_netid == NULL ||
457 cbp->cb_callback.cb_location.r_addr == NULL) {
458 goto cb_init_out;
461 if (strcmp(cbp->cb_callback.cb_location.r_netid, "tcp") == 0) {
462 knc.knc_semantics = NC_TPI_COTS;
463 knc.knc_protofmly = "inet";
464 knc.knc_proto = "tcp";
465 devnam = "/dev/tcp";
466 af = AF_INET;
467 } else if (strcmp(cbp->cb_callback.cb_location.r_netid, "udp")
468 == 0) {
469 knc.knc_semantics = NC_TPI_CLTS;
470 knc.knc_protofmly = "inet";
471 knc.knc_proto = "udp";
472 devnam = "/dev/udp";
473 af = AF_INET;
474 } else if (strcmp(cbp->cb_callback.cb_location.r_netid, "tcp6")
475 == 0) {
476 knc.knc_semantics = NC_TPI_COTS;
477 knc.knc_protofmly = "inet6";
478 knc.knc_proto = "tcp";
479 devnam = "/dev/tcp6";
480 af = AF_INET6;
481 } else if (strcmp(cbp->cb_callback.cb_location.r_netid, "udp6")
482 == 0) {
483 knc.knc_semantics = NC_TPI_CLTS;
484 knc.knc_protofmly = "inet6";
485 knc.knc_proto = "udp";
486 devnam = "/dev/udp6";
487 af = AF_INET6;
488 } else {
489 goto cb_init_out;
492 if (lookupname(devnam, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp) != 0) {
494 goto cb_init_out;
497 if (vp->v_type != VCHR) {
498 VN_RELE(vp);
499 goto cb_init_out;
502 knc.knc_rdev = vp->v_rdev;
504 VN_RELE(vp);
506 if (af == AF_INET) {
507 size = sizeof (addr4);
508 bzero(&addr4, size);
509 addr4.sin_family = (sa_family_t)af;
510 addr = &addr4.sin_addr;
511 pp = &addr4.sin_port;
512 taddr = &addr4;
513 } else /* AF_INET6 */ {
514 size = sizeof (addr6);
515 bzero(&addr6, size);
516 addr6.sin6_family = (sa_family_t)af;
517 addr = &addr6.sin6_addr;
518 pp = &addr6.sin6_port;
519 taddr = &addr6;
522 if (uaddr2sockaddr(af,
523 cbp->cb_callback.cb_location.r_addr, addr, pp)) {
525 goto cb_init_out;
529 nb.maxlen = nb.len = size;
530 nb.buf = (char *)taddr;
532 if (clnt_tli_kcreate(&knc, &nb, cbp->cb_callback.cb_program,
533 NFS_CB, 0, 0, curthread->t_cred, &ch)) {
535 ch = NULL;
538 /* turn off reserved port usage */
539 (void) CLNT_CONTROL(ch, CLSET_BINDRESVPORT, (char *)&useresvport);
541 cb_init_out:
542 mutex_exit(cbp->cb_lock);
543 return (ch);
547 * Iterate over the client handle cache and
548 * destroy it.
550 static void
551 rfs4_cb_chflush(rfs4_cbinfo_t *cbp)
553 CLIENT *ch;
555 while (cbp->cb_chc_free) {
556 cbp->cb_chc_free--;
557 ch = cbp->cb_chc[cbp->cb_chc_free];
558 cbp->cb_chc[cbp->cb_chc_free] = NULL;
559 if (ch) {
560 if (ch->cl_auth)
561 auth_destroy(ch->cl_auth);
562 clnt_destroy(ch);
568 * Return a client handle, either from a the small
569 * rfs4_client_t cache or one that we just created.
571 static CLIENT *
572 rfs4_cb_getch(rfs4_cbinfo_t *cbp)
574 CLIENT *cbch = NULL;
575 uint32_t zilch = 0;
577 mutex_enter(cbp->cb_lock);
579 if (cbp->cb_chc_free) {
580 cbp->cb_chc_free--;
581 cbch = cbp->cb_chc[ cbp->cb_chc_free ];
582 mutex_exit(cbp->cb_lock);
583 (void) CLNT_CONTROL(cbch, CLSET_XID, (char *)&zilch);
584 return (cbch);
587 mutex_exit(cbp->cb_lock);
589 /* none free so make it now */
590 cbch = rfs4_cbch_init(cbp);
592 return (cbch);
596 * Return the client handle to the small cache or
597 * destroy it.
599 static void
600 rfs4_cb_freech(rfs4_cbinfo_t *cbp, CLIENT *ch, bool_t lockheld)
602 if (lockheld == FALSE)
603 mutex_enter(cbp->cb_lock);
605 if (cbp->cb_chc_free < RFS4_CBCH_MAX) {
606 cbp->cb_chc[ cbp->cb_chc_free++ ] = ch;
607 if (lockheld == FALSE)
608 mutex_exit(cbp->cb_lock);
609 return;
611 if (lockheld == FALSE)
612 mutex_exit(cbp->cb_lock);
615 * cache maxed out of free entries, obliterate
616 * this client handle, destroy it, throw it away.
618 if (ch->cl_auth)
619 auth_destroy(ch->cl_auth);
620 clnt_destroy(ch);
624 * With the supplied callback information - initialize the client
625 * callback data. If there is a callback in progress, save the
626 * callback info so that a thread can pick it up in the future.
628 void
629 rfs4_client_setcb(rfs4_client_t *cp, cb_client4 *cb, uint32_t cb_ident)
631 char *addr = NULL;
632 char *netid = NULL;
633 rfs4_cbinfo_t *cbp = &cp->rc_cbinfo;
634 size_t len;
636 /* Set the call back for the client */
637 if (cb->cb_location.r_addr && cb->cb_location.r_addr[0] != '\0' &&
638 cb->cb_location.r_netid && cb->cb_location.r_netid[0] != '\0') {
639 len = strlen(cb->cb_location.r_addr) + 1;
640 addr = kmem_alloc(len, KM_SLEEP);
641 bcopy(cb->cb_location.r_addr, addr, len);
642 len = strlen(cb->cb_location.r_netid) + 1;
643 netid = kmem_alloc(len, KM_SLEEP);
644 bcopy(cb->cb_location.r_netid, netid, len);
646 /* ready to save the new information but first free old, if exists */
647 mutex_enter(cbp->cb_lock);
649 cbp->cb_newer.cb_callback.cb_program = cb->cb_program;
651 if (cbp->cb_newer.cb_callback.cb_location.r_addr != NULL)
652 kmem_free(cbp->cb_newer.cb_callback.cb_location.r_addr,
653 strlen(cbp->cb_newer.cb_callback.cb_location.r_addr) + 1);
654 cbp->cb_newer.cb_callback.cb_location.r_addr = addr;
656 if (cbp->cb_newer.cb_callback.cb_location.r_netid != NULL)
657 kmem_free(cbp->cb_newer.cb_callback.cb_location.r_netid,
658 strlen(cbp->cb_newer.cb_callback.cb_location.r_netid) + 1);
659 cbp->cb_newer.cb_callback.cb_location.r_netid = netid;
661 cbp->cb_newer.cb_ident = cb_ident;
663 if (addr && *addr && netid && *netid) {
664 cbp->cb_newer.cb_new = TRUE;
665 cbp->cb_newer.cb_confirmed = FALSE;
666 } else {
667 cbp->cb_newer.cb_new = FALSE;
668 cbp->cb_newer.cb_confirmed = FALSE;
671 mutex_exit(cbp->cb_lock);
675 * The server uses this when processing SETCLIENTID_CONFIRM. Callback
676 * information may have been provided on SETCLIENTID and this call
677 * marks that information as confirmed and then starts a thread to
678 * test the callback path.
680 void
681 rfs4_deleg_cb_check(rfs4_client_t *cp)
683 if (cp->rc_cbinfo.cb_newer.cb_new == FALSE)
684 return;
686 cp->rc_cbinfo.cb_newer.cb_confirmed = TRUE;
688 rfs4_dbe_hold(cp->rc_dbe); /* hold the client struct for thread */
690 (void) thread_create(NULL, 0, rfs4_do_cb_null, cp, 0, &p0, TS_RUN,
691 minclsyspri);
694 static void
695 rfs4args_cb_recall_free(nfs_cb_argop4 *argop)
697 CB_RECALL4args *rec_argp;
699 rec_argp = &argop->nfs_cb_argop4_u.opcbrecall;
700 if (rec_argp->fh.nfs_fh4_val)
701 kmem_free(rec_argp->fh.nfs_fh4_val, rec_argp->fh.nfs_fh4_len);
704 /* ARGSUSED */
705 static void
706 rfs4args_cb_getattr_free(nfs_cb_argop4 *argop)
708 CB_GETATTR4args *argp;
710 argp = &argop->nfs_cb_argop4_u.opcbgetattr;
711 if (argp->fh.nfs_fh4_val)
712 kmem_free(argp->fh.nfs_fh4_val, argp->fh.nfs_fh4_len);
715 static void
716 rfs4freeargres(CB_COMPOUND4args *args, CB_COMPOUND4res *resp)
718 int i, arglen;
719 nfs_cb_argop4 *argop;
722 * First free any special args alloc'd for specific ops.
724 arglen = args->array_len;
725 argop = args->array;
726 for (i = 0; i < arglen; i++, argop++) {
728 switch (argop->argop) {
729 case OP_CB_RECALL:
730 rfs4args_cb_recall_free(argop);
731 break;
733 case OP_CB_GETATTR:
734 rfs4args_cb_getattr_free(argop);
735 break;
737 default:
738 return;
742 if (args->tag.utf8string_len > 0)
743 UTF8STRING_FREE(args->tag)
745 kmem_free(args->array, arglen * sizeof (nfs_cb_argop4));
746 if (resp)
747 xdr_free(xdr_CB_COMPOUND4res, (caddr_t)resp);
751 * General callback routine for the server to the client.
753 static enum clnt_stat
754 rfs4_do_callback(rfs4_client_t *cp, CB_COMPOUND4args *args,
755 CB_COMPOUND4res *res, struct timeval timeout)
757 rfs4_cbinfo_t *cbp;
758 CLIENT *ch;
759 /* start with this in case cb_getch() fails */
760 enum clnt_stat stat = RPC_FAILED;
762 res->tag.utf8string_val = NULL;
763 res->array = NULL;
765 retry:
766 cbp = rfs4_cbinfo_hold(cp);
767 if (cbp == NULL)
768 return (stat);
770 /* get a client handle */
771 if ((ch = rfs4_cb_getch(cbp)) != NULL) {
773 * reset the cb_ident since it may have changed in
774 * rfs4_cbinfo_hold()
776 args->callback_ident = cbp->cb_ident;
778 stat = clnt_call(ch, CB_COMPOUND, xdr_CB_COMPOUND4args_srv,
779 (caddr_t)args, xdr_CB_COMPOUND4res,
780 (caddr_t)res, timeout);
782 /* free client handle */
783 rfs4_cb_freech(cbp, ch, FALSE);
787 * If the rele says that there may be new callback info then
788 * retry this sequence and it may succeed as a result of the
789 * new callback path
791 if (rfs4_cbinfo_rele(cbp,
792 (stat == RPC_SUCCESS ? CB_NOCHANGE : CB_FAILED)) == TRUE)
793 goto retry;
795 return (stat);
799 * Used by the NFSv4 server to get attributes for a file while
800 * handling the case where a file has been write delegated. For the
801 * time being, fop_getattr() is called and CB_GETATTR processing is
802 * not undertaken. This call site is maintained in case the server is
803 * updated in the future to handle write delegation space guarantees.
805 nfsstat4
806 rfs4_vop_getattr(vnode_t *vp, vattr_t *vap, int flag, cred_t *cr)
809 int error;
811 error = fop_getattr(vp, vap, flag, cr, NULL);
812 return (puterrno4(error));
816 * This is used everywhere in the v2/v3 server to allow the
817 * integration of all NFS versions and the support of delegation. For
818 * now, just call the fop_getattr(). If the NFSv4 server is enhanced
819 * in the future to provide space guarantees for write delegations
820 * then this call site should be expanded to interact with the client.
823 rfs4_delegated_getattr(vnode_t *vp, vattr_t *vap, int flag, cred_t *cr)
825 return (fop_getattr(vp, vap, flag, cr, NULL));
829 * Place the actual cb_recall otw call to client.
831 static void
832 rfs4_do_cb_recall(rfs4_deleg_state_t *dsp, bool_t trunc)
834 CB_COMPOUND4args cb4_args;
835 CB_COMPOUND4res cb4_res;
836 CB_RECALL4args *rec_argp;
837 CB_RECALL4res *rec_resp;
838 nfs_cb_argop4 *argop;
839 int numops;
840 int argoplist_size;
841 struct timeval timeout;
842 nfs_fh4 *fhp;
843 enum clnt_stat call_stat;
846 * set up the compound args
848 numops = 1; /* CB_RECALL only */
850 argoplist_size = numops * sizeof (nfs_cb_argop4);
851 argop = kmem_zalloc(argoplist_size, KM_SLEEP);
852 argop->argop = OP_CB_RECALL;
853 rec_argp = &argop->nfs_cb_argop4_u.opcbrecall;
855 (void) str_to_utf8("cb_recall", &cb4_args.tag);
856 cb4_args.minorversion = CB4_MINORVERSION;
857 /* cb4_args.callback_ident is set in rfs4_do_callback() */
858 cb4_args.array_len = numops;
859 cb4_args.array = argop;
862 * fill in the args struct
864 bcopy(&dsp->rds_delegid.stateid, &rec_argp->stateid, sizeof (stateid4));
865 rec_argp->truncate = trunc;
867 fhp = &dsp->rds_finfo->rf_filehandle;
868 rec_argp->fh.nfs_fh4_val = kmem_alloc(sizeof (char) *
869 fhp->nfs_fh4_len, KM_SLEEP);
870 nfs_fh4_copy(fhp, &rec_argp->fh);
872 /* Keep track of when we did this for observability */
873 dsp->rds_time_recalled = gethrestime_sec();
876 * Set up the timeout for the callback and make the actual call.
877 * Timeout will be 80% of the lease period for this server.
879 timeout.tv_sec = (rfs4_lease_time * 80) / 100;
880 timeout.tv_usec = 0;
882 DTRACE_NFSV4_3(cb__recall__start, rfs4_client_t *, dsp->rds_client,
883 rfs4_deleg_state_t *, dsp, CB_RECALL4args *, rec_argp);
885 call_stat = rfs4_do_callback(dsp->rds_client, &cb4_args, &cb4_res,
886 timeout);
888 rec_resp = (cb4_res.array_len == 0) ? NULL :
889 &cb4_res.array[0].nfs_cb_resop4_u.opcbrecall;
891 DTRACE_NFSV4_3(cb__recall__done, rfs4_client_t *, dsp->rds_client,
892 rfs4_deleg_state_t *, dsp, CB_RECALL4res *, rec_resp);
894 if (call_stat != RPC_SUCCESS || cb4_res.status != NFS4_OK) {
895 rfs4_return_deleg(dsp, TRUE);
898 rfs4freeargres(&cb4_args, &cb4_res);
901 struct recall_arg {
902 rfs4_deleg_state_t *dsp;
903 void (*recall)(rfs4_deleg_state_t *, bool_t trunc);
904 bool_t trunc;
907 static void
908 do_recall(struct recall_arg *arg)
910 rfs4_deleg_state_t *dsp = arg->dsp;
911 rfs4_file_t *fp = dsp->rds_finfo;
912 callb_cpr_t cpr_info;
913 kmutex_t cpr_lock;
915 mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL);
916 CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr, "nfsv4Recall");
919 * It is possible that before this thread starts
920 * the client has send us a return_delegation, and
921 * if that is the case we do not need to send the
922 * recall callback.
924 if (dsp->rds_dtype != OPEN_DELEGATE_NONE) {
925 DTRACE_PROBE3(nfss__i__recall,
926 struct recall_arg *, arg,
927 struct rfs4_deleg_state_t *, dsp,
928 struct rfs4_file_t *, fp);
930 if (arg->recall)
931 (void) (*arg->recall)(dsp, arg->trunc);
934 mutex_enter(fp->rf_dinfo.rd_recall_lock);
936 * Recall count may go negative if the parent thread that is
937 * creating the individual callback threads does not modify
938 * the recall_count field before the callback thread actually
939 * gets a response from the CB_RECALL
941 fp->rf_dinfo.rd_recall_count--;
942 if (fp->rf_dinfo.rd_recall_count == 0)
943 cv_signal(fp->rf_dinfo.rd_recall_cv);
944 mutex_exit(fp->rf_dinfo.rd_recall_lock);
946 mutex_enter(&cpr_lock);
947 CALLB_CPR_EXIT(&cpr_info);
948 mutex_destroy(&cpr_lock);
950 rfs4_deleg_state_rele(dsp); /* release the hold for this thread */
952 kmem_free(arg, sizeof (struct recall_arg));
955 struct master_recall_args {
956 rfs4_file_t *fp;
957 void (*recall)(rfs4_deleg_state_t *, bool_t);
958 bool_t trunc;
961 static void
962 do_recall_file(struct master_recall_args *map)
964 rfs4_file_t *fp = map->fp;
965 rfs4_deleg_state_t *dsp;
966 struct recall_arg *arg;
967 callb_cpr_t cpr_info;
968 kmutex_t cpr_lock;
969 int32_t recall_count;
971 rfs4_dbe_lock(fp->rf_dbe);
973 /* Recall already in progress ? */
974 mutex_enter(fp->rf_dinfo.rd_recall_lock);
975 if (fp->rf_dinfo.rd_recall_count != 0) {
976 mutex_exit(fp->rf_dinfo.rd_recall_lock);
977 rfs4_dbe_rele_nolock(fp->rf_dbe);
978 rfs4_dbe_unlock(fp->rf_dbe);
979 kmem_free(map, sizeof (struct master_recall_args));
980 return;
983 mutex_exit(fp->rf_dinfo.rd_recall_lock);
985 mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL);
986 CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr, "v4RecallFile");
988 recall_count = 0;
989 for (dsp = list_head(&fp->rf_delegstatelist); dsp != NULL;
990 dsp = list_next(&fp->rf_delegstatelist, dsp)) {
992 rfs4_dbe_lock(dsp->rds_dbe);
994 * if this delegation state
995 * is being reaped skip it
997 if (rfs4_dbe_is_invalid(dsp->rds_dbe)) {
998 rfs4_dbe_unlock(dsp->rds_dbe);
999 continue;
1002 /* hold for receiving thread */
1003 rfs4_dbe_hold(dsp->rds_dbe);
1004 rfs4_dbe_unlock(dsp->rds_dbe);
1006 arg = kmem_alloc(sizeof (struct recall_arg), KM_SLEEP);
1007 arg->recall = map->recall;
1008 arg->trunc = map->trunc;
1009 arg->dsp = dsp;
1011 recall_count++;
1013 (void) thread_create(NULL, 0, do_recall, arg, 0, &p0, TS_RUN,
1014 minclsyspri);
1017 rfs4_dbe_unlock(fp->rf_dbe);
1019 mutex_enter(fp->rf_dinfo.rd_recall_lock);
1021 * Recall count may go negative if the parent thread that is
1022 * creating the individual callback threads does not modify
1023 * the recall_count field before the callback thread actually
1024 * gets a response from the CB_RECALL
1026 fp->rf_dinfo.rd_recall_count += recall_count;
1027 while (fp->rf_dinfo.rd_recall_count)
1028 cv_wait(fp->rf_dinfo.rd_recall_cv, fp->rf_dinfo.rd_recall_lock);
1030 mutex_exit(fp->rf_dinfo.rd_recall_lock);
1032 DTRACE_PROBE1(nfss__i__recall_done, rfs4_file_t *, fp);
1033 rfs4_file_rele(fp);
1034 kmem_free(map, sizeof (struct master_recall_args));
1035 mutex_enter(&cpr_lock);
1036 CALLB_CPR_EXIT(&cpr_info);
1037 mutex_destroy(&cpr_lock);
1040 static void
1041 rfs4_recall_file(rfs4_file_t *fp,
1042 void (*recall)(rfs4_deleg_state_t *, bool_t trunc),
1043 bool_t trunc, rfs4_client_t *cp)
1045 struct master_recall_args *args;
1047 rfs4_dbe_lock(fp->rf_dbe);
1048 if (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
1049 rfs4_dbe_unlock(fp->rf_dbe);
1050 return;
1052 rfs4_dbe_hold(fp->rf_dbe); /* hold for new thread */
1055 * Mark the time we started the recall processing.
1056 * If it has been previously recalled, do not reset the
1057 * timer since this is used for the revocation decision.
1059 if (fp->rf_dinfo.rd_time_recalled == 0)
1060 fp->rf_dinfo.rd_time_recalled = gethrestime_sec();
1061 fp->rf_dinfo.rd_ever_recalled = TRUE; /* used for policy decision */
1062 /* Client causing recall not always available */
1063 if (cp)
1064 fp->rf_dinfo.rd_conflicted_client = cp->rc_clientid;
1066 rfs4_dbe_unlock(fp->rf_dbe);
1068 args = kmem_alloc(sizeof (struct master_recall_args), KM_SLEEP);
1069 args->fp = fp;
1070 args->recall = recall;
1071 args->trunc = trunc;
1073 (void) thread_create(NULL, 0, do_recall_file, args, 0, &p0, TS_RUN,
1074 minclsyspri);
1077 void
1078 rfs4_recall_deleg(rfs4_file_t *fp, bool_t trunc, rfs4_client_t *cp)
1080 time_t elapsed1, elapsed2;
1082 if (fp->rf_dinfo.rd_time_recalled != 0) {
1083 elapsed1 = gethrestime_sec() - fp->rf_dinfo.rd_time_recalled;
1084 elapsed2 = gethrestime_sec() - fp->rf_dinfo.rd_time_lastwrite;
1085 /* First check to see if a revocation should occur */
1086 if (elapsed1 > rfs4_lease_time &&
1087 elapsed2 > rfs4_lease_time) {
1088 rfs4_revoke_file(fp);
1089 return;
1092 * Next check to see if a recall should be done again
1093 * so quickly.
1095 if (elapsed1 <= ((rfs4_lease_time * 20) / 100))
1096 return;
1098 rfs4_recall_file(fp, rfs4_do_cb_recall, trunc, cp);
1102 * rfs4_check_recall is called from rfs4_do_open to determine if the current
1103 * open conflicts with the delegation.
1104 * Return true if we need recall otherwise false.
1105 * Assumes entry locks for sp and sp->rs_finfo are held.
1107 bool_t
1108 rfs4_check_recall(rfs4_state_t *sp, uint32_t access)
1110 open_delegation_type4 dtype = sp->rs_finfo->rf_dinfo.rd_dtype;
1112 switch (dtype) {
1113 case OPEN_DELEGATE_NONE:
1114 /* Not currently delegated so there is nothing to do */
1115 return (FALSE);
1116 case OPEN_DELEGATE_READ:
1118 * If the access is only asking for READ then there is
1119 * no conflict and nothing to do. If it is asking
1120 * for write, then there will be conflict and the read
1121 * delegation should be recalled.
1123 if (access == OPEN4_SHARE_ACCESS_READ)
1124 return (FALSE);
1125 else
1126 return (TRUE);
1127 case OPEN_DELEGATE_WRITE:
1128 /* Check to see if this client has the delegation */
1129 return (rfs4_is_deleg(sp));
1132 return (FALSE);
1136 * Return the "best" allowable delegation available given the current
1137 * delegation type and the desired access and deny modes on the file.
1138 * At the point that this routine is called we know that the access and
1139 * deny modes are consistent with the file modes.
1141 static open_delegation_type4
1142 rfs4_check_delegation(rfs4_state_t *sp, rfs4_file_t *fp)
1144 open_delegation_type4 dtype = fp->rf_dinfo.rd_dtype;
1145 uint32_t access = sp->rs_share_access;
1146 uint32_t deny = sp->rs_share_deny;
1147 int readcnt = 0;
1148 int writecnt = 0;
1150 switch (dtype) {
1151 case OPEN_DELEGATE_NONE:
1153 * Determine if more than just this OPEN have the file
1154 * open and if so, no delegation may be provided to
1155 * the client.
1157 if (access & OPEN4_SHARE_ACCESS_WRITE)
1158 writecnt++;
1159 if (access & OPEN4_SHARE_ACCESS_READ)
1160 readcnt++;
1162 if (fp->rf_access_read > readcnt ||
1163 fp->rf_access_write > writecnt)
1164 return (OPEN_DELEGATE_NONE);
1167 * If the client is going to write, or if the client
1168 * has exclusive access, return a write delegation.
1170 if ((access & OPEN4_SHARE_ACCESS_WRITE) ||
1171 (deny & (OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE)))
1172 return (OPEN_DELEGATE_WRITE);
1174 * If we don't want to write or we've haven't denied read
1175 * access to others, return a read delegation.
1177 if ((access & ~OPEN4_SHARE_ACCESS_WRITE) ||
1178 (deny & ~OPEN4_SHARE_DENY_READ))
1179 return (OPEN_DELEGATE_READ);
1181 /* Shouldn't get here */
1182 return (OPEN_DELEGATE_NONE);
1184 case OPEN_DELEGATE_READ:
1186 * If the file is delegated for read but we wan't to
1187 * write or deny others to read then we can't delegate
1188 * the file. We shouldn't get here since the delegation should
1189 * have been recalled already.
1191 if ((access & OPEN4_SHARE_ACCESS_WRITE) ||
1192 (deny & OPEN4_SHARE_DENY_READ))
1193 return (OPEN_DELEGATE_NONE);
1194 return (OPEN_DELEGATE_READ);
1196 case OPEN_DELEGATE_WRITE:
1197 return (OPEN_DELEGATE_WRITE);
1200 /* Shouldn't get here */
1201 return (OPEN_DELEGATE_NONE);
1205 * Given the desired delegation type and the "history" of the file
1206 * determine the actual delegation type to return.
1208 static open_delegation_type4
1209 rfs4_delegation_policy(open_delegation_type4 dtype,
1210 rfs4_dinfo_t *dinfo, clientid4 cid)
1212 time_t elapsed;
1214 if (rfs4_deleg_policy != SRV_NORMAL_DELEGATE)
1215 return (OPEN_DELEGATE_NONE);
1218 * Has this file/delegation ever been recalled? If not then
1219 * no further checks for a delegation race need to be done.
1220 * However if a recall has occurred, then check to see if a
1221 * client has caused its own delegation recall to occur. If
1222 * not, then has a delegation for this file been returned
1223 * recently? If so, then do not assign a new delegation to
1224 * avoid a "delegation race" between the original client and
1225 * the new/conflicting client.
1227 if (dinfo->rd_ever_recalled == TRUE) {
1228 if (dinfo->rd_conflicted_client != cid) {
1229 elapsed = gethrestime_sec() - dinfo->rd_time_returned;
1230 if (elapsed < rfs4_lease_time)
1231 return (OPEN_DELEGATE_NONE);
1235 /* Limit the number of read grants */
1236 if (dtype == OPEN_DELEGATE_READ &&
1237 dinfo->rd_rdgrants > MAX_READ_DELEGATIONS)
1238 return (OPEN_DELEGATE_NONE);
1241 * Should consider limiting total number of read/write
1242 * delegations the server will permit.
1245 return (dtype);
1249 * Try and grant a delegation for an open give the state. The routine
1250 * returns the delegation type granted. This could be OPEN_DELEGATE_NONE.
1252 * The state and associate file entry must be locked
1254 rfs4_deleg_state_t *
1255 rfs4_grant_delegation(delegreq_t dreq, rfs4_state_t *sp, int *recall)
1257 rfs4_file_t *fp = sp->rs_finfo;
1258 open_delegation_type4 dtype;
1259 int no_delegation;
1261 ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
1262 ASSERT(rfs4_dbe_islocked(fp->rf_dbe));
1264 /* Is the server even providing delegations? */
1265 if (rfs4_deleg_policy == SRV_NEVER_DELEGATE || dreq == DELEG_NONE)
1266 return (NULL);
1268 /* Check to see if delegations have been temporarily disabled */
1269 mutex_enter(&rfs4_deleg_lock);
1270 no_delegation = rfs4_deleg_disabled;
1271 mutex_exit(&rfs4_deleg_lock);
1273 if (no_delegation)
1274 return (NULL);
1276 /* Don't grant a delegation if a deletion is impending. */
1277 if (fp->rf_dinfo.rd_hold_grant > 0) {
1278 return (NULL);
1282 * Don't grant a delegation if there are any lock manager
1283 * (NFSv2/v3) locks for the file. This is a bit of a hack (e.g.,
1284 * if there are only read locks we should be able to grant a
1285 * read-only delegation), but it's good enough for now.
1287 * MT safety: the lock manager checks for conflicting delegations
1288 * before processing a lock request. That check will block until
1289 * we are done here. So if the lock manager acquires a lock after
1290 * we decide to grant the delegation, the delegation will get
1291 * immediately recalled (if there's a conflict), so we're safe.
1293 if (lm_vp_active(fp->rf_vp)) {
1294 return (NULL);
1298 * Based on the type of delegation request passed in, take the
1299 * appropriate action (DELEG_NONE is handled above)
1301 switch (dreq) {
1303 case DELEG_READ:
1304 case DELEG_WRITE:
1306 * The server "must" grant the delegation in this case.
1307 * Client is using open previous
1309 dtype = (open_delegation_type4)dreq;
1310 *recall = 1;
1311 break;
1312 case DELEG_ANY:
1314 * If a valid callback path does not exist, no delegation may
1315 * be granted.
1317 if (sp->rs_owner->ro_client->rc_cbinfo.cb_state != CB_OK)
1318 return (NULL);
1321 * If the original operation which caused time_rm_delayed
1322 * to be set hasn't been retried and completed for one
1323 * full lease period, clear it and allow delegations to
1324 * get granted again.
1326 if (fp->rf_dinfo.rd_time_rm_delayed > 0 &&
1327 gethrestime_sec() >
1328 fp->rf_dinfo.rd_time_rm_delayed + rfs4_lease_time)
1329 fp->rf_dinfo.rd_time_rm_delayed = 0;
1332 * If we are waiting for a delegation to be returned then
1333 * don't delegate this file. We do this for correctness as
1334 * well as if the file is being recalled we would likely
1335 * recall this file again.
1338 if (fp->rf_dinfo.rd_time_recalled != 0 ||
1339 fp->rf_dinfo.rd_time_rm_delayed != 0)
1340 return (NULL);
1342 /* Get the "best" delegation candidate */
1343 dtype = rfs4_check_delegation(sp, fp);
1345 if (dtype == OPEN_DELEGATE_NONE)
1346 return (NULL);
1349 * Based on policy and the history of the file get the
1350 * actual delegation.
1352 dtype = rfs4_delegation_policy(dtype, &fp->rf_dinfo,
1353 sp->rs_owner->ro_client->rc_clientid);
1355 if (dtype == OPEN_DELEGATE_NONE)
1356 return (NULL);
1357 break;
1358 default:
1359 return (NULL);
1362 /* set the delegation for the state */
1363 return (rfs4_deleg_state(sp, dtype, recall));
1366 void
1367 rfs4_set_deleg_response(rfs4_deleg_state_t *dsp, open_delegation4 *dp,
1368 nfsace4 *ace, int recall)
1370 open_write_delegation4 *wp;
1371 open_read_delegation4 *rp;
1372 nfs_space_limit4 *spl;
1373 nfsace4 nace;
1376 * We need to allocate a new copy of the who string.
1377 * this string will be freed by the rfs4_op_open dis_resfree
1378 * routine. We need to do this allocation since replays will
1379 * be allocated and rfs4_compound can't tell the difference from
1380 * a replay and an inital open. N.B. if an ace is passed in, it
1381 * the caller's responsibility to free it.
1384 if (ace == NULL) {
1386 * Default is to deny all access, the client will have
1387 * to contact the server. XXX Do we want to actually
1388 * set a deny for every one, or do we simply want to
1389 * construct an entity that will match no one?
1391 nace.type = ACE4_ACCESS_DENIED_ACE_TYPE;
1392 nace.flag = 0;
1393 nace.access_mask = ACE4_VALID_MASK_BITS;
1394 (void) str_to_utf8(ACE4_WHO_EVERYONE, &nace.who);
1395 } else {
1396 nace.type = ace->type;
1397 nace.flag = ace->flag;
1398 nace.access_mask = ace->access_mask;
1399 (void) utf8_copy(&ace->who, &nace.who);
1402 dp->delegation_type = dsp->rds_dtype;
1404 switch (dsp->rds_dtype) {
1405 case OPEN_DELEGATE_NONE:
1406 break;
1407 case OPEN_DELEGATE_READ:
1408 rp = &dp->open_delegation4_u.read;
1409 rp->stateid = dsp->rds_delegid.stateid;
1410 rp->recall = (bool_t)recall;
1411 rp->permissions = nace;
1412 break;
1413 case OPEN_DELEGATE_WRITE:
1414 wp = &dp->open_delegation4_u.write;
1415 wp->stateid = dsp->rds_delegid.stateid;
1416 wp->recall = (bool_t)recall;
1417 spl = &wp->space_limit;
1418 spl->limitby = NFS_LIMIT_SIZE;
1419 spl->nfs_space_limit4_u.filesize = 0;
1420 wp->permissions = nace;
1421 break;
1426 * Check if the file is delegated via the provided file struct.
1427 * Return TRUE if it is delegated. This is intended for use by
1428 * the v4 server. The v2/v3 server code should use rfs4_check_delegated().
1430 * Note that if the file is found to have a delegation, it is
1431 * recalled, unless the clientid of the caller matches the clientid of the
1432 * delegation. If the caller has specified, there is a slight delay
1433 * inserted in the hopes that the delegation will be returned quickly.
1435 bool_t
1436 rfs4_check_delegated_byfp(int mode, rfs4_file_t *fp,
1437 bool_t trunc, bool_t do_delay, bool_t is_rm, clientid4 *cp)
1439 rfs4_deleg_state_t *dsp;
1441 /* Is delegation enabled? */
1442 if (rfs4_deleg_policy == SRV_NEVER_DELEGATE)
1443 return (FALSE);
1445 /* do we have a delegation on this file? */
1446 rfs4_dbe_lock(fp->rf_dbe);
1447 if (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
1448 if (is_rm)
1449 fp->rf_dinfo.rd_hold_grant++;
1450 rfs4_dbe_unlock(fp->rf_dbe);
1451 return (FALSE);
1454 * do we have a write delegation on this file or are we
1455 * requesting write access to a file with any type of existing
1456 * delegation?
1458 if (mode == FWRITE || fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE) {
1459 if (cp != NULL) {
1460 dsp = list_head(&fp->rf_delegstatelist);
1461 if (dsp == NULL) {
1462 rfs4_dbe_unlock(fp->rf_dbe);
1463 return (FALSE);
1466 * Does the requestor already own the delegation?
1468 if (dsp->rds_client->rc_clientid == *(cp)) {
1469 rfs4_dbe_unlock(fp->rf_dbe);
1470 return (FALSE);
1474 rfs4_dbe_unlock(fp->rf_dbe);
1475 rfs4_recall_deleg(fp, trunc, NULL);
1477 if (!do_delay) {
1478 rfs4_dbe_lock(fp->rf_dbe);
1479 fp->rf_dinfo.rd_time_rm_delayed = gethrestime_sec();
1480 rfs4_dbe_unlock(fp->rf_dbe);
1481 return (TRUE);
1484 delay(NFS4_DELEGATION_CONFLICT_DELAY);
1486 rfs4_dbe_lock(fp->rf_dbe);
1487 if (fp->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE) {
1488 fp->rf_dinfo.rd_time_rm_delayed = gethrestime_sec();
1489 rfs4_dbe_unlock(fp->rf_dbe);
1490 return (TRUE);
1493 if (is_rm)
1494 fp->rf_dinfo.rd_hold_grant++;
1495 rfs4_dbe_unlock(fp->rf_dbe);
1496 return (FALSE);
1500 * Check if the file is delegated in the case of a v2 or v3 access.
1501 * Return TRUE if it is delegated which in turn means that v2 should
1502 * drop the request and in the case of v3 JUKEBOX should be returned.
1504 bool_t
1505 rfs4_check_delegated(int mode, vnode_t *vp, bool_t trunc)
1507 rfs4_file_t *fp;
1508 bool_t create = FALSE;
1509 bool_t rc = FALSE;
1511 rfs4_hold_deleg_policy();
1513 /* Is delegation enabled? */
1514 if (rfs4_deleg_policy != SRV_NEVER_DELEGATE) {
1515 fp = rfs4_findfile(vp, NULL, &create);
1516 if (fp != NULL) {
1517 if (rfs4_check_delegated_byfp(mode, fp, trunc,
1518 TRUE, FALSE, NULL)) {
1519 rc = TRUE;
1521 rfs4_file_rele(fp);
1524 rfs4_rele_deleg_policy();
1525 return (rc);
1529 * Release a hold on the hold_grant counter which
1530 * prevents delegation from being granted while a remove
1531 * or a rename is in progress.
1533 void
1534 rfs4_clear_dont_grant(rfs4_file_t *fp)
1536 if (rfs4_deleg_policy == SRV_NEVER_DELEGATE)
1537 return;
1538 rfs4_dbe_lock(fp->rf_dbe);
1539 ASSERT(fp->rf_dinfo.rd_hold_grant > 0);
1540 fp->rf_dinfo.rd_hold_grant--;
1541 fp->rf_dinfo.rd_time_rm_delayed = 0;
1542 rfs4_dbe_unlock(fp->rf_dbe);
1546 * State support for delegation.
1547 * Set the state delegation type for this state;
1548 * This routine is called from open via rfs4_grant_delegation and the entry
1549 * locks on sp and sp->rs_finfo are assumed.
1551 static rfs4_deleg_state_t *
1552 rfs4_deleg_state(rfs4_state_t *sp, open_delegation_type4 dtype, int *recall)
1554 rfs4_file_t *fp = sp->rs_finfo;
1555 bool_t create = TRUE;
1556 rfs4_deleg_state_t *dsp;
1557 vnode_t *vp;
1558 int open_prev = *recall;
1559 int ret;
1560 int fflags = 0;
1562 ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
1563 ASSERT(rfs4_dbe_islocked(fp->rf_dbe));
1565 /* Shouldn't happen */
1566 if (fp->rf_dinfo.rd_recall_count != 0 ||
1567 (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_READ &&
1568 dtype != OPEN_DELEGATE_READ)) {
1569 return (NULL);
1572 /* Unlock to avoid deadlock */
1573 rfs4_dbe_unlock(fp->rf_dbe);
1574 rfs4_dbe_unlock(sp->rs_dbe);
1576 dsp = rfs4_finddeleg(sp, &create);
1578 rfs4_dbe_lock(sp->rs_dbe);
1579 rfs4_dbe_lock(fp->rf_dbe);
1581 if (dsp == NULL)
1582 return (NULL);
1585 * It is possible that since we dropped the lock
1586 * in order to call finddeleg, the rfs4_file_t
1587 * was marked such that we should not grant a
1588 * delegation, if so bail out.
1590 if (fp->rf_dinfo.rd_hold_grant > 0) {
1591 rfs4_deleg_state_rele(dsp);
1592 return (NULL);
1595 if (create == FALSE) {
1596 if (sp->rs_owner->ro_client == dsp->rds_client &&
1597 dsp->rds_dtype == dtype) {
1598 return (dsp);
1599 } else {
1600 rfs4_deleg_state_rele(dsp);
1601 return (NULL);
1606 * Check that this file has not been delegated to another
1607 * client
1609 if (fp->rf_dinfo.rd_recall_count != 0 ||
1610 fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE ||
1611 (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_READ &&
1612 dtype != OPEN_DELEGATE_READ)) {
1613 rfs4_deleg_state_rele(dsp);
1614 return (NULL);
1617 vp = fp->rf_vp;
1618 /* vnevent_support returns 0 if file system supports vnevents */
1619 if (vnevent_support(vp, NULL)) {
1620 rfs4_deleg_state_rele(dsp);
1621 return (NULL);
1624 /* Calculate the fflags for this OPEN. */
1625 if (sp->rs_share_access & OPEN4_SHARE_ACCESS_READ)
1626 fflags |= FREAD;
1627 if (sp->rs_share_access & OPEN4_SHARE_ACCESS_WRITE)
1628 fflags |= FWRITE;
1630 *recall = 0;
1632 * Before granting a delegation we need to know if anyone else has
1633 * opened the file in a conflicting mode. However, first we need to
1634 * know how we opened the file to check the counts properly.
1636 if (dtype == OPEN_DELEGATE_READ) {
1637 if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) ||
1638 (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) ||
1639 vn_is_mapped(vp, V_WRITE)) {
1640 if (open_prev) {
1641 *recall = 1;
1642 } else {
1643 rfs4_deleg_state_rele(dsp);
1644 return (NULL);
1647 ret = fem_install(vp, &deleg_rdops, fp, OPUNIQ,
1648 rfs4_mon_hold, rfs4_mon_rele);
1649 if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) ||
1650 (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) ||
1651 vn_is_mapped(vp, V_WRITE)) {
1652 if (open_prev) {
1653 *recall = 1;
1654 } else {
1655 (void) fem_uninstall(vp, &deleg_rdops, fp);
1656 rfs4_deleg_state_rele(dsp);
1657 return (NULL);
1661 * Because a client can hold onto a delegation after the
1662 * file has been closed, we need to keep track of the
1663 * access to this file. Otherwise the CIFS server would
1664 * not know about the client accessing the file and could
1665 * inappropriately grant an OPLOCK.
1666 * fem_install() returns EBUSY when asked to install a
1667 * OPUNIQ monitor more than once. Therefore, check the
1668 * return code because we only want this done once.
1670 if (ret == 0)
1671 vn_open_upgrade(vp, FREAD);
1672 } else { /* WRITE */
1673 if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) ||
1674 (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) ||
1675 ((fflags & FREAD) && vn_has_other_opens(vp, V_READ)) ||
1676 (((fflags & FREAD) == 0) && vn_is_opened(vp, V_READ)) ||
1677 vn_is_mapped(vp, V_RDORWR)) {
1678 if (open_prev) {
1679 *recall = 1;
1680 } else {
1681 rfs4_deleg_state_rele(dsp);
1682 return (NULL);
1685 ret = fem_install(vp, &deleg_wrops, fp, OPUNIQ,
1686 rfs4_mon_hold, rfs4_mon_rele);
1687 if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) ||
1688 (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) ||
1689 ((fflags & FREAD) && vn_has_other_opens(vp, V_READ)) ||
1690 (((fflags & FREAD) == 0) && vn_is_opened(vp, V_READ)) ||
1691 vn_is_mapped(vp, V_RDORWR)) {
1692 if (open_prev) {
1693 *recall = 1;
1694 } else {
1695 (void) fem_uninstall(vp, &deleg_wrops, fp);
1696 rfs4_deleg_state_rele(dsp);
1697 return (NULL);
1701 * Because a client can hold onto a delegation after the
1702 * file has been closed, we need to keep track of the
1703 * access to this file. Otherwise the CIFS server would
1704 * not know about the client accessing the file and could
1705 * inappropriately grant an OPLOCK.
1706 * fem_install() returns EBUSY when asked to install a
1707 * OPUNIQ monitor more than once. Therefore, check the
1708 * return code because we only want this done once.
1710 if (ret == 0)
1711 vn_open_upgrade(vp, FREAD|FWRITE);
1713 /* Place on delegation list for file */
1714 ASSERT(!list_link_active(&dsp->rds_node));
1715 list_insert_tail(&fp->rf_delegstatelist, dsp);
1717 dsp->rds_dtype = fp->rf_dinfo.rd_dtype = dtype;
1719 /* Update delegation stats for this file */
1720 fp->rf_dinfo.rd_time_lastgrant = gethrestime_sec();
1722 /* reset since this is a new delegation */
1723 fp->rf_dinfo.rd_conflicted_client = 0;
1724 fp->rf_dinfo.rd_ever_recalled = FALSE;
1726 if (dtype == OPEN_DELEGATE_READ)
1727 fp->rf_dinfo.rd_rdgrants++;
1728 else
1729 fp->rf_dinfo.rd_wrgrants++;
1731 return (dsp);
1735 * State routine for the server when a delegation is returned.
1737 void
1738 rfs4_return_deleg(rfs4_deleg_state_t *dsp, bool_t revoked)
1740 rfs4_file_t *fp = dsp->rds_finfo;
1741 open_delegation_type4 dtypewas;
1743 rfs4_dbe_lock(fp->rf_dbe);
1745 /* nothing to do if no longer on list */
1746 if (!list_link_active(&dsp->rds_node)) {
1747 rfs4_dbe_unlock(fp->rf_dbe);
1748 return;
1751 /* Remove state from recall list */
1752 list_remove(&fp->rf_delegstatelist, dsp);
1754 if (list_is_empty(&fp->rf_delegstatelist)) {
1755 dtypewas = fp->rf_dinfo.rd_dtype;
1756 fp->rf_dinfo.rd_dtype = OPEN_DELEGATE_NONE;
1757 rfs4_dbe_cv_broadcast(fp->rf_dbe);
1759 /* if file system was unshared, the vp will be NULL */
1760 if (fp->rf_vp != NULL) {
1762 * Once a delegation is no longer held by any client,
1763 * the monitor is uninstalled. At this point, the
1764 * client must send OPEN otw, so we don't need the
1765 * reference on the vnode anymore. The open
1766 * downgrade removes the reference put on earlier.
1768 if (dtypewas == OPEN_DELEGATE_READ) {
1769 (void) fem_uninstall(fp->rf_vp, &deleg_rdops,
1770 fp);
1771 vn_open_downgrade(fp->rf_vp, FREAD);
1772 } else if (dtypewas == OPEN_DELEGATE_WRITE) {
1773 (void) fem_uninstall(fp->rf_vp, &deleg_wrops,
1774 fp);
1775 vn_open_downgrade(fp->rf_vp, FREAD|FWRITE);
1780 switch (dsp->rds_dtype) {
1781 case OPEN_DELEGATE_READ:
1782 fp->rf_dinfo.rd_rdgrants--;
1783 break;
1784 case OPEN_DELEGATE_WRITE:
1785 fp->rf_dinfo.rd_wrgrants--;
1786 break;
1787 default:
1788 break;
1791 /* used in the policy decision */
1792 fp->rf_dinfo.rd_time_returned = gethrestime_sec();
1795 * reset the time_recalled field so future delegations are not
1796 * accidentally revoked
1798 if ((fp->rf_dinfo.rd_rdgrants + fp->rf_dinfo.rd_wrgrants) == 0)
1799 fp->rf_dinfo.rd_time_recalled = 0;
1801 rfs4_dbe_unlock(fp->rf_dbe);
1803 rfs4_dbe_lock(dsp->rds_dbe);
1805 dsp->rds_dtype = OPEN_DELEGATE_NONE;
1807 if (revoked == TRUE)
1808 dsp->rds_time_revoked = gethrestime_sec();
1810 rfs4_dbe_invalidate(dsp->rds_dbe);
1812 rfs4_dbe_unlock(dsp->rds_dbe);
1814 if (revoked == TRUE) {
1815 rfs4_dbe_lock(dsp->rds_client->rc_dbe);
1816 dsp->rds_client->rc_deleg_revoked++; /* observability */
1817 rfs4_dbe_unlock(dsp->rds_client->rc_dbe);
1821 static void
1822 rfs4_revoke_file(rfs4_file_t *fp)
1824 rfs4_deleg_state_t *dsp;
1827 * The lock for rfs4_file_t must be held when traversing the
1828 * delegation list but that lock needs to be released to call
1829 * rfs4_return_deleg()
1831 rfs4_dbe_lock(fp->rf_dbe);
1832 while (dsp = list_head(&fp->rf_delegstatelist)) {
1833 rfs4_dbe_hold(dsp->rds_dbe);
1834 rfs4_dbe_unlock(fp->rf_dbe);
1835 rfs4_return_deleg(dsp, TRUE);
1836 rfs4_deleg_state_rele(dsp);
1837 rfs4_dbe_lock(fp->rf_dbe);
1839 rfs4_dbe_unlock(fp->rf_dbe);
1843 * A delegation is assumed to be present on the file associated with
1844 * "sp". Check to see if the delegation matches is associated with
1845 * the same client as referenced by "sp". If it is not, TRUE is
1846 * returned. If the delegation DOES match the client (or no
1847 * delegation is present), return FALSE.
1848 * Assume the state entry and file entry are locked.
1850 bool_t
1851 rfs4_is_deleg(rfs4_state_t *sp)
1853 rfs4_deleg_state_t *dsp;
1854 rfs4_file_t *fp = sp->rs_finfo;
1855 rfs4_client_t *cp = sp->rs_owner->ro_client;
1857 ASSERT(rfs4_dbe_islocked(fp->rf_dbe));
1858 for (dsp = list_head(&fp->rf_delegstatelist); dsp != NULL;
1859 dsp = list_next(&fp->rf_delegstatelist, dsp)) {
1860 if (cp != dsp->rds_client) {
1861 return (TRUE);
1864 return (FALSE);
1867 void
1868 rfs4_disable_delegation(void)
1870 mutex_enter(&rfs4_deleg_lock);
1871 rfs4_deleg_disabled++;
1872 mutex_exit(&rfs4_deleg_lock);
1875 void
1876 rfs4_enable_delegation(void)
1878 mutex_enter(&rfs4_deleg_lock);
1879 ASSERT(rfs4_deleg_disabled > 0);
1880 rfs4_deleg_disabled--;
1881 mutex_exit(&rfs4_deleg_lock);
1884 void
1885 rfs4_mon_hold(void *arg)
1887 rfs4_file_t *fp = arg;
1889 rfs4_dbe_hold(fp->rf_dbe);
1892 void
1893 rfs4_mon_rele(void *arg)
1895 rfs4_file_t *fp = arg;
1897 rfs4_dbe_rele_nolock(fp->rf_dbe);