dmake: do not set MAKEFLAGS=k
[unleashed/tickless.git] / kernel / fs / nfs / nfs_auth.c
blob9a54649d77d420c949c0305c147969f74bc8b152
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
23 * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
24 * Copyright (c) 1995, 2010, Oracle and/or its affiliates. All rights reserved.
25 * Copyright (c) 2015 by Delphix. All rights reserved.
28 #include <sys/param.h>
29 #include <sys/errno.h>
30 #include <sys/vfs.h>
31 #include <sys/vnode.h>
32 #include <sys/cred.h>
33 #include <sys/cmn_err.h>
34 #include <sys/systm.h>
35 #include <sys/kmem.h>
36 #include <sys/pathname.h>
37 #include <sys/utsname.h>
38 #include <sys/debug.h>
39 #include <sys/door.h>
40 #include <sys/sdt.h>
41 #include <sys/thread.h>
42 #include <sys/avl.h>
44 #include <rpc/types.h>
45 #include <rpc/auth.h>
46 #include <rpc/clnt.h>
48 #include <nfs/nfs.h>
49 #include <nfs/export.h>
50 #include <nfs/nfs_clnt.h>
51 #include <nfs/auth.h>
/*
 * kmem cache from which struct auth_cache entries are allocated.  It is
 * created in nfsauth_init() and destroyed in nfsauth_fini().
 */
53 static struct kmem_cache *exi_cache_handle;
/* Passed to kmem_cache_create() in nfsauth_init() as the cache callback. */
54 static void exi_cache_reclaim(void *);
55 static void exi_cache_trim(struct exportinfo *exi);
/* Priority handed to zthread_create() for the refresh thread. */
57 extern pri_t minclsyspri;
/*
 * Statistics counters.  The hit, miss and refresh counters are bumped
 * atomically in nfsauth_cache_get(); the remaining counters are not
 * updated in the part of the file visible here.
 */
59 volatile uint_t nfsauth_cache_hit;
60 volatile uint_t nfsauth_cache_miss;
61 volatile uint_t nfsauth_cache_refresh;
62 volatile uint_t nfsauth_cache_reclaim;
63 volatile uint_t exi_cache_auth_reclaim_failed;
64 volatile uint_t exi_cache_clnt_reclaim_failed;
67 * The lifetime of an auth cache entry:
68 * ------------------------------------
70 * An auth cache entry is created with both the auth_time
71 * and auth_freshness times set to the current time.
73 * Upon every client access which results in a hit, the
74 * auth_time will be updated.
76 * If a client access determines that the auth_freshness
77 * indicates that the entry is STALE, then it will be
78 * refreshed. Note that this will explicitly reset
79 * auth_time.
81 * When the REFRESH successfully occurs, then the
82 * auth_freshness is updated.
84 * There are two ways for an entry to leave the cache:
86 * 1) Purged by an action on the export (remove or changed)
87 * 2) Memory backpressure from the kernel (check against NFSAUTH_CACHE_TRIM)
89 * For 2) we check the timeout value against auth_time.
93 * Number of seconds until we mark for refresh an auth cache entry.
/*
 * nfsauth_cache_get() compares this against the seconds elapsed since
 * auth_freshness; a FRESH entry older than this is marked STALE and queued
 * for the refresh thread.
 */
95 #define NFSAUTH_CACHE_REFRESH 600
98 * Number of idle seconds until we yield to backpressure
99 * to trim a cache entry.
/*
 * NOTE(review): not referenced in the visible part of the file; presumably
 * consumed by the reclaim/trim path (exi_cache_trim()) -- confirm.
 */
101 #define NFSAUTH_CACHE_TRIM 3600
104 * While we could encapsulate the exi_list inside the
105 * exi structure, we can't do that for the auth_list.
106 * So, to keep things looking clean, we keep them both
107 * in these external lists.
/*
 * One refresh queue element per export that has entries waiting to be
 * refreshed.
 *
 * ren_exi      - the export; nfsauth_cache_get() takes exi_hold() when it
 *                creates the node, and the consumer drops it with exi_rele()
 * ren_authlist - list of refreshq_auth_node_t for this export
 * ren_node     - linkage on refreshq_queue
 */
109 typedef struct refreshq_exi_node {
110 struct exportinfo *ren_exi;
111 list_t ren_authlist;
112 list_node_t ren_node;
113 } refreshq_exi_node_t;
/*
 * One pending refresh request.
 *
 * ran_auth  - the auth cache entry to refresh
 * ran_netid - strdup() copy of the netid of the request that triggered the
 *             refresh; released with strfree()
 * ran_node  - linkage on the owning refreshq_exi_node_t ren_authlist
 */
115 typedef struct refreshq_auth_node {
116 struct auth_cache *ran_auth;
117 char *ran_netid;
118 list_node_t ran_node;
119 } refreshq_auth_node_t;
122 * Used to manipulate things on the refreshq_queue.
123 * Note that the refresh thread will effectively
124 * pop a node off of the queue, at which point it
125 * will no longer need to hold the mutex.
/*
 * refreshq_lock is held around accesses to refreshq_queue in
 * nfsauth_cache_get() and nfsauth_refresh_thread().  refreshq_cv is
 * broadcast when work is queued and when refreshq_thread_state changes
 * during shutdown.
 */
127 static kmutex_t refreshq_lock;
128 static list_t refreshq_queue;
129 static kcondvar_t refreshq_cv;
132 * If there is ever a problem with loading the
133 * module, then nfsauth_fini() needs to be called
134 * to remove state. In that event, since the
135 * refreshq thread has been started, they need to
136 * work together to get rid of state.
/*
 * Life cycle of the refresh thread:
 *
 * REFRESHQ_THREAD_RUNNING  - set by nfsauth_init() before the thread is
 *                            created; work may be queued
 * REFRESHQ_THREAD_FINI_REQ - set by nfsauth_fini() to ask the thread to exit
 * REFRESHQ_THREAD_HALTED   - initial value; set again by the thread itself
 *                            just before it exits
 */
138 typedef enum nfsauth_refreshq_thread_state {
139 REFRESHQ_THREAD_RUNNING,
140 REFRESHQ_THREAD_FINI_REQ,
141 REFRESHQ_THREAD_HALTED
142 } nfsauth_refreshq_thread_state_t;
144 nfsauth_refreshq_thread_state_t
145 refreshq_thread_state = REFRESHQ_THREAD_HALTED;
/*
 * Forward declarations.  nfsauth_free_node() is defined outside the part
 * of the file visible here; it is called on entries found in the
 * NFS_AUTH_INVALID state.
 */
147 static void nfsauth_free_node(struct auth_cache *);
148 static void nfsauth_refresh_thread(void);
150 static int nfsauth_cache_compar(const void *, const void *);
153 * mountd is a server-side only daemon. This will need to be
154 * revisited if the NFS server is ever made zones-aware.
/*
 * mountd_dh is the door handle used for upcalls to mountd; it is read and
 * replaced only while holding mountd_lock.
 */
156 kmutex_t mountd_lock;
157 door_handle_t mountd_dh;
159 void
160 mountd_args(uint_t did)
162 mutex_enter(&mountd_lock);
163 if (mountd_dh != NULL)
164 door_ki_rele(mountd_dh);
165 mountd_dh = door_ki_lookup(did);
166 mutex_exit(&mountd_lock);
169 void
170 nfsauth_init(void)
173 * mountd can be restarted by smf(5). We need to make sure
174 * the updated door handle will safely make it to mountd_dh
176 mutex_init(&mountd_lock, NULL, MUTEX_DEFAULT, NULL);
178 mutex_init(&refreshq_lock, NULL, MUTEX_DEFAULT, NULL);
179 list_create(&refreshq_queue, sizeof (refreshq_exi_node_t),
180 offsetof(refreshq_exi_node_t, ren_node));
182 cv_init(&refreshq_cv, NULL, CV_DEFAULT, NULL);
185 * Allocate nfsauth cache handle
187 exi_cache_handle = kmem_cache_create("exi_cache_handle",
188 sizeof (struct auth_cache), 0, NULL, NULL,
189 exi_cache_reclaim, NULL, NULL, 0);
191 refreshq_thread_state = REFRESHQ_THREAD_RUNNING;
192 (void) zthread_create(NULL, 0, nfsauth_refresh_thread,
193 NULL, 0, minclsyspri);
197 * Finalization routine for nfsauth. It is important to call this routine
198 * before destroying the exported_lock.
200 void
201 nfsauth_fini(void)
203 refreshq_exi_node_t *ren;
206 * Prevent the nfsauth_refresh_thread from getting new
207 * work.
209 mutex_enter(&refreshq_lock);
210 if (refreshq_thread_state != REFRESHQ_THREAD_HALTED) {
211 refreshq_thread_state = REFRESHQ_THREAD_FINI_REQ;
212 cv_broadcast(&refreshq_cv);
215 * Also, wait for nfsauth_refresh_thread() to exit.
217 while (refreshq_thread_state != REFRESHQ_THREAD_HALTED) {
218 cv_wait(&refreshq_cv, &refreshq_lock);
221 mutex_exit(&refreshq_lock);
224 * Walk the exi_list and in turn, walk the auth_lists and free all
225 * lists. In addition, free INVALID auth_cache entries.
227 while ((ren = list_remove_head(&refreshq_queue))) {
228 refreshq_auth_node_t *ran;
230 while ((ran = list_remove_head(&ren->ren_authlist)) != NULL) {
231 struct auth_cache *p = ran->ran_auth;
232 if (p->auth_state == NFS_AUTH_INVALID)
233 nfsauth_free_node(p);
234 strfree(ran->ran_netid);
235 kmem_free(ran, sizeof (refreshq_auth_node_t));
238 list_destroy(&ren->ren_authlist);
239 exi_rele(ren->ren_exi);
240 kmem_free(ren, sizeof (refreshq_exi_node_t));
242 list_destroy(&refreshq_queue);
244 cv_destroy(&refreshq_cv);
245 mutex_destroy(&refreshq_lock);
247 mutex_destroy(&mountd_lock);
250 * Deallocate nfsauth cache handle
252 kmem_cache_destroy(exi_cache_handle);
256 * Convert the address in a netbuf to
257 * a hash index for the auth_cache table.
259 static int
260 hash(struct netbuf *a)
262 int i, h = 0;
264 for (i = 0; i < a->len; i++)
265 h ^= a->buf[i];
267 return (h & (AUTH_TABLESIZE - 1));
271 * Mask out the components of an
272 * address that do not identify
273 * a host. For socket addresses the
274 * masking gets rid of the port number.
276 static void
277 addrmask(struct netbuf *addr, struct netbuf *mask)
279 int i;
281 for (i = 0; i < addr->len; i++)
282 addr->buf[i] &= mask->buf[i];
286 * nfsauth4_access is used for NFS V4 auth checking. Besides doing
287 * the common nfsauth_access(), it will check if the client can
288 * have a limited access to this vnode even if the security flavor
289 * used does not meet the policy.
292 nfsauth4_access(struct exportinfo *exi, vnode_t *vp, struct svc_req *req,
293 cred_t *cr, uid_t *uid, gid_t *gid, uint_t *ngids, gid_t **gids)
295 int access;
297 access = nfsauth_access(exi, req, cr, uid, gid, ngids, gids);
300 * There are cases that the server needs to allow the client
301 * to have a limited view.
303 * e.g.
304 * /export is shared as "sec=sys,rw=dfs-test-4,sec=krb5,rw"
305 * /export/home is shared as "sec=sys,rw"
307 * When the client mounts /export with sec=sys, the client
308 * would get a limited view with RO access on /export to see
309 * "home" only because the client is allowed to access
310 * /export/home with auth_sys.
312 if (access & NFSAUTH_DENIED || access & NFSAUTH_WRONGSEC) {
314 * Allow ro permission with LIMITED view if there is a
315 * sub-dir exported under vp.
317 if (has_visible(exi, vp))
318 return (NFSAUTH_LIMITED);
321 return (access);
324 static void
325 sys_log(const char *msg)
327 static time_t tstamp = 0;
328 time_t now;
331 * msg is shown (at most) once per minute
333 now = gethrestime_sec();
334 if ((tstamp + 60) < now) {
335 tstamp = now;
336 cmn_err(CE_WARN, msg);
341 * Callup to the mountd to get access information in the kernel.
343 static bool_t
344 nfsauth_retrieve(struct exportinfo *exi, char *req_netid, int flavor,
345 struct netbuf *addr, int *access, cred_t *clnt_cred, uid_t *srv_uid,
346 gid_t *srv_gid, uint_t *srv_gids_cnt, gid_t **srv_gids)
348 varg_t varg = {0};
349 nfsauth_res_t res = {0};
350 XDR xdrs;
351 size_t absz;
352 caddr_t abuf;
353 int last = 0;
354 door_arg_t da;
355 door_info_t di;
356 door_handle_t dh;
357 uint_t ntries = 0;
360 * No entry in the cache for this client/flavor
361 * so we need to call the nfsauth service in the
362 * mount daemon.
365 varg.vers = V_PROTO;
366 varg.arg_u.arg.cmd = NFSAUTH_ACCESS;
367 varg.arg_u.arg.areq.req_client.n_len = addr->len;
368 varg.arg_u.arg.areq.req_client.n_bytes = addr->buf;
369 varg.arg_u.arg.areq.req_netid = req_netid;
370 varg.arg_u.arg.areq.req_path = exi->exi_export.ex_path;
371 varg.arg_u.arg.areq.req_flavor = flavor;
372 varg.arg_u.arg.areq.req_clnt_uid = crgetuid(clnt_cred);
373 varg.arg_u.arg.areq.req_clnt_gid = crgetgid(clnt_cred);
374 varg.arg_u.arg.areq.req_clnt_gids.len = crgetngroups(clnt_cred);
375 varg.arg_u.arg.areq.req_clnt_gids.val = (gid_t *)crgetgroups(clnt_cred);
377 DTRACE_PROBE1(nfsserv__func__nfsauth__varg, varg_t *, &varg);
380 * Setup the XDR stream for encoding the arguments. Notice that
381 * in addition to the args having variable fields (req_netid and
382 * req_path), the argument data structure is itself versioned,
383 * so we need to make sure we can size the arguments buffer
384 * appropriately to encode all the args. If we can't get sizing
385 * info _or_ properly encode the arguments, there's really no
386 * point in continuting, so we fail the request.
388 if ((absz = xdr_sizeof(xdr_varg, &varg)) == 0) {
389 *access = NFSAUTH_DENIED;
390 return (FALSE);
393 abuf = kmem_alloc(absz, KM_SLEEP);
394 xdrmem_create(&xdrs, abuf, absz, XDR_ENCODE);
395 if (!xdr_varg(&xdrs, &varg)) {
396 XDR_DESTROY(&xdrs);
397 goto fail;
399 XDR_DESTROY(&xdrs);
402 * Prepare the door arguments
404 * We don't know the size of the message the daemon
405 * will pass back to us. By setting rbuf to NULL,
406 * we force the door code to allocate a buf of the
407 * appropriate size. We must set rsize > 0, however,
408 * else the door code acts as if no response was
409 * expected and doesn't pass the data to us.
411 da.data_ptr = (char *)abuf;
412 da.data_size = absz;
413 da.desc_ptr = NULL;
414 da.desc_num = 0;
415 da.rbuf = NULL;
416 da.rsize = 1;
418 retry:
419 mutex_enter(&mountd_lock);
420 dh = mountd_dh;
421 if (dh != NULL)
422 door_ki_hold(dh);
423 mutex_exit(&mountd_lock);
425 if (dh == NULL) {
427 * The rendezvous point has not been established yet!
428 * This could mean that either mountd(1m) has not yet
429 * been started or that _this_ routine nuked the door
430 * handle after receiving an EINTR for a REVOKED door.
432 * Returning NFSAUTH_DROP will cause the NFS client
433 * to retransmit the request, so let's try to be more
434 * rescillient and attempt for ntries before we bail.
436 if (++ntries % NFSAUTH_DR_TRYCNT) {
437 ddi_sleep(1);
438 goto retry;
441 kmem_free(abuf, absz);
443 sys_log("nfsauth: mountd has not established door");
444 *access = NFSAUTH_DROP;
445 return (FALSE);
448 ntries = 0;
451 * Now that we've got what we need, place the call.
453 switch (door_ki_upcall_limited(dh, &da, NULL, SIZE_MAX, 0)) {
454 case 0: /* Success */
455 door_ki_rele(dh);
457 if (da.data_ptr == NULL && da.data_size == 0) {
459 * The door_return that contained the data
460 * failed! We're here because of the 2nd
461 * door_return (w/o data) such that we can
462 * get control of the thread (and exit
463 * gracefully).
465 DTRACE_PROBE1(nfsserv__func__nfsauth__door__nil,
466 door_arg_t *, &da);
467 goto fail;
470 break;
472 case EAGAIN:
474 * Server out of resources; back off for a bit
476 door_ki_rele(dh);
477 ddi_sleep(1);
478 goto retry;
479 /* NOTREACHED */
481 case EINTR:
482 if (!door_ki_info(dh, &di)) {
483 door_ki_rele(dh);
485 if (di.di_attributes & DOOR_REVOKED) {
487 * The server barfed and revoked
488 * the (existing) door on us; we
489 * want to wait to give smf(5) a
490 * chance to restart mountd(1m)
491 * and establish a new door handle.
493 mutex_enter(&mountd_lock);
494 if (dh == mountd_dh) {
495 door_ki_rele(mountd_dh);
496 mountd_dh = NULL;
498 mutex_exit(&mountd_lock);
499 ddi_sleep(1);
500 goto retry;
503 * If the door was _not_ revoked on us,
504 * then more than likely we took an INTR,
505 * so we need to fail the operation.
507 goto fail;
510 * The only failure that can occur from getting
511 * the door info is EINVAL, so we let the code
512 * below handle it.
514 /* FALLTHROUGH */
516 case EBADF:
517 case EINVAL:
518 default:
520 * If we have a stale door handle, give smf a last
521 * chance to start it by sleeping for a little bit.
522 * If we're still hosed, we'll fail the call.
524 * Since we're going to reacquire the door handle
525 * upon the retry, we opt to sleep for a bit and
526 * _not_ to clear mountd_dh. If mountd restarted
527 * and was able to set mountd_dh, we should see
528 * the new instance; if not, we won't get caught
529 * up in the retry/DELAY loop.
531 door_ki_rele(dh);
532 if (!last) {
533 ddi_sleep(1);
534 last++;
535 goto retry;
537 sys_log("nfsauth: stale mountd door handle");
538 goto fail;
541 ASSERT(da.rbuf != NULL);
544 * No door errors encountered; setup the XDR stream for decoding
545 * the results. If we fail to decode the results, we've got no
546 * other recourse than to fail the request.
548 xdrmem_create(&xdrs, da.rbuf, da.rsize, XDR_DECODE);
549 if (!xdr_nfsauth_res(&xdrs, &res)) {
550 xdr_free(xdr_nfsauth_res, (char *)&res);
551 XDR_DESTROY(&xdrs);
552 kmem_free(da.rbuf, da.rsize);
553 goto fail;
555 XDR_DESTROY(&xdrs);
556 kmem_free(da.rbuf, da.rsize);
558 DTRACE_PROBE1(nfsserv__func__nfsauth__results, nfsauth_res_t *, &res);
559 switch (res.stat) {
560 case NFSAUTH_DR_OKAY:
561 *access = res.ares.auth_perm;
562 *srv_uid = res.ares.auth_srv_uid;
563 *srv_gid = res.ares.auth_srv_gid;
564 *srv_gids_cnt = res.ares.auth_srv_gids.len;
565 *srv_gids = kmem_alloc(*srv_gids_cnt * sizeof (gid_t),
566 KM_SLEEP);
567 bcopy(res.ares.auth_srv_gids.val, *srv_gids,
568 *srv_gids_cnt * sizeof (gid_t));
569 break;
571 case NFSAUTH_DR_EFAIL:
572 case NFSAUTH_DR_DECERR:
573 case NFSAUTH_DR_BADCMD:
574 default:
575 xdr_free(xdr_nfsauth_res, (char *)&res);
576 fail:
577 *access = NFSAUTH_DENIED;
578 kmem_free(abuf, absz);
579 return (FALSE);
580 /* NOTREACHED */
583 xdr_free(xdr_nfsauth_res, (char *)&res);
584 kmem_free(abuf, absz);
586 return (TRUE);
589 static void
590 nfsauth_refresh_thread(void)
592 refreshq_exi_node_t *ren;
593 refreshq_auth_node_t *ran;
595 struct exportinfo *exi;
597 int access;
598 bool_t retrieval;
600 callb_cpr_t cprinfo;
602 CALLB_CPR_INIT(&cprinfo, &refreshq_lock, callb_generic_cpr,
603 "nfsauth_refresh");
605 for (;;) {
606 mutex_enter(&refreshq_lock);
607 if (refreshq_thread_state != REFRESHQ_THREAD_RUNNING) {
608 /* Keep the hold on the lock! */
609 break;
612 ren = list_remove_head(&refreshq_queue);
613 if (ren == NULL) {
614 CALLB_CPR_SAFE_BEGIN(&cprinfo);
615 cv_wait(&refreshq_cv, &refreshq_lock);
616 CALLB_CPR_SAFE_END(&cprinfo, &refreshq_lock);
617 mutex_exit(&refreshq_lock);
618 continue;
620 mutex_exit(&refreshq_lock);
622 exi = ren->ren_exi;
623 ASSERT(exi != NULL);
626 * Since the ren was removed from the refreshq_queue above,
627 * this is the only thread aware about the ren existence, so we
628 * have the exclusive ownership of it and we do not need to
629 * protect it by any lock.
631 while ((ran = list_remove_head(&ren->ren_authlist))) {
632 uid_t uid;
633 gid_t gid;
634 uint_t ngids;
635 gid_t *gids;
636 struct auth_cache *p = ran->ran_auth;
637 char *netid = ran->ran_netid;
639 ASSERT(p != NULL);
640 ASSERT(netid != NULL);
642 kmem_free(ran, sizeof (refreshq_auth_node_t));
644 mutex_enter(&p->auth_lock);
647 * Once the entry goes INVALID, it can not change
648 * state.
650 * No need to refresh entries also in a case we are
651 * just shutting down.
653 * In general, there is no need to hold the
654 * refreshq_lock to test the refreshq_thread_state. We
655 * do hold it at other places because there is some
656 * related thread synchronization (or some other tasks)
657 * close to the refreshq_thread_state check.
659 * The check for the refreshq_thread_state value here
660 * is purely advisory to allow the faster
661 * nfsauth_refresh_thread() shutdown. In a case we
662 * will miss such advisory, nothing catastrophic
663 * happens: we will just spin longer here before the
664 * shutdown.
666 if (p->auth_state == NFS_AUTH_INVALID ||
667 refreshq_thread_state != REFRESHQ_THREAD_RUNNING) {
668 mutex_exit(&p->auth_lock);
670 if (p->auth_state == NFS_AUTH_INVALID)
671 nfsauth_free_node(p);
673 strfree(netid);
675 continue;
679 * Make sure the state is valid. Note that once we
680 * change the state to NFS_AUTH_REFRESHING, no other
681 * thread will be able to work on this entry.
683 ASSERT(p->auth_state == NFS_AUTH_STALE);
685 p->auth_state = NFS_AUTH_REFRESHING;
686 mutex_exit(&p->auth_lock);
688 DTRACE_PROBE2(nfsauth__debug__cache__refresh,
689 struct exportinfo *, exi,
690 struct auth_cache *, p);
693 * The first caching of the access rights
694 * is done with the netid pulled out of the
695 * request from the client. All subsequent
696 * users of the cache may or may not have
697 * the same netid. It doesn't matter. So
698 * when we refresh, we simply use the netid
699 * of the request which triggered the
700 * refresh attempt.
702 retrieval = nfsauth_retrieve(exi, netid,
703 p->auth_flavor, &p->auth_clnt->authc_addr, &access,
704 p->auth_clnt_cred, &uid, &gid, &ngids, &gids);
707 * This can only be set in one other place
708 * and the state has to be NFS_AUTH_FRESH.
710 strfree(netid);
712 mutex_enter(&p->auth_lock);
713 if (p->auth_state == NFS_AUTH_INVALID) {
714 mutex_exit(&p->auth_lock);
715 nfsauth_free_node(p);
716 if (retrieval == TRUE)
717 kmem_free(gids, ngids * sizeof (gid_t));
718 } else {
720 * If we got an error, do not reset the
721 * time. This will cause the next access
722 * check for the client to reschedule this
723 * node.
725 if (retrieval == TRUE) {
726 p->auth_access = access;
728 p->auth_srv_uid = uid;
729 p->auth_srv_gid = gid;
730 kmem_free(p->auth_srv_gids,
731 p->auth_srv_ngids * sizeof (gid_t));
732 p->auth_srv_ngids = ngids;
733 p->auth_srv_gids = gids;
735 p->auth_freshness = gethrestime_sec();
737 p->auth_state = NFS_AUTH_FRESH;
739 cv_broadcast(&p->auth_cv);
740 mutex_exit(&p->auth_lock);
744 list_destroy(&ren->ren_authlist);
745 exi_rele(ren->ren_exi);
746 kmem_free(ren, sizeof (refreshq_exi_node_t));
749 refreshq_thread_state = REFRESHQ_THREAD_HALTED;
750 cv_broadcast(&refreshq_cv);
751 CALLB_CPR_EXIT(&cprinfo);
752 zthread_exit();
756 nfsauth_cache_clnt_compar(const void *v1, const void *v2)
758 int c;
760 const struct auth_cache_clnt *a1 = (const struct auth_cache_clnt *)v1;
761 const struct auth_cache_clnt *a2 = (const struct auth_cache_clnt *)v2;
763 if (a1->authc_addr.len < a2->authc_addr.len)
764 return (-1);
765 if (a1->authc_addr.len > a2->authc_addr.len)
766 return (1);
768 c = memcmp(a1->authc_addr.buf, a2->authc_addr.buf, a1->authc_addr.len);
769 if (c < 0)
770 return (-1);
771 if (c > 0)
772 return (1);
774 return (0);
777 static int
778 nfsauth_cache_compar(const void *v1, const void *v2)
780 int c;
782 const struct auth_cache *a1 = (const struct auth_cache *)v1;
783 const struct auth_cache *a2 = (const struct auth_cache *)v2;
785 if (a1->auth_flavor < a2->auth_flavor)
786 return (-1);
787 if (a1->auth_flavor > a2->auth_flavor)
788 return (1);
790 if (crgetuid(a1->auth_clnt_cred) < crgetuid(a2->auth_clnt_cred))
791 return (-1);
792 if (crgetuid(a1->auth_clnt_cred) > crgetuid(a2->auth_clnt_cred))
793 return (1);
795 if (crgetgid(a1->auth_clnt_cred) < crgetgid(a2->auth_clnt_cred))
796 return (-1);
797 if (crgetgid(a1->auth_clnt_cred) > crgetgid(a2->auth_clnt_cred))
798 return (1);
800 if (crgetngroups(a1->auth_clnt_cred) < crgetngroups(a2->auth_clnt_cred))
801 return (-1);
802 if (crgetngroups(a1->auth_clnt_cred) > crgetngroups(a2->auth_clnt_cred))
803 return (1);
805 c = memcmp(crgetgroups(a1->auth_clnt_cred),
806 crgetgroups(a2->auth_clnt_cred), crgetngroups(a1->auth_clnt_cred));
807 if (c < 0)
808 return (-1);
809 if (c > 0)
810 return (1);
812 return (0);
816 * Get the access information from the cache or callup to the mountd
817 * to get and cache the access information in the kernel.
819 static int
820 nfsauth_cache_get(struct exportinfo *exi, struct svc_req *req, int flavor,
821 cred_t *cr, uid_t *uid, gid_t *gid, uint_t *ngids, gid_t **gids)
823 struct netbuf *taddrmask;
824 struct netbuf addr; /* temporary copy of client's address */
825 const struct netbuf *claddr;
826 avl_tree_t *tree;
827 struct auth_cache ac; /* used as a template for avl_find() */
828 struct auth_cache_clnt *c;
829 struct auth_cache_clnt acc; /* used as a template for avl_find() */
830 struct auth_cache *p = NULL;
831 int access;
833 uid_t tmpuid;
834 gid_t tmpgid;
835 uint_t tmpngids;
836 gid_t *tmpgids;
838 avl_index_t where; /* used for avl_find()/avl_insert() */
840 ASSERT(cr != NULL);
843 * Now check whether this client already
844 * has an entry for this flavor in the cache
845 * for this export.
846 * Get the caller's address, mask off the
847 * parts of the address that do not identify
848 * the host (port number, etc), and then hash
849 * it to find the chain of cache entries.
852 claddr = svc_getrpccaller(req->rq_xprt);
853 addr = *claddr;
854 addr.buf = kmem_alloc(addr.maxlen, KM_SLEEP);
855 bcopy(claddr->buf, addr.buf, claddr->len);
857 SVC_GETADDRMASK(req->rq_xprt, SVC_TATTR_ADDRMASK, (void **)&taddrmask);
858 ASSERT(taddrmask != NULL);
859 addrmask(&addr, taddrmask);
861 ac.auth_flavor = flavor;
862 ac.auth_clnt_cred = crdup(cr);
864 acc.authc_addr = addr;
866 tree = exi->exi_cache[hash(&addr)];
868 rw_enter(&exi->exi_cache_lock, RW_READER);
869 c = (struct auth_cache_clnt *)avl_find(tree, &acc, NULL);
871 if (c == NULL) {
872 struct auth_cache_clnt *nc;
874 rw_exit(&exi->exi_cache_lock);
876 nc = kmem_alloc(sizeof (*nc), KM_NOSLEEP | KM_NORMALPRI);
877 if (nc == NULL)
878 goto retrieve;
881 * Initialize the new auth_cache_clnt
883 nc->authc_addr = addr;
884 nc->authc_addr.buf = kmem_alloc(addr.maxlen,
885 KM_NOSLEEP | KM_NORMALPRI);
886 if (addr.maxlen != 0 && nc->authc_addr.buf == NULL) {
887 kmem_free(nc, sizeof (*nc));
888 goto retrieve;
890 bcopy(addr.buf, nc->authc_addr.buf, addr.len);
891 rw_init(&nc->authc_lock, NULL, RW_DEFAULT, NULL);
892 avl_create(&nc->authc_tree, nfsauth_cache_compar,
893 sizeof (struct auth_cache),
894 offsetof(struct auth_cache, auth_link));
896 rw_enter(&exi->exi_cache_lock, RW_WRITER);
897 c = (struct auth_cache_clnt *)avl_find(tree, &acc, &where);
898 if (c == NULL) {
899 avl_insert(tree, nc, where);
900 rw_downgrade(&exi->exi_cache_lock);
901 c = nc;
902 } else {
903 rw_downgrade(&exi->exi_cache_lock);
905 avl_destroy(&nc->authc_tree);
906 rw_destroy(&nc->authc_lock);
907 kmem_free(nc->authc_addr.buf, nc->authc_addr.maxlen);
908 kmem_free(nc, sizeof (*nc));
912 ASSERT(c != NULL);
914 rw_enter(&c->authc_lock, RW_READER);
915 p = (struct auth_cache *)avl_find(&c->authc_tree, &ac, NULL);
917 if (p == NULL) {
918 struct auth_cache *np;
920 rw_exit(&c->authc_lock);
922 np = kmem_cache_alloc(exi_cache_handle,
923 KM_NOSLEEP | KM_NORMALPRI);
924 if (np == NULL) {
925 rw_exit(&exi->exi_cache_lock);
926 goto retrieve;
930 * Initialize the new auth_cache
932 np->auth_clnt = c;
933 np->auth_flavor = flavor;
934 np->auth_clnt_cred = ac.auth_clnt_cred;
935 np->auth_srv_ngids = 0;
936 np->auth_srv_gids = NULL;
937 np->auth_time = np->auth_freshness = gethrestime_sec();
938 np->auth_state = NFS_AUTH_NEW;
939 mutex_init(&np->auth_lock, NULL, MUTEX_DEFAULT, NULL);
940 cv_init(&np->auth_cv, NULL, CV_DEFAULT, NULL);
942 rw_enter(&c->authc_lock, RW_WRITER);
943 rw_exit(&exi->exi_cache_lock);
945 p = (struct auth_cache *)avl_find(&c->authc_tree, &ac, &where);
946 if (p == NULL) {
947 avl_insert(&c->authc_tree, np, where);
948 rw_downgrade(&c->authc_lock);
949 p = np;
950 } else {
951 rw_downgrade(&c->authc_lock);
953 cv_destroy(&np->auth_cv);
954 mutex_destroy(&np->auth_lock);
955 crfree(ac.auth_clnt_cred);
956 kmem_cache_free(exi_cache_handle, np);
958 } else {
959 rw_exit(&exi->exi_cache_lock);
960 crfree(ac.auth_clnt_cred);
963 mutex_enter(&p->auth_lock);
964 rw_exit(&c->authc_lock);
967 * If the entry is in the WAITING state then some other thread is just
968 * retrieving the required info. The entry was either NEW, or the list
969 * of client's supplemental groups is going to be changed (either by
970 * this thread, or by some other thread). We need to wait until the
971 * nfsauth_retrieve() is done.
973 while (p->auth_state == NFS_AUTH_WAITING)
974 cv_wait(&p->auth_cv, &p->auth_lock);
977 * Here the entry cannot be in WAITING or INVALID state.
979 ASSERT(p->auth_state != NFS_AUTH_WAITING);
980 ASSERT(p->auth_state != NFS_AUTH_INVALID);
983 * If the cache entry is not valid yet, we need to retrieve the
984 * info ourselves.
986 if (p->auth_state == NFS_AUTH_NEW) {
987 bool_t res;
989 * NFS_AUTH_NEW is the default output auth_state value in a
990 * case we failed somewhere below.
992 auth_state_t state = NFS_AUTH_NEW;
994 p->auth_state = NFS_AUTH_WAITING;
995 mutex_exit(&p->auth_lock);
996 kmem_free(addr.buf, addr.maxlen);
997 addr = p->auth_clnt->authc_addr;
999 atomic_inc_uint(&nfsauth_cache_miss);
1001 res = nfsauth_retrieve(exi, svc_getnetid(req->rq_xprt), flavor,
1002 &addr, &access, cr, &tmpuid, &tmpgid, &tmpngids, &tmpgids);
1004 p->auth_access = access;
1005 p->auth_time = p->auth_freshness = gethrestime_sec();
1007 if (res == TRUE) {
1008 if (uid != NULL)
1009 *uid = tmpuid;
1010 if (gid != NULL)
1011 *gid = tmpgid;
1012 if (ngids != NULL && gids != NULL) {
1013 *ngids = tmpngids;
1014 *gids = tmpgids;
1017 * We need a copy of gids for the
1018 * auth_cache entry
1020 tmpgids = kmem_alloc(tmpngids * sizeof (gid_t),
1021 KM_NOSLEEP | KM_NORMALPRI);
1022 if (tmpgids != NULL)
1023 bcopy(*gids, tmpgids,
1024 tmpngids * sizeof (gid_t));
1027 if (tmpgids != NULL || tmpngids == 0) {
1028 p->auth_srv_uid = tmpuid;
1029 p->auth_srv_gid = tmpgid;
1030 p->auth_srv_ngids = tmpngids;
1031 p->auth_srv_gids = tmpgids;
1033 state = NFS_AUTH_FRESH;
1038 * Set the auth_state and notify waiters.
1040 mutex_enter(&p->auth_lock);
1041 p->auth_state = state;
1042 cv_broadcast(&p->auth_cv);
1043 mutex_exit(&p->auth_lock);
1044 } else {
1045 uint_t nach;
1046 time_t refresh;
1048 refresh = gethrestime_sec() - p->auth_freshness;
1050 p->auth_time = gethrestime_sec();
1052 if (uid != NULL)
1053 *uid = p->auth_srv_uid;
1054 if (gid != NULL)
1055 *gid = p->auth_srv_gid;
1056 if (ngids != NULL && gids != NULL) {
1057 *ngids = p->auth_srv_ngids;
1058 *gids = kmem_alloc(*ngids * sizeof (gid_t), KM_SLEEP);
1059 bcopy(p->auth_srv_gids, *gids, *ngids * sizeof (gid_t));
1062 access = p->auth_access;
1064 if ((refresh > NFSAUTH_CACHE_REFRESH) &&
1065 p->auth_state == NFS_AUTH_FRESH) {
1066 refreshq_auth_node_t *ran;
1067 uint_t nacr;
1069 p->auth_state = NFS_AUTH_STALE;
1070 mutex_exit(&p->auth_lock);
1072 nacr = atomic_inc_uint_nv(&nfsauth_cache_refresh);
1073 DTRACE_PROBE3(nfsauth__debug__cache__stale,
1074 struct exportinfo *, exi,
1075 struct auth_cache *, p,
1076 uint_t, nacr);
1078 ran = kmem_alloc(sizeof (refreshq_auth_node_t),
1079 KM_SLEEP);
1080 ran->ran_auth = p;
1081 ran->ran_netid = strdup(svc_getnetid(req->rq_xprt));
1083 mutex_enter(&refreshq_lock);
1085 * We should not add a work queue
1086 * item if the thread is not
1087 * accepting them.
1089 if (refreshq_thread_state == REFRESHQ_THREAD_RUNNING) {
1090 refreshq_exi_node_t *ren;
1093 * Is there an existing exi_list?
1095 for (ren = list_head(&refreshq_queue);
1096 ren != NULL;
1097 ren = list_next(&refreshq_queue, ren)) {
1098 if (ren->ren_exi == exi) {
1099 list_insert_tail(
1100 &ren->ren_authlist, ran);
1101 break;
1105 if (ren == NULL) {
1106 ren = kmem_alloc(
1107 sizeof (refreshq_exi_node_t),
1108 KM_SLEEP);
1110 exi_hold(exi);
1111 ren->ren_exi = exi;
1113 list_create(&ren->ren_authlist,
1114 sizeof (refreshq_auth_node_t),
1115 offsetof(refreshq_auth_node_t,
1116 ran_node));
1118 list_insert_tail(&ren->ren_authlist,
1119 ran);
1120 list_insert_tail(&refreshq_queue, ren);
1123 cv_broadcast(&refreshq_cv);
1124 } else {
1125 strfree(ran->ran_netid);
1126 kmem_free(ran, sizeof (refreshq_auth_node_t));
1129 mutex_exit(&refreshq_lock);
1130 } else {
1131 mutex_exit(&p->auth_lock);
1134 nach = atomic_inc_uint_nv(&nfsauth_cache_hit);
1135 DTRACE_PROBE2(nfsauth__debug__cache__hit,
1136 uint_t, nach,
1137 time_t, refresh);
1139 kmem_free(addr.buf, addr.maxlen);
1142 return (access);
1144 retrieve:
1145 crfree(ac.auth_clnt_cred);
1148 * Retrieve the required data without caching.
1151 ASSERT(p == NULL);
1153 atomic_inc_uint(&nfsauth_cache_miss);
1155 if (nfsauth_retrieve(exi, svc_getnetid(req->rq_xprt), flavor, &addr,
1156 &access, cr, &tmpuid, &tmpgid, &tmpngids, &tmpgids)) {
1157 if (uid != NULL)
1158 *uid = tmpuid;
1159 if (gid != NULL)
1160 *gid = tmpgid;
1161 if (ngids != NULL && gids != NULL) {
1162 *ngids = tmpngids;
1163 *gids = tmpgids;
1164 } else {
1165 kmem_free(tmpgids, tmpngids * sizeof (gid_t));
1169 kmem_free(addr.buf, addr.maxlen);
1171 return (access);
1175 * Check if the requesting client has access to the filesystem with
1176 * a given nfs flavor number which is an explicitly shared flavor.
1179 nfsauth4_secinfo_access(struct exportinfo *exi, struct svc_req *req,
1180 int flavor, int perm, cred_t *cr)
1182 int access;
1184 if (! (perm & M_4SEC_EXPORTED)) {
1185 return (NFSAUTH_DENIED);
1189 * Optimize if there are no lists
1191 if ((perm & (M_ROOT | M_NONE | M_MAP)) == 0) {
1192 perm &= ~M_4SEC_EXPORTED;
1193 if (perm == M_RO)
1194 return (NFSAUTH_RO);
1195 if (perm == M_RW)
1196 return (NFSAUTH_RW);
1199 access = nfsauth_cache_get(exi, req, flavor, cr, NULL, NULL, NULL,
1200 NULL);
1202 return (access);
1206 nfsauth_access(struct exportinfo *exi, struct svc_req *req, cred_t *cr,
1207 uid_t *uid, gid_t *gid, uint_t *ngids, gid_t **gids)
1209 int access, mapaccess;
1210 struct secinfo *sp;
1211 int i, flavor, perm;
1212 int authnone_entry = -1;
1215 * By default root is mapped to anonymous user.
1216 * This might get overridden later in nfsauth_cache_get().
1218 if (crgetuid(cr) == 0) {
1219 if (uid != NULL)
1220 *uid = exi->exi_export.ex_anon;
1221 if (gid != NULL)
1222 *gid = exi->exi_export.ex_anon;
1223 } else {
1224 if (uid != NULL)
1225 *uid = crgetuid(cr);
1226 if (gid != NULL)
1227 *gid = crgetgid(cr);
1230 if (ngids != NULL)
1231 *ngids = 0;
1232 if (gids != NULL)
1233 *gids = NULL;
1236 * Get the nfs flavor number from xprt.
1238 flavor = (int)(uintptr_t)req->rq_xprt->xp_cookie;
1241 * First check the access restrictions on the filesystem. If
1242 * there are no lists associated with this flavor then there's no
1243 * need to make an expensive call to the nfsauth service or to
1244 * cache anything.
1247 sp = exi->exi_export.ex_secinfo;
1248 for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
1249 if (flavor != sp[i].s_secinfo.sc_nfsnum) {
1250 if (sp[i].s_secinfo.sc_nfsnum == AUTH_NONE)
1251 authnone_entry = i;
1252 continue;
1254 break;
1257 mapaccess = 0;
1259 if (i >= exi->exi_export.ex_seccnt) {
1261 * Flavor not found, but use AUTH_NONE if it exists
1263 if (authnone_entry == -1)
1264 return (NFSAUTH_DENIED);
1265 flavor = AUTH_NONE;
1266 mapaccess = NFSAUTH_MAPNONE;
1267 i = authnone_entry;
1271 * If the flavor is in the ex_secinfo list, but not an explicitly
1272 * shared flavor by the user, it is a result of the nfsv4 server
1273 * namespace setup. We will grant an RO permission similar for
1274 * a pseudo node except that this node is a shared one.
1276 * e.g. flavor in (flavor) indicates that it is not explictly
1277 * shared by the user:
1279 * / (sys, krb5)
1281 * export #share -o sec=sys (krb5)
1283 * secure #share -o sec=krb5
1285 * In this case, when a krb5 request coming in to access
1286 * /export, RO permission is granted.
1288 if (!(sp[i].s_flags & M_4SEC_EXPORTED))
1289 return (mapaccess | NFSAUTH_RO);
1292 * Optimize if there are no lists.
1293 * We cannot optimize for AUTH_SYS with NGRPS (16) supplemental groups.
1295 perm = sp[i].s_flags;
1296 if ((perm & (M_ROOT | M_NONE | M_MAP)) == 0 && (ngroups_max <= NGRPS ||
1297 flavor != AUTH_SYS || crgetngroups(cr) < NGRPS)) {
1298 perm &= ~M_4SEC_EXPORTED;
1299 if (perm == M_RO)
1300 return (mapaccess | NFSAUTH_RO);
1301 if (perm == M_RW)
1302 return (mapaccess | NFSAUTH_RW);
1305 access = nfsauth_cache_get(exi, req, flavor, cr, uid, gid, ngids, gids);
1308 * For both NFSAUTH_DENIED and NFSAUTH_WRONGSEC we do not care about
1309 * the supplemental groups.
1311 if (access & NFSAUTH_DENIED || access & NFSAUTH_WRONGSEC) {
1312 if (ngids != NULL && gids != NULL) {
1313 kmem_free(*gids, *ngids * sizeof (gid_t));
1314 *ngids = 0;
1315 *gids = NULL;
1320 * Client's security flavor doesn't match with "ro" or
1321 * "rw" list. Try again using AUTH_NONE if present.
1323 if ((access & NFSAUTH_WRONGSEC) && (flavor != AUTH_NONE)) {
1325 * Have we already encountered AUTH_NONE ?
1327 if (authnone_entry != -1) {
1328 mapaccess = NFSAUTH_MAPNONE;
1329 access = nfsauth_cache_get(exi, req, AUTH_NONE, cr,
1330 NULL, NULL, NULL, NULL);
1331 } else {
1333 * Check for AUTH_NONE presence.
1335 for (; i < exi->exi_export.ex_seccnt; i++) {
1336 if (sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) {
1337 mapaccess = NFSAUTH_MAPNONE;
1338 access = nfsauth_cache_get(exi, req,
1339 AUTH_NONE, cr, NULL, NULL, NULL,
1340 NULL);
1341 break;
1347 if (access & NFSAUTH_DENIED)
1348 access = NFSAUTH_DENIED;
1350 return (access | mapaccess);
1353 static void
1354 nfsauth_free_clnt_node(struct auth_cache_clnt *p)
1356 void *cookie = NULL;
1357 struct auth_cache *node;
1359 while ((node = avl_destroy_nodes(&p->authc_tree, &cookie)) != NULL)
1360 nfsauth_free_node(node);
1361 avl_destroy(&p->authc_tree);
1363 kmem_free(p->authc_addr.buf, p->authc_addr.maxlen);
1364 rw_destroy(&p->authc_lock);
1366 kmem_free(p, sizeof (*p));
1369 static void
1370 nfsauth_free_node(struct auth_cache *p)
1372 crfree(p->auth_clnt_cred);
1373 kmem_free(p->auth_srv_gids, p->auth_srv_ngids * sizeof (gid_t));
1374 mutex_destroy(&p->auth_lock);
1375 cv_destroy(&p->auth_cv);
1376 kmem_cache_free(exi_cache_handle, p);
1380 * Free the nfsauth cache for a given export
1382 void
1383 nfsauth_cache_free(struct exportinfo *exi)
1385 int i;
1388 * The only way we got here was with an exi_rele, which means that no
1389 * auth cache entry is being refreshed.
1392 for (i = 0; i < AUTH_TABLESIZE; i++) {
1393 avl_tree_t *tree = exi->exi_cache[i];
1394 void *cookie = NULL;
1395 struct auth_cache_clnt *node;
1397 while ((node = avl_destroy_nodes(tree, &cookie)) != NULL)
1398 nfsauth_free_clnt_node(node);
1403 * Called by the kernel memory allocator when
1404 * memory is low. Free unused cache entries.
1405 * If that's not enough, the VM system will
1406 * call again for some more.
1408 /*ARGSUSED*/
1409 void
1410 exi_cache_reclaim(void *cdrarg)
1412 int i;
1413 struct exportinfo *exi;
1415 rw_enter(&exported_lock, RW_READER);
1417 for (i = 0; i < EXPTABLESIZE; i++) {
1418 for (exi = exptable[i]; exi; exi = exi->fid_hash.next) {
1419 exi_cache_trim(exi);
1423 rw_exit(&exported_lock);
1425 atomic_inc_uint(&nfsauth_cache_reclaim);
1428 void
1429 exi_cache_trim(struct exportinfo *exi)
1431 struct auth_cache_clnt *c;
1432 struct auth_cache_clnt *nextc;
1433 struct auth_cache *p;
1434 struct auth_cache *next;
1435 int i;
1436 time_t stale_time;
1437 avl_tree_t *tree;
1439 for (i = 0; i < AUTH_TABLESIZE; i++) {
1440 tree = exi->exi_cache[i];
1441 stale_time = gethrestime_sec() - NFSAUTH_CACHE_TRIM;
1442 rw_enter(&exi->exi_cache_lock, RW_READER);
1445 * Free entries that have not been
1446 * used for NFSAUTH_CACHE_TRIM seconds.
1448 for (c = avl_first(tree); c != NULL; c = AVL_NEXT(tree, c)) {
1450 * We are being called by the kmem subsystem to reclaim
1451 * memory so don't block if we can't get the lock.
1453 if (rw_tryenter(&c->authc_lock, RW_WRITER) == 0) {
1454 exi_cache_auth_reclaim_failed++;
1455 rw_exit(&exi->exi_cache_lock);
1456 return;
1459 for (p = avl_first(&c->authc_tree); p != NULL;
1460 p = next) {
1461 next = AVL_NEXT(&c->authc_tree, p);
1463 ASSERT(p->auth_state != NFS_AUTH_INVALID);
1465 mutex_enter(&p->auth_lock);
1468 * We won't trim recently used and/or WAITING
1469 * entries.
1471 if (p->auth_time > stale_time ||
1472 p->auth_state == NFS_AUTH_WAITING) {
1473 mutex_exit(&p->auth_lock);
1474 continue;
1477 DTRACE_PROBE1(nfsauth__debug__trim__state,
1478 auth_state_t, p->auth_state);
1481 * STALE and REFRESHING entries needs to be
1482 * marked INVALID only because they are
1483 * referenced by some other structures or
1484 * threads. They will be freed later.
1486 if (p->auth_state == NFS_AUTH_STALE ||
1487 p->auth_state == NFS_AUTH_REFRESHING) {
1488 p->auth_state = NFS_AUTH_INVALID;
1489 mutex_exit(&p->auth_lock);
1491 avl_remove(&c->authc_tree, p);
1492 } else {
1493 mutex_exit(&p->auth_lock);
1495 avl_remove(&c->authc_tree, p);
1496 nfsauth_free_node(p);
1499 rw_exit(&c->authc_lock);
1502 if (rw_tryupgrade(&exi->exi_cache_lock) == 0) {
1503 rw_exit(&exi->exi_cache_lock);
1504 exi_cache_clnt_reclaim_failed++;
1505 continue;
1508 for (c = avl_first(tree); c != NULL; c = nextc) {
1509 nextc = AVL_NEXT(tree, c);
1511 if (avl_is_empty(&c->authc_tree) == B_FALSE)
1512 continue;
1514 avl_remove(tree, c);
1516 nfsauth_free_clnt_node(c);
1519 rw_exit(&exi->exi_cache_lock);