4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
27 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
28 * Use is subject to license terms.
32 * Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T
37 * Portions of this source code were derived from Berkeley 4.3 BSD
38 * under license from the Regents of the University of California.
43 * Implements a kernel based, client side RPC.
46 #include <sys/param.h>
47 #include <sys/types.h>
48 #include <sys/systm.h>
49 #include <sys/sysmacros.h>
50 #include <sys/stream.h>
51 #include <sys/strsubr.h>
53 #include <sys/tiuser.h>
54 #include <sys/tihdr.h>
55 #include <sys/t_kuser.h>
56 #include <sys/errno.h>
58 #include <sys/debug.h>
59 #include <sys/kstat.h>
60 #include <sys/t_lock.h>
61 #include <sys/cmn_err.h>
64 #include <sys/taskq.h>
66 #include <sys/atomic.h>
68 #include <netinet/in.h>
69 #include <rpc/types.h>
73 #include <rpc/rpc_msg.h>
77 static enum clnt_stat
clnt_clts_kcallit(CLIENT
*, rpcproc_t
, xdrproc_t
,
78 caddr_t
, xdrproc_t
, caddr_t
, struct timeval
);
79 static void clnt_clts_kabort(CLIENT
*);
80 static void clnt_clts_kerror(CLIENT
*, struct rpc_err
*);
81 static bool_t
clnt_clts_kfreeres(CLIENT
*, xdrproc_t
, caddr_t
);
82 static bool_t
clnt_clts_kcontrol(CLIENT
*, int, char *);
83 static void clnt_clts_kdestroy(CLIENT
*);
84 static int clnt_clts_ksettimers(CLIENT
*, struct rpc_timers
*,
85 struct rpc_timers
*, int, void (*)(), caddr_t
, uint32_t);
88 * Operations vector for CLTS based RPC
90 static struct clnt_ops clts_ops
= {
91 clnt_clts_kcallit
, /* do rpc call */
92 clnt_clts_kabort
, /* abort call */
93 clnt_clts_kerror
, /* return error status */
94 clnt_clts_kfreeres
, /* free results */
95 clnt_clts_kdestroy
, /* destroy rpc handle */
96 clnt_clts_kcontrol
, /* the ioctl() of rpc */
97 clnt_clts_ksettimers
/* set retry timers */
101 * Endpoint for CLTS (INET, INET6, loopback, etc.)
103 typedef struct endpnt_type
{
104 struct endpnt_type
*e_next
; /* pointer to next endpoint type */
105 list_t e_pool
; /* list of available endpoints */
106 list_t e_ilist
; /* list of idle endpoints */
107 struct endpnt
*e_pcurr
; /* pointer to current endpoint */
108 char e_protofmly
[KNC_STRSIZE
]; /* protocol family */
109 dev_t e_rdev
; /* device */
110 kmutex_t e_plock
; /* pool lock */
111 kmutex_t e_ilock
; /* idle list lock */
112 timeout_id_t e_itimer
; /* timer to dispatch the taskq */
113 uint_t e_cnt
; /* number of endpoints in the pool */
114 zoneid_t e_zoneid
; /* zoneid of endpoint type */
115 kcondvar_t e_async_cv
; /* cv for asynchronous reap threads */
116 uint_t e_async_count
; /* count of asynchronous reap threads */
119 typedef struct endpnt
{
120 list_node_t e_node
; /* link to the pool */
121 list_node_t e_idle
; /* link to the idle list */
122 endpnt_type_t
*e_type
; /* back pointer to endpoint type */
123 TIUSER
*e_tiptr
; /* pointer to transport endpoint */
124 queue_t
*e_wq
; /* write queue */
125 uint_t e_flags
; /* endpoint flags */
126 uint_t e_ref
; /* ref count on endpoint */
127 kcondvar_t e_cv
; /* condition variable */
128 kmutex_t e_lock
; /* protects cv and flags */
129 time_t e_itime
; /* time when rele'd */
132 #define ENDPNT_ESTABLISHED 0x1 /* endpoint is established */
133 #define ENDPNT_WAITING 0x2 /* thread waiting for endpoint */
134 #define ENDPNT_BOUND 0x4 /* endpoint is bound */
135 #define ENDPNT_STALE 0x8 /* endpoint is dead */
136 #define ENDPNT_ONIDLE 0x10 /* endpoint is on the idle list */
138 static krwlock_t endpnt_type_lock
; /* protects endpnt_type_list */
139 static endpnt_type_t
*endpnt_type_list
= NULL
; /* list of CLTS endpoints */
140 static struct kmem_cache
*endpnt_cache
; /* cache of endpnt_t's */
141 static taskq_t
*endpnt_taskq
; /* endpnt_t reaper thread */
142 static bool_t taskq_created
; /* flag for endpnt_taskq */
143 static kmutex_t endpnt_taskq_lock
; /* taskq lock */
144 static zone_key_t endpnt_destructor_key
;
146 #define DEFAULT_ENDPOINT_REAP_INTERVAL 60 /* 1 minute */
147 #define DEFAULT_INTERVAL_SHIFT 30 /* 30 seconds */
152 static int clnt_clts_max_endpoints
= -1;
153 static int clnt_clts_hash_size
= DEFAULT_HASH_SIZE
;
154 static time_t clnt_clts_endpoint_reap_interval
= -1;
155 static clock_t clnt_clts_taskq_dispatch_interval
;
158 * Response completion hash queue
160 static call_table_t
*clts_call_ht
;
163 * Routines for the endpoint manager
165 static struct endpnt_type
*endpnt_type_create(struct knetconfig
*);
166 static void endpnt_type_free(struct endpnt_type
*);
167 static int check_endpnt(struct endpnt
*, struct endpnt
**);
168 static struct endpnt
*endpnt_get(struct knetconfig
*, int);
169 static void endpnt_rele(struct endpnt
*);
170 static void endpnt_reap_settimer(endpnt_type_t
*);
171 static void endpnt_reap(endpnt_type_t
*);
172 static void endpnt_reap_dispatch(void *);
173 static void endpnt_reclaim(zoneid_t
);
177 * Request dispatching function.
179 static int clnt_clts_dispatch_send(queue_t
*q
, mblk_t
*, struct netbuf
*addr
,
180 calllist_t
*, uint_t
, cred_t
*);
183 * The size of the preserialized RPC header information.
185 #define CKU_HDRSIZE 20
187 * The initial allocation size. It is small to reduce space requirements.
189 #define CKU_INITSIZE 2048
191 * The size of additional allocations, if required. It is larger to
192 * reduce the number of actual allocations.
194 #define CKU_ALLOCSIZE 8192
197 * Private data per rpc handle. This structure is allocated by
198 * clnt_clts_kcreate, and freed by clnt_clts_kdestroy.
201 CLIENT cku_client
; /* client handle */
202 int cku_retrys
; /* request retrys */
204 struct endpnt
*cku_endpnt
; /* open end point */
205 struct knetconfig cku_config
;
206 struct netbuf cku_addr
; /* remote address */
207 struct rpc_err cku_err
; /* error status */
208 XDR cku_outxdr
; /* xdr stream for output */
209 XDR cku_inxdr
; /* xdr stream for input */
210 char cku_rpchdr
[CKU_HDRSIZE
+ 4]; /* rpc header */
211 struct cred
*cku_cred
; /* credentials */
212 struct rpc_timers
*cku_timers
; /* for estimating RTT */
213 struct rpc_timers
*cku_timeall
; /* for estimating RTT */
214 void (*cku_feedback
)(int, int, caddr_t
);
215 /* ptr to feedback rtn */
216 caddr_t cku_feedarg
; /* argument for feedback func */
217 uint32_t cku_xid
; /* current XID */
218 bool_t cku_bcast
; /* RPC broadcast hint */
219 int cku_useresvport
; /* Use reserved port */
220 struct rpc_clts_client
*cku_stats
; /* counters for the zone */
223 static const struct rpc_clts_client
{
224 kstat_named_t rccalls
;
225 kstat_named_t rcbadcalls
;
226 kstat_named_t rcretrans
;
227 kstat_named_t rcbadxids
;
228 kstat_named_t rctimeouts
;
229 kstat_named_t rcnewcreds
;
230 kstat_named_t rcbadverfs
;
231 kstat_named_t rctimers
;
232 kstat_named_t rcnomem
;
233 kstat_named_t rccantsend
;
234 } clts_rcstat_tmpl
= {
235 { "calls", KSTAT_DATA_UINT64
},
236 { "badcalls", KSTAT_DATA_UINT64
},
237 { "retrans", KSTAT_DATA_UINT64
},
238 { "badxids", KSTAT_DATA_UINT64
},
239 { "timeouts", KSTAT_DATA_UINT64
},
240 { "newcreds", KSTAT_DATA_UINT64
},
241 { "badverfs", KSTAT_DATA_UINT64
},
242 { "timers", KSTAT_DATA_UINT64
},
243 { "nomem", KSTAT_DATA_UINT64
},
244 { "cantsend", KSTAT_DATA_UINT64
},
247 static uint_t clts_rcstat_ndata
=
248 sizeof (clts_rcstat_tmpl
) / sizeof (kstat_named_t
);
250 #define RCSTAT_INCR(s, x) \
251 atomic_inc_64(&(s)->x.value.ui64)
253 #define ptoh(p) (&((p)->cku_client))
254 #define htop(h) ((struct cku_private *)((h)->cl_private))
260 #define REFRESHES 2 /* authentication refreshes */
263 * The following is used to determine the global default behavior for
264 * CLTS when binding to a local port.
266 * If the value is set to 1 the default will be to select a reserved
267 * (aka privileged) port, if the value is zero the default will be to
268 * use non-reserved ports. Users of kRPC may override this by using
269 * CLNT_CONTROL() and CLSET_BINDRESVPORT.
271 static int clnt_clts_do_bindresvport
= 1;
273 #define BINDRESVPORT_RETRIES 5
276 clnt_clts_stats_init(zoneid_t zoneid
, struct rpc_clts_client
**statsp
)
281 knp
= rpcstat_zone_init_common(zoneid
, "unix", "rpc_clts_client",
282 (const kstat_named_t
*)&clts_rcstat_tmpl
,
283 sizeof (clts_rcstat_tmpl
));
285 * Backwards compatibility for old kstat clients
287 ksp
= kstat_create_zone("unix", 0, "rpc_client", "rpc",
288 KSTAT_TYPE_NAMED
, clts_rcstat_ndata
,
289 KSTAT_FLAG_VIRTUAL
| KSTAT_FLAG_WRITABLE
, zoneid
);
294 *statsp
= (struct rpc_clts_client
*)knp
;
298 clnt_clts_stats_fini(zoneid_t zoneid
, struct rpc_clts_client
**statsp
)
300 rpcstat_zone_fini_common(zoneid
, "unix", "rpc_clts_client");
301 kstat_delete_byname_zone("unix", 0, "rpc_client", zoneid
);
302 kmem_free(*statsp
, sizeof (clts_rcstat_tmpl
));
306 * Create an rpc handle for a clts rpc connection.
307 * Allocates space for the handle structure and the private data.
311 clnt_clts_kcreate(struct knetconfig
*config
, struct netbuf
*addr
,
312 rpcprog_t pgm
, rpcvers_t vers
, int retrys
, struct cred
*cred
,
316 struct cku_private
*p
;
317 struct rpc_msg call_msg
;
327 p
= kmem_zalloc(sizeof (*p
), KM_SLEEP
);
332 h
->cl_ops
= &clts_ops
;
333 h
->cl_private
= (caddr_t
)p
;
334 h
->cl_auth
= authkern_create();
336 /* call message, just used to pre-serialize below */
338 call_msg
.rm_direction
= CALL
;
339 call_msg
.rm_call
.cb_rpcvers
= RPC_MSG_VERSION
;
340 call_msg
.rm_call
.cb_prog
= pgm
;
341 call_msg
.rm_call
.cb_vers
= vers
;
344 clnt_clts_kinit(h
, addr
, retrys
, cred
);
346 xdrmem_create(&p
->cku_outxdr
, p
->cku_rpchdr
, CKU_HDRSIZE
, XDR_ENCODE
);
348 /* pre-serialize call message header */
349 if (!xdr_callhdr(&p
->cku_outxdr
, &call_msg
)) {
350 XDR_DESTROY(&p
->cku_outxdr
);
351 error
= EINVAL
; /* XXX */
354 XDR_DESTROY(&p
->cku_outxdr
);
356 p
->cku_config
.knc_rdev
= config
->knc_rdev
;
357 p
->cku_config
.knc_semantics
= config
->knc_semantics
;
358 plen
= strlen(config
->knc_protofmly
) + 1;
359 p
->cku_config
.knc_protofmly
= kmem_alloc(plen
, KM_SLEEP
);
360 bcopy(config
->knc_protofmly
, p
->cku_config
.knc_protofmly
, plen
);
361 p
->cku_useresvport
= -1; /* value has not been set */
363 cv_init(&p
->cku_call
.call_cv
, NULL
, CV_DEFAULT
, NULL
);
364 mutex_init(&p
->cku_call
.call_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
370 auth_destroy(h
->cl_auth
);
371 kmem_free(p
->cku_addr
.buf
, addr
->maxlen
);
372 kmem_free(p
, sizeof (struct cku_private
));
378 clnt_clts_kinit(CLIENT
*h
, struct netbuf
*addr
, int retrys
, cred_t
*cred
)
380 /* LINTED pointer alignment */
381 struct cku_private
*p
= htop(h
);
384 rsp
= zone_getspecific(rpcstat_zone_key
, rpc_zone());
387 p
->cku_retrys
= retrys
;
389 if (p
->cku_addr
.maxlen
< addr
->len
) {
390 if (p
->cku_addr
.maxlen
!= 0 && p
->cku_addr
.buf
!= NULL
)
391 kmem_free(p
->cku_addr
.buf
, p
->cku_addr
.maxlen
);
393 p
->cku_addr
.buf
= kmem_zalloc(addr
->maxlen
, KM_SLEEP
);
394 p
->cku_addr
.maxlen
= addr
->maxlen
;
397 p
->cku_addr
.len
= addr
->len
;
398 bcopy(addr
->buf
, p
->cku_addr
.buf
, addr
->len
);
402 p
->cku_timers
= NULL
;
403 p
->cku_timeall
= NULL
;
404 p
->cku_feedback
= NULL
;
405 p
->cku_bcast
= FALSE
;
406 p
->cku_call
.call_xid
= 0;
407 p
->cku_call
.call_hash
= 0;
408 p
->cku_call
.call_notified
= FALSE
;
409 p
->cku_call
.call_next
= NULL
;
410 p
->cku_call
.call_prev
= NULL
;
411 p
->cku_call
.call_reply
= NULL
;
412 p
->cku_call
.call_wq
= NULL
;
413 p
->cku_stats
= rsp
->rpc_clts_client
;
417 * set the timers. Return current retransmission timeout.
420 clnt_clts_ksettimers(CLIENT
*h
, struct rpc_timers
*t
, struct rpc_timers
*all
,
421 int minimum
, void (*feedback
)(int, int, caddr_t
), caddr_t arg
,
424 /* LINTED pointer alignment */
425 struct cku_private
*p
= htop(h
);
428 p
->cku_feedback
= feedback
;
429 p
->cku_feedarg
= arg
;
431 p
->cku_timeall
= all
;
434 value
= all
->rt_rtxcur
;
435 value
+= t
->rt_rtxcur
;
438 RCSTAT_INCR(p
->cku_stats
, rctimers
);
443 * Time out back off function. tim is in HZ
445 #define MAXTIMO (20 * hz)
446 #define backoff(tim) (((tim) < MAXTIMO) ? dobackoff(tim) : (tim))
447 #define dobackoff(tim) ((((tim) << 1) > MAXTIMO) ? MAXTIMO : ((tim) << 1))
449 #define RETRY_POLL_TIMO 30
452 * Call remote procedure.
453 * Most of the work of rpc is done here. We serialize what is left
454 * of the header (some was pre-serialized in the handle), serialize
455 * the arguments, and send it off. We wait for a reply or a time out.
456 * Timeout causes an immediate return, other packet problems may cause
457 * a retry on the receive. When a good packet is received we deserialize
458 * it, and check verification. A bad reply code will cause one retry
459 * with full (longhand) credentials.
462 clnt_clts_kcallit_addr(CLIENT
*h
, rpcproc_t procnum
, xdrproc_t xdr_args
,
463 caddr_t argsp
, xdrproc_t xdr_results
, caddr_t resultsp
,
464 struct timeval wait
, struct netbuf
*sin
)
466 /* LINTED pointer alignment */
467 struct cku_private
*p
= htop(h
);
469 int stries
= p
->cku_retrys
;
470 int refreshes
= REFRESHES
; /* number of times to refresh cred */
471 int round_trip
; /* time the RPC */
477 calllist_t
*call
= &p
->cku_call
;
478 clock_t ori_timout
, timout
;
480 enum clnt_stat status
;
481 struct rpc_msg reply_msg
;
482 enum clnt_stat re_status
;
485 RCSTAT_INCR(p
->cku_stats
, rccalls
);
487 RPCLOG(2, "clnt_clts_kcallit_addr: wait.tv_sec: %ld\n", wait
.tv_sec
);
488 RPCLOG(2, "clnt_clts_kcallit_addr: wait.tv_usec: %ld\n", wait
.tv_usec
);
490 timout
= TIMEVAL_TO_TICK(&wait
);
493 if (p
->cku_xid
== 0) {
494 p
->cku_xid
= alloc_xid();
495 if (p
->cku_endpnt
!= NULL
)
496 endpnt_rele(p
->cku_endpnt
);
497 p
->cku_endpnt
= NULL
;
499 call
->call_zoneid
= rpc_zoneid();
506 while ((mp
= allocb(CKU_INITSIZE
, BPRI_LO
)) == NULL
) {
507 if (strwaitbuf(CKU_INITSIZE
, BPRI_LO
)) {
508 p
->cku_err
.re_status
= RPC_SYSTEMERROR
;
509 p
->cku_err
.re_errno
= ENOSR
;
514 xdrs
= &p
->cku_outxdr
;
515 xdrmblk_init(xdrs
, mp
, XDR_ENCODE
, CKU_ALLOCSIZE
);
517 if (h
->cl_auth
->ah_cred
.oa_flavor
!= RPCSEC_GSS
) {
519 * Copy in the preserialized RPC header
522 bcopy(p
->cku_rpchdr
, mp
->b_rptr
, CKU_HDRSIZE
);
525 * transaction id is the 1st thing in the output
528 /* LINTED pointer alignment */
529 (*(uint32_t *)(mp
->b_rptr
)) = p
->cku_xid
;
531 /* Skip the preserialized stuff. */
532 XDR_SETPOS(xdrs
, CKU_HDRSIZE
);
534 /* Serialize dynamic stuff into the output buffer. */
535 if ((!XDR_PUTINT32(xdrs
, (int32_t *)&procnum
)) ||
536 (!AUTH_MARSHALL(h
->cl_auth
, xdrs
, p
->cku_cred
)) ||
537 (!(*xdr_args
)(xdrs
, argsp
))) {
540 p
->cku_err
.re_status
= RPC_CANTENCODEARGS
;
541 p
->cku_err
.re_errno
= EIO
;
545 uint32_t *uproc
= (uint32_t *)
546 &p
->cku_rpchdr
[CKU_HDRSIZE
];
547 IXDR_PUT_U_INT32(uproc
, procnum
);
549 (*(uint32_t *)(&p
->cku_rpchdr
[0])) = p
->cku_xid
;
552 /* Serialize the procedure number and the arguments. */
553 if (!AUTH_WRAP(h
->cl_auth
, (caddr_t
)p
->cku_rpchdr
,
554 CKU_HDRSIZE
+4, xdrs
, xdr_args
, argsp
)) {
557 p
->cku_err
.re_status
= RPC_CANTENCODEARGS
;
558 p
->cku_err
.re_errno
= EIO
;
570 p
->cku_err
.re_status
= RPC_SYSTEMERROR
;
571 p
->cku_err
.re_errno
= ENOSR
;
576 * Grab an endpnt only if the endpoint is NULL. We could be retrying
577 * the request and in this case we want to go through the same
578 * source port, so that the duplicate request cache may detect a
582 if (p
->cku_endpnt
== NULL
)
583 p
->cku_endpnt
= endpnt_get(&p
->cku_config
, p
->cku_useresvport
);
585 if (p
->cku_endpnt
== NULL
) {
587 p
->cku_err
.re_status
= RPC_SYSTEMERROR
;
588 p
->cku_err
.re_errno
= ENOSR
;
592 round_trip
= ddi_get_lbolt();
594 error
= clnt_clts_dispatch_send(p
->cku_endpnt
->e_wq
, mp
,
595 &p
->cku_addr
, call
, p
->cku_xid
, p
->cku_cred
);
599 p
->cku_err
.re_status
= RPC_CANTSEND
;
600 p
->cku_err
.re_errno
= error
;
601 RCSTAT_INCR(p
->cku_stats
, rccantsend
);
605 RPCLOG(64, "clnt_clts_kcallit_addr: sent call for xid 0x%x\n",
609 * There are two reasons for which we go back to tryread.
611 * a) In case the status is RPC_PROCUNAVAIL and we sent out a
612 * broadcast we should not get any invalid messages with the
613 * RPC_PROCUNAVAIL error back. Some broken RPC implementations
614 * send them and for this we have to ignore them ( as we would
615 * have never received them ) and look for another message
616 * which might contain the valid response because we don't know
617 * how many broken implementations are in the network. So we are
618 * going to loop until
619 * - we received a valid response
620 * - we have processed all invalid responses and
621 * got a time out when we try to receive again a
624 * b) We will jump back to tryread also in case we failed
625 * within the AUTH_VALIDATE. In this case we should move
626 * on and loop until we received a valid response or we
627 * have processed all responses with broken authentication
628 * and we got a time out when we try to receive a message.
631 mutex_enter(&call
->call_lock
);
633 if (call
->call_notified
== FALSE
) {
634 klwp_t
*lwp
= ttolwp(curthread
);
635 clock_t cv_wait_ret
= 1; /* init to > 0 */
636 clock_t cv_timout
= timout
;
641 cv_timout
+= ddi_get_lbolt();
644 while ((cv_wait_ret
=
645 cv_timedwait(&call
->call_cv
,
646 &call
->call_lock
, cv_timout
)) > 0 &&
647 call
->call_notified
== FALSE
)
650 while ((cv_wait_ret
=
651 cv_timedwait_sig(&call
->call_cv
,
652 &call
->call_lock
, cv_timout
)) > 0 &&
653 call
->call_notified
== FALSE
)
656 if (cv_wait_ret
== 0)
662 resp
= call
->call_reply
;
663 call
->call_reply
= NULL
;
664 status
= call
->call_status
;
666 * We have to reset the call_notified here. In case we have
667 * to do a retry ( e.g. in case we got a RPC_PROCUNAVAIL
668 * error ) we need to set this to false to ensure that
669 * we will wait for the next message. When the next message
670 * is going to arrive the function clnt_clts_dispatch_notify
671 * will set this to true again.
673 call
->call_notified
= FALSE
;
674 call
->call_status
= RPC_TIMEDOUT
;
675 mutex_exit(&call
->call_lock
);
677 if (status
== RPC_TIMEDOUT
) {
680 * We got interrupted, bail out
682 p
->cku_err
.re_status
= RPC_INTR
;
683 p
->cku_err
.re_errno
= EINTR
;
686 RPCLOG(8, "clnt_clts_kcallit_addr: "
687 "request w/xid 0x%x timedout "
688 "waiting for reply\n", p
->cku_xid
);
689 #if 0 /* XXX not yet */
691 * Timeout may be due to a dead gateway. Send
692 * an ioctl downstream advising deletion of
693 * route when we reach the half-way point to
696 if (stries
== p
->cku_retrys
/2) {
697 t_kadvise(p
->cku_endpnt
->e_tiptr
,
698 (uchar_t
*)p
->cku_addr
.buf
,
702 p
->cku_err
.re_status
= RPC_TIMEDOUT
;
703 p
->cku_err
.re_errno
= ETIMEDOUT
;
704 RCSTAT_INCR(p
->cku_stats
, rctimeouts
);
709 ASSERT(resp
!= NULL
);
712 * Prepare the message for further processing. We need to remove
713 * the datagram header and copy the source address if necessary. No
714 * need to verify the header since rpcmod took care of that.
717 * Copy the source address if the caller has supplied a netbuf.
720 union T_primitives
*pptr
;
722 pptr
= (union T_primitives
*)resp
->b_rptr
;
723 bcopy(resp
->b_rptr
+ pptr
->unitdata_ind
.SRC_offset
, sin
->buf
,
724 pptr
->unitdata_ind
.SRC_length
);
725 sin
->len
= pptr
->unitdata_ind
.SRC_length
;
729 * Pop off the datagram header.
730 * It was retained in rpcmodrput().
737 round_trip
= ddi_get_lbolt() - round_trip
;
739 * Van Jacobson timer algorithm here, only if NOT a retransmission.
741 if (p
->cku_timers
!= NULL
&& stries
== p
->cku_retrys
) {
745 rt
-= (p
->cku_timers
->rt_srtt
>> 3);
746 p
->cku_timers
->rt_srtt
+= rt
;
749 rt
-= (p
->cku_timers
->rt_deviate
>> 2);
750 p
->cku_timers
->rt_deviate
+= rt
;
751 p
->cku_timers
->rt_rtxcur
=
752 (clock_t)((p
->cku_timers
->rt_srtt
>> 2) +
753 p
->cku_timers
->rt_deviate
) >> 1;
756 rt
-= (p
->cku_timeall
->rt_srtt
>> 3);
757 p
->cku_timeall
->rt_srtt
+= rt
;
760 rt
-= (p
->cku_timeall
->rt_deviate
>> 2);
761 p
->cku_timeall
->rt_deviate
+= rt
;
762 p
->cku_timeall
->rt_rtxcur
=
763 (clock_t)((p
->cku_timeall
->rt_srtt
>> 2) +
764 p
->cku_timeall
->rt_deviate
) >> 1;
765 if (p
->cku_feedback
!= NULL
) {
766 (*p
->cku_feedback
)(FEEDBACK_OK
, procnum
,
774 xdrs
= &(p
->cku_inxdr
);
775 xdrmblk_init(xdrs
, resp
, XDR_DECODE
, 0);
777 reply_msg
.rm_direction
= REPLY
;
778 reply_msg
.rm_reply
.rp_stat
= MSG_ACCEPTED
;
779 reply_msg
.acpted_rply
.ar_stat
= SUCCESS
;
780 reply_msg
.acpted_rply
.ar_verf
= _null_auth
;
782 * xdr_results will be done in AUTH_UNWRAP.
784 reply_msg
.acpted_rply
.ar_results
.where
= NULL
;
785 reply_msg
.acpted_rply
.ar_results
.proc
= xdr_void
;
788 * Decode and validate the response.
790 if (!xdr_replymsg(xdrs
, &reply_msg
)) {
791 p
->cku_err
.re_status
= RPC_CANTDECODERES
;
792 p
->cku_err
.re_errno
= EIO
;
793 (void) xdr_rpc_free_verifier(xdrs
, &reply_msg
);
798 _seterr_reply(&reply_msg
, &(p
->cku_err
));
800 re_status
= p
->cku_err
.re_status
;
801 if (re_status
== RPC_SUCCESS
) {
803 * Reply is good, check auth.
805 if (!AUTH_VALIDATE(h
->cl_auth
,
806 &reply_msg
.acpted_rply
.ar_verf
)) {
807 p
->cku_err
.re_status
= RPC_AUTHERROR
;
808 p
->cku_err
.re_why
= AUTH_INVALIDRESP
;
809 RCSTAT_INCR(p
->cku_stats
, rcbadverfs
);
810 (void) xdr_rpc_free_verifier(xdrs
, &reply_msg
);
814 if (!AUTH_UNWRAP(h
->cl_auth
, xdrs
, xdr_results
, resultsp
)) {
815 p
->cku_err
.re_status
= RPC_CANTDECODERES
;
816 p
->cku_err
.re_errno
= EIO
;
818 (void) xdr_rpc_free_verifier(xdrs
, &reply_msg
);
822 /* set errno in case we can't recover */
823 if (re_status
!= RPC_VERSMISMATCH
&&
824 re_status
!= RPC_AUTHERROR
&& re_status
!= RPC_PROGVERSMISMATCH
)
825 p
->cku_err
.re_errno
= EIO
;
827 * Determine whether or not we're doing an RPC
828 * broadcast. Some server implementations don't
829 * follow RFC 1050, section 7.4.2 in that they
830 * don't remain silent when they see a proc
831 * they don't support. Therefore we keep trying
832 * to receive on RPC_PROCUNAVAIL, hoping to get
833 * a valid response from a compliant server.
835 if (re_status
== RPC_PROCUNAVAIL
&& p
->cku_bcast
) {
836 (void) xdr_rpc_free_verifier(xdrs
, &reply_msg
);
840 if (re_status
== RPC_AUTHERROR
) {
842 (void) xdr_rpc_free_verifier(xdrs
, &reply_msg
);
844 call_table_remove(call
);
845 if (call
->call_reply
!= NULL
) {
846 freemsg(call
->call_reply
);
847 call
->call_reply
= NULL
;
851 * Maybe our credential needs to be refreshed
854 AUTH_REFRESH(h
->cl_auth
, &reply_msg
, p
->cku_cred
)) {
856 * The credential is refreshed. Try the request again.
857 * Even if stries == 0, we still retry as long as
858 * refreshes > 0. This prevents a soft authentication
859 * error turning into a hard one at an upper level.
862 RCSTAT_INCR(p
->cku_stats
, rcbadcalls
);
863 RCSTAT_INCR(p
->cku_stats
, rcnewcreds
);
872 * We have used the client handle to do an AUTH_REFRESH
873 * and the RPC status may be set to RPC_SUCCESS;
874 * Let's make sure to set it to RPC_AUTHERROR.
876 p
->cku_err
.re_status
= RPC_AUTHERROR
;
879 * Map recoverable and unrecoverable
880 * authentication errors to appropriate errno
882 switch (p
->cku_err
.re_why
) {
885 * Could be an nfsportmon failure, set
886 * useresvport and try again.
888 if (p
->cku_useresvport
!= 1) {
889 p
->cku_useresvport
= 1;
896 endpt
= p
->cku_endpnt
;
897 if (endpt
->e_tiptr
!= NULL
) {
898 mutex_enter(&endpt
->e_lock
);
899 endpt
->e_flags
&= ~ENDPNT_BOUND
;
900 (void) t_kclose(endpt
->e_tiptr
, 1);
901 endpt
->e_tiptr
= NULL
;
902 mutex_exit(&endpt
->e_lock
);
906 p
->cku_xid
= alloc_xid();
907 endpnt_rele(p
->cku_endpnt
);
908 p
->cku_endpnt
= NULL
;
914 case AUTH_INVALIDRESP
:
916 case RPCSEC_GSS_NOCRED
:
917 case RPCSEC_GSS_FAILED
:
918 p
->cku_err
.re_errno
= EACCES
;
920 case AUTH_REJECTEDCRED
:
921 case AUTH_REJECTEDVERF
:
923 p
->cku_err
.re_errno
= EIO
;
926 RPCLOG(1, "clnt_clts_kcallit : authentication failed "
927 "with RPC_AUTHERROR of type %d\n",
932 (void) xdr_rpc_free_verifier(xdrs
, &reply_msg
);
936 call_table_remove(call
);
937 if (call
->call_reply
!= NULL
) {
938 freemsg(call
->call_reply
);
939 call
->call_reply
= NULL
;
941 RPCLOG(64, "clnt_clts_kcallit_addr: xid 0x%x taken off dispatch list",
950 if ((p
->cku_err
.re_status
!= RPC_SUCCESS
) &&
951 (p
->cku_err
.re_status
!= RPC_INTR
) &&
952 (p
->cku_err
.re_status
!= RPC_UDERROR
) &&
953 !IS_UNRECOVERABLE_RPC(p
->cku_err
.re_status
)) {
954 if (p
->cku_feedback
!= NULL
&& stries
== p
->cku_retrys
) {
955 (*p
->cku_feedback
)(FEEDBACK_REXMIT1
, procnum
,
959 timout
= backoff(timout
);
960 if (p
->cku_timeall
!= NULL
)
961 p
->cku_timeall
->rt_rtxcur
= timout
;
963 if (p
->cku_err
.re_status
== RPC_SYSTEMERROR
||
964 p
->cku_err
.re_status
== RPC_CANTSEND
) {
966 * Errors due to lack of resources, wait a bit
969 (void) ddi_msleep(100);
972 RCSTAT_INCR(p
->cku_stats
, rcretrans
);
980 if (p
->cku_err
.re_status
!= RPC_SUCCESS
) {
981 RCSTAT_INCR(p
->cku_stats
, rcbadcalls
);
985 * Allow the endpoint to be held by the client handle in case this
986 * RPC was not successful. A retry may occur at a higher level and
987 * in this case we may want to send the request over the same
989 * Endpoint is also released for one-way RPC: no reply, nor retransmit
992 if ((p
->cku_err
.re_status
== RPC_SUCCESS
||
993 (p
->cku_err
.re_status
== RPC_TIMEDOUT
&& ori_timout
== 0)) &&
994 p
->cku_endpnt
!= NULL
) {
995 endpnt_rele(p
->cku_endpnt
);
996 p
->cku_endpnt
= NULL
;
998 DTRACE_PROBE2(clnt_clts_kcallit_done
, int, p
->cku_err
.re_status
,
999 struct endpnt
*, p
->cku_endpnt
);
1002 return (p
->cku_err
.re_status
);
1005 static enum clnt_stat
1006 clnt_clts_kcallit(CLIENT
*h
, rpcproc_t procnum
, xdrproc_t xdr_args
,
1007 caddr_t argsp
, xdrproc_t xdr_results
, caddr_t resultsp
,
1008 struct timeval wait
)
1010 return (clnt_clts_kcallit_addr(h
, procnum
, xdr_args
, argsp
,
1011 xdr_results
, resultsp
, wait
, NULL
));
1015 * Return error info on this handle.
1018 clnt_clts_kerror(CLIENT
*h
, struct rpc_err
*err
)
1020 /* LINTED pointer alignment */
1021 struct cku_private
*p
= htop(h
);
1028 clnt_clts_kfreeres(CLIENT
*h
, xdrproc_t xdr_res
, caddr_t res_ptr
)
1030 xdr_free(xdr_res
, res_ptr
);
1037 clnt_clts_kabort(CLIENT
*h
)
1042 clnt_clts_kcontrol(CLIENT
*h
, int cmd
, char *arg
)
1044 /* LINTED pointer alignment */
1045 struct cku_private
*p
= htop(h
);
1049 p
->cku_xid
= *((uint32_t *)arg
);
1053 *((uint32_t *)arg
) = p
->cku_xid
;
1057 p
->cku_bcast
= *((uint32_t *)arg
);
1061 *((uint32_t *)arg
) = p
->cku_bcast
;
1063 case CLSET_BINDRESVPORT
:
1067 if (*(int *)arg
!= 1 && *(int *)arg
!= 0)
1070 p
->cku_useresvport
= *(int *)arg
;
1074 case CLGET_BINDRESVPORT
:
1078 *(int *)arg
= p
->cku_useresvport
;
1088 * Destroy rpc handle.
1089 * Frees the space used for output buffer, private data, and handle
1090 * structure, and the file pointer/TLI data on last reference.
1093 clnt_clts_kdestroy(CLIENT
*h
)
1095 /* LINTED pointer alignment */
1096 struct cku_private
*p
= htop(h
);
1097 calllist_t
*call
= &p
->cku_call
;
1101 RPCLOG(8, "clnt_clts_kdestroy h: %p\n", (void *)h
);
1102 RPCLOG(8, "clnt_clts_kdestroy h: xid=0x%x\n", p
->cku_xid
);
1104 if (p
->cku_endpnt
!= NULL
)
1105 endpnt_rele(p
->cku_endpnt
);
1107 cv_destroy(&call
->call_cv
);
1108 mutex_destroy(&call
->call_lock
);
1110 plen
= strlen(p
->cku_config
.knc_protofmly
) + 1;
1111 kmem_free(p
->cku_config
.knc_protofmly
, plen
);
1112 kmem_free(p
->cku_addr
.buf
, p
->cku_addr
.maxlen
);
1113 kmem_free(p
, sizeof (*p
));
1117 * The connectionless (CLTS) kRPC endpoint management subsystem.
1119 * Because endpoints are potentially shared among threads making RPC calls,
1120 * they are managed in a pool according to type (endpnt_type_t). Each
1121 * endpnt_type_t points to a list of usable endpoints through the e_pool
1122 * field, which is of type list_t. list_t is a doubly-linked list.
1123 * The number of endpoints in the pool is stored in the e_cnt field of
1124 * endpnt_type_t and the endpoints are reference counted using the e_ref field
1125 * in the endpnt_t structure.
1127 * As an optimization, endpoints that have no references are also linked
1128 * to an idle list via e_ilist which is also of type list_t. When a thread
1129 * calls endpnt_get() to obtain a transport endpoint, the idle list is first
1130 * consulted and if such an endpoint exists, it is removed from the idle list
1131 * and returned to the caller.
1133 * If the idle list is empty, then a check is made to see if more endpoints
1134 * can be created. If so, we proceed and create a new endpoint which is added
1135 * to the pool and returned to the caller. If we have reached the limit and
1136 * cannot make a new endpoint then one is returned to the caller via round-
1139 * When an endpoint is placed on the idle list by a thread calling
1140 * endpnt_rele(), it is timestamped and then a reaper taskq is scheduled to
1141 * be dispatched if one hasn't already been. When the timer fires, the
1142 * taskq traverses the idle list and checks to see which endpoints are
1143 * eligible to be closed. It determines this by checking if the timestamp
1144 * when the endpoint was released has exceeded the threshold for how long
1145 * it should stay alive.
1147 * endpnt_t structures remain persistent until the memory reclaim callback,
1148 * endpnt_reclaim(), is invoked.
1150 * Here is an example of how the data structures would be laid out by the
1156 * _______________ ______________
1157 * | e_next |----------------------->| e_next |---->>
1158 * | e_pool |<---+ | e_pool |<----+
1159 * | e_ilist |<---+--+ | e_ilist |<----+--+
1160 * +->| e_pcurr |----+--+--+ +->| e_pcurr |-----+--+--+
1161 * | | ... | | | | | | ... | | | |
1162 * | | e_itimer (90) | | | | | | e_itimer (0) | | | |
1163 * | | e_cnt (1) | | | | | | e_cnt (3) | | | |
1164 * | +---------------+ | | | | +--------------+ | | |
1166 * | endpnt_t | | | | | | |
1167 * | ____________ | | | | ____________ | | |
1168 * | | e_node |<------+ | | | | e_node |<------+ | |
1169 * | | e_idle |<---------+ | | | e_idle | | | |
1170 * +--| e_type |<------------+ +--| e_type | | | |
1171 * | e_tiptr | | | e_tiptr | | | |
1172 * | ... | | | ... | | | |
1173 * | e_lock | | | e_lock | | | |
1174 * | ... | | | ... | | | |
1175 * | e_ref (0) | | | e_ref (2) | | | |
1176 * | e_itime | | | e_itime | | | |
1177 * +------------+ | +------------+ | | |
1180 * | ____________ | | |
1181 * | | e_node |<------+ | |
1182 * | | e_idle |<------+--+ |
1188 * | | e_ref (0) | | |
1190 * | +------------+ | |
1193 * | ____________ | |
1194 * | | e_node |<------+ |
1196 * +--| e_type |<------------+
1205 * Endpoint locking strategy:
1207 * The following functions manipulate lists which hold the endpoint and the
1208 * endpoints themselves:
1210 * endpnt_get()/check_endpnt()/endpnt_rele()/endpnt_reap()/do_endpnt_reclaim()
1212 * Lock description follows:
1214 * endpnt_type_lock: Global reader/writer lock which protects accesses to the
1217 * e_plock: Lock defined in the endpnt_type_t. It is intended to
1218 * protect accesses to the pool of endpoints (e_pool) for a given
1221 * e_ilock: Lock defined in endpnt_type_t. It is intended to protect accesses
1222 * to the idle list (e_ilist) of available endpoints for a given
1223 * endpnt_type_t. It also protects access to the e_itimer, e_async_cv,
1224 * and e_async_count fields in endpnt_type_t.
1226 * e_lock: Lock defined in the endpnt structure. It is intended to protect
1227 * flags, cv, and ref count.
1229 * The order goes as follows so as not to induce deadlock.
1231 * endpnt_type_lock -> e_plock -> e_ilock -> e_lock
1233 * Interaction with Zones and shutting down:
1235 * endpnt_type_ts are uniquely identified by the (e_zoneid, e_rdev, e_protofmly)
1236 * tuple, which means that a zone may not reuse another zone's idle endpoints
1237 * without first doing a t_kclose().
1239 * A zone's endpnt_type_ts are destroyed when a zone is shut down; e_async_cv
1240 * and e_async_count are used to keep track of the threads in endpnt_taskq
1241 * trying to reap endpnt_ts in the endpnt_type_t.
1245 * Allocate and initialize an endpnt_type_t
1247 static struct endpnt_type
*
1248 endpnt_type_create(struct knetconfig
*config
)
1250 struct endpnt_type
*etype
;
1253 * Allocate a new endpoint type to hang a list of
1254 * endpoints off of it.
1256 etype
= kmem_alloc(sizeof (struct endpnt_type
), KM_SLEEP
);
1257 etype
->e_next
= NULL
;
1258 etype
->e_pcurr
= NULL
;
1259 etype
->e_itimer
= 0;
1262 (void) strncpy(etype
->e_protofmly
, config
->knc_protofmly
, KNC_STRSIZE
);
1263 mutex_init(&etype
->e_plock
, NULL
, MUTEX_DEFAULT
, NULL
);
1264 mutex_init(&etype
->e_ilock
, NULL
, MUTEX_DEFAULT
, NULL
);
1265 etype
->e_rdev
= config
->knc_rdev
;
1266 etype
->e_zoneid
= rpc_zoneid();
1267 etype
->e_async_count
= 0;
1268 cv_init(&etype
->e_async_cv
, NULL
, CV_DEFAULT
, NULL
);
1270 list_create(&etype
->e_pool
, sizeof (endpnt_t
),
1271 offsetof(endpnt_t
, e_node
));
1272 list_create(&etype
->e_ilist
, sizeof (endpnt_t
),
1273 offsetof(endpnt_t
, e_idle
));
1276 * Check to see if we need to create a taskq for endpoint
1279 mutex_enter(&endpnt_taskq_lock
);
1280 if (taskq_created
== FALSE
) {
1281 taskq_created
= TRUE
;
1282 mutex_exit(&endpnt_taskq_lock
);
1283 ASSERT(endpnt_taskq
== NULL
);
1284 endpnt_taskq
= taskq_create("clts_endpnt_taskq", 1,
1285 minclsyspri
, 200, INT_MAX
, 0);
1287 mutex_exit(&endpnt_taskq_lock
);
1293 * Free an endpnt_type_t
1296 endpnt_type_free(struct endpnt_type
*etype
)
1298 mutex_destroy(&etype
->e_plock
);
1299 mutex_destroy(&etype
->e_ilock
);
1300 list_destroy(&etype
->e_pool
);
1301 list_destroy(&etype
->e_ilist
);
1302 kmem_free(etype
, sizeof (endpnt_type_t
));
1306 * Check the endpoint to ensure that it is suitable for use.
1308 * Possible return values:
1310 * return (1) - Endpoint is established, but needs to be re-opened.
1311 * return (0) && *newp == NULL - Endpoint is established, but unusable.
1312 * return (0) && *newp != NULL - Endpoint is established and usable.
1315 check_endpnt(struct endpnt
*endp
, struct endpnt
**newp
)
1319 mutex_enter(&endp
->e_lock
);
1320 ASSERT(endp
->e_ref
>= 1);
1323 * The first condition we check for is if the endpoint has been
1324 * allocated, but is unusable either because it has been closed or
1325 * has been marked stale. Only *one* thread will be allowed to
1326 * execute the then clause. This is enforced because the first thread
1327 * to check this condition will clear the flags, so that subsequent
1328 * thread(s) checking this endpoint will move on.
1330 if ((endp
->e_flags
& ENDPNT_ESTABLISHED
) &&
1331 (!(endp
->e_flags
& ENDPNT_BOUND
) ||
1332 (endp
->e_flags
& ENDPNT_STALE
))) {
1334 * Clear the flags here since they will be
1335 * set again by this thread. They need to be
1336 * individually cleared because we want to maintain
1337 * the state for ENDPNT_ONIDLE.
1339 endp
->e_flags
&= ~(ENDPNT_ESTABLISHED
|
1340 ENDPNT_WAITING
| ENDPNT_BOUND
| ENDPNT_STALE
);
1341 mutex_exit(&endp
->e_lock
);
1346 * The second condition is meant for any thread that is waiting for
1347 * an endpoint to become established. It will cv_wait() until
1348 * the condition for the endpoint has been changed to ENDPNT_BOUND or
1351 while (!(endp
->e_flags
& ENDPNT_BOUND
) &&
1352 !(endp
->e_flags
& ENDPNT_STALE
)) {
1353 endp
->e_flags
|= ENDPNT_WAITING
;
1354 cv_wait(&endp
->e_cv
, &endp
->e_lock
);
1357 ASSERT(endp
->e_flags
& ENDPNT_ESTABLISHED
);
1360 * The last case we check for is if the endpoint has been marked stale.
1361 * If this is the case then set *newp to NULL and return, so that the
1362 * caller is notified of the error and can take appropriate action.
1364 if (endp
->e_flags
& ENDPNT_STALE
) {
1368 mutex_exit(&endp
->e_lock
);
/*
 * Provide a fault injection setting to test error conditions.
 *
 * Off (zero) by default.  While it is greater than zero, endpnt_get()
 * decrements it once per call -- presumably failing that call, as the name
 * suggests (TODO confirm against endpnt_get()).
 */
static int endpnt_get_return_null;
1380 * Returns a handle (struct endpnt *) to an open and bound endpoint
1381 * specified by the knetconfig passed in. Returns NULL if no valid endpoint
1384 static struct endpnt
*
1385 endpnt_get(struct knetconfig
*config
, int useresvport
)
1387 struct endpnt_type
*n_etype
= NULL
;
1388 struct endpnt_type
*np
= NULL
;
1389 struct endpnt
*new = NULL
;
1390 struct endpnt
*endp
= NULL
;
1391 struct endpnt
*next
= NULL
;
1392 TIUSER
*tiptr
= NULL
;
1393 int rtries
= BINDRESVPORT_RETRIES
;
1397 zoneid_t zoneid
= rpc_zoneid();
1400 RPCLOG(1, "endpnt_get: protofmly %s, ", config
->knc_protofmly
);
1401 RPCLOG(1, "rdev %ld\n", config
->knc_rdev
);
1405 * Inject fault if desired. Pretend we have a stale endpoint
1408 if (endpnt_get_return_null
> 0) {
1409 endpnt_get_return_null
--;
1413 rw_enter(&endpnt_type_lock
, RW_READER
);
1416 for (np
= endpnt_type_list
; np
!= NULL
; np
= np
->e_next
)
1417 if ((np
->e_zoneid
== zoneid
) &&
1418 (np
->e_rdev
== config
->knc_rdev
) &&
1419 (strcmp(np
->e_protofmly
,
1420 config
->knc_protofmly
) == 0))
1423 if (np
== NULL
&& n_etype
!= NULL
) {
1424 ASSERT(rw_write_held(&endpnt_type_lock
));
1427 * Link the endpoint type onto the list
1429 n_etype
->e_next
= endpnt_type_list
;
1430 endpnt_type_list
= n_etype
;
1437 * The logic here is that we were unable to find an
1438 * endpnt_type_t that matched our criteria, so we allocate a
1439 * new one. Because kmem_alloc() needs to be called with
1440 * KM_SLEEP, we drop our locks so that we don't induce
1441 * deadlock. After allocating and initializing the
1442 * endpnt_type_t, we reacquire the lock and go back to check
1443 * if this entry needs to be added to the list. Since we do
1444 * some operations without any locking other threads may
1445 * have been looking for the same endpnt_type_t and gone
1446 * through this code path. We check for this case and allow
1447 * one thread to link its endpnt_type_t to the list and the
1448 * other threads will simply free theirs.
1450 rw_exit(&endpnt_type_lock
);
1451 n_etype
= endpnt_type_create(config
);
1454 * We need to reacquire the lock with RW_WRITER here so that
1455 * we can safely link the new endpoint type onto the list.
1457 rw_enter(&endpnt_type_lock
, RW_WRITER
);
1461 rw_exit(&endpnt_type_lock
);
1463 * If n_etype is not NULL, then another thread was able to
1464 * insert an endpnt_type_t of this type onto the list before
1465 * we did. Go ahead and free ours.
1467 if (n_etype
!= NULL
)
1468 endpnt_type_free(n_etype
);
1470 mutex_enter(&np
->e_ilock
);
1472 * The algorithm to hand out endpoints is to first
1473 * give out those that are idle if such endpoints
1474 * exist. Otherwise, create a new one if we haven't
1475 * reached the max threshold. Finally, we give out
1476 * endpoints in a pseudo LRU fashion (round-robin).
1478 * Note: The idle list is merely a hint of those endpoints
1479 * that should be idle. There exists a window after the
1480 * endpoint is released and before it is linked back onto the
1481 * idle list where a thread could get a reference to it and
1482 * use it. This is okay, since the reference counts will
1483 * still be consistent.
1485 if ((endp
= (endpnt_t
*)list_head(&np
->e_ilist
)) != NULL
) {
1486 timeout_id_t t_id
= 0;
1488 mutex_enter(&endp
->e_lock
);
1491 endp
->e_flags
&= ~ENDPNT_ONIDLE
;
1492 mutex_exit(&endp
->e_lock
);
1495 * Pop the endpoint off the idle list and hand it off
1497 list_remove(&np
->e_ilist
, endp
);
1499 if (np
->e_itimer
!= 0) {
1500 t_id
= np
->e_itimer
;
1503 mutex_exit(&np
->e_ilock
);
1505 * Reset the idle timer if it has been set
1507 if (t_id
!= (timeout_id_t
)0)
1508 (void) untimeout(t_id
);
1510 if (check_endpnt(endp
, &new) == 0)
1512 } else if (np
->e_cnt
>= clnt_clts_max_endpoints
) {
1514 * There are no idle endpoints currently, so
1515 * create a new one if we have not reached the maximum or
1516 * hand one out in round-robin.
1518 mutex_exit(&np
->e_ilock
);
1519 mutex_enter(&np
->e_plock
);
1521 mutex_enter(&endp
->e_lock
);
1523 mutex_exit(&endp
->e_lock
);
1525 ASSERT(endp
!= NULL
);
1527 * Advance the pointer to the next eligible endpoint, if
1530 if (np
->e_cnt
> 1) {
1531 next
= (endpnt_t
*)list_next(&np
->e_pool
, np
->e_pcurr
);
1533 next
= (endpnt_t
*)list_head(&np
->e_pool
);
1537 mutex_exit(&np
->e_plock
);
1540 * We need to check to see if this endpoint is bound or
1541 * not. If it is in progress then just wait until
1542 * the set up is complete
1544 if (check_endpnt(endp
, &new) == 0)
1547 mutex_exit(&np
->e_ilock
);
1548 mutex_enter(&np
->e_plock
);
1551 * Allocate a new endpoint to use. If we can't allocate any
1552 * more memory then use one that is already established if any
1553 * such endpoints exist.
1555 new = kmem_cache_alloc(endpnt_cache
, KM_NOSLEEP
);
1557 RPCLOG0(1, "endpnt_get: kmem_cache_alloc failed\n");
1559 * Try to recover by using an existing endpoint.
1561 if (np
->e_cnt
<= 0) {
1562 mutex_exit(&np
->e_plock
);
1566 if ((next
= list_next(&np
->e_pool
, np
->e_pcurr
)) !=
1569 ASSERT(endp
!= NULL
);
1570 mutex_enter(&endp
->e_lock
);
1572 mutex_exit(&endp
->e_lock
);
1573 mutex_exit(&np
->e_plock
);
1575 if (check_endpnt(endp
, &new) == 0)
1579 * Partially init an endpoint structure and put
1580 * it on the list, so that other interested threads
1581 * know that one is being created
1583 bzero(new, sizeof (struct endpnt
));
1585 cv_init(&new->e_cv
, NULL
, CV_DEFAULT
, NULL
);
1586 mutex_init(&new->e_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
1591 * Link the endpoint into the pool.
1593 list_insert_head(&np
->e_pool
, new);
1595 if (np
->e_pcurr
== NULL
)
1597 mutex_exit(&np
->e_plock
);
1602 * The transport should be opened with sufficient privs
1605 error
= t_kopen(NULL
, config
->knc_rdev
, FREAD
|FWRITE
|FNDELAY
, &tiptr
,
1608 RPCLOG(1, "endpnt_get: t_kopen: %d\n", error
);
1612 new->e_tiptr
= tiptr
;
1613 rpc_poptimod(tiptr
->fp
->f_vnode
);
1616 * Allow the kernel to push the module on behalf of the user.
1618 error
= strioctl(tiptr
->fp
->f_vnode
, I_PUSH
, (intptr_t)"rpcmod", 0,
1619 K_TO_K
, cr
, &retval
);
1621 RPCLOG(1, "endpnt_get: kstr_push on rpcmod failed %d\n", error
);
1625 error
= strioctl(tiptr
->fp
->f_vnode
, RPC_CLIENT
, 0, 0, K_TO_K
,
1628 RPCLOG(1, "endpnt_get: strioctl failed %d\n", error
);
1633 * Connectionless data flow should bypass the stream head.
1635 new->e_wq
= tiptr
->fp
->f_vnode
->v_stream
->sd_wrq
->q_next
;
1637 error
= strioctl(tiptr
->fp
->f_vnode
, I_PUSH
, (intptr_t)"timod", 0,
1638 K_TO_K
, cr
, &retval
);
1640 RPCLOG(1, "endpnt_get: kstr_push on timod failed %d\n", error
);
1645 * Attempt to bind the endpoint. If we fail then propagate
1646 * error back to calling subsystem, so that it can be handled
1648 * If the caller has not specified reserved port usage then
1649 * take the system default.
1651 if (useresvport
== -1)
1652 useresvport
= clnt_clts_do_bindresvport
;
1655 (strcmp(config
->knc_protofmly
, NC_INET
) == 0 ||
1656 strcmp(config
->knc_protofmly
, NC_INET6
) == 0)) {
1659 bindresvport(new->e_tiptr
, NULL
, NULL
, FALSE
)) != 0) {
1661 "endpnt_get: bindresvport error %d\n", error
);
1662 if (error
!= EPROTO
) {
1670 (void) t_kclose(new->e_tiptr
, 1);
1672 * reopen with all privileges
1674 error
= t_kopen(NULL
, config
->knc_rdev
,
1675 FREAD
|FWRITE
|FNDELAY
,
1678 RPCLOG(1, "endpnt_get: t_kopen: %d\n", error
);
1679 new->e_tiptr
= NULL
;
1683 } else if ((error
= t_kbind(new->e_tiptr
, NULL
, NULL
)) != 0) {
1684 RPCLOG(1, "endpnt_get: t_kbind failed: %d\n", error
);
1689 * Set the flags and notify any waiters that we have an established
1692 mutex_enter(&new->e_lock
);
1693 new->e_flags
|= ENDPNT_ESTABLISHED
;
1694 new->e_flags
|= ENDPNT_BOUND
;
1695 if (new->e_flags
& ENDPNT_WAITING
) {
1696 cv_broadcast(&new->e_cv
);
1697 new->e_flags
&= ~ENDPNT_WAITING
;
1699 mutex_exit(&new->e_lock
);
1704 ASSERT(new != NULL
);
1706 * mark this endpoint as stale and notify any threads waiting
1707 * on this endpoint that it will be going away.
1709 mutex_enter(&new->e_lock
);
1710 if (new->e_ref
> 0) {
1711 new->e_flags
|= ENDPNT_ESTABLISHED
;
1712 new->e_flags
|= ENDPNT_STALE
;
1713 if (new->e_flags
& ENDPNT_WAITING
) {
1714 cv_broadcast(&new->e_cv
);
1715 new->e_flags
&= ~ENDPNT_WAITING
;
1719 new->e_tiptr
= NULL
;
1720 mutex_exit(&new->e_lock
);
1723 * If there was a transport endpoint opened, then close it.
1726 (void) t_kclose(tiptr
, 1);
1732 * Release a referece to the endpoint
1735 endpnt_rele(struct endpnt
*sp
)
1737 mutex_enter(&sp
->e_lock
);
1738 ASSERT(sp
->e_ref
> 0);
1741 * If the ref count is zero, then start the idle timer and link
1742 * the endpoint onto the idle list.
1744 if (sp
->e_ref
== 0) {
1745 sp
->e_itime
= gethrestime_sec();
1748 * Check to see if the endpoint is already linked to the idle
1749 * list, so that we don't try to reinsert it.
1751 if (sp
->e_flags
& ENDPNT_ONIDLE
) {
1752 mutex_exit(&sp
->e_lock
);
1753 mutex_enter(&sp
->e_type
->e_ilock
);
1754 endpnt_reap_settimer(sp
->e_type
);
1755 mutex_exit(&sp
->e_type
->e_ilock
);
1759 sp
->e_flags
|= ENDPNT_ONIDLE
;
1760 mutex_exit(&sp
->e_lock
);
1761 mutex_enter(&sp
->e_type
->e_ilock
);
1762 list_insert_tail(&sp
->e_type
->e_ilist
, sp
);
1763 endpnt_reap_settimer(sp
->e_type
);
1764 mutex_exit(&sp
->e_type
->e_ilock
);
1766 mutex_exit(&sp
->e_lock
);
1770 endpnt_reap_settimer(endpnt_type_t
*etp
)
1772 if (etp
->e_itimer
== (timeout_id_t
)0)
1773 etp
->e_itimer
= timeout(endpnt_reap_dispatch
, (void *)etp
,
1774 clnt_clts_taskq_dispatch_interval
);
1778 endpnt_reap_dispatch(void *a
)
1780 endpnt_type_t
*etp
= a
;
1783 * The idle timer has fired, so dispatch the taskq to close the
1786 if (taskq_dispatch(endpnt_taskq
, (task_func_t
*)endpnt_reap
, etp
,
1787 TQ_NOSLEEP
) == (uintptr_t)NULL
)
1789 mutex_enter(&etp
->e_ilock
);
1790 etp
->e_async_count
++;
1791 mutex_exit(&etp
->e_ilock
);
1795 * Traverse the idle list and close those endpoints that have reached their
1799 endpnt_reap(endpnt_type_t
*etp
)
1802 struct endpnt
*next_node
= NULL
;
1804 mutex_enter(&etp
->e_ilock
);
1805 e
= list_head(&etp
->e_ilist
);
1807 next_node
= list_next(&etp
->e_ilist
, e
);
1809 mutex_enter(&e
->e_lock
);
1811 mutex_exit(&e
->e_lock
);
1816 ASSERT(e
->e_ref
== 0);
1817 if (e
->e_itime
> 0 &&
1818 (e
->e_itime
+ clnt_clts_endpoint_reap_interval
) <
1819 gethrestime_sec()) {
1820 e
->e_flags
&= ~ENDPNT_BOUND
;
1821 (void) t_kclose(e
->e_tiptr
, 1);
1825 mutex_exit(&e
->e_lock
);
1829 if (--etp
->e_async_count
== 0)
1830 cv_signal(&etp
->e_async_cv
);
1831 mutex_exit(&etp
->e_ilock
);
1835 endpnt_reclaim(zoneid_t zoneid
)
1837 struct endpnt_type
*np
;
1839 struct endpnt
*next_node
= NULL
;
1843 list_create(&free_list
, sizeof (endpnt_t
), offsetof(endpnt_t
, e_node
));
1845 RPCLOG0(1, "endpnt_reclaim: reclaim callback started\n");
1846 rw_enter(&endpnt_type_lock
, RW_READER
);
1847 for (np
= endpnt_type_list
; np
!= NULL
; np
= np
->e_next
) {
1848 if (zoneid
!= ALL_ZONES
&& zoneid
!= np
->e_zoneid
)
1851 mutex_enter(&np
->e_plock
);
1852 RPCLOG(1, "endpnt_reclaim: protofmly %s, ",
1854 RPCLOG(1, "rdev %ld\n", np
->e_rdev
);
1855 RPCLOG(1, "endpnt_reclaim: found %d endpoint(s)\n",
1858 if (np
->e_cnt
== 0) {
1859 mutex_exit(&np
->e_plock
);
1864 * The nice thing about maintaining an idle list is that if
1865 * there are any endpoints to reclaim, they are going to be
1866 * on this list. Just go through and reap the ones that
1867 * have ref counts of zero.
1869 mutex_enter(&np
->e_ilock
);
1870 e
= list_head(&np
->e_ilist
);
1872 next_node
= list_next(&np
->e_ilist
, e
);
1873 mutex_enter(&e
->e_lock
);
1875 mutex_exit(&e
->e_lock
);
1879 ASSERT(e
->e_ref
== 0);
1880 mutex_exit(&e
->e_lock
);
1882 list_remove(&np
->e_ilist
, e
);
1883 list_remove(&np
->e_pool
, e
);
1884 list_insert_head(&free_list
, e
);
1890 mutex_exit(&np
->e_ilock
);
1892 * Reset the current pointer to be safe
1894 if ((e
= (struct endpnt
*)list_head(&np
->e_pool
)) != NULL
)
1897 ASSERT(np
->e_cnt
== 0);
1901 mutex_exit(&np
->e_plock
);
1903 rw_exit(&endpnt_type_lock
);
1905 while ((e
= list_head(&free_list
)) != NULL
) {
1906 list_remove(&free_list
, e
);
1907 if (e
->e_tiptr
!= NULL
)
1908 (void) t_kclose(e
->e_tiptr
, 1);
1910 cv_destroy(&e
->e_cv
);
1911 mutex_destroy(&e
->e_lock
);
1912 kmem_cache_free(endpnt_cache
, e
);
1914 list_destroy(&free_list
);
1915 RPCLOG(1, "endpnt_reclaim: reclaimed %d endpoint(s)\n", rcnt
);
1919 * Endpoint reclaim zones destructor callback routine.
1921 * After reclaiming any cached entries, we basically go through the endpnt_type
1922 * list, canceling outstanding timeouts and free'ing data structures.
1926 endpnt_destructor(zoneid_t zoneid
, void *a
)
1928 struct endpnt_type
**npp
;
1929 struct endpnt_type
*np
;
1930 struct endpnt_type
*free_list
= NULL
;
1931 timeout_id_t t_id
= 0;
1932 extern void clcleanup_zone(zoneid_t
);
1933 extern void clcleanup4_zone(zoneid_t
);
1935 /* Make sure NFS client handles are released. */
1936 clcleanup_zone(zoneid
);
1937 clcleanup4_zone(zoneid
);
1939 endpnt_reclaim(zoneid
);
1941 * We don't need to be holding on to any locks across the call to
1942 * endpnt_reclaim() and the code below; we know that no-one can
1943 * be holding open connections for this zone (all processes and kernel
1944 * threads are gone), so nothing could be adding anything to the list.
1946 rw_enter(&endpnt_type_lock
, RW_WRITER
);
1947 npp
= &endpnt_type_list
;
1948 while ((np
= *npp
) != NULL
) {
1949 if (np
->e_zoneid
!= zoneid
) {
1953 mutex_enter(&np
->e_plock
);
1954 mutex_enter(&np
->e_ilock
);
1955 if (np
->e_itimer
!= 0) {
1956 t_id
= np
->e_itimer
;
1959 ASSERT(np
->e_cnt
== 0);
1960 ASSERT(list_head(&np
->e_pool
) == NULL
);
1961 ASSERT(list_head(&np
->e_ilist
) == NULL
);
1963 mutex_exit(&np
->e_ilock
);
1964 mutex_exit(&np
->e_plock
);
1967 * untimeout() any outstanding timers that have not yet fired.
1969 if (t_id
!= (timeout_id_t
)0)
1970 (void) untimeout(t_id
);
1972 np
->e_next
= free_list
;
1975 rw_exit(&endpnt_type_lock
);
1977 while (free_list
!= NULL
) {
1979 free_list
= free_list
->e_next
;
1981 * Wait for threads in endpnt_taskq trying to reap endpnt_ts in
1982 * the endpnt_type_t.
1984 mutex_enter(&np
->e_ilock
);
1985 while (np
->e_async_count
> 0)
1986 cv_wait(&np
->e_async_cv
, &np
->e_ilock
);
1987 cv_destroy(&np
->e_async_cv
);
1988 mutex_destroy(&np
->e_plock
);
1989 mutex_destroy(&np
->e_ilock
);
1990 list_destroy(&np
->e_pool
);
1991 list_destroy(&np
->e_ilist
);
1992 kmem_free(np
, sizeof (endpnt_type_t
));
1997 * Endpoint reclaim kmem callback routine.
2001 endpnt_repossess(void *a
)
2004 * Reclaim idle endpnt's from all zones.
2006 if (endpnt_taskq
!= NULL
)
2007 (void) taskq_dispatch(endpnt_taskq
,
2008 (task_func_t
*)endpnt_reclaim
, (void *)ALL_ZONES
,
2013 * RPC request dispatch routine. Constructs a datagram message and wraps it
2014 * around the RPC request to pass downstream.
2017 clnt_clts_dispatch_send(queue_t
*q
, mblk_t
*mp
, struct netbuf
*addr
,
2018 calllist_t
*cp
, uint_t xid
, cred_t
*cr
)
2022 struct T_unitdata_req
*udreq
;
2025 * Set up the call record.
2029 cp
->call_status
= RPC_TIMEDOUT
;
2030 cp
->call_notified
= FALSE
;
2032 "clnt_clts_dispatch_send: putting xid 0x%x on "
2033 "dispatch list\n", xid
);
2034 cp
->call_hash
= call_hash(xid
, clnt_clts_hash_size
);
2035 cp
->call_bucket
= &clts_call_ht
[cp
->call_hash
];
2036 call_table_enter(cp
);
2039 * Construct the datagram
2041 msgsz
= (int)TUNITDATAREQSZ
;
2043 * Note: if the receiver uses SCM_UCRED/getpeerucred the pid will
2046 while (!(bp
= allocb_cred(msgsz
+ addr
->len
, cr
, NOPID
))) {
2047 if (strwaitbuf(msgsz
+ addr
->len
, BPRI_LO
))
2051 udreq
= (struct T_unitdata_req
*)bp
->b_wptr
;
2052 udreq
->PRIM_type
= T_UNITDATA_REQ
;
2053 udreq
->DEST_length
= addr
->len
;
2056 bcopy(addr
->buf
, bp
->b_wptr
+ msgsz
, addr
->len
);
2057 udreq
->DEST_offset
= (t_scalar_t
)msgsz
;
2060 udreq
->DEST_offset
= 0;
2061 udreq
->OPT_length
= 0;
2062 udreq
->OPT_offset
= 0;
2064 bp
->b_datap
->db_type
= M_PROTO
;
2065 bp
->b_wptr
+= msgsz
;
2068 * Link the datagram header with the actual data
2075 if (canput(cp
->call_wq
)) {
2076 put(cp
->call_wq
, bp
);
2084 * RPC response delivery routine. Deliver the response to the waiting
2085 * thread by matching the xid.
2088 clnt_clts_dispatch_notify(mblk_t
*mp
, int resp_off
, zoneid_t zoneid
)
2090 calllist_t
*e
= NULL
;
2094 unsigned char *hdr_offset
;
2098 * If the RPC response is not contained in the same mblk as the
2099 * datagram header, then move to the next mblk.
2101 hdr_offset
= mp
->b_rptr
;
2103 if ((mp
->b_wptr
- (mp
->b_rptr
+ resp_off
)) == 0)
2106 resp
->b_rptr
+= resp_off
;
2108 ASSERT(resp
!= NULL
);
2110 if ((IS_P2ALIGNED(resp
->b_rptr
, sizeof (uint32_t))) &&
2111 (resp
->b_wptr
- resp
->b_rptr
) >= sizeof (xid
))
2112 xid
= *((uint32_t *)resp
->b_rptr
);
2115 unsigned char *p
= (unsigned char *)&xid
;
2116 unsigned char *rptr
;
2120 * Copy the xid, byte-by-byte into xid.
2124 while (rptr
< tmp
->b_wptr
) {
2126 if (++i
>= sizeof (xid
))
2133 * If we got here, we ran out of mblk space before the
2134 * xid could be copied.
2136 ASSERT(tmp
== NULL
&& i
< sizeof (xid
));
2139 "clnt_dispatch_notify(clts): message less than "
2149 * Reset the read pointer back to the beginning of the protocol
2150 * header if we moved it.
2152 if (mp
->b_rptr
!= hdr_offset
)
2153 mp
->b_rptr
= hdr_offset
;
2155 hash
= call_hash(xid
, clnt_clts_hash_size
);
2156 chtp
= &clts_call_ht
[hash
];
2157 /* call_table_find returns with the hash bucket locked */
2158 call_table_find(chtp
, xid
, e
);
2161 mutex_enter(&e
->call_lock
);
2164 * verify that the reply is coming in on
2165 * the same zone that it was sent from.
2167 if (e
->call_zoneid
!= zoneid
) {
2168 mutex_exit(&e
->call_lock
);
2169 mutex_exit(&chtp
->ct_lock
);
2170 RPCLOG0(8, "clnt_dispatch_notify (clts): incorrect "
2177 * found thread waiting for this reply.
2179 if (e
->call_reply
) {
2181 "clnt_dispatch_notify (clts): discarding old "
2182 "reply for xid 0x%x\n",
2184 freemsg(e
->call_reply
);
2186 e
->call_notified
= TRUE
;
2188 e
->call_status
= RPC_SUCCESS
;
2189 cv_signal(&e
->call_cv
);
2190 mutex_exit(&e
->call_lock
);
2191 mutex_exit(&chtp
->ct_lock
);
2194 struct rpcstat
*rpcstat
;
2196 mutex_exit(&chtp
->ct_lock
);
2197 RPCLOG(8, "clnt_dispatch_notify (clts): no caller for reply "
2201 * This is unfortunate, but we need to lookup the zone so we
2202 * can increment its "rcbadxids" counter.
2204 zone
= zone_find_by_id(zoneid
);
2207 * The zone went away...
2211 rpcstat
= zone_getspecific(rpcstat_zone_key
, zone
);
2212 if (zone_status_get(zone
) >= ZONE_IS_SHUTTING_DOWN
) {
2219 RCSTAT_INCR(rpcstat
->rpc_clts_client
, rcbadxids
);
2225 * Init routine. Called when rpcmod is loaded.
2228 clnt_clts_init(void)
2230 endpnt_cache
= kmem_cache_create("clnt_clts_endpnt_cache",
2231 sizeof (struct endpnt
), 0, NULL
, NULL
, endpnt_repossess
, NULL
,
2234 rw_init(&endpnt_type_lock
, NULL
, RW_DEFAULT
, NULL
);
2237 * Perform simple bounds checking to make sure that the setting is
2240 if (clnt_clts_max_endpoints
<= 0) {
2241 if (clnt_clts_do_bindresvport
)
2242 clnt_clts_max_endpoints
= RESERVED_PORTSPACE
;
2244 clnt_clts_max_endpoints
= NONRESERVED_PORTSPACE
;
2247 if (clnt_clts_do_bindresvport
&&
2248 clnt_clts_max_endpoints
> RESERVED_PORTSPACE
)
2249 clnt_clts_max_endpoints
= RESERVED_PORTSPACE
;
2250 else if (clnt_clts_max_endpoints
> NONRESERVED_PORTSPACE
)
2251 clnt_clts_max_endpoints
= NONRESERVED_PORTSPACE
;
2253 if (clnt_clts_hash_size
< DEFAULT_MIN_HASH_SIZE
)
2254 clnt_clts_hash_size
= DEFAULT_MIN_HASH_SIZE
;
2257 * Defer creating the taskq until rpcmod gets pushed. If we are
2258 * in diskless boot mode, rpcmod will get loaded early even before
2259 * thread_create() is available.
2261 endpnt_taskq
= NULL
;
2262 taskq_created
= FALSE
;
2263 mutex_init(&endpnt_taskq_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
2265 if (clnt_clts_endpoint_reap_interval
< DEFAULT_ENDPOINT_REAP_INTERVAL
)
2266 clnt_clts_endpoint_reap_interval
=
2267 DEFAULT_ENDPOINT_REAP_INTERVAL
;
2270 * Dispatch the taskq at an interval which is offset from the
2271 * interval that the endpoints should be reaped.
2273 clnt_clts_taskq_dispatch_interval
=
2274 (clnt_clts_endpoint_reap_interval
+ DEFAULT_INTERVAL_SHIFT
) * hz
;
2277 * Initialize the completion queue
2279 clts_call_ht
= call_table_init(clnt_clts_hash_size
);
2281 * Initialize the zone destructor callback.
2283 zone_key_create(&endpnt_destructor_key
, NULL
, NULL
, endpnt_destructor
);
2287 clnt_clts_fini(void)
2289 (void) zone_key_delete(endpnt_destructor_key
);