2 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
6 * This file contains code imported from the OFED rds source file af_rds.c
7 * Oracle elects to have and use the contents of af_rds.c under and governed
8 * by the OpenIB.org BSD license (see below for full license text). However,
9 * the following notice accompanied the original version of this file:
13 * Copyright (c) 2006 Oracle. All rights reserved.
15 * This software is available to you under a choice of one of two
16 * licenses. You may choose to be licensed under the terms of the GNU
17 * General Public License (GPL) Version 2, available from the file
18 * COPYING in the main directory of this source tree, or the
19 * OpenIB.org BSD license below:
21 * Redistribution and use in source and binary forms, with or
22 * without modification, are permitted provided that the following
25 * - Redistributions of source code must retain the above
26 * copyright notice, this list of conditions and the following
29 * - Redistributions in binary form must reproduce the above
30 * copyright notice, this list of conditions and the following
31 * disclaimer in the documentation and/or other materials
32 * provided with the distribution.
34 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
35 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
36 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
37 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
38 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
39 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
40 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
44 #include <sys/types.h>
48 #include <sys/sunddi.h>
49 #include <sys/modctl.h>
51 #include <sys/stropts.h>
52 #include <sys/socket.h>
53 #include <sys/socketvar.h>
54 #include <sys/sockio.h>
55 #include <sys/sysmacros.h>
58 #include <net/if_types.h>
60 #include <sys/ib/clients/rdsv3/rdsv3.h>
61 #include <sys/ib/clients/rdsv3/rdma.h>
62 #include <sys/ib/clients/rdsv3/rdma_transport.h>
63 #include <sys/ib/clients/rdsv3/rdsv3_debug.h>
65 extern void rdsv3_remove_bound(struct rdsv3_sock
*rds
);
66 extern int rdsv3_verify_bind_address(ipaddr_t addr
);
68 extern ddi_taskq_t
*rdsv3_taskq
;
69 extern struct rdma_cm_id
*rdsv3_rdma_listen_id
;
71 /* this is just used for stats gathering :/ */
72 kmutex_t rdsv3_sock_lock
;
73 static unsigned long rdsv3_sock_count
;
74 list_t rdsv3_sock_list
;
77 * This is called as the final descriptor referencing this socket is closed.
78 * We have to unbind the socket so that another socket can be bound to the
79 * address it was using.
81 * We have to be careful about racing with the incoming path. sock_orphan()
82 * sets SOCK_DEAD and we use that as an indicator to the rx path that new
83 * messages shouldn't be queued.
87 rdsv3_release(sock_lower_handle_t proto_handle
, int flgs
, cred_t
*cr
)
89 struct rsock
*sk
= (struct rsock
*)proto_handle
;
90 struct rdsv3_sock
*rs
;
95 rs
= rdsv3_sk_to_rs(sk
);
96 RDSV3_DPRINTF4("rdsv3_release", "Enter(rs: %p, sk: %p)", rs
, sk
);
98 rdsv3_sk_sock_orphan(sk
);
99 rdsv3_cong_remove_socket(rs
);
100 rdsv3_remove_bound(rs
);
103 * Note - rdsv3_clear_recv_queue grabs rs_recv_lock, so
104 * that ensures the recv path has completed messing
107 * Note2 - rdsv3_clear_recv_queue(rs) should be called first
108 * to prevent some race conditions, which is different from
111 rdsv3_clear_recv_queue(rs
);
112 rdsv3_send_drop_to(rs
, NULL
);
113 rdsv3_rdma_drop_keys(rs
);
114 (void) rdsv3_notify_queue_get(rs
, NULL
);
116 mutex_enter(&rdsv3_sock_lock
);
117 list_remove_node(&rs
->rs_item
);
119 mutex_exit(&rdsv3_sock_lock
);
121 while (sk
->sk_refcount
> 1) {
122 /* wait for 1 sec and try again */
126 /* this will free the rs and sk */
127 rdsv3_sk_sock_put(sk
);
129 RDSV3_DPRINTF4("rdsv3_release", "Return (rds: %p)", rs
);
135 __rdsv3_wake_sk_sleep(struct rsock
*sk
)
137 /* wakup anyone waiting in recvmsg */
138 if (!rdsv3_sk_sock_flag(sk
, SOCK_DEAD
) && sk
->sk_sleep
)
139 rdsv3_wake_up(sk
->sk_sleep
);
143 * Careful not to race with rdsv3_release -> sock_orphan which clears sk_sleep.
144 * _bh() isn't OK here, we're called from interrupt handlers. It's probably OK
145 * to wake the waitqueue after sk_sleep is clear as we hold a sock ref, but
146 * this seems more conservative.
147 * NB - normally, one would use sk_callback_lock for this, but we can
148 * get here from interrupts, whereas the network code grabs sk_callback_lock
149 * with _lock_bh only - so relying on sk_callback_lock introduces livelocks.
152 rdsv3_wake_sk_sleep(struct rdsv3_sock
*rs
)
154 RDSV3_DPRINTF4("rdsv3_wake_sk_sleep", "Enter(rs: %p)", rs
);
156 rw_enter(&rs
->rs_recv_lock
, RW_READER
);
157 __rdsv3_wake_sk_sleep(rdsv3_rs_to_sk(rs
));
158 rw_exit(&rs
->rs_recv_lock
);
163 rdsv3_getname(sock_lower_handle_t proto_handle
, struct sockaddr
*addr
,
164 socklen_t
*addr_len
, cred_t
*cr
)
166 struct rsock
*sk
= (struct rsock
*)proto_handle
;
167 struct sockaddr_in
*sin
= (struct sockaddr_in
*)addr
;
168 struct rdsv3_sock
*rs
= rdsv3_sk_to_rs(sk
);
170 RDSV3_DPRINTF4("rdsv3_getname", "Enter(rs: %p, port: %d)", rs
,
173 sin
->sin_port
= rs
->rs_bound_port
;
174 sin
->sin_addr
.s_addr
= rs
->rs_bound_addr
;
176 sin
->sin_family
= AF_INET_OFFLOAD
;
178 *addr_len
= sizeof (*sin
);
183 * RDS' poll is without a doubt the least intuitive part of the interface,
184 * as POLLIN and POLLOUT do not behave entirely as you would expect from
185 * a network protocol.
187 * POLLIN is asserted if
188 * - there is data on the receive queue.
189 * - to signal that a previously congested destination may have become
191 * - A notification has been queued to the socket (this can be a congestion
192 * update, or a RDMA completion).
194 * POLLOUT is asserted if there is room on the send queue. This does not mean
195 * however, that the next sendmsg() call will succeed. If the application tries
196 * to send to a congested destination, the system call may still fail (and
201 rdsv3_poll(sock_lower_handle_t proto_handle
, short events
, int anyyet
,
204 struct rsock
*sk
= (struct rsock
*)proto_handle
;
205 struct rdsv3_sock
*rs
= rdsv3_sk_to_rs(sk
);
206 unsigned short mask
= 0;
209 RDSV3_DPRINTF4("rdsv3_poll", "enter(%p %x %d)", rs
, events
, anyyet
);
213 * If rs_seen_congestion is on, wait until it's off.
214 * This is implemented for the following OFED code.
215 * if (rs->rs_seen_congestion)
216 * poll_wait(file, &rds_poll_waitq, wait);
218 mutex_enter(&rs
->rs_congested_lock
);
219 while (rs
->rs_seen_congestion
) {
220 cv_wait(&rs
->rs_congested_cv
,
221 &rs
->rs_congested_lock
);
223 mutex_exit(&rs
->rs_congested_lock
);
225 rw_enter(&rs
->rs_recv_lock
, RW_READER
);
226 if (!rs
->rs_cong_monitor
) {
228 * When a congestion map was updated, we signal POLLIN for
229 * "historical" reasons. Applications can also poll for
232 if (rdsv3_cong_updated_since(&rs
->rs_cong_track
))
233 mask
|= (POLLIN
| POLLRDNORM
| POLLWRBAND
);
235 mutex_enter(&rs
->rs_lock
);
236 if (rs
->rs_cong_notify
)
237 mask
|= (POLLIN
| POLLRDNORM
);
238 mutex_exit(&rs
->rs_lock
);
240 if (!list_is_empty(&rs
->rs_recv_queue
) ||
241 !list_is_empty(&rs
->rs_notify_queue
))
242 mask
|= (POLLIN
| POLLRDNORM
);
243 if (rs
->rs_snd_bytes
< rdsv3_sk_sndbuf(rs
))
244 mask
|= (POLLOUT
| POLLWRNORM
);
246 /* clear state any time we wake a seen-congested socket */
248 mutex_enter(&rs
->rs_congested_lock
);
249 rs
->rs_seen_congestion
= 0;
250 mutex_exit(&rs
->rs_congested_lock
);
253 rw_exit(&rs
->rs_recv_lock
);
256 RDSV3_DPRINTF4("rdsv3_poll", "return(%p %x)", rs
, mask
);
264 rdsv3_ioctl(sock_lower_handle_t proto_handle
, int cmd
, intptr_t arg
,
265 int mode
, int32_t *rvalp
, cred_t
*cr
)
269 struct lifreq lifr
, *lifrp
;
272 int rval
= 0, rc
, len
;
277 RDSV3_DPRINTF4("rdsv3_ioctl", "enter: cmd: %d", cmd
);
279 /* Only ipv4 for now */
280 rval
= ksocket_socket(&so4
, PF_INET
, SOCK_DGRAM
, 0, KSOCKET_NOSLEEP
,
283 RDSV3_DPRINTF2("rdsv3_ioctl", "ksocket_socket returned %d",
291 rval
= rdsv3_do_ip_ioctl(so4
, &buf
, &bufsize
, &numifs
);
292 if (rval
!= 0) break;
293 if (cmd
== SIOCGLIFNUM
) {
295 lifn
.lifn_family
= AF_INET_OFFLOAD
;
297 lifn
.lifn_count
= numifs
;
298 (void) ddi_copyout(&lifn
, (void *)arg
,
299 sizeof (struct lifnum
), 0);
302 for (lifrp
= (struct lifreq
*)buf
, rc
= 0; rc
< numifs
;
304 if (strlen(lifrp
->lifr_name
) <= IFNAMSIZ
) {
308 (void) ddi_copyout(&len
, (void *)arg
,
311 kmem_free(buf
, bufsize
);
315 if (ddi_copyin((void *)arg
, &lifc
, sizeof (struct lifconf
), 0)
317 RDSV3_DPRINTF2("rdsv3_ioctl", "ddi_copyin failed lifc");
322 rval
= rdsv3_do_ip_ioctl(so4
, &buf
, &bufsize
, &numifs
);
324 RDSV3_DPRINTF2("rdsv3_ioctl",
325 "rdsv3_do_ip_ioctl failed: %d", rval
);
329 if ((lifc
.lifc_len
> 0) && (numifs
> 0)) {
330 if (ddi_copyout(buf
, (void *)lifc
.lifc_req
,
331 (lifc
.lifc_len
< bufsize
) ? lifc
.lifc_len
:
333 RDSV3_DPRINTF2("rdsv3_ioctl",
334 "copyout of records failed");
340 lifc
.lifc_len
= bufsize
;
341 if (ddi_copyout(&lifc
, (void *)arg
, sizeof (struct lifconf
),
343 RDSV3_DPRINTF2("rdsv3_ioctl",
344 "copyout of lifconf failed");
348 kmem_free(buf
, bufsize
);
353 if (ddi_copyin((void *)arg
, &ifc
, sizeof (struct ifconf
), 0)
355 RDSV3_DPRINTF2("rdsv3_ioctl", "ddi_copyin failed ifc");
360 RDSV3_DPRINTF2("rdsv3_ioctl",
361 "O_SIOCGIFCONF: ifc_len: %d, req: %p",
362 ifc
.ifc_len
, ifc
.ifc_req
);
364 rval
= rdsv3_do_ip_ioctl_old(so4
, &buf
, &bufsize
, &numifs
);
366 RDSV3_DPRINTF2("rdsv3_ioctl",
367 "rdsv3_do_ip_ioctl_old failed: %d", rval
);
371 if ((ifc
.ifc_len
> 0) && (numifs
> 0)) {
372 if (ddi_copyout(buf
, (void *)ifc
.ifc_req
,
373 (ifc
.ifc_len
< bufsize
) ? ifc
.ifc_len
:
375 RDSV3_DPRINTF2("rdsv3_ioctl",
376 "copyout of records failed");
382 ifc
.ifc_len
= bufsize
;
383 if (ddi_copyout(&ifc
, (void *)arg
, sizeof (struct ifconf
),
385 RDSV3_DPRINTF2("rdsv3_ioctl",
386 "copyout of ifconf failed");
390 kmem_free(buf
, bufsize
);
396 case SIOCGLIFNETMASK
:
398 if (ddi_copyin((void *)arg
, &lifr
, sizeof (struct lifreq
), 0)
400 RDSV3_DPRINTF2("rdsv3_ioctl", "ddi_copyin failed lifr");
405 rc
= ksocket_ioctl(so4
, cmd
, (intptr_t)&lifr
, &rval
, CRED());
407 RDSV3_DPRINTF2("rdsv3_ioctl",
408 "ksocket_ioctl failed: %d, name: %s cmd: 0x%x",
409 rc
, lifr
.lifr_name
, cmd
);
413 (void) ddi_copyout(&lifr
, (void *)arg
,
414 sizeof (struct lifreq
), 0);
420 case SIOCGIFNETMASK
:
422 if (ddi_copyin((void *)arg
, &ifr
, sizeof (struct ifreq
), 0)
424 RDSV3_DPRINTF2("rdsv3_ioctl", "ddi_copyin failed ifr");
429 RDSV3_DPRINTF2("rdsv3_ioctl", "1. name: %s", ifr
.ifr_name
);
431 rc
= ksocket_ioctl(so4
, cmd
, (intptr_t)&ifr
, &rval
, CRED());
433 RDSV3_DPRINTF2("rdsv3_ioctl",
434 "ksocket_ioctl failed: %d, name: %s cmd: 0x%x",
435 rc
, ifr
.ifr_name
, cmd
);
440 RDSV3_DPRINTF2("rdsv3_ioctl", "2. name: %s", ifr
.ifr_name
);
442 (void) ddi_copyout(&ifr
, (void *)arg
,
443 sizeof (struct ifreq
), 0);
447 if ((cmd
>= RDS_INFO_FIRST
) &&
448 (cmd
<= RDS_INFO_LAST
)) {
449 return (rdsv3_info_ioctl((struct rsock
*)proto_handle
,
450 cmd
, (char *)arg
, rvalp
));
452 RDSV3_DPRINTF2("rdsv3_ioctl", "Unknown ioctl cmd: %d", cmd
);
453 cmn_err(CE_CONT
, "unsupported IOCTL cmd: %d \n", cmd
);
457 (void) ksocket_close(so4
, CRED());
459 RDSV3_DPRINTF4("rdsv3_ioctl", "return: %d cmd: %d", rval
, cmd
);
466 rdsv3_cancel_sent_to(struct rdsv3_sock
*rs
, char *optval
, int len
)
468 struct sockaddr_in sin
;
470 /* racing with another thread binding seems ok here */
471 if (rs
->rs_bound_addr
== 0)
472 return (-ENOTCONN
); /* XXX not a great errno */
474 if (len
< sizeof (struct sockaddr_in
))
477 if (ddi_copyin((void *)optval
, &sin
, sizeof (struct sockaddr_in
),
479 RDSV3_DPRINTF2("rdsv3_cancel_sent_to", "ddi_copyin failed sin");
483 rdsv3_send_drop_to(rs
, &sin
);
489 rdsv3_set_bool_option(unsigned char *optvar
, char *optval
, int optlen
)
493 if (optlen
< sizeof (int))
500 rdsv3_cong_monitor(struct rdsv3_sock
*rs
, char *optval
, int optlen
)
504 ret
= rdsv3_set_bool_option(&rs
->rs_cong_monitor
, optval
, optlen
);
506 if (rs
->rs_cong_monitor
) {
507 rdsv3_cong_add_socket(rs
);
509 rdsv3_cong_remove_socket(rs
);
510 rs
->rs_cong_mask
= 0;
511 rs
->rs_cong_notify
= 0;
519 rdsv3_setsockopt(sock_lower_handle_t proto_handle
, int level
,
520 int optname
, const void *optval
, socklen_t optlen
, cred_t
*cr
)
522 struct rsock
*sk
= (struct rsock
*)proto_handle
;
523 struct rdsv3_sock
*rs
= rdsv3_sk_to_rs(sk
);
526 RDSV3_DPRINTF4("rdsv3_setsockopt", "enter(%p %d %d)",
530 case RDS_CANCEL_SENT_TO
:
531 ret
= rdsv3_cancel_sent_to(rs
, (char *)optval
, optlen
);
534 ret
= rdsv3_get_mr(rs
, optval
, optlen
);
536 case RDS_GET_MR_FOR_DEST
:
537 ret
= rdsv3_get_mr_for_dest(rs
, optval
, optlen
);
540 ret
= rdsv3_free_mr(rs
, optval
, optlen
);
543 ret
= rdsv3_set_bool_option(&rs
->rs_recverr
,
544 (char *)optval
, optlen
);
546 case RDS_CONG_MONITOR
:
547 ret
= rdsv3_cong_monitor(rs
, (char *)optval
, optlen
);
550 sk
->sk_sndbuf
= *(uint_t
*)optval
;
553 sk
->sk_rcvbuf
= *(uint_t
*)optval
;
569 rdsv3_getsockopt(sock_lower_handle_t proto_handle
, int level
,
570 int optname
, void *optval
, socklen_t
*optlen
, cred_t
*cr
)
572 struct rsock
*sk
= (struct rsock
*)proto_handle
;
573 struct rdsv3_sock
*rs
= rdsv3_sk_to_rs(sk
);
576 RDSV3_DPRINTF4("rdsv3_getsockopt", "enter(%p %d %d)",
577 rs
, optname
, *optlen
);
581 RDSV3_DPRINTF4("rdsv3_getsockopt", "SO_SNDBUF(%d)",
584 *((int *)optval
) = sk
->sk_sndbuf
;
585 *optlen
= sizeof (uint_t
);
589 RDSV3_DPRINTF4("rdsv3_getsockopt", "SO_RCVBUF(%d)",
592 *((int *)optval
) = sk
->sk_rcvbuf
;
593 *optlen
= sizeof (uint_t
);
597 RDSV3_DPRINTF4("rdsv3_getsockopt", "RDSV3_RECVERR(%d)",
599 if (*optlen
< sizeof (int))
602 *(int *)optval
= rs
->rs_recverr
;
603 *optlen
= sizeof (int);
607 RDSV3_DPRINTF2("rdsv3_getsockopt",
608 "Unknown: level: %d optname: %d", level
, optname
);
612 RDSV3_DPRINTF4("rdsv3_getsockopt", "return(%p %d %d)",
618 static int rdsv3_connect(sock_lower_handle_t proto_handle
,
619 const struct sockaddr
*addr
, socklen_t addr_len
, sock_connid_t
*conn
,
622 struct rsock
*sk
= (struct rsock
*)proto_handle
;
623 struct sockaddr_in
*sin
= (struct sockaddr_in
*)addr
;
624 struct rdsv3_sock
*rs
= rdsv3_sk_to_rs(sk
);
627 RDSV3_DPRINTF4("rdsv3_connect", "Enter(rs: %p)", rs
);
629 mutex_enter(&sk
->sk_lock
);
631 if (addr_len
!= sizeof (struct sockaddr_in
)) {
636 if (sin
->sin_family
!= AF_INET_OFFLOAD
) {
641 if (sin
->sin_addr
.s_addr
== htonl(INADDR_ANY
)) {
646 rs
->rs_conn_addr
= sin
->sin_addr
.s_addr
;
647 rs
->rs_conn_port
= sin
->sin_port
;
649 sk
->sk_upcalls
->su_connected(sk
->sk_upper_handle
, 0, NULL
, -1);
651 RDSV3_DPRINTF4("rdsv3_connect", "Return(rs: %p)", rs
);
654 mutex_exit(&sk
->sk_lock
);
660 rdsv3_shutdown(sock_lower_handle_t proto_handle
, int how
, cred_t
*cr
)
662 struct rsock
*sk
= (struct rsock
*)proto_handle
;
663 struct rdsv3_sock
*rs
= rdsv3_sk_to_rs(sk
);
665 RDSV3_DPRINTF4("rdsv3_shutdown", "Enter(rs: %p)", rs
);
672 rdsv3_activate(sock_lower_handle_t proto_handle
,
673 sock_upper_handle_t sock_handle
, sock_upcalls_t
*sock_upcalls
,
674 int flags
, cred_t
*cr
)
676 struct rsock
*sk
= (struct rsock
*)proto_handle
;
677 struct rdsv3_sock
*rs
= rdsv3_sk_to_rs(sk
);
679 RDSV3_DPRINTF4("rdsv3_activate", "Enter(rs: %p)", rs
);
681 sk
->sk_upcalls
= sock_upcalls
;
682 sk
->sk_upper_handle
= sock_handle
;
684 RDSV3_DPRINTF4("rdsv3_activate", "Return (rs: %p)", rs
);
690 rdsv3_send_uio(sock_lower_handle_t proto_handle
, uio_t
*uio
,
691 struct msghdr
*msg
, cred_t
*cr
)
693 struct rsock
*sk
= (struct rsock
*)proto_handle
;
694 struct rdsv3_sock
*rs
= rdsv3_sk_to_rs(sk
);
697 RDSV3_DPRINTF4("rdsv3_send_uio", "Enter(rs: %p)", rs
);
698 ret
= rdsv3_sendmsg(rs
, uio
, msg
, uio
->uio_resid
);
700 RDSV3_DPRINTF4("rdsv3_send_uio", "Return(rs: %p ret %d)", rs
, ret
);
710 rdsv3_recv_uio(sock_lower_handle_t proto_handle
, uio_t
*uio
,
711 struct msghdr
*msg
, cred_t
*cr
)
713 struct rsock
*sk
= (struct rsock
*)proto_handle
;
714 struct rdsv3_sock
*rs
= rdsv3_sk_to_rs(sk
);
717 RDSV3_DPRINTF4("rdsv3_recv_uio", "Enter (rs: %p)", rs
);
718 ret
= rdsv3_recvmsg(rs
, uio
, msg
, uio
->uio_resid
, msg
->msg_flags
);
720 RDSV3_DPRINTF4("rdsv3_recv_uio", "Return(rs: %p ret %d)", rs
, ret
);
731 rdsv3_getpeername(sock_lower_handle_t proto_handle
, struct sockaddr
*addr
,
732 socklen_t
*addr_len
, cred_t
*cr
)
734 struct sockaddr_in
*sin
= (struct sockaddr_in
*)addr
;
735 struct rsock
*sk
= (struct rsock
*)proto_handle
;
736 struct rdsv3_sock
*rs
= rdsv3_sk_to_rs(sk
);
738 RDSV3_DPRINTF2("rdsv3_getpeername", "enter(rs: %p)", rs
);
740 (void) memset(sin
->sin_zero
, 0, sizeof (sin
->sin_zero
));
/* racy, don't care */
743 if (!rs
->rs_conn_addr
)
746 sin
->sin_port
= rs
->rs_conn_port
;
747 sin
->sin_addr
.s_addr
= rs
->rs_conn_addr
;
749 sin
->sin_family
= AF_INET_OFFLOAD
;
751 *addr_len
= sizeof (*sin
);
756 rdsv3_clrflowctrl(sock_lower_handle_t proto_handle
)
758 struct rsock
*sk
= (struct rsock
*)proto_handle
;
759 struct rdsv3_sock
*rs
= rdsv3_sk_to_rs(sk
);
761 RDSV3_DPRINTF2("rdsv3_clrflowctrl", "enter(rs: %p)", rs
);
765 static struct sock_downcalls_s rdsv3_sock_downcalls
= {
766 .sd_close
= rdsv3_release
,
767 .sd_bind
= rdsv3_bind
,
768 .sd_connect
= rdsv3_connect
,
770 .sd_getsockname
= rdsv3_getname
,
771 .sd_poll
= rdsv3_poll
,
772 .sd_ioctl
= rdsv3_ioctl
,
774 .sd_shutdown
= rdsv3_shutdown
,
775 .sd_setsockopt
= rdsv3_setsockopt
,
776 .sd_getsockopt
= rdsv3_getsockopt
,
777 .sd_send_uio
= rdsv3_send_uio
,
778 .sd_recv_uio
= rdsv3_recv_uio
,
779 .sd_activate
= rdsv3_activate
,
780 .sd_getpeername
= rdsv3_getpeername
,
782 .sd_clr_flowctrl
= NULL
785 static struct sock_downcalls_s rdsv3_sock_downcalls
= {
807 rdsv3_create(int family
, int type
, int proto
, sock_downcalls_t
**sock_downcalls
,
808 uint_t
*smodep
, int *errorp
, int flags
, cred_t
*credp
)
810 struct rdsv3_sock
*rs
;
813 RDSV3_DPRINTF4("rdsv3_create", "Enter (family: %d type: %d, proto: %d "
814 "flags: %d", family
, type
, proto
, flags
);
816 sk
= rdsv3_sk_alloc();
819 rdsv3_sock_init_data(sk
);
821 rs
= rdsv3_sk_to_rs(sk
);
823 mutex_init(&rs
->rs_lock
, NULL
, MUTEX_DRIVER
, NULL
);
824 rw_init(&rs
->rs_recv_lock
, NULL
, RW_DRIVER
, NULL
);
825 list_create(&rs
->rs_send_queue
, sizeof (struct rdsv3_message
),
826 offsetof(struct rdsv3_message
, m_sock_item
));
827 list_create(&rs
->rs_recv_queue
, sizeof (struct rdsv3_incoming
),
828 offsetof(struct rdsv3_incoming
, i_item
));
829 list_create(&rs
->rs_notify_queue
, sizeof (struct rdsv3_notifier
),
830 offsetof(struct rdsv3_notifier
, n_list
));
831 mutex_init(&rs
->rs_rdma_lock
, NULL
, MUTEX_DRIVER
, NULL
);
832 avl_create(&rs
->rs_rdma_keys
, rdsv3_mr_compare
,
833 sizeof (struct rdsv3_mr
), offsetof(struct rdsv3_mr
, r_rb_node
));
834 mutex_init(&rs
->rs_conn_lock
, NULL
, MUTEX_DRIVER
, NULL
);
835 mutex_init(&rs
->rs_congested_lock
, NULL
, MUTEX_DRIVER
, NULL
);
836 cv_init(&rs
->rs_congested_cv
, NULL
, CV_DRIVER
, NULL
);
838 rs
->rs_zoneid
= getzoneid();
841 mutex_enter(&rdsv3_sock_lock
);
842 list_insert_tail(&rdsv3_sock_list
, rs
);
844 /* Initialize RDMA/IB on the 1st socket if not done at attach */
845 if (rdsv3_sock_count
== 1) {
848 mutex_exit(&rdsv3_sock_lock
);
852 *sock_downcalls
= &rdsv3_sock_downcalls
;
854 RDSV3_DPRINTF4("rdsv3_create", "Return: %p", rs
);
856 return ((sock_lower_handle_t
)rdsv3_rs_to_sk(rs
));
/*
 * Take a hold on the socket that backs rs (via rdsv3_sk_sock_hold).
 */
void
rdsv3_sock_addref(struct rdsv3_sock *rs)
{
	struct rsock *sk;

	RDSV3_DPRINTF4("rdsv3_sock_addref", "Enter(rs: %p)", rs);

	sk = rdsv3_rs_to_sk(rs);
	rdsv3_sk_sock_hold(sk);
}
/*
 * Drop a hold on the socket that backs rs (via rdsv3_sk_sock_put).
 */
void
rdsv3_sock_put(struct rdsv3_sock *rs)
{
	struct rsock *sk;

	RDSV3_DPRINTF4("rdsv3_sock_put", "Enter(rs: %p)", rs);

	sk = rdsv3_rs_to_sk(rs);
	rdsv3_sk_sock_put(sk);
}
874 rdsv3_sock_inc_info(struct rsock
*sock
, unsigned int len
,
875 struct rdsv3_info_iterator
*iter
, struct rdsv3_info_lengths
*lens
)
877 struct rdsv3_sock
*rs
;
878 struct rdsv3_incoming
*inc
;
879 unsigned int total
= 0;
881 RDSV3_DPRINTF4("rdsv3_sock_inc_info", "Enter(rs: %p)",
882 rdsv3_sk_to_rs(sock
));
884 len
/= sizeof (struct rds_info_message
);
886 mutex_enter(&rdsv3_sock_lock
);
888 RDSV3_FOR_EACH_LIST_NODE(rs
, &rdsv3_sock_list
, rs_item
) {
889 rw_enter(&rs
->rs_recv_lock
, RW_READER
);
891 /* XXX too lazy to maintain counts.. */
892 RDSV3_FOR_EACH_LIST_NODE(inc
, &rs
->rs_recv_queue
, i_item
) {
895 rdsv3_inc_info_copy(inc
, iter
, inc
->i_saddr
,
896 rs
->rs_bound_addr
, 1);
899 rw_exit(&rs
->rs_recv_lock
);
902 mutex_exit(&rdsv3_sock_lock
);
905 lens
->each
= sizeof (struct rds_info_message
);
907 RDSV3_DPRINTF4("rdsv3_sock_inc_info", "return(rs: %p)",
908 rdsv3_sk_to_rs(sock
));
912 rdsv3_sock_info(struct rsock
*sock
, unsigned int len
,
913 struct rdsv3_info_iterator
*iter
, struct rdsv3_info_lengths
*lens
)
915 struct rds_info_socket sinfo
;
916 struct rdsv3_sock
*rs
;
919 RDSV3_DPRINTF4("rdsv3_sock_info", "Enter(rs: %p)",
920 rdsv3_sk_to_rs(sock
));
922 len
/= sizeof (struct rds_info_socket
);
924 mutex_enter(&rdsv3_sock_lock
);
926 if ((len
< rdsv3_sock_count
) || (iter
->addr
== NULL
))
929 bytes
= sizeof (struct rds_info_socket
);
930 RDSV3_FOR_EACH_LIST_NODE(rs
, &rdsv3_sock_list
, rs_item
) {
931 sinfo
.sndbuf
= rdsv3_sk_sndbuf(rs
);
932 sinfo
.rcvbuf
= rdsv3_sk_rcvbuf(rs
);
933 sinfo
.bound_addr
= rs
->rs_bound_addr
;
934 sinfo
.connected_addr
= rs
->rs_conn_addr
;
935 sinfo
.bound_port
= rs
->rs_bound_port
;
936 sinfo
.connected_port
= rs
->rs_conn_port
;
938 rdsv3_info_copy(iter
, &sinfo
, bytes
);
941 RDSV3_DPRINTF4("rdsv3_sock_info", "Return(rs: %p)",
942 rdsv3_sk_to_rs(sock
));
945 lens
->nr
= rdsv3_sock_count
;
946 lens
->each
= sizeof (struct rds_info_socket
);
948 mutex_exit(&rdsv3_sock_lock
);
951 rdsv3_delayed_work_t
*rdsv3_rdma_dwp
= NULL
;
952 uint_t rdsv3_rdma_init_delay
= 5; /* secs */
953 extern void rdsv3_rdma_init_worker(struct rdsv3_work_s
*work
);
958 RDSV3_DPRINTF4("rdsv3_exit", "Enter");
960 if (rdsv3_rdma_dwp
) {
961 rdsv3_cancel_delayed_work(rdsv3_rdma_dwp
);
964 (void) ddi_taskq_dispatch(rdsv3_taskq
, rdsv3_rdma_exit
,
966 while (rdsv3_rdma_listen_id
!= NULL
) {
968 RDSV3_DPRINTF5("rdsv3", "%s-%d Waiting for rdsv3_rdma_exit",
977 rdsv3_threads_exit();
979 rdsv3_info_deregister_func(RDS_INFO_SOCKETS
, rdsv3_sock_info
);
980 rdsv3_info_deregister_func(RDS_INFO_RECV_MESSAGES
,
981 rdsv3_sock_inc_info
);
983 if (rdsv3_rdma_dwp
) {
984 kmem_free(rdsv3_rdma_dwp
, sizeof (rdsv3_delayed_work_t
));
985 rdsv3_rdma_dwp
= NULL
;
988 RDSV3_DPRINTF4("rdsv3_exit", "Return");
997 RDSV3_DPRINTF4("rdsv3_init", "Enter");
1001 ret
= rdsv3_conn_init();
1004 ret
= rdsv3_threads_init();
1007 ret
= rdsv3_sysctl_init();
1010 ret
= rdsv3_stats_init();
1014 rdsv3_info_register_func(RDS_INFO_SOCKETS
, rdsv3_sock_info
);
1015 rdsv3_info_register_func(RDS_INFO_RECV_MESSAGES
, rdsv3_sock_inc_info
);
1017 /* rdsv3_rdma_init need to be called with a little delay */
1018 rdsv3_rdma_dwp
= kmem_zalloc(sizeof (rdsv3_delayed_work_t
), KM_SLEEP
);
1019 RDSV3_INIT_DELAYED_WORK(rdsv3_rdma_dwp
, rdsv3_rdma_init_worker
);
1020 rdsv3_queue_delayed_work(rdsv3_wq
, rdsv3_rdma_dwp
,
1021 rdsv3_rdma_init_delay
);
1023 RDSV3_DPRINTF4("rdsv3_init", "Return");
1030 rdsv3_sysctl_exit();
1032 rdsv3_threads_exit();