2 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
6 * This file contains code imported from the OFED rds source file bind.c
7 * Oracle elects to have and use the contents of bind.c under and governed
8 * by the OpenIB.org BSD license (see below for full license text). However,
9 * the following notice accompanied the original version of this file:
13 * Copyright (c) 2006 Oracle. All rights reserved.
15 * This software is available to you under a choice of one of two
16 * licenses. You may choose to be licensed under the terms of the GNU
17 * General Public License (GPL) Version 2, available from the file
18 * COPYING in the main directory of this source tree, or the
19 * OpenIB.org BSD license below:
21 * Redistribution and use in source and binary forms, with or
22 * without modification, are permitted provided that the following
25 * - Redistributions of source code must retain the above
26 * copyright notice, this list of conditions and the following
29 * - Redistributions in binary form must reproduce the above
30 * copyright notice, this list of conditions and the following
31 * disclaimer in the documentation and/or other materials
32 * provided with the distribution.
34 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
35 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
36 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
37 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
38 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
39 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
40 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
44 #include <sys/types.h>
45 #include <sys/sysmacros.h>
46 #include <sys/random.h>
49 #include <sys/ib/clients/rdsv3/rdsv3.h>
50 #include <sys/ib/clients/rdsv3/rdsv3_debug.h>
/*
 * Global bind-table state.
 *
 * rdsv3_bind_tree is an AVL tree whose nodes are struct rdsv3_ip_bucket
 * (see the avl_create() call in the init code).  rdsv3_bind_lock is the
 * mutex that rdsv3_find_ip_bucket() holds around its avl_find() and
 * avl_insert() calls on that tree.
 */
52 kmutex_t rdsv3_bind_lock
;
53 avl_tree_t rdsv3_bind_tree
;
56 * Each node in the rdsv3_bind_tree is of this type.
/*
 * One bucket per (IP address, zone) pair, stored in rdsv3_bind_tree.
 *
 * NOTE(review): other code in this file also dereferences ->ip, ->zone
 * and ->rwlock on this structure; the declarations of those members are
 * not visible in this excerpt.
 */
58 struct rdsv3_ip_bucket
{
/* Tree linkage; its offset is what avl_create() is given. */
61 avl_node_t ip_avl_node
;
/*
 * Socket bound to each port, or NULL when the port is free.  Users index
 * this array with the port converted to host byte order (ntohs()).
 */
64 struct rdsv3_sock
*port
[65536];
/*
 * Comparison callback registered with avl_create() for rdsv3_bind_tree.
 *
 * a: pointer to the search key, read here as a uint64_t.
 * b: pointer to a struct rdsv3_ip_bucket.
 *
 * The bucket's key is rebuilt as ((uint64_t)ip << 32) | zone, which is
 * the same layout rdsv3_find_ip_bucket() uses for the value it looks up.
 *
 * NOTE(review): the statements executed by each branch (the return
 * values) are not visible in this excerpt; presumably positive, negative
 * and zero for greater, less and equal -- confirm against the full file.
 */
68 rdsv3_bind_node_compare(const void *a
, const void *b
)
70 struct rdsv3_ip_bucket
*bp
= (struct rdsv3_ip_bucket
*)b
;
/* search key is greater than this bucket's (ip, zone) key */
72 if (*(uint64_t *)a
> (((uint64_t)bp
->ip
<< 32) | bp
->zone
))
/* search key is smaller than this bucket's (ip, zone) key */
74 else if (*(uint64_t *)a
< (((uint64_t)bp
->ip
<< 32) | bp
->zone
))
/*
 * Body of the bind-table initialisation routine.
 *
 * Initialises rdsv3_bind_lock with type MUTEX_DRIVER, then creates
 * rdsv3_bind_tree: ordered by rdsv3_bind_node_compare(), node size
 * sizeof (struct rdsv3_ip_bucket), linked through the ip_avl_node member.
 *
 * NOTE(review): the function signature is not visible in this excerpt;
 * the name rdsv3_bind_tree_init is taken from the debug strings below.
 */
83 RDSV3_DPRINTF4("rdsv3_bind_tree_init", "Enter");
85 mutex_init(&rdsv3_bind_lock
, NULL
, MUTEX_DRIVER
, NULL
);
86 avl_create(&rdsv3_bind_tree
, rdsv3_bind_node_compare
,
87 sizeof (struct rdsv3_ip_bucket
),
88 offsetof(struct rdsv3_ip_bucket
, ip_avl_node
));
90 RDSV3_DPRINTF4("rdsv3_bind_tree_init", "Return");
93 /* called on detach */
/*
 * Body of the bind-table teardown routine.
 *
 * Each bucket handed back by avl_destroy_nodes() has its rwlock
 * destroyed and its memory released with kmem_free(); once that returns
 * NULL the tree itself and rdsv3_bind_lock are destroyed.
 *
 * NOTE(review): the function signature, the declaration of 'cookie' and
 * the head of the loop that assigns bucketp are not visible in this
 * excerpt; the name rdsv3_bind_tree_exit is taken from the debug strings.
 */
97 struct rdsv3_ip_bucket
*bucketp
;
100 RDSV3_DPRINTF2("rdsv3_bind_tree_exit", "Enter");
/* drain the tree one node at a time, tearing down each bucket */
103 avl_destroy_nodes(&rdsv3_bind_tree
, &cookie
)) != NULL
) {
104 rw_destroy(&bucketp
->rwlock
);
105 kmem_free(bucketp
, sizeof (struct rdsv3_ip_bucket
));
/* buckets are gone; release the tree and the lock that guarded it */
108 avl_destroy(&rdsv3_bind_tree
);
109 mutex_destroy(&rdsv3_bind_lock
);
111 RDSV3_DPRINTF2("rdsv3_bind_tree_exit", "Return");
/*
 * Find the bucket for (ipaddr, zoneid) in rdsv3_bind_tree, creating it
 * if it does not exist yet.
 *
 * ipaddr: IP address; callers in this file pass it through ntohl() first.
 * zoneid: zone identifier.
 *
 * The lookup and any insertion both happen with rdsv3_bind_lock held.  A
 * missing bucket is allocated zero-filled with KM_SLEEP, gets its rwlock
 * initialised and its ip/zone recorded, and is inserted at the position
 * avl_find() reported through 'where'.
 *
 * The declared return type is struct rdsv3_ip_bucket *.
 * NOTE(review): the declaration of 'where', the assignment target of the
 * kmem_zalloc() call and the return statement are not visible in this
 * excerpt.
 * NOTE(review): 'needle' ORs zoneid into the low half of a 64-bit key;
 * if zoneid_t is a signed type, a negative zoneid would sign-extend into
 * the ip half -- confirm that cannot happen.
 */
114 struct rdsv3_ip_bucket
*
115 rdsv3_find_ip_bucket(ipaddr_t ipaddr
, zoneid_t zoneid
)
117 struct rdsv3_ip_bucket
*bucketp
;
/* same key layout as rdsv3_bind_node_compare(): ip in the high 32 bits */
119 uint64_t needle
= ((uint64_t)ipaddr
<< 32) | zoneid
;
121 mutex_enter(&rdsv3_bind_lock
);
122 bucketp
= avl_find(&rdsv3_bind_tree
, &needle
, &where
);
123 if (bucketp
== NULL
) {
124 /* allocate a new bucket for this IP & zone */
126 kmem_zalloc(sizeof (struct rdsv3_ip_bucket
), KM_SLEEP
);
127 rw_init(&bucketp
->rwlock
, NULL
, RW_DRIVER
, NULL
);
128 bucketp
->ip
= ipaddr
;
129 bucketp
->zone
= zoneid
;
/* 'where' was filled in by the failed avl_find() above */
130 avl_insert(&rdsv3_bind_tree
, bucketp
, where
);
132 mutex_exit(&rdsv3_bind_lock
);
138 * Return the rdsv3_sock bound at the given local address.
140 * The rx path can race with rdsv3_release(). If the socket has already
141 * been marked SOCK_DEAD, no hold is taken on it for the rx path.
/*
 * Look up the socket bound to 'port' in the connection's bucket.
 *
 * conn: connection; its c_bucketp bucket and c_laddr address are used.
 * port: port in network byte order (converted with ntohs() for indexing).
 *
 * The bucket's rwlock is held as reader for the lookup.  When a socket
 * is found and it is not flagged SOCK_DEAD, a hold is taken on it via
 * rdsv3_sk_sock_hold() before the lock is dropped.
 *
 * NOTE(review): the else branch of the SOCK_DEAD check and the return
 * statement are not visible in this excerpt.
 */
144 rdsv3_find_bound(struct rdsv3_connection
*conn
, uint16_be_t port
)
146 struct rdsv3_sock
*rs
;
148 RDSV3_DPRINTF4("rdsv3_find_bound", "Enter(ip:port: %u.%u.%u.%u:%d)",
149 NIPQUAD(conn
->c_laddr
), ntohs(port
));
151 rw_enter(&conn
->c_bucketp
->rwlock
, RW_READER
);
/* the bucket stores the address in host order, c_laddr is converted */
152 ASSERT(ntohl(conn
->c_laddr
) == conn
->c_bucketp
->ip
);
153 rs
= conn
->c_bucketp
->port
[ntohs(port
)];
/* only take a hold on a socket that is not marked SOCK_DEAD */
154 if (rs
&& !rdsv3_sk_sock_flag(rdsv3_rs_to_sk(rs
), SOCK_DEAD
))
155 rdsv3_sk_sock_hold(rdsv3_rs_to_sk(rs
));
158 rw_exit(&conn
->c_bucketp
->rwlock
);
160 RDSV3_DPRINTF5("rdsv3_find_bound", "returning rs %p for %u.%u.%u.%u:%d",
161 rs
, NIPQUAD(conn
->c_laddr
), ntohs(port
));
166 /* returns -ve errno or +ve port */
/*
 * Bind socket 'rs' to (addr, *port) in the bucket for that address and
 * the socket's zone.
 *
 * rs:   socket being bound; rs_zoneid selects the zone.
 * addr: address in network byte order (converted with ntohl() for the
 *       bucket lookup, stored unconverted in rs_bound_addr).
 * port: in/out, network byte order.  It is read with ntohs() to seed
 *       'rover' and written with htons(rover) when a free slot is found.
 *
 * 'ret' starts out as -EADDRINUSE.  'rover' is taken either from *port
 * or from random_get_pseudo_bytes(), in which case it is raised to at
 * least 2.  With the bucket's rwlock held as writer, slots are examined
 * starting at 'rover' until one is NULL or 'rover' has passed 'last'.
 * The binding is then recorded in the socket and the bucket and a
 * reference is added with rdsv3_sock_addref().
 *
 * NOTE(review): the conditions selecting between the two 'rover'
 * sources, the assignments to 'last', the head of the do/while loop, the
 * condition guarding the binding block, the update of 'ret' and the
 * return statement are not visible in this excerpt.
 */
168 rdsv3_add_bound(struct rdsv3_sock
*rs
, uint32_be_t addr
, uint16_be_t
*port
)
170 int ret
= -EADDRINUSE
;
171 uint16_t rover
, last
;
172 struct rdsv3_ip_bucket
*bucketp
;
174 RDSV3_DPRINTF4("rdsv3_add_bound", "Enter(addr:port: %x:%x)",
175 ntohl(addr
), ntohs(*port
));
/* start the search at the caller's port, in host byte order */
178 rover
= ntohs(*port
);
/* alternative starting point: a pseudo-random port, never below 2 */
181 (void) random_get_pseudo_bytes((uint8_t *)&rover
,
183 rover
= MAX(rover
, 2);
/* rdsv3_find_ip_bucket() creates the bucket if it does not exist */
187 bucketp
= rdsv3_find_ip_bucket(ntohl(addr
), rs
->rs_zoneid
);
189 /* leave the bind lock and get the bucket lock */
190 rw_enter(&bucketp
->rwlock
, RW_WRITER
);
/* a NULL slot means the port is free: report it back to the caller */
196 if (bucketp
->port
[rover
] == NULL
) {
197 *port
= htons(rover
);
/* rover is a uint16_t, so the post-increment wraps from 65535 to 0 */
201 } while (rover
++ != last
);
/* record the binding on the socket and in the bucket's port table */
204 rs
->rs_bound_addr
= addr
;
205 rs
->rs_bound_port
= *port
;
206 bucketp
->port
[rover
] = rs
;
208 rdsv3_sock_addref(rs
);
210 RDSV3_DPRINTF5("rdsv3_add_bound",
211 "rs %p binding to %u.%u.%u.%u:%d",
212 rs
, NIPQUAD(addr
), rover
);
215 rw_exit(&bucketp
->rwlock
);
217 RDSV3_DPRINTF4("rdsv3_add_bound", "Return(ret: %d port: %d)",
/*
 * Undo a binding made by rdsv3_add_bound().
 *
 * rs: socket to unbind.  Nothing inside the guarded block runs when
 *     rs->rs_bound_addr is zero.
 *
 * For a bound socket, the bucket for its address is looked up, the
 * bucket's rwlock is taken as writer, the port slot is reset to NULL and
 * rs_bound_addr is cleared before the lock is released.
 *
 * NOTE(review): the second argument of the rdsv3_find_ip_bucket() call
 * and any statements between the rw_exit() and the closing debug print
 * are not visible in this excerpt.
 * NOTE(review): rdsv3_add_bound() passes the network-order address
 * straight to NIPQUAD(), while the debug print here wraps it in htonl();
 * rs_bound_port is also printed without ntohs().  This looks
 * inconsistent -- confirm which byte order NIPQUAD() expects.
 */
225 rdsv3_remove_bound(struct rdsv3_sock
*rs
)
227 RDSV3_DPRINTF4("rdsv3_remove_bound", "Enter(rs: %p)", rs
);
/* a zero bound address is treated as "not bound" */
229 if (rs
->rs_bound_addr
) {
230 struct rdsv3_ip_bucket
*bucketp
;
232 RDSV3_DPRINTF5("rdsv3_remove_bound",
233 "rs %p unbinding from %u.%u.%u.%u:%x",
234 rs
, NIPQUAD(htonl(rs
->rs_bound_addr
)), rs
->rs_bound_port
);
236 bucketp
= rdsv3_find_ip_bucket(ntohl(rs
->rs_bound_addr
),
239 rw_enter(&bucketp
->rwlock
, RW_WRITER
);
/* free the port slot, indexed by the host-order port number */
240 bucketp
->port
[ntohs(rs
->rs_bound_port
)] = NULL
;
242 rs
->rs_bound_addr
= 0;
243 rw_exit(&bucketp
->rwlock
);
248 RDSV3_DPRINTF4("rdsv3_remove_bound", "Return(rs: %p)", rs
);
253 rdsv3_bind(sock_lower_handle_t proto_handle
, struct sockaddr
*sa
,
254 socklen_t len
, cred_t
*cr
)
256 struct rsock
*sk
= (struct rsock
*)proto_handle
;
257 sin_t
*sin
= (sin_t
*)sa
;
258 struct rdsv3_sock
*rs
= rdsv3_sk_to_rs(sk
);
261 if (len
!= sizeof (sin_t
) || (sin
== NULL
) ||
262 !OK_32PTR((char *)sin
)) {
263 RDSV3_DPRINTF2("rdsv3_bind", "address to bind not specified");
267 RDSV3_DPRINTF4("rdsv3_bind", "Enter(rs: %p, addr: 0x%x, port: %x)",
268 rs
, ntohl(sin
->sin_addr
.s_addr
), htons(sin
->sin_port
));
270 if (sin
->sin_addr
.s_addr
== INADDR_ANY
) {
271 RDSV3_DPRINTF2("rdsv3_bind", "Invalid address");
275 /* We don't allow multiple binds */
276 if (rs
->rs_bound_addr
) {
277 RDSV3_DPRINTF2("rdsv3_bind", "Multiple binds not allowed");
281 ret
= rdsv3_add_bound(rs
, sin
->sin_addr
.s_addr
, &sin
->sin_port
);
286 rs
->rs_transport
= rdsv3_trans_get_preferred(sin
->sin_addr
.s_addr
);
287 if (!rs
->rs_transport
) {
288 rdsv3_remove_bound(rs
);
289 if (rdsv3_printk_ratelimit()) {
290 RDSV3_DPRINTF1("rdsv3_bind",
291 "RDS: rdsv3_bind() could not find a transport.\n");
293 return (EADDRNOTAVAIL
);
296 RDSV3_DPRINTF4("rdsv3_bind", "Return: Assigned port: %x to sock: %p",