4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
25 * Copyright (c) 2007, The Ohio State University. All rights reserved.
27 * Portions of this source code is developed by the team members of
28 * The Ohio State University's Network-Based Computing Laboratory (NBCL),
29 * headed by Professor Dhabaleswar K. (DK) Panda.
31 * Acknowledgements to contributions from developers:
32 * Ranjit Noronha: noronha@cse.ohio-state.edu
33 * Lei Chai : chail@cse.ohio-state.edu
34 * Weikuan Yu : yuw@cse.ohio-state.edu
43 * ib.h, rpcib plugin interface.
46 #include <sys/types.h>
48 #include <sys/sunddi.h>
52 #include <rpc/rpc_rdma.h>
53 #include <sys/ib/ibtl/ibti.h>
#define MAX_BUFS 1024 /* max no. of buffers per pool */

/*
 * Default CQ size.
 * The replacement list is parenthesized so that the subtraction is
 * evaluated first wherever the macro is used; without the parentheses
 * an expression such as (2 * DEF_CQ_SIZE) would expand to
 * (2 * 4096 - 1) and silently produce the wrong value.
 */
#define DEF_CQ_SIZE (4096 - 1)
64 * Tavor returns the next higher power of 2
65 * CQ entries than the requested size.
66 * For instance, if you request (2^12 - 1)
67 * CQ entries, Tavor returns 2^12 entries.
68 * 4K CQ entries suffice. Hence, 4096 - 1.
/* Default work-queue depths. */
#define DEF_SQ_SIZE 128 /* send queue entries */
#define DEF_RQ_SIZE 256 /* receive queue entries */

#define RQ_DSEG_MAX 1 /* data segments per receive-queue entry */
#define IBSRM_HB 0x8000 /* mask for the high-order bit of a pkey */

/* How many times a refresh is retried after IBT_CM_CONN_STALE. */
#define REFRESH_ATTEMPTS 3
/*
 * Short names for the HCA, QP and CQ structures, declared up front so
 * that pointers to them can be used before the struct bodies appear.
 */
typedef struct rib_hca_s rib_hca_t;
typedef struct rib_qp_s rib_qp_t;
typedef struct rib_cq_s rib_cq_t;
84 * Notification for RDMA_DONE is based on xid
86 struct rdma_done_list
{
87 uint32_t xid
; /* XID waiting for RDMA_DONE */
88 kcondvar_t rdma_done_cv
; /* cv for RDMA_DONE */
89 struct rdma_done_list
*next
;
90 struct rdma_done_list
*prev
;
94 * State of the plugin.
95 * ACCEPT = accepting new connections and requests
96 * NO_ACCEPT = not accepting new connections and requests
/*
 * Parenthesized so the negative literal always expands as one operand,
 * regardless of the operators that surround the macro at the use site.
 */
#define REPLY_WAIT (-1)
/* Untyped pointer, and the sync-memory handle type that aliases it. */
typedef void *rib_pvoid;
typedef rib_pvoid RIB_SYNCMEM_HANDLE;
115 * IB buffer pool management structure
122 kmutex_t buflock
; /* lock for this structure */
123 caddr_t buf
; /* pool address */
124 uint32_t bufhandle
; /* rkey for this pool */
125 ulong_t bufsize
; /* size of pool */
126 int rsize
; /* size of each element */
127 int numelems
; /* no. of elements allocated */
128 int buffree
; /* no. of free elements */
129 void *buflist
[1]; /* free elements in pool */
134 ibt_mr_hdl_t
*mr_hdl
;
135 ibt_mr_desc_t
*mr_desc
; /* vaddr, lkey, rkey */
139 * ATS related defines and structures.
/* Length of the ATS AR data field (unit not shown here; presumably bytes). */
#define ATS_AR_DATA_LEN 16
145 * Service types supported by RPCIB
146 * For now only NFS is supported.
152 * Tracks consumer state (client or server).
164 ibt_cq_hdl_t rib_cq_hdl
;
168 * Each registered service's data structure.
/* Short name for struct rib_service_s. */
typedef struct rib_service_s rib_service_t;
171 struct rib_service_s
{
172 uint32_t srv_type
; /* i.e, NFS, NLM, v4CBD */
173 ibt_srv_hdl_t srv_hdl
; /* from ibt_register call */
181 typedef struct rpcib_state
{
182 ibt_clnt_hdl_t ibt_clnt_hdl
;
184 uint32_t nhca_inited
;
185 rib_hca_t
*hcas_list
;
186 krwlock_t hcas_list_lock
; /* protects hcas_list */
188 kmutex_t open_hca_lock
;
189 queue_t
*q
; /* up queue for a serv_type */
191 rib_service_t
*service_list
;
192 krwlock_t service_list_lock
;
193 kmutex_t listen_lock
;
200 krwlock_t conn_lock
; /* list lock */
201 CONN
*conn_hd
; /* list head */
205 HCA_DETACHED
, /* hca in detached state */
206 HCA_INITED
, /* hca in up and running state */
/* Short name for struct rib_hca_service_s. */
typedef struct rib_hca_service_s rib_hca_service_t;
210 struct rib_hca_service_s
{
213 ibt_sbind_hdl_t sbind_hdl
;
214 rib_hca_service_t
*next
;
218 * RPCIB per HCA structure
221 ibt_clnt_hdl_t ibt_clnt_hdl
;
226 ibt_hca_hdl_t hca_hdl
; /* HCA handle */
227 ibt_hca_attr_t hca_attrs
; /* HCA attributes */
229 rib_hca_service_t
*bound_services
;
230 krwlock_t bound_services_lock
;
233 ibt_hca_portinfo_t
*hca_ports
;
235 enum hca_state state
; /* state of HCA */
236 krwlock_t state_lock
; /* protects state field */
237 bool_t inuse
; /* indicates HCA usage */
238 kmutex_t inuse_lock
; /* protects inuse field */
240 rib_conn_list_t cl_conn_list
; /* client conn list */
241 rib_conn_list_t srv_conn_list
; /* server conn list */
250 rib_bufpool_t
*recv_pool
; /* recv buf pool */
251 rib_bufpool_t
*send_pool
; /* send buf pool */
253 void *iblock
; /* interrupt cookie */
255 kmem_cache_t
*server_side_cache
; /* long reply pool */
258 krwlock_t avl_rw_lock
;
259 volatile bool_t avl_init
;
260 kmutex_t cache_allocation_lock
;
261 ddi_taskq_t
*cleanup_helper
;
263 ibt_srv_hdl_t srv_hdl
;
266 volatile uint64_t cache_allocation
;
268 uint64_t cache_misses
;
269 uint64_t cache_cold_misses
;
270 uint64_t cache_hot_misses
;
271 uint64_t cache_misses_above_the_limit
;
273 struct rib_hca_s
*next
;
278 * Structure on wait state of a post send
283 kmutex_t sendwait_lock
;
287 int nsbufs
; /* # of send buffers posted */
288 uint64_t sbufaddr
[DSEG_MAX
]; /* posted send buffers */
298 * Structure on reply descriptor for recv queue.
299 * Different from the above posting of a descriptor.
304 uint64_t vaddr_cq
; /* buf addr from CQ */
320 uint64_t addr
; /* posted buf addr */
324 * Per QP data structure
328 rib_mode_t mode
; /* RIB_SERVER or RIB_CLIENT */
330 ibt_channel_hdl_t qp_hdl
;
335 ibt_rc_chan_query_attr_t qp_q_attrs
;
336 rib_cq_t
*send_cq
; /* send CQ */
337 rib_cq_t
*recv_cq
; /* recv CQ */
340 * Number of pre-posted rbufs
342 uint_t n_posted_rbufs
;
343 kcondvar_t posted_rbufs_cv
;
344 kmutex_t posted_rbufs_lock
;
347 * Number of SENDs pending completion
351 kcondvar_t send_rbufs_cv
;
352 kmutex_t send_rbufs_lock
;
357 uint_t rep_list_size
;
358 struct reply
*replylist
;
359 kmutex_t replylist_lock
;
362 * server only, RDMA_DONE
364 struct rdma_done_list
*rdlist
;
365 kmutex_t rdlist_lock
;
368 kcondvar_t cb_conn_cv
;
370 caddr_t q
; /* upstream queue */
/*
 * Conversions between a CONN and the rib_qp_t that embeds it.
 *
 * ctoqp: read the c_private pointer of a CONN as a rib_qp_t pointer.
 * qptoc: take the address of the rdmaconn member of a rib_qp_t as a
 *        CONN pointer.
 */
#define ctoqp(c) ((rib_qp_t *)((c)->c_private))
#define qptoc(qp) ((CONN *)&((qp)->rdmaconn))
378 * Timeout for various calls
/* Wait limits; the time unit is not stated alongside these definitions. */
#define CONN_WAIT_TIME 40
#define SEND_WAIT_TIME 40 /* limit on waiting for a send to complete */
#define REPLY_WAIT_TIME 40 /* limit on waiting for the remote QP's reply */