Merge remote-tracking branch 'origin/master'
[unleashed/lotheac.git] / usr / src / uts / common / rpc / ib.h
blobc3fa4010f633fbd33c0a9f61dcacc1be65c80dd2
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
22 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
25 * Copyright (c) 2007, The Ohio State University. All rights reserved.
27 * Portions of this source code is developed by the team members of
28 * The Ohio State University's Network-Based Computing Laboratory (NBCL),
29 * headed by Professor Dhabaleswar K. (DK) Panda.
31 * Acknowledgements to contributions from developers:
32 * Ranjit Noronha: noronha@cse.ohio-state.edu
33 * Lei Chai : chail@cse.ohio-state.edu
34 * Weikuan Yu : yuw@cse.ohio-state.edu
39 #ifndef _IB_H
40 #define _IB_H
43 * ib.h, rpcib plugin interface.
46 #include <sys/types.h>
47 #include <sys/ddi.h>
48 #include <sys/sunddi.h>
49 #include <sys/conf.h>
50 #include <sys/stat.h>
51 #include <rpc/rpc.h>
52 #include <rpc/rpc_rdma.h>
53 #include <sys/ib/ibtl/ibti.h>
54 #include <sys/avl.h>
56 #ifdef __cplusplus
57 extern "C" {
58 #endif
#define	MAX_BUFS	1024	/* max no. of buffers per pool */

/*
 * Default CQ size.
 *
 * Tavor returns the next higher power of 2 CQ entries than the
 * requested size.  For instance, if you request (2^12 - 1) CQ entries,
 * Tavor returns 2^12 entries.  4K CQ entries suffice.  Hence, 4096 - 1.
 *
 * The replacement list is parenthesized so the macro evaluates to 4095
 * inside any surrounding expression (e.g. DEF_CQ_SIZE * 2); without the
 * parentheses operator precedence would split the subtraction.
 */
#define	DEF_CQ_SIZE	(4096 - 1)
/*
 * Default queue sizes and data segment limits.
 */
#define	DEF_SQ_SIZE		128	/* default SendQ size */
#define	DEF_RQ_SIZE		256	/* default RecvQ size */
#define	DSEG_MAX		2	/* sizes send_wid.sbufaddr[] */
#define	RQ_DSEG_MAX		1	/* default RQ data seg */
#define	IBSRM_HB		0x8000	/* high order bit of pkey */

/*
 * Max no. of refresh attempts on IBT_CM_CONN_STALE error.
 */
#define	REFRESH_ATTEMPTS	3
/*
 * Forward declarations.  The structures are defined further down in
 * this header but are referenced by pointer before their definitions.
 */
typedef struct rib_hca_s	rib_hca_t;
typedef struct rib_qp_s		rib_qp_t;
typedef struct rib_cq_s		rib_cq_t;
84 * Notification for RDMA_DONE is based on xid
86 struct rdma_done_list {
87 uint32_t xid; /* XID waiting for RDMA_DONE */
88 kcondvar_t rdma_done_cv; /* cv for RDMA_DONE */
89 struct rdma_done_list *next;
90 struct rdma_done_list *prev;
/*
 * State of the plugin.
 *	ACCEPT    = accepting new connections and requests
 *	NO_ACCEPT = not accepting new connections and requests
 */
#define	ACCEPT		1
#define	NO_ACCEPT	2

/*
 * Send Wait states
 */
#define	SEND_WAIT	(-1)

/*
 * Reply states
 */
#define	REPLY_WAIT	(-1)
/*
 * Opaque pointer type, and the sync-memory handle type built on it.
 */
typedef void		*rib_pvoid;
typedef rib_pvoid	RIB_SYNCMEM_HANDLE;
115 * IB buffer pool management structure
119 * Buffer pool info
121 typedef struct {
122 kmutex_t buflock; /* lock for this structure */
123 caddr_t buf; /* pool address */
124 uint32_t bufhandle; /* rkey for this pool */
125 ulong_t bufsize; /* size of pool */
126 int rsize; /* size of each element */
127 int numelems; /* no. of elements allocated */
128 int buffree; /* no. of free elements */
129 void *buflist[1]; /* free elements in pool */
130 } bufpool_t;
132 typedef struct {
133 bufpool_t *bpool;
134 ibt_mr_hdl_t *mr_hdl;
135 ibt_mr_desc_t *mr_desc; /* vaddr, lkey, rkey */
136 } rib_bufpool_t;
/*
 * ATS related defines and structures.
 */
#define	ATS_AR_DATA_LEN		16
/*
 * Service types supported by RPCIB.
 * For now only NFS is supported.
 */
#define	NFS	1
#define	NLM	2
/*
 * Tracks consumer state (client or server).
 * The enumerator values are spelled out; they match the implicit ones.
 */
typedef enum {
	RIB_SERVER = 0,
	RIB_CLIENT = 1
} rib_mode_t;
160 * CQ structure
162 struct rib_cq_s {
163 rib_hca_t *rib_hca;
164 ibt_cq_hdl_t rib_cq_hdl;
168 * Each registered service's data structure.
170 typedef struct rib_service_s rib_service_t;
171 struct rib_service_s {
172 uint32_t srv_type; /* i.e, NFS, NLM, v4CBD */
173 ibt_srv_hdl_t srv_hdl; /* from ibt_register call */
174 ib_svc_id_t srv_id;
175 rib_service_t *next;
179 * RPCIB plugin state
181 typedef struct rpcib_state {
182 ibt_clnt_hdl_t ibt_clnt_hdl;
183 uint32_t hca_count;
184 uint32_t nhca_inited;
185 rib_hca_t *hcas_list;
186 krwlock_t hcas_list_lock; /* protects hcas_list */
187 int refcount;
188 kmutex_t open_hca_lock;
189 queue_t *q; /* up queue for a serv_type */
190 void *private;
191 rib_service_t *service_list;
192 krwlock_t service_list_lock;
193 kmutex_t listen_lock;
194 } rpcib_state_t;
197 * Connection lists
199 typedef struct {
200 krwlock_t conn_lock; /* list lock */
201 CONN *conn_hd; /* list head */
202 } rib_conn_list_t;
/*
 * State of an HCA; this is the type of the state field of
 * struct rib_hca_s.  Values are written out and match the implicit ones.
 */
enum hca_state {
	HCA_DETACHED = 0,	/* hca in detached state */
	HCA_INITED = 1		/* hca in up and running state */
};
209 typedef struct rib_hca_service_s rib_hca_service_t;
210 struct rib_hca_service_s {
211 ib_svc_id_t srv_id;
212 ib_gid_t gid;
213 ibt_sbind_hdl_t sbind_hdl;
214 rib_hca_service_t *next;
218 * RPCIB per HCA structure
220 struct rib_hca_s {
221 ibt_clnt_hdl_t ibt_clnt_hdl;
224 * per HCA.
226 ibt_hca_hdl_t hca_hdl; /* HCA handle */
227 ibt_hca_attr_t hca_attrs; /* HCA attributes */
228 ibt_pd_hdl_t pd_hdl;
229 rib_hca_service_t *bound_services;
230 krwlock_t bound_services_lock;
231 ib_guid_t hca_guid;
232 uint32_t hca_nports;
233 ibt_hca_portinfo_t *hca_ports;
234 size_t hca_pinfosz;
235 enum hca_state state; /* state of HCA */
236 krwlock_t state_lock; /* protects state field */
237 bool_t inuse; /* indicates HCA usage */
238 kmutex_t inuse_lock; /* protects inuse field */
240 rib_conn_list_t cl_conn_list; /* client conn list */
241 rib_conn_list_t srv_conn_list; /* server conn list */
243 rib_cq_t *clnt_scq;
244 rib_cq_t *clnt_rcq;
245 rib_cq_t *svc_scq;
246 rib_cq_t *svc_rcq;
247 kmutex_t cb_lock;
248 kcondvar_t cb_cv;
250 rib_bufpool_t *recv_pool; /* recv buf pool */
251 rib_bufpool_t *send_pool; /* send buf pool */
253 void *iblock; /* interrupt cookie */
255 kmem_cache_t *server_side_cache; /* long reply pool */
256 avl_tree_t avl_tree;
257 kmutex_t avl_lock;
258 krwlock_t avl_rw_lock;
259 volatile bool_t avl_init;
260 kmutex_t cache_allocation_lock;
261 ddi_taskq_t *cleanup_helper;
262 ib_svc_id_t srv_id;
263 ibt_srv_hdl_t srv_hdl;
264 uint_t reg_state;
266 volatile uint64_t cache_allocation;
267 uint64_t cache_hits;
268 uint64_t cache_misses;
269 uint64_t cache_cold_misses;
270 uint64_t cache_hot_misses;
271 uint64_t cache_misses_above_the_limit;
273 struct rib_hca_s *next;
278 * Structure on wait state of a post send
280 struct send_wid {
281 uint32_t xid;
282 int cv_sig;
283 kmutex_t sendwait_lock;
284 kcondvar_t wait_cv;
285 uint_t status;
286 rib_qp_t *qp;
287 int nsbufs; /* # of send buffers posted */
288 uint64_t sbufaddr[DSEG_MAX]; /* posted send buffers */
289 caddr_t c;
290 caddr_t c1;
291 int l1;
292 caddr_t c2;
293 int l2;
294 int wl, rl;
298 * Structure on reply descriptor for recv queue.
299 * Different from the above posting of a descriptor.
301 struct reply {
302 uint32_t xid;
303 uint_t status;
304 uint64_t vaddr_cq; /* buf addr from CQ */
305 uint_t bytes_xfer;
306 kcondvar_t wait_cv;
307 struct reply *next;
308 struct reply *prev;
311 struct svc_recv {
312 rib_qp_t *qp;
313 uint64_t vaddr;
314 uint_t bytes_xfer;
317 struct recv_wid {
318 uint32_t xid;
319 rib_qp_t *qp;
320 uint64_t addr; /* posted buf addr */
324 * Per QP data structure
326 struct rib_qp_s {
327 rib_hca_t *hca;
328 rib_mode_t mode; /* RIB_SERVER or RIB_CLIENT */
329 CONN rdmaconn;
330 ibt_channel_hdl_t qp_hdl;
331 uint_t port_num;
332 ib_qpn_t qpn;
333 int chan_flags;
334 clock_t timeout;
335 ibt_rc_chan_query_attr_t qp_q_attrs;
336 rib_cq_t *send_cq; /* send CQ */
337 rib_cq_t *recv_cq; /* recv CQ */
340 * Number of pre-posted rbufs
342 uint_t n_posted_rbufs;
343 kcondvar_t posted_rbufs_cv;
344 kmutex_t posted_rbufs_lock;
347 * Number of SENDs pending completion
350 uint_t n_send_rbufs;
351 kcondvar_t send_rbufs_cv;
352 kmutex_t send_rbufs_lock;
355 * RPC reply
357 uint_t rep_list_size;
358 struct reply *replylist;
359 kmutex_t replylist_lock;
362 * server only, RDMA_DONE
364 struct rdma_done_list *rdlist;
365 kmutex_t rdlist_lock;
367 kmutex_t cb_lock;
368 kcondvar_t cb_conn_cv;
370 caddr_t q; /* upstream queue */
371 struct send_wid wd;
/*
 * Conversions between CONN and rib_qp_t pointers:
 *	ctoqp() casts the conn's c_private pointer to rib_qp_t *.
 *	qptoc() returns the address of the CONN (rdmaconn) embedded in
 *	the given rib_qp_t.
 */
#define	ctoqp(c)	((rib_qp_t *)((c)->c_private))
#define	qptoc(qp)	((CONN *)&((qp)->rdmaconn))
/*
 * Timeout for various calls
 */
#define	CONN_WAIT_TIME		40
#define	SEND_WAIT_TIME		40	/* time for send completion */

#define	REPLY_WAIT_TIME		40	/* time to get reply from remote QP */
385 #ifdef __cplusplus
387 #endif
389 #endif /* !_IB_H */