4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
27 * Copyright (c) 2008, The Ohio State University. All rights reserved.
29 * Portions of this source code is developed by the team members of
30 * The Ohio State University's Network-Based Computing Laboratory (NBCL),
31 * headed by Professor Dhabaleswar K. (DK) Panda.
33 * Acknowledgements to contributions from developers:
34 * Ranjit Noronha: noronha@cse.ohio-state.edu
35 * Lei Chai : chail@cse.ohio-state.edu
36 * Weikuan Yu : yuw@cse.ohio-state.edu
40 #include <sys/systm.h>
41 #include <sys/kstat.h>
42 #include <sys/modctl.h>
44 #include <rpc/rpc_rdma.h>
46 #include <sys/ib/ibtl/ibti.h>
/*
 * Tunable initialised to RDMA_MINCHUNK.
 * NOTE(review): presumably the smallest transfer that is sent as an RDMA
 * chunk -- confirm against the users of this variable.
 */
48 uint_t rdma_minchunk
= RDMA_MINCHUNK
;
53 int rdma_modloaded
= 0; /* flag to load RDMA plugin modules */
54 int rdma_dev_available
= 0; /* if any RDMA device is loaded */
55 kmutex_t rdma_modload_lock
; /* protects rdma_modloaded flag */
/*
 * Notification state for HCA events: the registration and unregistration
 * routines in this file store RDMA_HCA_ATTACH / RDMA_HCA_DETACH in svc_stat
 * and signal svc_cv while holding svc_lock.
 */
57 rdma_svc_wait_t rdma_wait
;
59 rdma_registry_t
*rdma_mod_head
= NULL
; /* head for RDMA modules */
60 krwlock_t rdma_lock
; /* protects rdma_mod_head list */
61 ldi_ident_t rpcmod_li
= NULL
; /* identifies us with ldi_ framework */
/*
 * kmem cache from which struct clist entries are allocated and to which
 * they are freed; created under the name "rdma_clist" later in this file.
 */
63 kmem_cache_t
*clist_cache
= NULL
;
/*
 * LDI handle filled in by the ldi_open_by_name() call on the rpcib device
 * node later in this file, and passed to ldi_close() there.
 */
68 ldi_handle_t rpcib_handle
= NULL
;
/*
 * Named-kstat arrays and their element counts, defined outside this file.
 * rdma_kstat_init() assigns them as the ks_data of the "rpc_rdma_client"
 * and "rpc_rdma_server" kstats respectively.
 */
73 extern kstat_named_t
*rdmarcstat_ptr
;
74 extern uint_t rdmarcstat_ndata
;
75 extern kstat_named_t
*rdmarsstat_ptr
;
76 extern uint_t rdmarsstat_ndata
;
/* Forward declaration; the definition later in this file takes (void). */
78 void rdma_kstat_init();
81 * RDMATF module registration routine.
82 * This routine is expected to be called by the init routine in the plugin modules.
86 rdma_register_mod(rdma_mod_t
*mod
)
88 rdma_registry_t
**mp
, *m
;
90 if (mod
->rdma_version
!= RDMATF_VERS
) {
91 return (RDMA_BADVERS
);
94 rw_enter(&rdma_lock
, RW_WRITER
);
96 * Ensure not already registered
100 if (strncmp((*mp
)->r_mod
->rdma_api
, mod
->rdma_api
,
102 if ((*mp
)->r_mod_state
== RDMA_MOD_INACTIVE
) {
103 (*mp
)->r_mod_state
= RDMA_MOD_ACTIVE
;
104 (*mp
)->r_mod
->rdma_ops
= mod
->rdma_ops
;
105 (*mp
)->r_mod
->rdma_count
= mod
->rdma_count
;
109 return (RDMA_REG_EXIST
);
111 mp
= &((*mp
)->r_next
);
115 * New one, create and add to registry
117 m
= kmem_alloc(sizeof (rdma_registry_t
), KM_SLEEP
);
118 m
->r_mod
= kmem_alloc(sizeof (rdma_mod_t
), KM_SLEEP
);
121 m
->r_mod
->rdma_api
= kmem_zalloc(KNC_STRSIZE
, KM_SLEEP
);
122 (void) strncpy(m
->r_mod
->rdma_api
, mod
->rdma_api
, KNC_STRSIZE
);
123 m
->r_mod
->rdma_api
[KNC_STRSIZE
- 1] = '\0';
124 m
->r_mod_state
= RDMA_MOD_ACTIVE
;
130 * Start the nfs service on the rdma xprts.
131 * (this notification mechanism will need to change when we support
132 * multiple hcas and have support for multiple rdma plugins).
134 mutex_enter(&rdma_wait
.svc_lock
);
135 rdma_wait
.svc_stat
= RDMA_HCA_ATTACH
;
136 cv_signal(&rdma_wait
.svc_cv
);
137 mutex_exit(&rdma_wait
.svc_lock
);
139 return (RDMA_SUCCESS
);
143 * RDMATF module unregistration routine.
144 * This routine is expected to be called by the fini routine in
145 * the plugin modules.
148 rdma_unregister_mod(rdma_mod_t
*mod
)
150 rdma_registry_t
**m
, *mmod
= NULL
;
152 rw_enter(&rdma_lock
, RW_WRITER
);
156 if (strncmp((*m
)->r_mod
->rdma_api
, mod
->rdma_api
,
162 * Check if any device attached, if so return error
164 if (mod
->rdma_count
!= 0) {
166 return (RDMA_FAILED
);
169 * Found entry. Mark it inactive.
172 mmod
->r_mod
->rdma_count
= 0;
173 mmod
->r_mod_state
= RDMA_MOD_INACTIVE
;
178 rdma_dev_available
= 0;
182 * Stop the nfs service running on the rdma xprts.
183 * (this notification mechanism will need to change when we support
184 * multiple hcas and have support for multiple rdma plugins).
186 mutex_enter(&rdma_wait
.svc_lock
);
187 rdma_wait
.svc_stat
= RDMA_HCA_DETACH
;
188 cv_signal(&rdma_wait
.svc_cv
);
189 mutex_exit(&rdma_wait
.svc_lock
);
194 return (RDMA_SUCCESS
);
202 clp
= kmem_cache_alloc(clist_cache
, KM_SLEEP
);
204 bzero(clp
, sizeof (*clp
));
210 clist_len(struct clist
*cl
)
221 clist_zero_len(struct clist
*cl
)
224 if (cl
->c_dmemhandle
.mrc_rmr
== 0)
232 * Creates a new chunk list entry, and
233 * adds it to the end of a chunk list.
236 clist_add(struct clist
**clp
, uint32_t xdroff
, int len
,
237 struct mrc
*shandle
, caddr_t saddr
,
238 struct mrc
*dhandle
, caddr_t daddr
)
242 /* Find the end of the list */
245 clp
= &((*clp
)->c_next
);
248 cl
->c_xdroff
= xdroff
;
250 cl
->w
.c_saddr
= (uint64_t)(uintptr_t)saddr
;
252 cl
->c_smemhandle
= *shandle
;
253 cl
->u
.c_daddr
= (uint64_t)(uintptr_t)daddr
;
255 cl
->c_dmemhandle
= *dhandle
;
262 clist_register(CONN
*conn
, struct clist
*cl
, clist_dstsrc dstsrc
)
267 for (c
= cl
; c
; c
= c
->c_next
) {
271 c
->c_regtype
= dstsrc
;
274 case CLIST_REG_SOURCE
:
275 status
= RDMA_REGMEMSYNC(conn
,
276 (caddr_t
)(struct as
*)c
->c_adspc
,
277 (caddr_t
)(uintptr_t)c
->w
.c_saddr3
, c
->c_len
,
278 &c
->c_smemhandle
, (void **)&c
->c_ssynchandle
,
279 (void *)c
->rb_longbuf
.rb_private
);
282 status
= RDMA_REGMEMSYNC(conn
,
283 (caddr_t
)(struct as
*)c
->c_adspc
,
284 (caddr_t
)(uintptr_t)c
->u
.c_daddr3
, c
->c_len
,
285 &c
->c_dmemhandle
, (void **)&c
->c_dsynchandle
,
286 (void *)c
->rb_longbuf
.rb_private
);
291 if (status
!= RDMA_SUCCESS
) {
292 (void) clist_deregister(conn
, cl
);
297 return (RDMA_SUCCESS
);
301 clist_deregister(CONN
*conn
, struct clist
*cl
)
305 for (c
= cl
; c
; c
= c
->c_next
) {
306 switch (c
->c_regtype
) {
307 case CLIST_REG_SOURCE
:
308 if (c
->c_smemhandle
.mrc_rmr
!= 0) {
309 (void) RDMA_DEREGMEMSYNC(conn
,
310 (caddr_t
)(uintptr_t)c
->w
.c_saddr3
,
312 (void *)(uintptr_t)c
->c_ssynchandle
,
313 (void *)c
->rb_longbuf
.rb_private
);
314 c
->c_smemhandle
.mrc_rmr
= 0;
315 c
->c_ssynchandle
= NULL
;
319 if (c
->c_dmemhandle
.mrc_rmr
!= 0) {
320 (void) RDMA_DEREGMEMSYNC(conn
,
321 (caddr_t
)(uintptr_t)c
->u
.c_daddr3
,
323 (void *)(uintptr_t)c
->c_dsynchandle
,
324 (void *)c
->rb_longbuf
.rb_private
);
325 c
->c_dmemhandle
.mrc_rmr
= 0;
326 c
->c_dsynchandle
= NULL
;
330 /* clist unregistered. continue */
335 return (RDMA_SUCCESS
);
339 clist_syncmem(CONN
*conn
, struct clist
*cl
, clist_dstsrc dstsrc
)
346 case CLIST_REG_SOURCE
:
348 if (c
->c_ssynchandle
) {
349 status
= RDMA_SYNCMEM(conn
,
350 (void *)(uintptr_t)c
->c_ssynchandle
,
351 (caddr_t
)(uintptr_t)c
->w
.c_saddr3
,
353 if (status
!= RDMA_SUCCESS
)
361 if (c
->c_ssynchandle
) {
362 status
= RDMA_SYNCMEM(conn
,
363 (void *)(uintptr_t)c
->c_dsynchandle
,
364 (caddr_t
)(uintptr_t)c
->u
.c_daddr3
,
366 if (status
!= RDMA_SUCCESS
)
376 return (RDMA_SUCCESS
);
380 * Frees up entries in chunk list
383 clist_free(struct clist
*cl
)
385 struct clist
*c
= cl
;
389 kmem_cache_free(clist_cache
, c
);
395 rdma_clnt_postrecv(CONN
*conn
, uint32_t xid
)
397 struct clist
*cl
= NULL
;
399 rdma_buf_t rbuf
= {0};
401 rbuf
.type
= RECV_BUFFER
;
402 if (RDMA_BUF_ALLOC(conn
, &rbuf
)) {
403 return (RDMA_NORESOURCE
);
406 clist_add(&cl
, 0, rbuf
.len
, &rbuf
.handle
, rbuf
.addr
,
408 retval
= RDMA_CLNT_RECVBUF(conn
, cl
, xid
);
415 rdma_clnt_postrecv_remove(CONN
*conn
, uint32_t xid
)
417 return (RDMA_CLNT_RECVBUF_REMOVE(conn
, xid
));
421 rdma_svc_postrecv(CONN
*conn
)
423 struct clist
*cl
= NULL
;
425 rdma_buf_t rbuf
= {0};
427 rbuf
.type
= RECV_BUFFER
;
428 if (RDMA_BUF_ALLOC(conn
, &rbuf
)) {
429 retval
= RDMA_NORESOURCE
;
431 clist_add(&cl
, 0, rbuf
.len
, &rbuf
.handle
, rbuf
.addr
,
433 retval
= RDMA_SVC_RECVBUF(conn
, cl
);
440 rdma_buf_alloc(CONN
*conn
, rdma_buf_t
*rbuf
)
442 return (RDMA_BUF_ALLOC(conn
, rbuf
));
446 rdma_buf_free(CONN
*conn
, rdma_buf_t
*rbuf
)
448 if (!rbuf
|| rbuf
->addr
== NULL
) {
451 RDMA_BUF_FREE(conn
, rbuf
);
452 bzero(rbuf
, sizeof (rdma_buf_t
));
456 * Caller is holding rdma_modload_lock mutex
462 ASSERT(MUTEX_HELD(&rdma_modload_lock
));
464 * Load all available RDMA plugins, which right now is only the IB plugin.
465 * If no IB hardware is present, then quit right away.
466 * ENODEV -- For no device on the system
467 * EPROTONOSUPPORT -- For module not available either due to failure to
468 * load or some other reason.
471 if (ibt_hw_is_present() == 0) {
472 rdma_dev_available
= 0;
476 rdma_dev_available
= 1;
477 if (rpcmod_li
== NULL
)
478 return (EPROTONOSUPPORT
);
480 status
= ldi_open_by_name("/devices/ib/rpcib@0:rpcib",
481 FREAD
| FWRITE
, kcred
,
482 &rpcib_handle
, rpcmod_li
);
485 return (EPROTONOSUPPORT
);
489 * We will need to reload the plugin module after it was unregistered
490 * but the resources below need to be allocated only the first time.
493 clist_cache
= kmem_cache_create("rdma_clist",
494 sizeof (struct clist
), _POINTER_ALIGNMENT
, NULL
,
495 NULL
, NULL
, NULL
, 0, 0);
499 (void) ldi_close(rpcib_handle
, FREAD
|FWRITE
, kcred
);
505 rdma_kstat_init(void)
510 * The RDMA framework doesn't know how to deal with Zones, and is
511 * only available in the global zone.
513 ASSERT(INGLOBALZONE(curproc
));
514 ksp
= kstat_create_zone("unix", 0, "rpc_rdma_client", "rpc",
515 KSTAT_TYPE_NAMED
, rdmarcstat_ndata
,
516 KSTAT_FLAG_VIRTUAL
| KSTAT_FLAG_WRITABLE
, GLOBAL_ZONEID
);
518 ksp
->ks_data
= (void *) rdmarcstat_ptr
;
522 ksp
= kstat_create_zone("unix", 0, "rpc_rdma_server", "rpc",
523 KSTAT_TYPE_NAMED
, rdmarsstat_ndata
,
524 KSTAT_FLAG_VIRTUAL
| KSTAT_FLAG_WRITABLE
, GLOBAL_ZONEID
);
526 ksp
->ks_data
= (void *) rdmarsstat_ptr
;
537 mutex_enter(&rdma_wait
.svc_lock
);
539 ret
= cv_wait_sig(&rdma_wait
.svc_cv
, &rdma_wait
.svc_lock
);
542 * If signalled by a hca attach/detach, pass the right
547 stat
= rdma_wait
.svc_stat
;
551 mutex_exit(&rdma_wait
.svc_lock
);