Merge remote-tracking branch 'origin/master'
[unleashed/lotheac.git] / usr / src / uts / common / rpc / rdma_subr.c
blob78f95413367269200f48b6a77ee7bf5c633952c7
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
27 * Copyright (c) 2008, The Ohio State University. All rights reserved.
29 * Portions of this source code is developed by the team members of
30 * The Ohio State University's Network-Based Computing Laboratory (NBCL),
31 * headed by Professor Dhabaleswar K. (DK) Panda.
33 * Acknowledgements to contributions from developers:
34 * Ranjit Noronha: noronha@cse.ohio-state.edu
35 * Lei Chai : chail@cse.ohio-state.edu
36 * Weikuan Yu : yuw@cse.ohio-state.edu
40 #include <sys/systm.h>
41 #include <sys/kstat.h>
42 #include <sys/modctl.h>
43 #include <sys/sdt.h>
44 #include <rpc/rpc_rdma.h>
46 #include <sys/ib/ibtl/ibti.h>
48 uint_t rdma_minchunk = RDMA_MINCHUNK;
51 * Globals
53 int rdma_modloaded = 0; /* flag to load RDMA plugin modules */
54 int rdma_dev_available = 0; /* if any RDMA device is loaded */
55 kmutex_t rdma_modload_lock; /* protects rdma_modloaded flag */
57 rdma_svc_wait_t rdma_wait;
59 rdma_registry_t *rdma_mod_head = NULL; /* head for RDMA modules */
60 krwlock_t rdma_lock; /* protects rdma_mod_head list */
61 ldi_ident_t rpcmod_li = NULL; /* identifies us with ldi_ framework */
63 kmem_cache_t *clist_cache = NULL;
66 * Statics
68 ldi_handle_t rpcib_handle = NULL;
71 * Externs
73 extern kstat_named_t *rdmarcstat_ptr;
74 extern uint_t rdmarcstat_ndata;
75 extern kstat_named_t *rdmarsstat_ptr;
76 extern uint_t rdmarsstat_ndata;
78 void rdma_kstat_init();
81 * RDMATF module registration routine.
82 * This routine is expected to be called by the init routine in
83 * the plugin modules.
85 rdma_stat
86 rdma_register_mod(rdma_mod_t *mod)
88 rdma_registry_t **mp, *m;
90 if (mod->rdma_version != RDMATF_VERS) {
91 return (RDMA_BADVERS);
94 rw_enter(&rdma_lock, RW_WRITER);
96 * Ensure not already registered
98 mp = &rdma_mod_head;
99 while (*mp != NULL) {
100 if (strncmp((*mp)->r_mod->rdma_api, mod->rdma_api,
101 KNC_STRSIZE) == 0) {
102 if ((*mp)->r_mod_state == RDMA_MOD_INACTIVE) {
103 (*mp)->r_mod_state = RDMA_MOD_ACTIVE;
104 (*mp)->r_mod->rdma_ops = mod->rdma_ops;
105 (*mp)->r_mod->rdma_count = mod->rdma_count;
106 goto announce_hca;
108 rw_exit(&rdma_lock);
109 return (RDMA_REG_EXIST);
111 mp = &((*mp)->r_next);
115 * New one, create and add to registry
117 m = kmem_alloc(sizeof (rdma_registry_t), KM_SLEEP);
118 m->r_mod = kmem_alloc(sizeof (rdma_mod_t), KM_SLEEP);
119 *m->r_mod = *mod;
120 m->r_next = NULL;
121 m->r_mod->rdma_api = kmem_zalloc(KNC_STRSIZE, KM_SLEEP);
122 (void) strncpy(m->r_mod->rdma_api, mod->rdma_api, KNC_STRSIZE);
123 m->r_mod->rdma_api[KNC_STRSIZE - 1] = '\0';
124 m->r_mod_state = RDMA_MOD_ACTIVE;
125 *mp = m;
127 announce_hca:
128 rw_exit(&rdma_lock);
130 * Start the nfs service on the rdma xprts.
131 * (this notification mechanism will need to change when we support
132 * multiple hcas and have support for multiple rdma plugins).
134 mutex_enter(&rdma_wait.svc_lock);
135 rdma_wait.svc_stat = RDMA_HCA_ATTACH;
136 cv_signal(&rdma_wait.svc_cv);
137 mutex_exit(&rdma_wait.svc_lock);
139 return (RDMA_SUCCESS);
143 * RDMATF module unregistration routine.
144 * This routine is expected to be called by the fini routine in
145 * the plugin modules.
147 rdma_stat
148 rdma_unregister_mod(rdma_mod_t *mod)
150 rdma_registry_t **m, *mmod = NULL;
152 rw_enter(&rdma_lock, RW_WRITER);
154 m = &rdma_mod_head;
155 while (*m != NULL) {
156 if (strncmp((*m)->r_mod->rdma_api, mod->rdma_api,
157 KNC_STRSIZE) != 0) {
158 m = &((*m)->r_next);
159 continue;
162 * Check if any device attached, if so return error
164 if (mod->rdma_count != 0) {
165 rw_exit(&rdma_lock);
166 return (RDMA_FAILED);
169 * Found entry. Mark it inactive.
171 mmod = *m;
172 mmod->r_mod->rdma_count = 0;
173 mmod->r_mod_state = RDMA_MOD_INACTIVE;
174 break;
177 rdma_modloaded = 0;
178 rdma_dev_available = 0;
179 rw_exit(&rdma_lock);
182 * Stop the nfs service running on the rdma xprts.
183 * (this notification mechanism will need to change when we support
184 * multiple hcas and have support for multiple rdma plugins).
186 mutex_enter(&rdma_wait.svc_lock);
187 rdma_wait.svc_stat = RDMA_HCA_DETACH;
188 cv_signal(&rdma_wait.svc_cv);
189 mutex_exit(&rdma_wait.svc_lock);
192 * Not found.
194 return (RDMA_SUCCESS);
197 struct clist *
198 clist_alloc(void)
200 struct clist *clp;
202 clp = kmem_cache_alloc(clist_cache, KM_SLEEP);
204 bzero(clp, sizeof (*clp));
206 return (clp);
209 uint32_t
210 clist_len(struct clist *cl)
212 uint32_t len = 0;
213 while (cl) {
214 len += cl->c_len;
215 cl = cl->c_next;
217 return (len);
220 void
221 clist_zero_len(struct clist *cl)
223 while (cl != NULL) {
224 if (cl->c_dmemhandle.mrc_rmr == 0)
225 break;
226 cl->c_len = 0;
227 cl = cl->c_next;
232 * Creates a new chunk list entry, and
233 * adds it to the end of a chunk list.
235 void
236 clist_add(struct clist **clp, uint32_t xdroff, int len,
237 struct mrc *shandle, caddr_t saddr,
238 struct mrc *dhandle, caddr_t daddr)
240 struct clist *cl;
242 /* Find the end of the list */
244 while (*clp != NULL)
245 clp = &((*clp)->c_next);
247 cl = clist_alloc();
248 cl->c_xdroff = xdroff;
249 cl->c_len = len;
250 cl->w.c_saddr = (uint64_t)(uintptr_t)saddr;
251 if (shandle)
252 cl->c_smemhandle = *shandle;
253 cl->u.c_daddr = (uint64_t)(uintptr_t)daddr;
254 if (dhandle)
255 cl->c_dmemhandle = *dhandle;
256 cl->c_next = NULL;
258 *clp = cl;
261 rdma_stat
262 clist_register(CONN *conn, struct clist *cl, clist_dstsrc dstsrc)
264 struct clist *c;
265 int status;
267 for (c = cl; c; c = c->c_next) {
268 if (c->c_len <= 0)
269 continue;
271 c->c_regtype = dstsrc;
273 switch (dstsrc) {
274 case CLIST_REG_SOURCE:
275 status = RDMA_REGMEMSYNC(conn,
276 (caddr_t)(struct as *)c->c_adspc,
277 (caddr_t)(uintptr_t)c->w.c_saddr3, c->c_len,
278 &c->c_smemhandle, (void **)&c->c_ssynchandle,
279 (void *)c->rb_longbuf.rb_private);
280 break;
281 case CLIST_REG_DST:
282 status = RDMA_REGMEMSYNC(conn,
283 (caddr_t)(struct as *)c->c_adspc,
284 (caddr_t)(uintptr_t)c->u.c_daddr3, c->c_len,
285 &c->c_dmemhandle, (void **)&c->c_dsynchandle,
286 (void *)c->rb_longbuf.rb_private);
287 break;
288 default:
289 return (RDMA_INVAL);
291 if (status != RDMA_SUCCESS) {
292 (void) clist_deregister(conn, cl);
293 return (status);
297 return (RDMA_SUCCESS);
300 rdma_stat
301 clist_deregister(CONN *conn, struct clist *cl)
303 struct clist *c;
305 for (c = cl; c; c = c->c_next) {
306 switch (c->c_regtype) {
307 case CLIST_REG_SOURCE:
308 if (c->c_smemhandle.mrc_rmr != 0) {
309 (void) RDMA_DEREGMEMSYNC(conn,
310 (caddr_t)(uintptr_t)c->w.c_saddr3,
311 c->c_smemhandle,
312 (void *)(uintptr_t)c->c_ssynchandle,
313 (void *)c->rb_longbuf.rb_private);
314 c->c_smemhandle.mrc_rmr = 0;
315 c->c_ssynchandle = 0;
317 break;
318 case CLIST_REG_DST:
319 if (c->c_dmemhandle.mrc_rmr != 0) {
320 (void) RDMA_DEREGMEMSYNC(conn,
321 (caddr_t)(uintptr_t)c->u.c_daddr3,
322 c->c_dmemhandle,
323 (void *)(uintptr_t)c->c_dsynchandle,
324 (void *)c->rb_longbuf.rb_private);
325 c->c_dmemhandle.mrc_rmr = 0;
326 c->c_dsynchandle = 0;
328 break;
329 default:
330 /* clist unregistered. continue */
331 break;
335 return (RDMA_SUCCESS);
338 rdma_stat
339 clist_syncmem(CONN *conn, struct clist *cl, clist_dstsrc dstsrc)
341 struct clist *c;
342 rdma_stat status;
344 c = cl;
345 switch (dstsrc) {
346 case CLIST_REG_SOURCE:
347 while (c != NULL) {
348 if (c->c_ssynchandle) {
349 status = RDMA_SYNCMEM(conn,
350 (void *)(uintptr_t)c->c_ssynchandle,
351 (caddr_t)(uintptr_t)c->w.c_saddr3,
352 c->c_len, 0);
353 if (status != RDMA_SUCCESS)
354 return (status);
356 c = c->c_next;
358 break;
359 case CLIST_REG_DST:
360 while (c != NULL) {
361 if (c->c_ssynchandle) {
362 status = RDMA_SYNCMEM(conn,
363 (void *)(uintptr_t)c->c_dsynchandle,
364 (caddr_t)(uintptr_t)c->u.c_daddr3,
365 c->c_len, 1);
366 if (status != RDMA_SUCCESS)
367 return (status);
369 c = c->c_next;
371 break;
372 default:
373 return (RDMA_INVAL);
376 return (RDMA_SUCCESS);
380 * Frees up entries in chunk list
382 void
383 clist_free(struct clist *cl)
385 struct clist *c = cl;
387 while (c != NULL) {
388 cl = cl->c_next;
389 kmem_cache_free(clist_cache, c);
390 c = cl;
394 rdma_stat
395 rdma_clnt_postrecv(CONN *conn, uint32_t xid)
397 struct clist *cl = NULL;
398 rdma_stat retval;
399 rdma_buf_t rbuf = {0};
401 rbuf.type = RECV_BUFFER;
402 if (RDMA_BUF_ALLOC(conn, &rbuf)) {
403 return (RDMA_NORESOURCE);
406 clist_add(&cl, 0, rbuf.len, &rbuf.handle, rbuf.addr,
407 NULL, NULL);
408 retval = RDMA_CLNT_RECVBUF(conn, cl, xid);
409 clist_free(cl);
411 return (retval);
414 rdma_stat
415 rdma_clnt_postrecv_remove(CONN *conn, uint32_t xid)
417 return (RDMA_CLNT_RECVBUF_REMOVE(conn, xid));
420 rdma_stat
421 rdma_svc_postrecv(CONN *conn)
423 struct clist *cl = NULL;
424 rdma_stat retval;
425 rdma_buf_t rbuf = {0};
427 rbuf.type = RECV_BUFFER;
428 if (RDMA_BUF_ALLOC(conn, &rbuf)) {
429 retval = RDMA_NORESOURCE;
430 } else {
431 clist_add(&cl, 0, rbuf.len, &rbuf.handle, rbuf.addr,
432 NULL, NULL);
433 retval = RDMA_SVC_RECVBUF(conn, cl);
434 clist_free(cl);
436 return (retval);
439 rdma_stat
440 rdma_buf_alloc(CONN *conn, rdma_buf_t *rbuf)
442 return (RDMA_BUF_ALLOC(conn, rbuf));
445 void
446 rdma_buf_free(CONN *conn, rdma_buf_t *rbuf)
448 if (!rbuf || rbuf->addr == NULL) {
449 return;
451 RDMA_BUF_FREE(conn, rbuf);
452 bzero(rbuf, sizeof (rdma_buf_t));
456 * Caller is holding rdma_modload_lock mutex
459 rdma_modload()
461 int status;
462 ASSERT(MUTEX_HELD(&rdma_modload_lock));
464 * Load all available RDMA plugins which right now is only IB plugin.
465 * If no IB hardware is present, then quit right away.
466 * ENODEV -- For no device on the system
467 * EPROTONOSUPPORT -- For module not avilable either due to failure to
468 * load or some other reason.
470 rdma_modloaded = 1;
471 if (ibt_hw_is_present() == 0) {
472 rdma_dev_available = 0;
473 return (ENODEV);
476 rdma_dev_available = 1;
477 if (rpcmod_li == NULL)
478 return (EPROTONOSUPPORT);
480 status = ldi_open_by_name("/devices/ib/rpcib@0:rpcib",
481 FREAD | FWRITE, kcred,
482 &rpcib_handle, rpcmod_li);
484 if (status != 0)
485 return (EPROTONOSUPPORT);
489 * We will need to reload the plugin module after it was unregistered
490 * but the resources below need to allocated only the first time.
492 if (!clist_cache) {
493 clist_cache = kmem_cache_create("rdma_clist",
494 sizeof (struct clist), _POINTER_ALIGNMENT, NULL,
495 NULL, NULL, NULL, 0, 0);
496 rdma_kstat_init();
499 (void) ldi_close(rpcib_handle, FREAD|FWRITE, kcred);
501 return (0);
504 void
505 rdma_kstat_init(void)
507 kstat_t *ksp;
510 * The RDMA framework doesn't know how to deal with Zones, and is
511 * only available in the global zone.
513 ASSERT(INGLOBALZONE(curproc));
514 ksp = kstat_create_zone("unix", 0, "rpc_rdma_client", "rpc",
515 KSTAT_TYPE_NAMED, rdmarcstat_ndata,
516 KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE, GLOBAL_ZONEID);
517 if (ksp) {
518 ksp->ks_data = (void *) rdmarcstat_ptr;
519 kstat_install(ksp);
522 ksp = kstat_create_zone("unix", 0, "rpc_rdma_server", "rpc",
523 KSTAT_TYPE_NAMED, rdmarsstat_ndata,
524 KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE, GLOBAL_ZONEID);
525 if (ksp) {
526 ksp->ks_data = (void *) rdmarsstat_ptr;
527 kstat_install(ksp);
531 rdma_stat
532 rdma_kwait(void)
534 int ret;
535 rdma_stat stat;
537 mutex_enter(&rdma_wait.svc_lock);
539 ret = cv_wait_sig(&rdma_wait.svc_cv, &rdma_wait.svc_lock);
542 * If signalled by a hca attach/detach, pass the right
543 * stat back.
546 if (ret)
547 stat = rdma_wait.svc_stat;
548 else
549 stat = RDMA_INTR;
551 mutex_exit(&rdma_wait.svc_lock);
553 return (stat);