/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
#include <sys/types.h>
#include <sys/errno.h>
#include <sys/debug.h>
#include <sys/stropts.h>
#include <sys/stream.h>
#include <sys/strlog.h>
#include <sys/cmn_err.h>
#include <sys/modctl.h>
#include <sys/kstat.h>
#include <sys/sunddi.h>
#include <sys/strsun.h>
#include <sys/taskq.h>
#include <sys/cpuvar.h>
#include <sys/atomic.h>
#include <sys/sysmacros.h>
#include <sys/esunddi.h>
#include <sys/ib/ibtl/ibti.h>
#include <sys/socket.h>
#include <netinet/in.h>
/*
 * The following variables support the debug log buffer scheme.
 */
#ifdef	DEBUG
static char daplka_dbgbuf[0x80000];
#else /* DEBUG */
static char daplka_dbgbuf[0x4000];
#endif /* DEBUG */
static int	daplka_dbgsize = sizeof (daplka_dbgbuf);
static size_t	daplka_dbgnext;
static int	daplka_dbginit = 0;
static kmutex_t	daplka_dbglock;
_NOTE(MUTEX_PROTECTS_DATA(daplka_dbglock,
    daplka_dbgbuf daplka_dbgnext))
static int	daplka_dbg = 0x0103;
static void	daplka_console(const char *, ...);
static void	daplka_debug(const char *, ...);
static int	daplka_apm = 0x1;		/* default enable */
static int	daplka_failback = 0x1;		/* default enable */
static int	daplka_query_aft_setaltpath = 10;

#define	DERR				\
	if (daplka_dbg & 0x100)		\
	    daplka_debug

#ifdef	DEBUG
#define	DINFO	daplka_console

#define	D1				\
	if (daplka_dbg & 0x01)		\
	    daplka_debug
#define	D2				\
	if (daplka_dbg & 0x02)		\
	    daplka_debug
#define	D3				\
	if (daplka_dbg & 0x04)		\
	    daplka_debug
#define	D4				\
	if (daplka_dbg & 0x08)		\
	    daplka_debug

#else /* DEBUG */
#define	DINFO	if (0) printf
#define	D1	if (0) printf
#define	D2	if (0) printf
#define	D3	if (0) printf
#define	D4	if (0) printf
#endif /* DEBUG */
/*
 * driver entry points
 */
static int daplka_open(dev_t *, int, int, struct cred *);
static int daplka_close(dev_t, int, int, struct cred *);
static int daplka_attach(dev_info_t *, ddi_attach_cmd_t);
static int daplka_detach(dev_info_t *, ddi_detach_cmd_t);
static int daplka_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int daplka_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
static int daplka_common_ioctl(int, minor_t, intptr_t, int, cred_t *, int *);
static int daplka_misc_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_ep_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_evd_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_mr_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_cno_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_pd_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_sp_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_srq_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
/*
 * common ioctls and supporting functions
 */
static int daplka_ia_create(minor_t, intptr_t, int, cred_t *, int *);
static int daplka_ia_destroy(daplka_resource_t *);

/*
 * EP ioctls and supporting functions
 */
static int daplka_ep_create(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_ep_modify(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_ep_free(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_ep_connect(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_ep_disconnect(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_ep_reinit(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_ep_destroy(daplka_resource_t *);
static void daplka_hash_ep_free(void *);
static int daplka_ep_failback(void *objp, void *arg);
static int daplka_ep_altpath(daplka_ep_resource_t *, ib_gid_t *);

static uint32_t daplka_ep_get_state(daplka_ep_resource_t *);
static void daplka_ep_set_state(daplka_ep_resource_t *, uint32_t, uint32_t);
static boolean_t daplka_ep_transition_is_valid(uint32_t, uint32_t);
static daplka_timer_info_t *daplka_timer_info_alloc(daplka_ep_resource_t *);
static void daplka_timer_info_free(daplka_timer_info_t *);
static void daplka_timer_handler(void *);
static void daplka_timer_dispatch(void *);
static void daplka_timer_thread(void *);
static int daplka_cancel_timer(daplka_ep_resource_t *);
static void daplka_hash_timer_free(void *);
/*
 * EVD ioctls and supporting functions
 */
static int daplka_evd_create(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_cq_resize(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_evd_free(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_event_poll(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_evd_destroy(daplka_resource_t *);
static void daplka_cq_handler(ibt_cq_hdl_t, void *);
static void daplka_evd_wakeup(daplka_evd_resource_t *,
    daplka_evd_event_list_t *, daplka_evd_event_t *);
static void daplka_evd_event_enqueue(daplka_evd_event_list_t *,
    daplka_evd_event_t *);
static daplka_evd_event_t *daplka_evd_event_dequeue(daplka_evd_event_list_t *);
static void daplka_hash_evd_free(void *);
/*
 * SRQ ioctls and supporting functions
 */
static int daplka_srq_create(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_srq_resize(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_srq_free(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_srq_destroy(daplka_resource_t *);
static void daplka_hash_srq_free(void *);

/*
 * Miscellaneous ioctls
 */
static int daplka_cr_accept(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_cr_reject(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_cr_handoff(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_ia_query(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
/*
 * PD ioctls and supporting functions
 */
static int daplka_pd_alloc(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_pd_free(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_pd_destroy(daplka_resource_t *);
static void daplka_hash_pd_free(void *);

/*
 * SP ioctls and supporting functions
 */
static int daplka_service_register(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_service_deregister(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_sp_destroy(daplka_resource_t *);
static void daplka_hash_sp_free(void *);
static void daplka_hash_sp_unref(void *);

/*
 * MR ioctls and supporting functions
 */
static int daplka_mr_register(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_mr_register_lmr(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_mr_register_shared(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_mr_deregister(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_mr_sync(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_mr_destroy(daplka_resource_t *);
static void daplka_hash_mr_free(void *);
static void daplka_shared_mr_free(daplka_mr_resource_t *);

/*
 * MW ioctls and supporting functions
 */
static int daplka_mw_alloc(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_mw_free(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_mw_destroy(daplka_resource_t *);
static void daplka_hash_mw_free(void *);

/*
 * CNO ioctls and supporting functions
 */
static int daplka_cno_alloc(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_cno_free(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_cno_wait(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_cno_destroy(daplka_resource_t *);
static void daplka_hash_cno_free(void *);
static ibt_cm_status_t daplka_cm_rc_handler(void *, ibt_cm_event_t *,
    ibt_cm_return_args_t *, void *, ibt_priv_data_len_t);

static ibt_cm_status_t daplka_cm_service_handler(void *, ibt_cm_event_t *,
    ibt_cm_return_args_t *, void *, ibt_priv_data_len_t);

static ibt_cm_status_t daplka_cm_service_req(daplka_sp_resource_t *,
    ibt_cm_event_t *, ibt_cm_return_args_t *, void *, ibt_priv_data_len_t);
/*
 * resource management routines
 */
static int daplka_resource_reserve(minor_t *);
static int daplka_resource_insert(minor_t, daplka_resource_t *);
static daplka_resource_t *daplka_resource_remove(minor_t rnum);
static daplka_resource_t *daplka_resource_lookup(minor_t);
static void daplka_resource_init(void);
static void daplka_resource_fini(void);
static struct daplka_resource_table daplka_resource;
/*
 * hash table routines
 */
static int daplka_hash_insert(daplka_hash_table_t *, uint64_t *, void *);
static int daplka_hash_remove(daplka_hash_table_t *, uint64_t, void **);
static void daplka_hash_walk(daplka_hash_table_t *, int (*)(void *, void *),
    void *, krw_t);
static void *daplka_hash_lookup(daplka_hash_table_t *, uint64_t);
static int daplka_hash_create(daplka_hash_table_t *, uint_t,
    void (*)(void *), void (*)(void *));
static void daplka_hash_destroy(daplka_hash_table_t *);
static uint32_t daplka_hash_getsize(daplka_hash_table_t *);
static void daplka_hash_generic_lookup(void *);

static uint32_t daplka_timer_hkey_gen();
/*
 * async event handlers
 */
static void daplka_async_event_create(ibt_async_code_t, ibt_async_event_t *,
    uint64_t, daplka_ia_resource_t *);
static void daplka_rc_async_handler(void *, ibt_hca_hdl_t, ibt_async_code_t,
    ibt_async_event_t *);
static void daplka_cq_async_handler(void *, ibt_hca_hdl_t, ibt_async_code_t,
    ibt_async_event_t *);
static void daplka_un_async_handler(void *, ibt_hca_hdl_t, ibt_async_code_t,
    ibt_async_event_t *);
static void daplka_async_handler(void *, ibt_hca_hdl_t, ibt_async_code_t,
    ibt_async_event_t *);
static void daplka_sm_notice_handler(void *, ib_gid_t, ibt_subnet_event_code_t,
    ibt_subnet_event_t *event);
static void daplka_sm_gid_avail(ib_gid_t *, ib_gid_t *);
/*
 * IBTF wrappers and default limits used for resource accounting
 */
static boolean_t	daplka_accounting_enabled = B_TRUE;
static uint32_t		daplka_max_qp_percent = 100;
static uint32_t		daplka_max_cq_percent = 100;
static uint32_t		daplka_max_pd_percent = 100;
static uint32_t		daplka_max_mw_percent = 100;
static uint32_t		daplka_max_mr_percent = 100;
static uint32_t		daplka_max_srq_percent = 100;
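
/*
 * Illustrative sketch (not part of the driver): the wrappers declared
 * below are assumed to enforce these limits by comparing the number of
 * objects already charged against a percentage of the HCA capacity,
 * along the lines of:
 *
 *	max_qps = hca_attrp->hca_max_chans * daplka_max_qp_percent / 100;
 *	if (daplka_accounting_enabled && qps_charged >= max_qps)
 *		return (IBT_INSUFF_RESOURCE);
 *
 * hca_attrp, qps_charged and max_qps are hypothetical names used only
 * for this sketch.
 */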
static ibt_status_t
daplka_ibt_alloc_rc_channel(daplka_ep_resource_t *, ibt_hca_hdl_t,
    ibt_chan_alloc_flags_t, ibt_rc_chan_alloc_args_t *,
    ibt_channel_hdl_t *, ibt_chan_sizes_t *);

static ibt_status_t
daplka_ibt_free_channel(daplka_ep_resource_t *, ibt_channel_hdl_t);

static ibt_status_t
daplka_ibt_alloc_cq(daplka_evd_resource_t *, ibt_hca_hdl_t,
    ibt_cq_attr_t *, ibt_cq_hdl_t *, uint_t *);

static ibt_status_t
daplka_ibt_free_cq(daplka_evd_resource_t *, ibt_cq_hdl_t);

static ibt_status_t
daplka_ibt_alloc_pd(daplka_pd_resource_t *, ibt_hca_hdl_t,
    ibt_pd_flags_t, ibt_pd_hdl_t *);

static ibt_status_t
daplka_ibt_free_pd(daplka_pd_resource_t *, ibt_hca_hdl_t, ibt_pd_hdl_t);

static ibt_status_t
daplka_ibt_alloc_mw(daplka_mw_resource_t *, ibt_hca_hdl_t, ibt_pd_hdl_t,
    ibt_mw_flags_t, ibt_mw_hdl_t *, ibt_rkey_t *);

static ibt_status_t
daplka_ibt_free_mw(daplka_mw_resource_t *, ibt_hca_hdl_t, ibt_mw_hdl_t);

static ibt_status_t
daplka_ibt_register_mr(daplka_mr_resource_t *, ibt_hca_hdl_t, ibt_pd_hdl_t,
    ibt_mr_attr_t *, ibt_mr_hdl_t *, ibt_mr_desc_t *);

static ibt_status_t
daplka_ibt_register_shared_mr(daplka_mr_resource_t *, ibt_hca_hdl_t,
    ibt_mr_hdl_t, ibt_pd_hdl_t, ibt_smr_attr_t *, ibt_mr_hdl_t *,
    ibt_mr_desc_t *);

static ibt_status_t
daplka_ibt_deregister_mr(daplka_mr_resource_t *, ibt_hca_hdl_t, ibt_mr_hdl_t);

static ibt_status_t
daplka_ibt_alloc_srq(daplka_srq_resource_t *, ibt_hca_hdl_t, ibt_srq_flags_t,
    ibt_pd_hdl_t, ibt_srq_sizes_t *, ibt_srq_hdl_t *, ibt_srq_sizes_t *);

static ibt_status_t
daplka_ibt_free_srq(daplka_srq_resource_t *, ibt_srq_hdl_t);
/*
 * macros for manipulating resource objects.
 * these macros can be used on objects that begin with a
 * daplka_resource_t header.
 */
#define	DAPLKA_RS_REFCNT(rp) ((rp)->header.rs_refcnt)

#define	DAPLKA_RS_REF(rp) {				\
	mutex_enter(&(rp)->header.rs_reflock);		\
	(rp)->header.rs_refcnt++;			\
	ASSERT((rp)->header.rs_refcnt != 0);		\
	mutex_exit(&(rp)->header.rs_reflock);		\
}

#define	DAPLKA_RS_UNREF(rp) {					\
	mutex_enter(&(rp)->header.rs_reflock);			\
	ASSERT((rp)->header.rs_refcnt != 0);			\
	if (--(rp)->header.rs_refcnt == 0) {			\
		ASSERT((rp)->header.rs_free != NULL);		\
		mutex_exit(&(rp)->header.rs_reflock);		\
		(rp)->header.rs_free((daplka_resource_t *)rp);	\
	} else {						\
		mutex_exit(&(rp)->header.rs_reflock);		\
	}							\
}

#define	DAPLKA_RS_INIT(rp, type, rnum, free_func) {	\
	(rp)->header.rs_refcnt = 1;			\
	(rp)->header.rs_type = (type);			\
	(rp)->header.rs_rnum = (rnum);			\
	(rp)->header.rs_charged = 0;			\
	(rp)->header.rs_free = (free_func);		\
	mutex_init(&(rp)->header.rs_reflock, NULL,	\
	    MUTEX_DRIVER, NULL);			\
}

#define	DAPLKA_RS_FINI(rp) {				\
	mutex_destroy(&(rp)->header.rs_reflock);	\
}

#define	DAPLKA_RS_ACCT_INC(rp, cnt) {				\
	atomic_add_32(&(rp)->header.rs_charged, (cnt));		\
}
#define	DAPLKA_RS_ACCT_DEC(rp, cnt) {				\
	atomic_add_32(&(rp)->header.rs_charged, -(cnt));	\
}
#define	DAPLKA_RS_ACCT_CHARGED(rp) ((rp)->header.rs_charged)

#define	DAPLKA_RS_RNUM(rp) ((rp)->header.rs_rnum)
#define	DAPLKA_RS_TYPE(rp) ((rp)->header.rs_type)
#define	DAPLKA_RS_RESERVED(rp) ((intptr_t)(rp) == DAPLKA_RC_RESERVED)
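
/*
 * Usage sketch (illustrative only, not compiled into this driver):
 * the ioctl paths in this file pair a hash lookup, which returns a
 * referenced object, with an UNREF on every exit path:
 *
 *	ep_rp = (daplka_ep_resource_t *)
 *	    daplka_hash_lookup(&ia_rp->ia_ep_htbl, hkey);
 *	if (ep_rp == NULL)
 *		return (EINVAL);
 *	... operate on ep_rp ...
 *	DAPLKA_RS_UNREF(ep_rp);
 *
 * when the last reference is dropped, DAPLKA_RS_UNREF invokes the
 * rs_free callback registered via DAPLKA_RS_INIT.
 */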
/*
 * depending on the timeout value does a cv_wait_sig or cv_timedwait_sig
 */
#define	DAPLKA_EVD_WAIT(cvp, mp, timeout)			\
	((timeout) == LONG_MAX) ? cv_wait_sig((cvp), (mp)) :	\
	cv_timedwait_sig((cvp), (mp), (timeout))
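
/*
 * Illustrative sketch (assumption, not part of the driver): a caller
 * blocking for events would typically loop on this macro, treating 0 as
 * "interrupted by signal" and -1 as "timed out", per the semantics of
 * cv_wait_sig/cv_timedwait_sig:
 *
 *	while (no_events_pending) {
 *		rval = DAPLKA_EVD_WAIT(&cvp, &mp, timeout);
 *		if (rval == 0 || rval == -1)
 *			break;
 *	}
 *
 * no_events_pending, cvp and mp are placeholder names for this sketch.
 */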
#define	DAPLKA_HOLD_HCA_WITHOUT_LOCK(hca)	((hca)->hca_ref_cnt++)
#define	DAPLKA_RELE_HCA_WITHOUT_LOCK(hca)	((hca)->hca_ref_cnt--)

#define	DAPLKA_HOLD_HCA(dp, hca) {			\
	mutex_enter(&(dp)->daplka_mutex);		\
	DAPLKA_HOLD_HCA_WITHOUT_LOCK(hca);		\
	mutex_exit(&(dp)->daplka_mutex);		\
}

#define	DAPLKA_RELE_HCA(dp, hca) {			\
	mutex_enter(&(dp)->daplka_mutex);		\
	DAPLKA_RELE_HCA_WITHOUT_LOCK(hca);		\
	mutex_exit(&(dp)->daplka_mutex);		\
}

#define	DAPLKA_HCA_BUSY(hca)		\
	((hca)->hca_ref_cnt != 0 ||	\
	(hca)->hca_qp_count != 0 ||	\
	(hca)->hca_cq_count != 0 ||	\
	(hca)->hca_pd_count != 0 ||	\
	(hca)->hca_mw_count != 0 ||	\
	(hca)->hca_mr_count != 0)
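
/*
 * Note: DAPLKA_HCA_BUSY is what prevents an HCA from being closed while
 * it still has users; daplka_fini_hcas below bails out with
 * IBT_HCA_RESOURCES_NOT_FREED as soon as it finds a busy HCA:
 *
 *	if (DAPLKA_HCA_BUSY(hca)) {
 *		mutex_exit(&daplka_dev->daplka_mutex);
 *		return (IBT_HCA_RESOURCES_NOT_FREED);
 *	}
 */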
static struct cb_ops daplka_cb_ops = {
	daplka_open,		/* cb_open */
	daplka_close,		/* cb_close */
	nodev,			/* cb_strategy */
	nodev,			/* cb_print */
	nodev,			/* cb_dump */
	nodev,			/* cb_read */
	nodev,			/* cb_write */
	daplka_ioctl,		/* cb_ioctl */
	nodev,			/* cb_devmap */
	nodev,			/* cb_mmap */
	nodev,			/* cb_segmap */
	nochpoll,		/* cb_chpoll */
	ddi_prop_op,		/* cb_prop_op */
	NULL,			/* cb_stream */
	D_NEW | D_MP,		/* cb_flag */
	CB_REV,			/* cb_rev */
	nodev,			/* int (*cb_aread)() */
	nodev			/* int (*cb_awrite)() */
};
static struct dev_ops daplka_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* devo_refcnt */
	daplka_info,		/* devo_getinfo */
	nulldev,		/* devo_identify */
	nulldev,		/* devo_probe */
	daplka_attach,		/* devo_attach */
	daplka_detach,		/* devo_detach */
	nodev,			/* devo_reset */
	&daplka_cb_ops,		/* devo_cb_ops */
	NULL,			/* devo_bus_ops */
	nodev,			/* devo_power */
	ddi_quiesce_not_needed,	/* devo_quiesce */
};
/*
 * Module linkage information for the kernel.
 */
static struct modldrv modldrv = {
	&mod_driverops,
	"uDAPL Service Driver",
	&daplka_ops,
};

static struct modlinkage modlinkage = {
#ifdef _LP64
	MODREV_1, { (void *) &modldrv, NULL, NULL, NULL, NULL, NULL, NULL }
#else /* _LP64 */
	MODREV_1, { (void *) &modldrv, NULL, NULL, NULL }
#endif /* _LP64 */
};
/*
 * daplka_dev holds global driver state and a list of HCAs
 */
static daplka_t *daplka_dev = NULL;
static void *daplka_state = NULL;

/*
 * global SP hash table
 */
static daplka_hash_table_t daplka_global_sp_htbl;

/*
 * timer_info hash table
 */
static daplka_hash_table_t daplka_timer_info_htbl;
static uint32_t daplka_timer_hkey = 0;

static avl_tree_t daplka_shared_mr_tree;
static kmutex_t daplka_shared_mr_lock;
static int daplka_shared_mr_cmp(const void *, const void *);
_NOTE(MUTEX_PROTECTS_DATA(daplka_shared_mr_lock,
    daplka_shared_mr_tree))

/*
 * default kmem flags used by this driver
 */
static int daplka_km_flags = KM_SLEEP;

/*
 * taskq used for handling background tasks
 */
static taskq_t *daplka_taskq = NULL;

/*
 * daplka_cm_delay is the length of time the active
 * side needs to wait before timing out on the REP message.
 */
static clock_t daplka_cm_delay = 60000000;

/*
 * modunload will fail if pending_close is non-zero
 */
static uint32_t daplka_pending_close = 0;

static struct ibt_clnt_modinfo_s daplka_clnt_modinfo = {
	IBTI_V_CURR,
	IBT_USER,
	daplka_async_handler,
	NULL,
	DAPLKA_DRV_NAME
};
/*
 * Module Installation
 */
int
_init(void)
{
	int status;

	status = ddi_soft_state_init(&daplka_state, sizeof (daplka_t), 1);
	if (status != 0) {
		return (status);
	}

	mutex_init(&daplka_dbglock, NULL, MUTEX_DRIVER, NULL);
	bzero(daplka_dbgbuf, sizeof (daplka_dbgbuf));
	daplka_dbgnext = 0;
	daplka_dbginit = 1;

	daplka_resource_init();

	status = mod_install(&modlinkage);
	if (status != DDI_SUCCESS) {
		/* undo inits done before mod_install */
		daplka_resource_fini();
		mutex_destroy(&daplka_dbglock);
		ddi_soft_state_fini(&daplka_state);
	}
	return (status);
}
int
_fini(void)
{
	int status;

	/*
	 * mod_remove causes detach to be called
	 */
	if ((status = mod_remove(&modlinkage)) != 0) {
		DERR("fini: mod_remove failed: 0x%x\n", status);
		return (status);
	}

	daplka_resource_fini();
	mutex_destroy(&daplka_dbglock);
	ddi_soft_state_fini(&daplka_state);

	return (status);
}
/*
 * Return Module Info.
 */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}
static void
daplka_enqueue_hca(daplka_t *dp, daplka_hca_t *hca)
{
	daplka_hca_t *h;

	ASSERT(mutex_owned(&dp->daplka_mutex));

	if (dp->daplka_hca_list_head == NULL) {
		dp->daplka_hca_list_head = hca;
	} else {
		h = dp->daplka_hca_list_head;
		while (h->hca_next != NULL)
			h = h->hca_next;
		h->hca_next = hca;
	}
}
static void
daplka_dequeue_hca(daplka_t *dp, daplka_hca_t *hca)
{
	daplka_hca_t *h;

	ASSERT(mutex_owned(&dp->daplka_mutex));

	if (dp->daplka_hca_list_head == hca) {
		dp->daplka_hca_list_head = hca->hca_next;
	} else {
		h = dp->daplka_hca_list_head;
		while (h->hca_next != hca)
			h = h->hca_next;
		h->hca_next = hca->hca_next;
	}
}
static ibt_status_t
daplka_init_hca(daplka_t *dp, ib_guid_t hca_guid)
{
	daplka_hca_t		*hca;
	ibt_hca_portinfo_t	*pinfop;
	uint_t			size;
	int			j;
	ibt_status_t		status;

	hca = kmem_zalloc(sizeof (daplka_hca_t), KM_SLEEP);

	hca->hca_guid = hca_guid;

	/*
	 * open the HCA for use
	 */
	status = ibt_open_hca(dp->daplka_clnt_hdl, hca_guid, &hca->hca_hdl);
	if (status != IBT_SUCCESS) {
		if (status == IBT_HCA_IN_USE) {
			DERR("ibt_open_hca() returned IBT_HCA_IN_USE\n");
		} else {
			DERR("ibt_open_hca() returned %d\n", status);
		}
		kmem_free(hca, sizeof (daplka_hca_t));
		return (status);
	}

	/*
	 * query HCA to get its info
	 */
	status = ibt_query_hca(hca->hca_hdl, &hca->hca_attr);
	if (status != IBT_SUCCESS) {
		DERR("ibt_query_hca returned %d (hca_guid 0x%llx)\n",
		    status, (longlong_t)hca_guid);
		goto out;
	}

	/*
	 * query HCA to get info of all ports
	 */
	status = ibt_query_hca_ports(hca->hca_hdl,
	    0, &pinfop, &hca->hca_nports, &size);
	if (status != IBT_SUCCESS) {
		DERR("ibt_query_all_ports returned %d "
		    "(hca_guid 0x%llx)\n", status,
		    (longlong_t)hca_guid);
		goto out;
	}
	hca->hca_ports = pinfop;
	hca->hca_pinfosz = size;

	DERR("hca guid 0x%llx, nports %d\n",
	    (longlong_t)hca_guid, hca->hca_nports);
	for (j = 0; j < hca->hca_nports; j++) {
		DERR("port %d: state %d prefix 0x%016llx "
		    "guid %016llx\n",
		    pinfop[j].p_port_num, pinfop[j].p_linkstate,
		    (longlong_t)pinfop[j].p_sgid_tbl[0].gid_prefix,
		    (longlong_t)pinfop[j].p_sgid_tbl[0].gid_guid);
	}

	mutex_enter(&dp->daplka_mutex);
	daplka_enqueue_hca(dp, hca);
	mutex_exit(&dp->daplka_mutex);

	return (IBT_SUCCESS);

out:
	(void) ibt_close_hca(hca->hca_hdl);
	kmem_free(hca, sizeof (daplka_hca_t));
	return (status);
}
/*
 * this function obtains the list of HCAs from IBTF.
 * the HCAs are then opened and the returned handles
 * and attributes are stored into the global daplka_dev
 * structure.
 */
static ibt_status_t
daplka_init_hcas(daplka_t *dp)
{
	int		i;
	ib_guid_t	*hca_guids;
	uint_t		hca_count;

	/*
	 * get the num & list of HCAs present
	 */
	hca_count = ibt_get_hca_list(&hca_guids);
	DERR("No. of HCAs present %d\n", hca_count);

	if (hca_count != 0) {
		/*
		 * get the info for each available HCA
		 */
		for (i = 0; i < hca_count; i++)
			(void) daplka_init_hca(dp, hca_guids[i]);

		ibt_free_hca_list(hca_guids, hca_count);
	}

	if (dp->daplka_hca_list_head != NULL)
		return (IBT_SUCCESS);
	else
		return (IBT_FAILURE);
}
static ibt_status_t
daplka_fini_hca(daplka_t *dp, daplka_hca_t *hca)
{
	ibt_status_t	status;

	if (hca->hca_hdl != NULL) {
		status = ibt_close_hca(hca->hca_hdl);
		if (status != IBT_SUCCESS) {
			DERR("ibt_close_hca returned %d"
			    " (hca_guid 0x%llx)\n", status,
			    (longlong_t)hca->hca_guid);

			mutex_enter(&dp->daplka_mutex);
			daplka_enqueue_hca(dp, hca);
			mutex_exit(&dp->daplka_mutex);

			return (status);
		}
	}

	if (hca->hca_ports != NULL)
		ibt_free_portinfo(hca->hca_ports, hca->hca_pinfosz);

	kmem_free(hca, sizeof (daplka_hca_t));
	return (IBT_SUCCESS);
}
/*
 * closes all HCAs and frees up the HCA list
 */
static ibt_status_t
daplka_fini_hcas(daplka_t *dp)
{
	ibt_status_t	status;
	daplka_hca_t	*hca;

	mutex_enter(&daplka_dev->daplka_mutex);
	while ((hca = dp->daplka_hca_list_head) != NULL) {
		if (DAPLKA_HCA_BUSY(hca)) {
			mutex_exit(&daplka_dev->daplka_mutex);
			return (IBT_HCA_RESOURCES_NOT_FREED);
		}
		daplka_dequeue_hca(daplka_dev, hca);
		mutex_exit(&daplka_dev->daplka_mutex);

		if ((status = daplka_fini_hca(dp, hca)) != IBT_SUCCESS)
			return (status);

		mutex_enter(&daplka_dev->daplka_mutex);
	}
	mutex_exit(&daplka_dev->daplka_mutex);

	DERR("dapl kernel agent unloaded\n");
	return (IBT_SUCCESS);
}
/*
 * Attach the device, create and fill in daplka_dev
 */
static int
daplka_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	daplka_t	*dp;
	int		instance, retval, err;
	boolean_t	sp_htbl_allocated = B_FALSE;
	boolean_t	timer_htbl_allocated = B_FALSE;
	boolean_t	shared_mr_tree_allocated = B_FALSE;

	switch (cmd) {
	case DDI_ATTACH:
		break;
	case DDI_RESUME:
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}

	/*
	 * Allocate soft data structure
	 */
	instance = ddi_get_instance(dip);
	if (ddi_soft_state_zalloc(daplka_state, instance) != DDI_SUCCESS) {
		DERR("attach: bad state zalloc\n");
		return (DDI_FAILURE);
	}

	dp = ddi_get_soft_state(daplka_state, instance);
	if (dp == NULL) {
		ddi_soft_state_free(daplka_state, instance);
		DERR("attach: cannot get soft state\n");
		return (DDI_FAILURE);
	}
	/*
	 * Stuff private info into dip.
	 */
	dp->daplka_dip = dip;
	ddi_set_driver_private(dip, dp);
	daplka_dev = dp;
	mutex_init(&dp->daplka_mutex, NULL, MUTEX_DRIVER, NULL);

	/*
	 * Register driver with IBTF
	 */
	retval = ibt_attach(&daplka_clnt_modinfo, dip, dp,
	    &dp->daplka_clnt_hdl);
	if (retval != IBT_SUCCESS) {
		DERR("attach: ibt_attach failed: error = %d\n", retval);
		retval = DDI_FAILURE;
		goto error;
	}
	/* Register to receive SM events */
	ibt_register_subnet_notices(dp->daplka_clnt_hdl,
	    daplka_sm_notice_handler, NULL);

	retval = daplka_init_hcas(dp);
	if (retval != IBT_SUCCESS) {
		DERR("attach: hca_init failed: error = %d\n", retval);
		retval = DDI_FAILURE;
		goto error;
	}
	/*
	 * this table is used by cr_handoff
	 */
	retval = daplka_hash_create(&daplka_global_sp_htbl,
	    DAPLKA_G_SP_HTBL_SZ, daplka_hash_sp_unref,
	    daplka_hash_generic_lookup);
	if (retval != 0) {
		DERR("attach: cannot create sp hash table\n");
		retval = DDI_FAILURE;
		goto error;
	}
	sp_htbl_allocated = B_TRUE;

	/*
	 * this table stores per EP timer information.
	 * timer_info_t objects are inserted into this table whenever
	 * an EP timer is set. timers get removed when they expire
	 * or when they get cancelled.
	 */
	retval = daplka_hash_create(&daplka_timer_info_htbl,
	    DAPLKA_TIMER_HTBL_SZ, daplka_hash_timer_free, NULL);
	if (retval != 0) {
		DERR("attach: cannot create timer hash table\n");
		retval = DDI_FAILURE;
		goto error;
	}
	timer_htbl_allocated = B_TRUE;

	/*
	 * this taskq is currently only used for processing timers.
	 * other processing may also use this taskq in the future.
	 */
	daplka_taskq = taskq_create(DAPLKA_DRV_NAME, DAPLKA_TQ_NTHREADS,
	    maxclsyspri, 1, DAPLKA_TQ_NTHREADS, TASKQ_DYNAMIC);
	if (daplka_taskq == NULL) {
		DERR("attach: cannot create daplka_taskq\n");
		retval = DDI_FAILURE;
		goto error;
	}

	/*
	 * daplka_shared_mr_tree holds daplka_shared_mr_t objects that
	 * gets retrieved or created when daplka_mr_register_shared is
	 * called.
	 */
	mutex_init(&daplka_shared_mr_lock, NULL, MUTEX_DRIVER, NULL);

	avl_create(&daplka_shared_mr_tree, daplka_shared_mr_cmp,
	    sizeof (daplka_shared_mr_t),
	    offsetof(daplka_shared_mr_t, smr_node));
	shared_mr_tree_allocated = B_TRUE;

	/*
	 * Create the filesystem device node.
	 */
	if (ddi_create_minor_node(dip, DAPLKA_MINOR_NAME, S_IFCHR,
	    0, DDI_PSEUDO, 0) != DDI_SUCCESS) {
		DERR("attach: bad create_minor_node\n");
		retval = DDI_FAILURE;
		goto error;
	}
	dp->daplka_status = DAPLKA_STATE_ATTACHED;
	return (DDI_SUCCESS);

error:
	if (shared_mr_tree_allocated) {
		avl_destroy(&daplka_shared_mr_tree);
		mutex_destroy(&daplka_shared_mr_lock);
	}

	if (daplka_taskq != NULL) {
		taskq_destroy(daplka_taskq);
		daplka_taskq = NULL;
	}

	if (timer_htbl_allocated) {
		daplka_hash_destroy(&daplka_timer_info_htbl);
	}

	if (sp_htbl_allocated) {
		daplka_hash_destroy(&daplka_global_sp_htbl);
	}

	err = daplka_fini_hcas(dp);
	if (err != IBT_SUCCESS) {
		DERR("attach: hca_fini returned %d\n", err);
	}

	if (dp->daplka_clnt_hdl != NULL) {
		/* unregister SM event notification */
		ibt_register_subnet_notices(dp->daplka_clnt_hdl,
		    (ibt_sm_notice_handler_t)NULL, NULL);
		err = ibt_detach(dp->daplka_clnt_hdl);

		if (err != IBT_SUCCESS) {
			DERR("attach: ibt_detach returned %d\n", err);
		}
	}
	mutex_destroy(&dp->daplka_mutex);

	if (dp->daplka_status == DAPLKA_STATE_ATTACHED) {
		ddi_remove_minor_node(dip, NULL);
	}
	ddi_soft_state_free(daplka_state, instance);
	return (retval);
}
/*
 * Detach - Free resources allocated in attach
 */
static int
daplka_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int		instance, err;
	void		*cookie = NULL;
	daplka_t	*dp;

	if (cmd != DDI_DETACH) {
		return (DDI_FAILURE);
	}
	if (daplka_resource.daplka_rc_cnt > 0 ||
	    daplka_pending_close > 0) {
		DERR("detach: driver in use\n");
		return (DDI_FAILURE);
	}

	instance = ddi_get_instance(dip);
	dp = ddi_get_soft_state(daplka_state, instance);
	if (dp == NULL) {
		DERR("detach: cannot get soft state\n");
		return (DDI_FAILURE);
	}
	err = daplka_fini_hcas(dp);
	if (err != IBT_SUCCESS) {
		DERR("detach: hca_fini returned %d\n", err);
		return (DDI_FAILURE);
	}
	if (dp->daplka_clnt_hdl != NULL) {
		/* unregister SM event notification */
		ibt_register_subnet_notices(dp->daplka_clnt_hdl,
		    (ibt_sm_notice_handler_t)NULL, NULL);
		err = ibt_detach(dp->daplka_clnt_hdl);
		if (err != IBT_SUCCESS) {
			DERR("detach: ibt_detach returned %d\n", err);
			return (DDI_FAILURE);
		}
		dp->daplka_clnt_hdl = NULL;
	}
	mutex_destroy(&dp->daplka_mutex);
	if (dp->daplka_status == DAPLKA_STATE_ATTACHED) {
		ddi_remove_minor_node(dip, NULL);
	}
	dp->daplka_status = DAPLKA_STATE_DETACHED;
	ddi_soft_state_free(daplka_state, instance);
	daplka_dev = NULL;

	/*
	 * by the time we get here, all clients of dapl should
	 * have exited and completed their cleanup properly.
	 * we can assert that all global data structures are now
	 * empty.
	 */
	ASSERT(avl_destroy_nodes(&daplka_shared_mr_tree, &cookie) == NULL);
	avl_destroy(&daplka_shared_mr_tree);
	mutex_destroy(&daplka_shared_mr_lock);

	ASSERT(daplka_hash_getsize(&daplka_timer_info_htbl) == 0);
	daplka_hash_destroy(&daplka_timer_info_htbl);

	ASSERT(daplka_hash_getsize(&daplka_global_sp_htbl) == 0);
	daplka_hash_destroy(&daplka_global_sp_htbl);

	taskq_destroy(daplka_taskq);

	return (DDI_SUCCESS);
}
/* ARGSUSED */
static int
daplka_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		if (daplka_dev != NULL) {
			*result = daplka_dev->daplka_dip;
			return (DDI_SUCCESS);
		} else {
			return (DDI_FAILURE);
		}

	case DDI_INFO_DEVT2INSTANCE:
		*result = 0;
		return (DDI_SUCCESS);

	default:
		return (DDI_FAILURE);
	}
}
/*
 * creates an EP resource.
 * An EP resource contains an RC channel. An EP resource holds a
 * reference to a send_evd (for the send CQ), recv_evd (for the
 * recv CQ), a connection evd and a PD. These references ensure
 * that the referenced resources are not freed until the EP itself
 * gets freed.
 */
/* ARGSUSED */
static int
daplka_ep_create(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	daplka_ep_resource_t		*ep_rp;
	daplka_pd_resource_t		*pd_rp;
	dapl_ep_create_t		args;
	ibt_rc_chan_alloc_args_t	chan_args;
	ibt_chan_alloc_flags_t		achan_flags;
	ibt_chan_sizes_t		chan_real_sizes;
	ibt_hca_attr_t			*hca_attrp;
	uint64_t			ep_hkey = 0;
	boolean_t			inserted = B_FALSE;
	uint32_t			old_state, new_state;
	int				retval;
	ibt_status_t			status;

	D3("ep_create: enter\n");
	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ep_create_t),
	    mode);
	if (retval != 0) {
		DERR("ep_create: copyin error %d\n", retval);
		return (EFAULT);
	}
	ep_rp = kmem_zalloc(sizeof (daplka_ep_resource_t), daplka_km_flags);
	if (ep_rp == NULL) {
		DERR("ep_create: cannot allocate ep_rp\n");
		return (ENOMEM);
	}
	DAPLKA_RS_INIT(ep_rp, DAPL_TYPE_EP,
	    DAPLKA_RS_RNUM(ia_rp), daplka_ep_destroy);

	mutex_init(&ep_rp->ep_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&ep_rp->ep_cv, NULL, CV_DRIVER, NULL);
	ep_rp->ep_hca = ia_rp->ia_hca;
	ep_rp->ep_cookie = args.ep_cookie;
	ep_rp->ep_timer_hkey = 0;

	/*
	 * we don't have to use ep_get_state here because ep_rp is not in
	 * ep_htbl yet. refer to the description of daplka_ep_set_state
	 * for details about the EP state machine.
	 */
	ep_rp->ep_state = DAPLKA_EP_STATE_TRANSITIONING;
	new_state = old_state = DAPLKA_EP_STATE_CLOSED;

	/* get reference to send evd and get cq handle */
	ep_rp->ep_snd_evd = (daplka_evd_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.ep_snd_evd_hkey);
	if (ep_rp->ep_snd_evd == NULL) {
		DERR("ep_create: ep_snd_evd %llx not found\n",
		    args.ep_snd_evd_hkey);
		retval = EINVAL;
		goto cleanup;
	}
	chan_args.rc_scq = ep_rp->ep_snd_evd->evd_cq_hdl;
	if (chan_args.rc_scq == NULL) {
		DERR("ep_create: ep_snd_evd cq invalid\n");
		retval = EINVAL;
		goto cleanup;
	}

	/* get reference to recv evd and get cq handle */
	ep_rp->ep_rcv_evd = (daplka_evd_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.ep_rcv_evd_hkey);
	if (ep_rp->ep_rcv_evd == NULL) {
		DERR("ep_create: ep_rcv_evd %llx not found\n",
		    args.ep_rcv_evd_hkey);
		retval = EINVAL;
		goto cleanup;
	}
	chan_args.rc_rcq = ep_rp->ep_rcv_evd->evd_cq_hdl;
	if (chan_args.rc_rcq == NULL) {
		DERR("ep_create: ep_rcv_evd cq invalid\n");
		retval = EINVAL;
		goto cleanup;
	}

	/* get reference to conn evd */
	ep_rp->ep_conn_evd = (daplka_evd_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.ep_conn_evd_hkey);
	if (ep_rp->ep_conn_evd == NULL) {
		DERR("ep_create: ep_conn_evd %llx not found\n",
		    args.ep_conn_evd_hkey);
		retval = EINVAL;
		goto cleanup;
	}

	/* get reference to SRQ if needed */
	if (args.ep_srq_attached) {
		ep_rp->ep_srq_res = (daplka_srq_resource_t *)daplka_hash_lookup(
		    &ia_rp->ia_srq_htbl, args.ep_srq_hkey);
		if (ep_rp->ep_srq_res == NULL) {
			DERR("ep_create: ep_srq %llx not found\n",
			    (longlong_t)args.ep_srq_hkey);
			retval = EINVAL;
			goto cleanup;
		}
		ASSERT(DAPLKA_RS_TYPE(ep_rp->ep_srq_res) == DAPL_TYPE_SRQ);
		D3("ep_create: ep_srq %p %llx\n", ep_rp->ep_srq_res,
		    (longlong_t)args.ep_srq_hkey);
	} else {
		ep_rp->ep_srq_res = NULL;
	}

	pd_rp = (daplka_pd_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_pd_htbl, args.ep_pd_hkey);
	if (pd_rp == NULL) {
		DERR("ep_create: cannot find pd resource\n");
		retval = EINVAL;
		goto cleanup;
	}
	ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
	ep_rp->ep_pd_res = pd_rp;
	chan_args.rc_pd = pd_rp->pd_hdl;

	/*
	 * these checks ensure that the requested channel sizes
	 * are within the limits supported by the chosen HCA.
	 */
	hca_attrp = &ia_rp->ia_hca->hca_attr;
	if (args.ep_ch_sizes.dcs_sq_sgl > hca_attrp->hca_max_sgl) {
		DERR("ep_create: invalid cs_sq_sgl %d\n",
		    args.ep_ch_sizes.dcs_sq_sgl);
		retval = EINVAL;
		goto cleanup;
	}
	if (args.ep_ch_sizes.dcs_rq_sgl > hca_attrp->hca_max_sgl) {
		DERR("ep_create: invalid cs_rq_sgl %d\n",
		    args.ep_ch_sizes.dcs_rq_sgl);
		retval = EINVAL;
		goto cleanup;
	}
	if (args.ep_ch_sizes.dcs_sq > hca_attrp->hca_max_chan_sz) {
		DERR("ep_create: invalid cs_sq %d\n",
		    args.ep_ch_sizes.dcs_sq);
		retval = EINVAL;
		goto cleanup;
	}
	if (args.ep_ch_sizes.dcs_rq > hca_attrp->hca_max_chan_sz) {
		DERR("ep_create: invalid cs_rq %d\n",
		    args.ep_ch_sizes.dcs_rq);
		retval = EINVAL;
		goto cleanup;
	}

	chan_args.rc_sizes.cs_sq_sgl = args.ep_ch_sizes.dcs_sq_sgl;
	chan_args.rc_sizes.cs_rq_sgl = args.ep_ch_sizes.dcs_rq_sgl;
	chan_args.rc_sizes.cs_sq = args.ep_ch_sizes.dcs_sq;
	chan_args.rc_sizes.cs_rq = args.ep_ch_sizes.dcs_rq;
	chan_args.rc_flags = IBT_WR_SIGNALED;
	chan_args.rc_control = IBT_CEP_RDMA_RD | IBT_CEP_RDMA_WR;
	chan_args.rc_hca_port_num = ia_rp->ia_port_num;
	chan_args.rc_clone_chan = NULL;
	if (args.ep_srq_attached) {
		chan_args.rc_srq = ep_rp->ep_srq_res->srq_hdl;
	} else {
		chan_args.rc_srq = NULL;
	}

	D3("ep_create: sq_sgl %d, rq_sgl %d, sq %d, rq %d, "
	    "sig_type 0x%x, control 0x%x, portnum %d, clone_chan 0x%p\n",
	    args.ep_ch_sizes.dcs_sq_sgl, args.ep_ch_sizes.dcs_rq_sgl,
	    args.ep_ch_sizes.dcs_sq, args.ep_ch_sizes.dcs_rq,
	    chan_args.rc_flags, chan_args.rc_control,
	    chan_args.rc_hca_port_num, chan_args.rc_clone_chan);

	if (args.ep_srq_attached) {
		achan_flags = IBT_ACHAN_USER_MAP | IBT_ACHAN_USES_SRQ;
	} else {
		achan_flags = IBT_ACHAN_USER_MAP;
	}
	/* create rc channel */
	status = daplka_ibt_alloc_rc_channel(ep_rp, ia_rp->ia_hca_hdl,
	    achan_flags, &chan_args, &ep_rp->ep_chan_hdl,
	    &chan_real_sizes);
	if (status != IBT_SUCCESS) {
		DERR("ep_create: alloc_rc_channel returned %d\n", status);
		*rvalp = (int)status;
		retval = 0;
		goto cleanup;
	}

	args.ep_ch_real_sizes.dcs_sq = chan_real_sizes.cs_sq;
	args.ep_ch_real_sizes.dcs_rq = chan_real_sizes.cs_rq;
	args.ep_ch_real_sizes.dcs_sq_sgl = chan_real_sizes.cs_sq_sgl;
	args.ep_ch_real_sizes.dcs_rq_sgl = chan_real_sizes.cs_rq_sgl;

	/*
	 * store ep ptr with chan_hdl.
	 * this ep_ptr is used by the CM handlers (both active and
	 * passive sides). the mutex is only needed for the race of
	 * "destroy" and "async" callbacks.
	 */
	mutex_enter(&daplka_dev->daplka_mutex);
	ibt_set_chan_private(ep_rp->ep_chan_hdl, (void *)ep_rp);
	mutex_exit(&daplka_dev->daplka_mutex);

	/* Get HCA-specific data_out info */
	status = ibt_ci_data_out(ia_rp->ia_hca_hdl,
	    IBT_CI_NO_FLAGS, IBT_HDL_CHANNEL, (void *)ep_rp->ep_chan_hdl,
	    &args.ep_qp_data_out, sizeof (args.ep_qp_data_out));

	if (status != IBT_SUCCESS) {
		DERR("ep_create: ibt_ci_data_out error(%d)\n",
		    status);
		*rvalp = (int)status;
		retval = 0;
		goto cleanup;
	}

	/* insert into ep hash table */
	retval = daplka_hash_insert(&ia_rp->ia_ep_htbl,
	    &ep_hkey, (void *)ep_rp);
	if (retval != 0) {
		DERR("ep_create: cannot insert ep resource into ep_htbl\n");
		goto cleanup;
	}
	inserted = B_TRUE;

	/*
	 * at this point, the ep_rp can be looked up by other threads
	 * if they manage to guess the correct hkey. but they are not
	 * permitted to operate on ep_rp until we transition to the
	 * CLOSED state.
	 */

	/* return hkey to library */
	args.ep_hkey = ep_hkey;

	retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_ep_create_t),
	    mode);
	if (retval != 0) {
		DERR("ep_create: copyout error %d\n", retval);
		retval = EFAULT;
		goto cleanup;
	}

	daplka_ep_set_state(ep_rp, old_state, new_state);
	D3("ep_create: exit\n");
	return (0);

cleanup:;
	if (inserted) {
		daplka_ep_resource_t *free_rp = NULL;

		(void) daplka_hash_remove(&ia_rp->ia_ep_htbl, ep_hkey,
		    (void **)&free_rp);
		if (free_rp != ep_rp) {
			/*
			 * this case is impossible because ep_free will
			 * wait until our state transition is complete.
			 */
			DERR("ep_create: cannot remove ep from hash table\n");
			ASSERT(B_FALSE);
			return (retval);
		}
	}
	new_state = DAPLKA_EP_STATE_FREED;
	daplka_ep_set_state(ep_rp, old_state, new_state);
	DAPLKA_RS_UNREF(ep_rp);
	return (retval);
}
/*
 * daplka_ep_get_state retrieves the current state of the EP and
 * sets the state to TRANSITIONING. if the current state is already
 * TRANSITIONING, this function will wait until the state becomes one
 * of the other EP states. Most of the EP related ioctls follow the
 * call sequence:
 *
 *	new_state = old_state = daplka_ep_get_state(ep_rp);
 *	...
 *	...some code that affects the EP
 *	...
 *	new_state = <NEW_STATE>;
 *	daplka_ep_set_state(ep_rp, old_state, new_state);
 *
 * this call sequence ensures that only one thread may access the EP
 * during the time ep_state is in TRANSITIONING. daplka_ep_set_state
 * transitions ep_state to new_state and wakes up any waiters blocking
 * on ep_cv.
 */
static uint32_t
daplka_ep_get_state(daplka_ep_resource_t *ep_rp)
{
	uint32_t	old_state = 0;

	mutex_enter(&ep_rp->ep_lock);
	while (ep_rp->ep_state == DAPLKA_EP_STATE_TRANSITIONING) {
		D2("get_state: wait for state transition to complete\n");
		cv_wait(&ep_rp->ep_cv, &ep_rp->ep_lock);
		D2("get_state: done, curr state = %d\n", ep_rp->ep_state);
	}
	ASSERT(ep_rp->ep_state != DAPLKA_EP_STATE_TRANSITIONING);
	old_state = ep_rp->ep_state;

	/*
	 * an ep that is in the FREED state cannot transition
	 * back to any of the regular states
	 */
	if (old_state != DAPLKA_EP_STATE_FREED) {
		ep_rp->ep_state = DAPLKA_EP_STATE_TRANSITIONING;
	}
	mutex_exit(&ep_rp->ep_lock);
	return (old_state);
}
/*
 * EP state transition diagram
 *
 * (the original ASCII-art diagram did not survive; it is summarized
 * here. the authoritative rules are encoded in
 * daplka_ep_transition_is_valid below.)
 *
 *	CLOSED -> CONNECTING or ACCEPTING
 *	CONNECTING -> CONNECTED, DISCONNECTING, DISCONNECTED or ABORTING
 *	ACCEPTING -> CONNECTED, DISCONNECTING or DISCONNECTED
 *	CONNECTED -> DISCONNECTING or DISCONNECTED
 *	DISCONNECTING -> DISCONNECTED
 *	ABORTING -> DISCONNECTED
 *	DISCONNECTED -> CLOSED
 *
 * *not shown in this diagram:
 *	-loopback transitions
 *	-transitions to the FREED state
 */
static boolean_t
daplka_ep_transition_is_valid(uint32_t old_state, uint32_t new_state)
{
	boolean_t valid = B_FALSE;

	/*
	 * resetting to the same state is a no-op and is always
	 * permitted. transitioning to the FREED state indicates
	 * that the ep is about to be freed and no further operation
	 * is allowed on it. to support abrupt close, the ep is
	 * permitted to transition to the FREED state from any state.
	 */
	if (old_state == new_state ||
	    new_state == DAPLKA_EP_STATE_FREED) {
		return (B_TRUE);
	}

	switch (old_state) {
	case DAPLKA_EP_STATE_CLOSED:
		/*
		 * this is the initial ep_state.
		 * a transition to CONNECTING or ACCEPTING may occur
		 * upon calling daplka_ep_connect or daplka_cr_accept,
		 * respectively.
		 */
		if (new_state == DAPLKA_EP_STATE_CONNECTING ||
		    new_state == DAPLKA_EP_STATE_ACCEPTING) {
			valid = B_TRUE;
		}
		break;

	case DAPLKA_EP_STATE_CONNECTING:
		/*
		 * we transition to this state if daplka_ep_connect
		 * is successful. from this state, we can transition
		 * to CONNECTED if daplka_cm_rc_conn_est gets called;
		 * or to DISCONNECTED if daplka_cm_rc_conn_closed or
		 * daplka_cm_rc_event_failure gets called. If the
		 * client calls daplka_ep_disconnect, we transition
		 * to DISCONNECTING. If a timer was set at ep_connect
		 * time and if the timer expires prior to any of the
		 * CM callbacks, we transition to ABORTING and then
		 * to DISCONNECTED.
		 */
		if (new_state == DAPLKA_EP_STATE_CONNECTED ||
		    new_state == DAPLKA_EP_STATE_DISCONNECTING ||
		    new_state == DAPLKA_EP_STATE_DISCONNECTED ||
		    new_state == DAPLKA_EP_STATE_ABORTING) {
			valid = B_TRUE;
		}
		break;

	case DAPLKA_EP_STATE_ACCEPTING:
		/*
		 * we transition to this state if daplka_cr_accept
		 * is successful. from this state, we can transition
		 * to CONNECTED if daplka_cm_service_conn_est gets called;
		 * or to DISCONNECTED if daplka_cm_service_conn_closed or
		 * daplka_cm_service_event_failure gets called. If the
		 * client calls daplka_ep_disconnect, we transition to
		 * DISCONNECTING.
		 */
		if (new_state == DAPLKA_EP_STATE_CONNECTED ||
		    new_state == DAPLKA_EP_STATE_DISCONNECTING ||
		    new_state == DAPLKA_EP_STATE_DISCONNECTED) {
			valid = B_TRUE;
		}
		break;

	case DAPLKA_EP_STATE_CONNECTED:
		/*
		 * we transition to this state if an active or passive
		 * connection gets established. if the client calls
		 * daplka_ep_disconnect, we transition to the
		 * DISCONNECTING state. subsequent CM callbacks will
		 * cause ep_state to be set to DISCONNECTED. If the
		 * remote peer terminates the connection before we do,
		 * it is possible for us to transition directly from
		 * CONNECTED to DISCONNECTED.
		 */
		if (new_state == DAPLKA_EP_STATE_DISCONNECTING ||
		    new_state == DAPLKA_EP_STATE_DISCONNECTED) {
			valid = B_TRUE;
		}
		break;

	case DAPLKA_EP_STATE_DISCONNECTING:
		/*
		 * we transition to this state if the client calls
		 * daplka_ep_disconnect.
		 */
		if (new_state == DAPLKA_EP_STATE_DISCONNECTED) {
			valid = B_TRUE;
		}
		break;

	case DAPLKA_EP_STATE_ABORTING:
		/*
		 * we transition to this state if the active side
		 * EP timer has expired. this is only a transient
		 * state that is set during timer processing. when
		 * timer processing completes, ep_state will become
		 * DISCONNECTED.
		 */
		if (new_state == DAPLKA_EP_STATE_DISCONNECTED) {
			valid = B_TRUE;
		}
		break;

	case DAPLKA_EP_STATE_DISCONNECTED:
		/*
		 * we transition to this state if we get a closed
		 * or event_failure CM callback. an expired timer
		 * can also cause us to be in this state. this
		 * is the only state in which we permit the
		 * ep_reinit operation.
		 */
		if (new_state == DAPLKA_EP_STATE_CLOSED) {
			valid = B_TRUE;
		}
		break;

	default:
		break;
	}

	if (!valid) {
		DERR("ep_transition: invalid state change %d -> %d\n",
		    old_state, new_state);
	}
	return (valid);
}
/*
 * first check if the transition is valid. then set ep_state
 * to new_state and wake up all waiters.
 */
static void
daplka_ep_set_state(daplka_ep_resource_t *ep_rp, uint32_t old_state,
    uint32_t new_state)
{
	boolean_t	valid;

	ASSERT(new_state != DAPLKA_EP_STATE_TRANSITIONING);

	valid = daplka_ep_transition_is_valid(old_state, new_state);
	mutex_enter(&ep_rp->ep_lock);
	if (ep_rp->ep_state != DAPLKA_EP_STATE_FREED) {
		if (valid) {
			ep_rp->ep_state = new_state;
		} else {
			/*
			 * this case is impossible.
			 * we have a serious problem if we get here.
			 * instead of panicing, we reset the state to
			 * old_state. doing this would at least prevent
			 * threads from hanging due to ep_state being
			 * stuck in TRANSITIONING.
			 */
			ep_rp->ep_state = old_state;
			ASSERT(B_FALSE);
		}
	}
	cv_broadcast(&ep_rp->ep_cv);
	mutex_exit(&ep_rp->ep_lock);
}
/*
 * modifies RC channel attributes.
 * currently, only the rdma_in and rdma_out attributes may
 * be modified. the channel must be in quiescent state when
 * this function is called.
 */
/* ARGSUSED */
static int
daplka_ep_modify(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	daplka_ep_resource_t		*ep_rp = NULL;
	ibt_cep_modify_flags_t		good_flags;
	ibt_rc_chan_modify_attr_t	rcm_attr;
	ibt_hca_attr_t			*hca_attrp;
	dapl_ep_modify_t		args;
	ibt_status_t			status;
	uint32_t			old_state, new_state;
	int				retval;

	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ep_modify_t),
	    mode);
	if (retval != 0) {
		DERR("ep_modify: copyin error %d\n", retval);
		return (EFAULT);
	}
	ep_rp = (daplka_ep_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_ep_htbl, args.epm_hkey);
	if (ep_rp == NULL) {
		DERR("ep_modify: cannot find ep resource\n");
		return (EINVAL);
	}
	ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP);
	new_state = old_state = daplka_ep_get_state(ep_rp);

	if (old_state != DAPLKA_EP_STATE_CLOSED &&
	    old_state != DAPLKA_EP_STATE_DISCONNECTED) {
		DERR("ep_modify: invalid state %d\n", old_state);
		retval = EINVAL;
		goto cleanup;
	}

	good_flags = IBT_CEP_SET_RDMARA_OUT | IBT_CEP_SET_RDMARA_IN;
	if ((args.epm_flags & ~good_flags) != 0) {
		DERR("ep_modify: invalid flags 0x%x\n", args.epm_flags);
		retval = EINVAL;
		goto cleanup;
	}

	hca_attrp = &ia_rp->ia_hca->hca_attr;

	bzero(&rcm_attr, sizeof (ibt_rc_chan_modify_attr_t));
	if ((args.epm_flags & IBT_CEP_SET_RDMARA_OUT) != 0) {
		if (args.epm_rdma_ra_out > hca_attrp->hca_max_rdma_out_chan) {
			DERR("ep_modify: invalid epm_rdma_ra_out %d\n",
			    args.epm_rdma_ra_out);
			retval = EINVAL;
			goto cleanup;
		}
		rcm_attr.rc_rdma_ra_out = args.epm_rdma_ra_out;
	}
	if ((args.epm_flags & IBT_CEP_SET_RDMARA_IN) != 0) {
		if (args.epm_rdma_ra_in > hca_attrp->hca_max_rdma_in_chan) {
			DERR("ep_modify: epm_rdma_ra_in %d\n",
			    args.epm_rdma_ra_in);
			retval = EINVAL;
			goto cleanup;
		}
		rcm_attr.rc_rdma_ra_in = args.epm_rdma_ra_in;
	}
	status = ibt_modify_rc_channel(ep_rp->ep_chan_hdl, args.epm_flags,
	    &rcm_attr, NULL);
	if (status != IBT_SUCCESS) {
		DERR("ep_modify: modify_rc_channel returned %d\n", status);
		*rvalp = (int)status;
		retval = 0;
		goto cleanup;
	}

	/*
	 * ep_modify does not change ep_state
	 */
cleanup:;
	daplka_ep_set_state(ep_rp, old_state, new_state);
	DAPLKA_RS_UNREF(ep_rp);
	return (retval);
}
/*
 * Frees an EP resource.
 * an EP may only be freed when it is in the CLOSED or
 * DISCONNECTED state.
 */
/* ARGSUSED */
static int
daplka_ep_free(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	daplka_ep_resource_t	*ep_rp = NULL;
	dapl_ep_free_t		args;
	uint32_t		old_state, new_state;
	int			retval;

	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ep_free_t), mode);
	if (retval != 0) {
		DERR("ep_free: copyin error %d\n", retval);
		return (EFAULT);
	}
	ep_rp = (daplka_ep_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_ep_htbl, args.epf_hkey);
	if (ep_rp == NULL) {
		DERR("ep_free: cannot find ep resource\n");
		return (EINVAL);
	}
	ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP);
	new_state = old_state = daplka_ep_get_state(ep_rp);

	/*
	 * ep cannot be freed if it is in an invalid state.
	 */
	if (old_state != DAPLKA_EP_STATE_CLOSED &&
	    old_state != DAPLKA_EP_STATE_DISCONNECTED) {
		DERR("ep_free: invalid state %d\n", old_state);
		retval = EINVAL;
		goto cleanup;
	}
	retval = daplka_hash_remove(&ia_rp->ia_ep_htbl,
	    args.epf_hkey, (void **)&ep_rp);
	if (retval != 0 || ep_rp == NULL) {
		/*
		 * this is only possible if we have two threads
		 * calling ep_free in parallel.
		 */
		DERR("ep_free: cannot find ep resource\n");
		goto cleanup;
	}
	/* there should not be any outstanding timers */
	ASSERT(ep_rp->ep_timer_hkey == 0);

	new_state = DAPLKA_EP_STATE_FREED;
	daplka_ep_set_state(ep_rp, old_state, new_state);

	/* remove reference obtained by lookup */
	DAPLKA_RS_UNREF(ep_rp);

	/* UNREF calls the actual free function when refcnt is zero */
	DAPLKA_RS_UNREF(ep_rp);
	return (0);

cleanup:;
	daplka_ep_set_state(ep_rp, old_state, new_state);

	/* remove reference obtained by lookup */
	DAPLKA_RS_UNREF(ep_rp);
	return (retval);
}
/*
 * The following routines support the timeout feature of ep_connect.
 * Refer to the description of ep_connect for details.
 */

/*
 * this is the timer processing thread.
 */
static void
daplka_timer_thread(void *arg)
{
	daplka_timer_info_t	*timerp = (daplka_timer_info_t *)arg;
	daplka_ep_resource_t	*ep_rp;
	daplka_evd_event_t	*disc_ev = NULL;
	ibt_status_t		status;
	int			old_state, new_state;

	ep_rp = timerp->ti_ep_res;
	ASSERT(ep_rp != NULL);
	ASSERT(timerp->ti_tmo_id != 0);
	timerp->ti_tmo_id = 0;

	new_state = old_state = daplka_ep_get_state(ep_rp);
	if (old_state != DAPLKA_EP_STATE_CONNECTING) {
		/* unblock hash_ep_free */
		mutex_enter(&ep_rp->ep_lock);
		ASSERT(ep_rp->ep_timer_hkey != 0);
		ep_rp->ep_timer_hkey = 0;
		cv_broadcast(&ep_rp->ep_cv);
		mutex_exit(&ep_rp->ep_lock);

		/* reset state to original state */
		daplka_ep_set_state(ep_rp, old_state, new_state);

		/* this function will also unref ep_rp */
		daplka_timer_info_free(timerp);
		return;
	}

	ASSERT(ep_rp->ep_timer_hkey != 0);
	ep_rp->ep_timer_hkey = 0;

	/*
	 * we cannot keep ep_state in TRANSITIONING if we call
	 * ibt_close_rc_channel in blocking mode. this would cause
	 * a deadlock because the cm callbacks will be blocked and
	 * will not be able to wake us up.
	 */
	new_state = DAPLKA_EP_STATE_ABORTING;
	daplka_ep_set_state(ep_rp, old_state, new_state);

	/*
	 * when we return from close_rc_channel, all callbacks should have
	 * completed. we can also be certain that these callbacks did not
	 * enqueue any events to conn_evd.
	 */
	status = ibt_close_rc_channel(ep_rp->ep_chan_hdl, IBT_BLOCKING,
	    NULL, 0, NULL, NULL, NULL);
	if (status != IBT_SUCCESS) {
		DERR("timer_thread: ibt_close_rc_channel returned %d\n",
		    status);
	}
	old_state = daplka_ep_get_state(ep_rp);

	/*
	 * this is the only thread that can transition ep_state out
	 * of ABORTING. all other ep operations would fail when
	 * ep_state is in ABORTING.
	 */
	ASSERT(old_state == DAPLKA_EP_STATE_ABORTING);

	disc_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_SLEEP);
	ASSERT(disc_ev != NULL);

	disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_TIMED_OUT;
	disc_ev->ee_cmev.ec_cm_cookie = ep_rp->ep_cookie;
	disc_ev->ee_cmev.ec_cm_is_passive = B_FALSE;
	disc_ev->ee_cmev.ec_cm_psep_cookie = 0;
	disc_ev->ee_cmev.ec_cm_ev_priv_data = NULL;
	disc_ev->ee_cmev.ec_cm_ev_priv_data_len = 0;

	D2("timer_thread: enqueue event(%p) evdp(%p)\n",
	    disc_ev, ep_rp->ep_conn_evd);

	new_state = DAPLKA_EP_STATE_DISCONNECTED;
	daplka_ep_set_state(ep_rp, old_state, new_state);

	daplka_evd_wakeup(ep_rp->ep_conn_evd,
	    &ep_rp->ep_conn_evd->evd_conn_events, disc_ev);

	/* this function will also unref ep_rp */
	daplka_timer_info_free(timerp);
}
/*
 * dispatches a thread to continue with timer processing.
 */
static void
daplka_timer_dispatch(void *arg)
{
	/*
	 * keep rescheduling this function until
	 * taskq_dispatch succeeds.
	 */
	if (taskq_dispatch(daplka_taskq,
	    daplka_timer_thread, arg, TQ_NOSLEEP) == 0) {
		DERR("timer_dispatch: taskq_dispatch failed, retrying...\n");
		(void) timeout(daplka_timer_dispatch, arg, 10);
	}
}
/*
 * this function is called by the kernel's callout thread.
 * we first attempt to remove the timer object from the
 * global timer table. if it is found, we dispatch a thread
 * to continue processing the timer object. if it is not
 * found, that means the timer has been cancelled by someone
 * else.
 */
static void
daplka_timer_handler(void *arg)
{
	uint64_t		timer_hkey = (uintptr_t)arg;
	daplka_timer_info_t	*timerp = NULL;

	D2("timer_handler: timer_hkey 0x%llx\n", (longlong_t)timer_hkey);

	(void) daplka_hash_remove(&daplka_timer_info_htbl,
	    timer_hkey, (void **)&timerp);
	if (timerp == NULL) {
		D2("timer_handler: timer already cancelled\n");
		return;
	}
	daplka_timer_dispatch((void *)timerp);
}
/*
 * allocates a timer_info object.
 * a reference to an EP is held by this object. this ensures
 * that the EP stays valid when a timer is outstanding.
 */
static daplka_timer_info_t *
daplka_timer_info_alloc(daplka_ep_resource_t *ep_rp)
{
	daplka_timer_info_t	*timerp;

	timerp = kmem_zalloc(sizeof (*timerp), daplka_km_flags);
	if (timerp == NULL) {
		DERR("timer_info_alloc: cannot allocate timer info\n");
		return (NULL);
	}
	timerp->ti_ep_res = ep_rp;
	timerp->ti_tmo_id = 0;

	return (timerp);
}
/*
 * Frees the timer_info object.
 * we release the EP reference before freeing the object.
 */
static void
daplka_timer_info_free(daplka_timer_info_t *timerp)
{
	ASSERT(timerp->ti_ep_res != NULL);
	DAPLKA_RS_UNREF(timerp->ti_ep_res);
	timerp->ti_ep_res = NULL;
	ASSERT(timerp->ti_tmo_id == 0);
	kmem_free(timerp, sizeof (*timerp));
}
/*
 * cancels the timer set by ep_connect.
 * returns -1 if timer handling is in progress
 * and 0 otherwise.
 */
static int
daplka_cancel_timer(daplka_ep_resource_t *ep_rp)
{
	/*
	 * this function can only be called when ep_state
	 * is in TRANSITIONING.
	 */
	ASSERT(ep_rp->ep_state == DAPLKA_EP_STATE_TRANSITIONING);
	if (ep_rp->ep_timer_hkey != 0) {
		daplka_timer_info_t	*timerp = NULL;

		(void) daplka_hash_remove(&daplka_timer_info_htbl,
		    ep_rp->ep_timer_hkey, (void **)&timerp);
		if (timerp == NULL) {
			/*
			 * this is possible if the timer_handler has
			 * removed the timerp but the taskq thread has
			 * not transitioned the ep_state to DISCONNECTED.
			 * we need to reset the ep_state to allow the
			 * taskq thread to continue with its work. the
			 * taskq thread will set the ep_timer_hkey to 0
			 * so we don't have to do it here.
			 */
			DERR("cancel_timer: timer is being processed\n");
			return (-1);
		}
		/*
		 * we got the timer object. if the handler fires at
		 * this point, it will not be able to find the object
		 * and will return immediately. normally, ti_tmo_id gets
		 * cleared when the handler fires.
		 */
		ASSERT(timerp->ti_tmo_id != 0);

		/*
		 * note that untimeout can possibly call the handler.
		 * we are safe because the handler will be a no-op.
		 */
		(void) untimeout(timerp->ti_tmo_id);
		timerp->ti_tmo_id = 0;
		daplka_timer_info_free(timerp);
		ep_rp->ep_timer_hkey = 0;
	}
	return (0);
}

/*
 * this function is called by daplka_hash_destroy for
 * freeing timer_info objects
 */
static void
daplka_hash_timer_free(void *obj)
{
	daplka_timer_info_free((daplka_timer_info_t *)obj);
}

/*
 * computes a simple byte-wise checksum over the hello message.
 * (the body of this routine was reconstructed; the surviving loop
 * indicates a byte sum over the whole DAPL_PRIVATE structure.)
 */
/* ARGSUSED */
static uint16_t
daplka_hellomsg_cksum(DAPL_PRIVATE *dp)
{
	int		i;
	uint8_t		*bp;
	uint16_t	cksum = 0;

	bp = (uint8_t *)dp;
	for (i = 0; i < sizeof (DAPL_PRIVATE); i++) {
		cksum += bp[i];
	}
	return (cksum);
}

/*
 * ep_connect is called by the client to initiate a connection to a
 * remote service point. It is a non-blocking call. If a non-zero
 * timeout is specified by the client, a timer will be set just before
 * returning from ep_connect. Upon a successful return from ep_connect,
 * the client will call evd_wait to wait for the connection to complete.
 * If the connection is rejected or has failed due to an error, the
 * client will be notified with an event containing the appropriate error
 * code. If the connection is accepted, the client will be notified with
 * the CONN_ESTABLISHED event. If the timer expires before either of the
 * above events (error or established), a TIMED_OUT event will be delivered
 * to the client.
 *
 * the complicated part of the timer logic is the handling of race
 * conditions with CM callbacks. we need to ensure that either the CM or
 * the timer thread gets to deliver an event, but not both. when the
 * CM callback is about to deliver an event, it always tries to cancel
 * the outstanding timer. if cancel_timer indicates that the timer is
 * already being processed, the CM callback will simply return without
 * delivering an event. when the timer thread executes, it tries to check
 * if the EP is still in the CONNECTING state (timers only work on the
 * active side). if the EP is not in this state, the timer thread will
 * return without delivering an event.
 */
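
/*
 * Illustrative sketch of the race-handling protocol described above
 * (not part of the driver): a CM callback about to deliver a
 * connection event is expected to first call daplka_cancel_timer and
 * back off if timer processing has already started:
 *
 *	if (daplka_cancel_timer(ep_rp) != 0) {
 *		return;		(the timer thread owns event delivery)
 *	}
 *	... deliver the CM event to the conn evd ...
 *
 * this keeps exactly one of {CM callback, timer thread} delivering an
 * event for a given connection attempt.
 */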
/* ARGSUSED */
static int
daplka_ep_connect(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	daplka_ep_resource_t	*ep_rp = NULL;
	dapl_ep_connect_t	args;
	daplka_timer_info_t	*timerp = NULL;
	uint32_t		old_state, new_state;
	boolean_t		timer_inserted = B_FALSE;
	uint64_t		timer_hkey = 0;
	ibt_path_info_t		path_info;
	ibt_path_attr_t		path_attr;
	ibt_hca_attr_t		*hca_attrp;
	ibt_chan_open_args_t	chan_args;
	ibt_status_t		status = IBT_SUCCESS;
	uint8_t			num_paths;
	void			*priv_data;
	DAPL_PRIVATE		*dp;
	int			retval = 0;
	ib_gid_t		*sgid;
	ib_gid_t		*dgid;
	uint64_t		dgid_ored;
	ibt_ar_t		ar_query_s;
	ibt_ar_t		ar_result_s;
	ibt_path_flags_t	pathflags;

	D3("ep_connect: enter\n");
	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ep_connect_t),
	    mode);
	if (retval != 0) {
		DERR("ep_connect: copyin error %d\n", retval);
		return (EFAULT);
	}
	ep_rp = (daplka_ep_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_ep_htbl, args.epc_hkey);
	if (ep_rp == NULL) {
		DERR("ep_connect: cannot find ep resource\n");
		return (EINVAL);
	}
	ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP);

	new_state = old_state = daplka_ep_get_state(ep_rp);
	if (old_state != DAPLKA_EP_STATE_CLOSED) {
		DERR("ep_connect: invalid state %d\n", old_state);
		retval = EINVAL;
		goto cleanup;
	}
	if (args.epc_priv_sz > DAPL_MAX_PRIVATE_DATA_SIZE) {
		DERR("ep_connect: private data len (%d) exceeded "
		    "max size %d\n", args.epc_priv_sz,
		    DAPL_MAX_PRIVATE_DATA_SIZE);
		retval = EINVAL;
		goto cleanup;
	}

	/*
	 * check for remote ipaddress to dgid resolution needs ATS
	 */
	dgid = &args.epc_dgid;
	dgid_ored = dgid->gid_guid | dgid->gid_prefix;
#if defined(DAPLKA_DEBUG_FORCE_ATS)
	dgid_ored = 0ULL;
#endif /* DAPLKA_DEBUG_FORCE_ATS */
	/* check for unidentified dgid */
	if (dgid_ored == 0ULL) {
		/*
		 * setup for ibt_query_ar()
		 */
		sgid = &ia_rp->ia_hca_sgid;
		ar_query_s.ar_gid.gid_guid = 0ULL;
		ar_query_s.ar_gid.gid_prefix = 0ULL;
		ar_query_s.ar_pkey = 0;
		bcopy(args.epc_raddr_sadata.iad_sadata,
		    ar_query_s.ar_data, DAPL_ATS_NBYTES);
#define	UR(b) ar_query_s.ar_data[(b)]
		D3("daplka_ep_connect: SA[8] %d.%d.%d.%d\n",
		    UR(8), UR(9), UR(10), UR(11));
		D3("daplka_ep_connect: SA[12] %d.%d.%d.%d\n",
		    UR(12), UR(13), UR(14), UR(15));
		status = ibt_query_ar(sgid, &ar_query_s, &ar_result_s);
		if (status != IBT_SUCCESS) {
			DERR("ep_connect: ibt_query_ar returned %d\n", status);
			*rvalp = (int)status;
			retval = 0;
			goto cleanup;
		}
		/*
		 * dgid identified from SA record
		 */
		dgid = &ar_result_s.ar_gid;
		D2("daplka_ep_connect: ATS dgid=%llx:%llx\n",
		    (longlong_t)dgid->gid_prefix, (longlong_t)dgid->gid_guid);
	}
2157 bzero(&path_info
, sizeof (ibt_path_info_t
));
2158 bzero(&path_attr
, sizeof (ibt_path_attr_t
));
2159 bzero(&chan_args
, sizeof (ibt_chan_open_args_t
));
2161 path_attr
.pa_dgids
= dgid
;
2162 path_attr
.pa_num_dgids
= 1;
2164 * don't set sid in path_attr saves 1 SA query
2165 * Also makes server side not to write the service record
2167 path_attr
.pa_sgid
= ia_rp
->ia_hca_sgid
;
2168 path_attr
.pa_pkey
= ia_rp
->ia_port_pkey
;
2170 /* save the connection ep - struct copy */
2171 ep_rp
->ep_sgid
= ia_rp
->ia_hca_sgid
;
2172 ep_rp
->ep_dgid
= *dgid
;
2175 pathflags
= IBT_PATH_PKEY
;
2176 /* enable APM on remote port but not on loopback case */
2177 if (daplka_apm
&& ((dgid
->gid_prefix
!= path_attr
.pa_sgid
.gid_prefix
) ||
2178 (dgid
->gid_guid
!= path_attr
.pa_sgid
.gid_guid
))) {
2179 pathflags
|= IBT_PATH_APM
;
2181 status
= ibt_get_paths(daplka_dev
->daplka_clnt_hdl
,
2182 pathflags
, &path_attr
, 1, &path_info
, &num_paths
);
2184 if (status
!= IBT_SUCCESS
&& status
!= IBT_INSUFF_DATA
) {
2185 DERR("ep_connect: ibt_get_paths returned %d paths %d\n",
2187 *rvalp
= (int)status
;
2191 /* fill in the sid directly to path_info */
2192 path_info
.pi_sid
= args
.epc_sid
;
2193 hca_attrp
= &ia_rp
->ia_hca
->hca_attr
;
2195 /* fill in open channel args */
2196 chan_args
.oc_path
= &path_info
;
2197 chan_args
.oc_cm_handler
= daplka_cm_rc_handler
;
2198 chan_args
.oc_cm_clnt_private
= (void *)ep_rp
;
2199 chan_args
.oc_rdma_ra_out
= hca_attrp
->hca_max_rdma_out_chan
;
2200 chan_args
.oc_rdma_ra_in
= hca_attrp
->hca_max_rdma_in_chan
;
2201 chan_args
.oc_path_retry_cnt
= 7; /* 3-bit field */
2202 chan_args
.oc_path_rnr_retry_cnt
= IBT_RNR_INFINITE_RETRY
;
2204 ASSERT(args
.epc_priv_sz
> 0);
2205 priv_data
= (void *)args
.epc_priv
;
2207 chan_args
.oc_priv_data_len
= args
.epc_priv_sz
;
2208 chan_args
.oc_priv_data
= priv_data
;
2211 * calculate checksum value of hello message and
2212 * put hello message in networking byte order
2214 dp
= (DAPL_PRIVATE
*)priv_data
;
2215 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dp
))
2216 dp
->hello_msg
.hi_port
= htons(dp
->hello_msg
.hi_port
);
2217 dp
->hello_msg
.hi_checksum
= 0;
2218 dp
->hello_msg
.hi_checksum
= htons(daplka_hellomsg_cksum(dp
));
2219 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*dp
))
2221 if (args
.epc_timeout
> 0) {
2223 * increment refcnt before passing reference to
2226 DAPLKA_RS_REF(ep_rp
);
2227 timerp
= daplka_timer_info_alloc(ep_rp
);
2228 if (timerp
== NULL
) {
2229 DERR("ep_connect: cannot allocate timer\n");
2231 * we need to remove the reference if
2232 * allocation failed.
2234 DAPLKA_RS_UNREF(ep_rp
);
2239 * We generate our own hkeys so that timer_hkey can fit
2240 * into a pointer and passed as an arg to timeout()
2242 timer_hkey
= (uint64_t)daplka_timer_hkey_gen();
2243 retval
= daplka_hash_insert(&daplka_timer_info_htbl
,
2244 &timer_hkey
, (void *)timerp
);
2246 DERR("ep_connect: cannot insert timer info\n");
2249 ASSERT(ep_rp
->ep_timer_hkey
== 0);
2250 ep_rp
->ep_timer_hkey
= timer_hkey
;
2251 timer_inserted
= B_TRUE
;
2252 D2("ep_connect: timer_hkey = 0x%llx\n",
2253 (longlong_t
)timer_hkey
);
2255 status
= ibt_open_rc_channel(ep_rp
->ep_chan_hdl
, IBT_OCHAN_NO_FLAGS
,
2256 IBT_NONBLOCKING
, &chan_args
, NULL
);
2258 if (status
!= IBT_SUCCESS
) {
2259 DERR("ep_connect: ibt_open_rc_channel returned %d\n", status
);
2260 *rvalp
= (int)status
;
2265 * if a cm callback gets called at this point, it'll have to wait until
2266 * ep_state becomes connecting (or some other state if another thread
2267 * manages to get ahead of the callback). this guarantees that the
2268 * callback will not touch the timer until it gets set.
2270 if (timerp
!= NULL
) {
2273 tmo
= drv_usectohz((clock_t)args
.epc_timeout
);
2275 * We generate our own 32 bit timer_hkey so that it can fit
2278 ASSERT(timer_hkey
!= 0);
2279 timerp
->ti_tmo_id
= timeout(daplka_timer_handler
,
2280 (void *)(uintptr_t)timer_hkey
, tmo
);
2282 new_state
= DAPLKA_EP_STATE_CONNECTING
;
2285 if (timerp
!= NULL
&& (retval
!= 0 || status
!= IBT_SUCCESS
)) {
2287 * if ibt_open_rc_channel failed, the timerp must still
2288 * be in daplka_timer_info_htbl because neither the cm
2289 * callback nor the timer_handler will be called.
2291 if (timer_inserted
) {
2292 daplka_timer_info_t
*new_timerp
= NULL
;
2294 ASSERT(timer_hkey
!= 0);
2295 (void) daplka_hash_remove(&daplka_timer_info_htbl
,
2296 timer_hkey
, (void **)&new_timerp
);
2297 ASSERT(new_timerp
== timerp
);
2298 ep_rp
->ep_timer_hkey
= 0;
2300 daplka_timer_info_free(timerp
);
2302 daplka_ep_set_state(ep_rp
, old_state
, new_state
);
2303 DAPLKA_RS_UNREF(ep_rp
);
2304 D3("ep_connect: exit\n");
/*
 * ep_disconnect closes a connection with a remote peer.
 * if a connection has not been established, ep_disconnect
 * will instead flush all recv bufs posted to this channel.
 * if the EP state is CONNECTED, CONNECTING or ACCEPTING upon
 * entry to ep_disconnect, the EP state will transition to
 * DISCONNECTING upon exit. the CM callbacks triggered by
 * ibt_close_rc_channel will cause EP state to become
 * DISCONNECTED. This function is a no-op if EP state is
 * DISCONNECTED or DISCONNECTING.
 */
static int
daplka_ep_disconnect(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	daplka_ep_resource_t	*ep_rp = NULL;
	dapl_ep_disconnect_t	args;
	ibt_status_t		status;
	uint32_t		old_state, new_state;
	int			retval = 0;

	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ep_disconnect_t),
	    mode);
	if (retval != 0) {
		DERR("ep_disconnect: copyin error %d\n", retval);
		return (EFAULT);
	}
	ep_rp = (daplka_ep_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_ep_htbl, args.epd_hkey);
	if (ep_rp == NULL) {
		DERR("ep_disconnect: cannot find ep resource\n");
		return (EINVAL);
	}
	ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP);

	new_state = old_state = daplka_ep_get_state(ep_rp);
	if (old_state != DAPLKA_EP_STATE_CONNECTED &&
	    old_state != DAPLKA_EP_STATE_CONNECTING &&
	    old_state != DAPLKA_EP_STATE_ACCEPTING &&
	    old_state != DAPLKA_EP_STATE_DISCONNECTED &&
	    old_state != DAPLKA_EP_STATE_DISCONNECTING &&
	    old_state != DAPLKA_EP_STATE_CLOSED) {
		DERR("ep_disconnect: invalid state %d\n", old_state);
		retval = EINVAL;
		goto cleanup;
	}

	if ((old_state == DAPLKA_EP_STATE_DISCONNECTED) ||
	    (old_state == DAPLKA_EP_STATE_DISCONNECTING)) {
		D2("ep_disconnect: ep already disconnected\n");
		retval = 0;
		/* we leave the state as DISCONNECTED */
		goto cleanup;
	}
	if (old_state == DAPLKA_EP_STATE_CONNECTING ||
	    old_state == DAPLKA_EP_STATE_ACCEPTING) {
		D2("ep_disconnect: aborting, old_state = %d\n", old_state);
	}

	/*
	 * according to the udapl spec, ep_disconnect should
	 * flush the channel if the channel is not CONNECTED.
	 */
	if (old_state == DAPLKA_EP_STATE_CLOSED) {
		status = ibt_flush_channel(ep_rp->ep_chan_hdl);
		if (status != IBT_SUCCESS) {
			DERR("ep_disconnect: ibt_flush_channel failed %d\n",
			    status);
			*rvalp = (int)status;
		}
		retval = 0;
		/* we leave the state as CLOSED */
		goto cleanup;
	}

	new_state = DAPLKA_EP_STATE_DISCONNECTING;
	daplka_ep_set_state(ep_rp, old_state, new_state);
	status = ibt_close_rc_channel(ep_rp->ep_chan_hdl, IBT_NONBLOCKING,
	    NULL, 0, NULL, NULL, NULL);

	if (status == IBT_SUCCESS) {
		DAPLKA_RS_UNREF(ep_rp);
		return (retval);
	} else {
		DERR("ep_disconnect: ibt_close_rc_channel returned %d\n",
		    status);
		*rvalp = (int)status;
		retval = 0;
		new_state = old_state;
	}

cleanup:;
	daplka_ep_set_state(ep_rp, old_state, new_state);
	DAPLKA_RS_UNREF(ep_rp);
	return (retval);
}
/*
 * this function resets the EP to a usable state (i.e. from
 * DISCONNECTED to CLOSED). this function is best implemented using
 * the ibt_recycle_channel interface. until that is available, we will
 * instead clone and tear down the existing channel and replace the
 * existing channel with the cloned one.
 */
static int
daplka_ep_reinit(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	daplka_ep_resource_t	*ep_rp = NULL;
	dapl_ep_reinit_t	args;
	ibt_status_t		status;
	uint32_t		old_state, new_state;
	int			retval = 0;

	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ep_reinit_t),
	    mode);
	if (retval != 0) {
		DERR("reinit: copyin error %d\n", retval);
		return (EFAULT);
	}
	ep_rp = (daplka_ep_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_ep_htbl, args.epri_hkey);
	if (ep_rp == NULL) {
		DERR("reinit: cannot find ep resource\n");
		return (EINVAL);
	}
	ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP);
	new_state = old_state = daplka_ep_get_state(ep_rp);
	if ((old_state != DAPLKA_EP_STATE_CLOSED) &&
	    (old_state != DAPLKA_EP_STATE_DISCONNECTED)) {
		DERR("reinit: invalid state %d\n", old_state);
		retval = EINVAL;
		goto cleanup;
	}

	status = ibt_recycle_rc(ep_rp->ep_chan_hdl,
	    IBT_CEP_RDMA_RD|IBT_CEP_RDMA_WR,
	    ia_rp->ia_port_num, NULL, NULL);
	if (status != IBT_SUCCESS) {
		DERR("reinit: unable to clone channel\n");
		*rvalp = (int)status;
		retval = 0;
		goto cleanup;
	}
	new_state = DAPLKA_EP_STATE_CLOSED;

cleanup:;
	daplka_ep_set_state(ep_rp, old_state, new_state);
	DAPLKA_RS_UNREF(ep_rp);
	return (retval);
}
/*
 * destroys an EP resource.
 * called when refcnt drops to zero.
 */
static int
daplka_ep_destroy(daplka_resource_t *gen_rp)
{
	daplka_ep_resource_t	*ep_rp = (daplka_ep_resource_t *)gen_rp;
	ibt_status_t		status;

	ASSERT(DAPLKA_RS_REFCNT(ep_rp) == 0);
	ASSERT(ep_rp->ep_state == DAPLKA_EP_STATE_FREED);

	/*
	 * by the time we get here, we can be sure that
	 * there is no outstanding timer.
	 */
	ASSERT(ep_rp->ep_timer_hkey == 0);

	D3("ep_destroy: entering, ep_rp 0x%p, rnum %d\n",
	    ep_rp, DAPLKA_RS_RNUM(ep_rp));
	/*
	 * free the rc channel
	 */
	if (ep_rp->ep_chan_hdl != NULL) {
		mutex_enter(&daplka_dev->daplka_mutex);
		ibt_set_chan_private(ep_rp->ep_chan_hdl, NULL);
		mutex_exit(&daplka_dev->daplka_mutex);
		status = daplka_ibt_free_channel(ep_rp, ep_rp->ep_chan_hdl);
		if (status != IBT_SUCCESS) {
			DERR("ep_free: ibt_free_channel returned %d\n",
			    status);
		}
		ep_rp->ep_chan_hdl = NULL;
		D3("ep_destroy: qp freed, rnum %d\n", DAPLKA_RS_RNUM(ep_rp));
	}
	/*
	 * release all references
	 */
	if (ep_rp->ep_snd_evd != NULL) {
		DAPLKA_RS_UNREF(ep_rp->ep_snd_evd);
		ep_rp->ep_snd_evd = NULL;
	}
	if (ep_rp->ep_rcv_evd != NULL) {
		DAPLKA_RS_UNREF(ep_rp->ep_rcv_evd);
		ep_rp->ep_rcv_evd = NULL;
	}
	if (ep_rp->ep_conn_evd != NULL) {
		DAPLKA_RS_UNREF(ep_rp->ep_conn_evd);
		ep_rp->ep_conn_evd = NULL;
	}
	if (ep_rp->ep_srq_res != NULL) {
		DAPLKA_RS_UNREF(ep_rp->ep_srq_res);
		ep_rp->ep_srq_res = NULL;
	}
	if (ep_rp->ep_pd_res != NULL) {
		DAPLKA_RS_UNREF(ep_rp->ep_pd_res);
		ep_rp->ep_pd_res = NULL;
	}
	cv_destroy(&ep_rp->ep_cv);
	mutex_destroy(&ep_rp->ep_lock);

	DAPLKA_RS_FINI(ep_rp);
	kmem_free(ep_rp, sizeof (daplka_ep_resource_t));
	D3("ep_destroy: exiting, ep_rp 0x%p\n", ep_rp);
	return (0);
}
/*
 * this function is called by daplka_hash_destroy for
 * freeing EP resource objects
 */
static void
daplka_hash_ep_free(void *obj)
{
	daplka_ep_resource_t	*ep_rp = (daplka_ep_resource_t *)obj;
	ibt_status_t		status;
	uint32_t		old_state, new_state;
	int			retval;

	old_state = daplka_ep_get_state(ep_rp);
	retval = daplka_cancel_timer(ep_rp);
	new_state = DAPLKA_EP_STATE_FREED;
	daplka_ep_set_state(ep_rp, old_state, new_state);

	if (retval != 0) {
		D2("hash_ep_free: ep_rp 0x%p "
		    "timer is still being processed\n", ep_rp);
		mutex_enter(&ep_rp->ep_lock);
		if (ep_rp->ep_timer_hkey != 0) {
			D2("hash_ep_free: ep_rp 0x%p "
			    "waiting for timer_hkey to be 0\n", ep_rp);
			cv_wait(&ep_rp->ep_cv, &ep_rp->ep_lock);
		}
		mutex_exit(&ep_rp->ep_lock);
	}

	/* call ibt_close_rc_channel regardless of what state we are in */
	status = ibt_close_rc_channel(ep_rp->ep_chan_hdl, IBT_BLOCKING,
	    NULL, 0, NULL, NULL, NULL);
	if (status != IBT_SUCCESS) {
		if (old_state == DAPLKA_EP_STATE_CONNECTED ||
		    old_state == DAPLKA_EP_STATE_CONNECTING ||
		    old_state == DAPLKA_EP_STATE_ACCEPTING) {
			DERR("hash_ep_free: ep_rp 0x%p state %d "
			    "unexpected error %d from close_rc_channel\n",
			    ep_rp, old_state, status);
		}
		D2("hash_ep_free: close_rc_channel, status %d\n", status);
	}

	DAPLKA_RS_UNREF(ep_rp);
}
/*
 * creates an EVD resource.
 * an EVD is used by the client to wait for events from one
 * or more sources.
 */
static int
daplka_evd_create(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	daplka_evd_resource_t	*evd_rp = NULL;
	daplka_async_evd_hkey_t	*async_evd;
	ibt_hca_attr_t		*hca_attrp;
	ibt_cq_attr_t		cq_attr;
	dapl_evd_create_t	args;
	uint64_t		evd_hkey = 0;
	boolean_t		inserted = B_FALSE;
	int			retval = 0;
	ibt_status_t		status;

	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_evd_create_t),
	    mode);
	if (retval != 0) {
		DERR("evd_create: copyin error %d", retval);
		return (EFAULT);
	}
	if ((args.evd_flags &
	    ~(DAT_EVD_DEFAULT_FLAG | DAT_EVD_SOFTWARE_FLAG)) != 0) {
		DERR("evd_create: invalid flags 0x%x\n", args.evd_flags);
		return (EINVAL);
	}

	evd_rp = kmem_zalloc(sizeof (daplka_evd_resource_t), daplka_km_flags);
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*evd_rp))
	DAPLKA_RS_INIT(evd_rp, DAPL_TYPE_EVD,
	    DAPLKA_RS_RNUM(ia_rp), daplka_evd_destroy);

	mutex_init(&evd_rp->evd_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&evd_rp->evd_cv, NULL, CV_DRIVER, NULL);
	evd_rp->evd_hca = ia_rp->ia_hca;
	evd_rp->evd_flags = args.evd_flags;
	evd_rp->evd_hca_hdl = ia_rp->ia_hca_hdl;
	evd_rp->evd_cookie = args.evd_cookie;
	evd_rp->evd_cno_res = NULL;
	evd_rp->evd_cr_events.eel_event_type = DAPLKA_EVD_CM_EVENTS;
	evd_rp->evd_conn_events.eel_event_type = DAPLKA_EVD_CM_EVENTS;
	evd_rp->evd_async_events.eel_event_type = DAPLKA_EVD_ASYNC_EVENTS;

	/*
	 * if the client specified a non-zero cno_hkey, we
	 * lookup the cno and save the reference for later use.
	 */
	if (args.evd_cno_hkey > 0) {
		daplka_cno_resource_t	*cno_rp;

		cno_rp = (daplka_cno_resource_t *)
		    daplka_hash_lookup(&ia_rp->ia_cno_htbl,
		    args.evd_cno_hkey);
		if (cno_rp == NULL) {
			DERR("evd_create: cannot find cno resource\n");
			retval = EINVAL;
			goto cleanup;
		}
		ASSERT(DAPLKA_RS_TYPE(cno_rp) == DAPL_TYPE_CNO);
		evd_rp->evd_cno_res = cno_rp;
	}
	hca_attrp = &ia_rp->ia_hca->hca_attr;
	if ((evd_rp->evd_flags &
	    (DAT_EVD_DTO_FLAG | DAT_EVD_RMR_BIND_FLAG)) != 0) {
		if (args.evd_cq_size > hca_attrp->hca_max_cq_sz) {
			DERR("evd_create: invalid cq size %d",
			    args.evd_cq_size);
			retval = EINVAL;
			goto cleanup;
		}
		cq_attr.cq_size = args.evd_cq_size;
		cq_attr.cq_sched = NULL;
		cq_attr.cq_flags = IBT_CQ_USER_MAP;

		status = daplka_ibt_alloc_cq(evd_rp, evd_rp->evd_hca_hdl,
		    &cq_attr, &evd_rp->evd_cq_hdl, &evd_rp->evd_cq_real_size);

		if (status != IBT_SUCCESS) {
			DERR("evd_create: ibt_alloc_cq returned %d", status);
			*rvalp = (int)status;
			retval = 0;
			goto cleanup;
		}

		/*
		 * store evd ptr with cq_hdl
		 * mutex is only needed for race of "destroy" and "async"
		 */
		mutex_enter(&daplka_dev->daplka_mutex);
		ibt_set_cq_private(evd_rp->evd_cq_hdl, (void *)evd_rp);
		mutex_exit(&daplka_dev->daplka_mutex);

		/* Get HCA-specific data_out info */
		status = ibt_ci_data_out(evd_rp->evd_hca_hdl,
		    IBT_CI_NO_FLAGS, IBT_HDL_CQ, (void *)evd_rp->evd_cq_hdl,
		    &args.evd_cq_data_out, sizeof (args.evd_cq_data_out));

		if (status != IBT_SUCCESS) {
			DERR("evd_create: ibt_ci_data_out error(%d)", status);
			*rvalp = (int)status;
			retval = 0;
			goto cleanup;
		}

		args.evd_cq_real_size = evd_rp->evd_cq_real_size;

		ibt_set_cq_handler(evd_rp->evd_cq_hdl, daplka_cq_handler,
		    (void *)evd_rp);
	}

	retval = daplka_hash_insert(&ia_rp->ia_evd_htbl,
	    &evd_hkey, (void *)evd_rp);
	if (retval != 0) {
		DERR("evd_create: cannot insert evd %d\n", retval);
		goto cleanup;
	}
	inserted = B_TRUE;
	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*evd_rp))

	/*
	 * If this evd handles async events need to add to the IA resource
	 * async evd list
	 */
	if (evd_rp->evd_flags & DAT_EVD_ASYNC_FLAG) {
		async_evd = kmem_zalloc(sizeof (daplka_async_evd_hkey_t),
		    daplka_km_flags);
		/* add the evd to the head of the list */
		mutex_enter(&ia_rp->ia_lock);
		async_evd->aeh_evd_hkey = evd_hkey;
		async_evd->aeh_next = ia_rp->ia_async_evd_hkeys;
		ia_rp->ia_async_evd_hkeys = async_evd;
		mutex_exit(&ia_rp->ia_lock);
	}

	args.evd_hkey = evd_hkey;
	retval = copyout(&args, (void *)arg, sizeof (dapl_evd_create_t));
	if (retval != 0) {
		DERR("evd_create: copyout error %d\n", retval);
		retval = EFAULT;
		goto cleanup;
	}
	return (0);

cleanup:;
	if (inserted) {
		daplka_evd_resource_t	*free_rp = NULL;

		(void) daplka_hash_remove(&ia_rp->ia_evd_htbl, evd_hkey,
		    (void **)&free_rp);
		if (free_rp != evd_rp) {
			DERR("evd_create: cannot remove evd\n");
			/*
			 * we can only get here if another thread
			 * has completed the cleanup in evd_free
			 */
			return (retval);
		}
	}
	DAPLKA_RS_UNREF(evd_rp);
	return (retval);
}
/*
 * resizes CQ and returns new mapping info to library.
 */
static int
daplka_cq_resize(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	daplka_evd_resource_t	*evd_rp = NULL;
	ibt_hca_attr_t		*hca_attrp;
	dapl_cq_resize_t	args;
	ibt_status_t		status;
	int			retval = 0;

	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_cq_resize_t),
	    mode);
	if (retval != 0) {
		DERR("cq_resize: copyin error %d\n", retval);
		return (EFAULT);
	}

	/* get evd resource */
	evd_rp = (daplka_evd_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.cqr_evd_hkey);
	if (evd_rp == NULL) {
		DERR("cq_resize: cannot find evd resource\n");
		return (EINVAL);
	}
	ASSERT(DAPLKA_RS_TYPE(evd_rp) == DAPL_TYPE_EVD);

	hca_attrp = &ia_rp->ia_hca->hca_attr;
	if (args.cqr_cq_new_size > hca_attrp->hca_max_cq_sz) {
		DERR("cq_resize: invalid cq size %d", args.cqr_cq_new_size);
		retval = EINVAL;
		goto cleanup;
	}
	/*
	 * If ibt_resize_cq fails, it is primarily due to a resource
	 * shortage. Per the IB spec a resize will never lose events and
	 * a resize error leaves the CQ intact. Therefore even if the
	 * resize request fails we proceed and get the mapping data
	 * from the CQ so that the library can mmap it.
	 */
	status = ibt_resize_cq(evd_rp->evd_cq_hdl, args.cqr_cq_new_size,
	    &args.cqr_cq_real_size);
	if (status != IBT_SUCCESS) {
		/* we return the size of the old CQ if resize fails */
		args.cqr_cq_real_size = evd_rp->evd_cq_real_size;
		ASSERT(status != IBT_CQ_HDL_INVALID);
		DERR("cq_resize: ibt_resize_cq failed:%d\n", status);
	} else {
		mutex_enter(&evd_rp->evd_lock);
		evd_rp->evd_cq_real_size = args.cqr_cq_real_size;
		mutex_exit(&evd_rp->evd_lock);
	}

	D2("cq_resize(%d): done new_sz(%u) real_sz(%u)\n",
	    DAPLKA_RS_RNUM(evd_rp),
	    args.cqr_cq_new_size, args.cqr_cq_real_size);

	/* Get HCA-specific data_out info */
	status = ibt_ci_data_out(evd_rp->evd_hca_hdl,
	    IBT_CI_NO_FLAGS, IBT_HDL_CQ, (void *)evd_rp->evd_cq_hdl,
	    &args.cqr_cq_data_out, sizeof (args.cqr_cq_data_out));
	if (status != IBT_SUCCESS) {
		DERR("cq_resize: ibt_ci_data_out error(%d)\n", status);
		/* return ibt_ci_data_out status */
		*rvalp = (int)status;
		retval = 0;
		goto cleanup;
	}

	retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_cq_resize_t),
	    mode);
	if (retval != 0) {
		DERR("cq_resize: copyout error %d\n", retval);
		retval = EFAULT;
		goto cleanup;
	}

cleanup:;
	if (evd_rp != NULL) {
		DAPLKA_RS_UNREF(evd_rp);
	}
	return (retval);
}
/*
 * Routine to copyin the event poll message so that 32 bit libraries
 * can be safely supported
 */
static int
daplka_event_poll_copyin(intptr_t inarg, dapl_event_poll_t *outarg, int mode)
{
	int	retval;

#ifdef _MULTI_DATAMODEL
	if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
		dapl_event_poll32_t	args32;

		retval = ddi_copyin((void *)inarg, &args32,
		    sizeof (dapl_event_poll32_t), mode);
		if (retval != 0) {
			DERR("event_poll_copyin: 32bit error %d\n", retval);
			return (EFAULT);
		}

		outarg->evp_evd_hkey = args32.evp_evd_hkey;
		outarg->evp_threshold = args32.evp_threshold;
		outarg->evp_timeout = args32.evp_timeout;
		outarg->evp_ep = (dapl_ib_event_t *)(uintptr_t)args32.evp_ep;
		outarg->evp_num_ev = args32.evp_num_ev;
		outarg->evp_num_polled = args32.evp_num_polled;
		return (0);
	}
#endif
	retval = ddi_copyin((void *)inarg, outarg, sizeof (dapl_event_poll_t),
	    mode);
	if (retval != 0) {
		DERR("event_poll: copyin error %d\n", retval);
		return (EFAULT);
	}
	return (0);
}
/*
 * Routine to copyout the event poll message so that 32 bit libraries
 * can be safely supported
 */
static int
daplka_event_poll_copyout(dapl_event_poll_t *inarg, intptr_t outarg, int mode)
{
	int	retval;

#ifdef _MULTI_DATAMODEL
	if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
		dapl_event_poll32_t	args32;

		args32.evp_evd_hkey = inarg->evp_evd_hkey;
		args32.evp_threshold = inarg->evp_threshold;
		args32.evp_timeout = inarg->evp_timeout;
		args32.evp_ep = (caddr32_t)(uintptr_t)inarg->evp_ep;
		args32.evp_num_ev = inarg->evp_num_ev;
		args32.evp_num_polled = inarg->evp_num_polled;

		retval = ddi_copyout((void *)&args32, (void *)outarg,
		    sizeof (dapl_event_poll32_t), mode);
		if (retval != 0) {
			DERR("event_poll_copyout: 32bit error %d\n", retval);
			return (EFAULT);
		}
		return (0);
	}
#endif
	retval = ddi_copyout((void *)inarg, (void *)outarg,
	    sizeof (dapl_event_poll_t), mode);
	if (retval != 0) {
		DERR("event_poll_copyout: error %d\n", retval);
		return (EFAULT);
	}
	return (0);
}
/*
 * function to handle CM REQ RCV private data from Solaris or third parties
 */
static void
daplka_crevent_privdata_post(daplka_ia_resource_t *ia_rp,
    dapl_ib_event_t *evd_rp, daplka_evd_event_t *cr_ev)
{
	DAPL_PRIVATE		*dp;
	ibt_ar_t		ar_query_s;
	ibt_ar_t		ar_result_s;
	DAPL_HELLO_MSG		*hip;
	uint32_t		ipaddr_ord;
	ibt_priv_data_len_t	clen;
	ibt_priv_data_len_t	olen;
	ibt_status_t		status;
	uint16_t		cksum;
	ib_gid_t		*lgid;

	/*
	 * get private data and len
	 */
	dp = (DAPL_PRIVATE *)cr_ev->ee_cmev.ec_cm_ev_priv_data;
	clen = cr_ev->ee_cmev.ec_cm_ev_priv_data_len;
#if defined(DAPLKA_DEBUG_FORCE_ATS)
	/* skip the DAPL_PRIVATE checksum check */
#else
	/* for remote connects */
	/* look up hello message in the CM private data area */
	if (clen >= sizeof (DAPL_PRIVATE) &&
	    (dp->hello_msg.hi_vers == DAPL_HELLO_MSG_VERS)) {
		cksum = ntohs(dp->hello_msg.hi_checksum);
		dp->hello_msg.hi_checksum = 0;
		if (daplka_hellomsg_cksum(dp) == cksum) {
			D2("daplka_crevent_privdata_post: Solaris msg\n");
			evd_rp->ibe_ce.ibce_priv_data_size = clen;
			dp->hello_msg.hi_checksum = DAPL_CHECKSUM;
			dp->hello_msg.hi_port = ntohs(dp->hello_msg.hi_port);
			bcopy(dp, evd_rp->ibe_ce.ibce_priv_data_ptr, clen);
			kmem_free(dp, clen);
			return;
		}
	}
#endif /* DAPLKA_DEBUG_FORCE_ATS */

	D2("daplka_crevent_privdata_post: 3rd party msg\n");
	/* transpose CM private data into hello message */
	olen = clen;
	if (clen > DAPL_CONSUMER_MAX_PRIVATE_DATA_SIZE) {
		clen = DAPL_CONSUMER_MAX_PRIVATE_DATA_SIZE;
	}
	if (clen) {
		bcopy(dp, evd_rp->ibe_ce.ibce_priv_data_ptr, clen);
		kmem_free(dp, olen);
	} else {
		bzero(evd_rp->ibe_ce.ibce_priv_data_ptr,
		    DAPL_CONSUMER_MAX_PRIVATE_DATA_SIZE);
	}
	evd_rp->ibe_ce.ibce_priv_data_size = sizeof (DAPL_PRIVATE);
	dp = (DAPL_PRIVATE *)evd_rp->ibe_ce.ibce_priv_data_ptr;
	/*
	 * fill in hello message
	 */
	hip = &dp->hello_msg;
	hip->hi_checksum = DAPL_CHECKSUM;
	hip->hi_clen = clen;
	hip->hi_vers = DAPL_HELLO_MSG_VERS;

	/* assign sgid and dgid */
	lgid = &ia_rp->ia_hca_sgid;
	ar_query_s.ar_gid.gid_prefix =
	    cr_ev->ee_cmev.ec_cm_req_prim_addr.gid_prefix;
	ar_query_s.ar_gid.gid_guid =
	    cr_ev->ee_cmev.ec_cm_req_prim_addr.gid_guid;
	ar_query_s.ar_pkey = ia_rp->ia_port_pkey;
	bzero(ar_query_s.ar_data, DAPL_ATS_NBYTES);

	/* reverse ip address lookup through ATS */
	status = ibt_query_ar(lgid, &ar_query_s, &ar_result_s);
	if (status == IBT_SUCCESS) {
		bcopy(ar_result_s.ar_data, hip->hi_saaddr, DAPL_ATS_NBYTES);
		/* determine the address families */
		ipaddr_ord = hip->hi_v4pad[0] | hip->hi_v4pad[1] |
		    hip->hi_v4pad[2];
		if (ipaddr_ord == 0) {
			hip->hi_ipv = AF_INET;
		} else {
			hip->hi_ipv = AF_INET6;
		}

#define	UL(b) ar_result_s.ar_data[(b)]
		D3("daplka_privdata_post: family=%d :SA[8] %d.%d.%d.%d\n",
		    hip->hi_ipv, UL(8), UL(9), UL(10), UL(11));
		D3("daplka_privdata_post: SA[12] %d.%d.%d.%d\n",
		    UL(12), UL(13), UL(14), UL(15));
	} else {
		/* non-conformed third parties */
		hip->hi_ipv = AF_UNSPEC;
		bzero(hip->hi_saaddr, DAPL_ATS_NBYTES);
	}
}
/*
 * this function is called by evd_wait and evd_dequeue to wait for
 * connection events and CQ notifications. typically this function
 * is called when the userland CQ is empty and the client has
 * specified a non-zero timeout to evd_wait. if the client is
 * interested in CQ events, the CQ must be armed in userland prior
 * to calling this function.
 */
static int
daplka_event_poll(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	daplka_evd_resource_t	*evd_rp = NULL;
	dapl_event_poll_t	args;
	daplka_evd_event_t	*head;
	dapl_ib_event_t		evp_arr[NUM_EVENTS_PER_POLL];
	dapl_ib_event_t		*evp;
	dapl_ib_event_t		*evp_start;
	size_t			evp_size = 0;
	int			threshold;
	clock_t			timeout;
	uint32_t		max_events;
	uint32_t		num_events = 0;
	void			*pd;
	ibt_priv_data_len_t	n;
	int			retval = 0;
	int			rc;

	retval = daplka_event_poll_copyin(arg, &args, mode);
	if (retval != 0) {
		return (EFAULT);
	}

	if ((args.evp_num_ev > 0) && (args.evp_ep == NULL)) {
		DERR("event_poll: evp_ep cannot be NULL if num_wc=%d",
		    args.evp_num_ev);
		return (EINVAL);
	}
	/*
	 * Note: dequeue requests have a threshold = 0, timeout = 0
	 */
	threshold = args.evp_threshold;

	max_events = args.evp_num_ev;
	/* ensure library is passing sensible values */
	if (max_events < threshold) {
		DERR("event_poll: max_events(%d) < threshold(%d)\n",
		    max_events, threshold);
		return (EINVAL);
	}
	/* Do a sanity check to avoid excessive memory allocation */
	if (max_events > DAPL_EVD_MAX_EVENTS) {
		DERR("event_poll: max_events(%d) > %d",
		    max_events, DAPL_EVD_MAX_EVENTS);
		return (EINVAL);
	}
	D4("event_poll: threshold(%d) timeout(0x%llx) max_events(%d)\n",
	    threshold, (longlong_t)args.evp_timeout, max_events);

	/* get evd resource */
	evd_rp = (daplka_evd_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.evp_evd_hkey);
	if (evd_rp == NULL) {
		DERR("event_poll: cannot find evd resource\n");
		return (EINVAL);
	}
	ASSERT(DAPLKA_RS_TYPE(evd_rp) == DAPL_TYPE_EVD);

	/*
	 * Use event array on the stack if possible
	 */
	if (max_events <= NUM_EVENTS_PER_POLL) {
		evp_start = evp = &evp_arr[0];
	} else {
		evp_size = max_events * sizeof (dapl_ib_event_t);
		evp_start = evp = kmem_zalloc(evp_size, daplka_km_flags);
		if (evp == NULL) {
			DERR("event_poll: kmem_zalloc failed, evp_size %d",
			    evp_size);
			retval = ENOMEM;
			goto cleanup;
		}
	}

	/*
	 * The Event poll algorithm is as follows -
	 * The library passes a buffer big enough to hold "max_events"
	 * events. max_events is >= threshold. If at any stage we get
	 * max_events no. of events we bail. The events are polled in
	 * the following order -
	 * 1) Check for CR events in the evd_cr_events list
	 * 2) Check for Connection events in the evd_connection_events list
	 *
	 * If after the above 2 steps we don't have enough(>= threshold) events
	 * we block for CQ notification and sleep. Upon being woken up we start
	 * at step 1 again.
	 */

	/*
	 * Note: this could be 0 or INFINITE or any other value in microsec
	 */
	if (args.evp_timeout > 0) {
		if (args.evp_timeout >= LONG_MAX) {
			timeout = LONG_MAX;
		} else {
			clock_t	curr_time = ddi_get_lbolt();

			timeout = curr_time +
			    drv_usectohz((clock_t)args.evp_timeout);
			/*
			 * use the max value if we wrapped around
			 */
			if (timeout <= curr_time) {
				timeout = LONG_MAX;
			}
		}
	} else {
		timeout = 0;
	}

	mutex_enter(&evd_rp->evd_lock);
	for (;;) {
		/*
		 * If this evd is waiting for CM events check that now.
		 */
		if ((evd_rp->evd_flags & DAT_EVD_CR_FLAG) &&
		    (evd_rp->evd_cr_events.eel_num_elements > 0)) {
			/* dequeue events from evd_cr_events list */
			while (head = daplka_evd_event_dequeue(
			    &evd_rp->evd_cr_events)) {
				/*
				 * populate the evp array
				 */
				evp[num_events].ibe_ev_family = DAPL_CR_EVENTS;
				evp[num_events].ibe_ce.ibce_event =
				    head->ee_cmev.ec_cm_ev_type;
				evp[num_events].ibe_ce.ibce_cookie =
				    (uint64_t)head->ee_cmev.ec_cm_cookie;
				evp[num_events].ibe_ce.ibce_psep_cookie =
				    head->ee_cmev.ec_cm_psep_cookie;
				daplka_crevent_privdata_post(ia_rp,
				    &evp[num_events], head);
				kmem_free(head, sizeof (daplka_evd_event_t));

				if (++num_events == max_events) {
					mutex_exit(&evd_rp->evd_lock);
					goto maxevent_reached;
				}
			}
		}

		if ((evd_rp->evd_flags & DAT_EVD_CONNECTION_FLAG) &&
		    (evd_rp->evd_conn_events.eel_num_elements > 0)) {
			/* dequeue events from evd_connection_events list */
			while ((head = daplka_evd_event_dequeue
			    (&evd_rp->evd_conn_events))) {
				/*
				 * populate the evp array -
				 */
				if (head->ee_cmev.ec_cm_is_passive) {
					evp[num_events].ibe_ev_family =
					    DAPL_PASSIVE_CONNECTION_EVENTS;
				} else {
					evp[num_events].ibe_ev_family =
					    DAPL_ACTIVE_CONNECTION_EVENTS;
				}
				evp[num_events].ibe_ce.ibce_event =
				    head->ee_cmev.ec_cm_ev_type;
				evp[num_events].ibe_ce.ibce_cookie =
				    (uint64_t)head->ee_cmev.ec_cm_cookie;
				evp[num_events].ibe_ce.ibce_psep_cookie =
				    head->ee_cmev.ec_cm_psep_cookie;

				if (head->ee_cmev.ec_cm_ev_priv_data_len > 0) {
					pd = head->ee_cmev.ec_cm_ev_priv_data;
					n = head->
					    ee_cmev.ec_cm_ev_priv_data_len;
					bcopy(pd, (void *)evp[num_events].
					    ibe_ce.ibce_priv_data_ptr, n);
					evp[num_events].ibe_ce.
					    ibce_priv_data_size = n;
					kmem_free(pd, n);
				}
				kmem_free(head, sizeof (daplka_evd_event_t));

				if (++num_events == max_events) {
					mutex_exit(&evd_rp->evd_lock);
					goto maxevent_reached;
				}
			}
		}

		if ((evd_rp->evd_flags & DAT_EVD_ASYNC_FLAG) &&
		    (evd_rp->evd_async_events.eel_num_elements > 0)) {
			/* dequeue events from evd_async_events list */
			while (head = daplka_evd_event_dequeue(
			    &evd_rp->evd_async_events)) {
				/*
				 * populate the evp array
				 */
				evp[num_events].ibe_ev_family =
				    DAPL_ASYNC_EVENTS;
				evp[num_events].ibe_async.ibae_type =
				    head->ee_aev.ibae_type;
				evp[num_events].ibe_async.ibae_hca_guid =
				    head->ee_aev.ibae_hca_guid;
				evp[num_events].ibe_async.ibae_cookie =
				    head->ee_aev.ibae_cookie;
				evp[num_events].ibe_async.ibae_port =
				    head->ee_aev.ibae_port;

				kmem_free(head, sizeof (daplka_evd_event_t));

				if (++num_events == max_events) {
					mutex_exit(&evd_rp->evd_lock);
					goto maxevent_reached;
				}
			}
		}

		/*
		 * We have sufficient events for this call so no need to wait
		 */
		if ((threshold > 0) && (num_events >= threshold)) {
			mutex_exit(&evd_rp->evd_lock);
			break;
		}

		evd_rp->evd_waiters++;
		/*
		 * There are no new events and a timeout was specified.
		 * Note: for CQ events threshold is 0 but timeout is
		 * not necessarily 0.
		 */
		while ((evd_rp->evd_newevents == DAPLKA_EVD_NO_EVENTS) &&
		    (timeout != 0)) {
			retval = DAPLKA_EVD_WAIT(&evd_rp->evd_cv,
			    &evd_rp->evd_lock, timeout);
			if (retval == 0) {
				/* woken by a signal */
				retval = EINTR;
				break;
			} else if (retval == -1) {
				/* timed out */
				retval = ETIME;
				break;
			} else {
				retval = 0;
			}
		}
		evd_rp->evd_waiters--;
		if (evd_rp->evd_newevents != DAPLKA_EVD_NO_EVENTS) {
			/*
			 * If we got woken up by the CQ handler due to events
			 * in the CQ. Need to go to userland to check for
			 * CQ events. Or if we were woken up due to S/W events
			 */

			/* check for userland events only */
			if (!(evd_rp->evd_newevents &
			    ~DAPLKA_EVD_ULAND_EVENTS)) {
				evd_rp->evd_newevents = DAPLKA_EVD_NO_EVENTS;
				mutex_exit(&evd_rp->evd_lock);
				break;
			}
			/*
			 * Clear newevents since we are going to loop
			 * back and check for both CM and CQ events
			 */
			evd_rp->evd_newevents = DAPLKA_EVD_NO_EVENTS;
		} else { /* error */
			mutex_exit(&evd_rp->evd_lock);
			break;
		}
	}

maxevent_reached:
	args.evp_num_polled = num_events;

	/*
	 * At this point retval might have a value that we want to return
	 * back to the user. So the copyouts shouldn't tamper with retval.
	 */
	if (args.evp_num_polled > 0) { /* copyout the events */
		rc = ddi_copyout(evp, args.evp_ep, args.evp_num_polled *
		    sizeof (dapl_ib_event_t), mode);
		if (rc != 0) { /* XXX: we are losing events here */
			DERR("event_poll: event array copyout error %d", rc);
			retval = EFAULT;
			goto cleanup;
		}
		rc = daplka_event_poll_copyout(&args, arg, mode);
		if (rc != 0) { /* XXX: we are losing events here */
			DERR("event_poll: copyout error %d\n", rc);
			retval = EFAULT;
			goto cleanup;
		}
	}

cleanup:;
	if ((max_events > NUM_EVENTS_PER_POLL) && (evp_start != NULL)) {
		kmem_free(evp_start, evp_size);
	}

	if (evd_rp != NULL) {
		DAPLKA_RS_UNREF(evd_rp);
	}
	return (retval);
}
static int
daplka_event_wakeup(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	dapl_event_wakeup_t	args;
	daplka_evd_resource_t	*evd_rp;
	int			retval;

	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_event_wakeup_t),
	    mode);
	if (retval != 0) {
		DERR("event_wakeup: copyin error %d\n", retval);
		return (EFAULT);
	}

	/* get evd resource */
	evd_rp = (daplka_evd_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.evw_hkey);
	if (evd_rp == NULL) {
		DERR("event_wakeup: cannot find evd resource\n");
		return (EINVAL);
	}
	ASSERT(DAPLKA_RS_TYPE(evd_rp) == DAPL_TYPE_EVD);

	daplka_evd_wakeup(evd_rp, NULL, NULL);

	DAPLKA_RS_UNREF(evd_rp);
	return (retval);
}
static int
daplka_evd_modify_cno(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	dapl_evd_modify_cno_t	args;
	daplka_evd_resource_t	*evd_rp;
	daplka_cno_resource_t	*cno_rp;
	daplka_cno_resource_t	*old_cno_rp;
	int			retval;

	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_evd_modify_cno_t),
	    mode);
	if (retval != 0) {
		DERR("evd_modify_cno: copyin error %d\n", retval);
		return (EFAULT);
	}

	/* get evd resource */
	evd_rp = (daplka_evd_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.evmc_hkey);
	if (evd_rp == NULL) {
		DERR("evd_modify_cno: cannot find evd resource\n");
		return (EINVAL);
	}
	ASSERT(DAPLKA_RS_TYPE(evd_rp) == DAPL_TYPE_EVD);

	if (args.evmc_cno_hkey > 0) {
		/* get cno resource corresponding to the new CNO */
		cno_rp = (daplka_cno_resource_t *)
		    daplka_hash_lookup(&ia_rp->ia_cno_htbl,
		    args.evmc_cno_hkey);
		if (cno_rp == NULL) {
			DERR("evd_modify_cno: cannot find CNO resource\n");
			DAPLKA_RS_UNREF(evd_rp);
			return (EINVAL);
		}
		ASSERT(DAPLKA_RS_TYPE(cno_rp) == DAPL_TYPE_CNO);
	} else {
		cno_rp = NULL;
	}

	mutex_enter(&evd_rp->evd_lock);
	old_cno_rp = evd_rp->evd_cno_res;
	evd_rp->evd_cno_res = cno_rp;
	mutex_exit(&evd_rp->evd_lock);

	/*
	 * drop the refcnt on the old CNO, the refcnt on the new CNO is
	 * retained since the evd holds a reference to it.
	 */
	if (old_cno_rp != NULL) {
		DAPLKA_RS_UNREF(old_cno_rp);
	}

	DAPLKA_RS_UNREF(evd_rp);
	return (0);
}
/*
 * Frees the EVD and associated resources.
 * If there are other threads still using this EVD, the destruction
 * will defer until the EVD's refcnt drops to zero.
 */
static int
daplka_evd_free(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	daplka_evd_resource_t	*evd_rp = NULL;
	daplka_async_evd_hkey_t	*curr;
	daplka_async_evd_hkey_t	*prev;
	dapl_evd_free_t		args;
	int			retval = 0;

	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_evd_free_t), mode);
	if (retval != 0) {
		DERR("evd_free: copyin error %d\n", retval);
		return (EFAULT);
	}
	retval = daplka_hash_remove(&ia_rp->ia_evd_htbl, args.evf_hkey,
	    (void **)&evd_rp);
	if (retval != 0 || evd_rp == NULL) {
		DERR("evd_free: cannot find evd resource\n");
		return (EINVAL);
	}
	ASSERT(DAPLKA_RS_TYPE(evd_rp) == DAPL_TYPE_EVD);

	/* If this is an async evd remove it from the IA's async evd list */
	if (evd_rp->evd_flags & DAT_EVD_ASYNC_FLAG) {
		mutex_enter(&ia_rp->ia_lock);
		curr = prev = ia_rp->ia_async_evd_hkeys;
		while (curr != NULL) {
			if (curr->aeh_evd_hkey == args.evf_hkey) {
				/* unlink curr from the list */
				if (curr == prev) {
					/*
					 * if first element in the list update
					 * the list head
					 */
					ia_rp->ia_async_evd_hkeys =
					    curr->aeh_next;
				} else {
					prev->aeh_next = curr->aeh_next;
				}
				break;
			}
			prev = curr;
			curr = curr->aeh_next;
		}
		mutex_exit(&ia_rp->ia_lock);
		/* free the curr entry */
		kmem_free(curr, sizeof (daplka_async_evd_hkey_t));
	}

	/* UNREF calls the actual free function when refcnt is zero */
	DAPLKA_RS_UNREF(evd_rp);
	return (retval);
}
/*
 * destroys an EVD resource.
 * called when refcnt drops to zero.
 */
static int
daplka_evd_destroy(daplka_resource_t *gen_rp)
{
	daplka_evd_resource_t	*evd_rp = (daplka_evd_resource_t *)gen_rp;
	ibt_status_t		status;
	daplka_evd_event_t	*evt;
	ibt_priv_data_len_t	len;

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*evd_rp))
	D3("evd_destroy: entering, evd_rp 0x%p, rnum %d\n",
	    evd_rp, DAPLKA_RS_RNUM(evd_rp));
	/*
	 * free the CQ
	 */
	if (evd_rp->evd_cq_hdl) {
		ibt_set_cq_handler(evd_rp->evd_cq_hdl, NULL, NULL);
		mutex_enter(&daplka_dev->daplka_mutex);
		ibt_set_cq_private(evd_rp->evd_cq_hdl, NULL);
		mutex_exit(&daplka_dev->daplka_mutex);

		status = daplka_ibt_free_cq(evd_rp, evd_rp->evd_cq_hdl);
		if (status != IBT_SUCCESS) {
			DERR("evd_destroy: ibt_free_cq returned %d\n", status);
		}
		evd_rp->evd_cq_hdl = NULL;
		D2("evd_destroy: cq freed, rnum %d\n", DAPLKA_RS_RNUM(evd_rp));
	}

	/*
	 * release reference on CNO
	 */
	if (evd_rp->evd_cno_res != NULL) {
		mutex_enter(&evd_rp->evd_cno_res->cno_lock);
		if (evd_rp->evd_cno_res->cno_evd_cookie ==
		    evd_rp->evd_cookie) {
			evd_rp->evd_cno_res->cno_evd_cookie = 0;
		}
		mutex_exit(&evd_rp->evd_cno_res->cno_lock);
		DAPLKA_RS_UNREF(evd_rp->evd_cno_res);
		evd_rp->evd_cno_res = NULL;
	}

	/*
	 * discard all remaining events
	 */
	mutex_enter(&evd_rp->evd_lock);
	while ((evt = daplka_evd_event_dequeue(&evd_rp->evd_cr_events))) {
		D2("evd_destroy: discarding CR event: %d\n",
		    evt->ee_cmev.ec_cm_ev_type);
		len = evt->ee_cmev.ec_cm_ev_priv_data_len;
		if (len > 0) {
			kmem_free(evt->ee_cmev.ec_cm_ev_priv_data, len);
			evt->ee_cmev.ec_cm_ev_priv_data = NULL;
			evt->ee_cmev.ec_cm_ev_priv_data_len = 0;
		}
		kmem_free(evt, sizeof (*evt));
	}
	ASSERT(evd_rp->evd_cr_events.eel_num_elements == 0);

	while ((evt = daplka_evd_event_dequeue(&evd_rp->evd_conn_events))) {
		D2("evd_destroy: discarding CONN event: %d\n",
		    evt->ee_cmev.ec_cm_ev_type);
		len = evt->ee_cmev.ec_cm_ev_priv_data_len;
		if (len > 0) {
			kmem_free(evt->ee_cmev.ec_cm_ev_priv_data, len);
			evt->ee_cmev.ec_cm_ev_priv_data = NULL;
			evt->ee_cmev.ec_cm_ev_priv_data_len = 0;
		}
		kmem_free(evt, sizeof (*evt));
	}
	ASSERT(evd_rp->evd_conn_events.eel_num_elements == 0);

	while ((evt = daplka_evd_event_dequeue(&evd_rp->evd_async_events))) {
		DERR("evd_destroy: discarding ASYNC event: %d\n",
		    evt->ee_aev.ibae_type);
		kmem_free(evt, sizeof (*evt));
	}
	ASSERT(evd_rp->evd_async_events.eel_num_elements == 0);
	mutex_exit(&evd_rp->evd_lock);

	mutex_destroy(&evd_rp->evd_lock);
	DAPLKA_RS_FINI(evd_rp);
	kmem_free(evd_rp, sizeof (daplka_evd_resource_t));
	D3("evd_destroy: exiting, evd_rp 0x%p\n", evd_rp);
	return (0);
}
static void
daplka_hash_evd_free(void *obj)
{
	daplka_evd_resource_t	*evd_rp = (daplka_evd_resource_t *)obj;

	ASSERT(DAPLKA_RS_TYPE(evd_rp) == DAPL_TYPE_EVD);
	DAPLKA_RS_UNREF(evd_rp);
}
/*
 * this handler fires when new completions arrive.
 */
static void
daplka_cq_handler(ibt_cq_hdl_t ibt_cq, void *arg)
{
	D3("cq_handler: fired setting evd_newevents\n");
	daplka_evd_wakeup((daplka_evd_resource_t *)arg, NULL, NULL);
}
/*
 * this routine wakes up a client from evd_wait. if evtq and evt
 * are non-null, the event evt will be enqueued prior to waking
 * up the client. if the evd is associated with a CNO and if there
 * are no waiters on the evd, the CNO will be notified.
 */
static void
daplka_evd_wakeup(daplka_evd_resource_t *evd_rp, daplka_evd_event_list_t *evtq,
    daplka_evd_event_t *evt)
{
	uint32_t	waiters = 0;

	mutex_enter(&evd_rp->evd_lock);
	if (evtq != NULL && evt != NULL) {
		ASSERT(evtq == &evd_rp->evd_cr_events ||
		    evtq == &evd_rp->evd_conn_events ||
		    evtq == &evd_rp->evd_async_events);
		daplka_evd_event_enqueue(evtq, evt);
		ASSERT((evtq->eel_event_type == DAPLKA_EVD_CM_EVENTS) ||
		    (evtq->eel_event_type == DAPLKA_EVD_ASYNC_EVENTS));
		evd_rp->evd_newevents |= evtq->eel_event_type;
	} else {
		evd_rp->evd_newevents |= DAPLKA_EVD_ULAND_EVENTS;
	}
	waiters = evd_rp->evd_waiters;
	cv_broadcast(&evd_rp->evd_cv);
	mutex_exit(&evd_rp->evd_lock);

	/*
	 * only wakeup the CNO if there are no waiters on this evd.
	 */
	if (evd_rp->evd_cno_res != NULL && waiters == 0) {
		mutex_enter(&evd_rp->evd_cno_res->cno_lock);
		evd_rp->evd_cno_res->cno_evd_cookie = evd_rp->evd_cookie;
		cv_broadcast(&evd_rp->evd_cno_res->cno_cv);
		mutex_exit(&evd_rp->evd_cno_res->cno_lock);
	}
}
/*
 * daplka_evd_event_enqueue adds elem to the end of the event list.
 * The caller is expected to acquire appropriate locks before
 * calling enqueue.
 */
static void
daplka_evd_event_enqueue(daplka_evd_event_list_t *evlist,
    daplka_evd_event_t *elem)
{
	if (evlist->eel_tail) {
		evlist->eel_tail->ee_next = elem;
		evlist->eel_tail = elem;
	} else {
		/* list is empty */
		ASSERT(evlist->eel_head == NULL);
		evlist->eel_head = elem;
		evlist->eel_tail = elem;
	}
	evlist->eel_num_elements++;
}
/*
 * daplka_evd_event_dequeue removes and returns the first element of event
 * list. NULL is returned if the list is empty. The caller is expected to
 * acquire appropriate locks before calling dequeue.
 */
static daplka_evd_event_t *
daplka_evd_event_dequeue(daplka_evd_event_list_t *evlist)
{
	daplka_evd_event_t	*head;

	head = evlist->eel_head;
	if (head == NULL) {
		return (NULL);
	}

	evlist->eel_head = head->ee_next;
	evlist->eel_num_elements--;
	/* if it was the last element update the tail pointer too */
	if (evlist->eel_head == NULL) {
		ASSERT(evlist->eel_num_elements == 0);
		evlist->eel_tail = NULL;
	}
	return (head);
}
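/*
 * Illustrative usage (not part of the driver) of the enqueue/dequeue
 * pair above, showing the FIFO invariant and the locking contract
 * (the caller holds evd_lock). ev_list_selftest() is a hypothetical
 * name for exposition. Compiled out.
 */
#if 0
static void
ev_list_selftest(daplka_evd_resource_t *evd_rp,
    daplka_evd_event_t *a, daplka_evd_event_t *b)
{
	daplka_evd_event_list_t	*q = &evd_rp->evd_conn_events;

	mutex_enter(&evd_rp->evd_lock);	/* locking is the caller's job */
	daplka_evd_event_enqueue(q, a);
	daplka_evd_event_enqueue(q, b);
	ASSERT(q->eel_num_elements == 2);
	ASSERT(daplka_evd_event_dequeue(q) == a);	/* FIFO order */
	ASSERT(daplka_evd_event_dequeue(q) == b);
	ASSERT(q->eel_head == NULL && q->eel_tail == NULL);
	mutex_exit(&evd_rp->evd_lock);
}
#endif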
/*
 * A CNO allows the client to wait for notifications from multiple EVDs.
 * To use a CNO, the client needs to follow the procedure below:
 * 1. allocate a CNO. this returns a cno_hkey that identifies the CNO.
 * 2. create one or more EVDs using the returned cno_hkey.
 * 3. call cno_wait. when one of the associated EVDs get notified, the
 *    CNO will also get notified. cno_wait will then return with a
 *    evd_cookie identifying the EVD that triggered the event.
 *
 * A note about cno_wait:
 * -unlike a EVD, a CNO does not maintain a queue of notifications. For
 *  example, suppose multiple EVDs triggered a CNO before the client calls
 *  cno_wait; when the client calls cno_wait, it will return with the
 *  evd_cookie that identifies the *last* EVD that triggered the CNO. It
 *  is the responsibility of the client, upon returning from cno_wait, to
 *  check on all EVDs that can potentially trigger the CNO. the returned
 *  evd_cookie is only meant to be a hint. there is no guarantee that the
 *  EVD identified by the evd_cookie still contains an event or still
 *  exists by the time cno_wait returns.
 */
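/*
 * Illustrative userland flow (not part of the driver) for the CNO
 * procedure described above. The ioctl command names DAPL_CNO_ALLOC
 * and DAPL_CNO_WAIT and the drain_all_evds() helper are hypothetical
 * and for exposition only; the real command codes live in the DAPL
 * ioctl header. Because the returned evd_cookie is only a hint, every
 * EVD attached to the CNO is drained after cno_wait returns. Compiled
 * out.
 */
#if 0
extern void drain_all_evds(void);

static void
cno_consumer_loop(int fd)
{
	dapl_cno_alloc_t	alloc_args;
	dapl_cno_wait_t		wait_args;

	(void) ioctl(fd, DAPL_CNO_ALLOC, &alloc_args);
	/* create EVDs, passing alloc_args.cno_hkey as evd_cno_hkey ... */

	for (;;) {
		wait_args.cnw_hkey = alloc_args.cno_hkey;
		wait_args.cnw_timeout = 1000000;	/* one second */
		if (ioctl(fd, DAPL_CNO_WAIT, &wait_args) != 0)
			continue;	/* EINTR or ETIME */
		/*
		 * cnw_evd_cookie names the *last* EVD that fired; it is
		 * a hint only, so check every attached EVD.
		 */
		drain_all_evds();
	}
}
#endif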
/*
 * allocates a CNO.
 * the returned cno_hkey may subsequently be used in evd_create.
 */
static int
daplka_cno_alloc(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	dapl_cno_alloc_t	args;
	daplka_cno_resource_t	*cno_rp = NULL;
	uint64_t		cno_hkey = 0;
	boolean_t		inserted = B_FALSE;
	int			retval = 0;

	cno_rp = kmem_zalloc(sizeof (*cno_rp), daplka_km_flags);
	if (cno_rp == NULL) {
		DERR("cno_alloc: cannot allocate cno resource\n");
		return (ENOMEM);
	}
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*cno_rp))
	DAPLKA_RS_INIT(cno_rp, DAPL_TYPE_CNO,
	    DAPLKA_RS_RNUM(ia_rp), daplka_cno_destroy);

	mutex_init(&cno_rp->cno_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&cno_rp->cno_cv, NULL, CV_DRIVER, NULL);
	cno_rp->cno_evd_cookie = 0;

	/* insert into cno hash table */
	retval = daplka_hash_insert(&ia_rp->ia_cno_htbl,
	    &cno_hkey, (void *)cno_rp);
	if (retval != 0) {
		DERR("cno_alloc: cannot insert cno resource\n");
		goto cleanup;
	}
	inserted = B_TRUE;
	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*cno_rp))

	/* return hkey to library */
	args.cno_hkey = cno_hkey;

	retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_cno_alloc_t),
	    mode);
	if (retval != 0) {
		DERR("cno_alloc: copyout error %d\n", retval);
		retval = EFAULT;
		goto cleanup;
	}
	return (0);

cleanup:;
	if (inserted) {
		daplka_cno_resource_t	*free_rp = NULL;

		(void) daplka_hash_remove(&ia_rp->ia_cno_htbl, cno_hkey,
		    (void **)&free_rp);
		if (free_rp != cno_rp) {
			DERR("cno_alloc: cannot remove cno\n");
			/*
			 * we can only get here if another thread
			 * has completed the cleanup in cno_free
			 */
			return (retval);
		}
	}
	DAPLKA_RS_UNREF(cno_rp);
	return (retval);
}
/*
 * this gets called when a CNO resource's refcnt drops to zero.
 */
static int
daplka_cno_destroy(daplka_resource_t *gen_rp)
{
	daplka_cno_resource_t	*cno_rp = (daplka_cno_resource_t *)gen_rp;

	ASSERT(DAPLKA_RS_REFCNT(cno_rp) == 0);
	D2("cno_destroy: entering, cno_rp %p, rnum %d\n",
	    cno_rp, DAPLKA_RS_RNUM(cno_rp));

	ASSERT(DAPLKA_RS_TYPE(cno_rp) == DAPL_TYPE_CNO);
	cv_destroy(&cno_rp->cno_cv);
	mutex_destroy(&cno_rp->cno_lock);

	DAPLKA_RS_FINI(cno_rp);
	kmem_free(cno_rp, sizeof (daplka_cno_resource_t));
	D2("cno_destroy: exiting, cno_rp %p\n", cno_rp);
	return (0);
}
static void
daplka_hash_cno_free(void *obj)
{
	daplka_cno_resource_t	*cno_rp = (daplka_cno_resource_t *)obj;

	ASSERT(DAPLKA_RS_TYPE(cno_rp) == DAPL_TYPE_CNO);
	DAPLKA_RS_UNREF(cno_rp);
}
/*
 * removes the CNO from the cno hash table and frees the CNO
 * if there are no references to it. if there are references to
 * it, the CNO will be destroyed when the last of the references
 * is released. once the CNO is removed from the cno hash table,
 * the client will no longer be able to call cno_wait on the CNO.
 */
static int
daplka_cno_free(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	daplka_cno_resource_t	*cno_rp = NULL;
	dapl_cno_free_t		args;
	int			retval = 0;

	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_cno_free_t), mode);
	if (retval != 0) {
		DERR("cno_free: copyin error %d\n", retval);
		return (EINVAL);
	}

	retval = daplka_hash_remove(&ia_rp->ia_cno_htbl,
	    args.cnf_hkey, (void **)&cno_rp);
	if (retval != 0 || cno_rp == NULL) {
		DERR("cno_free: cannot find cno resource\n");
		return (EINVAL);
	}
	ASSERT(DAPLKA_RS_TYPE(cno_rp) == DAPL_TYPE_CNO);

	/* UNREF calls the actual free function when refcnt is zero */
	DAPLKA_RS_UNREF(cno_rp);
	return (0);
}
/*
 * wait for a notification from one of the associated EVDs.
 */
static int
daplka_cno_wait(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	daplka_cno_resource_t	*cno_rp = NULL;
	dapl_cno_wait_t		args;
	int			retval = 0;
	uint64_t		evd_cookie = 0;
	clock_t			timeout, curr_time;

	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_cno_wait_t), mode);
	if (retval != 0) {
		DERR("cno_wait: copyin error %d\n", retval);
		return (EINVAL);
	}
	/* get cno resource */
	cno_rp = (daplka_cno_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_cno_htbl, args.cnw_hkey);
	if (cno_rp == NULL) {
		DERR("cno_wait: cannot find cno resource\n");
		return (EINVAL);
	}
	ASSERT(DAPLKA_RS_TYPE(cno_rp) == DAPL_TYPE_CNO);

	curr_time = ddi_get_lbolt();
	timeout = curr_time + drv_usectohz(args.cnw_timeout);

	/*
	 * use the max value if we wrapped around
	 */
	if (args.cnw_timeout > 0 && timeout <= curr_time) {
		/*
		 * clock_t (size long) changes between 32 and 64-bit kernels
		 */
		timeout = LONG_MAX >> 4;
	}
	mutex_enter(&cno_rp->cno_lock);
	while (cno_rp->cno_evd_cookie == 0) {
		int rval = 0;

		rval = cv_timedwait_sig(&cno_rp->cno_cv,
		    &cno_rp->cno_lock, timeout);
		if (rval == 0) {
			DERR("cno_wait: interrupted\n");
			mutex_exit(&cno_rp->cno_lock);
			retval = EINTR;
			goto cleanup;
		} else if (rval == -1) {
			DERR("cno_wait: timed out\n");
			mutex_exit(&cno_rp->cno_lock);
			retval = ETIME;
			goto cleanup;
		}
	}
	evd_cookie = cno_rp->cno_evd_cookie;
	cno_rp->cno_evd_cookie = 0;
	mutex_exit(&cno_rp->cno_lock);

	ASSERT(evd_cookie != 0);
	D2("cno_wait: returning evd_cookie 0x%p\n",
	    (void *)(uintptr_t)evd_cookie);
	args.cnw_evd_cookie = evd_cookie;
	retval = ddi_copyout((void *)&args, (void *)arg,
	    sizeof (dapl_cno_wait_t), mode);
	if (retval != 0) {
		DERR("cno_wait: copyout error %d\n", retval);
		retval = EFAULT;
		goto cleanup;
	}

cleanup:;
	if (cno_rp != NULL) {
		DAPLKA_RS_UNREF(cno_rp);
	}
	return (retval);
}
/*
 * this function is called by the client when it decides to
 * accept a connection request. a connection request is generated
 * when the active side generates a REQ MAD to a service point on
 * the destination node. this causes the CM service handler
 * (daplka_cm_service_req) on the passive side to be called. This
 * handler will then enqueue this connection request to the backlog
 * array of the service point. A connection event containing the
 * backlog array index and connection request private data is passed
 * to the client's service point EVD (sp_evd_res). once the event
 * is passed up to the userland, the client may examine the request
 * to decide whether to call daplka_cr_accept or daplka_cr_reject.
 */
static int
daplka_cr_accept(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	daplka_ep_resource_t	*ep_rp = NULL;
	daplka_sp_resource_t	*sp_rp = NULL;
	dapl_cr_accept_t	args;
	daplka_sp_conn_pend_t	*conn;
	ibt_cm_proceed_reply_t	proc_reply;
	ibt_status_t		status;
	uint16_t		bkl_index;
	uint32_t		old_state, new_state;
	int			retval = 0;
	void			*priv_data = NULL, *sid;

	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_cr_accept_t),
	    mode);
	if (retval != 0) {
		DERR("cr_accept: copyin error %d\n", retval);
		return (EFAULT);
	}
	if (args.cra_priv_sz > DAPL_MAX_PRIVATE_DATA_SIZE) {
		DERR("cr_accept: private data len (%d) exceeded "
		    "max size %d\n", args.cra_priv_sz,
		    DAPL_MAX_PRIVATE_DATA_SIZE);
		return (EINVAL);
	}
	priv_data = (args.cra_priv_sz > 0) ? (void *)args.cra_priv : NULL;

	D2("cr_accept: priv(0x%p) priv_len(%u) psep(0x%llx)\n", priv_data,
	    args.cra_priv_sz, (longlong_t)args.cra_bkl_cookie);

	/* get sp resource */
	sp_rp = (daplka_sp_resource_t *)daplka_hash_lookup(&ia_rp->ia_sp_htbl,
	    args.cra_sp_hkey);
	if (sp_rp == NULL) {
		DERR("cr_accept: cannot find sp resource\n");
		return (EINVAL);
	}
	ASSERT(DAPLKA_RS_TYPE(sp_rp) == DAPL_TYPE_SP);

	/* get ep resource */
	ep_rp = (daplka_ep_resource_t *)daplka_hash_lookup(&ia_rp->ia_ep_htbl,
	    args.cra_ep_hkey);
	if (ep_rp == NULL) {
		DERR("cr_accept: cannot find ep resource\n");
		retval = EINVAL;
		goto cleanup;
	}
	ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP);

	/*
	 * accept is only allowed if ep_state is CLOSED.
	 * note that after this point, the ep_state is frozen
	 * (i.e. TRANSITIONING) until we transition ep_state
	 * to ACCEPTING or back to CLOSED if we get an error.
	 */
	new_state = old_state = daplka_ep_get_state(ep_rp);
	if (old_state != DAPLKA_EP_STATE_CLOSED) {
		DERR("cr_accept: invalid ep state %d\n", old_state);
		retval = EINVAL;
		goto cleanup;
	}

	mutex_enter(&sp_rp->sp_lock);
	bkl_index = DAPLKA_GET_PSEP_INDEX(args.cra_bkl_cookie);
	/*
	 * make sure the backlog index is not bogus.
	 */
	if (bkl_index >= sp_rp->sp_backlog_size) {
		DERR("cr_accept: invalid backlog index 0x%llx %d\n",
		    (longlong_t)args.cra_bkl_cookie, bkl_index);
		mutex_exit(&sp_rp->sp_lock);
		retval = EINVAL;
		goto cleanup;
	}
	/*
	 * make sure the backlog index indeed refers
	 * to a pending connection.
	 */
	conn = &sp_rp->sp_backlog[bkl_index];
	if (conn->spcp_state != DAPLKA_SPCP_PENDING) {
		DERR("cr_accept: invalid conn state %d\n",
		    conn->spcp_state);
		mutex_exit(&sp_rp->sp_lock);
		retval = EINVAL;
		goto cleanup;
	}
	if (conn->spcp_sid == NULL) {
		DERR("cr_accept: sid == NULL\n");
		mutex_exit(&sp_rp->sp_lock);
		retval = EINVAL;
		goto cleanup;
	}
	if (ep_rp->ep_chan_hdl == NULL) {
		/*
		 * an ep_rp with a NULL chan_hdl is impossible.
		 */
		DERR("cr_accept: ep_chan_hdl == NULL\n");
		mutex_exit(&sp_rp->sp_lock);
		retval = EINVAL;
		goto cleanup;
	}
	proc_reply.rep.cm_channel = ep_rp->ep_chan_hdl;
	proc_reply.rep.cm_rdma_ra_out = conn->spcp_rdma_ra_out;
	proc_reply.rep.cm_rdma_ra_in = conn->spcp_rdma_ra_in;
	proc_reply.rep.cm_rnr_retry_cnt = IBT_RNR_INFINITE_RETRY;
	sid = conn->spcp_sid;

	/*
	 * this clears our slot in the backlog array.
	 * this slot may now be used by other pending connections.
	 */
	conn->spcp_sid = NULL;
	conn->spcp_state = DAPLKA_SPCP_INIT;
	conn->spcp_req_len = 0;
	mutex_exit(&sp_rp->sp_lock);

	/*
	 * Set the unique cookie corresponding to the CR to this EP
	 * so that it can be used in passive side CM callbacks
	 */
	ep_rp->ep_psep_cookie = args.cra_bkl_cookie;

	status = ibt_cm_proceed(IBT_CM_EVENT_REQ_RCV, sid, IBT_CM_ACCEPT,
	    &proc_reply, priv_data, (ibt_priv_data_len_t)args.cra_priv_sz);

	if (status != IBT_SUCCESS) {
		DERR("cr_accept: ibt_cm_proceed returned %d\n", status);
		*rvalp = (int)status;
		retval = 0;
		goto cleanup;
	}
	/*
	 * note that the CM handler may actually be called at this
	 * point. but since ep_state is still in TRANSITIONING, the
	 * handler will wait until we transition to ACCEPTING. this
	 * prevents the case where we set ep_state to ACCEPTING after
	 * daplka_service_conn_est sets ep_state to CONNECTED.
	 */
	new_state = DAPLKA_EP_STATE_ACCEPTING;

cleanup:;
	if (sp_rp != NULL) {
		DAPLKA_RS_UNREF(sp_rp);
	}
	if (ep_rp != NULL) {
		daplka_ep_set_state(ep_rp, old_state, new_state);
		DAPLKA_RS_UNREF(ep_rp);
	}
	return (retval);
}
/*
 * this function is called by the client to reject a
 * connection request.
 */
static int
daplka_cr_reject(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	dapl_cr_reject_t	args;
	daplka_sp_resource_t	*sp_rp = NULL;
	daplka_sp_conn_pend_t	*conn;
	ibt_cm_proceed_reply_t	proc_reply;
	ibt_cm_status_t		proc_status;
	ibt_status_t		status;
	uint16_t		bkl_index;
	int			retval = 0;
	void			*sid;

	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_cr_reject_t),
	    mode);
	if (retval != 0) {
		DERR("cr_reject: copyin error %d\n", retval);
		return (EFAULT);
	}
	/* get sp resource */
	sp_rp = (daplka_sp_resource_t *)daplka_hash_lookup(&ia_rp->ia_sp_htbl,
	    args.crr_sp_hkey);
	if (sp_rp == NULL) {
		DERR("cr_reject: cannot find sp resource\n");
		return (EINVAL);
	}
	ASSERT(DAPLKA_RS_TYPE(sp_rp) == DAPL_TYPE_SP);

	D2("cr_reject: psep(0x%llx)\n", (longlong_t)args.crr_bkl_cookie);

	mutex_enter(&sp_rp->sp_lock);
	bkl_index = DAPLKA_GET_PSEP_INDEX(args.crr_bkl_cookie);
	/*
	 * make sure the backlog index is not bogus.
	 */
	if (bkl_index >= sp_rp->sp_backlog_size) {
		DERR("cr_reject: invalid backlog index 0x%llx %d\n",
		    (longlong_t)args.crr_bkl_cookie, bkl_index);
		mutex_exit(&sp_rp->sp_lock);
		retval = EINVAL;
		goto cleanup;
	}
	/*
	 * make sure the backlog index indeed refers
	 * to a pending connection.
	 */
	conn = &sp_rp->sp_backlog[bkl_index];
	if (conn->spcp_state != DAPLKA_SPCP_PENDING) {
		DERR("cr_reject: invalid conn state %d\n",
		    conn->spcp_state);
		mutex_exit(&sp_rp->sp_lock);
		retval = EINVAL;
		goto cleanup;
	}
	if (conn->spcp_sid == NULL) {
		DERR("cr_reject: sid == NULL\n");
		mutex_exit(&sp_rp->sp_lock);
		retval = EINVAL;
		goto cleanup;
	}
	bzero(&proc_reply, sizeof (proc_reply));
	sid = conn->spcp_sid;

	/*
	 * this clears our slot in the backlog array.
	 * this slot may now be used by other pending connections.
	 */
	conn->spcp_sid = NULL;
	conn->spcp_state = DAPLKA_SPCP_INIT;
	conn->spcp_req_len = 0;

	switch (args.crr_reason) {
	case DAPL_IB_CM_REJ_REASON_CONSUMER_REJ:
		/* results in IBT_CM_CONSUMER as the reason for reject */
		proc_status = IBT_CM_REJECT;
		break;
	case DAPL_IB_CME_LOCAL_FAILURE:
		/*FALLTHRU*/
	case DAPL_IB_CME_DESTINATION_UNREACHABLE:
		/* results in IBT_CM_NO_RESC as the reason for reject */
		proc_status = IBT_CM_NO_RESOURCE;
		break;
	default:
		/* unexpected reason code */
		ASSERT(!"unexpected reject reason code");
		proc_status = IBT_CM_NO_RESOURCE;
		break;
	}

	mutex_exit(&sp_rp->sp_lock);

	status = ibt_cm_proceed(IBT_CM_EVENT_REQ_RCV, sid, proc_status,
	    &proc_reply, NULL, 0);

	if (status != IBT_SUCCESS) {
		DERR("cr_reject: ibt_cm_proceed returned %d\n", status);
		*rvalp = (int)status;
		retval = 0;
	}

cleanup:;
	if (sp_rp != NULL) {
		DAPLKA_RS_UNREF(sp_rp);
	}
	return (retval);
}
/*
 * daplka_sp_match is used by daplka_hash_walk for finding SPs
 */
typedef struct daplka_sp_match_s {
	uint64_t		spm_conn_qual;
	daplka_sp_resource_t	*spm_sp_rp;
} daplka_sp_match_t;
_NOTE(SCHEME_PROTECTS_DATA("daplka", daplka_sp_match_s::spm_sp_rp))

static int
daplka_sp_match(void *objp, void *arg)
{
	daplka_sp_resource_t	*sp_rp = (daplka_sp_resource_t *)objp;

	ASSERT(DAPLKA_RS_TYPE(sp_rp) == DAPL_TYPE_SP);
	if (sp_rp->sp_conn_qual ==
	    ((daplka_sp_match_t *)arg)->spm_conn_qual) {
		((daplka_sp_match_t *)arg)->spm_sp_rp = sp_rp;
		D2("daplka_sp_match: found sp, conn_qual %016llu\n",
		    (longlong_t)((daplka_sp_match_t *)arg)->spm_conn_qual);
		DAPLKA_RS_REF(sp_rp);
		return (1);
	}
	return (0);
}
/*
 * cr_handoff allows the client to handoff a connection request from
 * one service point to another.
 */
static int
daplka_cr_handoff(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
    dapl_cr_handoff_t	args;
    daplka_sp_resource_t	*sp_rp = NULL, *new_sp_rp = NULL;
    daplka_sp_conn_pend_t	*conn;
    daplka_sp_match_t	sp_match;
    ibt_cm_event_t		fake_event;
    ibt_cm_status_t		cm_status;
    ibt_status_t		status;
    uint16_t		bkl_index;
    void			*sid, *priv = NULL;
    int			retval = 0, priv_len = 0;

    D3("cr_handoff: entering\n");
    retval = ddi_copyin((void *)arg, &args, sizeof (dapl_cr_handoff_t),
        mode);
    if (retval != 0) {
        DERR("cr_handoff: copyin error %d\n", retval);
        return (EFAULT);
    }
    /* get sp resource */
    sp_rp = (daplka_sp_resource_t *)daplka_hash_lookup(&ia_rp->ia_sp_htbl,
        args.crh_sp_hkey);
    if (sp_rp == NULL) {
        DERR("cr_handoff: cannot find sp resource\n");
        return (EINVAL);
    }
    ASSERT(DAPLKA_RS_TYPE(sp_rp) == DAPL_TYPE_SP);

    /*
     * find the destination service point.
     */
    sp_match.spm_conn_qual = args.crh_conn_qual;
    sp_match.spm_sp_rp = NULL;
    daplka_hash_walk(&daplka_global_sp_htbl, daplka_sp_match,
        (void *)&sp_match, RW_READER);

    /*
     * return if we cannot find the service point
     */
    if (sp_match.spm_sp_rp == NULL) {
        DERR("cr_handoff: new sp not found, conn qual = %llu\n",
            (longlong_t)args.crh_conn_qual);
        retval = EINVAL;
        goto cleanup;
    }
    new_sp_rp = sp_match.spm_sp_rp;

    /*
     * the spec does not discuss the security implications of this
     * function. to be safe, we currently only allow processes
     * owned by the same user to handoff connection requests
     * to each other.
     */
    if (crgetruid(cred) != new_sp_rp->sp_ruid) {
        DERR("cr_handoff: permission denied\n");
        retval = EPERM;
        goto cleanup;
    }

    D2("cr_handoff: psep(0x%llx)\n", (longlong_t)args.crh_bkl_cookie);

    mutex_enter(&sp_rp->sp_lock);
    bkl_index = DAPLKA_GET_PSEP_INDEX(args.crh_bkl_cookie);
    /*
     * make sure the backlog index is not bogus.
     */
    if (bkl_index >= sp_rp->sp_backlog_size) {
        DERR("cr_handoff: invalid backlog index 0x%llx %d\n",
            (longlong_t)args.crh_bkl_cookie, bkl_index);
        mutex_exit(&sp_rp->sp_lock);
        retval = EINVAL;
        goto cleanup;
    }
    /*
     * make sure the backlog index indeed refers
     * to a pending connection.
     */
    conn = &sp_rp->sp_backlog[bkl_index];
    if (conn->spcp_state != DAPLKA_SPCP_PENDING) {
        DERR("cr_handoff: invalid conn state %d\n",
            conn->spcp_state);
        mutex_exit(&sp_rp->sp_lock);
        retval = EINVAL;
        goto cleanup;
    }
    if (conn->spcp_sid == NULL) {
        DERR("cr_handoff: sid == NULL\n");
        mutex_exit(&sp_rp->sp_lock);
        retval = EINVAL;
        goto cleanup;
    }
    sid = conn->spcp_sid;

    priv_len = conn->spcp_req_len;
    if (priv_len > 0) {
        priv = kmem_zalloc(priv_len, daplka_km_flags);
        if (priv == NULL) {
            mutex_exit(&sp_rp->sp_lock);
            retval = ENOMEM;
            goto cleanup;
        }
        bcopy(conn->spcp_req_data, priv, priv_len);
    }
    /*
     * this clears our slot in the backlog array.
     * this slot may now be used by other pending connections.
     */
    conn->spcp_sid = NULL;
    conn->spcp_state = DAPLKA_SPCP_INIT;
    conn->spcp_req_len = 0;
    mutex_exit(&sp_rp->sp_lock);

    /* fill fake_event and call service_req handler */
    bzero(&fake_event, sizeof (fake_event));
    fake_event.cm_type = IBT_CM_EVENT_REQ_RCV;
    fake_event.cm_session_id = sid;
    fake_event.cm_priv_data_len = priv_len;
    fake_event.cm_priv_data = priv;

    cm_status = daplka_cm_service_req(new_sp_rp,
        &fake_event, NULL, priv, (ibt_priv_data_len_t)priv_len);
    if (cm_status != IBT_CM_DEFER) {
        ibt_cm_proceed_reply_t	proc_reply;

        DERR("cr_handoff: service_req returned %d\n", cm_status);
        /*
         * if for some reason cm_service_req failed, we
         * reject the connection.
         */
        bzero(&proc_reply, sizeof (proc_reply));

        status = ibt_cm_proceed(IBT_CM_EVENT_REQ_RCV, sid,
            IBT_CM_NO_RESOURCE, &proc_reply, NULL, 0);
        if (status != IBT_SUCCESS) {
            DERR("cr_handoff: ibt_cm_proceed returned %d\n",
                status);
        }
        *rvalp = (int)status;
        retval = 0;
    }

cleanup:;
    if (priv_len > 0 && priv != NULL) {
        kmem_free(priv, priv_len);
    }
    if (new_sp_rp != NULL) {
        DAPLKA_RS_UNREF(new_sp_rp);
    }
    if (sp_rp != NULL) {
        DAPLKA_RS_UNREF(sp_rp);
    }
    D3("cr_handoff: exiting\n");
    return (retval);
}
/*
 * returns a list of hca attributes
 */
static int
daplka_ia_query(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
    dapl_ia_query_t		args;
    int			retval;
    ibt_hca_attr_t		*hcap;

    hcap = &ia_rp->ia_hca->hca_attr;

    /*
     * Take the ibt_hca_attr_t and stuff them into dapl_hca_attr_t
     */
    args.hca_attr.dhca_vendor_id = hcap->hca_vendor_id;
    args.hca_attr.dhca_device_id = hcap->hca_device_id;
    args.hca_attr.dhca_version_id = hcap->hca_version_id;
    args.hca_attr.dhca_max_chans = hcap->hca_max_chans;
    args.hca_attr.dhca_max_chan_sz = hcap->hca_max_chan_sz;
    args.hca_attr.dhca_max_sgl = hcap->hca_max_sgl;
    args.hca_attr.dhca_max_cq = hcap->hca_max_cq;
    args.hca_attr.dhca_max_cq_sz = hcap->hca_max_cq_sz;
    args.hca_attr.dhca_max_memr = hcap->hca_max_memr;
    args.hca_attr.dhca_max_memr_len = hcap->hca_max_memr_len;
    args.hca_attr.dhca_max_mem_win = hcap->hca_max_mem_win;
    args.hca_attr.dhca_max_rdma_in_chan = hcap->hca_max_rdma_in_chan;
    args.hca_attr.dhca_max_rdma_out_chan = hcap->hca_max_rdma_out_chan;
    args.hca_attr.dhca_max_partitions = hcap->hca_max_partitions;
    args.hca_attr.dhca_nports = hcap->hca_nports;
    args.hca_attr.dhca_node_guid = hcap->hca_node_guid;
    args.hca_attr.dhca_max_pd = hcap->hca_max_pd;
    args.hca_attr.dhca_max_srqs = hcap->hca_max_srqs;
    args.hca_attr.dhca_max_srqs_sz = hcap->hca_max_srqs_sz;
    args.hca_attr.dhca_max_srq_sgl = hcap->hca_max_srq_sgl;

    retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_ia_query_t),
        mode);
    if (retval != 0) {
        DERR("ia_query: copyout error %d\n", retval);
        return (EFAULT);
    }
    return (0);
}
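/*
 * Example (sketch, not part of the driver): the userland library reads
 * these attributes back with a single ioctl. The command name
 * DAPL_IA_QUERY and the descriptor ia_fd below are assumptions made
 * for illustration:
 *
 *	dapl_ia_query_t q;
 *
 *	if (ioctl(ia_fd, DAPL_IA_QUERY, &q) == 0) {
 *		printf("max chans %u, max cq %u\n",
 *		    q.hca_attr.dhca_max_chans, q.hca_attr.dhca_max_cq);
 *	}
 */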
/*
 * This routine is passed to hash walk in the daplka_pre_mr_cleanup_callback,
 * it frees the mw embedded in the mw resource object.
 */
static int
daplka_mr_cb_freemw(void *objp, void *arg)
{
    daplka_mw_resource_t	*mw_rp = (daplka_mw_resource_t *)objp;
    ibt_mw_hdl_t		mw_hdl;
    ibt_status_t		status;

    D3("mr_cb_freemw: entering, mw_rp 0x%p\n", mw_rp);
    DAPLKA_RS_REF(mw_rp);

    mutex_enter(&mw_rp->mw_lock);
    mw_hdl = mw_rp->mw_hdl;
    /*
     * we set mw_hdl to NULL so it won't get freed again
     */
    mw_rp->mw_hdl = NULL;
    mutex_exit(&mw_rp->mw_lock);

    if (mw_hdl != NULL) {
        status = daplka_ibt_free_mw(mw_rp, mw_rp->mw_hca_hdl, mw_hdl);
        if (status != IBT_SUCCESS) {
            DERR("mr_cb_freemw: ibt_free_mw returned %d\n", status);
        }
        D3("mr_cb_freemw: mw freed\n");
    }

    DAPLKA_RS_UNREF(mw_rp);
    return (0);
}
/*
 * This routine is called from the HCA driver's umem lock undo callback
 * when the memory associated with an MR is being unmapped. In this callback
 * we free all the MWs associated with the IA and post an unaffiliated
 * async event to tell the app that there was a catastrophic event.
 * This allows the HCA to deregister the MR in its callback processing.
 */
static void
daplka_pre_mr_cleanup_callback(void *arg1, void *arg2 /*ARGSUSED*/)
{
    daplka_mr_resource_t	*mr_rp;
    daplka_ia_resource_t	*ia_rp;
#ifdef	_THROW_ASYNC_EVENT_FROM_MRUNLOCKCB
    ibt_async_event_t	event;
    ibt_hca_attr_t		*hca_attrp;
#endif
    minor_t			rnum;

    mr_rp = (daplka_mr_resource_t *)arg1;
    rnum = DAPLKA_RS_RNUM(mr_rp);
    daplka_shared_mr_free(mr_rp);

    ia_rp = (daplka_ia_resource_t *)daplka_resource_lookup(rnum);
    if (ia_rp == NULL) {
        DERR("daplka_mr_unlock_callback: resource not found, rnum %d\n",
            rnum);
        return;
    }

    DERR("daplka_mr_unlock_callback: resource(%p) rnum(%d)\n", ia_rp, rnum);

    mutex_enter(&ia_rp->ia_lock);
    /*
     * MW is being alloced OR MW freeze has already begun. In
     * both these cases we wait for that to complete before
     * continuing.
     */
    while ((ia_rp->ia_state == DAPLKA_IA_MW_ALLOC_IN_PROGRESS) ||
        (ia_rp->ia_state == DAPLKA_IA_MW_FREEZE_IN_PROGRESS)) {
        cv_wait(&ia_rp->ia_cv, &ia_rp->ia_lock);
    }

    switch (ia_rp->ia_state) {
    case DAPLKA_IA_INIT:
        ia_rp->ia_state = DAPLKA_IA_MW_FREEZE_IN_PROGRESS;
        mutex_exit(&ia_rp->ia_lock);
        break;
    case DAPLKA_IA_MW_FROZEN:
        /* the mw on this ia have been freed */
        D2("daplka_mr_unlock_callback: ia_state %d nothing to do\n",
            ia_rp->ia_state);
        mutex_exit(&ia_rp->ia_lock);
        goto cleanup;
    default:
        ASSERT(!"daplka_mr_unlock_callback: IA state invalid");
        DERR("daplka_mr_unlock_callback: invalid ia_state %d\n",
            ia_rp->ia_state);
        mutex_exit(&ia_rp->ia_lock);
        goto cleanup;
    }

    /*
     * Walk the mw hash table and free the mws. Acquire a writer
     * lock since we don't want anyone else traversing this tree
     * while we are freeing the MW.
     */
    daplka_hash_walk(&ia_rp->ia_mw_htbl, daplka_mr_cb_freemw, NULL,
        RW_WRITER);

    mutex_enter(&ia_rp->ia_lock);
    ASSERT(ia_rp->ia_state == DAPLKA_IA_MW_FREEZE_IN_PROGRESS);
    ia_rp->ia_state = DAPLKA_IA_MW_FROZEN;
    cv_broadcast(&ia_rp->ia_cv);
    mutex_exit(&ia_rp->ia_lock);

    /*
     * Currently commented out because Oracle skgxp is incapable
     * of handling async events correctly.
     */
#ifdef	_THROW_ASYNC_EVENT_FROM_MRUNLOCKCB
    /*
     * Enqueue an unaffiliated async error event to indicate this
     * IA has encountered a problem that caused the MWs to be freed up
     */

    /* Create a fake event, only relevant field is the hca_guid */
    bzero(&event, sizeof (ibt_async_event_t));
    hca_attrp = &ia_rp->ia_hca->hca_attr;
    event.ev_hca_guid = hca_attrp->hca_node_guid;

    daplka_async_event_create(IBT_ERROR_LOCAL_CATASTROPHIC, &event, 0,
        ia_rp);
#endif	/* _THROW_ASYNC_EVENT_FROM_MRUNLOCKCB */

cleanup:;
    D2("daplka_mr_unlock_callback: resource(%p) done\n", ia_rp);
    DAPLKA_RS_UNREF(ia_rp);
}
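/*
 * IA memory-window states referenced above and in daplka_mw_alloc:
 *
 *	DAPLKA_IA_INIT <--> DAPLKA_IA_MW_ALLOC_IN_PROGRESS
 *	      |                   (ia_mw_alloccnt > 0)
 *	      v
 *	DAPLKA_IA_MW_FREEZE_IN_PROGRESS --> DAPLKA_IA_MW_FROZEN
 *
 * Only the umem-unlock callback moves an IA from INIT into the freeze
 * path; every other contender blocks on ia_cv under ia_lock until the
 * alloc or freeze in progress has completed.
 */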
/*
 * registers a memory region.
 * memory locking will be done by the HCA driver.
 */
static int
daplka_mr_register(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
    boolean_t		inserted = B_FALSE;
    daplka_mr_resource_t	*mr_rp;
    daplka_pd_resource_t	*pd_rp;
    dapl_mr_register_t	args;
    ibt_mr_data_in_t	mr_cb_data_in;
    uint64_t		mr_hkey = 0;
    ibt_status_t		status;
    int			retval;

    retval = ddi_copyin((void *)arg, &args, sizeof (dapl_mr_register_t),
        mode);
    if (retval != 0) {
        DERR("mr_register: copyin error %d\n", retval);
        return (EFAULT);
    }
    mr_rp = kmem_zalloc(sizeof (daplka_mr_resource_t), daplka_km_flags);
    if (mr_rp == NULL) {
        DERR("mr_register: cannot allocate mr resource\n");
        return (ENOMEM);
    }
    _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr_rp))
    DAPLKA_RS_INIT(mr_rp, DAPL_TYPE_MR,
        DAPLKA_RS_RNUM(ia_rp), daplka_mr_destroy);

    mutex_init(&mr_rp->mr_lock, NULL, MUTEX_DRIVER, NULL);
    mr_rp->mr_hca = ia_rp->ia_hca;
    mr_rp->mr_hca_hdl = ia_rp->ia_hca_hdl;
    mr_rp->mr_next = NULL;
    mr_rp->mr_shared_mr = NULL;

    /* get pd handle */
    pd_rp = (daplka_pd_resource_t *)
        daplka_hash_lookup(&ia_rp->ia_pd_htbl, args.mr_pd_hkey);
    if (pd_rp == NULL) {
        DERR("mr_register: cannot find pd resource\n");
        retval = EINVAL;
        goto cleanup;
    }
    ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
    mr_rp->mr_pd_res = pd_rp;

    mr_rp->mr_attr.mr_vaddr = args.mr_vaddr;
    mr_rp->mr_attr.mr_len = args.mr_len;
    mr_rp->mr_attr.mr_as = curproc->p_as;
    mr_rp->mr_attr.mr_flags = args.mr_flags | IBT_MR_NOSLEEP;

    D3("mr_register: mr_vaddr %p, mr_len %llu, mr_flags 0x%x\n",
        (void *)(uintptr_t)mr_rp->mr_attr.mr_vaddr,
        (longlong_t)mr_rp->mr_attr.mr_len,
        mr_rp->mr_attr.mr_flags);

    status = daplka_ibt_register_mr(mr_rp, ia_rp->ia_hca_hdl,
        mr_rp->mr_pd_res->pd_hdl, &mr_rp->mr_attr, &mr_rp->mr_hdl,
        &mr_rp->mr_desc);

    if (status != IBT_SUCCESS) {
        DERR("mr_register: ibt_register_mr error %d\n", status);
        *rvalp = (int)status;
        retval = 0;
        goto cleanup;
    }

    mr_cb_data_in.mr_rev = IBT_MR_DATA_IN_IF_VERSION;
    mr_cb_data_in.mr_func = daplka_pre_mr_cleanup_callback;
    mr_cb_data_in.mr_arg1 = (void *)mr_rp;
    mr_cb_data_in.mr_arg2 = NULL;

    /* Pass the service driver mr cleanup handler to the hca driver */
    status = ibt_ci_data_in(ia_rp->ia_hca_hdl,
        IBT_CI_NO_FLAGS, IBT_HDL_MR, (void *)mr_rp->mr_hdl,
        &mr_cb_data_in, sizeof (mr_cb_data_in));

    if (status != IBT_SUCCESS) {
        DERR("mr_register: ibt_ci_data_in error(%d) ver(%d)",
            status, mr_cb_data_in.mr_rev);
        *rvalp = (int)status;
        retval = 0;
        goto cleanup;
    }

    /* insert into mr hash table */
    retval = daplka_hash_insert(&ia_rp->ia_mr_htbl,
        &mr_hkey, (void *)mr_rp);
    if (retval != 0) {
        DERR("mr_register: cannot insert mr resource into mr_htbl\n");
        goto cleanup;
    }
    inserted = B_TRUE;
    _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*mr_rp))

    args.mr_lkey = mr_rp->mr_desc.md_lkey;
    args.mr_rkey = mr_rp->mr_desc.md_rkey;
    args.mr_hkey = mr_hkey;

    retval = ddi_copyout((void *)&args, (void *)arg,
        sizeof (dapl_mr_register_t), mode);
    if (retval != 0) {
        DERR("mr_register: copyout error %d\n", retval);
        retval = EFAULT;
        goto cleanup;
    }
    return (0);

cleanup:;
    if (inserted) {
        daplka_mr_resource_t *free_rp = NULL;

        (void) daplka_hash_remove(&ia_rp->ia_mr_htbl, mr_hkey,
            (void **)&free_rp);
        if (free_rp != mr_rp) {
            DERR("mr_register: cannot remove mr from hash table\n");
            /*
             * we can only get here if another thread
             * has completed the cleanup in mr_deregister
             */
            return (retval);
        }
    }
    DAPLKA_RS_UNREF(mr_rp);
    return (retval);
}
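/*
 * Example (sketch, not part of the driver): seen from the library, the
 * register call is one ioctl that returns the lkey/rkey pair plus a
 * hash key used to name this MR in later deregister calls. The command
 * name DAPL_MR_REGISTER and the descriptor ia_fd are assumptions made
 * for illustration:
 *
 *	dapl_mr_register_t mr;
 *
 *	mr.mr_pd_hkey = pd_hkey;
 *	mr.mr_vaddr = (uint64_t)(uintptr_t)buf;
 *	mr.mr_len = (uint64_t)len;
 *	mr.mr_flags = flags;
 *	if (ioctl(ia_fd, DAPL_MR_REGISTER, &mr) == 0) {
 *		... mr.mr_lkey/mr.mr_rkey usable; free via mr.mr_hkey ...
 *	}
 */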
/*
 * registers a shared memory region.
 * the client calls this function with the intention to share the memory
 * region with other clients. it is assumed that, prior to calling this
 * function, the client(s) are already sharing parts of their address
 * space using a mechanism such as SYSV shared memory. the first client
 * that calls this function will create and insert a daplka_shared_mr_t
 * object into the global daplka_shared_mr_tree. this shared mr object
 * will be identified by a unique 40-byte key and will maintain a list
 * of mr resources. every time this function gets called with the same
 * 40-byte key, a new mr resource (containing a new mr handle generated
 * by ibt_register_mr or ibt_register_shared_mr) is created and inserted
 * into this list. similarly, every time a shared mr gets deregistered
 * or invalidated by a callback, the mr resource gets removed from this
 * list. the shared mr object has a reference count. when it drops to
 * zero, the shared mr object will be removed from the global avl tree
 * and be freed.
 */
static int
daplka_mr_register_shared(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
    dapl_mr_register_shared_t	args;
    daplka_shared_mr_t	*smrp = NULL;
    daplka_shared_mr_t	tmp_smr;
    ibt_mr_data_in_t	mr_cb_data_in;
    avl_index_t		where;
    boolean_t		inserted = B_FALSE;
    daplka_mr_resource_t	*mr_rp = NULL;
    daplka_pd_resource_t	*pd_rp;
    uint64_t		mr_hkey = 0;
    ibt_status_t		status;
    int			retval;

    retval = ddi_copyin((void *)arg, &args,
        sizeof (dapl_mr_register_shared_t), mode);
    if (retval != 0) {
        DERR("mr_register_shared: copyin error %d\n", retval);
        return (EFAULT);
    }

    mutex_enter(&daplka_shared_mr_lock);
    /*
     * find smrp from the global avl tree.
     * the 40-byte key is used as the lookup key.
     */
    tmp_smr.smr_cookie = args.mrs_shm_cookie;
    smrp = (daplka_shared_mr_t *)
        avl_find(&daplka_shared_mr_tree, &tmp_smr, &where);
    if (smrp != NULL) {
        D2("mr_register_shared: smrp 0x%p, found cookie:\n"
            "0x%016llx%016llx%016llx%016llx%016llx\n", smrp,
            (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[4],
            (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[3],
            (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[2],
            (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[1],
            (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[0]);

        /*
         * if the smrp exists, other threads could still be
         * accessing it. we wait until they are done before
         * we continue.
         */
        smrp->smr_refcnt++;
        while (smrp->smr_state == DAPLKA_SMR_TRANSITIONING) {
            D2("mr_register_shared: smrp 0x%p, "
                "waiting in transitioning state, refcnt %d\n",
                smrp, smrp->smr_refcnt);
            cv_wait(&smrp->smr_cv, &daplka_shared_mr_lock);
        }
        ASSERT(smrp->smr_state == DAPLKA_SMR_READY);
        D2("mr_register_shared: smrp 0x%p, refcnt %d, ready\n",
            smrp, smrp->smr_refcnt);

        /*
         * we set smr_state to TRANSITIONING to temporarily
         * prevent other threads from trying to access smrp.
         */
        smrp->smr_state = DAPLKA_SMR_TRANSITIONING;
    } else {
        D2("mr_register_shared: cannot find cookie:\n"
            "0x%016llx%016llx%016llx%016llx%016llx\n",
            (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[4],
            (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[3],
            (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[2],
            (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[1],
            (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[0]);

        /*
         * if we cannot find smrp, we need to create and
         * insert one into daplka_shared_mr_tree
         */
        smrp = kmem_zalloc(sizeof (daplka_shared_mr_t),
            daplka_km_flags);
        if (smrp == NULL) {
            retval = ENOMEM;
            mutex_exit(&daplka_shared_mr_lock);
            goto cleanup;
        }
        _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*smrp))
        smrp->smr_refcnt = 1;
        smrp->smr_cookie = args.mrs_shm_cookie;
        smrp->smr_state = DAPLKA_SMR_TRANSITIONING;
        smrp->smr_mr_list = NULL;
        cv_init(&smrp->smr_cv, NULL, CV_DRIVER, NULL);
        avl_insert(&daplka_shared_mr_tree, smrp, where);
        _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*smrp))
    }
    mutex_exit(&daplka_shared_mr_lock);

    mr_rp = kmem_zalloc(sizeof (daplka_mr_resource_t), daplka_km_flags);
    if (mr_rp == NULL) {
        DERR("mr_register_shared: cannot allocate mr resource\n");
        retval = ENOMEM;
        goto cleanup;
    }
    _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr_rp))
    DAPLKA_RS_INIT(mr_rp, DAPL_TYPE_MR,
        DAPLKA_RS_RNUM(ia_rp), daplka_mr_destroy);

    mutex_init(&mr_rp->mr_lock, NULL, MUTEX_DRIVER, NULL);
    mr_rp->mr_hca = ia_rp->ia_hca;
    mr_rp->mr_hca_hdl = ia_rp->ia_hca_hdl;
    mr_rp->mr_next = NULL;
    mr_rp->mr_shared_mr = NULL;

    /* get pd handle */
    pd_rp = (daplka_pd_resource_t *)
        daplka_hash_lookup(&ia_rp->ia_pd_htbl, args.mrs_pd_hkey);
    if (pd_rp == NULL) {
        DERR("mr_register_shared: cannot find pd resource\n");
        retval = EINVAL;
        goto cleanup;
    }
    ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
    mr_rp->mr_pd_res = pd_rp;

    mr_rp->mr_attr.mr_vaddr = args.mrs_vaddr;
    mr_rp->mr_attr.mr_len = args.mrs_len;
    mr_rp->mr_attr.mr_flags = args.mrs_flags | IBT_MR_NOSLEEP;
    mr_rp->mr_attr.mr_as = curproc->p_as;

    D2("mr_register_shared: mr_vaddr 0x%p, mr_len %llu, "
        "mr_flags 0x%x, mr_as 0x%p, mr_exists %d, smrp 0x%p\n",
        (void *)(uintptr_t)mr_rp->mr_attr.mr_vaddr,
        (longlong_t)mr_rp->mr_attr.mr_len,
        mr_rp->mr_attr.mr_flags, mr_rp->mr_attr.mr_as,
        (int)(smrp->smr_mr_list != NULL), smrp);

    /*
     * since we are in TRANSITIONING state, we are guaranteed
     * that we have exclusive access to smr_mr_list.
     */
    if (smrp->smr_mr_list != NULL) {
        ibt_smr_attr_t	mem_sattr;

        /*
         * a non-null smr_mr_list indicates that someone
         * else has already inserted an mr_resource into
         * smr_mr_list. we use the mr_handle from the first
         * element as an arg to ibt_register_shared_mr.
         */
        mem_sattr.mr_vaddr = smrp->smr_mr_list->mr_desc.md_vaddr;
        mem_sattr.mr_flags = mr_rp->mr_attr.mr_flags;

        D2("mr_register_shared: mem_sattr vaddr 0x%p flags 0x%x\n",
            (void *)(uintptr_t)mem_sattr.mr_vaddr, mem_sattr.mr_flags);
        status = daplka_ibt_register_shared_mr(mr_rp, ia_rp->ia_hca_hdl,
            smrp->smr_mr_list->mr_hdl, mr_rp->mr_pd_res->pd_hdl,
            &mem_sattr, &mr_rp->mr_hdl, &mr_rp->mr_desc);

        if (status != IBT_SUCCESS) {
            DERR("mr_register_shared: "
                "ibt_register_shared_mr error %d\n", status);
            *rvalp = (int)status;
            retval = 0;
            goto cleanup;
        }
    } else {
        /*
         * an mr does not exist yet. we need to create one
         * using ibt_register_mr.
         */
        status = daplka_ibt_register_mr(mr_rp, ia_rp->ia_hca_hdl,
            mr_rp->mr_pd_res->pd_hdl, &mr_rp->mr_attr,
            &mr_rp->mr_hdl, &mr_rp->mr_desc);

        if (status != IBT_SUCCESS) {
            DERR("mr_register_shared: "
                "ibt_register_mr error %d\n", status);
            *rvalp = (int)status;
            retval = 0;
            goto cleanup;
        }
    }

    mr_cb_data_in.mr_rev = IBT_MR_DATA_IN_IF_VERSION;
    mr_cb_data_in.mr_func = daplka_pre_mr_cleanup_callback;
    mr_cb_data_in.mr_arg1 = (void *)mr_rp;
    mr_cb_data_in.mr_arg2 = NULL;

    /* Pass the service driver mr cleanup handler to the hca driver */
    status = ibt_ci_data_in(ia_rp->ia_hca_hdl,
        IBT_CI_NO_FLAGS, IBT_HDL_MR, (void *)mr_rp->mr_hdl,
        &mr_cb_data_in, sizeof (mr_cb_data_in));

    if (status != IBT_SUCCESS) {
        DERR("mr_register_shared: ibt_ci_data_in error(%d) ver(%d)",
            status, mr_cb_data_in.mr_rev);
        *rvalp = (int)status;
        retval = 0;
        goto cleanup;
    }

    /*
     * we bump reference of mr_rp and enqueue it onto smrp.
     */
    DAPLKA_RS_REF(mr_rp);
    mr_rp->mr_next = smrp->smr_mr_list;
    smrp->smr_mr_list = mr_rp;
    mr_rp->mr_shared_mr = smrp;

    /* insert into mr hash table */
    retval = daplka_hash_insert(&ia_rp->ia_mr_htbl,
        &mr_hkey, (void *)mr_rp);
    if (retval != 0) {
        DERR("mr_register_shared: cannot insert mr resource\n");
        goto cleanup;
    }
    inserted = B_TRUE;
    _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*mr_rp))

    /*
     * at this point, there are two references to our mr resource.
     * one is kept in ia_mr_htbl. the other is kept in the list
     * within this shared mr object (smrp). when we deregister this
     * mr or when a callback invalidates this mr, the reference kept
     * by this shared mr object will be removed.
     */

    args.mrs_lkey = mr_rp->mr_desc.md_lkey;
    args.mrs_rkey = mr_rp->mr_desc.md_rkey;
    args.mrs_hkey = mr_hkey;

    retval = ddi_copyout((void *)&args, (void *)arg,
        sizeof (dapl_mr_register_shared_t), mode);
    if (retval != 0) {
        DERR("mr_register_shared: copyout error %d\n", retval);
        retval = EFAULT;
        goto cleanup;
    }

    /*
     * set the state to READY to allow others to continue
     */
    mutex_enter(&daplka_shared_mr_lock);
    smrp->smr_state = DAPLKA_SMR_READY;
    cv_broadcast(&smrp->smr_cv);
    mutex_exit(&daplka_shared_mr_lock);
    return (0);

cleanup:;
    if (inserted) {
        daplka_mr_resource_t *free_rp = NULL;

        (void) daplka_hash_remove(&ia_rp->ia_mr_htbl, mr_hkey,
            (void **)&free_rp);
        if (free_rp != mr_rp) {
            DERR("mr_register_shared: "
                "cannot remove mr from hash table\n");
            /*
             * we can only get here if another thread
             * has completed the cleanup in mr_deregister
             */
            return (retval);
        }
    }
    if (smrp != NULL) {
        mutex_enter(&daplka_shared_mr_lock);
        ASSERT(smrp->smr_refcnt > 0);
        smrp->smr_refcnt--;

        if (smrp->smr_refcnt == 0) {
            DERR("mr_register_shared: freeing smrp 0x%p\n", smrp);
            avl_remove(&daplka_shared_mr_tree, smrp);
            _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*smrp))
            if (smrp->smr_mr_list != NULL) {
                /*
                 * the refcnt is 0. if there is anything
                 * left on the list, it must be ours.
                 */
                _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr_rp))
                ASSERT(smrp->smr_mr_list == mr_rp);
                DAPLKA_RS_UNREF(mr_rp);
                smrp->smr_mr_list = NULL;
                ASSERT(mr_rp->mr_shared_mr == smrp);
                mr_rp->mr_shared_mr = NULL;
                ASSERT(mr_rp->mr_next == NULL);
            }
            smrp->smr_state = DAPLKA_SMR_FREED;
            cv_destroy(&smrp->smr_cv);
            kmem_free(smrp, sizeof (daplka_shared_mr_t));
        } else {
            DERR("mr_register_shared: resetting smr_state "
                "smrp 0x%p, %d waiters remain\n", smrp,
                smrp->smr_refcnt);
            ASSERT(smrp->smr_state == DAPLKA_SMR_TRANSITIONING);
            if (smrp->smr_mr_list != NULL && mr_rp != NULL) {
                daplka_mr_resource_t **mpp;

                /*
                 * search and remove mr_rp from smr_mr_list
                 */
                _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr_rp))
                mpp = &smrp->smr_mr_list;
                while (*mpp != NULL) {
                    if (*mpp == mr_rp) {
                        *mpp = (*mpp)->mr_next;
                        DAPLKA_RS_UNREF(mr_rp);
                        ASSERT(mr_rp->mr_shared_mr ==
                            smrp);
                        mr_rp->mr_shared_mr = NULL;
                        mr_rp->mr_next = NULL;
                        break;
                    }
                    mpp = &(*mpp)->mr_next;
                }
            }
            /*
             * note that smr_state == READY does not necessarily
             * mean that smr_mr_list is non empty. for this case,
             * we are doing cleanup because of a failure. we set
             * the state to READY to allow other threads to
             * continue.
             */
            smrp->smr_state = DAPLKA_SMR_READY;
            cv_broadcast(&smrp->smr_cv);
        }
        mutex_exit(&daplka_shared_mr_lock);
    }
    if (mr_rp != NULL) {
        DAPLKA_RS_UNREF(mr_rp);
    }
    return (retval);
}
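/*
 * Shared MR lifecycle as implemented above and in daplka_shared_mr_free
 * below (a recap, not additional behavior):
 *
 *	first register:  object allocated, refcnt = 1, TRANSITIONING
 *	later registers: refcnt++, wait while TRANSITIONING, then take
 *	                 TRANSITIONING again while smr_mr_list is edited
 *	success path:    state set back to READY, cv_broadcast
 *	deregister/cb:   refcnt--, mr unlinked from smr_mr_list; when
 *	                 refcnt reaches 0 the object leaves
 *	                 daplka_shared_mr_tree and is freed (FREED)
 */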
/*
 * registers a memory region using the attributes of an
 * existing region.
 */
static int
daplka_mr_register_lmr(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
    boolean_t		inserted = B_FALSE;
    dapl_mr_register_lmr_t	args;
    ibt_mr_data_in_t	mr_cb_data_in;
    daplka_mr_resource_t	*orig_mr_rp = NULL;
    daplka_mr_resource_t	*mr_rp;
    ibt_smr_attr_t		mem_sattr;
    uint64_t		mr_hkey = 0;
    ibt_status_t		status;
    int			retval;

    retval = ddi_copyin((void *)arg, &args,
        sizeof (dapl_mr_register_lmr_t), mode);
    if (retval != 0) {
        DERR("mr_register_lmr: copyin error %d\n", retval);
        return (EFAULT);
    }
    orig_mr_rp = (daplka_mr_resource_t *)
        daplka_hash_lookup(&ia_rp->ia_mr_htbl, args.mrl_orig_hkey);
    if (orig_mr_rp == NULL) {
        DERR("mr_register_lmr: cannot find mr resource\n");
        return (EINVAL);
    }
    ASSERT(DAPLKA_RS_TYPE(orig_mr_rp) == DAPL_TYPE_MR);

    mr_rp = kmem_zalloc(sizeof (daplka_mr_resource_t), daplka_km_flags);
    if (mr_rp == NULL) {
        DERR("mr_register_lmr: cannot allocate mr resource\n");
        retval = ENOMEM;
        goto cleanup;
    }
    _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr_rp))
    DAPLKA_RS_INIT(mr_rp, DAPL_TYPE_MR,
        DAPLKA_RS_RNUM(ia_rp), daplka_mr_destroy);

    mutex_init(&mr_rp->mr_lock, NULL, MUTEX_DRIVER, NULL);
    mr_rp->mr_hca = ia_rp->ia_hca;
    mr_rp->mr_hca_hdl = ia_rp->ia_hca_hdl;
    mr_rp->mr_next = NULL;
    mr_rp->mr_shared_mr = NULL;

    DAPLKA_RS_REF(orig_mr_rp->mr_pd_res);
    mr_rp->mr_pd_res = orig_mr_rp->mr_pd_res;
    mr_rp->mr_attr = orig_mr_rp->mr_attr;

    /* Pass the IO addr that was returned while allocating the orig MR */
    mem_sattr.mr_vaddr = orig_mr_rp->mr_desc.md_vaddr;
    mem_sattr.mr_flags = args.mrl_flags | IBT_MR_NOSLEEP;

    status = daplka_ibt_register_shared_mr(mr_rp, ia_rp->ia_hca_hdl,
        orig_mr_rp->mr_hdl, mr_rp->mr_pd_res->pd_hdl, &mem_sattr,
        &mr_rp->mr_hdl, &mr_rp->mr_desc);

    if (status != IBT_SUCCESS) {
        DERR("mr_register_lmr: ibt_register_shared_mr error %d\n",
            status);
        *rvalp = (int)status;
        retval = 0;
        goto cleanup;
    }

    mr_cb_data_in.mr_rev = IBT_MR_DATA_IN_IF_VERSION;
    mr_cb_data_in.mr_func = daplka_pre_mr_cleanup_callback;
    mr_cb_data_in.mr_arg1 = (void *)mr_rp;
    mr_cb_data_in.mr_arg2 = NULL;

    /* Pass the service driver mr cleanup handler to the hca driver */
    status = ibt_ci_data_in(ia_rp->ia_hca_hdl,
        IBT_CI_NO_FLAGS, IBT_HDL_MR, (void *)mr_rp->mr_hdl,
        &mr_cb_data_in, sizeof (mr_cb_data_in));

    if (status != IBT_SUCCESS) {
        DERR("mr_register_lmr: ibt_ci_data_in error(%d) ver(%d)",
            status, mr_cb_data_in.mr_rev);
        *rvalp = (int)status;
        retval = 0;
        goto cleanup;
    }
    mr_rp->mr_attr.mr_len = orig_mr_rp->mr_attr.mr_len;
    mr_rp->mr_attr.mr_flags = mem_sattr.mr_flags;

    /* insert into mr hash table */
    retval = daplka_hash_insert(&ia_rp->ia_mr_htbl, &mr_hkey,
        (void *)mr_rp);
    if (retval != 0) {
        DERR("mr_register: cannot insert mr resource into mr_htbl\n");
        goto cleanup;
    }
    inserted = B_TRUE;
    _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*mr_rp))

    args.mrl_lkey = mr_rp->mr_desc.md_lkey;
    args.mrl_rkey = mr_rp->mr_desc.md_rkey;
    args.mrl_hkey = mr_hkey;

    retval = ddi_copyout((void *)&args, (void *)arg,
        sizeof (dapl_mr_register_lmr_t), mode);
    if (retval != 0) {
        DERR("mr_register_lmr: copyout error %d\n", retval);
        retval = EFAULT;
        goto cleanup;
    }
    if (orig_mr_rp != NULL) {
        DAPLKA_RS_UNREF(orig_mr_rp);
    }
    return (0);

cleanup:;
    if (inserted) {
        daplka_mr_resource_t *free_rp = NULL;

        (void) daplka_hash_remove(&ia_rp->ia_mr_htbl, mr_hkey,
            (void **)&free_rp);
        if (free_rp != mr_rp) {
            DERR("mr_register: cannot remove mr from hash table\n");
            /*
             * we can only get here if another thread
             * has completed the cleanup in mr_deregister
             */
            return (retval);
        }
    }
    if (orig_mr_rp != NULL) {
        DAPLKA_RS_UNREF(orig_mr_rp);
    }
    if (mr_rp != NULL) {
        DAPLKA_RS_UNREF(mr_rp);
    }
    return (retval);
}
/*
 * this function is called by mr_deregister and mr_cleanup_callback to
 * remove a mr resource from the shared mr object mr_rp->mr_shared_mr.
 * if mr_shared_mr is already NULL, that means the region being
 * deregistered or invalidated is not a shared mr region and we can
 * return immediately.
 */
static void
daplka_shared_mr_free(daplka_mr_resource_t *mr_rp)
{
    daplka_shared_mr_t	*smrp;

    /*
     * we need a lock because mr_callback also checks this field.
     * for the rare case that mr_deregister and mr_cleanup_callback
     * gets called simultaneously, we are guaranteed that smrp won't
     * be dereferenced twice because either function will find
     * mr_shared_mr to be NULL.
     */
    mutex_enter(&mr_rp->mr_lock);
    smrp = mr_rp->mr_shared_mr;
    mr_rp->mr_shared_mr = NULL;
    mutex_exit(&mr_rp->mr_lock);

    if (smrp != NULL) {
        daplka_mr_resource_t	**mpp;
        boolean_t		mr_found = B_FALSE;

        mutex_enter(&daplka_shared_mr_lock);
        ASSERT(smrp->smr_refcnt > 0);
        while (smrp->smr_state == DAPLKA_SMR_TRANSITIONING) {
            cv_wait(&smrp->smr_cv, &daplka_shared_mr_lock);
        }
        ASSERT(smrp->smr_state == DAPLKA_SMR_READY);
        smrp->smr_state = DAPLKA_SMR_TRANSITIONING;
        smrp->smr_refcnt--;

        /*
         * search and remove mr_rp from smr_mr_list.
         * also UNREF mr_rp because it is no longer
         * on the list.
         */
        mpp = &smrp->smr_mr_list;
        while (*mpp != NULL) {
            if (*mpp == mr_rp) {
                *mpp = (*mpp)->mr_next;
                DAPLKA_RS_UNREF(mr_rp);
                mr_rp->mr_next = NULL;
                mr_found = B_TRUE;
                break;
            }
            mpp = &(*mpp)->mr_next;
        }
        /*
         * since mr_clean_callback may not touch smr_mr_list
         * at this time (due to smr_state), we can be sure
         * that we can find and remove mr_rp from smr_mr_list
         */
        ASSERT(mr_found);
        if (smrp->smr_refcnt == 0) {
            D3("shared_mr_free: freeing smrp 0x%p\n", smrp);
            avl_remove(&daplka_shared_mr_tree, smrp);
            ASSERT(smrp->smr_mr_list == NULL);
            smrp->smr_state = DAPLKA_SMR_FREED;
            cv_destroy(&smrp->smr_cv);
            kmem_free(smrp, sizeof (daplka_shared_mr_t));
        } else {
            D3("shared_mr_free: smrp 0x%p, refcnt %d\n",
                smrp, smrp->smr_refcnt);
            smrp->smr_state = DAPLKA_SMR_READY;
            cv_broadcast(&smrp->smr_cv);
        }
        mutex_exit(&daplka_shared_mr_lock);
    }
}
/*
 * deregisters a memory region.
 * if mr is shared, remove reference from global shared mr object.
 * release the initial reference to the mr. if the mr's refcnt is
 * zero, call mr_destroy to free mr.
 */
static int
daplka_mr_deregister(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
    daplka_mr_resource_t	*mr_rp;
    dapl_mr_deregister_t	args;
    int			retval;

    retval = ddi_copyin((void *)arg, &args, sizeof (dapl_mr_deregister_t),
        mode);
    if (retval != 0) {
        DERR("mr_deregister: copyin error %d\n", retval);
        return (EFAULT);
    }
    retval = daplka_hash_remove(&ia_rp->ia_mr_htbl,
        args.mrd_hkey, (void **)&mr_rp);
    if (retval != 0 || mr_rp == NULL) {
        DERR("mr_deregister: cannot find mr resource\n");
        return (EINVAL);
    }
    ASSERT(DAPLKA_RS_TYPE(mr_rp) == DAPL_TYPE_MR);

    daplka_shared_mr_free(mr_rp);
    DAPLKA_RS_UNREF(mr_rp);
    return (0);
}
/*
 * sync local memory regions on RDMA read or write.
 */
static int
daplka_mr_sync(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
    dapl_mr_sync_t		args;
    daplka_mr_resource_t	*mr_rp[DAPL_MR_PER_SYNC];
    ibt_mr_sync_t		mrs[DAPL_MR_PER_SYNC];
    uint32_t		sync_direction_flags;
    ibt_status_t		status;
    int			i, j;
    int			retval;

    retval = ddi_copyin((void *)arg, &args, sizeof (dapl_mr_sync_t), mode);
    if (retval != 0) {
        DERR("mr_sync: copyin error %d\n", retval);
        return (EFAULT);
    }

    /* number of segments bound check */
    if (args.mrs_numseg > DAPL_MR_PER_SYNC) {
        DERR("mr_sync: number of segments too large\n");
        return (EINVAL);
    }

    /* translate MR sync direction flag */
    if (args.mrs_flags == DAPL_MR_SYNC_RDMA_RD) {
        sync_direction_flags = IBT_SYNC_READ;
    } else if (args.mrs_flags == DAPL_MR_SYNC_RDMA_WR) {
        sync_direction_flags = IBT_SYNC_WRITE;
    } else {
        DERR("mr_sync: unknown flags\n");
        return (EINVAL);
    }

    /*
     * all the segments are going to be sync'd by ibtl together
     */
    for (i = 0; i < args.mrs_numseg; i++) {
        mr_rp[i] = (daplka_mr_resource_t *)daplka_hash_lookup(
            &ia_rp->ia_mr_htbl, args.mrs_vec[i].mrsv_hkey);
        if (mr_rp[i] == NULL) {
            for (j = 0; j < i; j++) {
                DAPLKA_RS_UNREF(mr_rp[j]);
            }
            DERR("mr_sync: lookup error\n");
            return (EINVAL);
        }
        ASSERT(DAPLKA_RS_TYPE(mr_rp[i]) == DAPL_TYPE_MR);
        mrs[i].ms_handle = mr_rp[i]->mr_hdl;
        mrs[i].ms_vaddr = args.mrs_vec[i].mrsv_va;
        mrs[i].ms_len = args.mrs_vec[i].mrsv_len;
        mrs[i].ms_flags = sync_direction_flags;
    }

    status = ibt_sync_mr(ia_rp->ia_hca_hdl, mrs, args.mrs_numseg);
    if (status != IBT_SUCCESS) {
        DERR("mr_sync: ibt_sync_mr error %d\n", status);
        *rvalp = (int)status;
    }
    for (i = 0; i < args.mrs_numseg; i++) {
        DAPLKA_RS_UNREF(mr_rp[i]);
    }
    return (0);
}
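/*
 * Example (sketch, not part of the driver): to sync one region before
 * an RDMA read, the library would fill mrs_vec with up to
 * DAPL_MR_PER_SYNC segments. The command name DAPL_MR_SYNC and the
 * descriptor ia_fd are assumptions made for illustration:
 *
 *	dapl_mr_sync_t s;
 *
 *	s.mrs_flags = DAPL_MR_SYNC_RDMA_RD;
 *	s.mrs_numseg = 1;
 *	s.mrs_vec[0].mrsv_hkey = mr_hkey;
 *	s.mrs_vec[0].mrsv_va = (uint64_t)(uintptr_t)buf;
 *	s.mrs_vec[0].mrsv_len = (uint64_t)len;
 *	(void) ioctl(ia_fd, DAPL_MR_SYNC, &s);
 */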
/*
 * destroys a memory region.
 * called when refcnt drops to zero.
 */
static int
daplka_mr_destroy(daplka_resource_t *gen_rp)
{
    daplka_mr_resource_t	*mr_rp = (daplka_mr_resource_t *)gen_rp;
    ibt_status_t		status;

    _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr_rp))
    ASSERT(DAPLKA_RS_REFCNT(mr_rp) == 0);
    ASSERT(mr_rp->mr_shared_mr == NULL);
    D3("mr_destroy: entering, mr_rp 0x%p, rnum %d\n",
        mr_rp, DAPLKA_RS_RNUM(mr_rp));

    /*
     * deregister mr
     */
    if (mr_rp->mr_hdl) {
        status = daplka_ibt_deregister_mr(mr_rp, mr_rp->mr_hca_hdl,
            mr_rp->mr_hdl);
        if (status != IBT_SUCCESS) {
            DERR("mr_destroy: ibt_deregister_mr returned %d\n",
                status);
        }
        mr_rp->mr_hdl = NULL;
        D3("mr_destroy: mr deregistered\n");
    }
    mr_rp->mr_attr.mr_vaddr = 0;

    /*
     * release reference on PD
     */
    if (mr_rp->mr_pd_res != NULL) {
        DAPLKA_RS_UNREF(mr_rp->mr_pd_res);
        mr_rp->mr_pd_res = NULL;
    }
    mutex_destroy(&mr_rp->mr_lock);
    DAPLKA_RS_FINI(mr_rp);
    kmem_free(mr_rp, sizeof (daplka_mr_resource_t));
    D3("mr_destroy: exiting, mr_rp 0x%p\n", mr_rp);
    return (0);
}
/*
 * this function is called by daplka_hash_destroy for
 * freeing MR resource objects
 */
static void
daplka_hash_mr_free(void *obj)
{
    daplka_mr_resource_t	*mr_rp = (daplka_mr_resource_t *)obj;

    daplka_shared_mr_free(mr_rp);
    DAPLKA_RS_UNREF(mr_rp);
}
/*
 * comparison function used for finding a shared mr object
 * from the global shared mr avl tree.
 */
static int
daplka_shared_mr_cmp(const void *smr1, const void *smr2)
{
    daplka_shared_mr_t	*s1 = (daplka_shared_mr_t *)smr1;
    daplka_shared_mr_t	*s2 = (daplka_shared_mr_t *)smr2;
    int i;

    for (i = 4; i >= 0; i--) {
        if (s1->smr_cookie.mc_uint_arr[i] <
            s2->smr_cookie.mc_uint_arr[i]) {
            return (-1);
        }
        if (s1->smr_cookie.mc_uint_arr[i] >
            s2->smr_cookie.mc_uint_arr[i]) {
            return (1);
        }
    }
    return (0);
}
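/*
 * The 40-byte cookie is compared one 64-bit word at a time starting
 * from mc_uint_arr[4], the most significant word, yielding the strict
 * -1/0/+1 ordering that avl_find and avl_insert expect from an AVL
 * comparison function.
 */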
/*
 * allocates a protection domain.
 */
static int
daplka_pd_alloc(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
    dapl_pd_alloc_t		args;
    daplka_pd_resource_t	*pd_rp;
    ibt_status_t		status;
    uint64_t		pd_hkey = 0;
    boolean_t		inserted = B_FALSE;
    int			retval;

    pd_rp = kmem_zalloc(sizeof (*pd_rp), daplka_km_flags);
    if (pd_rp == NULL) {
        DERR("pd_alloc: cannot allocate pd resource\n");
        return (ENOMEM);
    }
    _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pd_rp))
    DAPLKA_RS_INIT(pd_rp, DAPL_TYPE_PD,
        DAPLKA_RS_RNUM(ia_rp), daplka_pd_destroy);

    pd_rp->pd_hca = ia_rp->ia_hca;
    pd_rp->pd_hca_hdl = ia_rp->ia_hca_hdl;
    status = daplka_ibt_alloc_pd(pd_rp, pd_rp->pd_hca_hdl,
        IBT_PD_NO_FLAGS, &pd_rp->pd_hdl);
    if (status != IBT_SUCCESS) {
        DERR("pd_alloc: ibt_alloc_pd returned %d\n", status);
        *rvalp = (int)status;
        retval = 0;
        goto cleanup;
    }

    /* insert into pd hash table */
    retval = daplka_hash_insert(&ia_rp->ia_pd_htbl,
        &pd_hkey, (void *)pd_rp);
    if (retval != 0) {
        DERR("pd_alloc: cannot insert pd resource into pd_htbl\n");
        goto cleanup;
    }
    inserted = B_TRUE;
    _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*pd_rp))

    /* return hkey to library */
    args.pda_hkey = pd_hkey;

    retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_pd_alloc_t),
        mode);
    if (retval != 0) {
        DERR("pd_alloc: copyout error %d\n", retval);
        retval = EFAULT;
        goto cleanup;
    }
    return (0);

cleanup:;
    if (inserted) {
        daplka_pd_resource_t *free_rp = NULL;

        (void) daplka_hash_remove(&ia_rp->ia_pd_htbl, pd_hkey,
            (void **)&free_rp);
        if (free_rp != pd_rp) {
            DERR("pd_alloc: cannot remove pd from hash table\n");
            /*
             * we can only get here if another thread
             * has completed the cleanup in pd_free
             */
            return (retval);
        }
    }
    DAPLKA_RS_UNREF(pd_rp);
    return (retval);
}
/*
 * destroys a protection domain.
 * called when refcnt drops to zero.
 */
static int
daplka_pd_destroy(daplka_resource_t *gen_rp)
{
    daplka_pd_resource_t	*pd_rp = (daplka_pd_resource_t *)gen_rp;
    ibt_status_t		status;

    _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pd_rp))
    ASSERT(DAPLKA_RS_REFCNT(pd_rp) == 0);
    D3("pd_destroy: entering, pd_rp %p, rnum %d\n",
        pd_rp, DAPLKA_RS_RNUM(pd_rp));

    ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
    if (pd_rp->pd_hdl != NULL) {
        status = daplka_ibt_free_pd(pd_rp, pd_rp->pd_hca_hdl,
            pd_rp->pd_hdl);
        if (status != IBT_SUCCESS) {
            DERR("pd_destroy: ibt_free_pd returned %d\n", status);
        }
    }
    DAPLKA_RS_FINI(pd_rp);
    kmem_free(pd_rp, sizeof (daplka_pd_resource_t));
    D3("pd_destroy: exiting, pd_rp %p\n", pd_rp);
    return (0);
}
static void
daplka_hash_pd_free(void *obj)
{
    daplka_pd_resource_t	*pd_rp = (daplka_pd_resource_t *)obj;

    ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
    DAPLKA_RS_UNREF(pd_rp);
}
/*
 * removes the pd reference from ia_pd_htbl and releases the
 * initial reference to the pd. also destroys the pd if the refcnt
 * drops to zero.
 */
static int
daplka_pd_free(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
    daplka_pd_resource_t	*pd_rp;
    dapl_pd_free_t		args;
    int			retval;

    retval = ddi_copyin((void *)arg, &args, sizeof (dapl_pd_free_t), mode);
    if (retval != 0) {
        DERR("pd_free: copyin error %d\n", retval);
        return (EFAULT);
    }
    retval = daplka_hash_remove(&ia_rp->ia_pd_htbl,
        args.pdf_hkey, (void **)&pd_rp);
    if (retval != 0 || pd_rp == NULL) {
        DERR("pd_free: cannot find pd resource\n");
        return (EINVAL);
    }
    ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);

    /* UNREF calls the actual free function when refcnt is zero */
    DAPLKA_RS_UNREF(pd_rp);
    return (0);
}
/*
 * allocates a memory window
 */
static int
daplka_mw_alloc(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
    daplka_pd_resource_t	*pd_rp;
    daplka_mw_resource_t	*mw_rp;
    dapl_mw_alloc_t		args;
    ibt_status_t		status;
    boolean_t		inserted = B_FALSE;
    uint64_t		mw_hkey;
    ibt_rkey_t		mw_rkey;
    int			retval;

    retval = ddi_copyin((void *)arg, &args, sizeof (dapl_mw_alloc_t), mode);
    if (retval != 0) {
        DERR("mw_alloc: copyin error %d\n", retval);
        return (EFAULT);
    }

    /*
     * Allocate and initialize a MW resource
     */
    mw_rp = kmem_zalloc(sizeof (daplka_mw_resource_t), daplka_km_flags);
    if (mw_rp == NULL) {
        DERR("mw_alloc: cannot allocate mw resource\n");
        return (ENOMEM);
    }
    _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mw_rp))
    DAPLKA_RS_INIT(mw_rp, DAPL_TYPE_MW,
        DAPLKA_RS_RNUM(ia_rp), daplka_mw_destroy);

    mutex_init(&mw_rp->mw_lock, NULL, MUTEX_DRIVER, NULL);
    mw_rp->mw_hca = ia_rp->ia_hca;
    mw_rp->mw_hca_hdl = ia_rp->ia_hca_hdl;

    /* get pd handle */
    pd_rp = (daplka_pd_resource_t *)
        daplka_hash_lookup(&ia_rp->ia_pd_htbl, args.mw_pd_hkey);
    if (pd_rp == NULL) {
        DERR("mw_alloc: cannot find pd resource\n");
        retval = EINVAL;
        goto cleanup;
    }
    ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);

    mw_rp->mw_pd_res = pd_rp;

    status = daplka_ibt_alloc_mw(mw_rp, mw_rp->mw_hca_hdl,
        pd_rp->pd_hdl, IBT_MW_NOSLEEP, &mw_rp->mw_hdl, &mw_rkey);

    if (status != IBT_SUCCESS) {
        DERR("mw_alloc: ibt_alloc_mw returned %d\n", status);
        *rvalp = (int)status;
        retval = 0;
        goto cleanup;
    }

    mutex_enter(&ia_rp->ia_lock);
    switch (ia_rp->ia_state) {
    case DAPLKA_IA_INIT:
        ia_rp->ia_state = DAPLKA_IA_MW_ALLOC_IN_PROGRESS;
        ia_rp->ia_mw_alloccnt++;
        retval = 0;
        break;
    case DAPLKA_IA_MW_ALLOC_IN_PROGRESS:
        /* another mw_alloc is already in progress, increase cnt */
        ia_rp->ia_mw_alloccnt++;
        retval = 0;
        break;
    case DAPLKA_IA_MW_FREEZE_IN_PROGRESS:
        /* FALLTHRU */
    case DAPLKA_IA_MW_FROZEN:
        /*
         * IA is being or already frozen, don't allow more MWs to be
         * allocated.
         */
        DERR("mw_alloc: IA is freezing MWs (state=%d)\n",
            ia_rp->ia_state);
        retval = EINVAL;
        break;
    default:
        ASSERT(!"Invalid IA state in mw_alloc");
        DERR("mw_alloc: IA state=%d invalid\n", ia_rp->ia_state);
        retval = EINVAL;
        break;
    }
    mutex_exit(&ia_rp->ia_lock);
    /* retval is 0 when ia_mw_alloccnt is incremented */
    if (retval != 0) {
        goto cleanup;
    }

    /* insert into mw hash table */
    mw_hkey = 0;
    retval = daplka_hash_insert(&ia_rp->ia_mw_htbl, &mw_hkey,
        (void *)mw_rp);
    if (retval != 0) {
        DERR("mw_alloc: cannot insert mw resource into mw_htbl\n");
        mutex_enter(&ia_rp->ia_lock);
        ASSERT(ia_rp->ia_state == DAPLKA_IA_MW_ALLOC_IN_PROGRESS);
        ia_rp->ia_mw_alloccnt--;
        if (ia_rp->ia_mw_alloccnt == 0) {
            ia_rp->ia_state = DAPLKA_IA_INIT;
            cv_broadcast(&ia_rp->ia_cv);
        }
        mutex_exit(&ia_rp->ia_lock);
        goto cleanup;
    }
    inserted = B_TRUE;
    _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*mw_rp))

    D3("mw_alloc: ibt_alloc_mw mw_hdl(%p) mw_rkey(0x%llx)\n",
        mw_rp->mw_hdl, (longlong_t)mw_rkey);

    mutex_enter(&ia_rp->ia_lock);
    /*
     * We are done with mw_alloc; if this was the last mw_alloc,
     * change state back to DAPLKA_IA_INIT and wake up waiters,
     * specifically the unlock callback.
     */
    ASSERT(ia_rp->ia_state == DAPLKA_IA_MW_ALLOC_IN_PROGRESS);
    ia_rp->ia_mw_alloccnt--;
    if (ia_rp->ia_mw_alloccnt == 0) {
        ia_rp->ia_state = DAPLKA_IA_INIT;
        cv_broadcast(&ia_rp->ia_cv);
    }
    mutex_exit(&ia_rp->ia_lock);

    args.mw_hkey = mw_hkey;
    args.mw_rkey = mw_rkey;

    retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_mw_alloc_t),
        mode);
    if (retval != 0) {
        DERR("mw_alloc: copyout error %d\n", retval);
        retval = EFAULT;
        goto cleanup;
    }
    return (0);

cleanup:;
    if (inserted) {
        daplka_mw_resource_t *free_rp = NULL;

        (void) daplka_hash_remove(&ia_rp->ia_mw_htbl, mw_hkey,
            (void **)&free_rp);
        if (free_rp != mw_rp) {
            DERR("mw_alloc: cannot remove mw from hash table\n");
            /*
             * we can only get here if another thread
             * has completed the cleanup in mw_free
             */
            return (retval);
        }
    }
    DAPLKA_RS_UNREF(mw_rp);
    return (retval);
}
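/*
 * ia_mw_alloccnt tracks mw_alloc calls that have passed the state
 * check but have not yet finished inserting the MW into ia_mw_htbl.
 * The umem-unlock callback cannot begin freezing MWs until this count
 * drains to zero, which is why both the insert-failure path and the
 * success path above decrement the count under ia_lock and
 * cv_broadcast on ia_cv when it reaches zero.
 */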
/*
 * removes the mw reference from ia_mw_htbl and releases the
 * initial reference to the mw. also destroys the mw if the refcnt
 * drops to zero.
 */
static int
daplka_mw_free(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
    daplka_mw_resource_t	*mw_rp = NULL;
    dapl_mw_free_t		args;
    int			retval = 0;

    retval = ddi_copyin((void *)arg, &args, sizeof (dapl_mw_free_t), mode);
    if (retval != 0) {
        DERR("mw_free: copyin error %d\n", retval);
        return (EFAULT);
    }

    retval = daplka_hash_remove(&ia_rp->ia_mw_htbl, args.mw_hkey,
        (void **)&mw_rp);
    if (retval != 0 || mw_rp == NULL) {
        DERR("mw_free: cannot find mw resrc (0x%llx)\n",
            (longlong_t)args.mw_hkey);
        return (EINVAL);
    }

    ASSERT(DAPLKA_RS_TYPE(mw_rp) == DAPL_TYPE_MW);

    /* UNREF calls the actual free function when refcnt is zero */
    DAPLKA_RS_UNREF(mw_rp);
    return (retval);
}
/*
 * destroys the memory window.
 * called when refcnt drops to zero.
 */
static int
daplka_mw_destroy(daplka_resource_t *gen_rp)
{
    daplka_mw_resource_t	*mw_rp = (daplka_mw_resource_t *)gen_rp;
    ibt_status_t		status;

    _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mw_rp))
    ASSERT(DAPLKA_RS_REFCNT(mw_rp) == 0);
    D3("mw_destroy: entering, mw_rp 0x%p, rnum %d\n",
        mw_rp, DAPLKA_RS_RNUM(mw_rp));

    /*
     * free memory window
     */
    if (mw_rp->mw_hdl) {
        status = daplka_ibt_free_mw(mw_rp, mw_rp->mw_hca_hdl,
            mw_rp->mw_hdl);
        if (status != IBT_SUCCESS) {
            DERR("mw_destroy: ibt_free_mw returned %d\n", status);
        }
        mw_rp->mw_hdl = NULL;
        D3("mw_destroy: mw freed\n");
    }

    /*
     * release reference on PD
     */
    if (mw_rp->mw_pd_res != NULL) {
        DAPLKA_RS_UNREF(mw_rp->mw_pd_res);
        mw_rp->mw_pd_res = NULL;
    }
    mutex_destroy(&mw_rp->mw_lock);
    DAPLKA_RS_FINI(mw_rp);
    kmem_free(mw_rp, sizeof (daplka_mw_resource_t));
    D3("mw_destroy: exiting, mw_rp 0x%p\n", mw_rp);
    return (0);
}
static void
daplka_hash_mw_free(void *obj)
{
    daplka_mw_resource_t	*mw_rp = (daplka_mw_resource_t *)obj;

    ASSERT(DAPLKA_RS_TYPE(mw_rp) == DAPL_TYPE_MW);
    DAPLKA_RS_UNREF(mw_rp);
}
/*
 * SRQ ioctls and supporting functions
 */
static int
daplka_srq_create(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
    daplka_srq_resource_t	*srq_rp;
    daplka_pd_resource_t	*pd_rp;
    dapl_srq_create_t	args;
    ibt_srq_sizes_t		srq_sizes;
    ibt_srq_sizes_t		srq_real_sizes;
    ibt_hca_attr_t		*hca_attrp;
    uint64_t		srq_hkey = 0;
    boolean_t		inserted = B_FALSE;
    int			retval;
    ibt_status_t		status;

    D3("srq_create: enter\n");
    retval = ddi_copyin((void *)arg, &args, sizeof (dapl_srq_create_t),
        mode);
    if (retval != 0) {
        DERR("srq_create: copyin error %d\n", retval);
        return (EFAULT);
    }
    srq_rp = kmem_zalloc(sizeof (daplka_srq_resource_t), daplka_km_flags);
    if (srq_rp == NULL) {
        DERR("srq_create: cannot allocate srq_rp\n");
        return (ENOMEM);
    }
    DAPLKA_RS_INIT(srq_rp, DAPL_TYPE_SRQ,
        DAPLKA_RS_RNUM(ia_rp), daplka_srq_destroy);

    srq_rp->srq_hca = ia_rp->ia_hca;
    srq_rp->srq_hca_hdl = ia_rp->ia_hca_hdl;
    mutex_init(&srq_rp->srq_lock, NULL, MUTEX_DRIVER, NULL);

    /* get pd handle */
    pd_rp = (daplka_pd_resource_t *)
        daplka_hash_lookup(&ia_rp->ia_pd_htbl, args.srqc_pd_hkey);
    if (pd_rp == NULL) {
        DERR("srq_create: cannot find pd resource\n");
        retval = EINVAL;
        goto cleanup;
    }
    ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
    srq_rp->srq_pd_res = pd_rp;

    /*
     * these checks ensure that the requested SRQ sizes
     * are within the limits supported by the chosen HCA.
     */
    hca_attrp = &ia_rp->ia_hca->hca_attr;
    if (args.srqc_sizes.srqs_sz > hca_attrp->hca_max_srqs_sz) {
        DERR("srq_create: invalid srqs_sz %d\n",
            args.srqc_sizes.srqs_sz);
        retval = EINVAL;
        goto cleanup;
    }
    if (args.srqc_sizes.srqs_sgl > hca_attrp->hca_max_srq_sgl) {
        DERR("srq_create: invalid srqs_sgl %d\n",
            args.srqc_sizes.srqs_sgl);
        retval = EINVAL;
        goto cleanup;
    }

    D3("srq_create: srq_sgl %d, srq_sz %d\n",
        args.srqc_sizes.srqs_sgl, args.srqc_sizes.srqs_sz);

    srq_sizes.srq_wr_sz = args.srqc_sizes.srqs_sz;
    srq_sizes.srq_sgl_sz = args.srqc_sizes.srqs_sgl;

    /* create srq */
    status = daplka_ibt_alloc_srq(srq_rp, ia_rp->ia_hca_hdl,
        IBT_SRQ_USER_MAP, pd_rp->pd_hdl, &srq_sizes, &srq_rp->srq_hdl,
        &srq_real_sizes);
    if (status != IBT_SUCCESS) {
        DERR("srq_create: alloc_srq returned %d\n", status);
        *rvalp = (int)status;
        retval = 0;
        goto cleanup;
    }

    args.srqc_real_sizes.srqs_sz = srq_real_sizes.srq_wr_sz;
    args.srqc_real_sizes.srqs_sgl = srq_real_sizes.srq_sgl_sz;

    /* Get HCA-specific data_out info */
    status = ibt_ci_data_out(ia_rp->ia_hca_hdl,
        IBT_CI_NO_FLAGS, IBT_HDL_SRQ, (void *)srq_rp->srq_hdl,
        &args.srqc_data_out, sizeof (args.srqc_data_out));

    if (status != IBT_SUCCESS) {
        DERR("srq_create: ibt_ci_data_out error(%d)\n", status);
        *rvalp = (int)status;
        retval = 0;
        goto cleanup;
    }

    srq_rp->srq_real_size = srq_real_sizes.srq_wr_sz;

    /* preparing to copyout map_data back to the library */
    args.srqc_real_sizes.srqs_sz = srq_real_sizes.srq_wr_sz;
    args.srqc_real_sizes.srqs_sgl = srq_real_sizes.srq_sgl_sz;

    /* insert into srq hash table */
    retval = daplka_hash_insert(&ia_rp->ia_srq_htbl,
        &srq_hkey, (void *)srq_rp);
    if (retval != 0) {
        DERR("srq_create: cannot insert srq resource into srq_htbl\n");
        goto cleanup;
    }
    inserted = B_TRUE;

    /* return hkey to library */
    args.srqc_hkey = srq_hkey;

    retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_srq_create_t),
        mode);
    if (retval != 0) {
        DERR("srq_create: copyout error %d\n", retval);
        retval = EFAULT;
        goto cleanup;
    }

    D3("srq_create: %p, 0x%llx\n", srq_rp->srq_hdl, (longlong_t)srq_hkey);
    D3("    sz(%d) sgl(%d)\n",
        args.srqc_real_sizes.srqs_sz, args.srqc_real_sizes.srqs_sgl);
    D3("srq_create: exit\n");
    return (0);

cleanup:;
    if (inserted) {
        daplka_srq_resource_t *free_rp = NULL;

        (void) daplka_hash_remove(&ia_rp->ia_srq_htbl, srq_hkey,
            (void **)&free_rp);
        if (free_rp != srq_rp) {
            /*
             * this case is impossible because srq_free will
             * wait until our state transition is complete.
             */
            DERR("srq_create: cannot remove srq from hash table\n");
            return (retval);
        }
    }
    DAPLKA_RS_UNREF(srq_rp);
    return (retval);
}
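/*
 * Flow note: because the SRQ is allocated with IBT_SRQ_USER_MAP, the
 * CI-specific mapping information returned through srqc_data_out (via
 * ibt_ci_data_out) is what the userland library later hands to mmap to
 * map the SRQ work queue. The srqc_hkey returned alongside it names
 * this SRQ in subsequent resize and free ioctls.
 */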
/*
 * Resize an existing SRQ
 */
static int
daplka_srq_resize(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
    daplka_srq_resource_t	*srq_rp = NULL;
    ibt_hca_attr_t		*hca_attrp;
    dapl_srq_resize_t	args;
    ibt_status_t		status;
    int			retval = 0;

    retval = ddi_copyin((void *)arg, &args, sizeof (dapl_srq_resize_t),
        mode);
    if (retval != 0) {
        DERR("srq_resize: copyin error %d\n", retval);
        return (EFAULT);
    }

    /* get srq resource */
    srq_rp = (daplka_srq_resource_t *)
        daplka_hash_lookup(&ia_rp->ia_srq_htbl, args.srqr_hkey);
    if (srq_rp == NULL) {
        DERR("srq_resize: cannot find srq resource\n");
        return (EINVAL);
    }
    ASSERT(DAPLKA_RS_TYPE(srq_rp) == DAPL_TYPE_SRQ);

    hca_attrp = &ia_rp->ia_hca->hca_attr;
    if (args.srqr_new_size > hca_attrp->hca_max_srqs_sz) {
        DERR("srq_resize: invalid srq size %d", args.srqr_new_size);
        retval = EINVAL;
        goto cleanup;
    }

    mutex_enter(&srq_rp->srq_lock);
    /*
     * If ibt_resize_srq fails, it is primarily due to resource
     * shortage. Per IB spec a resize will never lose events and
     * a resize error leaves the SRQ intact. Therefore even if the
     * resize request fails we proceed and get the mapping data
     * from the SRQ so that the library can mmap it.
     */
    status = ibt_modify_srq(srq_rp->srq_hdl, IBT_SRQ_SET_SIZE,
        args.srqr_new_size, 0, &args.srqr_real_size);
    if (status != IBT_SUCCESS) {
        /* we return the size of the old SRQ if resize fails */
        args.srqr_real_size = srq_rp->srq_real_size;
        ASSERT(status != IBT_SRQ_HDL_INVALID);
        DERR("srq_resize: ibt_modify_srq failed:%d\n", status);
    } else {
        srq_rp->srq_real_size = args.srqr_real_size;
    }
    mutex_exit(&srq_rp->srq_lock);

    D2("srq_resize(%d): done new_sz(%u) real_sz(%u)\n",
        DAPLKA_RS_RNUM(srq_rp), args.srqr_new_size, args.srqr_real_size);

    /* Get HCA-specific data_out info */
    status = ibt_ci_data_out(srq_rp->srq_hca_hdl,
        IBT_CI_NO_FLAGS, IBT_HDL_SRQ, (void *)srq_rp->srq_hdl,
        &args.srqr_data_out, sizeof (args.srqr_data_out));
    if (status != IBT_SUCCESS) {
        DERR("srq_resize: ibt_ci_data_out error(%d)\n", status);
        /* return ibt_ci_data_out status */
        *rvalp = (int)status;
        retval = 0;
        goto cleanup;
    }

    retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_srq_resize_t),
        mode);
    if (retval != 0) {
        DERR("srq_resize: copyout error %d\n", retval);
        retval = EFAULT;
        goto cleanup;
    }

cleanup:;
    if (srq_rp != NULL) {
        DAPLKA_RS_UNREF(srq_rp);
    }
    return (retval);
}
/*
 * Frees an SRQ resource.
 */
static int
daplka_srq_free(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
    daplka_srq_resource_t	*srq_rp = NULL;
    dapl_srq_free_t		args;
    int			retval;

    retval = ddi_copyin((void *)arg, &args, sizeof (dapl_srq_free_t), mode);
    if (retval != 0) {
        DERR("srq_free: copyin error %d\n", retval);
        return (EFAULT);
    }

    retval = daplka_hash_remove(&ia_rp->ia_srq_htbl,
        args.srqf_hkey, (void **)&srq_rp);
    if (retval != 0 || srq_rp == NULL) {
        /*
         * this is only possible if we have two threads
         * calling srq_free in parallel.
         */
        DERR("srq_free: cannot find resource retval(%d) 0x%llx\n",
            retval, args.srqf_hkey);
        return (EINVAL);
    }

    /* UNREF calls the actual free function when refcnt is zero */
    DAPLKA_RS_UNREF(srq_rp);
    return (0);
}
/*
 * destroys an SRQ resource.
 * called when refcnt drops to zero.
 */
static int
daplka_srq_destroy(daplka_resource_t *gen_rp)
{
    daplka_srq_resource_t	*srq_rp = (daplka_srq_resource_t *)gen_rp;
    ibt_status_t		status;

    ASSERT(DAPLKA_RS_REFCNT(srq_rp) == 0);

    D3("srq_destroy: entering, srq_rp 0x%p, rnum %d\n",
        srq_rp, DAPLKA_RS_RNUM(srq_rp));
    /*
     * destroy the srq
     */
    if (srq_rp->srq_hdl != NULL) {
        status = daplka_ibt_free_srq(srq_rp, srq_rp->srq_hdl);
        if (status != IBT_SUCCESS) {
            DERR("srq_destroy: ibt_free_srq returned %d\n",
                status);
        }
        srq_rp->srq_hdl = NULL;
        D3("srq_destroy: srq freed, rnum %d\n", DAPLKA_RS_RNUM(srq_rp));
    }
    /*
     * release all references
     */
    if (srq_rp->srq_pd_res != NULL) {
        DAPLKA_RS_UNREF(srq_rp->srq_pd_res);
        srq_rp->srq_pd_res = NULL;
    }

    mutex_destroy(&srq_rp->srq_lock);
    DAPLKA_RS_FINI(srq_rp);
    kmem_free(srq_rp, sizeof (daplka_srq_resource_t));
    D3("srq_destroy: exiting, srq_rp 0x%p\n", srq_rp);
    return (0);
}
static void
daplka_hash_srq_free(void *obj)
{
    daplka_srq_resource_t	*srq_rp = (daplka_srq_resource_t *)obj;

    ASSERT(DAPLKA_RS_TYPE(srq_rp) == DAPL_TYPE_SRQ);
    DAPLKA_RS_UNREF(srq_rp);
}
6162 * This function tells the CM to start listening on a service id.
6163 * It must be called by the passive side client before the client
6164 * can receive connection requests from remote endpoints. If the
6165 * client specifies a non-zero service id (connection qualifier in
6166 * dapl terms), this function will attempt to bind to this service
6167 * id and return an error if the id is already in use. If the client
6168 * specifies zero as the service id, this function will try to find
6169 * the next available service id and return it back to the client.
6170 * To support the cr_handoff function, this function will, in addition
6171 * to creating and inserting an SP resource into the per-IA SP hash
6172 * table, insert the SP resource into a global SP table. This table
6173 * maintains all active service points created by all dapl clients.
6174 * CR handoff locates the target SP by iterating through this global
6179 daplka_service_register(daplka_ia_resource_t
*ia_rp
, intptr_t arg
, int mode
,
6180 cred_t
*cred
, int *rvalp
)
6182 daplka_evd_resource_t
*evd_rp
= NULL
;
6183 daplka_sp_resource_t
*sp_rp
= NULL
;
6184 dapl_service_register_t args
;
6185 ibt_srv_desc_t sd_args
;
6186 ibt_srv_bind_t sb_args
;
6187 ibt_status_t status
;
6188 ib_svc_id_t retsid
= 0;
6189 uint64_t sp_hkey
= 0;
6190 boolean_t bumped
= B_FALSE
;
6194 retval
= ddi_copyin((void *)arg
, &args
,
6195 sizeof (dapl_service_register_t
), mode
);
6197 DERR("service_register: copyin error %d\n", retval
);
6201 sp_rp
= kmem_zalloc(sizeof (*sp_rp
), daplka_km_flags
);
6202 if (sp_rp
== NULL
) {
6203 DERR("service_register: cannot allocate sp resource\n");
6206 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*sp_rp
))
6207 DAPLKA_RS_INIT(sp_rp
, DAPL_TYPE_SP
,
6208 DAPLKA_RS_RNUM(ia_rp
), daplka_sp_destroy
);
6210 /* check if evd exists */
6211 evd_rp
= (daplka_evd_resource_t
*)
6212 daplka_hash_lookup(&ia_rp
->ia_evd_htbl
, args
.sr_evd_hkey
);
6213 if (evd_rp
== NULL
) {
6214 DERR("service_register: evd resource not found\n");
6219 * initialize backlog size
6221 if (evd_rp
&& evd_rp
->evd_cq_real_size
> 0) {
6222 backlog_size
= evd_rp
->evd_cq_real_size
+ 1;
6224 backlog_size
= DAPLKA_DEFAULT_SP_BACKLOG
;
6226 D2("service_register: args.sr_sid = %llu\n", (longlong_t
)args
.sr_sid
);
6228 /* save the userland sp ptr */
6229 sp_rp
->sp_cookie
= args
.sr_sp_cookie
;
6230 sp_rp
->sp_backlog_size
= backlog_size
;
6231 D3("service_register: backlog set to %d\n", sp_rp
->sp_backlog_size
);
6232 sp_rp
->sp_backlog
= kmem_zalloc(sp_rp
->sp_backlog_size
*
6233 sizeof (daplka_sp_conn_pend_t
), daplka_km_flags
);
6235 /* save evd resource pointer */
6236 sp_rp
->sp_evd_res
= evd_rp
;
6239 * save ruid here so that we can do a comparison later
6240 * when someone does cr_handoff. the check will prevent
6241 * a malicious app from passing a CR to us.
6243 sp_rp
->sp_ruid
= crgetruid(cred
);
6245 /* fill in args for register_service */
6246 sd_args
.sd_ud_handler
= NULL
;
6247 sd_args
.sd_handler
= daplka_cm_service_handler
;
6248 sd_args
.sd_flags
= IBT_SRV_NO_FLAGS
;
6250 status
= ibt_register_service(daplka_dev
->daplka_clnt_hdl
,
6251 &sd_args
, args
.sr_sid
, 1, &sp_rp
->sp_srv_hdl
, &retsid
);
6253 if (status
!= IBT_SUCCESS
) {
6254 DERR("service_register: ibt_register_service returned %d\n",
6256 *rvalp
= (int)status
;
6260 /* save returned sid */
6261 sp_rp
->sp_conn_qual
= retsid
;
6262 args
.sr_retsid
= retsid
;
6264 /* fill in args for bind_service */
6265 sb_args
.sb_pkey
= ia_rp
->ia_port_pkey
;
6266 sb_args
.sb_lease
= 0xffffffff;
6267 sb_args
.sb_key
[0] = 0x1234;
6268 sb_args
.sb_key
[1] = 0x5678;
6269 sb_args
.sb_name
= DAPLKA_DRV_NAME
;
6271 D2("service_register: bind(0x%llx:0x%llx)\n",
6272 (longlong_t
)ia_rp
->ia_hca_sgid
.gid_prefix
,
6273 (longlong_t
)ia_rp
->ia_hca_sgid
.gid_guid
);
6275 status
= ibt_bind_service(sp_rp
->sp_srv_hdl
, ia_rp
->ia_hca_sgid
,
6276 &sb_args
, (void *)sp_rp
, &sp_rp
->sp_bind_hdl
);
6277 if (status
!= IBT_SUCCESS
) {
6278 DERR("service_register: ibt_bind_service returned %d\n",
6280 *rvalp
= (int)status
;
6286 * need to bump refcnt because the global hash table will
6287 * have a reference to sp_rp
6289 DAPLKA_RS_REF(sp_rp
);
6292 /* insert into global sp hash table */
6293 sp_rp
->sp_global_hkey
= 0;
6294 retval
= daplka_hash_insert(&daplka_global_sp_htbl
,
6295 &sp_rp
->sp_global_hkey
, (void *)sp_rp
);
6297 DERR("service_register: cannot insert sp resource\n");
6300 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*sp_rp
))
6302 /* insert into per-IA sp hash table */
6303 retval
= daplka_hash_insert(&ia_rp
->ia_sp_htbl
,
6304 &sp_hkey
, (void *)sp_rp
);
6306 DERR("service_register: cannot insert sp resource\n");
6310 /* pass index to application */
6311 args
.sr_sp_hkey
= sp_hkey
;
6312 retval
= ddi_copyout(&args
, (void *)arg
,
6313 sizeof (dapl_service_register_t
), mode
);
6315 DERR("service_register: copyout error %d\n", retval
);
6322 ASSERT(sp_rp
!= NULL
);
6323 /* remove from ia table */
6325 daplka_sp_resource_t
*free_rp
= NULL
;
6327 (void) daplka_hash_remove(&ia_rp
->ia_sp_htbl
,
6328 sp_hkey
, (void **)&free_rp
);
6329 if (free_rp
!= sp_rp
) {
6330 DERR("service_register: cannot remove sp\n");
6332 * we can only get here if another thread
6333 * has completed the cleanup in svc_deregister
6339 /* remove from global table */
6340 if (sp_rp
->sp_global_hkey
!= 0) {
6341 daplka_sp_resource_t
*free_rp
= NULL
;
6344 * we get here if either the hash_insert into
6345 * ia_sp_htbl failed or the ddi_copyout failed.
6346 * hash_insert failure implies that we are the
6347 * only thread with a reference to sp. ddi_copyout
6348 * failure implies that svc_deregister could have
6349 * picked up the sp and destroyed it. but since
6350 * we got to this point, we must have removed
6351 * the sp ourselves in hash_remove above and
6352 * that the sp can be destroyed by us.
6354 (void) daplka_hash_remove(&daplka_global_sp_htbl
,
6355 sp_rp
->sp_global_hkey
, (void **)&free_rp
);
6356 if (free_rp
!= sp_rp
) {
6357 DERR("service_register: cannot remove sp\n");
6359 * this case is impossible. see explanation above.
6364 sp_rp
->sp_global_hkey
= 0;
6366 /* unreference sp */
6368 DAPLKA_RS_UNREF(sp_rp
);
6371 /* destroy sp resource */
6372 DAPLKA_RS_UNREF(sp_rp
);
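/*
 * Reference-count summary for an SP (descriptive only; the authoritative
 * behavior is the code above): a successfully registered SP is referenced
 * twice, once by the global SP hash table (the explicit DAPLKA_RS_REF
 * before the global daplka_hash_insert) and once by the creation reference
 * handed to the per-IA table. The cleanup path unwinds in reverse order:
 * the per-IA entry first, then the global entry along with its reference,
 * and finally the creation reference, which triggers daplka_sp_destroy
 * once the count reaches zero.
 */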
/*
 * deregisters the service and removes SP from the global table.
 */
/* ARGSUSED */
static int
daplka_service_deregister(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	dapl_service_deregister_t	args;
	daplka_sp_resource_t		*sp_rp = NULL, *g_sp_rp = NULL;
	int				retval;

	retval = ddi_copyin((void *)arg, &args,
	    sizeof (dapl_service_deregister_t), mode);

	if (retval != 0) {
		DERR("service_deregister: copyin error %d\n", retval);
		return (EINVAL);
	}

	retval = daplka_hash_remove(&ia_rp->ia_sp_htbl,
	    args.sdr_sp_hkey, (void **)&sp_rp);
	if (retval != 0 || sp_rp == NULL) {
		DERR("service_deregister: cannot find sp resource\n");
		return (EINVAL);
	}

	retval = daplka_hash_remove(&daplka_global_sp_htbl,
	    sp_rp->sp_global_hkey, (void **)&g_sp_rp);
	if (retval != 0 || g_sp_rp == NULL) {
		DERR("service_deregister: cannot find sp resource\n");
	}

	/* remove the global reference */
	if (g_sp_rp == sp_rp) {
		DAPLKA_RS_UNREF(g_sp_rp);
	}

	DAPLKA_RS_UNREF(sp_rp);
	return (0);
}
/*
 * destroys a service point.
 * called when the refcnt drops to zero.
 */
static int
daplka_sp_destroy(daplka_resource_t *gen_rp)
{
	daplka_sp_resource_t	*sp_rp = (daplka_sp_resource_t *)gen_rp;
	ibt_status_t		status;

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*sp_rp))
	ASSERT(DAPLKA_RS_REFCNT(sp_rp) == 0);
	D3("sp_destroy: entering, sp_rp %p, rnum %d\n",
	    sp_rp, DAPLKA_RS_RNUM(sp_rp));

	/*
	 * it is possible for pending connections to remain
	 * on an SP. We need to clean them up here.
	 */
	if (sp_rp->sp_backlog != NULL) {
		ibt_cm_proceed_reply_t	proc_reply;
		void			*spcp_sidp;
		int			i, cnt = 0;

		for (i = 0; i < sp_rp->sp_backlog_size; i++) {
			if (sp_rp->sp_backlog[i].spcp_state ==
			    DAPLKA_SPCP_PENDING) {
				cnt++;
				if (sp_rp->sp_backlog[i].spcp_sid == NULL) {
					DERR("sp_destroy: "
					    "spcp_sid == NULL!\n");
					continue;
				}
				mutex_enter(&sp_rp->sp_lock);
				spcp_sidp = sp_rp->sp_backlog[i].spcp_sid;
				sp_rp->sp_backlog[i].spcp_state =
				    DAPLKA_SPCP_INIT;
				sp_rp->sp_backlog[i].spcp_sid = NULL;
				sp_rp->sp_backlog[i].spcp_req_len = 0;
				mutex_exit(&sp_rp->sp_lock);
				status = ibt_cm_proceed(IBT_CM_EVENT_REQ_RCV,
				    spcp_sidp,
				    IBT_CM_NO_RESOURCE, &proc_reply, NULL, 0);
				if (status != IBT_SUCCESS) {
					DERR("sp_destroy: proceed failed %d\n",
					    status);
				}
			}
		}
		if (cnt > 0) {
			DERR("sp_destroy: found %d pending "
			    "connections\n", cnt);
		}
	}

	if (sp_rp->sp_srv_hdl != NULL && sp_rp->sp_bind_hdl != NULL) {
		status = ibt_unbind_service(sp_rp->sp_srv_hdl,
		    sp_rp->sp_bind_hdl);
		if (status != IBT_SUCCESS) {
			DERR("sp_destroy: ibt_unbind_service "
			    "failed: %d\n", status);
		}
	}
	if (sp_rp->sp_srv_hdl != NULL) {
		status = ibt_deregister_service(daplka_dev->daplka_clnt_hdl,
		    sp_rp->sp_srv_hdl);
		if (status != IBT_SUCCESS) {
			DERR("sp_destroy: ibt_deregister_service "
			    "failed: %d\n", status);
		}
	}
	if (sp_rp->sp_backlog != NULL) {
		kmem_free(sp_rp->sp_backlog,
		    sp_rp->sp_backlog_size * sizeof (daplka_sp_conn_pend_t));
		sp_rp->sp_backlog = NULL;
		sp_rp->sp_backlog_size = 0;
	}

	/*
	 * release reference to evd
	 */
	if (sp_rp->sp_evd_res != NULL) {
		DAPLKA_RS_UNREF(sp_rp->sp_evd_res);
	}
	sp_rp->sp_bind_hdl = NULL;
	sp_rp->sp_srv_hdl = NULL;
	DAPLKA_RS_FINI(sp_rp);
	kmem_free(sp_rp, sizeof (*sp_rp));
	D3("sp_destroy: exiting, sp_rp %p\n", sp_rp);
	return (0);
}
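/*
 * Note on the backlog sweep above: sp_destroy only runs once the refcnt
 * has dropped to zero (see the ASSERT), so the structure as a whole is
 * private to this thread; sp_lock is still taken around each slot because
 * the IBTF CM may deliver callbacks that touch the backlog until
 * ibt_deregister_service completes. Each still-pending CR is answered
 * with IBT_CM_NO_RESOURCE through ibt_cm_proceed so the deferred session
 * does not hang on the active side.
 */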
/*
 * this function is called by daplka_hash_destroy for
 * freeing SP resource objects
 */
static void
daplka_hash_sp_free(void *obj)
{
	daplka_sp_resource_t	*sp_rp = (daplka_sp_resource_t *)obj;
	daplka_sp_resource_t	*g_sp_rp;
	int			retval;

	ASSERT(DAPLKA_RS_TYPE(sp_rp) == DAPL_TYPE_SP);

	retval = daplka_hash_remove(&daplka_global_sp_htbl,
	    sp_rp->sp_global_hkey, (void **)&g_sp_rp);
	if (retval != 0 || g_sp_rp == NULL) {
		DERR("sp_free: cannot find sp resource\n");
	}
	if (g_sp_rp == sp_rp) {
		DAPLKA_RS_UNREF(g_sp_rp);
	}

	DAPLKA_RS_UNREF(sp_rp);
}

static void
daplka_hash_sp_unref(void *obj)
{
	daplka_sp_resource_t	*sp_rp = (daplka_sp_resource_t *)obj;

	ASSERT(DAPLKA_RS_TYPE(sp_rp) == DAPL_TYPE_SP);
	DAPLKA_RS_UNREF(sp_rp);
}
/*
 * Passive side CM handlers
 */

/*
 * processes the REQ_RCV event
 */
/* ARGSUSED */
static ibt_cm_status_t
daplka_cm_service_req(daplka_sp_resource_t *spp, ibt_cm_event_t *event,
    ibt_cm_return_args_t *ret_args, void *pr_data, ibt_priv_data_len_t pr_len)
{
	daplka_sp_conn_pend_t	*conn = NULL;
	daplka_evd_event_t	*cr_ev = NULL;
	ibt_cm_status_t		cm_status = IBT_CM_DEFAULT;
	uint16_t		bkl_index;
	ibt_status_t		status;

	/*
	 * acquire a slot in the connection backlog of this service point
	 */
	mutex_enter(&spp->sp_lock);
	for (bkl_index = 0; bkl_index < spp->sp_backlog_size; bkl_index++) {
		if (spp->sp_backlog[bkl_index].spcp_state == DAPLKA_SPCP_INIT) {
			conn = &spp->sp_backlog[bkl_index];
			ASSERT(conn->spcp_sid == NULL);
			conn->spcp_state = DAPLKA_SPCP_PENDING;
			conn->spcp_sid = event->cm_session_id;
			break;
		}
	}
	mutex_exit(&spp->sp_lock);

	/*
	 * too many pending connections
	 */
	if (bkl_index == spp->sp_backlog_size) {
		DERR("service_req: connection pending exceeded %d limit\n",
		    spp->sp_backlog_size);
		return (IBT_CM_NO_RESOURCE);
	}
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*conn))

	/*
	 * save data for cr_handoff
	 */
	if (pr_data != NULL && pr_len > 0) {
		int trunc_len = pr_len;

		if (trunc_len > DAPL_MAX_PRIVATE_DATA_SIZE) {
			DERR("service_req: private data truncated\n");
			trunc_len = DAPL_MAX_PRIVATE_DATA_SIZE;
		}
		conn->spcp_req_len = trunc_len;
		bcopy(pr_data, conn->spcp_req_data, trunc_len);
	} else {
		conn->spcp_req_len = 0;
	}
	conn->spcp_rdma_ra_in = event->cm_event.req.req_rdma_ra_in;
	conn->spcp_rdma_ra_out = event->cm_event.req.req_rdma_ra_out;

	/*
	 * create a CR event
	 */
	cr_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
	if (cr_ev == NULL) {
		DERR("service_req: could not alloc cr_ev\n");
		cm_status = IBT_CM_NO_RESOURCE;
		goto cleanup;
	}

	cr_ev->ee_next = NULL;
	cr_ev->ee_cmev.ec_cm_cookie = spp->sp_cookie;
	cr_ev->ee_cmev.ec_cm_is_passive = B_TRUE;
	cr_ev->ee_cmev.ec_cm_psep_cookie = DAPLKA_CREATE_PSEP_COOKIE(bkl_index);
	/*
	 * save the requestor gid
	 * daplka_event_poll needs this if this is a third party REQ_RCV
	 */
	cr_ev->ee_cmev.ec_cm_req_prim_addr.gid_prefix =
	    event->cm_event.req.req_prim_addr.av_dgid.gid_prefix;
	cr_ev->ee_cmev.ec_cm_req_prim_addr.gid_guid =
	    event->cm_event.req.req_prim_addr.av_dgid.gid_guid;

	/*
	 * set the event type; attach private data if present
	 */
	if (pr_len == 0) {
		cr_ev->ee_cmev.ec_cm_ev_type =
		    DAPL_IB_CME_CONNECTION_REQUEST_PENDING;
	} else {
		cr_ev->ee_cmev.ec_cm_ev_priv_data =
		    kmem_zalloc(pr_len, KM_NOSLEEP);
		if (cr_ev->ee_cmev.ec_cm_ev_priv_data == NULL) {
			DERR("service_req: could not alloc priv\n");
			cm_status = IBT_CM_NO_RESOURCE;
			goto cleanup;
		}
		bcopy(pr_data, cr_ev->ee_cmev.ec_cm_ev_priv_data, pr_len);
		cr_ev->ee_cmev.ec_cm_ev_type =
		    DAPL_IB_CME_CONNECTION_REQUEST_PENDING_PRIVATE_DATA;
	}
	cr_ev->ee_cmev.ec_cm_ev_priv_data_len = pr_len;

	/*
	 * tell the active side to expect the processing time to be
	 * at most equal to daplka_cm_delay
	 */
	status = ibt_cm_delay(IBT_CM_DELAY_REQ, event->cm_session_id,
	    daplka_cm_delay, NULL, 0);
	if (status != IBT_SUCCESS) {
		DERR("service_req: ibt_cm_delay failed %d\n", status);
		cm_status = IBT_CM_NO_RESOURCE;
		goto cleanup;
	}

	/*
	 * enqueue cr_ev onto the cr_events list of the EVD
	 * corresponding to the SP
	 */
	D2("service_req: enqueue event(%p) evdp(%p) priv_data(%p) "
	    "priv_len(%d) psep(0x%llx)\n", cr_ev, spp->sp_evd_res,
	    cr_ev->ee_cmev.ec_cm_ev_priv_data,
	    (int)cr_ev->ee_cmev.ec_cm_ev_priv_data_len,
	    (longlong_t)cr_ev->ee_cmev.ec_cm_psep_cookie);

	daplka_evd_wakeup(spp->sp_evd_res,
	    &spp->sp_evd_res->evd_cr_events, cr_ev);

	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*conn))
	return (IBT_CM_DEFER);

cleanup:;
	/*
	 * free the cr event
	 */
	if (cr_ev != NULL) {
		if (cr_ev->ee_cmev.ec_cm_ev_priv_data != NULL) {
			kmem_free(cr_ev->ee_cmev.ec_cm_ev_priv_data, pr_len);
			cr_ev->ee_cmev.ec_cm_ev_priv_data = NULL;
			cr_ev->ee_cmev.ec_cm_ev_priv_data_len = 0;
		}
		kmem_free(cr_ev, sizeof (daplka_evd_event_t));
	}
	/*
	 * release our slot in the backlog array
	 */
	mutex_enter(&spp->sp_lock);
	ASSERT(conn->spcp_state == DAPLKA_SPCP_PENDING);
	ASSERT(conn->spcp_sid == event->cm_session_id);
	conn->spcp_state = DAPLKA_SPCP_INIT;
	conn->spcp_req_len = 0;
	conn->spcp_sid = NULL;
	mutex_exit(&spp->sp_lock);
	return (cm_status);
}
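/*
 * Backlog slot lifecycle (descriptive summary): a slot moves from INIT to
 * PENDING in daplka_cm_service_req, which then returns IBT_CM_DEFER so the
 * REQ stays outstanding while userland decides. The slot index is encoded
 * into the psep cookie (DAPLKA_CREATE_PSEP_COOKIE) carried by the CR
 * event; the CR accept/reject/handoff paths elsewhere in this driver use
 * that cookie to locate the slot, call ibt_cm_proceed on the saved session
 * id and return the slot to INIT.
 */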
/*
 * processes the CONN_CLOSED event
 */
/* ARGSUSED */
static ibt_cm_status_t
daplka_cm_service_conn_closed(daplka_sp_resource_t *sp_rp,
    ibt_cm_event_t *event, ibt_cm_return_args_t *ret_args,
    void *priv_data, ibt_priv_data_len_t len)
{
	daplka_ep_resource_t	*ep_rp;
	daplka_evd_event_t	*disc_ev;
	uint32_t		old_state, new_state;

	ep_rp = (daplka_ep_resource_t *)
	    ibt_get_chan_private(event->cm_channel);
	if (ep_rp == NULL) {
		DERR("service_conn_closed: ep_rp == NULL\n");
		return (IBT_CM_ACCEPT);
	}

	/*
	 * verify that the ep_state is either CONNECTED or
	 * DISCONNECTING. if it is not in either states return
	 * without generating an event.
	 */
	new_state = old_state = daplka_ep_get_state(ep_rp);
	if (old_state != DAPLKA_EP_STATE_CONNECTED &&
	    old_state != DAPLKA_EP_STATE_DISCONNECTING) {
		/*
		 * we can get here if the connection is being aborted
		 */
		D2("service_conn_closed: conn aborted, state = %d, "
		    "closed = %d\n", old_state, (int)event->cm_event.closed);
		daplka_ep_set_state(ep_rp, old_state, new_state);
		return (IBT_CM_ACCEPT);
	}

	/*
	 * create a DAPL_IB_CME_DISCONNECTED event
	 */
	disc_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
	if (disc_ev == NULL) {
		DERR("service_conn_closed: cannot alloc disc_ev\n");
		daplka_ep_set_state(ep_rp, old_state, new_state);
		return (IBT_CM_ACCEPT);
	}

	disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_DISCONNECTED;
	disc_ev->ee_cmev.ec_cm_cookie = sp_rp->sp_cookie;
	disc_ev->ee_cmev.ec_cm_is_passive = B_TRUE;
	disc_ev->ee_cmev.ec_cm_psep_cookie = ep_rp->ep_psep_cookie;
	disc_ev->ee_cmev.ec_cm_ev_priv_data = NULL;
	disc_ev->ee_cmev.ec_cm_ev_priv_data_len = 0;

	D2("service_conn_closed: enqueue event(%p) evdp(%p) psep(0x%llx)\n",
	    disc_ev, sp_rp->sp_evd_res, (longlong_t)ep_rp->ep_psep_cookie);

	/*
	 * transition ep_state to DISCONNECTED
	 */
	new_state = DAPLKA_EP_STATE_DISCONNECTED;
	daplka_ep_set_state(ep_rp, old_state, new_state);

	/*
	 * enqueue event onto the conn_evd owned by ep_rp
	 */
	daplka_evd_wakeup(ep_rp->ep_conn_evd,
	    &ep_rp->ep_conn_evd->evd_conn_events, disc_ev);

	return (IBT_CM_ACCEPT);
}
/*
 * processes the CONN_EST event
 */
/* ARGSUSED */
static ibt_cm_status_t
daplka_cm_service_conn_est(daplka_sp_resource_t *sp_rp, ibt_cm_event_t *event,
    ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
{
	daplka_ep_resource_t	*ep_rp;
	daplka_evd_event_t	*conn_ev;
	void			*pr_data = event->cm_priv_data;
	ibt_priv_data_len_t	pr_len = event->cm_priv_data_len;
	uint32_t		old_state, new_state;

	ep_rp = (daplka_ep_resource_t *)
	    ibt_get_chan_private(event->cm_channel);
	if (ep_rp == NULL) {
		DERR("service_conn_est: ep_rp == NULL\n");
		return (IBT_CM_ACCEPT);
	}

	/*
	 * verify that ep_state is ACCEPTING. if it is not in this
	 * state, return without generating an event.
	 */
	new_state = old_state = daplka_ep_get_state(ep_rp);
	if (old_state != DAPLKA_EP_STATE_ACCEPTING) {
		/*
		 * we can get here if the connection is being aborted
		 */
		DERR("service_conn_est: conn aborted, state = %d\n",
		    old_state);
		daplka_ep_set_state(ep_rp, old_state, new_state);
		return (IBT_CM_ACCEPT);
	}

	/*
	 * create a DAPL_IB_CME_CONNECTED event
	 */
	conn_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
	if (conn_ev == NULL) {
		DERR("service_conn_est: conn_ev alloc failed\n");
		daplka_ep_set_state(ep_rp, old_state, new_state);
		return (IBT_CM_ACCEPT);
	}

	conn_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_CONNECTED;
	conn_ev->ee_cmev.ec_cm_cookie = sp_rp->sp_cookie;
	conn_ev->ee_cmev.ec_cm_is_passive = B_TRUE;
	conn_ev->ee_cmev.ec_cm_psep_cookie = ep_rp->ep_psep_cookie;

	/*
	 * copy private data into event
	 */
	if (pr_len > 0) {
		conn_ev->ee_cmev.ec_cm_ev_priv_data =
		    kmem_zalloc(pr_len, KM_NOSLEEP);
		if (conn_ev->ee_cmev.ec_cm_ev_priv_data == NULL) {
			DERR("service_conn_est: pr_data alloc failed\n");
			daplka_ep_set_state(ep_rp, old_state, new_state);
			kmem_free(conn_ev, sizeof (daplka_evd_event_t));
			return (IBT_CM_ACCEPT);
		}
		bcopy(pr_data, conn_ev->ee_cmev.ec_cm_ev_priv_data, pr_len);
	}
	conn_ev->ee_cmev.ec_cm_ev_priv_data_len = pr_len;

	D2("service_conn_est: enqueue event(%p) evdp(%p)\n",
	    conn_ev, ep_rp->ep_conn_evd);

	/*
	 * transition ep_state to CONNECTED
	 */
	new_state = DAPLKA_EP_STATE_CONNECTED;
	daplka_ep_set_state(ep_rp, old_state, new_state);

	/*
	 * enqueue event onto the conn_evd owned by ep_rp
	 */
	daplka_evd_wakeup(ep_rp->ep_conn_evd,
	    &ep_rp->ep_conn_evd->evd_conn_events, conn_ev);

	return (IBT_CM_ACCEPT);
}
/*
 * processes the FAILURE event
 */
/* ARGSUSED */
static ibt_cm_status_t
daplka_cm_service_event_failure(daplka_sp_resource_t *sp_rp,
    ibt_cm_event_t *event, ibt_cm_return_args_t *ret_args, void *priv_data,
    ibt_priv_data_len_t len)
{
	daplka_evd_event_t	*disc_ev;
	daplka_ep_resource_t	*ep_rp;
	uint32_t		old_state, new_state;
	ibt_rc_chan_query_attr_t chan_attrs;
	ibt_status_t		status;

	/*
	 * check that we still have a valid cm_channel before continuing
	 */
	if (event->cm_channel == NULL) {
		DERR("service_event_failure: event->cm_channel == NULL\n");
		return (IBT_CM_ACCEPT);
	}
	ep_rp = (daplka_ep_resource_t *)
	    ibt_get_chan_private(event->cm_channel);
	if (ep_rp == NULL) {
		DERR("service_event_failure: ep_rp == NULL\n");
		return (IBT_CM_ACCEPT);
	}

	/*
	 * verify that ep_state is ACCEPTING or DISCONNECTING. if it
	 * is not in either state, return without generating an event.
	 */
	new_state = old_state = daplka_ep_get_state(ep_rp);
	if (old_state != DAPLKA_EP_STATE_ACCEPTING &&
	    old_state != DAPLKA_EP_STATE_DISCONNECTING) {
		/*
		 * we can get here if the connection is being aborted
		 */
		DERR("service_event_failure: conn aborted, state = %d, "
		    "cf_code = %d, cf_msg = %d, cf_reason = %d\n", old_state,
		    (int)event->cm_event.failed.cf_code,
		    (int)event->cm_event.failed.cf_msg,
		    (int)event->cm_event.failed.cf_reason);

		daplka_ep_set_state(ep_rp, old_state, new_state);
		return (IBT_CM_ACCEPT);
	}

	bzero(&chan_attrs, sizeof (ibt_rc_chan_query_attr_t));
	status = ibt_query_rc_channel(ep_rp->ep_chan_hdl, &chan_attrs);

	if ((status == IBT_SUCCESS) &&
	    (chan_attrs.rc_state != IBT_STATE_ERROR)) {
		DERR("service_event_failure: conn abort qpn %d state %d\n",
		    chan_attrs.rc_qpn, chan_attrs.rc_state);

		/* explicitly transition the QP to ERROR state */
		status = ibt_flush_channel(ep_rp->ep_chan_hdl);
	}

	/*
	 * create an event
	 */
	disc_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
	if (disc_ev == NULL) {
		DERR("service_event_failure: cannot alloc disc_ev\n");
		daplka_ep_set_state(ep_rp, old_state, new_state);
		return (IBT_CM_ACCEPT);
	}

	/*
	 * fill in the appropriate event type
	 */
	if (event->cm_event.failed.cf_code == IBT_CM_FAILURE_TIMEOUT) {
		disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_TIMED_OUT;
	} else if (event->cm_event.failed.cf_code == IBT_CM_FAILURE_REJ_RCV) {
		switch (event->cm_event.failed.cf_reason) {
		case IBT_CM_INVALID_CID:
			disc_ev->ee_cmev.ec_cm_ev_type =
			    DAPL_IB_CME_DESTINATION_REJECT;
			break;
		default:
			disc_ev->ee_cmev.ec_cm_ev_type =
			    DAPL_IB_CME_LOCAL_FAILURE;
			break;
		}
	} else {
		disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_LOCAL_FAILURE;
	}
	disc_ev->ee_cmev.ec_cm_cookie = sp_rp->sp_cookie;
	disc_ev->ee_cmev.ec_cm_is_passive = B_TRUE;
	disc_ev->ee_cmev.ec_cm_psep_cookie = ep_rp->ep_psep_cookie;
	disc_ev->ee_cmev.ec_cm_ev_priv_data_len = 0;
	disc_ev->ee_cmev.ec_cm_ev_priv_data = NULL;

	D2("service_event_failure: enqueue event(%p) evdp(%p) cf_code(%d) "
	    "cf_msg(%d) cf_reason(%d) psep(0x%llx)\n", disc_ev,
	    ep_rp->ep_conn_evd, (int)event->cm_event.failed.cf_code,
	    (int)event->cm_event.failed.cf_msg,
	    (int)event->cm_event.failed.cf_reason,
	    (longlong_t)ep_rp->ep_psep_cookie);

	/*
	 * transition ep_state to DISCONNECTED
	 */
	new_state = DAPLKA_EP_STATE_DISCONNECTED;
	daplka_ep_set_state(ep_rp, old_state, new_state);

	/*
	 * enqueue event onto the conn_evd owned by ep_rp
	 */
	daplka_evd_wakeup(ep_rp->ep_conn_evd,
	    &ep_rp->ep_conn_evd->evd_conn_events, disc_ev);

	return (IBT_CM_ACCEPT);
}
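/*
 * The three passive event handlers above share one shape: fetch the EP
 * state with daplka_ep_get_state, bail out with IBT_CM_ACCEPT if the
 * connection is being aborted, allocate the EVD event with KM_NOSLEEP
 * (we are in IBTF callback context), transition the EP state and enqueue
 * the event with daplka_evd_wakeup. Every exit path must pair the
 * get_state call with daplka_ep_set_state, which is why even the no-op
 * paths store old_state back.
 */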
/*
 * this is the passive side CM handler. it gets registered
 * when an SP resource is created in daplka_service_register.
 */
static ibt_cm_status_t
daplka_cm_service_handler(void *cm_private, ibt_cm_event_t *event,
    ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
{
	daplka_sp_resource_t	*sp_rp = (daplka_sp_resource_t *)cm_private;

	if (sp_rp == NULL) {
		DERR("service_handler: sp_rp == NULL\n");
		return (IBT_CM_NO_RESOURCE);
	}
	/*
	 * default is not to return priv data
	 */
	if (ret_args != NULL) {
		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ret_args))
		ret_args->cm_ret_len = 0;
	}

	switch (event->cm_type) {
	case IBT_CM_EVENT_REQ_RCV:
		D2("service_handler: IBT_CM_EVENT_REQ_RCV\n");
		return (daplka_cm_service_req(sp_rp, event, ret_args,
		    event->cm_priv_data, event->cm_priv_data_len));

	case IBT_CM_EVENT_REP_RCV:
		/* passive side should not receive this event */
		D2("service_handler: IBT_CM_EVENT_REP_RCV\n");
		return (IBT_CM_DEFAULT);

	case IBT_CM_EVENT_CONN_CLOSED:
		D2("service_handler: IBT_CM_EVENT_CONN_CLOSED %d\n",
		    event->cm_event.closed);
		return (daplka_cm_service_conn_closed(sp_rp, event, ret_args,
		    priv_data, len));

	case IBT_CM_EVENT_MRA_RCV:
		/* passive side does default processing MRA event */
		D2("service_handler: IBT_CM_EVENT_MRA_RCV\n");
		return (IBT_CM_DEFAULT);

	case IBT_CM_EVENT_CONN_EST:
		D2("service_handler: IBT_CM_EVENT_CONN_EST\n");
		return (daplka_cm_service_conn_est(sp_rp, event, ret_args,
		    priv_data, len));

	case IBT_CM_EVENT_FAILURE:
		D2("service_handler: IBT_CM_EVENT_FAILURE\n");
		return (daplka_cm_service_event_failure(sp_rp, event, ret_args,
		    priv_data, len));

	case IBT_CM_EVENT_LAP_RCV:
		/* active side had initiated a path migration operation */
		D2("service_handler: IBT_CM_EVENT_LAP_RCV\n");
		return (IBT_CM_ACCEPT);

	default:
		DERR("service_handler: invalid event %d\n", event->cm_type);
		break;
	}
	return (IBT_CM_DEFAULT);
}
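/*
 * Passive-side event dispatch, for quick reference:
 *	REQ_RCV		-> daplka_cm_service_req (may return IBT_CM_DEFER)
 *	CONN_CLOSED	-> daplka_cm_service_conn_closed
 *	CONN_EST	-> daplka_cm_service_conn_est
 *	FAILURE		-> daplka_cm_service_event_failure
 *	REP_RCV, MRA	-> default IBTF processing
 *	LAP_RCV		-> accepted; the active side drives path migration
 */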
/*
 * Active side CM handlers
 */

/*
 * Processes the REP_RCV event. When the passive side accepts the
 * connection, this handler is called. We make a copy of the private
 * data into the ep so that it can be passed back to userland when
 * the CONN_EST event occurs.
 */
/* ARGSUSED */
static ibt_cm_status_t
daplka_cm_rc_rep_rcv(daplka_ep_resource_t *ep_rp, ibt_cm_event_t *event,
    ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
{
	void			*pr_data = event->cm_priv_data;
	ibt_priv_data_len_t	pr_len = event->cm_priv_data_len;
	uint32_t		old_state, new_state;

	D2("rc_rep_rcv: pr_data(0x%p), pr_len(%d)\n", pr_data,
	    (int)pr_len);

	ASSERT(ep_rp != NULL);
	new_state = old_state = daplka_ep_get_state(ep_rp);
	if (old_state != DAPLKA_EP_STATE_CONNECTING) {
		/*
		 * we can get here if the connection is being aborted
		 */
		DERR("rc_rep_rcv: conn aborted, state = %d\n", old_state);
		daplka_ep_set_state(ep_rp, old_state, new_state);
		return (IBT_CM_NO_CHANNEL);
	}

	/*
	 * we do not cancel the timer here because the connection
	 * handshake is still in progress.
	 */

	/*
	 * save the private data. it will be passed up when
	 * the connection is established.
	 */
	if (pr_len > 0) {
		ep_rp->ep_priv_len = pr_len;
		bcopy(pr_data, ep_rp->ep_priv_data, (size_t)pr_len);
	}

	/*
	 * we do not actually transition to a different state.
	 * the state will change when we get a conn_est, failure,
	 * closed, or timeout event.
	 */
	daplka_ep_set_state(ep_rp, old_state, new_state);
	return (IBT_CM_ACCEPT);
}
/*
 * Processes the CONN_CLOSED event. This gets called when either
 * the active or passive side closes the rc channel.
 */
/* ARGSUSED */
static ibt_cm_status_t
daplka_cm_rc_conn_closed(daplka_ep_resource_t *ep_rp, ibt_cm_event_t *event,
    ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
{
	daplka_evd_event_t	*disc_ev;
	uint32_t		old_state, new_state;

	ASSERT(ep_rp != NULL);
	old_state = new_state = daplka_ep_get_state(ep_rp);
	if (old_state != DAPLKA_EP_STATE_CONNECTED &&
	    old_state != DAPLKA_EP_STATE_DISCONNECTING) {
		/*
		 * we can get here if the connection is being aborted
		 */
		D2("rc_conn_closed: conn aborted, state = %d, "
		    "closed = %d\n", old_state, (int)event->cm_event.closed);
		daplka_ep_set_state(ep_rp, old_state, new_state);
		return (IBT_CM_ACCEPT);
	}

	/*
	 * it's ok for the timer to fire at this point. the
	 * taskq thread that processes the timer will just wait
	 * until we are done with our state transition.
	 */
	if (daplka_cancel_timer(ep_rp) != 0) {
		/*
		 * daplka_cancel_timer returns -1 if the timer is
		 * being processed and 0 for all other cases.
		 * we need to reset ep_state to allow timer processing
		 * to continue.
		 */
		DERR("rc_conn_closed: timer is being processed\n");
		daplka_ep_set_state(ep_rp, old_state, new_state);
		return (IBT_CM_ACCEPT);
	}

	/*
	 * create a DAPL_IB_CME_DISCONNECTED event
	 */
	disc_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
	if (disc_ev == NULL) {
		DERR("rc_conn_closed: could not alloc ev\n");
		daplka_ep_set_state(ep_rp, old_state, new_state);
		return (IBT_CM_ACCEPT);
	}

	disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_DISCONNECTED;
	disc_ev->ee_cmev.ec_cm_cookie = ep_rp->ep_cookie;
	disc_ev->ee_cmev.ec_cm_is_passive = B_FALSE;
	disc_ev->ee_cmev.ec_cm_psep_cookie = 0;
	disc_ev->ee_cmev.ec_cm_ev_priv_data = NULL;
	disc_ev->ee_cmev.ec_cm_ev_priv_data_len = 0;

	D2("rc_conn_closed: enqueue event(%p) evdp(%p) closed(%d)\n",
	    disc_ev, ep_rp->ep_conn_evd, (int)event->cm_event.closed);

	/*
	 * transition ep_state to DISCONNECTED
	 */
	new_state = DAPLKA_EP_STATE_DISCONNECTED;
	daplka_ep_set_state(ep_rp, old_state, new_state);

	/*
	 * enqueue event onto the conn_evd owned by ep_rp
	 */
	daplka_evd_wakeup(ep_rp->ep_conn_evd,
	    &ep_rp->ep_conn_evd->evd_conn_events, disc_ev);

	return (IBT_CM_ACCEPT);
}
/*
 * processes the CONN_EST event
 */
/* ARGSUSED */
static ibt_cm_status_t
daplka_cm_rc_conn_est(daplka_ep_resource_t *ep_rp, ibt_cm_event_t *event,
    ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
{
	daplka_evd_event_t	*conn_ev;
	uint32_t		old_state, new_state;

	ASSERT(ep_rp != NULL);
	old_state = new_state = daplka_ep_get_state(ep_rp);
	if (old_state != DAPLKA_EP_STATE_CONNECTING) {
		/*
		 * we can get here if the connection is being aborted
		 */
		DERR("rc_conn_est: conn aborted, state = %d\n", old_state);
		daplka_ep_set_state(ep_rp, old_state, new_state);
		return (IBT_CM_ACCEPT);
	}

	/*
	 * it's ok for the timer to fire at this point. the
	 * taskq thread that processes the timer will just wait
	 * until we are done with our state transition.
	 */
	if (daplka_cancel_timer(ep_rp) != 0) {
		/*
		 * daplka_cancel_timer returns -1 if the timer is
		 * being processed and 0 for all other cases.
		 * we need to reset ep_state to allow timer processing
		 * to continue.
		 */
		DERR("rc_conn_est: timer is being processed\n");
		daplka_ep_set_state(ep_rp, old_state, new_state);
		return (IBT_CM_ACCEPT);
	}

	/*
	 * create a DAPL_IB_CME_CONNECTED event
	 */
	conn_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
	if (conn_ev == NULL) {
		DERR("rc_conn_est: could not alloc ev\n");
		daplka_ep_set_state(ep_rp, old_state, new_state);
		return (IBT_CM_ACCEPT);
	}

	conn_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_CONNECTED;
	conn_ev->ee_cmev.ec_cm_cookie = ep_rp->ep_cookie;
	conn_ev->ee_cmev.ec_cm_is_passive = B_FALSE;
	conn_ev->ee_cmev.ec_cm_psep_cookie = 0;

	/*
	 * The private data passed back in the connection established
	 * event is what was recvd in the daplka_cm_rc_rep_rcv handler and
	 * saved in ep resource structure.
	 */
	if (ep_rp->ep_priv_len > 0) {
		conn_ev->ee_cmev.ec_cm_ev_priv_data =
		    kmem_zalloc(ep_rp->ep_priv_len, KM_NOSLEEP);

		if (conn_ev->ee_cmev.ec_cm_ev_priv_data == NULL) {
			DERR("rc_conn_est: could not alloc pr_data\n");
			kmem_free(conn_ev, sizeof (daplka_evd_event_t));
			daplka_ep_set_state(ep_rp, old_state, new_state);
			return (IBT_CM_ACCEPT);
		}
		bcopy(ep_rp->ep_priv_data, conn_ev->ee_cmev.ec_cm_ev_priv_data,
		    ep_rp->ep_priv_len);
	}
	conn_ev->ee_cmev.ec_cm_ev_priv_data_len = ep_rp->ep_priv_len;

	D2("rc_conn_est: enqueue event(%p) evdp(%p) pr_data(0x%p), "
	    "pr_len(%d)\n", conn_ev, ep_rp->ep_conn_evd,
	    conn_ev->ee_cmev.ec_cm_ev_priv_data,
	    (int)conn_ev->ee_cmev.ec_cm_ev_priv_data_len);

	/*
	 * transition ep_state to CONNECTED
	 */
	new_state = DAPLKA_EP_STATE_CONNECTED;
	daplka_ep_set_state(ep_rp, old_state, new_state);

	/*
	 * enqueue event onto the conn_evd owned by ep_rp
	 */
	daplka_evd_wakeup(ep_rp->ep_conn_evd,
	    &ep_rp->ep_conn_evd->evd_conn_events, conn_ev);

	return (IBT_CM_ACCEPT);
}
/*
 * processes the FAILURE event
 */
/* ARGSUSED */
static ibt_cm_status_t
daplka_cm_rc_event_failure(daplka_ep_resource_t *ep_rp, ibt_cm_event_t *event,
    ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
{
	daplka_evd_event_t	*disc_ev;
	ibt_priv_data_len_t	pr_len = event->cm_priv_data_len;
	void			*pr_data = event->cm_priv_data;
	uint32_t		old_state, new_state;
	ibt_rc_chan_query_attr_t chan_attrs;
	ibt_status_t		status;

	ASSERT(ep_rp != NULL);
	old_state = new_state = daplka_ep_get_state(ep_rp);
	if (old_state != DAPLKA_EP_STATE_CONNECTING &&
	    old_state != DAPLKA_EP_STATE_DISCONNECTING) {
		/*
		 * we can get here if the connection is being aborted
		 */
		DERR("rc_event_failure: conn aborted, state = %d, "
		    "cf_code = %d, cf_msg = %d, cf_reason = %d\n", old_state,
		    (int)event->cm_event.failed.cf_code,
		    (int)event->cm_event.failed.cf_msg,
		    (int)event->cm_event.failed.cf_reason);

		daplka_ep_set_state(ep_rp, old_state, new_state);
		return (IBT_CM_ACCEPT);
	}

	/*
	 * it's ok for the timer to fire at this point. the
	 * taskq thread that processes the timer will just wait
	 * until we are done with our state transition.
	 */
	if (daplka_cancel_timer(ep_rp) != 0) {
		/*
		 * daplka_cancel_timer returns -1 if the timer is
		 * being processed and 0 for all other cases.
		 * we need to reset ep_state to allow timer processing
		 * to continue.
		 */
		DERR("rc_event_failure: timer is being processed\n");
		daplka_ep_set_state(ep_rp, old_state, new_state);
		return (IBT_CM_ACCEPT);
	}

	bzero(&chan_attrs, sizeof (ibt_rc_chan_query_attr_t));
	status = ibt_query_rc_channel(ep_rp->ep_chan_hdl, &chan_attrs);

	if ((status == IBT_SUCCESS) &&
	    (chan_attrs.rc_state != IBT_STATE_ERROR)) {
		DERR("rc_event_failure: conn abort qpn %d state %d\n",
		    chan_attrs.rc_qpn, chan_attrs.rc_state);

		/* explicitly transition the QP to ERROR state */
		status = ibt_flush_channel(ep_rp->ep_chan_hdl);
	}

	/*
	 * create an event
	 */
	disc_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
	if (disc_ev == NULL) {
		DERR("rc_event_failure: cannot alloc disc_ev\n");
		daplka_ep_set_state(ep_rp, old_state, new_state);
		return (IBT_CM_ACCEPT);
	}

	/*
	 * copy private data into event
	 */
	if (pr_len > 0) {
		disc_ev->ee_cmev.ec_cm_ev_priv_data =
		    kmem_zalloc(pr_len, KM_NOSLEEP);

		if (disc_ev->ee_cmev.ec_cm_ev_priv_data == NULL) {
			DERR("rc_event_failure: cannot alloc pr data\n");
			kmem_free(disc_ev, sizeof (daplka_evd_event_t));
			daplka_ep_set_state(ep_rp, old_state, new_state);
			return (IBT_CM_ACCEPT);
		}
		bcopy(pr_data, disc_ev->ee_cmev.ec_cm_ev_priv_data, pr_len);
	}
	disc_ev->ee_cmev.ec_cm_ev_priv_data_len = pr_len;

	/*
	 * fill in the appropriate event type
	 */
	if (event->cm_event.failed.cf_code == IBT_CM_FAILURE_REJ_RCV) {
		switch (event->cm_event.failed.cf_reason) {
		case IBT_CM_CONSUMER:
			disc_ev->ee_cmev.ec_cm_ev_type =
			    DAPL_IB_CME_DESTINATION_REJECT_PRIVATE_DATA;
			break;
		case IBT_CM_NO_CHAN:
		case IBT_CM_NO_RESC:
			disc_ev->ee_cmev.ec_cm_ev_type =
			    DAPL_IB_CME_DESTINATION_REJECT;
			break;
		default:
			disc_ev->ee_cmev.ec_cm_ev_type =
			    DAPL_IB_CME_DESTINATION_REJECT;
			break;
		}
	} else if (event->cm_event.failed.cf_code == IBT_CM_FAILURE_TIMEOUT) {
		disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_TIMED_OUT;
	} else {
		/* others we'll mark as local failure */
		disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_LOCAL_FAILURE;
	}
	disc_ev->ee_cmev.ec_cm_cookie = ep_rp->ep_cookie;
	disc_ev->ee_cmev.ec_cm_is_passive = B_FALSE;
	disc_ev->ee_cmev.ec_cm_psep_cookie = 0;

	D2("rc_event_failure: enqueue event(%p) evdp(%p) cf_code(%d) "
	    "cf_msg(%d) cf_reason(%d)\n", disc_ev, ep_rp->ep_conn_evd,
	    (int)event->cm_event.failed.cf_code,
	    (int)event->cm_event.failed.cf_msg,
	    (int)event->cm_event.failed.cf_reason);

	/*
	 * transition ep_state to DISCONNECTED
	 */
	new_state = DAPLKA_EP_STATE_DISCONNECTED;
	daplka_ep_set_state(ep_rp, old_state, new_state);

	/*
	 * enqueue event onto the conn_evd owned by ep_rp
	 */
	daplka_evd_wakeup(ep_rp->ep_conn_evd,
	    &ep_rp->ep_conn_evd->evd_conn_events, disc_ev);

	return (IBT_CM_ACCEPT);
}
/*
 * This is the active side CM handler. It gets registered when
 * ibt_open_rc_channel is called.
 */
static ibt_cm_status_t
daplka_cm_rc_handler(void *cm_private, ibt_cm_event_t *event,
    ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
{
	daplka_ep_resource_t	*ep_rp = (daplka_ep_resource_t *)cm_private;

	if (ep_rp == NULL) {
		DERR("rc_handler: ep_rp == NULL\n");
		return (IBT_CM_NO_CHANNEL);
	}
	/*
	 * default is not to return priv data
	 */
	if (ret_args != NULL) {
		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ret_args))
		ret_args->cm_ret_len = 0;
	}

	switch (event->cm_type) {
	case IBT_CM_EVENT_REQ_RCV:
		/* active side should not receive this event */
		D2("rc_handler: IBT_CM_EVENT_REQ_RCV\n");
		break;

	case IBT_CM_EVENT_REP_RCV:
		/* connection accepted by passive side */
		D2("rc_handler: IBT_CM_EVENT_REP_RCV\n");
		return (daplka_cm_rc_rep_rcv(ep_rp, event, ret_args,
		    priv_data, len));

	case IBT_CM_EVENT_CONN_CLOSED:
		D2("rc_handler: IBT_CM_EVENT_CONN_CLOSED %d\n",
		    event->cm_event.closed);
		return (daplka_cm_rc_conn_closed(ep_rp, event, ret_args,
		    priv_data, len));

	case IBT_CM_EVENT_MRA_RCV:
		/* active side does default processing of the MRA event */
		D2("rc_handler: IBT_CM_EVENT_MRA_RCV\n");
		return (IBT_CM_DEFAULT);

	case IBT_CM_EVENT_CONN_EST:
		D2("rc_handler: IBT_CM_EVENT_CONN_EST\n");
		return (daplka_cm_rc_conn_est(ep_rp, event, ret_args,
		    priv_data, len));

	case IBT_CM_EVENT_FAILURE:
		D2("rc_handler: IBT_CM_EVENT_FAILURE\n");
		return (daplka_cm_rc_event_failure(ep_rp, event, ret_args,
		    priv_data, len));

	default:
		D2("rc_handler: invalid event %d\n", event->cm_type);
		break;
	}
	return (IBT_CM_DEFAULT);
}
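/*
 * The active-side handlers mirror the passive ones with three differences:
 * events are tagged ec_cm_is_passive = B_FALSE with a zero psep cookie,
 * private data received at REP_RCV is staged in the EP and only surfaced
 * to userland at CONN_EST, and each terminal event must first stop the
 * connection timer via daplka_cancel_timer before the EP state may change.
 */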
/*
 * creates an IA resource and inserts it into the global resource table.
 */
/* ARGSUSED */
static int
daplka_ia_create(minor_t rnum, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	daplka_ia_resource_t	*ia_rp, *tmp_rp;
	boolean_t		inserted = B_FALSE;
	dapl_ia_create_t	args;
	ibt_hca_hdl_t		hca_hdl;
	ibt_status_t		status;
	ib_gid_t		sgid;
	int			retval;
	ibt_hca_portinfo_t	*pinfop;
	uint_t			pinfon;
	uint_t			size;
	ibt_ar_t		ar_s;
	daplka_hca_t		*hca;

	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ia_create_t),
	    mode);
	if (retval != 0) {
		DERR("ia_create: copyin error %d\n", retval);
		return (EFAULT);
	}
	if (args.ia_version != DAPL_IF_VERSION) {
		DERR("ia_create: invalid version %d, expected version %d\n",
		    args.ia_version, DAPL_IF_VERSION);
		return (EINVAL);
	}

	/*
	 * find the hca with the matching guid
	 */
	mutex_enter(&daplka_dev->daplka_mutex);
	for (hca = daplka_dev->daplka_hca_list_head; hca != NULL;
	    hca = hca->hca_next) {
		if (hca->hca_guid == args.ia_guid) {
			DAPLKA_HOLD_HCA_WITHOUT_LOCK(hca);
			break;
		}
	}
	mutex_exit(&daplka_dev->daplka_mutex);

	if (hca == NULL) {
		DERR("ia_create: guid 0x%016llx not found\n",
		    (longlong_t)args.ia_guid);
		return (EINVAL);
	}

	/*
	 * check whether port number is valid and whether it is up
	 */
	if (args.ia_port > hca->hca_nports) {
		DERR("ia_create: invalid hca_port %d\n", args.ia_port);
		DAPLKA_RELE_HCA(daplka_dev, hca);
		return (EINVAL);
	}
	hca_hdl = hca->hca_hdl;
	if (hca_hdl == NULL) {
		DERR("ia_create: hca_hdl == NULL\n");
		DAPLKA_RELE_HCA(daplka_dev, hca);
		return (EINVAL);
	}
	status = ibt_query_hca_ports(hca_hdl, (uint8_t)args.ia_port,
	    &pinfop, &pinfon, &size);
	if (status != IBT_SUCCESS) {
		DERR("ia_create: ibt_query_hca_ports returned %d\n", status);
		*rvalp = (int)status;
		DAPLKA_RELE_HCA(daplka_dev, hca);
		return (0);
	}
	sgid = pinfop->p_sgid_tbl[0];
	ibt_free_portinfo(pinfop, size);

	ia_rp = kmem_zalloc(sizeof (daplka_ia_resource_t), daplka_km_flags);
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ia_rp))
	DAPLKA_RS_INIT(ia_rp, DAPL_TYPE_IA, rnum, daplka_ia_destroy);

	mutex_init(&ia_rp->ia_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&ia_rp->ia_cv, NULL, CV_DRIVER, NULL);
	ia_rp->ia_hca_hdl = hca_hdl;
	ia_rp->ia_hca_sgid = sgid;
	ia_rp->ia_hca = hca;
	ia_rp->ia_port_num = args.ia_port;
	ia_rp->ia_port_pkey = args.ia_pkey;
	ia_rp->ia_pid = ddi_get_pid();
	ia_rp->ia_async_evd_hkeys = NULL;
	ia_rp->ia_ar_registered = B_FALSE;
	bcopy(args.ia_sadata, ia_rp->ia_sadata, DAPL_ATS_NBYTES);

	/* register Address Record */
	ar_s.ar_gid = ia_rp->ia_hca_sgid;
	ar_s.ar_pkey = ia_rp->ia_port_pkey;
	bcopy(ia_rp->ia_sadata, ar_s.ar_data, DAPL_ATS_NBYTES);
#define	UC(b) ar_s.ar_data[(b)]
	D3("daplka_ia_create: SA[8] %d.%d.%d.%d\n",
	    UC(8), UC(9), UC(10), UC(11));
	D3("daplka_ia_create: SA[12] %d.%d.%d.%d\n",
	    UC(12), UC(13), UC(14), UC(15));
	retval = ibt_register_ar(daplka_dev->daplka_clnt_hdl, &ar_s);
	if (retval != IBT_SUCCESS) {
		DERR("ia_create: failed to register Address Record.\n");
		retval = EINVAL;
		goto cleanup;
	}
	ia_rp->ia_ar_registered = B_TRUE;

	/*
	 * create hash tables for all object types
	 */
	retval = daplka_hash_create(&ia_rp->ia_ep_htbl, DAPLKA_EP_HTBL_SZ,
	    daplka_hash_ep_free, daplka_hash_generic_lookup);
	if (retval != 0) {
		DERR("ia_create: cannot create ep hash table\n");
		goto cleanup;
	}
	retval = daplka_hash_create(&ia_rp->ia_mr_htbl, DAPLKA_MR_HTBL_SZ,
	    daplka_hash_mr_free, daplka_hash_generic_lookup);
	if (retval != 0) {
		DERR("ia_create: cannot create mr hash table\n");
		goto cleanup;
	}
	retval = daplka_hash_create(&ia_rp->ia_mw_htbl, DAPLKA_MW_HTBL_SZ,
	    daplka_hash_mw_free, daplka_hash_generic_lookup);
	if (retval != 0) {
		DERR("ia_create: cannot create mw hash table\n");
		goto cleanup;
	}
	retval = daplka_hash_create(&ia_rp->ia_pd_htbl, DAPLKA_PD_HTBL_SZ,
	    daplka_hash_pd_free, daplka_hash_generic_lookup);
	if (retval != 0) {
		DERR("ia_create: cannot create pd hash table\n");
		goto cleanup;
	}
	retval = daplka_hash_create(&ia_rp->ia_evd_htbl, DAPLKA_EVD_HTBL_SZ,
	    daplka_hash_evd_free, daplka_hash_generic_lookup);
	if (retval != 0) {
		DERR("ia_create: cannot create evd hash table\n");
		goto cleanup;
	}
	retval = daplka_hash_create(&ia_rp->ia_cno_htbl, DAPLKA_CNO_HTBL_SZ,
	    daplka_hash_cno_free, daplka_hash_generic_lookup);
	if (retval != 0) {
		DERR("ia_create: cannot create cno hash table\n");
		goto cleanup;
	}
	retval = daplka_hash_create(&ia_rp->ia_sp_htbl, DAPLKA_SP_HTBL_SZ,
	    daplka_hash_sp_free, daplka_hash_generic_lookup);
	if (retval != 0) {
		DERR("ia_create: cannot create sp hash table\n");
		goto cleanup;
	}
	retval = daplka_hash_create(&ia_rp->ia_srq_htbl, DAPLKA_SRQ_HTBL_SZ,
	    daplka_hash_srq_free, daplka_hash_generic_lookup);
	if (retval != 0) {
		DERR("ia_create: cannot create srq hash table\n");
		goto cleanup;
	}

	/*
	 * insert ia_rp into the global resource table
	 */
	retval = daplka_resource_insert(rnum, (daplka_resource_t *)ia_rp);
	if (retval != 0) {
		DERR("ia_create: cannot insert resource\n");
		goto cleanup;
	}
	inserted = B_TRUE;
	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*ia_rp))

	args.ia_resnum = rnum;
	retval = copyout(&args, (void *)arg, sizeof (dapl_ia_create_t));
	if (retval != 0) {
		DERR("ia_create: copyout error %d\n", retval);
		retval = EFAULT;
		goto cleanup;
	}
	return (0);

cleanup:;
	if (inserted) {
		tmp_rp = (daplka_ia_resource_t *)daplka_resource_remove(rnum);
		if (tmp_rp != ia_rp) {
			/*
			 * we can return here because another thread must
			 * have freed up the resource
			 */
			DERR("ia_create: cannot remove resource\n");
			return (retval);
		}
	}
	DAPLKA_RS_UNREF(ia_rp);
	return (retval);
}
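/*
 * Error handling in ia_create leans on the resource framework: once
 * DAPLKA_RS_INIT has registered daplka_ia_destroy as the destructor,
 * dropping the creation reference in the cleanup path is enough to tear
 * down whatever state was built. The 'inserted' flag only records whether
 * the global resource table entry must be removed first, so that no other
 * thread can look up a half-destroyed IA.
 */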
/*
 * destroys an IA resource
 */
static int
daplka_ia_destroy(daplka_resource_t *gen_rp)
{
	daplka_ia_resource_t	*ia_rp = (daplka_ia_resource_t *)gen_rp;
	daplka_async_evd_hkey_t	*hkp;
	int			cnt = 0;
	ibt_ar_t		ar_s;

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ia_rp))
	D3("ia_destroy: entering, ia_rp 0x%p\n", ia_rp);

	/* deregister Address Record */
	if (ia_rp->ia_ar_registered) {
		ar_s.ar_gid = ia_rp->ia_hca_sgid;
		ar_s.ar_pkey = ia_rp->ia_port_pkey;
		bcopy(ia_rp->ia_sadata, ar_s.ar_data, DAPL_ATS_NBYTES);
		(void) ibt_deregister_ar(daplka_dev->daplka_clnt_hdl, &ar_s);
		ia_rp->ia_ar_registered = B_FALSE;
	}

	/*
	 * destroy hash tables. make sure resources are
	 * destroyed in the correct order.
	 */
	daplka_hash_destroy(&ia_rp->ia_mw_htbl);
	daplka_hash_destroy(&ia_rp->ia_mr_htbl);
	daplka_hash_destroy(&ia_rp->ia_ep_htbl);
	daplka_hash_destroy(&ia_rp->ia_srq_htbl);
	daplka_hash_destroy(&ia_rp->ia_evd_htbl);
	daplka_hash_destroy(&ia_rp->ia_cno_htbl);
	daplka_hash_destroy(&ia_rp->ia_pd_htbl);
	daplka_hash_destroy(&ia_rp->ia_sp_htbl);

	/*
	 * free the async evd list
	 */
	hkp = ia_rp->ia_async_evd_hkeys;
	while (hkp != NULL) {
		daplka_async_evd_hkey_t	*free_hkp;

		cnt++;
		free_hkp = hkp;
		hkp = hkp->aeh_next;
		kmem_free(free_hkp, sizeof (*free_hkp));
	}
	D3("ia_destroy: freed %d hkeys\n", cnt);

	mutex_destroy(&ia_rp->ia_lock);
	cv_destroy(&ia_rp->ia_cv);
	ia_rp->ia_hca_hdl = NULL;

	DAPLKA_RS_FINI(ia_rp);

	if (ia_rp->ia_hca != NULL)
		DAPLKA_RELE_HCA(daplka_dev, ia_rp->ia_hca);

	kmem_free(ia_rp, sizeof (daplka_ia_resource_t));
	D3("ia_destroy: exiting, ia_rp 0x%p\n", ia_rp);
	return (0);
}
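/*
 * The destruction order of the hash tables above is deliberate: memory
 * windows before memory regions, endpoints and SRQs before the EVDs they
 * post events to, and protection domains only after every object
 * allocated against them is gone. The SP table goes last since
 * daplka_hash_sp_free also touches the global SP table.
 */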
static void
daplka_async_event_create(ibt_async_code_t code, ibt_async_event_t *event,
    uint64_t cookie, daplka_ia_resource_t *ia_rp)
{
	daplka_evd_event_t	*evp;
	daplka_evd_resource_t	*async_evd;
	daplka_async_evd_hkey_t	*curr;

	mutex_enter(&ia_rp->ia_lock);
	curr = ia_rp->ia_async_evd_hkeys;
	while (curr != NULL) {
		/*
		 * Note: this allocation does not zero out the buffer
		 * since we init all the fields.
		 */
		evp = kmem_alloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
		if (evp == NULL) {
			DERR("async_event_enqueue: event alloc failed\n");
			curr = curr->aeh_next;
			continue;
		}
		evp->ee_next = NULL;
		evp->ee_aev.ibae_type = code;
		evp->ee_aev.ibae_hca_guid = event->ev_hca_guid;
		evp->ee_aev.ibae_cookie = cookie;
		evp->ee_aev.ibae_port = event->ev_port;

		/*
		 * Lookup the async evd corresponding to this ia and enqueue
		 * evp and wakeup any waiter.
		 */
		async_evd = (daplka_evd_resource_t *)
		    daplka_hash_lookup(&ia_rp->ia_evd_htbl, curr->aeh_evd_hkey);
		if (async_evd == NULL) { /* async evd is being freed */
			DERR("async_event_enqueue: ia_rp(%p) async_evd %llx "
			    "!found\n", ia_rp, (longlong_t)curr->aeh_evd_hkey);
			kmem_free(evp, sizeof (daplka_evd_event_t));
			curr = curr->aeh_next;
			continue;
		}
		daplka_evd_wakeup(async_evd, &async_evd->evd_async_events, evp);

		/* decrement refcnt on async_evd */
		DAPLKA_RS_UNREF(async_evd);
		curr = curr->aeh_next;
	}
	mutex_exit(&ia_rp->ia_lock);
}
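/*
 * This path runs from IBTF async callback context, so allocations use
 * KM_NOSLEEP and a failed allocation simply skips that EVD instead of
 * blocking. daplka_hash_lookup returns the async EVD held, and the
 * reference is dropped right after daplka_evd_wakeup queues the event.
 */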
/*
 * This routine is called in kernel context
 */
static void
daplka_rc_async_handler(void *clnt_private, ibt_hca_hdl_t hca_hdl,
    ibt_async_code_t code, ibt_async_event_t *event)
{
	daplka_ep_resource_t	*epp;
	daplka_ia_resource_t	*ia_rp;
	minor_t			ia_rnum;

	if (event->ev_chan_hdl == NULL) {
		DERR("daplka_rc_async_handler: ev_chan_hdl is NULL\n");
		return;
	}

	mutex_enter(&daplka_dev->daplka_mutex);
	epp = ibt_get_chan_private(event->ev_chan_hdl);
	if (epp == NULL) {
		mutex_exit(&daplka_dev->daplka_mutex);
		DERR("daplka_rc_async_handler: chan_private is NULL\n");
		return;
	}

	/* grab a reference to this ep */
	DAPLKA_RS_REF(epp);
	mutex_exit(&daplka_dev->daplka_mutex);

	/*
	 * The endpoint resource has the resource number corresponding to
	 * the IA resource. Use that to lookup the ia resource entry
	 */
	ia_rnum = DAPLKA_RS_RNUM(epp);
	ia_rp = (daplka_ia_resource_t *)daplka_resource_lookup(ia_rnum);
	if ((ia_rp == NULL) || DAPLKA_RS_RESERVED(ia_rp)) {
		D2("daplka_rc_async_handler: resource (%d) not found\n",
		    ia_rnum);
		DAPLKA_RS_UNREF(epp);
		return;
	}

	/*
	 * Create an async event and chain it to the async evd
	 */
	daplka_async_event_create(code, event, epp->ep_cookie, ia_rp);

	DAPLKA_RS_UNREF(ia_rp);
	DAPLKA_RS_UNREF(epp);
}
/*
 * This routine is called in kernel context
 */
static void
daplka_cq_async_handler(void *clnt_private, ibt_hca_hdl_t hca_hdl,
    ibt_async_code_t code, ibt_async_event_t *event)
{
	daplka_evd_resource_t	*evdp;
	daplka_ia_resource_t	*ia_rp;
	minor_t			ia_rnum;

	if (event->ev_cq_hdl == NULL)
		return;

	mutex_enter(&daplka_dev->daplka_mutex);
	evdp = ibt_get_cq_private(event->ev_cq_hdl);
	if (evdp == NULL) {
		mutex_exit(&daplka_dev->daplka_mutex);
		DERR("daplka_cq_async_handler: get cq private(%p) failed\n",
		    event->ev_cq_hdl);
		return;
	}
	/* grab a reference to this evd resource */
	DAPLKA_RS_REF(evdp);
	mutex_exit(&daplka_dev->daplka_mutex);

	/*
	 * The EVD resource has the resource number corresponding to
	 * the IA resource. Use that to lookup the ia resource entry
	 */
	ia_rnum = DAPLKA_RS_RNUM(evdp);
	ia_rp = (daplka_ia_resource_t *)daplka_resource_lookup(ia_rnum);
	if ((ia_rp == NULL) || DAPLKA_RS_RESERVED(ia_rp)) {
		DERR("daplka_cq_async_handler: resource (%d) not found\n",
		    ia_rnum);
		DAPLKA_RS_UNREF(evdp);
		return;
	}

	/*
	 * Create an async event and chain it to the async evd
	 */
	daplka_async_event_create(code, event, evdp->evd_cookie, ia_rp);

	/* release all the refcounts that were acquired */
	DAPLKA_RS_UNREF(ia_rp);
	DAPLKA_RS_UNREF(evdp);
}
/*
 * This routine is called in kernel context, handles unaffiliated async errors
 */
static void
daplka_un_async_handler(void *clnt_private, ibt_hca_hdl_t hca_hdl,
    ibt_async_code_t code, ibt_async_event_t *event)
{
	int			i, j;
	daplka_resource_blk_t	*blk;
	daplka_resource_t	*rp;
	daplka_ia_resource_t	*ia_rp;

	/*
	 * Walk the resource table looking for an ia that matches the
	 * hca/port for which we got the event.
	 */
	rw_enter(&daplka_resource.daplka_rct_lock, RW_READER);
	for (i = 0; i < daplka_resource.daplka_rc_len; i++) {
		blk = daplka_resource.daplka_rc_root[i];
		if (blk == NULL)
			continue;
		for (j = 0; j < DAPLKA_RC_BLKSZ; j++) {
			rp = blk->daplka_rcblk_blks[j];
			if ((rp == NULL) ||
			    ((intptr_t)rp == DAPLKA_RC_RESERVED) ||
			    (rp->rs_type != DAPL_TYPE_IA)) {
				continue;
			}
			/*
			 * rp is an IA resource; check if it belongs
			 * to the hca/port for which we got the event
			 */
			ia_rp = (daplka_ia_resource_t *)rp;
			DAPLKA_RS_REF(ia_rp);
			if ((hca_hdl == ia_rp->ia_hca_hdl) &&
			    (event->ev_port == ia_rp->ia_port_num)) {
				/*
				 * walk the ep hash table. Acquire a
				 * reader lock. NULL dgid indicates
				 * local port up event.
				 */
				daplka_hash_walk(&ia_rp->ia_ep_htbl,
				    daplka_ep_failback, NULL, RW_READER);
			}
			DAPLKA_RS_UNREF(ia_rp);
		}
	}
	rw_exit(&daplka_resource.daplka_rct_lock);
}
static ibt_status_t
daplka_handle_hca_detach_event(ibt_async_event_t *event)
{
	daplka_hca_t	*hca;

	/*
	 * find the hca with the matching guid
	 */
	mutex_enter(&daplka_dev->daplka_mutex);
	for (hca = daplka_dev->daplka_hca_list_head; hca != NULL;
	    hca = hca->hca_next) {
		if (hca->hca_guid == event->ev_hca_guid) {
			if (DAPLKA_HCA_BUSY(hca)) {
				mutex_exit(&daplka_dev->daplka_mutex);
				return (IBT_HCA_RESOURCES_NOT_FREED);
			}
			daplka_dequeue_hca(daplka_dev, hca);
			break;
		}
	}
	mutex_exit(&daplka_dev->daplka_mutex);

	if (hca == NULL)
		return (IBT_FAILURE);

	return (daplka_fini_hca(daplka_dev, hca));
}
/*
 * This routine is called in kernel context
 */
static void
daplka_async_handler(void *clnt_private, ibt_hca_hdl_t hca_hdl,
    ibt_async_code_t code, ibt_async_event_t *event)
{
	switch (code) {
	case IBT_ERROR_CATASTROPHIC_CHAN:
	case IBT_ERROR_INVALID_REQUEST_CHAN:
	case IBT_ERROR_ACCESS_VIOLATION_CHAN:
	case IBT_ERROR_PATH_MIGRATE_REQ:
		D2("daplka_async_handler(): Channel affiliated=0x%x\n", code);
		/* these events are affiliated with the RC channel */
		daplka_rc_async_handler(clnt_private, hca_hdl, code, event);
		break;
	case IBT_ERROR_CQ:
		/* this event is affiliated with the CQ */
		D2("daplka_async_handler(): IBT_ERROR_CQ\n");
		daplka_cq_async_handler(clnt_private, hca_hdl, code, event);
		break;
	case IBT_ERROR_PORT_DOWN:
		D2("daplka_async_handler(): IBT_PORT_DOWN\n");
		break;
	case IBT_EVENT_PORT_UP:
		D2("daplka_async_handler(): IBT_PORT_UP\n");
		if (daplka_apm) {
			daplka_un_async_handler(clnt_private, hca_hdl, code,
			    event);
		}
		break;
	case IBT_HCA_ATTACH_EVENT:
		/*
		 * NOTE: In some error recovery paths, it is possible to
		 * receive IBT_HCA_ATTACH_EVENTs on already known HCAs.
		 */
		D2("daplka_async_handler(): IBT_HCA_ATTACH\n");
		(void) daplka_init_hca(daplka_dev, event->ev_hca_guid);
		break;
	case IBT_HCA_DETACH_EVENT:
		D2("daplka_async_handler(): IBT_HCA_DETACH\n");
		/* Free all hca resources and close the HCA. */
		(void) daplka_handle_hca_detach_event(event);
		break;
	case IBT_EVENT_PATH_MIGRATED:
		/* This event is affiliated with APM */
		D2("daplka_async_handler(): IBT_PATH_MIGRATED.\n");
		break;
	default:
		D2("daplka_async_handler(): unhandled code = 0x%x\n", code);
		break;
	}
}
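/*
 * Async event routing, for quick reference: channel-affiliated errors and
 * IBT_ERROR_PATH_MIGRATE_REQ go to daplka_rc_async_handler, CQ errors to
 * daplka_cq_async_handler, and IBT_EVENT_PORT_UP (when daplka_apm is
 * enabled) to daplka_un_async_handler to attempt failback. HCA attach and
 * detach maintain the driver's HCA list; IBT_ERROR_PORT_DOWN and
 * IBT_EVENT_PATH_MIGRATED are only logged.
 */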
/*
 * This routine is called in kernel context related to Subnet events
 */
static void
daplka_sm_notice_handler(void *arg, ib_gid_t gid, ibt_subnet_event_code_t code,
    ibt_subnet_event_t *event)
{
	ib_gid_t	*sgid = &gid;
	ib_gid_t	*dgid;

	dgid = &event->sm_notice_gid;
	switch (code) {
	case IBT_SM_EVENT_GID_AVAIL:
		/* This event is affiliated with remote port up */
		D2("daplka_sm_notice_handler(): IBT_SM_EVENT_GID_AVAIL\n");
		if (daplka_apm)
			daplka_sm_gid_avail(sgid, dgid);
		return;
	case IBT_SM_EVENT_GID_UNAVAIL:
		/* This event is affiliated with remote port down */
		D2("daplka_sm_notice_handler(): IBT_SM_EVENT_GID_UNAVAIL\n");
		return;
	default:
		D2("daplka_sm_notice_handler(): unhandled IBT_SM_EVENT_[%d]\n",
		    code);
		return;
	}
}
/*
 * This routine is called in kernel context, handles Subnet GID avail events
 * which correspond to remote port up. Setting up alternate path or path
 * migration (failback) has to be initiated from the active side of the
 * original connect.
 */
static void
daplka_sm_gid_avail(ib_gid_t *sgid, ib_gid_t *dgid)
{
	int			i, j;
	daplka_resource_blk_t	*blk;
	daplka_resource_t	*rp;
	daplka_ia_resource_t	*ia_rp;

	D2("daplka_sm_gid_avail: sgid=%llx:%llx dgid=%llx:%llx\n",
	    (longlong_t)sgid->gid_prefix, (longlong_t)sgid->gid_guid,
	    (longlong_t)dgid->gid_prefix, (longlong_t)dgid->gid_guid);

	/*
	 * Walk the resource table looking for an ia that matches the sgid
	 */
	rw_enter(&daplka_resource.daplka_rct_lock, RW_READER);
	for (i = 0; i < daplka_resource.daplka_rc_len; i++) {
		blk = daplka_resource.daplka_rc_root[i];
		if (blk == NULL)
			continue;
		for (j = 0; j < DAPLKA_RC_BLKSZ; j++) {
			rp = blk->daplka_rcblk_blks[j];
			if ((rp == NULL) ||
			    ((intptr_t)rp == DAPLKA_RC_RESERVED) ||
			    (rp->rs_type != DAPL_TYPE_IA)) {
				continue;
			}
			/*
			 * rp is an IA resource; check if its gid
			 * matches the calling sgid
			 */
			ia_rp = (daplka_ia_resource_t *)rp;
			DAPLKA_RS_REF(ia_rp);
			if ((sgid->gid_prefix ==
			    ia_rp->ia_hca_sgid.gid_prefix) &&
			    (sgid->gid_guid == ia_rp->ia_hca_sgid.gid_guid)) {
				/*
				 * walk the ep hash table. Acquire a
				 * reader lock.
				 */
				daplka_hash_walk(&ia_rp->ia_ep_htbl,
				    daplka_ep_failback,
				    (void *)dgid, RW_READER);
			}
			DAPLKA_RS_UNREF(ia_rp);
		}
	}
	rw_exit(&daplka_resource.daplka_rct_lock);
}
/*
 * This routine is called in kernel context to get and set an alternate path
 */
static int
daplka_ep_altpath(daplka_ep_resource_t *ep_rp, ib_gid_t *dgid)
{
	ibt_alt_path_info_t	path_info;
	ibt_alt_path_attr_t	path_attr;
	ibt_ap_returns_t	ap_rets;
	ibt_status_t		status;

	D2("daplka_ep_altpath : ibt_get_alt_path()\n");
	bzero(&path_info, sizeof (ibt_alt_path_info_t));
	bzero(&path_attr, sizeof (ibt_alt_path_attr_t));
	if (dgid != NULL) {
		path_attr.apa_sgid = ep_rp->ep_sgid;
		path_attr.apa_dgid = *dgid;
	}
	status = ibt_get_alt_path(ep_rp->ep_chan_hdl, IBT_PATH_AVAIL,
	    &path_attr, &path_info);
	if (status != IBT_SUCCESS) {
		DERR("daplka_ep_altpath : ibt_get_alt_path failed %d\n",
		    status);
		return (1);
	}

	D2("daplka_ep_altpath : ibt_set_alt_path()\n");
	bzero(&ap_rets, sizeof (ibt_ap_returns_t));
	status = ibt_set_alt_path(ep_rp->ep_chan_hdl, IBT_BLOCKING,
	    &path_info, NULL, 0, &ap_rets);
	if ((status != IBT_SUCCESS) ||
	    (ap_rets.ap_status != IBT_CM_AP_LOADED)) {
		DERR("daplka_ep_altpath : ibt_set_alt_path failed "
		    "status %d ap_status %d\n", status, ap_rets.ap_status);
		return (1);
	}
	return (0);
}
/*
 * This routine is called in kernel context to failback to the original path
 */
static int
daplka_ep_failback(void *objp, void *arg)
{
	daplka_ep_resource_t	*ep_rp = (daplka_ep_resource_t *)objp;
	ib_gid_t		*dgid;
	ibt_status_t		status;
	ibt_rc_chan_query_attr_t chan_attrs;
	int			i;

	ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP);
	D2("daplka_ep_failback ep : sgid=%llx:%llx dgid=%llx:%llx\n",
	    (longlong_t)ep_rp->ep_sgid.gid_prefix,
	    (longlong_t)ep_rp->ep_sgid.gid_guid,
	    (longlong_t)ep_rp->ep_dgid.gid_prefix,
	    (longlong_t)ep_rp->ep_dgid.gid_guid);

	/*
	 * daplka_ep_failback is called from daplka_hash_walk
	 * which holds the read lock on hash table to protect
	 * the endpoint resource from removal
	 */

	mutex_enter(&ep_rp->ep_lock);
	/* check for unconnected endpoints */
	/* first check for ep state */
	if (ep_rp->ep_state != DAPLKA_EP_STATE_CONNECTED) {
		mutex_exit(&ep_rp->ep_lock);
		D2("daplka_ep_failback : endpoints not connected\n");
		return (0);
	}

	/* second check for gids */
	if (((ep_rp->ep_sgid.gid_prefix == 0) &&
	    (ep_rp->ep_sgid.gid_guid == 0)) ||
	    ((ep_rp->ep_dgid.gid_prefix == 0) &&
	    (ep_rp->ep_dgid.gid_guid == 0))) {
		mutex_exit(&ep_rp->ep_lock);
		D2("daplka_ep_failback : skip unconnected endpoints\n");
		return (0);
	}

	/*
	 * matching destination ep
	 * when dgid is NULL, the async event is a local port up.
	 * dgid becomes wild card, i.e. all endpoints match
	 */
	dgid = (ib_gid_t *)arg;
	if (dgid == NULL) {
		/* ignore loopback ep */
		if ((ep_rp->ep_sgid.gid_prefix == ep_rp->ep_dgid.gid_prefix) &&
		    (ep_rp->ep_sgid.gid_guid == ep_rp->ep_dgid.gid_guid)) {
			mutex_exit(&ep_rp->ep_lock);
			D2("daplka_ep_failback : skip loopback endpoints\n");
			return (0);
		}
	} else {
		/* matching remote ep */
		if ((ep_rp->ep_dgid.gid_prefix != dgid->gid_prefix) ||
		    (ep_rp->ep_dgid.gid_guid != dgid->gid_guid)) {
			mutex_exit(&ep_rp->ep_lock);
			D2("daplka_ep_failback : unrelated endpoints\n");
			return (0);
		}
	}

	/* call get and set altpath with original dgid used in ep_connect */
	if (daplka_ep_altpath(ep_rp, &ep_rp->ep_dgid)) {
		mutex_exit(&ep_rp->ep_lock);
		return (0);
	}

	/*
	 * wait for migration state to be ARMed
	 * e.g. a post_send msg will transit mig_state from REARM to ARM
	 */
	for (i = 0; i < daplka_query_aft_setaltpath; i++) {
		bzero(&chan_attrs, sizeof (ibt_rc_chan_query_attr_t));
		status = ibt_query_rc_channel(ep_rp->ep_chan_hdl, &chan_attrs);
		if (status != IBT_SUCCESS) {
			mutex_exit(&ep_rp->ep_lock);
			DERR("daplka_ep_altpath : ibt_query_rc_channel err\n");
			return (0);
		}
		if (chan_attrs.rc_mig_state == IBT_STATE_ARMED)
			break;
	}

	D2("daplka_ep_altpath : query[%d] mig_st=%d\n",
	    i, chan_attrs.rc_mig_state);
	D2("daplka_ep_altpath : P sgid=%llx:%llx dgid=%llx:%llx\n",
	    (longlong_t)
	    chan_attrs.rc_prim_path.cep_adds_vect.av_sgid.gid_prefix,
	    (longlong_t)chan_attrs.rc_prim_path.cep_adds_vect.av_sgid.gid_guid,
	    (longlong_t)
	    chan_attrs.rc_prim_path.cep_adds_vect.av_dgid.gid_prefix,
	    (longlong_t)chan_attrs.rc_prim_path.cep_adds_vect.av_dgid.gid_guid);
	D2("daplka_ep_altpath : A sgid=%llx:%llx dgid=%llx:%llx\n",
	    (longlong_t)chan_attrs.rc_alt_path.cep_adds_vect.av_sgid.gid_prefix,
	    (longlong_t)chan_attrs.rc_alt_path.cep_adds_vect.av_sgid.gid_guid,
	    (longlong_t)chan_attrs.rc_alt_path.cep_adds_vect.av_dgid.gid_prefix,
	    (longlong_t)chan_attrs.rc_alt_path.cep_adds_vect.av_dgid.gid_guid);

	/* skip failback on ARMed state not reached or env override */
	if ((i >= daplka_query_aft_setaltpath) || (daplka_failback == 0)) {
		mutex_exit(&ep_rp->ep_lock);
		DERR("daplka_ep_altpath : ARMed state not reached\n");
		return (0);
	}

	D2("daplka_ep_failback : ibt_migrate_path() to original ep\n");
	status = ibt_migrate_path(ep_rp->ep_chan_hdl);
	if (status != IBT_SUCCESS) {
		mutex_exit(&ep_rp->ep_lock);
		DERR("daplka_ep_failback : migration failed "
		    "status %d\n", status);
		return (0);
	}

	/* call get and set altpath with NULL dgid to indicate unspecified */
	(void) daplka_ep_altpath(ep_rp, NULL);
	mutex_exit(&ep_rp->ep_lock);
	return (0);
}
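/*
 * End-to-end APM recovery sketch (summarizing the three routines above):
 * a GID_AVAIL subnet notice (remote port up) or a local PORT_UP event
 * walks every EP of each matching IA. For a connected, matching EP,
 * daplka_ep_altpath loads the original dgid as the alternate path, the
 * polling loop waits for the channel's migration state to reach
 * IBT_STATE_ARMED (bounded by daplka_query_aft_setaltpath), and
 * ibt_migrate_path fails the connection back to the original path. A
 * final daplka_ep_altpath(ep_rp, NULL) re-arms an unspecified alternate
 * path for the next failover. Setting daplka_failback to 0 disables the
 * migration step.
 */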
/*
 * IBTF wrappers used for resource accounting
 */
static ibt_status_t
daplka_ibt_alloc_rc_channel(daplka_ep_resource_t *ep_rp, ibt_hca_hdl_t hca_hdl,
    ibt_chan_alloc_flags_t flags, ibt_rc_chan_alloc_args_t *args,
    ibt_channel_hdl_t *chan_hdl_p, ibt_chan_sizes_t *sizes)
{
	daplka_hca_t	*hca_p;
	uint32_t	max_qps;
	boolean_t	acct_enabled;
	ibt_status_t	status;

	acct_enabled = daplka_accounting_enabled;
	hca_p = ep_rp->ep_hca;
	max_qps = daplka_max_qp_percent * hca_p->hca_attr.hca_max_chans / 100;

	if (acct_enabled) {
		if (daplka_max_qp_percent != 0 &&
		    max_qps <= hca_p->hca_qp_count) {
			DERR("ibt_alloc_rc_channel: resource limit exceeded "
			    "(limit %d, count %d)\n", max_qps,
			    hca_p->hca_qp_count);
			return (IBT_INSUFF_RESOURCE);
		}
		DAPLKA_RS_ACCT_INC(ep_rp, 1);
		atomic_inc_32(&hca_p->hca_qp_count);
	}
	status = ibt_alloc_rc_channel(hca_hdl, flags, args, chan_hdl_p, sizes);

	if (status != IBT_SUCCESS && acct_enabled) {
		DAPLKA_RS_ACCT_DEC(ep_rp, 1);
		atomic_dec_32(&hca_p->hca_qp_count);
	}
	return (status);
}
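/*
 * The same accounting pattern repeats in each wrapper below: charge the
 * resource and bump the per-HCA count before calling into IBTF, roll both
 * back if the call fails, and on the free side decrement only when
 * DAPLKA_RS_ACCT_CHARGED shows a charge was recorded (accounting may have
 * been disabled when the object was allocated).
 */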
static ibt_status_t
daplka_ibt_free_channel(daplka_ep_resource_t *ep_rp, ibt_channel_hdl_t chan_hdl)
{
	daplka_hca_t	*hca_p;
	ibt_status_t	status;

	hca_p = ep_rp->ep_hca;

	status = ibt_free_channel(chan_hdl);
	if (status != IBT_SUCCESS) {
		return (status);
	}
	if (DAPLKA_RS_ACCT_CHARGED(ep_rp) > 0) {
		DAPLKA_RS_ACCT_DEC(ep_rp, 1);
		atomic_dec_32(&hca_p->hca_qp_count);
	}
	return (status);
}
static ibt_status_t
daplka_ibt_alloc_cq(daplka_evd_resource_t *evd_rp, ibt_hca_hdl_t hca_hdl,
    ibt_cq_attr_t *cq_attr, ibt_cq_hdl_t *ibt_cq_p, uint32_t *real_size)
{
	daplka_hca_t	*hca_p;
	uint32_t	max_cqs;
	boolean_t	acct_enabled;
	ibt_status_t	status;

	acct_enabled = daplka_accounting_enabled;
	hca_p = evd_rp->evd_hca;
	max_cqs = daplka_max_cq_percent * hca_p->hca_attr.hca_max_cq / 100;

	if (acct_enabled) {
		if (daplka_max_cq_percent != 0 &&
		    max_cqs <= hca_p->hca_cq_count) {
			DERR("ibt_alloc_cq: resource limit exceeded "
			    "(limit %d, count %d)\n", max_cqs,
			    hca_p->hca_cq_count);
			return (IBT_INSUFF_RESOURCE);
		}
		DAPLKA_RS_ACCT_INC(evd_rp, 1);
		atomic_inc_32(&hca_p->hca_cq_count);
	}
	status = ibt_alloc_cq(hca_hdl, cq_attr, ibt_cq_p, real_size);

	if (status != IBT_SUCCESS && acct_enabled) {
		DAPLKA_RS_ACCT_DEC(evd_rp, 1);
		atomic_dec_32(&hca_p->hca_cq_count);
	}
	return (status);
}
static ibt_status_t
daplka_ibt_free_cq(daplka_evd_resource_t *evd_rp, ibt_cq_hdl_t cq_hdl)
{
	daplka_hca_t	*hca_p;
	ibt_status_t	status;

	hca_p = evd_rp->evd_hca;

	status = ibt_free_cq(cq_hdl);
	if (status != IBT_SUCCESS) {
		return (status);
	}
	if (DAPLKA_RS_ACCT_CHARGED(evd_rp) > 0) {
		DAPLKA_RS_ACCT_DEC(evd_rp, 1);
		atomic_dec_32(&hca_p->hca_cq_count);
	}
	return (status);
}
static ibt_status_t
daplka_ibt_alloc_pd(daplka_pd_resource_t *pd_rp, ibt_hca_hdl_t hca_hdl,
    ibt_pd_flags_t flags, ibt_pd_hdl_t *pd_hdl_p)
{
	daplka_hca_t	*hca_p;
	uint32_t	max_pds;
	boolean_t	acct_enabled;
	ibt_status_t	status;

	acct_enabled = daplka_accounting_enabled;
	hca_p = pd_rp->pd_hca;
	max_pds = daplka_max_pd_percent * hca_p->hca_attr.hca_max_pd / 100;

	if (acct_enabled) {
		if (daplka_max_pd_percent != 0 &&
		    max_pds <= hca_p->hca_pd_count) {
			DERR("ibt_alloc_pd: resource limit exceeded "
			    "(limit %d, count %d)\n", max_pds,
			    hca_p->hca_pd_count);
			return (IBT_INSUFF_RESOURCE);
		}
		DAPLKA_RS_ACCT_INC(pd_rp, 1);
		atomic_inc_32(&hca_p->hca_pd_count);
	}
	status = ibt_alloc_pd(hca_hdl, flags, pd_hdl_p);

	if (status != IBT_SUCCESS && acct_enabled) {
		DAPLKA_RS_ACCT_DEC(pd_rp, 1);
		atomic_dec_32(&hca_p->hca_pd_count);
	}
	return (status);
}
static ibt_status_t
daplka_ibt_free_pd(daplka_pd_resource_t *pd_rp, ibt_hca_hdl_t hca_hdl,
    ibt_pd_hdl_t pd_hdl)
{
	daplka_hca_t	*hca_p;
	ibt_status_t	status;

	hca_p = pd_rp->pd_hca;

	status = ibt_free_pd(hca_hdl, pd_hdl);
	if (status != IBT_SUCCESS) {
		return (status);
	}
	if (DAPLKA_RS_ACCT_CHARGED(pd_rp) > 0) {
		DAPLKA_RS_ACCT_DEC(pd_rp, 1);
		atomic_dec_32(&hca_p->hca_pd_count);
	}
	return (status);
}
static ibt_status_t
daplka_ibt_alloc_mw(daplka_mw_resource_t *mw_rp, ibt_hca_hdl_t hca_hdl,
    ibt_pd_hdl_t pd_hdl, ibt_mw_flags_t flags, ibt_mw_hdl_t *mw_hdl_p,
    ibt_rkey_t *rkey_p)
{
	daplka_hca_t	*hca_p;
	uint32_t	max_mws;
	boolean_t	acct_enabled;
	ibt_status_t	status;

	acct_enabled = daplka_accounting_enabled;
	hca_p = mw_rp->mw_hca;
	max_mws = daplka_max_mw_percent * hca_p->hca_attr.hca_max_mem_win / 100;

	if (acct_enabled) {
		if (daplka_max_mw_percent != 0 &&
		    max_mws <= hca_p->hca_mw_count) {
			DERR("ibt_alloc_mw: resource limit exceeded "
			    "(limit %d, count %d)\n", max_mws,
			    hca_p->hca_mw_count);
			return (IBT_INSUFF_RESOURCE);
		}
		DAPLKA_RS_ACCT_INC(mw_rp, 1);
		atomic_inc_32(&hca_p->hca_mw_count);
	}
	status = ibt_alloc_mw(hca_hdl, pd_hdl, flags, mw_hdl_p, rkey_p);

	if (status != IBT_SUCCESS && acct_enabled) {
		DAPLKA_RS_ACCT_DEC(mw_rp, 1);
		atomic_dec_32(&hca_p->hca_mw_count);
	}
	return (status);
}
static ibt_status_t
daplka_ibt_free_mw(daplka_mw_resource_t *mw_rp, ibt_hca_hdl_t hca_hdl,
    ibt_mw_hdl_t mw_hdl)
{
	daplka_hca_t	*hca_p;
	ibt_status_t	status;

	hca_p = mw_rp->mw_hca;

	status = ibt_free_mw(hca_hdl, mw_hdl);
	if (status != IBT_SUCCESS) {
		return (status);
	}
	if (DAPLKA_RS_ACCT_CHARGED(mw_rp) > 0) {
		DAPLKA_RS_ACCT_DEC(mw_rp, 1);
		atomic_dec_32(&hca_p->hca_mw_count);
	}
	return (status);
}
static ibt_status_t
daplka_ibt_register_mr(daplka_mr_resource_t *mr_rp, ibt_hca_hdl_t hca_hdl,
    ibt_pd_hdl_t pd_hdl, ibt_mr_attr_t *mr_attr, ibt_mr_hdl_t *mr_hdl_p,
    ibt_mr_desc_t *mr_desc_p)
{
	daplka_hca_t	*hca_p;
	uint32_t	max_mrs;
	boolean_t	acct_enabled;
	ibt_status_t	status;

	acct_enabled = daplka_accounting_enabled;
	hca_p = mr_rp->mr_hca;
	max_mrs = daplka_max_mr_percent * hca_p->hca_attr.hca_max_memr / 100;

	if (acct_enabled) {
		if (daplka_max_mr_percent != 0 &&
		    max_mrs <= hca_p->hca_mr_count) {
			DERR("ibt_register_mr: resource limit exceeded "
			    "(limit %d, count %d)\n", max_mrs,
			    hca_p->hca_mr_count);
			return (IBT_INSUFF_RESOURCE);
		}
		DAPLKA_RS_ACCT_INC(mr_rp, 1);
		atomic_inc_32(&hca_p->hca_mr_count);
	}
	status = ibt_register_mr(hca_hdl, pd_hdl, mr_attr, mr_hdl_p, mr_desc_p);

	if (status != IBT_SUCCESS && acct_enabled) {
		DAPLKA_RS_ACCT_DEC(mr_rp, 1);
		atomic_dec_32(&hca_p->hca_mr_count);
	}
	return (status);
}
static ibt_status_t
daplka_ibt_register_shared_mr(daplka_mr_resource_t *mr_rp,
    ibt_hca_hdl_t hca_hdl, ibt_mr_hdl_t mr_hdl, ibt_pd_hdl_t pd_hdl,
    ibt_smr_attr_t *smr_attr_p, ibt_mr_hdl_t *mr_hdl_p,
    ibt_mr_desc_t *mr_desc_p)
{
	daplka_hca_t	*hca_p;
	uint32_t	max_mrs;
	boolean_t	acct_enabled;
	ibt_status_t	status;

	acct_enabled = daplka_accounting_enabled;
	hca_p = mr_rp->mr_hca;
	max_mrs = daplka_max_mr_percent * hca_p->hca_attr.hca_max_memr / 100;

	if (acct_enabled) {
		if (daplka_max_mr_percent != 0 &&
		    max_mrs <= hca_p->hca_mr_count) {
			DERR("ibt_register_shared_mr: resource limit exceeded "
			    "(limit %d, count %d)\n", max_mrs,
			    hca_p->hca_mr_count);
			return (IBT_INSUFF_RESOURCE);
		}
		DAPLKA_RS_ACCT_INC(mr_rp, 1);
		atomic_inc_32(&hca_p->hca_mr_count);
	}
	status = ibt_register_shared_mr(hca_hdl, mr_hdl, pd_hdl,
	    smr_attr_p, mr_hdl_p, mr_desc_p);

	if (status != IBT_SUCCESS && acct_enabled) {
		DAPLKA_RS_ACCT_DEC(mr_rp, 1);
		atomic_dec_32(&hca_p->hca_mr_count);
	}
	return (status);
}
static ibt_status_t
daplka_ibt_deregister_mr(daplka_mr_resource_t *mr_rp, ibt_hca_hdl_t hca_hdl,
    ibt_mr_hdl_t mr_hdl)
{
	daplka_hca_t	*hca_p;
	ibt_status_t	status;

	hca_p = mr_rp->mr_hca;

	status = ibt_deregister_mr(hca_hdl, mr_hdl);
	if (status != IBT_SUCCESS) {
		return (status);
	}
	if (DAPLKA_RS_ACCT_CHARGED(mr_rp) > 0) {
		DAPLKA_RS_ACCT_DEC(mr_rp, 1);
		atomic_dec_32(&hca_p->hca_mr_count);
	}
	return (status);
}
static ibt_status_t
daplka_ibt_alloc_srq(daplka_srq_resource_t *srq_rp, ibt_hca_hdl_t hca_hdl,
    ibt_srq_flags_t flags, ibt_pd_hdl_t pd, ibt_srq_sizes_t *reqsz,
    ibt_srq_hdl_t *srq_hdl_p, ibt_srq_sizes_t *realsz)
{
	daplka_hca_t	*hca_p;
	uint32_t	max_srqs;
	boolean_t	acct_enabled;
	ibt_status_t	status;

	acct_enabled = daplka_accounting_enabled;
	hca_p = srq_rp->srq_hca;
	max_srqs = daplka_max_srq_percent * hca_p->hca_attr.hca_max_srqs / 100;

	if (acct_enabled) {
		if (daplka_max_srq_percent != 0 &&
		    max_srqs <= hca_p->hca_srq_count) {
			DERR("ibt_alloc_srq: resource limit exceeded "
			    "(limit %d, count %d)\n", max_srqs,
			    hca_p->hca_srq_count);
			return (IBT_INSUFF_RESOURCE);
		}
		DAPLKA_RS_ACCT_INC(srq_rp, 1);
		atomic_inc_32(&hca_p->hca_srq_count);
	}
	status = ibt_alloc_srq(hca_hdl, flags, pd, reqsz, srq_hdl_p, realsz);

	if (status != IBT_SUCCESS && acct_enabled) {
		DAPLKA_RS_ACCT_DEC(srq_rp, 1);
		atomic_dec_32(&hca_p->hca_srq_count);
	}
	return (status);
}
static ibt_status_t
daplka_ibt_free_srq(daplka_srq_resource_t *srq_rp, ibt_srq_hdl_t srq_hdl)
{
	daplka_hca_t	*hca_p;
	ibt_status_t	status;

	hca_p = srq_rp->srq_hca;

	D3("ibt_free_srq: %p %p\n", srq_rp, srq_hdl);

	status = ibt_free_srq(srq_hdl);
	if (status != IBT_SUCCESS) {
		return (status);
	}
	if (DAPLKA_RS_ACCT_CHARGED(srq_rp) > 0) {
		DAPLKA_RS_ACCT_DEC(srq_rp, 1);
		atomic_dec_32(&hca_p->hca_srq_count);
	}
	return (status);
}
static int
daplka_common_ioctl(int cmd, minor_t rnum, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	int error;

	switch (cmd) {
	case DAPL_IA_CREATE:
		error = daplka_ia_create(rnum, arg, mode, cred, rvalp);
		break;

	/* can potentially add other commands here */

	default:
		DERR("daplka_common_ioctl: cmd not supported\n");
		error = DDI_FAILURE;
	}
	return (error);
}
static int
daplka_evd_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	int error;

	switch (cmd) {
	case DAPL_EVD_CREATE:
		error = daplka_evd_create(rp, arg, mode, cred, rvalp);
		break;

	case DAPL_CQ_RESIZE:
		error = daplka_cq_resize(rp, arg, mode, cred, rvalp);
		break;

	case DAPL_EVENT_POLL:
		error = daplka_event_poll(rp, arg, mode, cred, rvalp);
		break;

	case DAPL_EVENT_WAKEUP:
		error = daplka_event_wakeup(rp, arg, mode, cred, rvalp);
		break;

	case DAPL_EVD_MODIFY_CNO:
		error = daplka_evd_modify_cno(rp, arg, mode, cred, rvalp);
		break;

	case DAPL_EVD_FREE:
		error = daplka_evd_free(rp, arg, mode, cred, rvalp);
		break;

	default:
		DERR("daplka_evd_ioctl: cmd not supported\n");
		error = DDI_FAILURE;
	}
	return (error);
}
static int
daplka_ep_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	int error;

	switch (cmd) {
	case DAPL_EP_MODIFY:
		error = daplka_ep_modify(rp, arg, mode, cred, rvalp);
		break;

	case DAPL_EP_FREE:
		error = daplka_ep_free(rp, arg, mode, cred, rvalp);
		break;

	case DAPL_EP_CONNECT:
		error = daplka_ep_connect(rp, arg, mode, cred, rvalp);
		break;

	case DAPL_EP_DISCONNECT:
		error = daplka_ep_disconnect(rp, arg, mode, cred, rvalp);
		break;

	case DAPL_EP_REINIT:
		error = daplka_ep_reinit(rp, arg, mode, cred, rvalp);
		break;

	case DAPL_EP_CREATE:
		error = daplka_ep_create(rp, arg, mode, cred, rvalp);
		break;

	default:
		DERR("daplka_ep_ioctl: cmd not supported\n");
		error = DDI_FAILURE;
	}
	return (error);
}
static int
daplka_mr_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	int error;

	switch (cmd) {
	case DAPL_MR_REGISTER:
		error = daplka_mr_register(rp, arg, mode, cred, rvalp);
		break;

	case DAPL_MR_REGISTER_LMR:
		error = daplka_mr_register_lmr(rp, arg, mode, cred, rvalp);
		break;

	case DAPL_MR_REGISTER_SHARED:
		error = daplka_mr_register_shared(rp, arg, mode, cred, rvalp);
		break;

	case DAPL_MR_DEREGISTER:
		error = daplka_mr_deregister(rp, arg, mode, cred, rvalp);
		break;

	case DAPL_MR_SYNC:
		error = daplka_mr_sync(rp, arg, mode, cred, rvalp);
		break;

	default:
		DERR("daplka_mr_ioctl: cmd not supported\n");
		error = DDI_FAILURE;
	}
	return (error);
}
static int
daplka_mw_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	int error;

	switch (cmd) {
	case DAPL_MW_ALLOC:
		error = daplka_mw_alloc(rp, arg, mode, cred, rvalp);
		break;

	case DAPL_MW_FREE:
		error = daplka_mw_free(rp, arg, mode, cred, rvalp);
		break;

	default:
		DERR("daplka_mw_ioctl: cmd not supported\n");
		error = DDI_FAILURE;
	}
	return (error);
}
static int
daplka_cno_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	int error;

	switch (cmd) {
	case DAPL_CNO_ALLOC:
		error = daplka_cno_alloc(rp, arg, mode, cred, rvalp);
		break;

	case DAPL_CNO_FREE:
		error = daplka_cno_free(rp, arg, mode, cred, rvalp);
		break;

	case DAPL_CNO_WAIT:
		error = daplka_cno_wait(rp, arg, mode, cred, rvalp);
		break;

	default:
		DERR("daplka_cno_ioctl: cmd not supported\n");
		error = DDI_FAILURE;
	}
	return (error);
}
static int
daplka_pd_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	int error;

	switch (cmd) {
	case DAPL_PD_ALLOC:
		error = daplka_pd_alloc(rp, arg, mode, cred, rvalp);
		break;

	case DAPL_PD_FREE:
		error = daplka_pd_free(rp, arg, mode, cred, rvalp);
		break;

	default:
		DERR("daplka_pd_ioctl: cmd not supported\n");
		error = DDI_FAILURE;
	}
	return (error);
}
static int
daplka_sp_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	int error;

	switch (cmd) {
	case DAPL_SERVICE_REGISTER:
		error = daplka_service_register(rp, arg, mode, cred, rvalp);
		break;

	case DAPL_SERVICE_DEREGISTER:
		error = daplka_service_deregister(rp, arg, mode, cred, rvalp);
		break;

	default:
		DERR("daplka_sp_ioctl: cmd not supported\n");
		error = DDI_FAILURE;
	}
	return (error);
}
static int
daplka_srq_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	int error;

	switch (cmd) {
	case DAPL_SRQ_CREATE:
		error = daplka_srq_create(rp, arg, mode, cred, rvalp);
		break;

	case DAPL_SRQ_RESIZE:
		error = daplka_srq_resize(rp, arg, mode, cred, rvalp);
		break;

	case DAPL_SRQ_FREE:
		error = daplka_srq_free(rp, arg, mode, cred, rvalp);
		break;

	default:
		DERR("daplka_srq_ioctl: cmd(%d) not supported\n", cmd);
		error = DDI_FAILURE;
	}
	return (error);
}
static int
daplka_misc_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	int error;

	switch (cmd) {
	case DAPL_CR_ACCEPT:
		error = daplka_cr_accept(rp, arg, mode, cred, rvalp);
		break;

	case DAPL_CR_REJECT:
		error = daplka_cr_reject(rp, arg, mode, cred, rvalp);
		break;

	case DAPL_IA_QUERY:
		error = daplka_ia_query(rp, arg, mode, cred, rvalp);
		break;

	case DAPL_CR_HANDOFF:
		error = daplka_cr_handoff(rp, arg, mode, cred, rvalp);
		break;

	default:
		DERR("daplka_misc_ioctl: cmd not supported\n");
		error = DDI_FAILURE;
	}
	return (error);
}
static int
daplka_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred,
    int *rvalp)
{
	daplka_ia_resource_t	*ia_rp;
	minor_t			rnum;
	int			error = 0;

	rnum = getminor(dev);
	ia_rp = (daplka_ia_resource_t *)daplka_resource_lookup(rnum);
	if (ia_rp == NULL) {
		DERR("ioctl: resource not found, rnum %d\n", rnum);
		return (ENXIO);
	}

	D4("ioctl: rnum = %d, cmd = 0x%x\n", rnum, cmd);
	if (DAPLKA_RS_RESERVED(ia_rp)) {
		error = daplka_common_ioctl(cmd, rnum, arg, mode, cred, rvalp);
		return (error);
	}
	if (DAPLKA_RS_TYPE(ia_rp) != DAPL_TYPE_IA) {
		DERR("ioctl: invalid type %d\n", DAPLKA_RS_TYPE(ia_rp));
		error = EINVAL;
		goto cleanup;
	}
	if (ia_rp->ia_pid != ddi_get_pid()) {
		DERR("ioctl: ia_pid %d != pid %d\n",
		    ia_rp->ia_pid, ddi_get_pid());
		error = EINVAL;
		goto cleanup;
	}

	switch (cmd & DAPL_TYPE_MASK) {
	case DAPL_TYPE_EVD:
		error = daplka_evd_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
		break;

	case DAPL_TYPE_EP:
		error = daplka_ep_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
		break;

	case DAPL_TYPE_MR:
		error = daplka_mr_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
		break;

	case DAPL_TYPE_MW:
		error = daplka_mw_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
		break;

	case DAPL_TYPE_PD:
		error = daplka_pd_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
		break;

	case DAPL_TYPE_SP:
		error = daplka_sp_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
		break;

	case DAPL_TYPE_CNO:
		error = daplka_cno_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
		break;

	case DAPL_TYPE_MISC:
		error = daplka_misc_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
		break;

	case DAPL_TYPE_SRQ:
		error = daplka_srq_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
		break;

	default:
		DERR("ioctl: invalid dapl type = %d\n", DAPLKA_RS_TYPE(ia_rp));
		error = DDI_FAILURE;
	}

cleanup:;
	DAPLKA_RS_UNREF(ia_rp);
	return (error);
}
static int
daplka_open(dev_t *devp, int flag, int otyp, struct cred *cred)
{
	minor_t rnum;

	/*
	 * Char only
	 */
	if (otyp != OTYP_CHR) {
		return (EINVAL);
	}

	/*
	 * Only zero can be opened, clones are used for resources.
	 */
	if (getminor(*devp) != DAPLKA_DRIVER_MINOR) {
		DERR("daplka_open: bad minor %d\n", getminor(*devp));
		return (ENODEV);
	}

	/*
	 * - allocate new minor number
	 * - update devp argument to new device
	 */
	if (daplka_resource_reserve(&rnum) == 0) {
		*devp = makedevice(getmajor(*devp), rnum);
	} else {
		return (ENOMEM);
	}

	return (DDI_SUCCESS);
}
static int
daplka_close(dev_t dev, int flag, int otyp, struct cred *cred)
{
	daplka_ia_resource_t	*ia_rp;
	minor_t			rnum = getminor(dev);

	/*
	 * Char only
	 */
	if (otyp != OTYP_CHR) {
		return (EINVAL);
	}
	D2("daplka_close: closing rnum = %d\n", rnum);
	atomic_inc_32(&daplka_pending_close);

	/*
	 * remove from resource table.
	 */
	ia_rp = (daplka_ia_resource_t *)daplka_resource_remove(rnum);

	/*
	 * remove the initial reference
	 */
	if (ia_rp != NULL) {
		DAPLKA_RS_UNREF(ia_rp);
	}
	atomic_dec_32(&daplka_pending_close);
	return (DDI_SUCCESS);
}
/*
 * Resource management routines
 *
 * We start with no resource array. Each time we run out of slots, we
 * reallocate a new larger array and copy the pointer to the new array and
 * a new resource blk is allocated and added to the hash table.
 *
 * The resource control block contains:
 *	root	- array of pointers to resource blks
 *	sz	- current size of array.
 *	len	- last valid entry in array.
 *
 * A search operation based on a resource number is as follows:
 *	index = rnum / RESOURCE_BLKSZ;
 *	ASSERT(index < resource_block.len);
 *	ASSERT(index < resource_block.sz);
 *	offset = rnum % RESOURCE_BLKSZ;
 *	ASSERT(offset >= resource_block.root[index]->base);
 *	ASSERT(offset < resource_block.root[index]->base + RESOURCE_BLKSZ);
 *	return resource_block.root[index]->blks[offset];
 *
 * A resource blk is freed when its used count reaches zero.
 */
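/*
 * Worked example of the mapping above (hedged: this assumes a block
 * size of 16; the actual value of DAPLKA_RC_BLKSZ is defined elsewhere
 * in this file):
 *
 *	rnum = 37
 *	index  = 37 / 16 = 2
 *	offset = 37 % 16 = 5
 *	object = resource_block.root[2]->blks[5]
 */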
/*
 * initializes the global resource table
 */
static void
daplka_resource_init(void)
{
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(daplka_resource))
	rw_init(&daplka_resource.daplka_rct_lock, NULL, RW_DRIVER, NULL);
	daplka_resource.daplka_rc_len = 0;
	daplka_resource.daplka_rc_sz = 0;
	daplka_resource.daplka_rc_cnt = 0;
	daplka_resource.daplka_rc_flag = 0;
	daplka_resource.daplka_rc_root = NULL;
	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(daplka_resource))
}
/*
 * destroys the global resource table
 */
static void
daplka_resource_fini(void)
{
	int	i;

	rw_enter(&daplka_resource.daplka_rct_lock, RW_WRITER);
	for (i = 0; i < daplka_resource.daplka_rc_len; i++) {
		daplka_resource_blk_t	*blk;
		int			j;

		blk = daplka_resource.daplka_rc_root[i];
		if (blk == NULL) {
			continue;
		}
		for (j = 0; j < DAPLKA_RC_BLKSZ; j++) {
			if (blk->daplka_rcblk_blks[j] != NULL) {
				DERR("resource_fini: non-null slot %d, %p\n",
				    j, blk->daplka_rcblk_blks[j]);
			}
		}
		kmem_free(blk, sizeof (*blk));
		daplka_resource.daplka_rc_root[i] = NULL;
	}
	if (daplka_resource.daplka_rc_root != NULL) {
		uint_t	sz;

		sz = daplka_resource.daplka_rc_sz *
		    sizeof (daplka_resource_blk_t *);
		kmem_free(daplka_resource.daplka_rc_root, (uint_t)sz);
		daplka_resource.daplka_rc_root = NULL;
		daplka_resource.daplka_rc_len = 0;
		daplka_resource.daplka_rc_sz = 0;
	}
	rw_exit(&daplka_resource.daplka_rct_lock);
	rw_destroy(&daplka_resource.daplka_rct_lock);
}
/*
 * reserves a slot in the global resource table.
 * this is called by the open() syscall. it is needed because
 * at open() time, we do not have sufficient information to
 * create an IA resource. the library needs to subsequently
 * call daplka_ia_create to insert an IA resource into this
 * reserved slot.
 */
static int
daplka_resource_reserve(minor_t *rnum)
{
	int i, j, empty = -1;
	daplka_resource_blk_t *blk;

	rw_enter(&daplka_resource.daplka_rct_lock, RW_WRITER);
	/*
	 * Try to find an empty slot
	 */
	for (i = 0; i < daplka_resource.daplka_rc_len; i++) {
		blk = daplka_resource.daplka_rc_root[i];
		if (blk != NULL && blk->daplka_rcblk_avail > 0) {

			D3("resource_alloc: available blks %d\n",
			    blk->daplka_rcblk_avail);

			/*
			 * found an empty slot in this blk
			 */
			for (j = 0; j < DAPLKA_RC_BLKSZ; j++) {
				if (blk->daplka_rcblk_blks[j] == NULL) {
					*rnum = (minor_t)
					    (j + (i * DAPLKA_RC_BLKSZ));
					blk->daplka_rcblk_blks[j] =
					    (daplka_resource_t *)
					    DAPLKA_RC_RESERVED;
					blk->daplka_rcblk_avail--;
					daplka_resource.daplka_rc_cnt++;
					rw_exit(&daplka_resource.
					    daplka_rct_lock);
					return (0);
				}
			}
		} else if (blk == NULL && empty < 0) {
			/*
			 * remember first empty slot
			 */
			empty = i;
		}
	}

	/*
	 * Couldn't find anything, allocate a new blk
	 * Do we need to reallocate the root array
	 */
	if (empty < 0) {
		if (daplka_resource.daplka_rc_len ==
		    daplka_resource.daplka_rc_sz) {
			/*
			 * Allocate new array and copy current stuff into it
			 */
			daplka_resource_blk_t	**p;
			uint_t newsz = (uint_t)daplka_resource.daplka_rc_sz +
			    DAPLKA_RC_BLKSZ;

			D3("resource_alloc: increasing no. of buckets to %d\n",
			    newsz);

			p = kmem_zalloc(newsz * sizeof (*p), daplka_km_flags);

			if (daplka_resource.daplka_rc_root) {
				uint_t oldsz;

				oldsz = (uint_t)(daplka_resource.daplka_rc_sz *
				    sizeof (*p));

				/*
				 * Copy old data into new space and
				 * free old stuff
				 */
				bcopy(daplka_resource.daplka_rc_root, p, oldsz);
				kmem_free(daplka_resource.daplka_rc_root,
				    oldsz);
			}

			daplka_resource.daplka_rc_root = p;
			daplka_resource.daplka_rc_sz = (int)newsz;
		}

		empty = daplka_resource.daplka_rc_len;
		daplka_resource.daplka_rc_len++;

		D3("resource_alloc: daplka_rc_len %d\n",
		    daplka_resource.daplka_rc_len);
	}

	/*
	 * Allocate a new blk
	 */
	blk = kmem_zalloc(sizeof (*blk), daplka_km_flags);
	ASSERT(daplka_resource.daplka_rc_root[empty] == NULL);
	daplka_resource.daplka_rc_root[empty] = blk;
	blk->daplka_rcblk_avail = DAPLKA_RC_BLKSZ - 1;

	/*
	 * Allocate slot
	 */
	*rnum = (minor_t)(empty * DAPLKA_RC_BLKSZ);
	blk->daplka_rcblk_blks[0] = (daplka_resource_t *)DAPLKA_RC_RESERVED;
	daplka_resource.daplka_rc_cnt++;
	rw_exit(&daplka_resource.daplka_rct_lock);

	return (0);
}
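/*
 * Hedged sketch of the reserve/insert lifecycle described in the
 * comment above (the actual callers are daplka_open() earlier in this
 * file and the DAPL_IA_CREATE ioctl path):
 *
 *	minor_t rnum;
 *
 *	if (daplka_resource_reserve(&rnum) == 0) {
 *		slot rnum now holds the DAPLKA_RC_RESERVED sentinel;
 *		later, daplka_ia_create() builds the IA resource and
 *		daplka_resource_insert(rnum, rp) replaces the sentinel
 *		with the real object.
 *	}
 */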
/*
 * removes resource from global resource table
 */
static daplka_resource_t *
daplka_resource_remove(minor_t rnum)
{
	int i, j;
	daplka_resource_blk_t *blk;
	daplka_resource_t *p;

	i = (int)(rnum / DAPLKA_RC_BLKSZ);
	j = (int)(rnum % DAPLKA_RC_BLKSZ);

	rw_enter(&daplka_resource.daplka_rct_lock, RW_WRITER);
	if (i >= daplka_resource.daplka_rc_len) {
		rw_exit(&daplka_resource.daplka_rct_lock);
		DERR("resource_remove: invalid rnum %d\n", rnum);
		return (NULL);
	}

	ASSERT(daplka_resource.daplka_rc_root);
	ASSERT(i < daplka_resource.daplka_rc_len);
	ASSERT(i < daplka_resource.daplka_rc_sz);
	blk = daplka_resource.daplka_rc_root[i];
	if (blk == NULL) {
		rw_exit(&daplka_resource.daplka_rct_lock);
		DERR("resource_remove: invalid rnum %d\n", rnum);
		return (NULL);
	}

	if (blk->daplka_rcblk_blks[j] == NULL) {
		rw_exit(&daplka_resource.daplka_rct_lock);
		DERR("resource_remove: blk->daplka_rcblk_blks[j] == NULL\n");
		return (NULL);
	}
	p = blk->daplka_rcblk_blks[j];
	blk->daplka_rcblk_blks[j] = NULL;
	blk->daplka_rcblk_avail++;
	if (blk->daplka_rcblk_avail == DAPLKA_RC_BLKSZ) {
		/*
		 * free this blk
		 */
		kmem_free(blk, sizeof (*blk));
		daplka_resource.daplka_rc_root[i] = NULL;
	}
	daplka_resource.daplka_rc_cnt--;
	rw_exit(&daplka_resource.daplka_rct_lock);

	if ((intptr_t)p == DAPLKA_RC_RESERVED) {
		return (NULL);
	} else {
		return (p);
	}
}
/*
 * inserts resource into the slot designated by rnum
 */
static int
daplka_resource_insert(minor_t rnum, daplka_resource_t *rp)
{
	int i, j, error = -1;
	daplka_resource_blk_t *blk;

	/*
	 * Find resource and lock it in WRITER mode
	 * search for available resource slot
	 */

	i = (int)(rnum / DAPLKA_RC_BLKSZ);
	j = (int)(rnum % DAPLKA_RC_BLKSZ);

	rw_enter(&daplka_resource.daplka_rct_lock, RW_WRITER);
	if (i >= daplka_resource.daplka_rc_len) {
		rw_exit(&daplka_resource.daplka_rct_lock);
		DERR("resource_insert: resource %d not found\n", rnum);
		return (-1);
	}

	blk = daplka_resource.daplka_rc_root[i];
	if (blk != NULL) {
		ASSERT(i < daplka_resource.daplka_rc_len);
		ASSERT(i < daplka_resource.daplka_rc_sz);

		if ((intptr_t)blk->daplka_rcblk_blks[j] == DAPLKA_RC_RESERVED) {
			blk->daplka_rcblk_blks[j] = rp;
			error = 0;
		} else {
			DERR("resource_insert: %d not reserved, blk = %p\n",
			    rnum, blk->daplka_rcblk_blks[j]);
		}
	} else {
		DERR("resource_insert: resource %d not found\n", rnum);
	}
	rw_exit(&daplka_resource.daplka_rct_lock);
	return (error);
}
/*
 * finds resource using minor device number
 */
static daplka_resource_t *
daplka_resource_lookup(minor_t rnum)
{
	int i, j;
	daplka_resource_blk_t *blk;
	daplka_resource_t *rp;

	/*
	 * Find resource and lock it in READER mode
	 * search for available resource slot
	 */

	i = (int)(rnum / DAPLKA_RC_BLKSZ);
	j = (int)(rnum % DAPLKA_RC_BLKSZ);

	rw_enter(&daplka_resource.daplka_rct_lock, RW_READER);
	if (i >= daplka_resource.daplka_rc_len) {
		rw_exit(&daplka_resource.daplka_rct_lock);
		DERR("resource_lookup: resource %d not found\n", rnum);
		return (NULL);
	}

	blk = daplka_resource.daplka_rc_root[i];
	if (blk != NULL) {
		ASSERT(i < daplka_resource.daplka_rc_len);
		ASSERT(i < daplka_resource.daplka_rc_sz);

		rp = blk->daplka_rcblk_blks[j];
		if (rp == NULL || (intptr_t)rp == DAPLKA_RC_RESERVED) {
			D3("resource_lookup: %d not found, blk = %p\n",
			    rnum, blk->daplka_rcblk_blks[j]);
		} else {
			DAPLKA_RS_REF((daplka_ia_resource_t *)rp);
		}
	} else {
		DERR("resource_lookup: resource %d not found\n", rnum);
		rp = NULL;
	}
	rw_exit(&daplka_resource.daplka_rct_lock);
	return (rp);
}
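/*
 * Note that a successful lookup of a live (non-reserved) resource
 * returns with an extra reference taken via DAPLKA_RS_REF, so every
 * caller must eventually drop it.  This is the pattern daplka_ioctl()
 * above follows:
 *
 *	ia_rp = (daplka_ia_resource_t *)daplka_resource_lookup(rnum);
 *	if (ia_rp == NULL)
 *		return (ENXIO);
 *	... use ia_rp ...
 *	DAPLKA_RS_UNREF(ia_rp);
 */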
/*
 * generic hash table implementation
 */

/*
 * daplka_hash_create:
 *	initializes a hash table with the specified parameters
 *
 * input:
 *	htblp			pointer to hash table
 *
 *	nbuckets		number of buckets (must be power of 2)
 *
 *	free_func		this function is called on each hash
 *				table element when daplka_hash_destroy
 *				is called
 *
 *	lookup_func		if daplka_hash_lookup is able to find
 *				the desired object, this function is
 *				applied on the object before
 *				daplka_hash_lookup returns
 * output:
 *	none
 *
 * return value(s):
 *	EINVAL			nbuckets is not a power of 2
 *	ENOMEM			cannot allocate buckets
 *	0			success
 */
static int
daplka_hash_create(daplka_hash_table_t *htblp, uint_t nbuckets,
	void (*free_func)(void *), void (*lookup_func)(void *))
{
	int i;

	if ((nbuckets & ~(nbuckets - 1)) != nbuckets) {
		DERR("hash_create: nbuckets not power of 2\n");
		return (EINVAL);
	}
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*htblp))

	htblp->ht_buckets =
	    kmem_zalloc(sizeof (daplka_hash_bucket_t) * nbuckets,
	    daplka_km_flags);
	if (htblp->ht_buckets == NULL) {
		DERR("hash_create: cannot allocate buckets\n");
		return (ENOMEM);
	}
	for (i = 0; i < nbuckets; i++) {
		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(htblp->ht_buckets[i]))
		htblp->ht_buckets[i].hb_count = 0;
		htblp->ht_buckets[i].hb_entries = NULL;
		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(htblp->ht_buckets[i]))
	}
	rw_init(&htblp->ht_table_lock, NULL, RW_DRIVER, NULL);
	mutex_init(&htblp->ht_key_lock, NULL, MUTEX_DRIVER, NULL);

	htblp->ht_count = 0;
	htblp->ht_next_hkey = (uint64_t)gethrtime();
	htblp->ht_nbuckets = nbuckets;
	htblp->ht_free_func = free_func;
	htblp->ht_lookup_func = lookup_func;
	htblp->ht_initialized = B_TRUE;
	D3("hash_create: done, buckets = %d\n", nbuckets);
	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*htblp))
	return (0);
}
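/*
 * Hedged usage sketch (names here are illustrative, not actual
 * identifiers from this driver; the per-IA resource hash tables
 * created elsewhere in this file follow this shape):
 *
 *	daplka_hash_table_t	htbl;
 *	uint64_t		hkey = 0;
 *
 *	if (daplka_hash_create(&htbl, 256, my_free_func,
 *	    daplka_hash_generic_lookup) == 0) {
 *		256 is a power of 2, as required;
 *		passing *hkeyp == 0 below asks the table to
 *		generate an evenly distributed key for us.
 *		(void) daplka_hash_insert(&htbl, &hkey, objp);
 *	}
 */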
/*
 * daplka_hash_insert:
 *	inserts an object into a hash table
 *
 * input:
 *	htblp			pointer to hash table
 *
 *	hkeyp			pointer to hash key.
 *				*hkeyp being non-zero means that the caller
 *				has generated its own hkey. if *hkeyp is zero,
 *				this function will generate an hkey for the
 *				caller. it is recommended that the caller
 *				leave the hkey generation to this function
 *				because the hkey is more likely to be evenly
 *				distributed.
 *
 *	objp			pointer to object to be inserted into
 *				the hash table
 *
 * output:
 *	hkeyp			the generated hkey is returned via this pointer
 *
 * return value(s):
 *	EINVAL			invalid parameter
 *	ENOMEM			cannot allocate hash entry
 *	0			success
 */
static int
daplka_hash_insert(daplka_hash_table_t *htblp, uint64_t *hkeyp, void *objp)
{
	daplka_hash_entry_t *hep, *curr_hep;
	daplka_hash_bucket_t *hbp;
	uint32_t bucket;
	uint64_t hkey;

	if (hkeyp == NULL) {
		DERR("hash_insert: hkeyp == NULL\n");
		return (EINVAL);
	}
	hep = kmem_zalloc(sizeof (*hep), daplka_km_flags);
	if (hep == NULL) {
		DERR("hash_insert: cannot alloc hash_entry\n");
		return (ENOMEM);
	}
	if (*hkeyp == 0) {
		/* generate a new key */
		mutex_enter(&htblp->ht_key_lock);
		hkey = ++htblp->ht_next_hkey;
		if (hkey == 0) {
			hkey = htblp->ht_next_hkey = (uint64_t)gethrtime();
		}
		mutex_exit(&htblp->ht_key_lock);
	} else {
		/* use user generated key */
		hkey = *hkeyp;
	}

	/* only works if ht_nbuckets is a power of 2 */
	bucket = (uint32_t)(hkey & (htblp->ht_nbuckets - 1));
	ASSERT(objp != NULL);
	ASSERT(bucket < htblp->ht_nbuckets);

	rw_enter(&htblp->ht_table_lock, RW_WRITER);
	hep->he_hkey = hkey;
	hep->he_objp = objp;

	/* look for duplicate entries */
	hbp = &htblp->ht_buckets[bucket];
	curr_hep = hbp->hb_entries;
	while (curr_hep != NULL) {
		if (curr_hep->he_hkey == hep->he_hkey) {
			break;
		}
		curr_hep = curr_hep->he_next;
	}
	if (curr_hep != NULL) {
		DERR("hash_insert: found duplicate hash entry: "
		    "bucket %d, hkey 0x%016llx\n",
		    bucket, (longlong_t)hep->he_hkey);
		kmem_free(hep, sizeof (*hep));
		rw_exit(&htblp->ht_table_lock);
		return (EINVAL);
	}
	hep->he_next = hbp->hb_entries;
	hbp->hb_entries = hep;
	hbp->hb_count++;
	htblp->ht_count++;
	rw_exit(&htblp->ht_table_lock);

	if (*hkeyp == 0) {
		*hkeyp = hkey;
		ASSERT(*hkeyp != 0);
	}
	D3("hash_insert: htblp 0x%p, hkey = 0x%016llx, bucket = %d\n",
	    htblp, (longlong_t)*hkeyp, bucket);
	return (0);
}
/*
 * daplka_hash_remove:
 *	removes object identified by hkey from hash table
 *
 * input:
 *	htblp			pointer to hash table
 *
 *	hkey			hkey that identifies the object to be removed
 *
 * output:
 *	objpp			pointer to pointer to object.
 *				if remove is successful, the removed object
 *				will be returned via *objpp.
 *
 * return value(s):
 *	EINVAL			cannot find hash entry
 *	0			success
 */
static int
daplka_hash_remove(daplka_hash_table_t *htblp, uint64_t hkey, void **objpp)
{
	daplka_hash_entry_t *free_hep, **curr_hepp;
	daplka_hash_bucket_t *hbp;
	uint32_t bucket;

	bucket = (uint32_t)(hkey & (htblp->ht_nbuckets - 1));

	rw_enter(&htblp->ht_table_lock, RW_WRITER);
	hbp = &htblp->ht_buckets[bucket];

	curr_hepp = &hbp->hb_entries;
	while (*curr_hepp != NULL) {
		if ((*curr_hepp)->he_hkey == hkey) {
			break;
		}
		curr_hepp = &(*curr_hepp)->he_next;
	}
	if (*curr_hepp == NULL) {
		DERR("hash_remove: cannot find hash entry: "
		    "bucket %d, hkey 0x%016llx\n", bucket, (longlong_t)hkey);
		rw_exit(&htblp->ht_table_lock);
		return (EINVAL);
	}
	if (objpp != NULL) {
		*objpp = (*curr_hepp)->he_objp;
	}
	free_hep = *curr_hepp;
	*curr_hepp = (*curr_hepp)->he_next;
	kmem_free(free_hep, sizeof (*free_hep));

	hbp->hb_count--;
	htblp->ht_count--;
	D3("hash_remove: removed entry, hkey 0x%016llx, bucket %d, "
	    "hb_count %d, ht_count %d\n",
	    (longlong_t)hkey, bucket, hbp->hb_count, htblp->ht_count);
	rw_exit(&htblp->ht_table_lock);
	return (0);
}
/*
 * daplka_hash_walk:
 *	walks through the entire hash table. applying func on each of
 *	the inserted objects. stops walking if func returns non-zero.
 *
 * input:
 *	htblp			pointer to hash table
 *
 *	func			function to be applied on each object
 *
 *	farg			second argument to func
 *
 *	lockmode		can be RW_WRITER or RW_READER. this
 *				allows the caller to choose what type
 *				of lock to acquire before walking the
 *				table.
 *
 * output:
 *	none
 *
 * return value(s):
 *	none
 */
static void
daplka_hash_walk(daplka_hash_table_t *htblp, int (*func)(void *, void *),
	void *farg, krw_t lockmode)
{
	daplka_hash_entry_t *curr_hep;
	daplka_hash_bucket_t *hbp;
	uint32_t bucket, retval = 0;

	ASSERT(lockmode == RW_WRITER || lockmode == RW_READER);

	/* needed for warlock */
	if (lockmode == RW_WRITER) {
		rw_enter(&htblp->ht_table_lock, RW_WRITER);
	} else {
		rw_enter(&htblp->ht_table_lock, RW_READER);
	}
	for (bucket = 0; bucket < htblp->ht_nbuckets && retval == 0; bucket++) {
		hbp = &htblp->ht_buckets[bucket];
		curr_hep = hbp->hb_entries;
		while (curr_hep != NULL) {
			retval = (*func)(curr_hep->he_objp, farg);
			if (retval != 0) {
				break;
			}
			curr_hep = curr_hep->he_next;
		}
	}
	rw_exit(&htblp->ht_table_lock);
}
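/*
 * Hedged sketch of a walk callback (illustrative only; "htbl" and
 * "my_count_cb" are hypothetical names).  Returning non-zero from the
 * callback stops the walk early:
 *
 *	static int
 *	my_count_cb(void *objp, void *arg)
 *	{
 *		(*(uint32_t *)arg)++;
 *		return (0);
 *	}
 *
 *	uint32_t cnt = 0;
 *	daplka_hash_walk(&htbl, my_count_cb, &cnt, RW_READER);
 */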
/*
 * daplka_hash_lookup:
 *	finds object from hkey
 *
 * input:
 *	htblp			pointer to hash table
 *
 *	hkey			hkey that identifies the object to be looked up
 *
 * output:
 *	none
 *
 * return value(s):
 *	NULL			if not found
 *	object pointer if found
 */
static void *
daplka_hash_lookup(daplka_hash_table_t *htblp, uint64_t hkey)
{
	daplka_hash_entry_t *curr_hep;
	uint32_t bucket;
	void *objp;

	bucket = (uint32_t)(hkey & (htblp->ht_nbuckets - 1));

	rw_enter(&htblp->ht_table_lock, RW_READER);
	curr_hep = htblp->ht_buckets[bucket].hb_entries;
	while (curr_hep != NULL) {
		if (curr_hep->he_hkey == hkey) {
			break;
		}
		curr_hep = curr_hep->he_next;
	}
	if (curr_hep == NULL) {
		DERR("hash_lookup: cannot find hash entry: "
		    "bucket %d, hkey 0x%016llx\n", bucket, (longlong_t)hkey);
		rw_exit(&htblp->ht_table_lock);
		return (NULL);
	}
	objp = curr_hep->he_objp;
	ASSERT(objp != NULL);
	if (htblp->ht_lookup_func != NULL) {
		(*htblp->ht_lookup_func)(objp);
	}
	rw_exit(&htblp->ht_table_lock);
	return (objp);
}
/*
 * daplka_hash_destroy:
 *	destroys hash table. applies free_func on all inserted objects.
 *
 * input:
 *	htblp			pointer to hash table
 *
 * output:
 *	none
 *
 * return value(s):
 *	none
 */
static void
daplka_hash_destroy(daplka_hash_table_t *htblp)
{
	daplka_hash_entry_t *curr_hep, *free_hep;
	daplka_hash_entry_t *free_list = NULL;
	daplka_hash_bucket_t *hbp;
	uint32_t bucket, cnt, total = 0;

	if (!htblp->ht_initialized) {
		DERR("hash_destroy: not initialized\n");
		return;
	}
	/* free all elements from hash table */
	rw_enter(&htblp->ht_table_lock, RW_WRITER);
	for (bucket = 0; bucket < htblp->ht_nbuckets; bucket++) {
		hbp = &htblp->ht_buckets[bucket];

		/* build list of elements to be freed */
		curr_hep = hbp->hb_entries;
		cnt = 0;
		while (curr_hep != NULL) {
			cnt++;
			free_hep = curr_hep;
			curr_hep = curr_hep->he_next;

			free_hep->he_next = free_list;
			free_list = free_hep;
		}
		ASSERT(cnt == hbp->hb_count);
		total += cnt;
		hbp->hb_count = 0;
		hbp->hb_entries = NULL;
	}
	ASSERT(total == htblp->ht_count);
	D3("hash_destroy: htblp 0x%p, nbuckets %d, freed %d hash entries\n",
	    htblp, htblp->ht_nbuckets, total);
	rw_exit(&htblp->ht_table_lock);

	/* free all objects, now without holding the hash table lock */
	cnt = 0;
	while (free_list != NULL) {
		cnt++;
		free_hep = free_list;
		free_list = free_list->he_next;
		if (htblp->ht_free_func != NULL) {
			(*htblp->ht_free_func)(free_hep->he_objp);
		}
		kmem_free(free_hep, sizeof (*free_hep));
	}
	ASSERT(total == cnt);

	/* free hash buckets and destroy locks */
	kmem_free(htblp->ht_buckets,
	    sizeof (daplka_hash_bucket_t) * htblp->ht_nbuckets);

	rw_enter(&htblp->ht_table_lock, RW_WRITER);
	htblp->ht_buckets = NULL;
	htblp->ht_count = 0;
	htblp->ht_nbuckets = 0;
	htblp->ht_free_func = NULL;
	htblp->ht_lookup_func = NULL;
	htblp->ht_initialized = B_FALSE;
	rw_exit(&htblp->ht_table_lock);

	mutex_destroy(&htblp->ht_key_lock);
	rw_destroy(&htblp->ht_table_lock);
}
/*
 * daplka_hash_getsize:
 *	return the number of objects in hash table
 *
 * input:
 *	htblp			pointer to hash table
 *
 * output:
 *	none
 *
 * return value(s):
 *	number of objects in hash table
 */
static uint32_t
daplka_hash_getsize(daplka_hash_table_t *htblp)
{
	uint32_t sz;

	rw_enter(&htblp->ht_table_lock, RW_READER);
	sz = htblp->ht_count;
	rw_exit(&htblp->ht_table_lock);

	return (sz);
}
/*
 * this function is used as ht_lookup_func above when lookup is called.
 * other types of objs may use a more elaborate lookup_func.
 */
static void
daplka_hash_generic_lookup(void *obj)
{
	daplka_resource_t	*rp = (daplka_resource_t *)obj;

	mutex_enter(&rp->rs_reflock);
	rp->rs_refcnt++;
	ASSERT(rp->rs_refcnt != 0);
	mutex_exit(&rp->rs_reflock);
}
/*
 * Generates a non-zero 32 bit hash key used for the timer hash table.
 */
static uint32_t
daplka_timer_hkey_gen()
{
	uint32_t new_hkey;

	do {
		new_hkey = atomic_inc_32_nv(&daplka_timer_hkey);
	} while (new_hkey == 0);

	return (new_hkey);
}
/*
 * The DAPL KA debug logging routines
 */

/*
 * Add the string str to the end of the debug log, followed by a newline.
 */
static void
daplka_dbglog(char *str)
{
	size_t	length;
	size_t	remlen;

	/*
	 * If this is the first time we've written to the log, initialize it.
	 */
	if (!daplka_dbginit) {
		return;
	}
	mutex_enter(&daplka_dbglock);
	/*
	 * Note the log is circular; if this string would run over the end,
	 * we copy the first piece to the end and then the last piece to
	 * the beginning of the log.
	 */
	length = strlen(str);

	remlen = (size_t)sizeof (daplka_dbgbuf) - daplka_dbgnext - 1;

	if (length > remlen) {
		bcopy(str, daplka_dbgbuf + daplka_dbgnext, remlen);
		daplka_dbgbuf[sizeof (daplka_dbgbuf) - 1] = '\0';
		str += remlen;
		length -= remlen;
		daplka_dbgnext = 0;
	}
	bcopy(str, daplka_dbgbuf + daplka_dbgnext, length);
	daplka_dbgnext += length;

	if (daplka_dbgnext >= sizeof (daplka_dbgbuf))
		daplka_dbgnext = 0;
	mutex_exit(&daplka_dbglock);
}
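/*
 * Worked example of the wraparound logic above (hedged: this assumes
 * the smaller 0x4000-byte non-DEBUG buffer size):
 *
 *	daplka_dbgnext = 0x3FF0, length = 32
 *	remlen = 0x4000 - 0x3FF0 - 1 = 15
 *	-> the first 15 bytes land at the tail of daplka_dbgbuf,
 *	   the buffer is NUL-terminated at offset 0x3FFF, and the
 *	   remaining 17 bytes restart at offset 0.
 */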
/*
 * Add a printf-style message to whichever debug logs we're currently using.
 */
static void
daplka_debug(const char *fmt, ...)
{
	char	buff[512];
	va_list	ap;
	/*
	 * The system prepends the thread id and high resolution time
	 * (nanoseconds are dropped and so are the upper digits)
	 * to the specified string.
	 * The unit for the timestamp is 10 microseconds.
	 * It wraps around every 10000 seconds.
	 * Ex: gethrtime() = X ns = X/1000 us = X/10000 10-microsecond units.
	 */
	int	micro_time = (int)((gethrtime() / 10000) % 1000000000);
	(void) sprintf(buff, "th %p tm %9d: ", (void *)curthread, micro_time);

	va_start(ap, fmt);
	(void) vsprintf(buff+strlen(buff), fmt, ap);
	va_end(ap);

	daplka_dbglog(buff);
}
static void
daplka_console(const char *fmt, ...)
{
	char	buff[512];
	va_list	ap;

	va_start(ap, fmt);
	(void) vsprintf(buff, fmt, ap);
	va_end(ap);

	cmn_err(CE_CONT, "%s", buff
);