/*
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 */

/*
 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * Hermon Work Request Processing Routines
 *
 *    Implements all the routines necessary to provide the PostSend(),
 *    PostRecv() and PostSRQ() verbs.  Also contains all the code
 *    necessary to implement the Hermon WRID tracking mechanism.
 */
#include <sys/types.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/avl.h>

#include <sys/ib/adapters/hermon/hermon.h>
static uint32_t hermon_wr_get_immediate(ibt_send_wr_t *wr);
static int hermon_wr_bind_check(hermon_state_t *state, ibt_send_wr_t *wr);
static int hermon_wqe_send_build(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_send_wr_t *wr, uint64_t *desc, uint_t *size);
static int hermon_wqe_mlx_build(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_send_wr_t *wr, uint64_t *desc, uint_t *size);
static void hermon_wqe_headroom(uint_t from, hermon_qphdl_t qp);
static int hermon_wqe_recv_build(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_recv_wr_t *wr, uint64_t *desc);
static int hermon_wqe_srq_build(hermon_state_t *state, hermon_srqhdl_t srq,
    ibt_recv_wr_t *wr, uint64_t *desc);
static hermon_workq_avl_t *hermon_wrid_wqavl_find(hermon_cqhdl_t cq,
    uint_t qpn, uint_t send_or_recv);
static void hermon_cq_workq_add(hermon_cqhdl_t cq, hermon_workq_avl_t *wqavl);
static void hermon_cq_workq_remove(hermon_cqhdl_t cq,
    hermon_workq_avl_t *wqavl);
static	ibt_wr_ds_t	null_sgl = { 0, 0x00000100, 0 };
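/*
 * Note on null_sgl: per the Hermon PRM a receive queue scatter list is
 * terminated with an "invalid" entry, i.e. a zero address, a zero byte
 * count, and the reserved invalid L_Key value (0x100).
 * hermon_wqe_recv_build() and hermon_wqe_srq_build() below append it
 * whenever a work request uses fewer SGL entries than the queue allows.
 */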
/*
 * Add ability to try to debug RDMA_READ/RDMA_WRITE failures.
 *
 *	0x1 - print rkey used during post_send
 *	0x2 - print sgls used during post_send
 *	0x4 - print FMR comings and goings
 */
int hermon_rdma_debug = 0x0;
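/*
 * Usage note: since hermon_rdma_debug is a plain global tunable, the flag
 * values above can be OR'ed together and, on typical illumos/Solaris
 * systems, set at boot (e.g. "set hermon:hermon_rdma_debug = 0x3" in
 * /etc/system) or patched at runtime with a kernel debugger.
 */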
static int
hermon_post_send_ud(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_send_wr_t *wr, uint_t num_wr, uint_t *num_posted)
{
	hermon_hw_snd_wqe_ud_t		*ud;
	hermon_workq_hdr_t		*wq;
	hermon_ahhdl_t			ah;
	ibt_wr_rfci_send_t		*rfci;
	ibt_wr_init_send_t		*is;
	ibt_ud_dest_t			*dest;
	uint64_t			*desc;
	uint32_t			desc_sz;
	uint32_t			signaled_dbd, solicited;
	uint32_t			head, tail, next_tail, qsize_msk;
	uint32_t			hdrmwqes;
	uint32_t			nopcode, fence, immed_data = 0;
	hermon_hw_wqe_sgl_t		*ds, *old_ds;
	ibt_wr_ds_t			*sgl;
	int				nds;
	int				i, j, last_ds, num_ds, status;
	uint32_t			*wqe_start;
	int				sectperwqe;
	uint_t				posted_cnt = 0;
	int				total_len, strong_order, fc_bits, cksum;

	/* initialize the FMA retry loop */
	hermon_pio_init(fm_loop_cnt, fm_status, fm_test_num);

	ASSERT(MUTEX_HELD(&qp->qp_sq_lock));
	_NOTE(LOCK_RELEASED_AS_SIDE_EFFECT(&qp->qp_sq_lock))

	/* Grab the lock for the WRID list */

	/* Save away some initial QP state */
	wq = qp->qp_sq_wqhdr;
	qsize_msk = wq->wq_mask;
	hdrmwqes  = qp->qp_sq_hdrmwqes;		/* in WQEs */
	sectperwqe = 1 << (qp->qp_sq_log_wqesz - 2);

	tail	  = wq->wq_tail;
	head	  = wq->wq_head;
	status	  = DDI_SUCCESS;

post_next:
	/*
	 * Check for "queue full" condition.  If the queue
	 * is already full, then no more WQEs can be posted.
	 * So break out, ring a doorbell (if necessary) and
	 * return an error.
	 */
	if (wq->wq_full != 0) {
		status = IBT_QP_FULL;
		goto done;
	}

	next_tail = (tail + 1) & qsize_msk;
	if (((tail + hdrmwqes) & qsize_msk) == head) {
		wq->wq_full = 1;
	}

	desc = HERMON_QP_SQ_ENTRY(qp, tail);
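	/*
	 * A note on the ring arithmetic above: "qsize_msk" is the
	 * power-of-two queue size minus one, so "(tail + 1) & qsize_msk"
	 * is a cheap modulo increment.  The queue is marked full while
	 * still "hdrmwqes" entries short of the head because that headroom
	 * must stay invalidated at all times; the hardware prefetches WQEs
	 * and needs to find an invalid one just past the last valid
	 * descriptor (see hermon_wqe_headroom() below).
	 */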
	nds = wr->wr_nds;
	sgl = wr->wr_sgl;
	num_ds = 0;
	strong_order = 0;
	fc_bits = 0;
	cksum = 0;

	/*
	 * Build a Send or Send_LSO WQE
	 */
	switch (wr->wr_opcode) {
	case IBT_WRC_SEND_LSO:
		if (wr->wr_trans != IBT_UD_SRV) {
			status = IBT_QP_SRV_TYPE_INVALID;
			goto done;
		}
		nopcode = HERMON_WQE_SEND_NOPCODE_LSO;
		if (wr->wr_flags & IBT_WR_SEND_CKSUM)
			cksum = 0x30;
		if (wr->wr.ud_lso.lso_hdr_sz > 60) {
			nopcode |= (1 << 6);	/* ReRead bit must be set */
		}
		dest = wr->wr.ud_lso.lso_ud_dest;
		ah = (hermon_ahhdl_t)dest->ud_ah;
		if (ah == NULL) {
			status = IBT_AH_HDL_INVALID;
			goto done;
		}
		ud = (hermon_hw_snd_wqe_ud_t *)((uintptr_t)desc +
		    sizeof (hermon_hw_snd_wqe_ctrl_t));
		ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)ud +
		    sizeof (hermon_hw_snd_wqe_ud_t));
		HERMON_WQE_BUILD_UD(qp, ud, ah, dest);

		total_len = (4 + 0xf + wr->wr.ud_lso.lso_hdr_sz) & ~0xf;
		if ((uintptr_t)ds + total_len + (nds * 16) >
		    (uintptr_t)desc + (1 << qp->qp_sq_log_wqesz)) {
			status = IBT_QP_SGL_LEN_INVALID;
			goto done;
		}
		old_ds = ds;
		bcopy(wr->wr.ud_lso.lso_hdr, (uint32_t *)old_ds + 1,
		    wr->wr.ud_lso.lso_hdr_sz);
		ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)ds + total_len);
		i = 0;
		break;

	case IBT_WRC_SEND:
		nopcode = HERMON_WQE_SEND_NOPCODE_SEND;
		if (qp->qp_serv_type == HERMON_QP_UD) {
			if (wr->wr_trans != IBT_UD_SRV) {
				status = IBT_QP_SRV_TYPE_INVALID;
				goto done;
			}
			if (wr->wr_flags & IBT_WR_SEND_CKSUM)
				cksum = 0x30;
			dest = wr->wr.ud.udwr_dest;
		} else if (qp->qp_serv_type == HERMON_QP_RFCI) {
			if (wr->wr_trans != IBT_RFCI_SRV) {
				status = IBT_QP_SRV_TYPE_INVALID;
				goto done;
			}
			rfci = &wr->wr.fc.rfci_send;
			if ((wr->wr_flags & IBT_WR_SEND_FC_CRC) != 0) {
				nopcode |= (rfci->rfci_eof << 16);
				fc_bits = 0x40;	/* set FCRC */
			}
			dest = rfci->rfci_dest;
		} else {
			status = IBT_QP_OP_TYPE_INVALID;
			goto done;
		}
		if (wr->wr_flags & IBT_WR_SEND_IMMED) {
			/* "|=" changes 0xa to 0xb without touching FCEOF */
			nopcode |= HERMON_WQE_SEND_NOPCODE_SENDI;
			immed_data = wr->wr.ud.udwr_immed;
		}
		ah = (hermon_ahhdl_t)dest->ud_ah;
		if (ah == NULL) {
			status = IBT_AH_HDL_INVALID;
			goto done;
		}
		ud = (hermon_hw_snd_wqe_ud_t *)((uintptr_t)desc +
		    sizeof (hermon_hw_snd_wqe_ctrl_t));
		ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)ud +
		    sizeof (hermon_hw_snd_wqe_ud_t));
		HERMON_WQE_BUILD_UD(qp, ud, ah, dest);
		i = 0;
		break;

	case IBT_WRC_INIT_SEND_FCMD:
		if (qp->qp_serv_type != HERMON_QP_FCMND) {
			status = IBT_QP_OP_TYPE_INVALID;
			goto done;
		}
		if (wr->wr_trans != IBT_FCMD_SRV) {
			status = IBT_QP_SRV_TYPE_INVALID;
			goto done;
		}
		nopcode = HERMON_WQE_FCP_OPCODE_INIT_AND_SEND;
		is = wr->wr.fc.fc_is;
		dest = is->is_ctl.fc_dest;
		ah = (hermon_ahhdl_t)dest->ud_ah;
		if (ah == NULL) {
			status = IBT_AH_HDL_INVALID;
			goto done;
		}
		ud = (hermon_hw_snd_wqe_ud_t *)((uintptr_t)desc +
		    sizeof (hermon_hw_snd_wqe_ctrl_t));
		ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)ud +
		    sizeof (hermon_hw_snd_wqe_ud_t));
		HERMON_WQE_BUILD_UD(qp, ud, ah, dest);

		old_ds = ds;
		/* move ds beyond the FCP-3 Init Segment */
		ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)ds + 0x10);
		i = 0;
		break;

	case IBT_WRC_FAST_REG_PMR:
	{
		hermon_hw_snd_wqe_frwr_t	*frwr;

		if (qp->qp_serv_type != HERMON_QP_FCMND) {
			status = IBT_QP_OP_TYPE_INVALID;
			goto done;
		}
		if (wr->wr_trans != IBT_FCMD_SRV) {
			status = IBT_QP_SRV_TYPE_INVALID;
			goto done;
		}
		nopcode = HERMON_WQE_SEND_NOPCODE_FRWR;
		frwr = (hermon_hw_snd_wqe_frwr_t *)((uintptr_t)desc +
		    sizeof (hermon_hw_snd_wqe_ctrl_t));
		HERMON_WQE_BUILD_FRWR(qp, frwr, wr->wr.fc.reg_pmr);
		ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)frwr +
		    sizeof (hermon_hw_snd_wqe_frwr_t));
		nds = 0;
		strong_order = 0x80;
		break;
	}

/* firmware does not support this */
	case IBT_WRC_LOCAL_INVALIDATE:
	{
		hermon_hw_snd_wqe_local_inv_t	*li;

		if (qp->qp_serv_type != HERMON_QP_FCMND) {
			status = IBT_QP_OP_TYPE_INVALID;
			goto done;
		}
		if (wr->wr_trans != IBT_FCMD_SRV) {
			status = IBT_QP_SRV_TYPE_INVALID;
			goto done;
		}
		nopcode = HERMON_WQE_SEND_NOPCODE_LCL_INV;
		li = (hermon_hw_snd_wqe_local_inv_t *)((uintptr_t)desc +
		    sizeof (hermon_hw_snd_wqe_ctrl_t));
		HERMON_WQE_BUILD_LI(qp, li, wr->wr.fc.li);
		ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)li +
		    sizeof (hermon_hw_snd_wqe_local_inv_t));
		nds = 0;
		strong_order = 0x80;
		break;
	}

	default:
		status = IBT_QP_OP_TYPE_INVALID;
		goto done;
	}

	if (nds > qp->qp_sq_sgl) {
		status = IBT_QP_SGL_LEN_INVALID;
		goto done;
	}

	for (last_ds = num_ds, j = i; j < nds; j++) {
		if (sgl[j].ds_len != 0)
			last_ds++;	/* real last ds of wqe to fill */
	}
	desc_sz = ((uintptr_t)&ds[last_ds] - (uintptr_t)desc) >> 0x4;
	for (j = nds; --j >= i; ) {
		if (sgl[j].ds_len == 0) {
			continue;
		}

		/*
		 * Fill in the Data Segment(s) for the current WQE, using the
		 * information contained in the scatter-gather list of the
		 * work request.
		 */
		last_ds--;
		HERMON_WQE_BUILD_DATA_SEG_SEND(&ds[last_ds], &sgl[j]);
	}
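	/*
	 * A note on the two loops above: the first pass only counts the
	 * non-zero-length SGL entries so that the descriptor size is known
	 * before anything is written; the second pass then walks the list
	 * backwards, packing data segments from the tail of the WQE toward
	 * the front.  Building the WQE back-to-front helps ensure the
	 * hardware never prefetches a half-built descriptor, since the
	 * leading control word (with the ownership bit) is written last,
	 * in HERMON_SET_SEND_WQE_OWNER().
	 */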
	membar_producer();

	if (wr->wr_opcode == IBT_WRC_SEND_LSO) {
		HERMON_WQE_BUILD_LSO(qp, old_ds, wr->wr.ud_lso.lso_mss,
		    wr->wr.ud_lso.lso_hdr_sz);
	} else if (wr->wr_opcode == IBT_WRC_INIT_SEND_FCMD) {
		/* This sits in the STAMP, so must be set after setting SGL */
		HERMON_WQE_BUILD_FCP3_INIT(old_ds, is->is_ctl.fc_frame_ctrl,
		    is->is_cs_priority, is->is_tx_seq_id, is->is_fc_mtu,
		    is->is_dest_id, is->is_op, is->is_rem_exch,
		    is->is_exch_qp_idx);

		/* The following will be used in HERMON_WQE_SET_CTRL_SEGMENT */
		/* SIT bit in FCP-3 ctrl segment */
		desc_sz |= (is->is_ctl.fc_frame_ctrl & IBT_FCTL_SIT) ? 0x80 : 0;
		/* LS bit in FCP-3 ctrl segment */
		fc_bits |= (is->is_ctl.fc_frame_ctrl & IBT_FCTL_LAST_SEQ) ?
		    0x10000 : 0;
		fc_bits |= ((is->is_ctl.fc_routing_ctrl & 0xF) << 20) |
		    (is->is_ctl.fc_seq_id << 24);
		immed_data = is->is_ctl.fc_parameter;
	}

	fence = (wr->wr_flags & IBT_WR_SEND_FENCE) ? 1 : 0;

	signaled_dbd = ((qp->qp_sq_sigtype == HERMON_QP_SQ_ALL_SIGNALED) ||
	    (wr->wr_flags & IBT_WR_SEND_SIGNAL)) ? 0xC : 0;

	solicited = (wr->wr_flags & IBT_WR_SEND_SOLICIT) ? 0x2 : 0;

	HERMON_WQE_SET_CTRL_SEGMENT(desc, desc_sz, fence, immed_data,
	    solicited, signaled_dbd, cksum, qp, strong_order, fc_bits);

	wq->wq_wrid[tail] = wr->wr_id;
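	/*
	 * Note on the flag values above: they are pre-shifted so they can
	 * be OR'ed straight into the WQE control segment by
	 * HERMON_WQE_SET_CTRL_SEGMENT().  A non-zero signaled_dbd (0xC)
	 * requests a CQE for this WQE (either because the QP signals all
	 * sends or because IBT_WR_SEND_SIGNAL was set), and 0x2 is the
	 * solicited-event bit.
	 */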
	tail = next_tail;

	/* Update some of the state in the QP */
	wq->wq_tail = tail;

	membar_producer();

	/* Now set the ownership bit and opcode (first dword). */
	HERMON_SET_SEND_WQE_OWNER(qp, (uint32_t *)desc, nopcode);

	posted_cnt++;
	if (--num_wr > 0) {
		/* do the invalidate of the headroom */
		wqe_start = (uint32_t *)HERMON_QP_SQ_ENTRY(qp,
		    (tail + hdrmwqes) & qsize_msk);
		for (i = 16; i < sectperwqe; i += 16) {
			wqe_start[i] = 0xFFFFFFFF;
		}
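		/*
		 * Note: the loop above stamps the leading dword of each
		 * 64-byte section (after the first) of the WQE that has
		 * just entered the headroom window with 0xFFFFFFFF, so the
		 * HCA's prefetcher keeps finding invalidated WQE contents
		 * beyond the newly posted tail.
		 */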
		wr++;
		goto post_next;
	}

done:
	if (posted_cnt != 0) {
		ddi_acc_handle_t uarhdl = hermon_get_uarhdl(state);

		membar_producer();

		/* the FMA retry loop starts for Hermon doorbell register. */
		hermon_pio_start(state, uarhdl, pio_error, fm_loop_cnt,
		    fm_status, fm_test_num);

		HERMON_UAR_DOORBELL(state, uarhdl,
		    (uint64_t *)(void *)&state->hs_uar->send,
		    (uint64_t)qp->qp_ring);

		/* the FMA retry loop ends. */
		hermon_pio_end(state, uarhdl, pio_error, fm_loop_cnt,
		    fm_status, fm_test_num);

		/* do the invalidate of the headroom */
		wqe_start = (uint32_t *)HERMON_QP_SQ_ENTRY(qp,
		    (tail + hdrmwqes) & qsize_msk);
		for (i = 16; i < sectperwqe; i += 16) {
			wqe_start[i] = 0xFFFFFFFF;
		}
	}

	if (num_posted != NULL)
		*num_posted = posted_cnt;

	mutex_exit(&qp->qp_sq_lock);
	return (status);

pio_error:
	mutex_exit(&qp->qp_sq_lock);
	hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
	return (ibc_get_ci_failure(0));
}
static int
hermon_post_send_rc(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_send_wr_t *wr, uint_t num_wr, uint_t *num_posted)
{
	uint64_t			*desc;
	hermon_workq_hdr_t		*wq;
	uint32_t			desc_sz;
	uint32_t			signaled_dbd, solicited;
	uint32_t			head, tail, next_tail, qsize_msk;
	uint32_t			hdrmwqes;
	uint32_t			nopcode, fence, immed_data = 0;
	hermon_hw_snd_wqe_remaddr_t	*rc;
	hermon_hw_snd_wqe_atomic_t	*at;
	hermon_hw_snd_wqe_bind_t	*bn;
	hermon_hw_snd_wqe_frwr_t	*frwr;
	hermon_hw_snd_wqe_local_inv_t	*li;
	hermon_hw_wqe_sgl_t		*ds;
	ibt_wr_ds_t			*sgl;
	int				nds;
	int				i, last_ds, num_ds;
	int				status;
	uint_t				posted_cnt = 0;
	int				strong_order;
	int				print_rdma;
	int				rlen;
	uint32_t			rkey;
	uint64_t			raddr;
	uint32_t			*wqe_start;
	int				sectperwqe;

	/* initialize the FMA retry loop */
	hermon_pio_init(fm_loop_cnt, fm_status, fm_test_num);

	ASSERT(MUTEX_HELD(&qp->qp_sq_lock));
	_NOTE(LOCK_RELEASED_AS_SIDE_EFFECT(&qp->qp_sq_lock))

	/* Save away some initial QP state */
	wq = qp->qp_sq_wqhdr;
	qsize_msk = wq->wq_mask;
	hdrmwqes  = qp->qp_sq_hdrmwqes;		/* in WQEs */
	sectperwqe = 1 << (qp->qp_sq_log_wqesz - 2);

	tail	  = wq->wq_tail;
	head	  = wq->wq_head;
	status	  = DDI_SUCCESS;

post_next:
	print_rdma = 0;
	rlen = 0;
	strong_order = 0;

	/*
	 * Check for "queue full" condition.  If the queue
	 * is already full, then no more WQEs can be posted.
	 * So break out, ring a doorbell (if necessary) and
	 * return an error.
	 */
	if (wq->wq_full != 0) {
		status = IBT_QP_FULL;
		goto done;
	}
	next_tail = (tail + 1) & qsize_msk;
	if (((tail + hdrmwqes) & qsize_msk) == head) {
		wq->wq_full = 1;
	}

	desc = HERMON_QP_SQ_ENTRY(qp, tail);
	ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)desc +
	    sizeof (hermon_hw_snd_wqe_ctrl_t));
	nds = wr->wr_nds;
	sgl = wr->wr_sgl;
	num_ds = 0;

	if (wr->wr_trans != IBT_RC_SRV) {
		status = IBT_QP_SRV_TYPE_INVALID;
		goto done;
	}

	/*
	 * Validate the operation type.  For RC requests, we allow
	 * "Send", "RDMA Read", "RDMA Write", various "Atomic"
	 * operations, and memory window "Bind"
	 */
	switch (wr->wr_opcode) {
	default:
		status = IBT_QP_OP_TYPE_INVALID;
		goto done;

	case IBT_WRC_SEND:
		if (wr->wr_flags & IBT_WR_SEND_REMOTE_INVAL) {
			nopcode = HERMON_WQE_SEND_NOPCODE_SND_INV;
			immed_data = wr->wr.rc.rcwr.send_inval;
		} else if (wr->wr_flags & IBT_WR_SEND_IMMED) {
			nopcode = HERMON_WQE_SEND_NOPCODE_SENDI;
			immed_data = wr->wr.rc.rcwr.send_immed;
		} else {
			nopcode = HERMON_WQE_SEND_NOPCODE_SEND;
		}
		break;

	/*
	 * If this is an RDMA Read or RDMA Write request, then fill
	 * in the "Remote Address" header fields.
	 */
	case IBT_WRC_RDMAW:
		if (wr->wr_flags & IBT_WR_SEND_IMMED) {
			nopcode = HERMON_WQE_SEND_NOPCODE_RDMAWI;
			immed_data = wr->wr.rc.rcwr.rdma.rdma_immed;
		} else {
			nopcode = HERMON_WQE_SEND_NOPCODE_RDMAW;
		}
		/* FALLTHROUGH */
	case IBT_WRC_RDMAR:
		if (wr->wr_opcode == IBT_WRC_RDMAR)
			nopcode = HERMON_WQE_SEND_NOPCODE_RDMAR;
		rc = (hermon_hw_snd_wqe_remaddr_t *)((uintptr_t)desc +
		    sizeof (hermon_hw_snd_wqe_ctrl_t));

		/*
		 * Build the Remote Address Segment for the WQE, using
		 * the information from the RC work request.
		 */
		HERMON_WQE_BUILD_REMADDR(qp, rc, &wr->wr.rc.rcwr.rdma);

		if (hermon_rdma_debug) {
			print_rdma = hermon_rdma_debug;
			rkey = wr->wr.rc.rcwr.rdma.rdma_rkey;
			raddr = wr->wr.rc.rcwr.rdma.rdma_raddr;
		}

		/* Update "ds" for filling in Data Segments (below) */
		ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)rc +
		    sizeof (hermon_hw_snd_wqe_remaddr_t));
		break;

	/*
	 * If this is one of the Atomic type operations (i.e
	 * Compare-Swap or Fetch-Add), then fill in both the "Remote
	 * Address" header fields and the "Atomic" header fields.
	 */
	case IBT_WRC_CSWAP:
		nopcode = HERMON_WQE_SEND_NOPCODE_ATMCS;
		/* FALLTHROUGH */
	case IBT_WRC_FADD:
		if (wr->wr_opcode == IBT_WRC_FADD)
			nopcode = HERMON_WQE_SEND_NOPCODE_ATMFA;
		rc = (hermon_hw_snd_wqe_remaddr_t *)((uintptr_t)desc +
		    sizeof (hermon_hw_snd_wqe_ctrl_t));
		at = (hermon_hw_snd_wqe_atomic_t *)((uintptr_t)rc +
		    sizeof (hermon_hw_snd_wqe_remaddr_t));

		/*
		 * Build the Remote Address and Atomic Segments for
		 * the WQE, using the information from the RC Atomic
		 * work request.
		 */
		HERMON_WQE_BUILD_RC_ATOMIC_REMADDR(qp, rc, wr);
		HERMON_WQE_BUILD_ATOMIC(qp, at, wr->wr.rc.rcwr.atomic);

		/* Update "ds" for filling in Data Segments (below) */
		ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)at +
		    sizeof (hermon_hw_snd_wqe_atomic_t));

		/*
		 * Update "nds" and "sgl" because Atomic requests have
		 * only a single Data Segment.
		 */
		nds = 1;
		sgl = wr->wr_sgl;
		break;

	/*
	 * If this is memory window Bind operation, then we call the
	 * hermon_wr_bind_check() routine to validate the request and
	 * to generate the updated RKey.  If this is successful, then
	 * we fill in the WQE's "Bind" header fields.
	 */
	case IBT_WRC_BIND:
		nopcode = HERMON_WQE_SEND_NOPCODE_BIND;
		status = hermon_wr_bind_check(state, wr);
		if (status != DDI_SUCCESS)
			goto done;

		bn = (hermon_hw_snd_wqe_bind_t *)((uintptr_t)desc +
		    sizeof (hermon_hw_snd_wqe_ctrl_t));

		/*
		 * Build the Bind Memory Window Segments for the WQE,
		 * using the information from the RC Bind memory
		 * window work request.
		 */
		HERMON_WQE_BUILD_BIND(qp, bn, wr->wr.rc.rcwr.bind);

		/*
		 * Update the "ds" pointer.  Even though the "bind"
		 * operation requires no SGLs, this is necessary to
		 * facilitate the correct descriptor size calculations
		 * (below).
		 */
		ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)bn +
		    sizeof (hermon_hw_snd_wqe_bind_t));
		nds = 0;
		break;

	case IBT_WRC_FAST_REG_PMR:
		nopcode = HERMON_WQE_SEND_NOPCODE_FRWR;
		frwr = (hermon_hw_snd_wqe_frwr_t *)((uintptr_t)desc +
		    sizeof (hermon_hw_snd_wqe_ctrl_t));
		HERMON_WQE_BUILD_FRWR(qp, frwr, wr->wr.rc.rcwr.reg_pmr);
		ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)frwr +
		    sizeof (hermon_hw_snd_wqe_frwr_t));
		nds = 0;
		strong_order = 0x80;
		break;

	case IBT_WRC_LOCAL_INVALIDATE:
		nopcode = HERMON_WQE_SEND_NOPCODE_LCL_INV;
		li = (hermon_hw_snd_wqe_local_inv_t *)((uintptr_t)desc +
		    sizeof (hermon_hw_snd_wqe_ctrl_t));
		HERMON_WQE_BUILD_LI(qp, li, wr->wr.rc.rcwr.li);
		ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)li +
		    sizeof (hermon_hw_snd_wqe_local_inv_t));
		nds = 0;
		strong_order = 0x80;
		break;
	}
	/*
	 * Now fill in the Data Segments (SGL) for the Send WQE based
	 * on the values setup above (i.e. "sgl", "nds", and the "ds"
	 * pointer).  Start by checking for a valid number of SGL entries
	 */
	if (nds > qp->qp_sq_sgl) {
		status = IBT_QP_SGL_LEN_INVALID;
		goto done;
	}

	for (last_ds = num_ds, i = 0; i < nds; i++) {
		if (sgl[i].ds_len != 0)
			last_ds++;	/* real last ds of wqe to fill */
	}
	desc_sz = ((uintptr_t)&ds[last_ds] - (uintptr_t)desc) >> 0x4;
	for (i = nds; --i >= 0; ) {
		if (sgl[i].ds_len == 0) {
			continue;
		}
		rlen += sgl[i].ds_len;
		if (print_rdma & 0x2)
			IBTF_DPRINTF_L2("rdma", "post: [%d]: laddr %llx "
			    "llen %x", i, sgl[i].ds_va, sgl[i].ds_len);

		/*
		 * Fill in the Data Segment(s) for the current WQE, using the
		 * information contained in the scatter-gather list of the
		 * work request.
		 */
		last_ds--;
		HERMON_WQE_BUILD_DATA_SEG_SEND(&ds[last_ds], &sgl[i]);
	}

	/* ensure RDMA READ does not exceed HCA limit */
	if ((wr->wr_opcode == IBT_WRC_RDMAR) && (desc_sz >
	    state->hs_ibtfinfo.hca_attr->hca_conn_rdma_read_sgl_sz + 2)) {
		status = IBT_QP_SGL_LEN_INVALID;
		goto done;
	}

	if (print_rdma & 0x1) {
		IBTF_DPRINTF_L2("rdma", "post: indx %x rkey %x raddr %llx "
		    "total len %x", tail, rkey, raddr, rlen);
	}
	fence = (wr->wr_flags & IBT_WR_SEND_FENCE) ? 1 : 0;

	signaled_dbd = ((qp->qp_sq_sigtype == HERMON_QP_SQ_ALL_SIGNALED) ||
	    (wr->wr_flags & IBT_WR_SEND_SIGNAL)) ? 0xC : 0;

	solicited = (wr->wr_flags & IBT_WR_SEND_SOLICIT) ? 0x2 : 0;

	HERMON_WQE_SET_CTRL_SEGMENT(desc, desc_sz, fence, immed_data, solicited,
	    signaled_dbd, 0, qp, strong_order, 0);

	wq->wq_wrid[tail] = wr->wr_id;

	tail = next_tail;

	/* Update some of the state in the QP */
	wq->wq_tail = tail;

	membar_producer();

	/* Now set the ownership bit of the first one in the chain. */
	HERMON_SET_SEND_WQE_OWNER(qp, (uint32_t *)desc, nopcode);

	posted_cnt++;
	if (--num_wr > 0) {
		/* do the invalidate of the headroom */
		wqe_start = (uint32_t *)HERMON_QP_SQ_ENTRY(qp,
		    (tail + hdrmwqes) & qsize_msk);
		for (i = 16; i < sectperwqe; i += 16) {
			wqe_start[i] = 0xFFFFFFFF;
		}

		wr++;
		goto post_next;
	}

done:
	if (posted_cnt != 0) {
		ddi_acc_handle_t uarhdl = hermon_get_uarhdl(state);

		membar_producer();

		/* the FMA retry loop starts for Hermon doorbell register. */
		hermon_pio_start(state, uarhdl, pio_error, fm_loop_cnt,
		    fm_status, fm_test_num);

		/* Ring the doorbell */
		HERMON_UAR_DOORBELL(state, uarhdl,
		    (uint64_t *)(void *)&state->hs_uar->send,
		    (uint64_t)qp->qp_ring);

		/* the FMA retry loop ends. */
		hermon_pio_end(state, uarhdl, pio_error, fm_loop_cnt,
		    fm_status, fm_test_num);

		/* do the invalidate of the headroom */
		wqe_start = (uint32_t *)HERMON_QP_SQ_ENTRY(qp,
		    (tail + hdrmwqes) & qsize_msk);
		for (i = 16; i < sectperwqe; i += 16) {
			wqe_start[i] = 0xFFFFFFFF;
		}
	}

	/*
	 * Update the "num_posted" return value (if necessary).
	 * Then drop the locks and return success.
	 */
	if (num_posted != NULL) {
		*num_posted = posted_cnt;
	}

	mutex_exit(&qp->qp_sq_lock);
	return (status);

pio_error:
	mutex_exit(&qp->qp_sq_lock);
	hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
	return (ibc_get_ci_failure(0));
}
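/*
 * Note: hermon_post_send_ud() and hermon_post_send_rc() above are the
 * optimized single-transport fast paths.  hermon_post_send() below
 * dispatches to them for the common UD and RC cases and falls back to a
 * generic chain-building loop (via hermon_wqe_send_build()) for special
 * QPs and UC service.
 */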
/*
 * hermon_post_send()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_post_send(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_send_wr_t *wr, uint_t num_wr, uint_t *num_posted)
{
	ibt_send_wr_t			*curr_wr;
	hermon_workq_hdr_t		*wq;
	hermon_ahhdl_t			ah;
	uint64_t			*desc, *prev;
	uint32_t			desc_sz;
	uint32_t			signaled_dbd, solicited;
	uint32_t			head, tail, next_tail, qsize_msk;
	uint32_t			hdrmwqes;
	uint_t				currindx, wrindx, numremain;
	uint_t				chainlen;
	uint_t				posted_cnt, maxstat;
	uint_t				total_posted;
	int				status;
	uint32_t			nopcode, fence, immed_data = 0;
	uint32_t			prev_nopcode;
	uint_t				qp_state;

	/* initialize the FMA retry loop */
	hermon_pio_init(fm_loop_cnt, fm_status, fm_test);

	/*
	 * Check for user-mappable QP memory.  Note:  We do not allow kernel
	 * clients to post to QP memory that is accessible directly by the
	 * user.  If the QP memory is user accessible, then return an error.
	 */
	if (qp->qp_alloc_flags & IBT_QP_USER_MAP) {
		return (IBT_QP_HDL_INVALID);
	}

	mutex_enter(&qp->qp_sq_lock);

	/*
	 * Check QP state.  Can not post Send requests from the "Reset",
	 * "Init", or "RTR" states
	 */
	qp_state = qp->qp_state_for_post_send;
	if ((qp_state == HERMON_QP_RESET) ||
	    (qp_state == HERMON_QP_INIT) ||
	    (qp_state == HERMON_QP_RTR)) {
		mutex_exit(&qp->qp_sq_lock);
		return (IBT_QP_STATE_INVALID);
	}

	if (qp->qp_is_special)
		goto post_many;

	/* Use these optimized functions most of the time */
	if (qp->qp_type == IBT_UD_RQP) {
		return (hermon_post_send_ud(state, qp, wr, num_wr, num_posted));
	}

	if (qp->qp_serv_type == HERMON_QP_RC) {
		return (hermon_post_send_rc(state, qp, wr, num_wr, num_posted));
	}

	if (qp->qp_serv_type == HERMON_QP_UC)
		goto post_many;

	mutex_exit(&qp->qp_sq_lock);
	return (IBT_QP_SRV_TYPE_INVALID);

post_many:
	/* general loop for non-optimized posting */

	/* Save away some initial QP state */
	wq = qp->qp_sq_wqhdr;
	qsize_msk = wq->wq_mask;
	tail	  = wq->wq_tail;
	head	  = wq->wq_head;
	hdrmwqes  = qp->qp_sq_hdrmwqes;		/* in WQEs */

	/* Initialize posted_cnt */
	posted_cnt = 0;
	total_posted = 0;
	/*
	 * For each ibt_send_wr_t in the wr[] list passed in, parse the
	 * request and build a Send WQE.  NOTE:  Because we are potentially
	 * building a chain of WQEs to post, we want to build them all first,
	 * and set the valid (HW Ownership) bit on all but the first.
	 * However, we do not want to validate the first one until the
	 * entire chain of WQEs has been built.  Then, in the final step,
	 * we set the valid bit in the first, flush if needed, and as a last
	 * step ring the appropriate doorbell.  NOTE: the doorbell ring may
	 * NOT be needed if the HCA is already processing, but the doorbell
	 * ring will be done regardless.  NOTE ALSO:  It is possible for
	 * more Work Requests to be posted than the HW will support at one
	 * shot.  If this happens, we need to be able to post and ring
	 * several chains here until the entire request is complete.
	 * NOTE ALSO:  the term "chain" is used to differentiate it from
	 * the Work Request List passed in, and because that's the terminology
	 * from the previous generations of HCA - but the WQEs are not, in
	 * fact, chained together for Hermon.
	 */

	wrindx = 0;
	numremain = num_wr;
	status	  = DDI_SUCCESS;
	while ((wrindx < num_wr) && (status == DDI_SUCCESS)) {
		/*
		 * For the first WQE on a new chain we need "prev" to point
		 * to the current descriptor.
		 */
		prev = HERMON_QP_SQ_ENTRY(qp, tail);

		/*
		 * Break the request up into lists that are less than or
		 * equal to the maximum number of WQEs that can be posted
		 * per doorbell ring - 256 currently
		 */
		chainlen = (numremain > HERMON_QP_MAXDESC_PER_DB) ?
		    HERMON_QP_MAXDESC_PER_DB : numremain;
		numremain -= chainlen;

		for (currindx = 0; currindx < chainlen; currindx++, wrindx++) {
			/*
			 * Check for "queue full" condition.  If the queue
			 * is already full, then no more WQEs can be posted.
			 * So break out, ring a doorbell (if necessary) and
			 * return an error.
			 */
			if (wq->wq_full != 0) {
				status = IBT_QP_FULL;
				break;
			}

			/*
			 * Increment the "tail index".  Check for "queue
			 * full" condition incl. headroom.  If we detect that
			 * the current work request is going to fill the work
			 * queue, then we mark this condition and continue.
			 * Don't need >=, because going one-by-one we have to
			 * hit it exactly sooner or later
			 */
			next_tail = (tail + 1) & qsize_msk;
			if (((tail + hdrmwqes) & qsize_msk) == head) {
				wq->wq_full = 1;
			}

			/*
			 * Get the address of the location where the next
			 * Send WQE should be built
			 */
			desc = HERMON_QP_SQ_ENTRY(qp, tail);

			/*
			 * Call hermon_wqe_send_build() to build the WQE
			 * at the given address.  This routine uses the
			 * information in the ibt_send_wr_t list (wr[]) and
			 * returns the size of the WQE when it returns.
			 */
			status = hermon_wqe_send_build(state, qp,
			    &wr[wrindx], desc, &desc_sz);
			if (status != DDI_SUCCESS) {
				break;
			}

			/*
			 * Now, build the Ctrl Segment based on
			 * what was just done
			 */
			curr_wr = &wr[wrindx];

			switch (curr_wr->wr_opcode) {
			case IBT_WRC_RDMAW:
				if (curr_wr->wr_flags & IBT_WR_SEND_IMMED) {
					nopcode =
					    HERMON_WQE_SEND_NOPCODE_RDMAWI;
					immed_data =
					    hermon_wr_get_immediate(curr_wr);
				} else {
					nopcode = HERMON_WQE_SEND_NOPCODE_RDMAW;
				}
				break;

			case IBT_WRC_SEND:
				if (curr_wr->wr_flags & IBT_WR_SEND_IMMED) {
					nopcode = HERMON_WQE_SEND_NOPCODE_SENDI;
					immed_data =
					    hermon_wr_get_immediate(curr_wr);
				} else {
					nopcode = HERMON_WQE_SEND_NOPCODE_SEND;
				}
				break;

			case IBT_WRC_SEND_LSO:
				nopcode = HERMON_WQE_SEND_NOPCODE_LSO;
				break;

			case IBT_WRC_RDMAR:
				nopcode = HERMON_WQE_SEND_NOPCODE_RDMAR;
				break;

			case IBT_WRC_CSWAP:
				nopcode = HERMON_WQE_SEND_NOPCODE_ATMCS;
				break;

			case IBT_WRC_FADD:
				nopcode = HERMON_WQE_SEND_NOPCODE_ATMFA;
				break;

			case IBT_WRC_BIND:
				nopcode = HERMON_WQE_SEND_NOPCODE_BIND;
				break;
			}

			fence = (curr_wr->wr_flags & IBT_WR_SEND_FENCE) ? 1 : 0;

			/*
			 * now, build up the control segment, leaving the
			 * owner bit as it is
			 */

			if ((qp->qp_sq_sigtype == HERMON_QP_SQ_ALL_SIGNALED) ||
			    (curr_wr->wr_flags & IBT_WR_SEND_SIGNAL)) {
				signaled_dbd = 0xC;
			} else {
				signaled_dbd = 0;
			}

			if (curr_wr->wr_flags & IBT_WR_SEND_SOLICIT)
				solicited = 0x2;
			else
				solicited = 0;

			if (qp->qp_is_special) {
				/* Ensure correctness, set the ReRead bit */
				nopcode |= (1 << 6);
				ah = (hermon_ahhdl_t)
				    curr_wr->wr.ud.udwr_dest->ud_ah;
				mutex_enter(&ah->ah_lock);
				maxstat = ah->ah_udav->max_stat_rate;
				HERMON_WQE_SET_MLX_CTRL_SEGMENT(desc, desc_sz,
				    signaled_dbd, maxstat, ah->ah_udav->rlid,
				    qp, ah->ah_udav->sl);
				mutex_exit(&ah->ah_lock);
			} else {
				HERMON_WQE_SET_CTRL_SEGMENT(desc, desc_sz,
				    fence, immed_data, solicited,
				    signaled_dbd, 0, qp, 0, 0);
			}
			wq->wq_wrid[tail] = curr_wr->wr_id;

			/*
			 * If this is not the first descriptor on the current
			 * chain, then set the ownership bit.
			 */
			if (currindx != 0) {		/* not the first */
				membar_producer();
				HERMON_SET_SEND_WQE_OWNER(qp,
				    (uint32_t *)desc, nopcode);
			} else
				prev_nopcode = nopcode;
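			/*
			 * Note: recording "prev_nopcode" instead of writing
			 * it defers the first WQE's ownership/opcode word
			 * until the whole chain has been built (see below),
			 * so the hardware can never start fetching a
			 * partially constructed chain.
			 */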
			/*
			 * Update the current "tail index" and increment
			 * "posted_cnt"
			 */
			tail = next_tail;
			posted_cnt++;
		}

		/*
		 * If we reach here and there are one or more WQEs which have
		 * been successfully built as a chain, we have to finish up
		 * and prepare them for writing to the HW.
		 * The steps are:
		 *	1. do the headroom fixup
		 *	2. add in the size of the headroom for the sync
		 *	3. write the owner bit for the first WQE
		 *	4. sync them
		 *	5. fix up the structures
		 *	6. hit the doorbell in UAR
		 */
		if (posted_cnt != 0) {
			ddi_acc_handle_t uarhdl = hermon_get_uarhdl(state);

			/* do the invalidate of the headroom */
			hermon_wqe_headroom(tail, qp);

			/* Update some of the state in the QP */
			wq->wq_tail = tail;
			total_posted += posted_cnt;
			posted_cnt = 0;

			membar_producer();

			/*
			 * Now set the ownership bit of the first
			 * one in the chain
			 */
			HERMON_SET_SEND_WQE_OWNER(qp, (uint32_t *)prev,
			    prev_nopcode);

			/* the FMA retry loop starts for Hermon doorbell. */
			hermon_pio_start(state, uarhdl, pio_error, fm_loop_cnt,
			    fm_status, fm_test);

			HERMON_UAR_DOORBELL(state, uarhdl,
			    (uint64_t *)(void *)&state->hs_uar->send,
			    (uint64_t)qp->qp_ring);

			/* the FMA retry loop ends. */
			hermon_pio_end(state, uarhdl, pio_error, fm_loop_cnt,
			    fm_status, fm_test);
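			/*
			 * Note: a single UAR doorbell ring covers the whole
			 * chain just posted.  As the block comment above
			 * observes, the ring may even be redundant when the
			 * HCA is already working on this send queue, but it
			 * is issued unconditionally.
			 */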
		}
	}

	/*
	 * Update the "num_posted" return value (if necessary).
	 * Then drop the locks and return success.
	 */
	if (num_posted != NULL) {
		*num_posted = total_posted;
	}
	mutex_exit(&qp->qp_sq_lock);
	return (status);

pio_error:
	mutex_exit(&qp->qp_sq_lock);
	hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
	return (ibc_get_ci_failure(0));
}
/*
 * hermon_post_recv()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_post_recv(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_recv_wr_t *wr, uint_t num_wr, uint_t *num_posted)
{
	uint64_t			*desc;
	hermon_workq_hdr_t		*wq;
	uint32_t			head, tail, next_tail, qsize_msk;
	uint_t				wrindx;
	uint_t				posted_cnt;
	int				status;

	/*
	 * Check for user-mappable QP memory.  Note:  We do not allow kernel
	 * clients to post to QP memory that is accessible directly by the
	 * user.  If the QP memory is user accessible, then return an error.
	 */
	if (qp->qp_alloc_flags & IBT_QP_USER_MAP) {
		return (IBT_QP_HDL_INVALID);
	}

	/* Initialize posted_cnt */
	posted_cnt = 0;

	mutex_enter(&qp->qp_lock);

	/*
	 * Check if QP is associated with an SRQ
	 */
	if (qp->qp_alloc_flags & IBT_QP_USES_SRQ) {
		mutex_exit(&qp->qp_lock);
		return (IBT_SRQ_IN_USE);
	}

	/*
	 * Check QP state.  Can not post Recv requests from the "Reset" state
	 */
	if (qp->qp_state == HERMON_QP_RESET) {
		mutex_exit(&qp->qp_lock);
		return (IBT_QP_STATE_INVALID);
	}

	/* Check that work request transport type is valid */
	if ((qp->qp_type != IBT_UD_RQP) &&
	    (qp->qp_serv_type != HERMON_QP_RC) &&
	    (qp->qp_serv_type != HERMON_QP_UC)) {
		mutex_exit(&qp->qp_lock);
		return (IBT_QP_SRV_TYPE_INVALID);
	}

	/*
	 * Grab the lock for the WRID list, i.e., membar_consumer().
	 * This is not needed because the mutex_enter() above has
	 * the same effect.
	 */

	/* Save away some initial QP state */
	wq = qp->qp_rq_wqhdr;
	qsize_msk = wq->wq_mask;
	tail = wq->wq_tail;
	head = wq->wq_head;

	status	  = DDI_SUCCESS;

	for (wrindx = 0; wrindx < num_wr; wrindx++) {
		if (wq->wq_full != 0) {
			status = IBT_QP_FULL;
			break;
		}
		next_tail = (tail + 1) & qsize_msk;
		if (next_tail == head) {
			wq->wq_full = 1;
		}
		desc = HERMON_QP_RQ_ENTRY(qp, tail);
		status = hermon_wqe_recv_build(state, qp, &wr[wrindx], desc);
		if (status != DDI_SUCCESS) {
			break;
		}

		wq->wq_wrid[tail] = wr[wrindx].wr_id;
		qp->qp_rq_wqecntr++;

		tail = next_tail;
		posted_cnt++;
	}

	if (posted_cnt != 0) {

		wq->wq_tail = tail;

		membar_producer();	/* ensure wrids are visible */

		/* Update the doorbell record w/ wqecntr */
		HERMON_UAR_DB_RECORD_WRITE(qp->qp_rq_vdbr,
		    qp->qp_rq_wqecntr & 0xFFFF);
	}

	if (num_posted != NULL) {
		*num_posted = posted_cnt;
	}

	mutex_exit(&qp->qp_lock);
	return (status);
}
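/*
 * Note the contrast with the send-side routines above: receive queues do
 * not use the UAR doorbell register.  Posting receive WQEs only updates
 * the in-memory doorbell record with the low 16 bits of the WQE counter,
 * which the hardware samples directly.
 */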
/*
 * hermon_post_srq()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_post_srq(hermon_state_t *state, hermon_srqhdl_t srq,
    ibt_recv_wr_t *wr, uint_t num_wr, uint_t *num_posted)
{
	uint64_t			*desc;
	hermon_workq_hdr_t		*wq;
	uint_t				indx, wrindx;
	uint_t				posted_cnt;
	int				status;

	mutex_enter(&srq->srq_lock);

	/*
	 * Check for user-mappable QP memory.  Note:  We do not allow kernel
	 * clients to post to QP memory that is accessible directly by the
	 * user.  If the QP memory is user accessible, then return an error.
	 */
	if (srq->srq_is_umap) {
		mutex_exit(&srq->srq_lock);
		return (IBT_SRQ_HDL_INVALID);
	}

	/*
	 * Check SRQ state.  Can not post Recv requests when SRQ is in error
	 */
	if (srq->srq_state == HERMON_SRQ_STATE_ERROR) {
		mutex_exit(&srq->srq_lock);
		return (IBT_QP_STATE_INVALID);
	}

	status = DDI_SUCCESS;
	posted_cnt = 0;
	wq = srq->srq_wq_wqhdr;
	indx = wq->wq_head;

	for (wrindx = 0; wrindx < num_wr; wrindx++) {

		if (indx == wq->wq_tail) {
			status = IBT_QP_FULL;
			break;
		}
		desc = HERMON_SRQ_WQE_ADDR(srq, indx);

		wq->wq_wrid[indx] = wr[wrindx].wr_id;

		status = hermon_wqe_srq_build(state, srq, &wr[wrindx], desc);
		if (status != DDI_SUCCESS) {
			break;
		}

		posted_cnt++;
		indx = htons(((uint16_t *)desc)[1]);
		wq->wq_head = indx;
	}
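	/*
	 * Note: SRQ WQEs are kept on a free list threaded through the WQEs
	 * themselves.  The second (big-endian) 16-bit word of each
	 * descriptor holds the index of the next free entry, which is why
	 * the loop above pulls the next "indx" out of the descriptor it
	 * just built.
	 */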
	if (posted_cnt != 0) {

		srq->srq_wq_wqecntr += posted_cnt;

		membar_producer();	/* ensure wrids are visible */

		/* Ring the doorbell w/ wqecntr */
		HERMON_UAR_DB_RECORD_WRITE(srq->srq_wq_vdbr,
		    srq->srq_wq_wqecntr & 0xFFFF);
	}

	if (num_posted != NULL) {
		*num_posted = posted_cnt;
	}

	mutex_exit(&srq->srq_lock);
	return (status);
}
/*
 * hermon_wqe_send_build()
 *    Context: Can be called from interrupt or base context.
 */
static int
hermon_wqe_send_build(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_send_wr_t *wr, uint64_t *desc, uint_t *size)
{
	hermon_hw_snd_wqe_ud_t		*ud;
	hermon_hw_snd_wqe_remaddr_t	*rc;
	hermon_hw_snd_wqe_atomic_t	*at;
	hermon_hw_snd_wqe_remaddr_t	*uc;
	hermon_hw_snd_wqe_bind_t	*bn;
	hermon_hw_wqe_sgl_t		*ds, *old_ds;
	ibt_ud_dest_t			*dest;
	ibt_wr_ds_t			*sgl;
	hermon_ahhdl_t			ah;
	uint32_t			nds;
	int				i, j, last_ds, num_ds, status;
	int				tmpsize;
	int				total_len;

	ASSERT(MUTEX_HELD(&qp->qp_sq_lock));

	/* Initialize the information for the Data Segments */
	ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)desc +
	    sizeof (hermon_hw_snd_wqe_ctrl_t));
	nds = wr->wr_nds;
	sgl = wr->wr_sgl;
	num_ds = 0;
	i = 0;

	/*
	 * Building a Send WQE depends first and foremost on the transport
	 * type of the Work Request (i.e. UD, RC, or UC)
	 */
	switch (wr->wr_trans) {
	case IBT_UD_SRV:
		/* Ensure that work request transport type matches QP type */
		if (qp->qp_serv_type != HERMON_QP_UD) {
			return (IBT_QP_SRV_TYPE_INVALID);
		}

		/*
		 * Validate the operation type.  For UD requests, only the
		 * "Send" and "Send LSO" operations are valid.
		 */
		if (wr->wr_opcode != IBT_WRC_SEND &&
		    wr->wr_opcode != IBT_WRC_SEND_LSO) {
			return (IBT_QP_OP_TYPE_INVALID);
		}

		/*
		 * If this is a Special QP (QP0 or QP1), then we need to
		 * build MLX WQEs instead.  So jump to hermon_wqe_mlx_build()
		 * and return whatever status it returns
		 */
		if (qp->qp_is_special) {
			if (wr->wr_opcode == IBT_WRC_SEND_LSO) {
				return (IBT_QP_OP_TYPE_INVALID);
			}
			status = hermon_wqe_mlx_build(state, qp,
			    wr, desc, size);
			return (status);
		}

		/*
		 * Otherwise, if this is a normal UD Send request, then fill
		 * all the fields in the Hermon UD header for the WQE.  Note:
		 * to do this we'll need to extract some information from the
		 * Address Handle passed with the work request.
		 */
		ud = (hermon_hw_snd_wqe_ud_t *)((uintptr_t)desc +
		    sizeof (hermon_hw_snd_wqe_ctrl_t));
		if (wr->wr_opcode == IBT_WRC_SEND) {
			dest = wr->wr.ud.udwr_dest;
		} else {
			dest = wr->wr.ud_lso.lso_ud_dest;
		}
		ah = (hermon_ahhdl_t)dest->ud_ah;
		if (ah == NULL) {
			return (IBT_AH_HDL_INVALID);
		}

		/*
		 * Build the Unreliable Datagram Segment for the WQE, using
		 * the information from the address handle and the work
		 * request.
		 */
		/* mutex_enter(&ah->ah_lock); */
		if (wr->wr_opcode == IBT_WRC_SEND) {
			HERMON_WQE_BUILD_UD(qp, ud, ah, wr->wr.ud.udwr_dest);
		} else {	/* IBT_WRC_SEND_LSO */
			HERMON_WQE_BUILD_UD(qp, ud, ah,
			    wr->wr.ud_lso.lso_ud_dest);
		}
		/* mutex_exit(&ah->ah_lock); */

		/* Update "ds" for filling in Data Segments (below) */
		ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)ud +
		    sizeof (hermon_hw_snd_wqe_ud_t));

		if (wr->wr_opcode == IBT_WRC_SEND_LSO) {
			total_len = (4 + 0xf + wr->wr.ud_lso.lso_hdr_sz) &
			    ~0xf;
			if ((uintptr_t)ds + total_len + (nds * 16) >
			    (uintptr_t)desc + (1 << qp->qp_sq_log_wqesz))
				return (IBT_QP_SGL_LEN_INVALID);

			old_ds = ds;
			bcopy(wr->wr.ud_lso.lso_hdr, (uint32_t *)ds + 1,
			    wr->wr.ud_lso.lso_hdr_sz);
			ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)ds +
			    total_len);
			for (; i < nds; i++) {
				if (sgl[i].ds_len == 0)
					continue;
				HERMON_WQE_BUILD_DATA_SEG_SEND(&ds[num_ds],
				    &sgl[i]);
				num_ds++;
			}

			membar_producer();
			HERMON_WQE_BUILD_LSO(qp, old_ds, wr->wr.ud_lso.lso_mss,
			    wr->wr.ud_lso.lso_hdr_sz);
		}
		break;
	case IBT_RC_SRV:
		/* Ensure that work request transport type matches QP type */
		if (qp->qp_serv_type != HERMON_QP_RC) {
			return (IBT_QP_SRV_TYPE_INVALID);
		}

		/*
		 * Validate the operation type.  For RC requests, we allow
		 * "Send", "RDMA Read", "RDMA Write", various "Atomic"
		 * operations, and memory window "Bind"
		 */
		if ((wr->wr_opcode != IBT_WRC_SEND) &&
		    (wr->wr_opcode != IBT_WRC_RDMAR) &&
		    (wr->wr_opcode != IBT_WRC_RDMAW) &&
		    (wr->wr_opcode != IBT_WRC_CSWAP) &&
		    (wr->wr_opcode != IBT_WRC_FADD) &&
		    (wr->wr_opcode != IBT_WRC_BIND)) {
			return (IBT_QP_OP_TYPE_INVALID);
		}

		/*
		 * If this is a Send request, then all we need to do is break
		 * out here and begin the Data Segment processing below
		 */
		if (wr->wr_opcode == IBT_WRC_SEND) {
			break;
		}

		/*
		 * If this is an RDMA Read or RDMA Write request, then fill
		 * in the "Remote Address" header fields.
		 */
		if ((wr->wr_opcode == IBT_WRC_RDMAR) ||
		    (wr->wr_opcode == IBT_WRC_RDMAW)) {
			rc = (hermon_hw_snd_wqe_remaddr_t *)((uintptr_t)desc +
			    sizeof (hermon_hw_snd_wqe_ctrl_t));

			/*
			 * Build the Remote Address Segment for the WQE, using
			 * the information from the RC work request.
			 */
			HERMON_WQE_BUILD_REMADDR(qp, rc, &wr->wr.rc.rcwr.rdma);

			/* Update "ds" for filling in Data Segments (below) */
			ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)rc +
			    sizeof (hermon_hw_snd_wqe_remaddr_t));
			break;
		}

		/*
		 * If this is one of the Atomic type operations (i.e
		 * Compare-Swap or Fetch-Add), then fill in both the "Remote
		 * Address" header fields and the "Atomic" header fields.
		 */
		if ((wr->wr_opcode == IBT_WRC_CSWAP) ||
		    (wr->wr_opcode == IBT_WRC_FADD)) {
			rc = (hermon_hw_snd_wqe_remaddr_t *)((uintptr_t)desc +
			    sizeof (hermon_hw_snd_wqe_ctrl_t));
			at = (hermon_hw_snd_wqe_atomic_t *)((uintptr_t)rc +
			    sizeof (hermon_hw_snd_wqe_remaddr_t));

			/*
			 * Build the Remote Address and Atomic Segments for
			 * the WQE, using the information from the RC Atomic
			 * work request.
			 */
			HERMON_WQE_BUILD_RC_ATOMIC_REMADDR(qp, rc, wr);
			HERMON_WQE_BUILD_ATOMIC(qp, at, wr->wr.rc.rcwr.atomic);

			/* Update "ds" for filling in Data Segments (below) */
			ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)at +
			    sizeof (hermon_hw_snd_wqe_atomic_t));

			/*
			 * Update "nds" and "sgl" because Atomic requests have
			 * only a single Data Segment (and they are encoded
			 * somewhat differently in the work request.)
			 */
			nds = 1;
			sgl = wr->wr_sgl;
			break;
		}

		/*
		 * If this is memory window Bind operation, then we call the
		 * hermon_wr_bind_check() routine to validate the request and
		 * to generate the updated RKey.  If this is successful, then
		 * we fill in the WQE's "Bind" header fields.
		 */
		if (wr->wr_opcode == IBT_WRC_BIND) {
			status = hermon_wr_bind_check(state, wr);
			if (status != DDI_SUCCESS) {
				return (status);
			}
			bn = (hermon_hw_snd_wqe_bind_t *)((uintptr_t)desc +
			    sizeof (hermon_hw_snd_wqe_ctrl_t));

			/*
			 * Build the Bind Memory Window Segments for the WQE,
			 * using the information from the RC Bind memory
			 * window work request.
			 */
			HERMON_WQE_BUILD_BIND(qp, bn, wr->wr.rc.rcwr.bind);

			/*
			 * Update the "ds" pointer.  Even though the "bind"
			 * operation requires no SGLs, this is necessary to
			 * facilitate the correct descriptor size calculations
			 * (below).
			 */
			ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)bn +
			    sizeof (hermon_hw_snd_wqe_bind_t));
			nds = 0;
		}
		break;
	case IBT_UC_SRV:
		/* Ensure that work request transport type matches QP type */
		if (qp->qp_serv_type != HERMON_QP_UC) {
			return (IBT_QP_SRV_TYPE_INVALID);
		}

		/*
		 * Validate the operation type.  For UC requests, we only
		 * allow "Send", "RDMA Write", and memory window "Bind".
		 * Note: Unlike RC, UC does not allow "RDMA Read" or "Atomic"
		 * operations
		 */
		if ((wr->wr_opcode != IBT_WRC_SEND) &&
		    (wr->wr_opcode != IBT_WRC_RDMAW) &&
		    (wr->wr_opcode != IBT_WRC_BIND)) {
			return (IBT_QP_OP_TYPE_INVALID);
		}

		/*
		 * If this is a Send request, then all we need to do is break
		 * out here and begin the Data Segment processing below
		 */
		if (wr->wr_opcode == IBT_WRC_SEND) {
			break;
		}

		/*
		 * If this is an RDMA Write request, then fill in the "Remote
		 * Address" header fields.
		 */
		if (wr->wr_opcode == IBT_WRC_RDMAW) {
			uc = (hermon_hw_snd_wqe_remaddr_t *)((uintptr_t)desc +
			    sizeof (hermon_hw_snd_wqe_ctrl_t));

			/*
			 * Build the Remote Address Segment for the WQE, using
			 * the information from the UC work request.
			 */
			HERMON_WQE_BUILD_REMADDR(qp, uc, &wr->wr.uc.ucwr.rdma);

			/* Update "ds" for filling in Data Segments (below) */
			ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)uc +
			    sizeof (hermon_hw_snd_wqe_remaddr_t));
			break;
		}

		/*
		 * If this is memory window Bind operation, then we call the
		 * hermon_wr_bind_check() routine to validate the request and
		 * to generate the updated RKey.  If this is successful, then
		 * we fill in the WQE's "Bind" header fields.
		 */
		if (wr->wr_opcode == IBT_WRC_BIND) {
			status = hermon_wr_bind_check(state, wr);
			if (status != DDI_SUCCESS) {
				return (status);
			}
			bn = (hermon_hw_snd_wqe_bind_t *)((uintptr_t)desc +
			    sizeof (hermon_hw_snd_wqe_ctrl_t));

			/*
			 * Build the Bind Memory Window Segments for the WQE,
			 * using the information from the UC Bind memory
			 * window work request.
			 */
			HERMON_WQE_BUILD_BIND(qp, bn, wr->wr.uc.ucwr.bind);

			/*
			 * Update the "ds" pointer.  Even though the "bind"
			 * operation requires no SGLs, this is necessary to
			 * facilitate the correct descriptor size calculations
			 * (below).
			 */
			ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)bn +
			    sizeof (hermon_hw_snd_wqe_bind_t));
			nds = 0;
		}
		break;

	default:
		return (IBT_QP_SRV_TYPE_INVALID);
	}
	/*
	 * Now fill in the Data Segments (SGL) for the Send WQE based on
	 * the values set up above (i.e. "sgl", "nds", and the "ds" pointer).
	 * Start by checking for a valid number of SGL entries
	 */
	if (nds > qp->qp_sq_sgl) {
		return (IBT_QP_SGL_LEN_INVALID);
	}

	/*
	 * For each SGL in the Send Work Request, fill in the Send WQE's data
	 * segments.  Note: We skip any SGL with zero size because Hermon
	 * hardware cannot handle a zero for "byte_cnt" in the WQE.  Actually
	 * the encoding for zero means a 2GB transfer.
	 */
	for (last_ds = num_ds, j = i; j < nds; j++) {
		if (sgl[j].ds_len != 0)
			last_ds++;	/* real last ds of wqe to fill */
	}

	/*
	 * Return the size of descriptor (in 16-byte chunks)
	 * For Hermon, we want them (for now) to be on stride size
	 * boundaries, which was implicit in Tavor/Arbel
	 */
	tmpsize = ((uintptr_t)&ds[last_ds] - (uintptr_t)desc);

	*size = tmpsize >> 0x4;

	for (j = nds; --j >= i; ) {
		if (sgl[j].ds_len == 0) {
			continue;
		}

		/*
		 * Fill in the Data Segment(s) for the current WQE, using the
		 * information contained in the scatter-gather list of the
		 * work request.
		 */
		last_ds--;
		HERMON_WQE_BUILD_DATA_SEG_SEND(&ds[last_ds], &sgl[j]);
	}

	return (DDI_SUCCESS);
}
/*
 * hermon_wqe_mlx_build()
 *    Context: Can be called from interrupt or base context.
 */
static int
hermon_wqe_mlx_build(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_send_wr_t *wr, uint64_t *desc, uint_t *size)
{
	hermon_ahhdl_t		ah;
	hermon_hw_udav_t	*udav;
	ib_lrh_hdr_t		*lrh;
	ib_grh_t		*grh;
	ib_bth_hdr_t		*bth;
	ib_deth_hdr_t		*deth;
	hermon_hw_wqe_sgl_t	*ds;
	ibt_wr_ds_t		*sgl;
	uint8_t			*mgmtclass, *hpoint, *hcount;
	uint32_t		nds, offset, pktlen;
	uint32_t		desc_sz;
	int			i, num_ds;
	int			tmpsize;

	ASSERT(MUTEX_HELD(&qp->qp_sq_lock));

	/* Initialize the information for the Data Segments */
	ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)desc +
	    sizeof (hermon_hw_mlx_wqe_nextctrl_t));

	/*
	 * Pull the address handle from the work request. The UDAV will
	 * be used to answer some questions about the request.
	 */
	ah = (hermon_ahhdl_t)wr->wr.ud.udwr_dest->ud_ah;
	if (ah == NULL) {
		return (IBT_AH_HDL_INVALID);
	}
	mutex_enter(&ah->ah_lock);
	udav = ah->ah_udav;

	/*
	 * If the request is for QP1 and the destination LID is equal to
	 * the Permissive LID, then return an error.  This combination is
	 * not allowed
	 */
	if ((udav->rlid == IB_LID_PERMISSIVE) &&
	    (qp->qp_is_special == HERMON_QP_GSI)) {
		mutex_exit(&ah->ah_lock);
		return (IBT_AH_HDL_INVALID);
	}

	/*
	 * Calculate the size of the packet headers, including the GRH
	 * (if necessary)
	 */
	desc_sz = sizeof (ib_lrh_hdr_t) + sizeof (ib_bth_hdr_t) +
	    sizeof (ib_deth_hdr_t);
	if (udav->grh) {
		desc_sz += sizeof (ib_grh_t);
	}

	/*
	 * Begin to build the first "inline" data segment for the packet
	 * headers.  Note:  By specifying "inline" we can build the contents
	 * of the MAD packet headers directly into the work queue (as part
	 * of the descriptor).  This has the advantage of both speeding
	 * things up and of not requiring the driver to allocate/register
	 * any additional memory for the packet headers.
	 */
	HERMON_WQE_BUILD_INLINE(qp, &ds[0], desc_sz);

	/*
	 * Build Local Route Header (LRH)
	 *    We start here by building the LRH into a temporary location.
	 *    When we have finished we copy the LRH data into the descriptor.
	 *
	 *    Notice that the VL values are hardcoded.  This is not a problem
	 *    because VL15 is decided later based on the value in the MLX
	 *    transport "next/ctrl" header (see the "vl15" bit below), and it
	 *    is otherwise (meaning for QP1) chosen from the SL-to-VL table
	 *    values.  This rule does not hold for loopback packets however
	 *    (all of which bypass the SL-to-VL tables) and it is the reason
	 *    that non-QP0 MADs are setup with VL hardcoded to zero below.
	 *
	 *    Notice also that Source LID is hardcoded to the Permissive LID
	 *    (0xFFFF).  This is also not a problem because if the Destination
	 *    LID is not the Permissive LID, then the "slr" value in the MLX
	 *    transport "next/ctrl" header will be set to zero and the hardware
	 *    will pull the LID from value in the port.
	 */
	lrh = (ib_lrh_hdr_t *)((uintptr_t)&ds[0] + 4);
	pktlen = (desc_sz + 0x100) >> 2;
	HERMON_WQE_BUILD_MLX_LRH(lrh, qp, udav, pktlen);

	/*
	 * Build Global Route Header (GRH)
	 *    This is only built if necessary as defined by the "grh" bit in
	 *    the address vector.  Note:  We also calculate the offset to the
	 *    next header (BTH) based on whether or not the "grh" bit is set.
	 */
	if (udav->grh) {
		/*
		 * If the request is for QP0, then return an error.  The
		 * combination of global routing (GRH) and QP0 is not allowed.
		 */
		if (qp->qp_is_special == HERMON_QP_SMI) {
			mutex_exit(&ah->ah_lock);
			return (IBT_AH_HDL_INVALID);
		}
		grh = (ib_grh_t *)((uintptr_t)lrh + sizeof (ib_lrh_hdr_t));
		HERMON_WQE_BUILD_MLX_GRH(state, grh, qp, udav, pktlen);

		bth = (ib_bth_hdr_t *)((uintptr_t)grh + sizeof (ib_grh_t));
	} else {
		bth = (ib_bth_hdr_t *)((uintptr_t)lrh + sizeof (ib_lrh_hdr_t));
	}
	mutex_exit(&ah->ah_lock);

	/*
	 * Build Base Transport Header (BTH)
	 *    Notice that the M, PadCnt, and TVer fields are all set
	 *    to zero implicitly.  This is true for all Management Datagram
	 *    MADs, whether GSI or SMI.
	 */
	HERMON_WQE_BUILD_MLX_BTH(state, bth, qp, wr);

	/*
	 * Build Datagram Extended Transport Header (DETH)
	 */
	deth = (ib_deth_hdr_t *)((uintptr_t)bth + sizeof (ib_bth_hdr_t));
	HERMON_WQE_BUILD_MLX_DETH(deth, qp);

	/* Ensure that the Data Segment is aligned on a 16-byte boundary */
	ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)deth + sizeof (ib_deth_hdr_t));
	ds = (hermon_hw_wqe_sgl_t *)(((uintptr_t)ds + 0xF) & ~0xF);
	nds = wr->wr_nds;
	sgl = wr->wr_sgl;
	num_ds = 0;

	/*
	 * Now fill in the Data Segments (SGL) for the MLX WQE based on the
	 * values set up above (i.e. "sgl", "nds", and the "ds" pointer).
	 * Start by checking for a valid number of SGL entries
	 */
	if (nds > qp->qp_sq_sgl) {
		return (IBT_QP_SGL_LEN_INVALID);
	}

	/*
	 * For each SGL in the Send Work Request, fill in the MLX WQE's data
	 * segments.  Note: We skip any SGL with zero size because Hermon
	 * hardware cannot handle a zero for "byte_cnt" in the WQE.  Actually
	 * the encoding for zero means a 2GB transfer.  Because of this special
	 * encoding in the hardware, we mask the requested length with
	 * HERMON_WQE_SGL_BYTE_CNT_MASK (so that 2GB will end up encoded as
	 * zero.)
	 */
	mgmtclass = hpoint = hcount = NULL;
	offset = 0;
	for (i = 0; i < nds; i++) {
		if (sgl[i].ds_len == 0) {
			continue;
		}

		/*
		 * Fill in the Data Segment(s) for the MLX send WQE, using
		 * the information contained in the scatter-gather list of
		 * the work request.
		 */
		HERMON_WQE_BUILD_DATA_SEG_SEND(&ds[num_ds], &sgl[i]);

		/*
		 * Search through the contents of all MADs posted to QP0 to
		 * initialize pointers to the places where Directed Route "hop
		 * pointer", "hop count", and "mgmtclass" would be.  Hermon
		 * needs these updated (i.e. incremented or decremented, as
		 * necessary) by software.
		 */
		if (qp->qp_is_special == HERMON_QP_SMI) {

			HERMON_SPECIAL_QP_DRMAD_GET_MGMTCLASS(mgmtclass,
			    offset, sgl[i].ds_va, sgl[i].ds_len);

			HERMON_SPECIAL_QP_DRMAD_GET_HOPPOINTER(hpoint,
			    offset, sgl[i].ds_va, sgl[i].ds_len);

			HERMON_SPECIAL_QP_DRMAD_GET_HOPCOUNT(hcount,
			    offset, sgl[i].ds_va, sgl[i].ds_len);

			offset += sgl[i].ds_len;
		}
		num_ds++;
	}

	/*
	 * Hermon's Directed Route MADs need to have the "hop pointer"
	 * incremented/decremented (as necessary) depending on whether it is
	 * currently less than or greater than the "hop count" (i.e. whether
	 * the MAD is a request or a response.)
	 */
	if (qp->qp_is_special == HERMON_QP_SMI) {
		HERMON_SPECIAL_QP_DRMAD_DO_HOPPOINTER_MODIFY(*mgmtclass,
		    *hpoint, *hcount);
	}

	/*
	 * Now fill in the ICRC Data Segment.  This data segment is inlined
	 * just like the packet headers above, but it is only four bytes and
	 * set to zero (to indicate that we wish the hardware to generate
	 * the ICRC).
	 */
	HERMON_WQE_BUILD_INLINE_ICRC(qp, &ds[num_ds], 4, 0);
	num_ds++;

	/*
	 * Return the size of descriptor (in 16-byte chunks)
	 * For Hermon, we want them (for now) to be on stride size
	 * boundaries, which was implicit in Tavor/Arbel
	 */
	tmpsize = ((uintptr_t)&ds[num_ds] - (uintptr_t)desc);

	*size = tmpsize >> 0x04;

	return (DDI_SUCCESS);
}
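/*
 * A note on the "MLX" WQE format built above: for the special QPs the
 * driver, not the hardware, supplies the LRH/GRH/BTH/DETH packet headers.
 * That is why the headers are built inline in the descriptor itself and
 * are followed by the zeroed four-byte inline ICRC segment, which tells
 * the hardware to compute and insert the ICRC on transmission.
 */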
/*
 * hermon_wqe_recv_build()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
static int
hermon_wqe_recv_build(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_recv_wr_t *wr, uint64_t *desc)
{
	hermon_hw_wqe_sgl_t	*ds;
	int			i, num_ds;

	ASSERT(MUTEX_HELD(&qp->qp_lock));

	/*
	 * Fill in the Data Segments (SGL) for the Recv WQE - we don't
	 * need to reserve space for the ctrl segment, there is none on the
	 * recv queue for hermon, but we will need to put an invalid
	 * (null) scatter pointer per the PRM
	 */
	ds = (hermon_hw_wqe_sgl_t *)(uintptr_t)desc;
	num_ds = 0;

	/* Check for valid number of SGL entries */
	if (wr->wr_nds > qp->qp_rq_sgl) {
		return (IBT_QP_SGL_LEN_INVALID);
	}

	/*
	 * For each SGL in the Recv Work Request, fill in the Recv WQE's data
	 * segments.  Note: We skip any SGL with zero size because Hermon
	 * hardware cannot handle a zero for "byte_cnt" in the WQE.  Actually
	 * the encoding for zero means a 2GB transfer.  Because of this special
	 * encoding in the hardware, we mask the requested length with
	 * HERMON_WQE_SGL_BYTE_CNT_MASK (so that 2GB will end up encoded as
	 * zero.)
	 */
	for (i = 0; i < wr->wr_nds; i++) {
		if (wr->wr_sgl[i].ds_len == 0) {
			continue;
		}

		/*
		 * Fill in the Data Segment(s) for the receive WQE, using the
		 * information contained in the scatter-gather list of the
		 * work request.
		 */
		HERMON_WQE_BUILD_DATA_SEG_RECV(&ds[num_ds], &wr->wr_sgl[i]);
		num_ds++;
	}

	/* put the null sgl pointer as well if needed */
	if (num_ds < qp->qp_rq_sgl) {
		HERMON_WQE_BUILD_DATA_SEG_RECV(&ds[num_ds], &null_sgl);
	}

	return (DDI_SUCCESS);
}
/*
 * hermon_wqe_srq_build()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
static int
hermon_wqe_srq_build(hermon_state_t *state, hermon_srqhdl_t srq,
    ibt_recv_wr_t *wr, uint64_t *desc)
{
	hermon_hw_wqe_sgl_t	*ds;
	int			i, num_ds;

	ASSERT(MUTEX_HELD(&srq->srq_lock));

	/* Fill in the Data Segments (SGL) for the Recv WQE */
	ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)desc +
	    sizeof (hermon_hw_srq_wqe_next_t));
	num_ds = 0;

	/* Check for valid number of SGL entries */
	if (wr->wr_nds > srq->srq_wq_sgl) {
		return (IBT_QP_SGL_LEN_INVALID);
	}

	/*
	 * For each SGL in the Recv Work Request, fill in the Recv WQE's data
	 * segments.  Note: We skip any SGL with zero size because Hermon
	 * hardware cannot handle a zero for "byte_cnt" in the WQE.  Actually
	 * the encoding for zero means a 2GB transfer.  Because of this special
	 * encoding in the hardware, we mask the requested length with
	 * HERMON_WQE_SGL_BYTE_CNT_MASK (so that 2GB will end up encoded as
	 * zero.)
	 */
	for (i = 0; i < wr->wr_nds; i++) {
		if (wr->wr_sgl[i].ds_len == 0) {
			continue;
		}

		/*
		 * Fill in the Data Segment(s) for the receive WQE, using the
		 * information contained in the scatter-gather list of the
		 * work request.
		 */
		HERMON_WQE_BUILD_DATA_SEG_RECV(&ds[num_ds], &wr->wr_sgl[i]);
		num_ds++;
	}

	/*
	 * put in the null sgl pointer as well, if needed
	 */
	if (num_ds < srq->srq_wq_sgl) {
		HERMON_WQE_BUILD_DATA_SEG_RECV(&ds[num_ds], &null_sgl);
	}

	return (DDI_SUCCESS);
}
/*
 * hermon_wr_get_immediate()
 *    Context: Can be called from interrupt or base context.
 */
static uint32_t
hermon_wr_get_immediate(ibt_send_wr_t *wr)
{
	/*
	 * This routine extracts the "immediate data" from the appropriate
	 * location in the IBTF work request.  Because of the way the
	 * work request structure is defined, the location for this data
	 * depends on the actual work request operation type.
	 */

	/* For RDMA Write, test if RC or UC */
	if (wr->wr_opcode == IBT_WRC_RDMAW) {
		if (wr->wr_trans == IBT_RC_SRV) {
			return (wr->wr.rc.rcwr.rdma.rdma_immed);
		} else {	/* IBT_UC_SRV */
			return (wr->wr.uc.ucwr.rdma.rdma_immed);
		}
	}

	/* For Send, test if RC, UD, or UC */
	if (wr->wr_opcode == IBT_WRC_SEND) {
		if (wr->wr_trans == IBT_RC_SRV) {
			return (wr->wr.rc.rcwr.send_immed);
		} else if (wr->wr_trans == IBT_UD_SRV) {
			return (wr->wr.ud.udwr_immed);
		} else {	/* IBT_UC_SRV */
			return (wr->wr.uc.ucwr.send_immed);
		}
	}

	/*
	 * If any other type of request, then immediate is undefined
	 */
	return (0);
}
/*
 * hermon_wqe_headroom()
 *    Context: can be called from interrupt or base, currently only from
 *    base context.
 * Routine that fills in the headroom for the Send Queue
 */
static void
hermon_wqe_headroom(uint_t from, hermon_qphdl_t qp)
{
	uint32_t	*wqe_start, *wqe_top, *wqe_base, qsize;
	int		hdrmwqes, wqesizebytes, sectperwqe;
	uint32_t	invalue;
	int		i, j;

	qsize	 = qp->qp_sq_bufsz;
	wqesizebytes = 1 << qp->qp_sq_log_wqesz;
	sectperwqe = wqesizebytes >> 6;		/* 64 bytes/section */
	hdrmwqes = qp->qp_sq_hdrmwqes;
	wqe_base  = (uint32_t *)HERMON_QP_SQ_ENTRY(qp, 0);
	wqe_top	  = (uint32_t *)HERMON_QP_SQ_ENTRY(qp, qsize);
	wqe_start = (uint32_t *)HERMON_QP_SQ_ENTRY(qp, from);

	for (i = 0; i < hdrmwqes; i++) {
		for (j = 0; j < sectperwqe; j++) {
			if (j == 0) {		/* 1st section of wqe */
				/* preserve ownership bit */
				invalue = ddi_get32(qp->qp_wqinfo.qa_acchdl,
				    wqe_start) | 0x7FFFFFFF;
			} else {
				/* or just invalidate it */
				invalue = 0xFFFFFFFF;
			}
			ddi_put32(qp->qp_wqinfo.qa_acchdl, wqe_start, invalue);
			wqe_start += 16;	/* move 64 bytes */
		}
		if (wqe_start == wqe_top)	/* hit the end of the queue */
			wqe_start = wqe_base;	/* wrap to start */
	}
}
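/*
 * A note on the stamping values above: for the first 64-byte section of
 * each headroom WQE the current ownership bit (bit 31) is preserved and
 * every other bit is set ("| 0x7FFFFFFF"), while each subsequent section
 * is simply overwritten with 0xFFFFFFFF; either way the WQE ends up
 * looking invalid to the hardware's prefetch logic.
 */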
2104 * hermon_wr_bind_check()
2105 * Context: Can be called from interrupt or base context.
2109 hermon_wr_bind_check(hermon_state_t
*state
, ibt_send_wr_t
*wr
)
2111 ibt_bind_flags_t bind_flags
;
2112 uint64_t vaddr
, len
;
2113 uint64_t reg_start_addr
, reg_end_addr
;
2119 /* Check for a valid Memory Window handle in the WR */
2120 mw
= (hermon_mwhdl_t
)wr
->wr
.rc
.rcwr
.bind
->bind_ibt_mw_hdl
;
2122 return (IBT_MW_HDL_INVALID
);
2125 /* Check for a valid Memory Region handle in the WR */
2126 mr
= (hermon_mrhdl_t
)wr
->wr
.rc
.rcwr
.bind
->bind_ibt_mr_hdl
;
2128 return (IBT_MR_HDL_INVALID
);
2131 mutex_enter(&mr
->mr_lock
);
2132 mutex_enter(&mw
->mr_lock
);
2135 * Check here to see if the memory region has already been partially
2136 * deregistered as a result of a hermon_umap_umemlock_cb() callback.
2137 * If so, this is an error, return failure.
2139 if ((mr
->mr_is_umem
) && (mr
->mr_umemcookie
== NULL
)) {
2140 mutex_exit(&mr
->mr_lock
);
2141 mutex_exit(&mw
->mr_lock
);
2142 return (IBT_MR_HDL_INVALID
);
2145 /* Check for a valid Memory Window RKey (i.e. a matching RKey) */
2146 if (mw
->mr_rkey
!= wr
->wr
.rc
.rcwr
.bind
->bind_rkey
) {
2147 mutex_exit(&mr
->mr_lock
);
2148 mutex_exit(&mw
->mr_lock
);
2149 return (IBT_MR_RKEY_INVALID
);
2152 /* Check for a valid Memory Region LKey (i.e. a matching LKey) */
2153 if (mr
->mr_lkey
!= wr
->wr
.rc
.rcwr
.bind
->bind_lkey
) {
2154 mutex_exit(&mr
->mr_lock
);
2155 mutex_exit(&mw
->mr_lock
);
2156 return (IBT_MR_LKEY_INVALID
);
2160 * Now check for valid "vaddr" and "len". Note: We don't check the
2161 * "vaddr" range when "len == 0" (i.e. on unbind operations)
2163 len
= wr
->wr
.rc
.rcwr
.bind
->bind_len
;
2165 vaddr
= wr
->wr
.rc
.rcwr
.bind
->bind_va
;
2166 reg_start_addr
= mr
->mr_bindinfo
.bi_addr
;
2167 reg_end_addr
= mr
->mr_bindinfo
.bi_addr
+
2168 (mr
->mr_bindinfo
.bi_len
- 1);
2169 if ((vaddr
< reg_start_addr
) || (vaddr
> reg_end_addr
)) {
2170 mutex_exit(&mr
->mr_lock
);
2171 mutex_exit(&mw
->mr_lock
);
2172 return (IBT_MR_VA_INVALID
);
2174 vaddr
= (vaddr
+ len
) - 1;
2175 if (vaddr
> reg_end_addr
) {
2176 mutex_exit(&mr
->mr_lock
);
2177 mutex_exit(&mw
->mr_lock
);
2178 return (IBT_MR_LEN_INVALID
);
	/*
	 * Validate the bind access flags.  Remote Write and Atomic access for
	 * the Memory Window require that Local Write access be set in the
	 * corresponding Memory Region.
	 */
	bind_flags = wr->wr.rc.rcwr.bind->bind_flags;
	if (((bind_flags & IBT_WR_BIND_WRITE) ||
	    (bind_flags & IBT_WR_BIND_ATOMIC)) &&
	    !(mr->mr_accflag & IBT_MR_LOCAL_WRITE)) {
		mutex_exit(&mr->mr_lock);
		mutex_exit(&mw->mr_lock);
		return (IBT_MR_ACCESS_REQ_INVALID);
	}
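
	/*
	 * Note: a successful bind computes a fresh RKey for the window
	 * from its backing MPT entry and returns it to the caller via
	 * "bind_rkey_out"; the RKey check above ensures a stale key
	 * from an earlier bind can no longer be used.
	 */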

	/* Calculate the new RKey for the Memory Window */
	mpt = mw->mr_mptrsrcp;
	new_rkey = hermon_mr_keycalc(mpt->hr_indx);
	new_rkey = hermon_mr_key_swap(new_rkey);

	wr->wr.rc.rcwr.bind->bind_rkey_out = new_rkey;
	mw->mr_rkey = new_rkey;

	mutex_exit(&mr->mr_lock);
	mutex_exit(&mw->mr_lock);
	return (DDI_SUCCESS);
}

/*
 * hermon_wrid_from_reset_handling()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
int
hermon_wrid_from_reset_handling(hermon_state_t *state, hermon_qphdl_t qp)
{
	hermon_workq_hdr_t	*swq, *rwq;

	if (qp->qp_alloc_flags & IBT_QP_USER_MAP)
		return (DDI_SUCCESS);

#ifdef __lock_lint
	mutex_enter(&qp->qp_rq_cqhdl->cq_lock);
	mutex_enter(&qp->qp_sq_cqhdl->cq_lock);
#else
	/* grab the cq lock(s) to modify the wqavl tree */
	if (qp->qp_rq_cqhdl)
		mutex_enter(&qp->qp_rq_cqhdl->cq_lock);
	if (qp->qp_rq_cqhdl != qp->qp_sq_cqhdl &&
	    qp->qp_sq_cqhdl != NULL)
		mutex_enter(&qp->qp_sq_cqhdl->cq_lock);
#endif
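
	/*
	 * Lock ordering note: the receive CQ's lock is taken first and
	 * the send CQ's lock second (and only when the two CQs are
	 * distinct); the exit path below releases them in the reverse
	 * order.
	 */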

	/* Chain the newly allocated work queue header to the CQ's list */
	if (qp->qp_sq_cqhdl)
		hermon_cq_workq_add(qp->qp_sq_cqhdl, &qp->qp_sq_wqavl);
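
	/*
	 * The QP is leaving the reset state, so the send work queue
	 * header starts over: head and tail return to zero and the
	 * "full" flag is cleared, discarding any stale WRID state.
	 */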
	swq = qp->qp_sq_wqhdr;
	swq->wq_head = 0;
	swq->wq_tail = 0;
	swq->wq_full = 0;

	/*
	 * Now we repeat all the above operations for the receive work queue,
	 * or shared receive work queue.
	 *
	 * Note: We still use the 'qp_rq_cqhdl' even in the SRQ case.
	 */

#ifdef __lock_lint
	mutex_enter(&qp->qp_srqhdl->srq_lock);
#else
	if (qp->qp_alloc_flags & IBT_QP_USES_SRQ) {
		mutex_enter(&qp->qp_srqhdl->srq_lock);
	} else {
		rwq = qp->qp_rq_wqhdr;
		rwq->wq_head = 0;
		rwq->wq_tail = 0;
		rwq->wq_full = 0;
		qp->qp_rq_wqecntr = 0;
	}
#endif
	hermon_cq_workq_add(qp->qp_rq_cqhdl, &qp->qp_rq_wqavl);

#ifdef __lock_lint
	mutex_exit(&qp->qp_srqhdl->srq_lock);
#else
	if (qp->qp_alloc_flags & IBT_QP_USES_SRQ) {
		mutex_exit(&qp->qp_srqhdl->srq_lock);
	}
#endif

#ifdef __lock_lint
	mutex_exit(&qp->qp_sq_cqhdl->cq_lock);
	mutex_exit(&qp->qp_rq_cqhdl->cq_lock);
#else
	if (qp->qp_rq_cqhdl != qp->qp_sq_cqhdl &&
	    qp->qp_sq_cqhdl != NULL)
		mutex_exit(&qp->qp_sq_cqhdl->cq_lock);
	if (qp->qp_rq_cqhdl)
		mutex_exit(&qp->qp_rq_cqhdl->cq_lock);
#endif

	return (DDI_SUCCESS);
}

/*
 * hermon_wrid_to_reset_handling()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_wrid_to_reset_handling(hermon_state_t *state, hermon_qphdl_t qp)
{
	if (qp->qp_alloc_flags & IBT_QP_USER_MAP)
		return (DDI_SUCCESS);

	/*
	 * If there are unpolled entries in these CQs, they are
	 * to be ignored.
	 * Grab the CQ lock(s) before manipulating the lists.
	 */
#ifdef __lock_lint
	mutex_enter(&qp->qp_rq_cqhdl->cq_lock);
	mutex_enter(&qp->qp_sq_cqhdl->cq_lock);
#else
	/* grab the cq lock(s) to modify the wqavl tree */
	if (qp->qp_rq_cqhdl)
		mutex_enter(&qp->qp_rq_cqhdl->cq_lock);
	if (qp->qp_rq_cqhdl != qp->qp_sq_cqhdl &&
	    qp->qp_sq_cqhdl != NULL)
		mutex_enter(&qp->qp_sq_cqhdl->cq_lock);
#endif

#ifdef __lock_lint
	mutex_enter(&qp->qp_srqhdl->srq_lock);
#else
	if (qp->qp_alloc_flags & IBT_QP_USES_SRQ) {
		mutex_enter(&qp->qp_srqhdl->srq_lock);
	}
#endif

	/*
	 * Flush the entries on the CQ for this QP's QPN.
	 */
	hermon_cq_entries_flush(state, qp);

#ifdef __lock_lint
	mutex_exit(&qp->qp_srqhdl->srq_lock);
#else
	if (qp->qp_alloc_flags & IBT_QP_USES_SRQ) {
		mutex_exit(&qp->qp_srqhdl->srq_lock);
	}
#endif
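
	/*
	 * Note: for SRQ-attached QPs the flush above runs with the SRQ
	 * lock held, since flushing returns this QP's outstanding
	 * receive WQEs to the shared queue's free list.
	 */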

	hermon_cq_workq_remove(qp->qp_rq_cqhdl, &qp->qp_rq_wqavl);
	if (qp->qp_sq_cqhdl != NULL)
		hermon_cq_workq_remove(qp->qp_sq_cqhdl, &qp->qp_sq_wqavl);

#ifdef __lock_lint
	mutex_exit(&qp->qp_sq_cqhdl->cq_lock);
	mutex_exit(&qp->qp_rq_cqhdl->cq_lock);
#else
	if (qp->qp_rq_cqhdl != qp->qp_sq_cqhdl &&
	    qp->qp_sq_cqhdl != NULL)
		mutex_exit(&qp->qp_sq_cqhdl->cq_lock);
	if (qp->qp_rq_cqhdl)
		mutex_exit(&qp->qp_rq_cqhdl->cq_lock);
#endif

	return (IBT_SUCCESS);
}

/*
 * hermon_wrid_get_entry()
 *    Context: Can be called from interrupt or base context.
 */
uint64_t
hermon_wrid_get_entry(hermon_cqhdl_t cq, hermon_hw_cqe_t *cqe)
{
	hermon_workq_avl_t	*wqa;
	hermon_workq_hdr_t	*wq;
	uint64_t		wrid;
	uint_t			send_or_recv, qpnum;
	uint32_t		indx;

	/*
	 * Determine whether this CQE is a send or receive completion.
	 */
	send_or_recv = HERMON_CQE_SENDRECV_GET(cq, cqe);

	/* Find the work queue for this QP number (send or receive side) */
	qpnum = HERMON_CQE_QPNUM_GET(cq, cqe);
	wqa = hermon_wrid_wqavl_find(cq, qpnum, send_or_recv);
	wq = wqa->wqa_wq;

	/*
	 * Regardless of whether the completion is the result of a "success"
	 * or a "failure", we lock the list of "containers" and attempt to
	 * search for the first matching completion (i.e. the first WR
	 * with a matching WQE addr and size).  Once we find it, we pull out
	 * the "wrid" field and return it (see below).  XXX Note: One possible
	 * future enhancement would be to enable this routine to skip over
	 * any "unsignaled" completions to go directly to the next "signaled"
	 * completion.
	 */
	indx = HERMON_CQE_WQEADDRSZ_GET(cq, cqe) & wq->wq_mask;
	wrid = wq->wq_wrid[indx];
	if (wqa->wqa_srq_en) {
		struct hermon_sw_srq_s	*srq;
		uint64_t		*desc;

		/* put wqe back on the srq free list */
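		/*
		 * The SRQ free list is threaded through the WQEs
		 * themselves: the 16-bit word at byte offset 2 of a
		 * descriptor holds the index of the next free WQE (in
		 * network byte order).  Linking this completed WQE
		 * after the current tail makes it available for
		 * reposting.
		 */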
		srq = wqa->wqa_srq;
		mutex_enter(&srq->srq_lock);
		desc = HERMON_SRQ_WQE_ADDR(srq, wq->wq_tail);
		((uint16_t *)desc)[1] = htons(indx);
		wq->wq_tail = indx;
		mutex_exit(&srq->srq_lock);
	} else {
		wq->wq_head = (indx + 1) & wq->wq_mask;
	}

	return (wrid);
}

/*
 * hermon_wrid_workq_compare()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_wrid_workq_compare(const void *p1, const void *p2)
{
	hermon_workq_compare_t	*cmpp;
	hermon_workq_avl_t	*curr;

	cmpp = (hermon_workq_compare_t *)p1;
	curr = (hermon_workq_avl_t *)p2;
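
	/*
	 * Sort first by QP number, then by work queue type (send vs.
	 * recv); together these form the unique key for a node in the
	 * CQ's wqavl tree.
	 */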
	if (cmpp->cmp_qpn < curr->wqa_qpn)
		return (-1);
	else if (cmpp->cmp_qpn > curr->wqa_qpn)
		return (+1);
	else if (cmpp->cmp_type < curr->wqa_type)
		return (-1);
	else if (cmpp->cmp_type > curr->wqa_type)
		return (+1);
	else
		return (0);
}

/*
 * hermon_wrid_wqavl_find()
 *    Context: Can be called from interrupt or base context.
 */
static hermon_workq_avl_t *
hermon_wrid_wqavl_find(hermon_cqhdl_t cq, uint_t qpn, uint_t wq_type)
{
	hermon_workq_avl_t	*curr;
	hermon_workq_compare_t	cmp;

	/*
	 * Walk the CQ's work queue list, trying to find a send or recv queue
	 * with the same QP number.  We do this even if we are going to later
	 * create a new entry because it helps us easily find the end of the
	 * list.
	 */
	cmp.cmp_qpn = qpn;
	cmp.cmp_type = wq_type;
#ifdef __lock_lint
	hermon_wrid_workq_compare(NULL, NULL);
#endif
	curr = avl_find(&cq->cq_wrid_wqhdr_avl_tree, &cmp, NULL);

	return (curr);
}

/*
 * hermon_wrid_wqhdr_create()
 *    Context: Can be called from base context.
 */
hermon_workq_hdr_t *
hermon_wrid_wqhdr_create(int bufsz)
{
	hermon_workq_hdr_t	*wqhdr;

	/*
	 * Allocate space for the wqhdr, and an array to record all the wrids.
	 */
	wqhdr = (hermon_workq_hdr_t *)kmem_zalloc(sizeof (*wqhdr), KM_NOSLEEP);
	if (wqhdr == NULL) {
		return (NULL);
	}
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*wqhdr))
	wqhdr->wq_wrid = kmem_zalloc(bufsz * sizeof (uint64_t), KM_NOSLEEP);
	if (wqhdr->wq_wrid == NULL) {
		kmem_free(wqhdr, sizeof (*wqhdr));
		return (NULL);
	}
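
	/*
	 * Note: "wq_mask" (bufsz - 1) is used elsewhere to index the
	 * wrid array, which assumes "bufsz" is a power of two, as all
	 * queue depths in this driver are.
	 */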
	wqhdr->wq_size = bufsz;
	wqhdr->wq_mask = bufsz - 1;

	return (wqhdr);
}

/*
 * hermon_wrid_wqhdr_destroy()
 *    Context: Can be called from base context.
 */
void
hermon_wrid_wqhdr_destroy(hermon_workq_hdr_t *wqhdr)
{
	kmem_free(wqhdr->wq_wrid, wqhdr->wq_size * sizeof (uint64_t));
	kmem_free(wqhdr, sizeof (*wqhdr));
}

/*
 * hermon_cq_workq_add()
 *    Context: Can be called from interrupt or base context.
 */
static void
hermon_cq_workq_add(hermon_cqhdl_t cq, hermon_workq_avl_t *wqavl)
{
	hermon_workq_compare_t	cmp;
	avl_index_t		where;

	cmp.cmp_qpn = wqavl->wqa_qpn;
	cmp.cmp_type = wqavl->wqa_type;
#ifdef __lock_lint
	hermon_wrid_workq_compare(NULL, NULL);
#endif
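	/*
	 * avl_find() is called here only for its "where" out-parameter:
	 * the node is not expected to be present, and "where" gives
	 * avl_insert() the correct insertion point.
	 */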
	(void) avl_find(&cq->cq_wrid_wqhdr_avl_tree, &cmp, &where);
	avl_insert(&cq->cq_wrid_wqhdr_avl_tree, wqavl, where);
}

/*
 * hermon_cq_workq_remove()
 *    Context: Can be called from interrupt or base context.
 */
static void
hermon_cq_workq_remove(hermon_cqhdl_t cq, hermon_workq_avl_t *wqavl)
{
#ifdef __lock_lint
	hermon_wrid_workq_compare(NULL, NULL);
#endif
	avl_remove(&cq->cq_wrid_wqhdr_avl_tree, wqavl);
}