4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 * Copyright (c) 2016 by Delphix. All rights reserved.
27 #include <sys/types.h>
29 #include <sys/types.h>
30 #include <sys/socket.h>
31 #include <netinet/in.h>
32 #include <sys/sunddi.h>
34 #include <sys/ib/ibtl/ibti.h>
35 #include <sys/ib/ibtl/ibtl_types.h>
37 #include <sys/ib/clients/iser/iser.h>
41 * Routines for completion queue handlers for iSER.
43 static void iser_msg_handle(iser_chan_t
*chan
, iser_msg_t
*msg
);
44 int iser_iscsihdr_handle(iser_chan_t
*chan
, iser_msg_t
*msg
);
45 static int iser_ib_poll_send_completions(ibt_cq_hdl_t cq_hdl
,
46 iser_chan_t
*iser_chan
);
47 static int iser_ib_poll_recv_completions(ibt_cq_hdl_t cq_hdl
,
48 iser_chan_t
*iser_chan
);
51 iser_ib_sendcq_handler(ibt_cq_hdl_t cq_hdl
, void *arg
)
53 iser_chan_t
*iser_chan
;
56 iser_chan
= (iser_chan_t
*)arg
;
59 * Poll for work request completion while successful. If the
60 * queue empties or otherwise becomes invalid, stop polling.
63 status
= iser_ib_poll_send_completions(cq_hdl
, iser_chan
);
64 } while (status
== IBT_SUCCESS
);
66 if (status
== IBT_CQ_EMPTY
) {
67 /* We've emptied the CQ, rearm it before we're done here */
68 status
= ibt_enable_cq_notify(cq_hdl
, IBT_NEXT_COMPLETION
);
69 if (status
!= IBT_SUCCESS
) {
70 /* Unexpected error */
71 ISER_LOG(CE_NOTE
, "iser_ib_sendcq_handler: "
72 "ibt_enable_cq_notify error (%d)", status
);
76 /* Now, check for more completions after the rearm */
78 status
= iser_ib_poll_send_completions(
80 } while (status
== IBT_SUCCESS
);
85 iser_ib_poll_send_completions(ibt_cq_hdl_t cq_hdl
, iser_chan_t
*iser_chan
)
87 ibt_wc_t wc
[ISER_IB_SCQ_POLL_MAX
];
89 idm_buf_t
*idb
= NULL
;
90 idm_task_t
*idt
= NULL
;
95 iser_conn_t
*iser_conn
;
96 idm_status_t idm_status
;
99 iser_conn
= iser_chan
->ic_conn
;
101 /* Poll ISER_IB_SCQ_POLL_MAX completions from the CQ */
102 status
= ibt_poll_cq(cq_hdl
, wc
, ISER_IB_SCQ_POLL_MAX
, &npoll
);
104 if (status
!= IBT_SUCCESS
) {
105 if (status
!= IBT_CQ_EMPTY
) {
106 /* Unexpected error */
107 ISER_LOG(CE_NOTE
, "iser_ib_sendcq_handler: ibt_poll_cq "
108 "unexpected error (%d)", status
);
110 /* CQ is empty. Either way, move along... */
115 * Handle each of the completions we've polled
117 for (i
= 0; i
< npoll
; i
++) {
119 DTRACE_PROBE3(iser__send__cqe
, iser_chan_t
*, iser_chan
,
120 ibt_wc_t
*, &wc
[i
], ibt_wc_status_t
, wc
[i
].wc_status
);
122 /* Grab the wrid of the completion */
125 /* Decrement this channel's SQ posted count */
126 mutex_enter(&iser_chan
->ic_sq_post_lock
);
127 iser_chan
->ic_sq_post_count
--;
128 mutex_exit(&iser_chan
->ic_sq_post_lock
);
130 /* Pull in the wr handle */
131 wr
= (iser_wr_t
*)(uintptr_t)wrid
;
134 /* Set an idm_status for return to IDM */
135 idm_status
= (wc
[i
].wc_status
== IBT_WC_SUCCESS
) ?
136 IDM_STATUS_SUCCESS
: IDM_STATUS_FAIL
;
139 * A non-success status here indicates the QP went
140 * into an error state while this WR was being
141 * processed. This can also happen when the
142 * channel is closed on the remote end. Clean up
143 * the resources, then push CE_TRANSPORT_FAIL
146 if (wc
[i
].wc_status
!= IBT_WC_SUCCESS
) {
148 * Free the resources attached to this
151 if (wr
->iw_msg
!= NULL
) {
152 /* Free iser_msg handle */
153 iser_msg_free(wr
->iw_msg
);
156 if (wr
->iw_pdu
!= NULL
) {
157 /* Complete the PDU */
158 idm_pdu_complete(wr
->iw_pdu
, idm_status
);
161 if (wr
->iw_buf
!= NULL
) {
162 /* Invoke buffer callback */
165 idb
->idb_buf_private
)->iser_mr
;
168 &((iser_buf_t
*)idb
->idb_buf_private
)->
169 buf_wc
, sizeof (ibt_wc_t
));
171 idt
= idb
->idb_task_binding
;
172 mutex_enter(&idt
->idt_mutex
);
173 if (wr
->iw_type
== ISER_WR_RDMAW
) {
174 DTRACE_ISCSI_8(xfer__done
,
175 idm_conn_t
*, idt
->idt_ic
,
176 uintptr_t, idb
->idb_buf
,
177 uint32_t, idb
->idb_bufoffset
,
178 uint64_t, mr
->is_mrva
, uint32_t, 0,
179 uint32_t, mr
->is_mrrkey
,
180 uint32_t, idb
->idb_xfer_len
,
181 int, XFER_BUF_TX_TO_INI
);
182 idm_buf_tx_to_ini_done(idt
, idb
,
184 } else { /* ISER_WR_RDMAR */
185 DTRACE_ISCSI_8(xfer__done
,
186 idm_conn_t
*, idt
->idt_ic
,
187 uintptr_t, idb
->idb_buf
,
188 uint32_t, idb
->idb_bufoffset
,
189 uint64_t, mr
->is_mrva
, uint32_t, 0,
190 uint32_t, mr
->is_mrrkey
,
191 uint32_t, idb
->idb_xfer_len
,
192 int, XFER_BUF_RX_FROM_INI
);
193 idm_buf_rx_from_ini_done(idt
, idb
,
198 /* Free the iser wr handle */
202 * Tell IDM that the channel has gone down,
203 * unless it already knows.
205 mutex_enter(&iser_conn
->ic_lock
);
206 switch (iser_conn
->ic_stage
) {
207 case ISER_CONN_STAGE_IC_DISCONNECTED
:
208 case ISER_CONN_STAGE_IC_FREED
:
209 case ISER_CONN_STAGE_CLOSING
:
210 case ISER_CONN_STAGE_CLOSED
:
214 idm_conn_event(iser_conn
->ic_idmc
,
215 CE_TRANSPORT_FAIL
, idm_status
);
216 iser_conn
->ic_stage
= ISER_CONN_STAGE_CLOSING
;
218 mutex_exit(&iser_conn
->ic_lock
);
220 /* Move onto the next completion */
225 * For a success status, just invoke the PDU or
226 * buffer completion. We use our WR handle's
227 * "iw_type" here so that we can properly process
228 * because the CQE's opcode is invalid if the status
231 switch (wr
->iw_type
) {
233 /* Free the msg handle */
234 ASSERT(wr
->iw_msg
!= NULL
);
235 iser_msg_free(wr
->iw_msg
);
237 if (wr
->iw_pdu
== NULL
) {
238 /* This is a hello exchange message */
239 mutex_enter(&iser_conn
->ic_lock
);
240 if (iser_conn
->ic_stage
==
241 ISER_CONN_STAGE_HELLOREPLY_SENT
) {
243 * We're on the target side,
244 * and have just successfully
245 * sent the HelloReply msg.
247 iser_conn
->ic_stage
=
248 ISER_CONN_STAGE_LOGGED_IN
;
250 mutex_exit(&iser_conn
->ic_lock
);
252 /* This is a normal control message */
253 idm_pdu_complete(wr
->iw_pdu
, idm_status
);
256 /* Free the wr handle */
264 * Invoke the appropriate callback;
265 * the buffer will be freed there.
268 mr
= ((iser_buf_t
*)idb
->idb_buf_private
)->iser_mr
;
271 &((iser_buf_t
*)idb
->idb_buf_private
)->buf_wc
,
274 idt
= idb
->idb_task_binding
;
276 mutex_enter(&idt
->idt_mutex
);
277 if (wr
->iw_type
== ISER_WR_RDMAW
) {
278 DTRACE_ISCSI_8(xfer__done
,
279 idm_conn_t
*, idt
->idt_ic
,
280 uintptr_t, idb
->idb_buf
,
281 uint32_t, idb
->idb_bufoffset
,
282 uint64_t, mr
->is_mrva
, uint32_t, 0,
283 uint32_t, mr
->is_mrrkey
,
284 uint32_t, idb
->idb_xfer_len
,
285 int, XFER_BUF_TX_TO_INI
);
286 idm_buf_tx_to_ini_done(idt
, idb
, idm_status
);
288 DTRACE_ISCSI_8(xfer__done
,
289 idm_conn_t
*, idt
->idt_ic
,
290 uintptr_t, idb
->idb_buf
,
291 uint32_t, idb
->idb_bufoffset
,
292 uint64_t, mr
->is_mrva
, uint32_t, 0,
293 uint32_t, mr
->is_mrrkey
,
294 uint32_t, idb
->idb_xfer_len
,
295 int, XFER_BUF_RX_FROM_INI
);
296 idm_buf_rx_from_ini_done(idt
, idb
, idm_status
);
299 /* Free the wr handle */
314 iser_ib_recvcq_handler(ibt_cq_hdl_t cq_hdl
, void *arg
)
316 iser_chan_t
*iser_chan
;
319 iser_chan
= (iser_chan_t
*)arg
;
322 * Poll for work request completion while successful. If the
323 * queue empties or otherwise becomes invalid, stop polling.
326 status
= iser_ib_poll_recv_completions(cq_hdl
, iser_chan
);
327 } while (status
== IBT_SUCCESS
);
329 if (status
== IBT_CQ_EMPTY
) {
330 /* We've emptied the CQ, rearm it before we're done here */
331 status
= ibt_enable_cq_notify(cq_hdl
, IBT_NEXT_COMPLETION
);
332 if (status
!= IBT_SUCCESS
) {
333 /* Unexpected error */
334 ISER_LOG(CE_NOTE
, "iser_ib_recvcq_handler: "
335 "ibt_enable_cq_notify error (%d)", status
);
339 /* Now, check for more completions after the rearm */
341 status
= iser_ib_poll_recv_completions(
343 } while (status
== IBT_SUCCESS
);
348 iser_ib_poll_recv_completions(ibt_cq_hdl_t cq_hdl
, iser_chan_t
*iser_chan
)
355 iser_qp
= &(iser_chan
->ic_qp
);
357 bzero(&wc
, sizeof (ibt_wc_t
));
358 status
= ibt_poll_cq(cq_hdl
, &wc
, 1, NULL
);
359 if (status
== IBT_CQ_EMPTY
) {
360 /* CQ is empty, return */
364 if (status
!= IBT_SUCCESS
) {
365 /* Unexpected error */
366 ISER_LOG(CE_NOTE
, "iser_ib_poll_recv_completions: "
367 "ibt_poll_cq error (%d)", status
);
368 mutex_enter(&iser_qp
->qp_lock
);
370 mutex_exit(&iser_qp
->qp_lock
);
371 /* Free the msg handle (if we got it back) */
372 if ((msg
= (iser_msg_t
*)(uintptr_t)wc
.wc_id
) != NULL
) {
378 /* Retrieve the iSER msg handle */
379 msg
= (iser_msg_t
*)(uintptr_t)wc
.wc_id
;
383 * Decrement the posted level in the RQ, then check
384 * to see if we need to fill the RQ back up (or if
385 * we are already on the taskq).
387 mutex_enter(&iser_chan
->ic_conn
->ic_lock
);
388 mutex_enter(&iser_qp
->qp_lock
);
391 if ((iser_qp
->rq_taskqpending
== B_FALSE
) &&
392 (iser_qp
->rq_level
<= iser_qp
->rq_lwm
) &&
393 (iser_chan
->ic_conn
->ic_stage
>= ISER_CONN_STAGE_IC_CONNECTED
) &&
394 (iser_chan
->ic_conn
->ic_stage
<= ISER_CONN_STAGE_LOGGED_IN
)) {
395 /* Set the pending flag and fire off a post_recv */
396 iser_qp
->rq_taskqpending
= B_TRUE
;
397 mutex_exit(&iser_qp
->qp_lock
);
399 status
= iser_ib_post_recv_async(iser_chan
->ic_chanhdl
);
401 if (status
!= DDI_SUCCESS
) {
402 ISER_LOG(CE_NOTE
, "iser_ib_poll_recv_completions: "
403 "task dispatch failed");
404 /* Failure to launch, unset the pending flag */
405 mutex_enter(&iser_qp
->qp_lock
);
406 iser_qp
->rq_taskqpending
= B_FALSE
;
407 mutex_exit(&iser_qp
->qp_lock
);
410 mutex_exit(&iser_qp
->qp_lock
);
413 DTRACE_PROBE3(iser__recv__cqe
, iser_chan_t
*, iser_chan
,
414 ibt_wc_t
*, &wc
, ibt_wc_status_t
, wc
.wc_status
);
415 if (wc
.wc_status
!= IBT_WC_SUCCESS
) {
417 * Tell IDM that the channel has gone down,
418 * unless it already knows.
420 switch (iser_chan
->ic_conn
->ic_stage
) {
421 case ISER_CONN_STAGE_IC_DISCONNECTED
:
422 case ISER_CONN_STAGE_IC_FREED
:
423 case ISER_CONN_STAGE_CLOSING
:
424 case ISER_CONN_STAGE_CLOSED
:
428 idm_conn_event(iser_chan
->ic_conn
->ic_idmc
,
429 CE_TRANSPORT_FAIL
, IDM_STATUS_FAIL
);
430 iser_chan
->ic_conn
->ic_stage
=
431 ISER_CONN_STAGE_CLOSING
;
433 mutex_exit(&iser_chan
->ic_conn
->ic_lock
);
436 return (DDI_SUCCESS
);
438 mutex_exit(&iser_chan
->ic_conn
->ic_lock
);
441 * We have an iSER message in, let's handle it.
442 * We will free the iser_msg_t later in this path,
443 * depending upon the action required.
445 iser_msg_handle(iser_chan
, msg
);
446 return (DDI_SUCCESS
);
451 iser_msg_handle(iser_chan_t
*chan
, iser_msg_t
*msg
)
454 iser_ctrl_hdr_t
*hdr
= NULL
;
455 iser_conn_t
*iser_conn
= chan
->ic_conn
;
458 hdr
= (iser_ctrl_hdr_t
*)(uintptr_t)msg
->msg_ds
.ds_va
;
461 opcode
= hdr
->opcode
;
462 if (opcode
== ISER_OPCODE_CTRL_TYPE_PDU
) {
464 * Handle an iSCSI Control PDU iSER message.
465 * Note we'll free the msg handle in the PDU callback.
467 status
= iser_iscsihdr_handle(chan
, msg
);
468 if (status
!= DDI_SUCCESS
) {
470 * We are unable to handle this message, and
471 * have no way to recover from this. Fail the
474 ISER_LOG(CE_NOTE
, "iser_msg_handle: failed "
475 "iser_iscsihdr_handle");
477 idm_conn_event(iser_conn
->ic_idmc
,
478 CE_TRANSPORT_FAIL
, IDM_STATUS_FAIL
);
480 } else if (opcode
== ISER_OPCODE_HELLO_MSG
) { /* at the target */
482 * We are currently not supporting Hello Exchange,
483 * since OFED iSER does not. May be revisited.
485 ASSERT(opcode
!= ISER_OPCODE_HELLO_MSG
);
487 if (iser_conn
->ic_type
!= ISER_CONN_TYPE_TGT
) {
488 idm_conn_event(iser_conn
->ic_idmc
,
489 CE_TRANSPORT_FAIL
, IDM_STATUS_FAIL
);
492 iser_hello_hdr_t
*hello_hdr
= (iser_hello_hdr_t
*)hdr
;
494 ISER_LOG(CE_NOTE
, "received Hello message: opcode[%d], "
495 "maxver[%d], minver[%d], iser_ird[%d], msg (0x%p)",
496 hello_hdr
->opcode
, hello_hdr
->maxver
, hello_hdr
->minver
,
497 ntohs(hello_hdr
->iser_ird
), (void *)msg
);
499 mutex_enter(&iser_conn
->ic_lock
);
501 if (iser_conn
->ic_stage
!= ISER_CONN_STAGE_HELLO_WAIT
) {
502 /* target is not expected to receive a Hello */
503 idm_conn_event(iser_conn
->ic_idmc
,
504 CE_TRANSPORT_FAIL
, IDM_STATUS_FAIL
);
507 iser_conn
->ic_stage
= ISER_CONN_STAGE_HELLOREPLY_SENT
;
508 mutex_exit(&iser_conn
->ic_lock
);
510 /* Prepare and send a HelloReply message */
511 status
= iser_xfer_helloreply_msg(chan
);
512 if (status
!= ISER_STATUS_SUCCESS
) {
514 mutex_enter(&iser_conn
->ic_lock
);
515 iser_conn
->ic_stage
=
516 ISER_CONN_STAGE_HELLOREPLY_SENT_FAIL
;
517 mutex_exit(&iser_conn
->ic_lock
);
519 idm_conn_event(iser_conn
->ic_idmc
,
520 CE_TRANSPORT_FAIL
, status
);
523 /* Free this msg handle */
526 } else if (opcode
== ISER_OPCODE_HELLOREPLY_MSG
) { /* at initiator */
529 * We are currently not supporting Hello Exchange,
530 * since OFED iSER does not. May be revisited.
532 ASSERT(opcode
!= ISER_OPCODE_HELLOREPLY_MSG
);
534 if (iser_conn
->ic_type
!= ISER_CONN_TYPE_INI
) {
535 idm_conn_event(iser_conn
->ic_idmc
,
536 CE_TRANSPORT_FAIL
, status
);
539 iser_helloreply_hdr_t
*hello_hdr
= (iser_helloreply_hdr_t
*)hdr
;
541 ISER_LOG(CE_NOTE
, "received Hello Reply message: opcode[%d], "
542 "maxver[%d], curver[%d], iser_ord[%d], msg (0x%p)",
543 hello_hdr
->opcode
, hello_hdr
->maxver
, hello_hdr
->curver
,
544 ntohs(hello_hdr
->iser_ord
), (void *)msg
);
546 /* Free this msg handle */
550 * Signal the receipt of HelloReply to the waiting thread
551 * so that the initiator can proceed to the Full Feature
554 mutex_enter(&iser_conn
->ic_lock
);
555 iser_conn
->ic_stage
= ISER_CONN_STAGE_HELLOREPLY_RCV
;
556 cv_signal(&iser_conn
->ic_stage_cv
);
557 mutex_exit(&iser_conn
->ic_lock
);
559 /* Protocol error: free the msg handle and fail the session */
560 ISER_LOG(CE_NOTE
, "iser_msg_handle: unsupported opcode (0x%x): "
561 "terminating session on IDM handle (0x%p)", opcode
,
562 (void *) iser_conn
->ic_idmc
);
565 idm_conn_event(iser_conn
->ic_idmc
, CE_TRANSPORT_FAIL
,
/* Extract the opcode bits (ISCSI_OPCODE_MASK) from a PDU's iSCSI header. */
#define	IDM_PDU_OPCODE(PDU) \
	((PDU)->isp_hdr->opcode & ISCSI_OPCODE_MASK)

/*
 * n2h24()
 *
 * Network to host translation for 24-bit integers: assemble the three
 * bytes at 'ptr', most significant byte first, into a host-order value.
 * The bytes are only read, never written.  Each byte is widened to
 * uint32_t before it is shifted, so the arithmetic is unsigned throughout.
 */
static uint32_t
n2h24(const uint8_t *ptr)
{
	return (((uint32_t)ptr[0] << 16) | ((uint32_t)ptr[1] << 8) |
	    (uint32_t)ptr[2]);
}
582 iser_rx_pdu_cb(idm_pdu_t
*pdu
, idm_status_t status
)
584 /* Free the iser msg handle and the PDU handle */
585 iser_msg_free((iser_msg_t
*)pdu
->isp_transport_private
);
590 iser_iscsihdr_handle(iser_chan_t
*chan
, iser_msg_t
*msg
)
597 pdu
= idm_pdu_alloc_nosleep(sizeof (iscsi_hdr_t
), 0);
598 pdu
->isp_ic
= chan
->ic_conn
->ic_idmc
;
599 ASSERT(pdu
->isp_ic
!= NULL
);
601 /* Set the iser_msg handle into the transport-private field */
602 pdu
->isp_transport_private
= (void *)msg
;
604 /* Set up a pointer in the pdu handle to the iSER header */
605 iser_hdrp
= (uint8_t *)(uintptr_t)msg
->msg_ds
.ds_va
;
606 if (iser_hdrp
== NULL
) {
607 ISER_LOG(CE_NOTE
, "iser_iscsihdr_handle: iser_hdrp is NULL");
609 return (ISER_STATUS_FAIL
);
611 pdu
->isp_transport_hdr
= (void *)iser_hdrp
;
612 pdu
->isp_transport_hdrlen
= ISER_HEADER_LENGTH
;
615 * Set up a pointer to the iSCSI header, which is directly
616 * after the iSER header in the message.
618 iscsi_hdrp
= ((uint8_t *)(uintptr_t)msg
->msg_ds
.ds_va
) +
620 if (iscsi_hdrp
== NULL
) {
621 ISER_LOG(CE_NOTE
, "iser_iscsihdr_handle: iscsi_hdrp is NULL");
623 return (ISER_STATUS_FAIL
);
625 pdu
->isp_hdr
= (iscsi_hdr_t
*)(uintptr_t)iscsi_hdrp
;
627 /* Fill in the BHS */
629 pdu
->isp_hdrlen
= sizeof (iscsi_hdr_t
) +
630 (bhs
->hlength
* sizeof (uint32_t));
631 pdu
->isp_datalen
= n2h24(bhs
->dlength
);
632 pdu
->isp_callback
= iser_rx_pdu_cb
;
635 * If datalen > 0, then non-scsi data may be present. Allocate
636 * space in the PDU handle and set a pointer to the data.
638 if (pdu
->isp_datalen
) {
639 pdu
->isp_data
= ((uint8_t *)(uintptr_t)pdu
->isp_hdr
) +
644 idm_pdu_rx(pdu
->isp_ic
, pdu
);
646 return (DDI_SUCCESS
);