Merge remote-tracking branch 'origin/master'
[unleashed/lotheac.git] / usr / src / uts / common / io / ib / clients / iser / iser_cq.c
blob6a8f1d755c8591dc6fc1e6d79e19f514d144820a
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 * Copyright (c) 2016 by Delphix. All rights reserved.
 */
27 #include <sys/types.h>
28 #include <sys/ddi.h>
29 #include <sys/types.h>
30 #include <sys/socket.h>
31 #include <netinet/in.h>
32 #include <sys/sunddi.h>
33 #include <sys/sdt.h>
34 #include <sys/ib/ibtl/ibti.h>
35 #include <sys/ib/ibtl/ibtl_types.h>
37 #include <sys/ib/clients/iser/iser.h>
/*
 * iser_cq.c
 *    Routines for completion queue handlers for iSER.
 */
43 static void iser_msg_handle(iser_chan_t *chan, iser_msg_t *msg);
44 int iser_iscsihdr_handle(iser_chan_t *chan, iser_msg_t *msg);
45 static int iser_ib_poll_send_completions(ibt_cq_hdl_t cq_hdl,
46 iser_chan_t *iser_chan);
47 static int iser_ib_poll_recv_completions(ibt_cq_hdl_t cq_hdl,
48 iser_chan_t *iser_chan);
50 void
51 iser_ib_sendcq_handler(ibt_cq_hdl_t cq_hdl, void *arg)
53 iser_chan_t *iser_chan;
54 ibt_status_t status;
56 iser_chan = (iser_chan_t *)arg;
59 * Poll for work request completion while successful. If the
60 * queue empties or otherwise becomes invalid, stop polling.
62 do {
63 status = iser_ib_poll_send_completions(cq_hdl, iser_chan);
64 } while (status == IBT_SUCCESS);
66 if (status == IBT_CQ_EMPTY) {
67 /* We've emptied the CQ, rearm it before we're done here */
68 status = ibt_enable_cq_notify(cq_hdl, IBT_NEXT_COMPLETION);
69 if (status != IBT_SUCCESS) {
70 /* Unexpected error */
71 ISER_LOG(CE_NOTE, "iser_ib_sendcq_handler: "
72 "ibt_enable_cq_notify error (%d)", status);
73 return;
76 /* Now, check for more completions after the rearm */
77 do {
78 status = iser_ib_poll_send_completions(
79 cq_hdl, iser_chan);
80 } while (status == IBT_SUCCESS);
84 static int
85 iser_ib_poll_send_completions(ibt_cq_hdl_t cq_hdl, iser_chan_t *iser_chan)
87 ibt_wc_t wc[ISER_IB_SCQ_POLL_MAX];
88 ibt_wrid_t wrid;
89 idm_buf_t *idb = NULL;
90 idm_task_t *idt = NULL;
91 iser_wr_t *wr = NULL;
92 int i;
93 uint_t npoll = 0;
94 ibt_status_t status;
95 iser_conn_t *iser_conn;
96 idm_status_t idm_status;
97 iser_mr_t *mr;
99 iser_conn = iser_chan->ic_conn;
101 /* Poll ISER_IB_SCQ_POLL_MAX completions from the CQ */
102 status = ibt_poll_cq(cq_hdl, wc, ISER_IB_SCQ_POLL_MAX, &npoll);
104 if (status != IBT_SUCCESS) {
105 if (status != IBT_CQ_EMPTY) {
106 /* Unexpected error */
107 ISER_LOG(CE_NOTE, "iser_ib_sendcq_handler: ibt_poll_cq "
108 "unexpected error (%d)", status);
110 /* CQ is empty. Either way, move along... */
111 return (status);
115 * Handle each of the completions we've polled
117 for (i = 0; i < npoll; i++) {
119 DTRACE_PROBE3(iser__send__cqe, iser_chan_t *, iser_chan,
120 ibt_wc_t *, &wc[i], ibt_wc_status_t, wc[i].wc_status);
122 /* Grab the wrid of the completion */
123 wrid = wc[i].wc_id;
125 /* Decrement this channel's SQ posted count */
126 mutex_enter(&iser_chan->ic_sq_post_lock);
127 iser_chan->ic_sq_post_count--;
128 mutex_exit(&iser_chan->ic_sq_post_lock);
130 /* Pull in the wr handle */
131 wr = (iser_wr_t *)(uintptr_t)wrid;
132 ASSERT(wr != NULL);
134 /* Set an idm_status for return to IDM */
135 idm_status = (wc[i].wc_status == IBT_WC_SUCCESS) ?
136 IDM_STATUS_SUCCESS : IDM_STATUS_FAIL;
139 * A non-success status here indicates the QP went
140 * into an error state while this WR was being
141 * processed. This can also happen when the
142 * channel is closed on the remote end. Clean up
143 * the resources, then push CE_TRANSPORT_FAIL
144 * into IDM.
146 if (wc[i].wc_status != IBT_WC_SUCCESS) {
148 * Free the resources attached to this
149 * completion.
151 if (wr->iw_msg != NULL) {
152 /* Free iser_msg handle */
153 iser_msg_free(wr->iw_msg);
156 if (wr->iw_pdu != NULL) {
157 /* Complete the PDU */
158 idm_pdu_complete(wr->iw_pdu, idm_status);
161 if (wr->iw_buf != NULL) {
162 /* Invoke buffer callback */
163 idb = wr->iw_buf;
164 mr = ((iser_buf_t *)
165 idb->idb_buf_private)->iser_mr;
166 #ifdef DEBUG
167 bcopy(&wc[i],
168 &((iser_buf_t *)idb->idb_buf_private)->
169 buf_wc, sizeof (ibt_wc_t));
170 #endif
171 idt = idb->idb_task_binding;
172 mutex_enter(&idt->idt_mutex);
173 if (wr->iw_type == ISER_WR_RDMAW) {
174 DTRACE_ISCSI_8(xfer__done,
175 idm_conn_t *, idt->idt_ic,
176 uintptr_t, idb->idb_buf,
177 uint32_t, idb->idb_bufoffset,
178 uint64_t, mr->is_mrva, uint32_t, 0,
179 uint32_t, mr->is_mrrkey,
180 uint32_t, idb->idb_xfer_len,
181 int, XFER_BUF_TX_TO_INI);
182 idm_buf_tx_to_ini_done(idt, idb,
183 IDM_STATUS_FAIL);
184 } else { /* ISER_WR_RDMAR */
185 DTRACE_ISCSI_8(xfer__done,
186 idm_conn_t *, idt->idt_ic,
187 uintptr_t, idb->idb_buf,
188 uint32_t, idb->idb_bufoffset,
189 uint64_t, mr->is_mrva, uint32_t, 0,
190 uint32_t, mr->is_mrrkey,
191 uint32_t, idb->idb_xfer_len,
192 int, XFER_BUF_RX_FROM_INI);
193 idm_buf_rx_from_ini_done(idt, idb,
194 IDM_STATUS_FAIL);
198 /* Free the iser wr handle */
199 iser_wr_free(wr);
202 * Tell IDM that the channel has gone down,
203 * unless it already knows.
205 mutex_enter(&iser_conn->ic_lock);
206 switch (iser_conn->ic_stage) {
207 case ISER_CONN_STAGE_IC_DISCONNECTED:
208 case ISER_CONN_STAGE_IC_FREED:
209 case ISER_CONN_STAGE_CLOSING:
210 case ISER_CONN_STAGE_CLOSED:
211 break;
213 default:
214 idm_conn_event(iser_conn->ic_idmc,
215 CE_TRANSPORT_FAIL, idm_status);
216 iser_conn->ic_stage = ISER_CONN_STAGE_CLOSING;
218 mutex_exit(&iser_conn->ic_lock);
220 /* Move onto the next completion */
221 continue;
225 * For a success status, just invoke the PDU or
226 * buffer completion. We use our WR handle's
227 * "iw_type" here so that we can properly process
228 * because the CQE's opcode is invalid if the status
229 * is failed.
231 switch (wr->iw_type) {
232 case ISER_WR_SEND:
233 /* Free the msg handle */
234 ASSERT(wr->iw_msg != NULL);
235 iser_msg_free(wr->iw_msg);
237 if (wr->iw_pdu == NULL) {
238 /* This is a hello exchange message */
239 mutex_enter(&iser_conn->ic_lock);
240 if (iser_conn->ic_stage ==
241 ISER_CONN_STAGE_HELLOREPLY_SENT) {
243 * We're on the target side,
244 * and have just successfully
245 * sent the HelloReply msg.
247 iser_conn->ic_stage =
248 ISER_CONN_STAGE_LOGGED_IN;
250 mutex_exit(&iser_conn->ic_lock);
251 } else {
252 /* This is a normal control message */
253 idm_pdu_complete(wr->iw_pdu, idm_status);
256 /* Free the wr handle */
257 iser_wr_free(wr);
259 break;
261 case ISER_WR_RDMAW:
262 case ISER_WR_RDMAR:
264 * Invoke the appropriate callback;
265 * the buffer will be freed there.
267 idb = wr->iw_buf;
268 mr = ((iser_buf_t *)idb->idb_buf_private)->iser_mr;
269 #ifdef DEBUG
270 bcopy(&wc[i],
271 &((iser_buf_t *)idb->idb_buf_private)->buf_wc,
272 sizeof (ibt_wc_t));
273 #endif
274 idt = idb->idb_task_binding;
276 mutex_enter(&idt->idt_mutex);
277 if (wr->iw_type == ISER_WR_RDMAW) {
278 DTRACE_ISCSI_8(xfer__done,
279 idm_conn_t *, idt->idt_ic,
280 uintptr_t, idb->idb_buf,
281 uint32_t, idb->idb_bufoffset,
282 uint64_t, mr->is_mrva, uint32_t, 0,
283 uint32_t, mr->is_mrrkey,
284 uint32_t, idb->idb_xfer_len,
285 int, XFER_BUF_TX_TO_INI);
286 idm_buf_tx_to_ini_done(idt, idb, idm_status);
287 } else {
288 DTRACE_ISCSI_8(xfer__done,
289 idm_conn_t *, idt->idt_ic,
290 uintptr_t, idb->idb_buf,
291 uint32_t, idb->idb_bufoffset,
292 uint64_t, mr->is_mrva, uint32_t, 0,
293 uint32_t, mr->is_mrrkey,
294 uint32_t, idb->idb_xfer_len,
295 int, XFER_BUF_RX_FROM_INI);
296 idm_buf_rx_from_ini_done(idt, idb, idm_status);
299 /* Free the wr handle */
300 iser_wr_free(wr);
302 break;
304 default:
305 ASSERT(0);
306 break;
310 return (status);
313 void
314 iser_ib_recvcq_handler(ibt_cq_hdl_t cq_hdl, void *arg)
316 iser_chan_t *iser_chan;
317 ibt_status_t status;
319 iser_chan = (iser_chan_t *)arg;
322 * Poll for work request completion while successful. If the
323 * queue empties or otherwise becomes invalid, stop polling.
325 do {
326 status = iser_ib_poll_recv_completions(cq_hdl, iser_chan);
327 } while (status == IBT_SUCCESS);
329 if (status == IBT_CQ_EMPTY) {
330 /* We've emptied the CQ, rearm it before we're done here */
331 status = ibt_enable_cq_notify(cq_hdl, IBT_NEXT_COMPLETION);
332 if (status != IBT_SUCCESS) {
333 /* Unexpected error */
334 ISER_LOG(CE_NOTE, "iser_ib_recvcq_handler: "
335 "ibt_enable_cq_notify error (%d)", status);
336 return;
339 /* Now, check for more completions after the rearm */
340 do {
341 status = iser_ib_poll_recv_completions(
342 cq_hdl, iser_chan);
343 } while (status == IBT_SUCCESS);
347 static int
348 iser_ib_poll_recv_completions(ibt_cq_hdl_t cq_hdl, iser_chan_t *iser_chan)
350 ibt_wc_t wc;
351 iser_msg_t *msg;
352 iser_qp_t *iser_qp;
353 int status;
355 iser_qp = &(iser_chan->ic_qp);
357 bzero(&wc, sizeof (ibt_wc_t));
358 status = ibt_poll_cq(cq_hdl, &wc, 1, NULL);
359 if (status == IBT_CQ_EMPTY) {
360 /* CQ is empty, return */
361 return (status);
364 if (status != IBT_SUCCESS) {
365 /* Unexpected error */
366 ISER_LOG(CE_NOTE, "iser_ib_poll_recv_completions: "
367 "ibt_poll_cq error (%d)", status);
368 mutex_enter(&iser_qp->qp_lock);
369 iser_qp->rq_level--;
370 mutex_exit(&iser_qp->qp_lock);
371 /* Free the msg handle (if we got it back) */
372 if ((msg = (iser_msg_t *)(uintptr_t)wc.wc_id) != NULL) {
373 iser_msg_free(msg);
375 return (status);
378 /* Retrieve the iSER msg handle */
379 msg = (iser_msg_t *)(uintptr_t)wc.wc_id;
380 ASSERT(msg != NULL);
383 * Decrement the posted level in the RQ, then check
384 * to see if we need to fill the RQ back up (or if
385 * we are already on the taskq).
387 mutex_enter(&iser_chan->ic_conn->ic_lock);
388 mutex_enter(&iser_qp->qp_lock);
389 iser_qp->rq_level--;
391 if ((iser_qp->rq_taskqpending == B_FALSE) &&
392 (iser_qp->rq_level <= iser_qp->rq_lwm) &&
393 (iser_chan->ic_conn->ic_stage >= ISER_CONN_STAGE_IC_CONNECTED) &&
394 (iser_chan->ic_conn->ic_stage <= ISER_CONN_STAGE_LOGGED_IN)) {
395 /* Set the pending flag and fire off a post_recv */
396 iser_qp->rq_taskqpending = B_TRUE;
397 mutex_exit(&iser_qp->qp_lock);
399 status = iser_ib_post_recv_async(iser_chan->ic_chanhdl);
401 if (status != DDI_SUCCESS) {
402 ISER_LOG(CE_NOTE, "iser_ib_poll_recv_completions: "
403 "task dispatch failed");
404 /* Failure to launch, unset the pending flag */
405 mutex_enter(&iser_qp->qp_lock);
406 iser_qp->rq_taskqpending = B_FALSE;
407 mutex_exit(&iser_qp->qp_lock);
409 } else {
410 mutex_exit(&iser_qp->qp_lock);
413 DTRACE_PROBE3(iser__recv__cqe, iser_chan_t *, iser_chan,
414 ibt_wc_t *, &wc, ibt_wc_status_t, wc.wc_status);
415 if (wc.wc_status != IBT_WC_SUCCESS) {
417 * Tell IDM that the channel has gone down,
418 * unless it already knows.
420 switch (iser_chan->ic_conn->ic_stage) {
421 case ISER_CONN_STAGE_IC_DISCONNECTED:
422 case ISER_CONN_STAGE_IC_FREED:
423 case ISER_CONN_STAGE_CLOSING:
424 case ISER_CONN_STAGE_CLOSED:
425 break;
427 default:
428 idm_conn_event(iser_chan->ic_conn->ic_idmc,
429 CE_TRANSPORT_FAIL, IDM_STATUS_FAIL);
430 iser_chan->ic_conn->ic_stage =
431 ISER_CONN_STAGE_CLOSING;
433 mutex_exit(&iser_chan->ic_conn->ic_lock);
435 iser_msg_free(msg);
436 return (DDI_SUCCESS);
437 } else {
438 mutex_exit(&iser_chan->ic_conn->ic_lock);
441 * We have an iSER message in, let's handle it.
442 * We will free the iser_msg_t later in this path,
443 * depending upon the action required.
445 iser_msg_handle(iser_chan, msg);
446 return (DDI_SUCCESS);
450 static void
451 iser_msg_handle(iser_chan_t *chan, iser_msg_t *msg)
453 int opcode;
454 iser_ctrl_hdr_t *hdr = NULL;
455 iser_conn_t *iser_conn = chan->ic_conn;
456 int status;
458 hdr = (iser_ctrl_hdr_t *)(uintptr_t)msg->msg_ds.ds_va;
459 ASSERT(hdr != NULL);
461 opcode = hdr->opcode;
462 if (opcode == ISER_OPCODE_CTRL_TYPE_PDU) {
464 * Handle an iSCSI Control PDU iSER message.
465 * Note we'll free the msg handle in the PDU callback.
467 status = iser_iscsihdr_handle(chan, msg);
468 if (status != DDI_SUCCESS) {
470 * We are unable to handle this message, and
471 * have no way to recover from this. Fail the
472 * transport.
474 ISER_LOG(CE_NOTE, "iser_msg_handle: failed "
475 "iser_iscsihdr_handle");
476 iser_msg_free(msg);
477 idm_conn_event(iser_conn->ic_idmc,
478 CE_TRANSPORT_FAIL, IDM_STATUS_FAIL);
480 } else if (opcode == ISER_OPCODE_HELLO_MSG) { /* at the target */
482 * We are currently not supporting Hello Exchange,
483 * since OFED iSER does not. May be revisited.
485 ASSERT(opcode != ISER_OPCODE_HELLO_MSG);
487 if (iser_conn->ic_type != ISER_CONN_TYPE_TGT) {
488 idm_conn_event(iser_conn->ic_idmc,
489 CE_TRANSPORT_FAIL, IDM_STATUS_FAIL);
492 iser_hello_hdr_t *hello_hdr = (iser_hello_hdr_t *)hdr;
494 ISER_LOG(CE_NOTE, "received Hello message: opcode[%d], "
495 "maxver[%d], minver[%d], iser_ird[%d], msg (0x%p)",
496 hello_hdr->opcode, hello_hdr->maxver, hello_hdr->minver,
497 ntohs(hello_hdr->iser_ird), (void *)msg);
499 mutex_enter(&iser_conn->ic_lock);
501 if (iser_conn->ic_stage != ISER_CONN_STAGE_HELLO_WAIT) {
502 /* target is not expected to receive a Hello */
503 idm_conn_event(iser_conn->ic_idmc,
504 CE_TRANSPORT_FAIL, IDM_STATUS_FAIL);
507 iser_conn->ic_stage = ISER_CONN_STAGE_HELLOREPLY_SENT;
508 mutex_exit(&iser_conn->ic_lock);
510 /* Prepare and send a HelloReply message */
511 status = iser_xfer_helloreply_msg(chan);
512 if (status != ISER_STATUS_SUCCESS) {
514 mutex_enter(&iser_conn->ic_lock);
515 iser_conn->ic_stage =
516 ISER_CONN_STAGE_HELLOREPLY_SENT_FAIL;
517 mutex_exit(&iser_conn->ic_lock);
519 idm_conn_event(iser_conn->ic_idmc,
520 CE_TRANSPORT_FAIL, status);
523 /* Free this msg handle */
524 iser_msg_free(msg);
526 } else if (opcode == ISER_OPCODE_HELLOREPLY_MSG) { /* at initiator */
529 * We are currently not supporting Hello Exchange,
530 * since OFED iSER does not. May be revisited.
532 ASSERT(opcode != ISER_OPCODE_HELLOREPLY_MSG);
534 if (iser_conn->ic_type != ISER_CONN_TYPE_INI) {
535 idm_conn_event(iser_conn->ic_idmc,
536 CE_TRANSPORT_FAIL, status);
539 iser_helloreply_hdr_t *hello_hdr = (iser_helloreply_hdr_t *)hdr;
541 ISER_LOG(CE_NOTE, "received Hello Reply message: opcode[%d], "
542 "maxver[%d], curver[%d], iser_ord[%d], msg (0x%p)",
543 hello_hdr->opcode, hello_hdr->maxver, hello_hdr->curver,
544 ntohs(hello_hdr->iser_ord), (void *)msg);
546 /* Free this msg handle */
547 iser_msg_free(msg);
550 * Signal the receipt of HelloReply to the waiting thread
551 * so that the initiator can proceed to the Full Feature
552 * Phase.
554 mutex_enter(&iser_conn->ic_lock);
555 iser_conn->ic_stage = ISER_CONN_STAGE_HELLOREPLY_RCV;
556 cv_signal(&iser_conn->ic_stage_cv);
557 mutex_exit(&iser_conn->ic_lock);
558 } else {
559 /* Protocol error: free the msg handle and fail the session */
560 ISER_LOG(CE_NOTE, "iser_msg_handle: unsupported opcode (0x%x): "
561 "terminating session on IDM handle (0x%p)", opcode,
562 (void *) iser_conn->ic_idmc);
564 iser_msg_free(msg);
565 idm_conn_event(iser_conn->ic_idmc, CE_TRANSPORT_FAIL,
566 IDM_STATUS_FAIL);
/* Opcode field of a PDU's iSCSI header, masked with ISCSI_OPCODE_MASK */
#define	IDM_PDU_OPCODE(PDU)	\
	((PDU)->isp_hdr->opcode & ISCSI_OPCODE_MASK)
573 /* network to host translation for 24b integers */
574 static uint32_t
575 n2h24(uchar_t *ptr)
577 return ((ptr[0] << 16) | (ptr[1] << 8) | ptr[2]);
580 /* ARGSUSED */
581 static void
582 iser_rx_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
584 /* Free the iser msg handle and the PDU handle */
585 iser_msg_free((iser_msg_t *)pdu->isp_transport_private);
586 idm_pdu_free(pdu);
590 iser_iscsihdr_handle(iser_chan_t *chan, iser_msg_t *msg)
592 idm_pdu_t *pdu;
593 uint8_t *iser_hdrp;
594 uint8_t *iscsi_hdrp;
595 iscsi_hdr_t *bhs;
597 pdu = idm_pdu_alloc_nosleep(sizeof (iscsi_hdr_t), 0);
598 pdu->isp_ic = chan->ic_conn->ic_idmc;
599 ASSERT(pdu->isp_ic != NULL);
601 /* Set the iser_msg handle into the transport-private field */
602 pdu->isp_transport_private = (void *)msg;
604 /* Set up a pointer in the pdu handle to the iSER header */
605 iser_hdrp = (uint8_t *)(uintptr_t)msg->msg_ds.ds_va;
606 if (iser_hdrp == NULL) {
607 ISER_LOG(CE_NOTE, "iser_iscsihdr_handle: iser_hdrp is NULL");
608 idm_pdu_free(pdu);
609 return (ISER_STATUS_FAIL);
611 pdu->isp_transport_hdr = (void *)iser_hdrp;
612 pdu->isp_transport_hdrlen = ISER_HEADER_LENGTH;
615 * Set up a pointer to the iSCSI header, which is directly
616 * after the iSER header in the message.
618 iscsi_hdrp = ((uint8_t *)(uintptr_t)msg->msg_ds.ds_va) +
619 ISER_HEADER_LENGTH;
620 if (iscsi_hdrp == NULL) {
621 ISER_LOG(CE_NOTE, "iser_iscsihdr_handle: iscsi_hdrp is NULL");
622 idm_pdu_free(pdu);
623 return (ISER_STATUS_FAIL);
625 pdu->isp_hdr = (iscsi_hdr_t *)(uintptr_t)iscsi_hdrp;
627 /* Fill in the BHS */
628 bhs = pdu->isp_hdr;
629 pdu->isp_hdrlen = sizeof (iscsi_hdr_t) +
630 (bhs->hlength * sizeof (uint32_t));
631 pdu->isp_datalen = n2h24(bhs->dlength);
632 pdu->isp_callback = iser_rx_pdu_cb;
635 * If datalen > 0, then non-scsi data may be present. Allocate
636 * space in the PDU handle and set a pointer to the data.
638 if (pdu->isp_datalen) {
639 pdu->isp_data = ((uint8_t *)(uintptr_t)pdu->isp_hdr) +
640 pdu->isp_hdrlen;
643 /* Process RX PDU */
644 idm_pdu_rx(pdu->isp_ic, pdu);
646 return (DDI_SUCCESS);