/*
 * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *	- Redistributions of source code must retain the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer.
 *
 *	- Redistributions in binary form must reproduce the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer in the documentation and/or other materials
 *	  provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
32 #include <linux/kernel.h>
33 #include <linux/slab.h>
35 #include <linux/scatterlist.h>
36 #include <linux/kfifo.h>
37 #include <scsi/scsi_cmnd.h>
38 #include <scsi/scsi_host.h>
40 #include "iscsi_iser.h"
42 /* Register user buffer memory and initialize passive rdma
43 * dto descriptor. Total data size is stored in
44 * iser_task->data[ISER_DIR_IN].data_len
46 static int iser_prepare_read_cmd(struct iscsi_task
*task
,
50 struct iscsi_iser_task
*iser_task
= task
->dd_data
;
51 struct iser_regd_buf
*regd_buf
;
53 struct iser_hdr
*hdr
= &iser_task
->desc
.iser_header
;
54 struct iser_data_buf
*buf_in
= &iser_task
->data
[ISER_DIR_IN
];
56 err
= iser_dma_map_task_data(iser_task
,
63 if (edtl
> iser_task
->data
[ISER_DIR_IN
].data_len
) {
64 iser_err("Total data length: %ld, less than EDTL: "
65 "%d, in READ cmd BHS itt: %d, conn: 0x%p\n",
66 iser_task
->data
[ISER_DIR_IN
].data_len
, edtl
,
67 task
->itt
, iser_task
->iser_conn
);
71 err
= iser_reg_rdma_mem(iser_task
,ISER_DIR_IN
);
73 iser_err("Failed to set up Data-IN RDMA\n");
76 regd_buf
= &iser_task
->rdma_regd
[ISER_DIR_IN
];
78 hdr
->flags
|= ISER_RSV
;
79 hdr
->read_stag
= cpu_to_be32(regd_buf
->reg
.rkey
);
80 hdr
->read_va
= cpu_to_be64(regd_buf
->reg
.va
);
82 iser_dbg("Cmd itt:%d READ tags RKEY:%#.4X VA:%#llX\n",
83 task
->itt
, regd_buf
->reg
.rkey
,
84 (unsigned long long)regd_buf
->reg
.va
);
89 /* Register user buffer memory and initialize passive rdma
90 * dto descriptor. Total data size is stored in
91 * task->data[ISER_DIR_OUT].data_len
94 iser_prepare_write_cmd(struct iscsi_task
*task
,
96 unsigned int unsol_sz
,
99 struct iscsi_iser_task
*iser_task
= task
->dd_data
;
100 struct iser_regd_buf
*regd_buf
;
102 struct iser_hdr
*hdr
= &iser_task
->desc
.iser_header
;
103 struct iser_data_buf
*buf_out
= &iser_task
->data
[ISER_DIR_OUT
];
104 struct ib_sge
*tx_dsg
= &iser_task
->desc
.tx_sg
[1];
106 err
= iser_dma_map_task_data(iser_task
,
113 if (edtl
> iser_task
->data
[ISER_DIR_OUT
].data_len
) {
114 iser_err("Total data length: %ld, less than EDTL: %d, "
115 "in WRITE cmd BHS itt: %d, conn: 0x%p\n",
116 iser_task
->data
[ISER_DIR_OUT
].data_len
,
117 edtl
, task
->itt
, task
->conn
);
121 err
= iser_reg_rdma_mem(iser_task
,ISER_DIR_OUT
);
123 iser_err("Failed to register write cmd RDMA mem\n");
127 regd_buf
= &iser_task
->rdma_regd
[ISER_DIR_OUT
];
129 if (unsol_sz
< edtl
) {
130 hdr
->flags
|= ISER_WSV
;
131 hdr
->write_stag
= cpu_to_be32(regd_buf
->reg
.rkey
);
132 hdr
->write_va
= cpu_to_be64(regd_buf
->reg
.va
+ unsol_sz
);
134 iser_dbg("Cmd itt:%d, WRITE tags, RKEY:%#.4X "
135 "VA:%#llX + unsol:%d\n",
136 task
->itt
, regd_buf
->reg
.rkey
,
137 (unsigned long long)regd_buf
->reg
.va
, unsol_sz
);
141 iser_dbg("Cmd itt:%d, WRITE, adding imm.data sz: %d\n",
143 tx_dsg
->addr
= regd_buf
->reg
.va
;
144 tx_dsg
->length
= imm_sz
;
145 tx_dsg
->lkey
= regd_buf
->reg
.lkey
;
146 iser_task
->desc
.num_sge
= 2;
152 /* creates a new tx descriptor and adds header regd buffer */
153 static void iser_create_send_desc(struct iser_conn
*ib_conn
,
154 struct iser_tx_desc
*tx_desc
)
156 struct iser_device
*device
= ib_conn
->device
;
158 ib_dma_sync_single_for_cpu(device
->ib_device
,
159 tx_desc
->dma_addr
, ISER_HEADERS_LEN
, DMA_TO_DEVICE
);
161 memset(&tx_desc
->iser_header
, 0, sizeof(struct iser_hdr
));
162 tx_desc
->iser_header
.flags
= ISER_VER
;
164 tx_desc
->num_sge
= 1;
166 if (tx_desc
->tx_sg
[0].lkey
!= device
->mr
->lkey
) {
167 tx_desc
->tx_sg
[0].lkey
= device
->mr
->lkey
;
168 iser_dbg("sdesc %p lkey mismatch, fixing\n", tx_desc
);
173 int iser_alloc_rx_descriptors(struct iser_conn
*ib_conn
)
177 struct iser_rx_desc
*rx_desc
;
178 struct ib_sge
*rx_sg
;
179 struct iser_device
*device
= ib_conn
->device
;
181 ib_conn
->rx_descs
= kmalloc(ISER_QP_MAX_RECV_DTOS
*
182 sizeof(struct iser_rx_desc
), GFP_KERNEL
);
183 if (!ib_conn
->rx_descs
)
184 goto rx_desc_alloc_fail
;
186 rx_desc
= ib_conn
->rx_descs
;
188 for (i
= 0; i
< ISER_QP_MAX_RECV_DTOS
; i
++, rx_desc
++) {
189 dma_addr
= ib_dma_map_single(device
->ib_device
, (void *)rx_desc
,
190 ISER_RX_PAYLOAD_SIZE
, DMA_FROM_DEVICE
);
191 if (ib_dma_mapping_error(device
->ib_device
, dma_addr
))
192 goto rx_desc_dma_map_failed
;
194 rx_desc
->dma_addr
= dma_addr
;
196 rx_sg
= &rx_desc
->rx_sg
;
197 rx_sg
->addr
= rx_desc
->dma_addr
;
198 rx_sg
->length
= ISER_RX_PAYLOAD_SIZE
;
199 rx_sg
->lkey
= device
->mr
->lkey
;
202 ib_conn
->rx_desc_head
= 0;
205 rx_desc_dma_map_failed
:
206 rx_desc
= ib_conn
->rx_descs
;
207 for (j
= 0; j
< i
; j
++, rx_desc
++)
208 ib_dma_unmap_single(device
->ib_device
, rx_desc
->dma_addr
,
209 ISER_RX_PAYLOAD_SIZE
, DMA_FROM_DEVICE
);
210 kfree(ib_conn
->rx_descs
);
211 ib_conn
->rx_descs
= NULL
;
213 iser_err("failed allocating rx descriptors / data buffers\n");
217 void iser_free_rx_descriptors(struct iser_conn
*ib_conn
)
220 struct iser_rx_desc
*rx_desc
;
221 struct iser_device
*device
= ib_conn
->device
;
223 if (ib_conn
->login_buf
) {
224 ib_dma_unmap_single(device
->ib_device
, ib_conn
->login_dma
,
225 ISER_RX_LOGIN_SIZE
, DMA_FROM_DEVICE
);
226 kfree(ib_conn
->login_buf
);
229 if (!ib_conn
->rx_descs
)
232 rx_desc
= ib_conn
->rx_descs
;
233 for (i
= 0; i
< ISER_QP_MAX_RECV_DTOS
; i
++, rx_desc
++)
234 ib_dma_unmap_single(device
->ib_device
, rx_desc
->dma_addr
,
235 ISER_RX_PAYLOAD_SIZE
, DMA_FROM_DEVICE
);
236 kfree(ib_conn
->rx_descs
);
240 * iser_conn_set_full_featured_mode - (iSER API)
242 int iser_conn_set_full_featured_mode(struct iscsi_conn
*conn
)
244 struct iscsi_iser_conn
*iser_conn
= conn
->dd_data
;
246 iser_dbg("Initially post: %d\n", ISER_MIN_POSTED_RX
);
248 /* Check that there is no posted recv or send buffers left - */
249 /* they must be consumed during the login phase */
250 BUG_ON(iser_conn
->ib_conn
->post_recv_buf_count
!= 0);
251 BUG_ON(atomic_read(&iser_conn
->ib_conn
->post_send_buf_count
) != 0);
253 if (iser_alloc_rx_descriptors(iser_conn
->ib_conn
))
256 /* Initial post receive buffers */
257 if (iser_post_recvm(iser_conn
->ib_conn
, ISER_MIN_POSTED_RX
))
264 * iser_send_command - send command PDU
266 int iser_send_command(struct iscsi_conn
*conn
,
267 struct iscsi_task
*task
)
269 struct iscsi_iser_conn
*iser_conn
= conn
->dd_data
;
270 struct iscsi_iser_task
*iser_task
= task
->dd_data
;
273 struct iser_data_buf
*data_buf
;
274 struct iscsi_cmd
*hdr
= (struct iscsi_cmd
*)task
->hdr
;
275 struct scsi_cmnd
*sc
= task
->sc
;
276 struct iser_tx_desc
*tx_desc
= &iser_task
->desc
;
278 edtl
= ntohl(hdr
->data_length
);
280 /* build the tx desc regd header and add it to the tx desc dto */
281 tx_desc
->type
= ISCSI_TX_SCSI_COMMAND
;
282 iser_create_send_desc(iser_conn
->ib_conn
, tx_desc
);
284 if (hdr
->flags
& ISCSI_FLAG_CMD_READ
)
285 data_buf
= &iser_task
->data
[ISER_DIR_IN
];
287 data_buf
= &iser_task
->data
[ISER_DIR_OUT
];
289 if (scsi_sg_count(sc
)) { /* using a scatter list */
290 data_buf
->buf
= scsi_sglist(sc
);
291 data_buf
->size
= scsi_sg_count(sc
);
294 data_buf
->data_len
= scsi_bufflen(sc
);
296 if (hdr
->flags
& ISCSI_FLAG_CMD_READ
) {
297 err
= iser_prepare_read_cmd(task
, edtl
);
299 goto send_command_error
;
301 if (hdr
->flags
& ISCSI_FLAG_CMD_WRITE
) {
302 err
= iser_prepare_write_cmd(task
,
305 task
->unsol_r2t
.data_length
,
308 goto send_command_error
;
311 iser_task
->status
= ISER_TASK_STATUS_STARTED
;
313 err
= iser_post_send(iser_conn
->ib_conn
, tx_desc
);
318 iser_err("conn %p failed task->itt %d err %d\n",conn
, task
->itt
, err
);
323 * iser_send_data_out - send data out PDU
325 int iser_send_data_out(struct iscsi_conn
*conn
,
326 struct iscsi_task
*task
,
327 struct iscsi_data
*hdr
)
329 struct iscsi_iser_conn
*iser_conn
= conn
->dd_data
;
330 struct iscsi_iser_task
*iser_task
= task
->dd_data
;
331 struct iser_tx_desc
*tx_desc
= NULL
;
332 struct iser_regd_buf
*regd_buf
;
333 unsigned long buf_offset
;
334 unsigned long data_seg_len
;
337 struct ib_sge
*tx_dsg
;
339 itt
= (__force
uint32_t)hdr
->itt
;
340 data_seg_len
= ntoh24(hdr
->dlength
);
341 buf_offset
= ntohl(hdr
->offset
);
343 iser_dbg("%s itt %d dseg_len %d offset %d\n",
344 __func__
,(int)itt
,(int)data_seg_len
,(int)buf_offset
);
346 tx_desc
= kmem_cache_zalloc(ig
.desc_cache
, GFP_ATOMIC
);
347 if (tx_desc
== NULL
) {
348 iser_err("Failed to alloc desc for post dataout\n");
352 tx_desc
->type
= ISCSI_TX_DATAOUT
;
353 tx_desc
->iser_header
.flags
= ISER_VER
;
354 memcpy(&tx_desc
->iscsi_header
, hdr
, sizeof(struct iscsi_hdr
));
356 /* build the tx desc */
357 iser_initialize_task_headers(task
, tx_desc
);
359 regd_buf
= &iser_task
->rdma_regd
[ISER_DIR_OUT
];
360 tx_dsg
= &tx_desc
->tx_sg
[1];
361 tx_dsg
->addr
= regd_buf
->reg
.va
+ buf_offset
;
362 tx_dsg
->length
= data_seg_len
;
363 tx_dsg
->lkey
= regd_buf
->reg
.lkey
;
364 tx_desc
->num_sge
= 2;
366 if (buf_offset
+ data_seg_len
> iser_task
->data
[ISER_DIR_OUT
].data_len
) {
367 iser_err("Offset:%ld & DSL:%ld in Data-Out "
368 "inconsistent with total len:%ld, itt:%d\n",
369 buf_offset
, data_seg_len
,
370 iser_task
->data
[ISER_DIR_OUT
].data_len
, itt
);
372 goto send_data_out_error
;
374 iser_dbg("data-out itt: %d, offset: %ld, sz: %ld\n",
375 itt
, buf_offset
, data_seg_len
);
378 err
= iser_post_send(iser_conn
->ib_conn
, tx_desc
);
383 kmem_cache_free(ig
.desc_cache
, tx_desc
);
384 iser_err("conn %p failed err %d\n",conn
, err
);
388 int iser_send_control(struct iscsi_conn
*conn
,
389 struct iscsi_task
*task
)
391 struct iscsi_iser_conn
*iser_conn
= conn
->dd_data
;
392 struct iscsi_iser_task
*iser_task
= task
->dd_data
;
393 struct iser_tx_desc
*mdesc
= &iser_task
->desc
;
394 unsigned long data_seg_len
;
396 struct iser_device
*device
;
398 /* build the tx desc regd header and add it to the tx desc dto */
399 mdesc
->type
= ISCSI_TX_CONTROL
;
400 iser_create_send_desc(iser_conn
->ib_conn
, mdesc
);
402 device
= iser_conn
->ib_conn
->device
;
404 data_seg_len
= ntoh24(task
->hdr
->dlength
);
406 if (data_seg_len
> 0) {
407 struct ib_sge
*tx_dsg
= &mdesc
->tx_sg
[1];
408 if (task
!= conn
->login_task
) {
409 iser_err("data present on non login task!!!\n");
410 goto send_control_error
;
412 memcpy(iser_conn
->ib_conn
->login_buf
, task
->data
,
414 tx_dsg
->addr
= iser_conn
->ib_conn
->login_dma
;
415 tx_dsg
->length
= data_seg_len
;
416 tx_dsg
->lkey
= device
->mr
->lkey
;
420 if (task
== conn
->login_task
) {
421 err
= iser_post_recvl(iser_conn
->ib_conn
);
423 goto send_control_error
;
426 err
= iser_post_send(iser_conn
->ib_conn
, mdesc
);
431 iser_err("conn %p failed err %d\n",conn
, err
);
436 * iser_rcv_dto_completion - recv DTO completion
438 void iser_rcv_completion(struct iser_rx_desc
*rx_desc
,
439 unsigned long rx_xfer_len
,
440 struct iser_conn
*ib_conn
)
442 struct iscsi_iser_conn
*conn
= ib_conn
->iser_conn
;
443 struct iscsi_hdr
*hdr
;
445 int rx_buflen
, outstanding
, count
, err
;
447 /* differentiate between login to all other PDUs */
448 if ((char *)rx_desc
== ib_conn
->login_buf
) {
449 rx_dma
= ib_conn
->login_dma
;
450 rx_buflen
= ISER_RX_LOGIN_SIZE
;
452 rx_dma
= rx_desc
->dma_addr
;
453 rx_buflen
= ISER_RX_PAYLOAD_SIZE
;
456 ib_dma_sync_single_for_cpu(ib_conn
->device
->ib_device
, rx_dma
,
457 rx_buflen
, DMA_FROM_DEVICE
);
459 hdr
= &rx_desc
->iscsi_header
;
461 iser_dbg("op 0x%x itt 0x%x dlen %d\n", hdr
->opcode
,
462 hdr
->itt
, (int)(rx_xfer_len
- ISER_HEADERS_LEN
));
464 iscsi_iser_recv(conn
->iscsi_conn
, hdr
,
465 rx_desc
->data
, rx_xfer_len
- ISER_HEADERS_LEN
);
467 ib_dma_sync_single_for_device(ib_conn
->device
->ib_device
, rx_dma
,
468 rx_buflen
, DMA_FROM_DEVICE
);
470 /* decrementing conn->post_recv_buf_count only --after-- freeing the *
471 * task eliminates the need to worry on tasks which are completed in *
472 * parallel to the execution of iser_conn_term. So the code that waits *
473 * for the posted rx bufs refcount to become zero handles everything */
474 conn
->ib_conn
->post_recv_buf_count
--;
476 if (rx_dma
== ib_conn
->login_dma
)
479 outstanding
= ib_conn
->post_recv_buf_count
;
480 if (outstanding
+ ISER_MIN_POSTED_RX
<= ISER_QP_MAX_RECV_DTOS
) {
481 count
= min(ISER_QP_MAX_RECV_DTOS
- outstanding
,
483 err
= iser_post_recvm(ib_conn
, count
);
485 iser_err("posting %d rx bufs err %d\n", count
, err
);
489 void iser_snd_completion(struct iser_tx_desc
*tx_desc
,
490 struct iser_conn
*ib_conn
)
492 struct iscsi_task
*task
;
493 struct iser_device
*device
= ib_conn
->device
;
495 if (tx_desc
->type
== ISCSI_TX_DATAOUT
) {
496 ib_dma_unmap_single(device
->ib_device
, tx_desc
->dma_addr
,
497 ISER_HEADERS_LEN
, DMA_TO_DEVICE
);
498 kmem_cache_free(ig
.desc_cache
, tx_desc
);
501 atomic_dec(&ib_conn
->post_send_buf_count
);
503 if (tx_desc
->type
== ISCSI_TX_CONTROL
) {
504 /* this arithmetic is legal by libiscsi dd_data allocation */
505 task
= (void *) ((long)(void *)tx_desc
-
506 sizeof(struct iscsi_task
));
507 if (task
->hdr
->itt
== RESERVED_ITT
)
508 iscsi_put_task(task
);
512 void iser_task_rdma_init(struct iscsi_iser_task
*iser_task
)
515 iser_task
->status
= ISER_TASK_STATUS_INIT
;
517 iser_task
->dir
[ISER_DIR_IN
] = 0;
518 iser_task
->dir
[ISER_DIR_OUT
] = 0;
520 iser_task
->data
[ISER_DIR_IN
].data_len
= 0;
521 iser_task
->data
[ISER_DIR_OUT
].data_len
= 0;
523 memset(&iser_task
->rdma_regd
[ISER_DIR_IN
], 0,
524 sizeof(struct iser_regd_buf
));
525 memset(&iser_task
->rdma_regd
[ISER_DIR_OUT
], 0,
526 sizeof(struct iser_regd_buf
));
529 void iser_task_rdma_finalize(struct iscsi_iser_task
*iser_task
)
531 int is_rdma_aligned
= 1;
532 struct iser_regd_buf
*regd
;
534 /* if we were reading, copy back to unaligned sglist,
535 * anyway dma_unmap and free the copy
537 if (iser_task
->data_copy
[ISER_DIR_IN
].copy_buf
!= NULL
) {
539 iser_finalize_rdma_unaligned_sg(iser_task
, ISER_DIR_IN
);
541 if (iser_task
->data_copy
[ISER_DIR_OUT
].copy_buf
!= NULL
) {
543 iser_finalize_rdma_unaligned_sg(iser_task
, ISER_DIR_OUT
);
546 if (iser_task
->dir
[ISER_DIR_IN
]) {
547 regd
= &iser_task
->rdma_regd
[ISER_DIR_IN
];
548 if (regd
->reg
.is_fmr
)
549 iser_unreg_mem(®d
->reg
);
552 if (iser_task
->dir
[ISER_DIR_OUT
]) {
553 regd
= &iser_task
->rdma_regd
[ISER_DIR_OUT
];
554 if (regd
->reg
.is_fmr
)
555 iser_unreg_mem(®d
->reg
);
558 /* if the data was unaligned, it was already unmapped and then copied */
560 iser_dma_unmap_task_data(iser_task
);