/*
 * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *	- Redistributions of source code must retain the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer.
 *
 *	- Redistributions in binary form must reproduce the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer in the documentation and/or other materials
 *	  provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
32 #include <linux/kernel.h>
33 #include <linux/slab.h>
35 #include <linux/scatterlist.h>
36 #include <linux/kfifo.h>
37 #include <scsi/scsi_cmnd.h>
38 #include <scsi/scsi_host.h>
40 #include "iscsi_iser.h"
42 /* Register user buffer memory and initialize passive rdma
43 * dto descriptor. Total data size is stored in
44 * iser_task->data[ISER_DIR_IN].data_len
46 static int iser_prepare_read_cmd(struct iscsi_task
*task
,
50 struct iscsi_iser_task
*iser_task
= task
->dd_data
;
51 struct iser_regd_buf
*regd_buf
;
53 struct iser_hdr
*hdr
= &iser_task
->desc
.iser_header
;
54 struct iser_data_buf
*buf_in
= &iser_task
->data
[ISER_DIR_IN
];
56 err
= iser_dma_map_task_data(iser_task
,
63 if (edtl
> iser_task
->data
[ISER_DIR_IN
].data_len
) {
64 iser_err("Total data length: %ld, less than EDTL: "
65 "%d, in READ cmd BHS itt: %d, conn: 0x%p\n",
66 iser_task
->data
[ISER_DIR_IN
].data_len
, edtl
,
67 task
->itt
, iser_task
->iser_conn
);
71 err
= iser_reg_rdma_mem(iser_task
,ISER_DIR_IN
);
73 iser_err("Failed to set up Data-IN RDMA\n");
76 regd_buf
= &iser_task
->rdma_regd
[ISER_DIR_IN
];
78 hdr
->flags
|= ISER_RSV
;
79 hdr
->read_stag
= cpu_to_be32(regd_buf
->reg
.rkey
);
80 hdr
->read_va
= cpu_to_be64(regd_buf
->reg
.va
);
82 iser_dbg("Cmd itt:%d READ tags RKEY:%#.4X VA:%#llX\n",
83 task
->itt
, regd_buf
->reg
.rkey
,
84 (unsigned long long)regd_buf
->reg
.va
);
89 /* Register user buffer memory and initialize passive rdma
90 * dto descriptor. Total data size is stored in
91 * task->data[ISER_DIR_OUT].data_len
94 iser_prepare_write_cmd(struct iscsi_task
*task
,
96 unsigned int unsol_sz
,
99 struct iscsi_iser_task
*iser_task
= task
->dd_data
;
100 struct iser_regd_buf
*regd_buf
;
102 struct iser_hdr
*hdr
= &iser_task
->desc
.iser_header
;
103 struct iser_data_buf
*buf_out
= &iser_task
->data
[ISER_DIR_OUT
];
104 struct ib_sge
*tx_dsg
= &iser_task
->desc
.tx_sg
[1];
106 err
= iser_dma_map_task_data(iser_task
,
113 if (edtl
> iser_task
->data
[ISER_DIR_OUT
].data_len
) {
114 iser_err("Total data length: %ld, less than EDTL: %d, "
115 "in WRITE cmd BHS itt: %d, conn: 0x%p\n",
116 iser_task
->data
[ISER_DIR_OUT
].data_len
,
117 edtl
, task
->itt
, task
->conn
);
121 err
= iser_reg_rdma_mem(iser_task
,ISER_DIR_OUT
);
123 iser_err("Failed to register write cmd RDMA mem\n");
127 regd_buf
= &iser_task
->rdma_regd
[ISER_DIR_OUT
];
129 if (unsol_sz
< edtl
) {
130 hdr
->flags
|= ISER_WSV
;
131 hdr
->write_stag
= cpu_to_be32(regd_buf
->reg
.rkey
);
132 hdr
->write_va
= cpu_to_be64(regd_buf
->reg
.va
+ unsol_sz
);
134 iser_dbg("Cmd itt:%d, WRITE tags, RKEY:%#.4X "
135 "VA:%#llX + unsol:%d\n",
136 task
->itt
, regd_buf
->reg
.rkey
,
137 (unsigned long long)regd_buf
->reg
.va
, unsol_sz
);
141 iser_dbg("Cmd itt:%d, WRITE, adding imm.data sz: %d\n",
143 tx_dsg
->addr
= regd_buf
->reg
.va
;
144 tx_dsg
->length
= imm_sz
;
145 tx_dsg
->lkey
= regd_buf
->reg
.lkey
;
146 iser_task
->desc
.num_sge
= 2;
152 /* creates a new tx descriptor and adds header regd buffer */
153 static void iser_create_send_desc(struct iser_conn
*ib_conn
,
154 struct iser_tx_desc
*tx_desc
)
156 struct iser_device
*device
= ib_conn
->device
;
158 ib_dma_sync_single_for_cpu(device
->ib_device
,
159 tx_desc
->dma_addr
, ISER_HEADERS_LEN
, DMA_TO_DEVICE
);
161 memset(&tx_desc
->iser_header
, 0, sizeof(struct iser_hdr
));
162 tx_desc
->iser_header
.flags
= ISER_VER
;
164 tx_desc
->num_sge
= 1;
166 if (tx_desc
->tx_sg
[0].lkey
!= device
->mr
->lkey
) {
167 tx_desc
->tx_sg
[0].lkey
= device
->mr
->lkey
;
168 iser_dbg("sdesc %p lkey mismatch, fixing\n", tx_desc
);
173 static int iser_alloc_rx_descriptors(struct iser_conn
*ib_conn
)
177 struct iser_rx_desc
*rx_desc
;
178 struct ib_sge
*rx_sg
;
179 struct iser_device
*device
= ib_conn
->device
;
181 ib_conn
->rx_descs
= kmalloc(ISER_QP_MAX_RECV_DTOS
*
182 sizeof(struct iser_rx_desc
), GFP_KERNEL
);
183 if (!ib_conn
->rx_descs
)
184 goto rx_desc_alloc_fail
;
186 rx_desc
= ib_conn
->rx_descs
;
188 for (i
= 0; i
< ISER_QP_MAX_RECV_DTOS
; i
++, rx_desc
++) {
189 dma_addr
= ib_dma_map_single(device
->ib_device
, (void *)rx_desc
,
190 ISER_RX_PAYLOAD_SIZE
, DMA_FROM_DEVICE
);
191 if (ib_dma_mapping_error(device
->ib_device
, dma_addr
))
192 goto rx_desc_dma_map_failed
;
194 rx_desc
->dma_addr
= dma_addr
;
196 rx_sg
= &rx_desc
->rx_sg
;
197 rx_sg
->addr
= rx_desc
->dma_addr
;
198 rx_sg
->length
= ISER_RX_PAYLOAD_SIZE
;
199 rx_sg
->lkey
= device
->mr
->lkey
;
202 ib_conn
->rx_desc_head
= 0;
205 rx_desc_dma_map_failed
:
206 rx_desc
= ib_conn
->rx_descs
;
207 for (j
= 0; j
< i
; j
++, rx_desc
++)
208 ib_dma_unmap_single(device
->ib_device
, rx_desc
->dma_addr
,
209 ISER_RX_PAYLOAD_SIZE
, DMA_FROM_DEVICE
);
210 kfree(ib_conn
->rx_descs
);
211 ib_conn
->rx_descs
= NULL
;
213 iser_err("failed allocating rx descriptors / data buffers\n");
217 void iser_free_rx_descriptors(struct iser_conn
*ib_conn
)
220 struct iser_rx_desc
*rx_desc
;
221 struct iser_device
*device
= ib_conn
->device
;
223 if (ib_conn
->login_buf
) {
224 if (ib_conn
->login_req_dma
)
225 ib_dma_unmap_single(device
->ib_device
,
226 ib_conn
->login_req_dma
,
227 ISCSI_DEF_MAX_RECV_SEG_LEN
, DMA_TO_DEVICE
);
228 if (ib_conn
->login_resp_dma
)
229 ib_dma_unmap_single(device
->ib_device
,
230 ib_conn
->login_resp_dma
,
231 ISER_RX_LOGIN_SIZE
, DMA_FROM_DEVICE
);
232 kfree(ib_conn
->login_buf
);
235 if (!ib_conn
->rx_descs
)
238 rx_desc
= ib_conn
->rx_descs
;
239 for (i
= 0; i
< ISER_QP_MAX_RECV_DTOS
; i
++, rx_desc
++)
240 ib_dma_unmap_single(device
->ib_device
, rx_desc
->dma_addr
,
241 ISER_RX_PAYLOAD_SIZE
, DMA_FROM_DEVICE
);
242 kfree(ib_conn
->rx_descs
);
246 * iser_conn_set_full_featured_mode - (iSER API)
248 int iser_conn_set_full_featured_mode(struct iscsi_conn
*conn
)
250 struct iscsi_iser_conn
*iser_conn
= conn
->dd_data
;
252 iser_dbg("Initially post: %d\n", ISER_MIN_POSTED_RX
);
254 /* Check that there is no posted recv or send buffers left - */
255 /* they must be consumed during the login phase */
256 BUG_ON(iser_conn
->ib_conn
->post_recv_buf_count
!= 0);
257 BUG_ON(atomic_read(&iser_conn
->ib_conn
->post_send_buf_count
) != 0);
259 if (iser_alloc_rx_descriptors(iser_conn
->ib_conn
))
262 /* Initial post receive buffers */
263 if (iser_post_recvm(iser_conn
->ib_conn
, ISER_MIN_POSTED_RX
))
270 * iser_send_command - send command PDU
272 int iser_send_command(struct iscsi_conn
*conn
,
273 struct iscsi_task
*task
)
275 struct iscsi_iser_conn
*iser_conn
= conn
->dd_data
;
276 struct iscsi_iser_task
*iser_task
= task
->dd_data
;
279 struct iser_data_buf
*data_buf
;
280 struct iscsi_scsi_req
*hdr
= (struct iscsi_scsi_req
*)task
->hdr
;
281 struct scsi_cmnd
*sc
= task
->sc
;
282 struct iser_tx_desc
*tx_desc
= &iser_task
->desc
;
284 edtl
= ntohl(hdr
->data_length
);
286 /* build the tx desc regd header and add it to the tx desc dto */
287 tx_desc
->type
= ISCSI_TX_SCSI_COMMAND
;
288 iser_create_send_desc(iser_conn
->ib_conn
, tx_desc
);
290 if (hdr
->flags
& ISCSI_FLAG_CMD_READ
)
291 data_buf
= &iser_task
->data
[ISER_DIR_IN
];
293 data_buf
= &iser_task
->data
[ISER_DIR_OUT
];
295 if (scsi_sg_count(sc
)) { /* using a scatter list */
296 data_buf
->buf
= scsi_sglist(sc
);
297 data_buf
->size
= scsi_sg_count(sc
);
300 data_buf
->data_len
= scsi_bufflen(sc
);
302 if (hdr
->flags
& ISCSI_FLAG_CMD_READ
) {
303 err
= iser_prepare_read_cmd(task
, edtl
);
305 goto send_command_error
;
307 if (hdr
->flags
& ISCSI_FLAG_CMD_WRITE
) {
308 err
= iser_prepare_write_cmd(task
,
311 task
->unsol_r2t
.data_length
,
314 goto send_command_error
;
317 iser_task
->status
= ISER_TASK_STATUS_STARTED
;
319 err
= iser_post_send(iser_conn
->ib_conn
, tx_desc
);
324 iser_err("conn %p failed task->itt %d err %d\n",conn
, task
->itt
, err
);
329 * iser_send_data_out - send data out PDU
331 int iser_send_data_out(struct iscsi_conn
*conn
,
332 struct iscsi_task
*task
,
333 struct iscsi_data
*hdr
)
335 struct iscsi_iser_conn
*iser_conn
= conn
->dd_data
;
336 struct iscsi_iser_task
*iser_task
= task
->dd_data
;
337 struct iser_tx_desc
*tx_desc
= NULL
;
338 struct iser_regd_buf
*regd_buf
;
339 unsigned long buf_offset
;
340 unsigned long data_seg_len
;
343 struct ib_sge
*tx_dsg
;
345 itt
= (__force
uint32_t)hdr
->itt
;
346 data_seg_len
= ntoh24(hdr
->dlength
);
347 buf_offset
= ntohl(hdr
->offset
);
349 iser_dbg("%s itt %d dseg_len %d offset %d\n",
350 __func__
,(int)itt
,(int)data_seg_len
,(int)buf_offset
);
352 tx_desc
= kmem_cache_zalloc(ig
.desc_cache
, GFP_ATOMIC
);
353 if (tx_desc
== NULL
) {
354 iser_err("Failed to alloc desc for post dataout\n");
358 tx_desc
->type
= ISCSI_TX_DATAOUT
;
359 tx_desc
->iser_header
.flags
= ISER_VER
;
360 memcpy(&tx_desc
->iscsi_header
, hdr
, sizeof(struct iscsi_hdr
));
362 /* build the tx desc */
363 iser_initialize_task_headers(task
, tx_desc
);
365 regd_buf
= &iser_task
->rdma_regd
[ISER_DIR_OUT
];
366 tx_dsg
= &tx_desc
->tx_sg
[1];
367 tx_dsg
->addr
= regd_buf
->reg
.va
+ buf_offset
;
368 tx_dsg
->length
= data_seg_len
;
369 tx_dsg
->lkey
= regd_buf
->reg
.lkey
;
370 tx_desc
->num_sge
= 2;
372 if (buf_offset
+ data_seg_len
> iser_task
->data
[ISER_DIR_OUT
].data_len
) {
373 iser_err("Offset:%ld & DSL:%ld in Data-Out "
374 "inconsistent with total len:%ld, itt:%d\n",
375 buf_offset
, data_seg_len
,
376 iser_task
->data
[ISER_DIR_OUT
].data_len
, itt
);
378 goto send_data_out_error
;
380 iser_dbg("data-out itt: %d, offset: %ld, sz: %ld\n",
381 itt
, buf_offset
, data_seg_len
);
384 err
= iser_post_send(iser_conn
->ib_conn
, tx_desc
);
389 kmem_cache_free(ig
.desc_cache
, tx_desc
);
390 iser_err("conn %p failed err %d\n",conn
, err
);
394 int iser_send_control(struct iscsi_conn
*conn
,
395 struct iscsi_task
*task
)
397 struct iscsi_iser_conn
*iser_conn
= conn
->dd_data
;
398 struct iscsi_iser_task
*iser_task
= task
->dd_data
;
399 struct iser_tx_desc
*mdesc
= &iser_task
->desc
;
400 unsigned long data_seg_len
;
402 struct iser_device
*device
;
403 struct iser_conn
*ib_conn
= iser_conn
->ib_conn
;
405 /* build the tx desc regd header and add it to the tx desc dto */
406 mdesc
->type
= ISCSI_TX_CONTROL
;
407 iser_create_send_desc(iser_conn
->ib_conn
, mdesc
);
409 device
= iser_conn
->ib_conn
->device
;
411 data_seg_len
= ntoh24(task
->hdr
->dlength
);
413 if (data_seg_len
> 0) {
414 struct ib_sge
*tx_dsg
= &mdesc
->tx_sg
[1];
415 if (task
!= conn
->login_task
) {
416 iser_err("data present on non login task!!!\n");
417 goto send_control_error
;
420 ib_dma_sync_single_for_cpu(device
->ib_device
,
421 ib_conn
->login_req_dma
, task
->data_count
,
424 memcpy(iser_conn
->ib_conn
->login_req_buf
, task
->data
,
427 ib_dma_sync_single_for_device(device
->ib_device
,
428 ib_conn
->login_req_dma
, task
->data_count
,
431 tx_dsg
->addr
= iser_conn
->ib_conn
->login_req_dma
;
432 tx_dsg
->length
= task
->data_count
;
433 tx_dsg
->lkey
= device
->mr
->lkey
;
437 if (task
== conn
->login_task
) {
438 err
= iser_post_recvl(iser_conn
->ib_conn
);
440 goto send_control_error
;
443 err
= iser_post_send(iser_conn
->ib_conn
, mdesc
);
448 iser_err("conn %p failed err %d\n",conn
, err
);
453 * iser_rcv_dto_completion - recv DTO completion
455 void iser_rcv_completion(struct iser_rx_desc
*rx_desc
,
456 unsigned long rx_xfer_len
,
457 struct iser_conn
*ib_conn
)
459 struct iscsi_iser_conn
*conn
= ib_conn
->iser_conn
;
460 struct iscsi_hdr
*hdr
;
462 int rx_buflen
, outstanding
, count
, err
;
464 /* differentiate between login to all other PDUs */
465 if ((char *)rx_desc
== ib_conn
->login_resp_buf
) {
466 rx_dma
= ib_conn
->login_resp_dma
;
467 rx_buflen
= ISER_RX_LOGIN_SIZE
;
469 rx_dma
= rx_desc
->dma_addr
;
470 rx_buflen
= ISER_RX_PAYLOAD_SIZE
;
473 ib_dma_sync_single_for_cpu(ib_conn
->device
->ib_device
, rx_dma
,
474 rx_buflen
, DMA_FROM_DEVICE
);
476 hdr
= &rx_desc
->iscsi_header
;
478 iser_dbg("op 0x%x itt 0x%x dlen %d\n", hdr
->opcode
,
479 hdr
->itt
, (int)(rx_xfer_len
- ISER_HEADERS_LEN
));
481 iscsi_iser_recv(conn
->iscsi_conn
, hdr
,
482 rx_desc
->data
, rx_xfer_len
- ISER_HEADERS_LEN
);
484 ib_dma_sync_single_for_device(ib_conn
->device
->ib_device
, rx_dma
,
485 rx_buflen
, DMA_FROM_DEVICE
);
487 /* decrementing conn->post_recv_buf_count only --after-- freeing the *
488 * task eliminates the need to worry on tasks which are completed in *
489 * parallel to the execution of iser_conn_term. So the code that waits *
490 * for the posted rx bufs refcount to become zero handles everything */
491 conn
->ib_conn
->post_recv_buf_count
--;
493 if (rx_dma
== ib_conn
->login_resp_dma
)
496 outstanding
= ib_conn
->post_recv_buf_count
;
497 if (outstanding
+ ISER_MIN_POSTED_RX
<= ISER_QP_MAX_RECV_DTOS
) {
498 count
= min(ISER_QP_MAX_RECV_DTOS
- outstanding
,
500 err
= iser_post_recvm(ib_conn
, count
);
502 iser_err("posting %d rx bufs err %d\n", count
, err
);
506 void iser_snd_completion(struct iser_tx_desc
*tx_desc
,
507 struct iser_conn
*ib_conn
)
509 struct iscsi_task
*task
;
510 struct iser_device
*device
= ib_conn
->device
;
512 if (tx_desc
->type
== ISCSI_TX_DATAOUT
) {
513 ib_dma_unmap_single(device
->ib_device
, tx_desc
->dma_addr
,
514 ISER_HEADERS_LEN
, DMA_TO_DEVICE
);
515 kmem_cache_free(ig
.desc_cache
, tx_desc
);
518 atomic_dec(&ib_conn
->post_send_buf_count
);
520 if (tx_desc
->type
== ISCSI_TX_CONTROL
) {
521 /* this arithmetic is legal by libiscsi dd_data allocation */
522 task
= (void *) ((long)(void *)tx_desc
-
523 sizeof(struct iscsi_task
));
524 if (task
->hdr
->itt
== RESERVED_ITT
)
525 iscsi_put_task(task
);
529 void iser_task_rdma_init(struct iscsi_iser_task
*iser_task
)
532 iser_task
->status
= ISER_TASK_STATUS_INIT
;
534 iser_task
->dir
[ISER_DIR_IN
] = 0;
535 iser_task
->dir
[ISER_DIR_OUT
] = 0;
537 iser_task
->data
[ISER_DIR_IN
].data_len
= 0;
538 iser_task
->data
[ISER_DIR_OUT
].data_len
= 0;
540 memset(&iser_task
->rdma_regd
[ISER_DIR_IN
], 0,
541 sizeof(struct iser_regd_buf
));
542 memset(&iser_task
->rdma_regd
[ISER_DIR_OUT
], 0,
543 sizeof(struct iser_regd_buf
));
546 void iser_task_rdma_finalize(struct iscsi_iser_task
*iser_task
)
548 int is_rdma_aligned
= 1;
549 struct iser_regd_buf
*regd
;
551 /* if we were reading, copy back to unaligned sglist,
552 * anyway dma_unmap and free the copy
554 if (iser_task
->data_copy
[ISER_DIR_IN
].copy_buf
!= NULL
) {
556 iser_finalize_rdma_unaligned_sg(iser_task
, ISER_DIR_IN
);
558 if (iser_task
->data_copy
[ISER_DIR_OUT
].copy_buf
!= NULL
) {
560 iser_finalize_rdma_unaligned_sg(iser_task
, ISER_DIR_OUT
);
563 if (iser_task
->dir
[ISER_DIR_IN
]) {
564 regd
= &iser_task
->rdma_regd
[ISER_DIR_IN
];
565 if (regd
->reg
.is_fmr
)
566 iser_unreg_mem(®d
->reg
);
569 if (iser_task
->dir
[ISER_DIR_OUT
]) {
570 regd
= &iser_task
->rdma_regd
[ISER_DIR_OUT
];
571 if (regd
->reg
.is_fmr
)
572 iser_unreg_mem(®d
->reg
);
575 /* if the data was unaligned, it was already unmapped and then copied */
577 iser_dma_unmap_task_data(iser_task
);