/*
 * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *	- Redistributions of source code must retain the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer.
 *
 *	- Redistributions in binary form must reproduce the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer in the documentation and/or other materials
 *	  provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/scatterlist.h>
#include <linux/kfifo.h>
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_host.h>

#include "iscsi_iser.h"
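
/*
 * iSER (iSCSI Extensions for RDMA) initiator datapath: preparing SCSI
 * READ/WRITE commands for RDMA, allocating and re-posting receive
 * descriptors, sending command/data-out/control PDUs, and handling
 * their send and receive completions.
 */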

/* Register user buffer memory and initialize passive rdma
 * dto descriptor. Total data size is stored in
 * iser_task->data[ISER_DIR_IN].data_len
 */
static int iser_prepare_read_cmd(struct iscsi_task *task,
				 unsigned int edtl)
{
	struct iscsi_iser_task *iser_task = task->dd_data;
	struct iser_regd_buf *regd_buf;
	int err;
	struct iser_hdr *hdr = &iser_task->desc.iser_header;
	struct iser_data_buf *buf_in = &iser_task->data[ISER_DIR_IN];

	err = iser_dma_map_task_data(iser_task,
				     buf_in,
				     ISER_DIR_IN,
				     DMA_FROM_DEVICE);
	if (err)
		return err;

	if (edtl > iser_task->data[ISER_DIR_IN].data_len) {
		iser_err("Total data length: %ld, less than EDTL: "
			 "%d, in READ cmd BHS itt: %d, conn: 0x%p\n",
			 iser_task->data[ISER_DIR_IN].data_len, edtl,
			 task->itt, iser_task->iser_conn);
		return -EINVAL;
	}

	err = iser_reg_rdma_mem(iser_task, ISER_DIR_IN);
	if (err) {
		iser_err("Failed to set up Data-IN RDMA\n");
		return err;
	}
	regd_buf = &iser_task->rdma_regd[ISER_DIR_IN];
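
	/* Advertise the registered buffer in the iSER header: ISER_RSV
	 * marks the read STag/VA as valid, and the STag/VA pair tells
	 * the target where to RDMA-Write the Data-In payload. */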
	hdr->flags    |= ISER_RSV;
	hdr->read_stag = cpu_to_be32(regd_buf->reg.rkey);
	hdr->read_va   = cpu_to_be64(regd_buf->reg.va);

	iser_dbg("Cmd itt:%d READ tags RKEY:%#.4X VA:%#llX\n",
		 task->itt, regd_buf->reg.rkey,
		 (unsigned long long)regd_buf->reg.va);

	return 0;
}

/* Register user buffer memory and initialize passive rdma
 * dto descriptor. Total data size is stored in
 * task->data[ISER_DIR_OUT].data_len
 */
static int
iser_prepare_write_cmd(struct iscsi_task *task,
		       unsigned int imm_sz,
		       unsigned int unsol_sz,
		       unsigned int edtl)
{
	struct iscsi_iser_task *iser_task = task->dd_data;
	struct iser_regd_buf *regd_buf;
	int err;
	struct iser_hdr *hdr = &iser_task->desc.iser_header;
	struct iser_data_buf *buf_out = &iser_task->data[ISER_DIR_OUT];
	struct ib_sge *tx_dsg = &iser_task->desc.tx_sg[1];

	err = iser_dma_map_task_data(iser_task,
				     buf_out,
				     ISER_DIR_OUT,
				     DMA_TO_DEVICE);
	if (err)
		return err;
	if (edtl > iser_task->data[ISER_DIR_OUT].data_len) {
		iser_err("Total data length: %ld, less than EDTL: %d, "
			 "in WRITE cmd BHS itt: %d, conn: 0x%p\n",
			 iser_task->data[ISER_DIR_OUT].data_len,
			 edtl, task->itt, task->conn);
		return -EINVAL;
	}

	err = iser_reg_rdma_mem(iser_task, ISER_DIR_OUT);
	if (err != 0) {
		iser_err("Failed to register write cmd RDMA mem\n");
		return err;
	}

	regd_buf = &iser_task->rdma_regd[ISER_DIR_OUT];
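
	/* Data beyond the unsolicited part is fetched by the target via
	 * RDMA-Read, so advertise the write STag and a VA offset past
	 * unsol_sz, and mark them valid with ISER_WSV. */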
	if (unsol_sz < edtl) {
		hdr->flags     |= ISER_WSV;
		hdr->write_stag = cpu_to_be32(regd_buf->reg.rkey);
		hdr->write_va   = cpu_to_be64(regd_buf->reg.va + unsol_sz);

		iser_dbg("Cmd itt:%d, WRITE tags, RKEY:%#.4X "
			 "VA:%#llX + unsol:%d\n",
			 task->itt, regd_buf->reg.rkey,
			 (unsigned long long)regd_buf->reg.va, unsol_sz);
	}

	if (imm_sz > 0) {
		iser_dbg("Cmd itt:%d, WRITE, adding imm.data sz: %d\n",
			 task->itt, imm_sz);
		tx_dsg->addr   = regd_buf->reg.va;
		tx_dsg->length = imm_sz;
		tx_dsg->lkey   = regd_buf->reg.lkey;
		iser_task->desc.num_sge = 2;
	}

	return 0;
}

/* creates a new tx descriptor and adds header regd buffer */
static void iser_create_send_desc(struct iser_conn *ib_conn,
				  struct iser_tx_desc *tx_desc)
{
	struct iser_device *device = ib_conn->device;

	ib_dma_sync_single_for_cpu(device->ib_device,
		tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE);

	memset(&tx_desc->iser_header, 0, sizeof(struct iser_hdr));
	tx_desc->iser_header.flags = ISER_VER;

	tx_desc->num_sge = 1;

	if (tx_desc->tx_sg[0].lkey != device->mr->lkey) {
		tx_desc->tx_sg[0].lkey = device->mr->lkey;
		iser_dbg("sdesc %p lkey mismatch, fixing\n", tx_desc);
	}
}
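
/* Allocate the connection's receive descriptor ring. Each descriptor holds
 * headers plus payload (ISER_RX_PAYLOAD_SIZE) and is DMA-mapped here once,
 * then recycled for the lifetime of the connection. */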
int iser_alloc_rx_descriptors(struct iser_conn *ib_conn)
{
	int i, j;
	u64 dma_addr;
	struct iser_rx_desc *rx_desc;
	struct ib_sge *rx_sg;
	struct iser_device *device = ib_conn->device;

	ib_conn->rx_descs = kmalloc(ISER_QP_MAX_RECV_DTOS *
				sizeof(struct iser_rx_desc), GFP_KERNEL);
	if (!ib_conn->rx_descs)
		goto rx_desc_alloc_fail;
	rx_desc = ib_conn->rx_descs;
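
	/* map each descriptor once up front; completion handling re-posts
	 * the same buffers instead of mapping per I/O */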
	for (i = 0; i < ISER_QP_MAX_RECV_DTOS; i++, rx_desc++) {
		dma_addr = ib_dma_map_single(device->ib_device, (void *)rx_desc,
					ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
		if (ib_dma_mapping_error(device->ib_device, dma_addr))
			goto rx_desc_dma_map_failed;

		rx_desc->dma_addr = dma_addr;

		rx_sg = &rx_desc->rx_sg;
		rx_sg->addr   = rx_desc->dma_addr;
		rx_sg->length = ISER_RX_PAYLOAD_SIZE;
		rx_sg->lkey   = device->mr->lkey;
	}

	ib_conn->rx_desc_head = 0;
	return 0;
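
/* unwind: exactly i descriptors were mapped when the failure occurred,
 * so unmap only those before freeing the array */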
rx_desc_dma_map_failed:
	rx_desc = ib_conn->rx_descs;
	for (j = 0; j < i; j++, rx_desc++)
		ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr,
			ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
	kfree(ib_conn->rx_descs);
	ib_conn->rx_descs = NULL;
rx_desc_alloc_fail:
	iser_err("failed allocating rx descriptors / data buffers\n");
	return -ENOMEM;
}

void iser_free_rx_descriptors(struct iser_conn *ib_conn)
{
	int i;
	struct iser_rx_desc *rx_desc;
	struct iser_device *device = ib_conn->device;

	if (ib_conn->login_buf) {
		if (ib_conn->login_req_dma)
			ib_dma_unmap_single(device->ib_device,
				ib_conn->login_req_dma,
				ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);
		if (ib_conn->login_resp_dma)
			ib_dma_unmap_single(device->ib_device,
				ib_conn->login_resp_dma,
				ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);
		kfree(ib_conn->login_buf);
	}

	if (!ib_conn->rx_descs)
		return;

	rx_desc = ib_conn->rx_descs;
	for (i = 0; i < ISER_QP_MAX_RECV_DTOS; i++, rx_desc++)
		ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr,
			ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
	kfree(ib_conn->rx_descs);
}

static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req)
{
	struct iscsi_iser_conn *iser_conn = conn->dd_data;

	iser_dbg("req op %x flags %x\n", req->opcode, req->flags);
	/* check if this is the last login - going to full feature phase */
	if ((req->flags & ISCSI_FULL_FEATURE_PHASE) != ISCSI_FULL_FEATURE_PHASE)
		return 0;

	/*
	 * Check that there is one posted recv buffer (for the last login
	 * response) and no posted send buffers left - they must have been
	 * consumed during previous login phases.
	 */
	WARN_ON(iser_conn->ib_conn->post_recv_buf_count != 1);
	WARN_ON(atomic_read(&iser_conn->ib_conn->post_send_buf_count) != 0);

	iser_dbg("Initially post: %d\n", ISER_MIN_POSTED_RX);
	/* Initial post receive buffers */
	if (iser_post_recvm(iser_conn->ib_conn, ISER_MIN_POSTED_RX))
		return -ENOMEM;

	return 0;
}

/**
 * iser_send_command - send command PDU
 */
int iser_send_command(struct iscsi_conn *conn,
		      struct iscsi_task *task)
{
	struct iscsi_iser_conn *iser_conn = conn->dd_data;
	struct iscsi_iser_task *iser_task = task->dd_data;
	unsigned long edtl;
	int err;
	struct iser_data_buf *data_buf;
	struct iscsi_scsi_req *hdr = (struct iscsi_scsi_req *)task->hdr;
	struct scsi_cmnd *sc = task->sc;
	struct iser_tx_desc *tx_desc = &iser_task->desc;

	edtl = ntohl(hdr->data_length);

	/* build the tx desc regd header and add it to the tx desc dto */
	tx_desc->type = ISCSI_TX_SCSI_COMMAND;
	iser_create_send_desc(iser_conn->ib_conn, tx_desc);
	if (hdr->flags & ISCSI_FLAG_CMD_READ)
		data_buf = &iser_task->data[ISER_DIR_IN];
	else
		data_buf = &iser_task->data[ISER_DIR_OUT];

	if (scsi_sg_count(sc)) { /* using a scatter list */
		data_buf->buf  = scsi_sglist(sc);
		data_buf->size = scsi_sg_count(sc);
	}

	data_buf->data_len = scsi_bufflen(sc);
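
	/* For writes, the initial burst may travel as immediate data and/or
	 * unsolicited Data-Out PDUs; whatever lies beyond imm_count +
	 * unsol_r2t.data_length is solicited by the target via RDMA-Read. */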
	if (hdr->flags & ISCSI_FLAG_CMD_READ) {
		err = iser_prepare_read_cmd(task, edtl);
		if (err)
			goto send_command_error;
	}
	if (hdr->flags & ISCSI_FLAG_CMD_WRITE) {
		err = iser_prepare_write_cmd(task,
					     task->imm_count,
					     task->imm_count +
					     task->unsol_r2t.data_length,
					     edtl);
		if (err)
			goto send_command_error;
	}

	iser_task->status = ISER_TASK_STATUS_STARTED;

	err = iser_post_send(iser_conn->ib_conn, tx_desc);
	if (!err)
		return 0;

send_command_error:
	iser_err("conn %p failed task->itt %d err %d\n", conn, task->itt, err);
	return err;
}

/**
 * iser_send_data_out - send data out PDU
 */
int iser_send_data_out(struct iscsi_conn *conn,
		       struct iscsi_task *task,
		       struct iscsi_data *hdr)
{
	struct iscsi_iser_conn *iser_conn = conn->dd_data;
	struct iscsi_iser_task *iser_task = task->dd_data;
	struct iser_tx_desc *tx_desc = NULL;
	struct iser_regd_buf *regd_buf;
	unsigned long buf_offset;
	unsigned long data_seg_len;
	uint32_t itt;
	int err = 0;
	struct ib_sge *tx_dsg;

	itt = (__force uint32_t)hdr->itt;
	data_seg_len = ntoh24(hdr->dlength);
	buf_offset = ntohl(hdr->offset);

	iser_dbg("%s itt %d dseg_len %d offset %d\n",
		 __func__, (int)itt, (int)data_seg_len, (int)buf_offset);
	tx_desc = kmem_cache_zalloc(ig.desc_cache, GFP_ATOMIC);
	if (tx_desc == NULL) {
		iser_err("Failed to alloc desc for post dataout\n");
		return -ENOMEM;
	}

	tx_desc->type = ISCSI_TX_DATAOUT;
	tx_desc->iser_header.flags = ISER_VER;
	memcpy(&tx_desc->iscsi_header, hdr, sizeof(struct iscsi_hdr));
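
	/* no payload copy is made for Data-Out: the second SGE below points
	 * straight into the registered Data-Out buffer at buf_offset */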
	/* build the tx desc */
	iser_initialize_task_headers(task, tx_desc);

	regd_buf = &iser_task->rdma_regd[ISER_DIR_OUT];
	tx_dsg = &tx_desc->tx_sg[1];
	tx_dsg->addr   = regd_buf->reg.va + buf_offset;
	tx_dsg->length = data_seg_len;
	tx_dsg->lkey   = regd_buf->reg.lkey;
	tx_desc->num_sge = 2;
	if (buf_offset + data_seg_len > iser_task->data[ISER_DIR_OUT].data_len) {
		iser_err("Offset:%ld & DSL:%ld in Data-Out "
			 "inconsistent with total len:%ld, itt:%d\n",
			 buf_offset, data_seg_len,
			 iser_task->data[ISER_DIR_OUT].data_len, itt);
		err = -EINVAL;
		goto send_data_out_error;
	}
	iser_dbg("data-out itt: %d, offset: %ld, sz: %ld\n",
		 itt, buf_offset, data_seg_len);

	err = iser_post_send(iser_conn->ib_conn, tx_desc);
	if (!err)
		return 0;

send_data_out_error:
	kmem_cache_free(ig.desc_cache, tx_desc);
	iser_err("conn %p failed err %d\n", conn, err);
	return err;
}

int iser_send_control(struct iscsi_conn *conn,
		      struct iscsi_task *task)
{
	struct iscsi_iser_conn *iser_conn = conn->dd_data;
	struct iscsi_iser_task *iser_task = task->dd_data;
	struct iser_tx_desc *mdesc = &iser_task->desc;
	unsigned long data_seg_len;
	int err = 0;
	struct iser_device *device;
	struct iser_conn *ib_conn = iser_conn->ib_conn;

	/* build the tx desc regd header and add it to the tx desc dto */
	mdesc->type = ISCSI_TX_CONTROL;
	iser_create_send_desc(iser_conn->ib_conn, mdesc);

	device = iser_conn->ib_conn->device;

	data_seg_len = ntoh24(task->hdr->dlength);
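
	/* Only the login task may carry immediate data here; it is staged
	 * into the pre-mapped login_req buffer, with CPU/device syncs
	 * around the copy so the HCA observes a coherent buffer. */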
	if (data_seg_len > 0) {
		struct ib_sge *tx_dsg = &mdesc->tx_sg[1];
		if (task != conn->login_task) {
			iser_err("data present on non login task!!!\n");
			goto send_control_error;
		}

		ib_dma_sync_single_for_cpu(device->ib_device,
			ib_conn->login_req_dma, task->data_count,
			DMA_TO_DEVICE);

		memcpy(iser_conn->ib_conn->login_req_buf, task->data,
			task->data_count);

		ib_dma_sync_single_for_device(device->ib_device,
			ib_conn->login_req_dma, task->data_count,
			DMA_TO_DEVICE);

		tx_dsg->addr   = iser_conn->ib_conn->login_req_dma;
		tx_dsg->length = task->data_count;
		tx_dsg->lkey   = device->mr->lkey;
		mdesc->num_sge = 2;
	}
	if (task == conn->login_task) {
		err = iser_post_recvl(iser_conn->ib_conn);
		if (err)
			goto send_control_error;
		err = iser_post_rx_bufs(conn, task->hdr);
		if (err)
			goto send_control_error;
	}

	err = iser_post_send(iser_conn->ib_conn, mdesc);
	if (!err)
		return 0;

send_control_error:
	iser_err("conn %p failed err %d\n", conn, err);
	return err;
}

/**
 * iser_rcv_completion - recv DTO completion
 */
void iser_rcv_completion(struct iser_rx_desc *rx_desc,
			 unsigned long rx_xfer_len,
			 struct iser_conn *ib_conn)
{
	struct iscsi_iser_conn *conn = ib_conn->iser_conn;
	struct iscsi_hdr *hdr;
	u64 rx_dma;
	int rx_buflen, outstanding, count, err;

	/* differentiate between login to all other PDUs */
	if ((char *)rx_desc == ib_conn->login_resp_buf) {
		rx_dma = ib_conn->login_resp_dma;
		rx_buflen = ISER_RX_LOGIN_SIZE;
	} else {
		rx_dma = rx_desc->dma_addr;
		rx_buflen = ISER_RX_PAYLOAD_SIZE;
	}
	ib_dma_sync_single_for_cpu(ib_conn->device->ib_device, rx_dma,
			rx_buflen, DMA_FROM_DEVICE);

	hdr = &rx_desc->iscsi_header;

	iser_dbg("op 0x%x itt 0x%x dlen %d\n", hdr->opcode,
		 hdr->itt, (int)(rx_xfer_len - ISER_HEADERS_LEN));

	iscsi_iser_recv(conn->iscsi_conn, hdr,
			rx_desc->data, rx_xfer_len - ISER_HEADERS_LEN);

	ib_dma_sync_single_for_device(ib_conn->device->ib_device, rx_dma,
			rx_buflen, DMA_FROM_DEVICE);
	/* decrementing conn->post_recv_buf_count only --after-- freeing the   *
	 * task eliminates the need to worry on tasks which are completed in   *
	 * parallel to the execution of iser_conn_term. So the code that waits *
	 * for the posted rx bufs refcount to become zero handles everything   */
	conn->ib_conn->post_recv_buf_count--;

	if (rx_dma == ib_conn->login_resp_dma)
		return;

	outstanding = ib_conn->post_recv_buf_count;
	if (outstanding + ISER_MIN_POSTED_RX <= ISER_QP_MAX_RECV_DTOS) {
		count = min(ISER_QP_MAX_RECV_DTOS - outstanding,
			    ISER_MIN_POSTED_RX);
		err = iser_post_recvm(ib_conn, count);
		if (err)
			iser_err("posting %d rx bufs err %d\n", count, err);
	}
}

void iser_snd_completion(struct iser_tx_desc *tx_desc,
			 struct iser_conn *ib_conn)
{
	struct iscsi_task *task;
	struct iser_device *device = ib_conn->device;

	if (tx_desc->type == ISCSI_TX_DATAOUT) {
		ib_dma_unmap_single(device->ib_device, tx_desc->dma_addr,
				    ISER_HEADERS_LEN, DMA_TO_DEVICE);
		kmem_cache_free(ig.desc_cache, tx_desc);
	}
	atomic_dec(&ib_conn->post_send_buf_count);
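
	/* libiscsi allocates the iscsi_task and its dd_data contiguously,
	 * so stepping back sizeof(struct iscsi_task) from the descriptor
	 * (which lives in dd_data) recovers the owning task */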
	if (tx_desc->type == ISCSI_TX_CONTROL) {
		/* this arithmetic is legal by libiscsi dd_data allocation */
		task = (void *) ((long)(void *)tx_desc -
				 sizeof(struct iscsi_task));
		if (task->hdr->itt == RESERVED_ITT)
			iscsi_put_task(task);
	}
}

void iser_task_rdma_init(struct iscsi_iser_task *iser_task)
{
	iser_task->status = ISER_TASK_STATUS_INIT;

	iser_task->dir[ISER_DIR_IN] = 0;
	iser_task->dir[ISER_DIR_OUT] = 0;

	iser_task->data[ISER_DIR_IN].data_len = 0;
	iser_task->data[ISER_DIR_OUT].data_len = 0;

	memset(&iser_task->rdma_regd[ISER_DIR_IN], 0,
	       sizeof(struct iser_regd_buf));
	memset(&iser_task->rdma_regd[ISER_DIR_OUT], 0,
	       sizeof(struct iser_regd_buf));
}

void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task)
{
	int is_rdma_aligned = 1;
	struct iser_regd_buf *regd;

	/* if we were reading, copy back to unaligned sglist,
	 * anyway dma_unmap and free the copy
	 */
	if (iser_task->data_copy[ISER_DIR_IN].copy_buf != NULL) {
		is_rdma_aligned = 0;
		iser_finalize_rdma_unaligned_sg(iser_task, ISER_DIR_IN);
	}
	if (iser_task->data_copy[ISER_DIR_OUT].copy_buf != NULL) {
		is_rdma_aligned = 0;
		iser_finalize_rdma_unaligned_sg(iser_task, ISER_DIR_OUT);
	}
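
	/* memory registered through the FMR pool must be handed back;
	 * is_fmr is clear when the registration used the device-wide MR,
	 * in which case there is nothing to unregister */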
	if (iser_task->dir[ISER_DIR_IN]) {
		regd = &iser_task->rdma_regd[ISER_DIR_IN];
		if (regd->reg.is_fmr)
			iser_unreg_mem(&regd->reg);
	}

	if (iser_task->dir[ISER_DIR_OUT]) {
		regd = &iser_task->rdma_regd[ISER_DIR_OUT];
		if (regd->reg.is_fmr)
			iser_unreg_mem(&regd->reg);
	}

	/* if the data was unaligned, it was already unmapped and then copied */
	if (is_rdma_aligned)
		iser_dma_unmap_task_data(iser_task);
}