/*
 * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved.
 * Copyright (c) 2013 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *	- Redistributions of source code must retain the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer.
 *
 *	- Redistributions in binary form must reproduce the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer in the documentation and/or other materials
 *	  provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
33 #include <linux/kernel.h>
34 #include <linux/slab.h>
36 #include <linux/scatterlist.h>
37 #include <linux/kfifo.h>
38 #include <scsi/scsi_cmnd.h>
39 #include <scsi/scsi_host.h>
41 #include "iscsi_iser.h"
/* Register user buffer memory and initialize passive rdma
 * dto descriptor. Total data size is stored in
 * iser_task->data[ISER_DIR_IN].data_len
 */
/*
 * iser_prepare_read_cmd - prepare the Data-IN (READ) side of a command.
 *
 * As far as the visible lines show, the function:
 *  - calls iser_dma_map_task_data() for the task (the remaining arguments
 *    are not visible; presumably buf_in and the ISER_DIR_IN direction);
 *  - logs an error when edtl is larger than data[ISER_DIR_IN].data_len;
 *  - registers the buffer with iser_reg_rdma_mem(iser_task, ISER_DIR_IN);
 *  - sets ISER_RSV in the iSER header and stores the big-endian rkey and
 *    va of rdma_regd[ISER_DIR_IN] in read_stag and read_va.
 *
 * NOTE(review): this listing carries stray leading numbers and has lost
 * lines - the edtl parameter, the declaration of err, the braces and all
 * return statements are not visible.  Restore them from the pristine file
 * before building; TODO confirm the error codes returned on each failure.
 */
47 static int iser_prepare_read_cmd(struct iscsi_task
*task
,
51 struct iscsi_iser_task
*iser_task
= task
->dd_data
;
52 struct iser_regd_buf
*regd_buf
;
54 struct iser_hdr
*hdr
= &iser_task
->desc
.iser_header
;
55 struct iser_data_buf
*buf_in
= &iser_task
->data
[ISER_DIR_IN
];
57 err
= iser_dma_map_task_data(iser_task
,
64 if (edtl
> iser_task
->data
[ISER_DIR_IN
].data_len
) {
65 iser_err("Total data length: %ld, less than EDTL: "
66 "%d, in READ cmd BHS itt: %d, conn: 0x%p\n",
67 iser_task
->data
[ISER_DIR_IN
].data_len
, edtl
,
68 task
->itt
, iser_task
->iser_conn
);
72 err
= iser_reg_rdma_mem(iser_task
,ISER_DIR_IN
);
74 iser_err("Failed to set up Data-IN RDMA\n");
77 regd_buf
= &iser_task
->rdma_regd
[ISER_DIR_IN
];
79 hdr
->flags
|= ISER_RSV
;
80 hdr
->read_stag
= cpu_to_be32(regd_buf
->reg
.rkey
);
81 hdr
->read_va
= cpu_to_be64(regd_buf
->reg
.va
);
83 iser_dbg("Cmd itt:%d READ tags RKEY:%#.4X VA:%#llX\n",
84 task
->itt
, regd_buf
->reg
.rkey
,
85 (unsigned long long)regd_buf
->reg
.va
);
/* Register user buffer memory and initialize passive rdma
 * dto descriptor. Total data size is stored in
 * iser_task->data[ISER_DIR_OUT].data_len
 */
/*
 * iser_prepare_write_cmd - prepare the Data-OUT (WRITE) side of a command.
 *
 * As far as the visible lines show, the function:
 *  - calls iser_dma_map_task_data() for the task (the remaining arguments
 *    are not visible; presumably buf_out and the ISER_DIR_OUT direction);
 *  - logs an error when edtl is larger than data[ISER_DIR_OUT].data_len;
 *  - registers the buffer with iser_reg_rdma_mem(iser_task, ISER_DIR_OUT);
 *  - when unsol_sz < edtl, sets ISER_WSV in the iSER header and stores
 *    the big-endian rkey and (va + unsol_sz) of rdma_regd[ISER_DIR_OUT]
 *    in write_stag and write_va;
 *  - fills the second send SGE (desc.tx_sg[1]) with the registered va,
 *    imm_sz and lkey and sets desc.num_sge to 2.  The condition guarding
 *    this step is not visible; presumably it applies only when immediate
 *    data is present - verify.
 *
 * NOTE(review): this listing carries stray leading numbers and has lost
 * lines - the return type, the imm_sz and edtl parameters, the declaration
 * of err, the braces and all return statements are not visible.  Restore
 * them from the pristine file before building.
 */
95 iser_prepare_write_cmd(struct iscsi_task
*task
,
97 unsigned int unsol_sz
,
100 struct iscsi_iser_task
*iser_task
= task
->dd_data
;
101 struct iser_regd_buf
*regd_buf
;
103 struct iser_hdr
*hdr
= &iser_task
->desc
.iser_header
;
104 struct iser_data_buf
*buf_out
= &iser_task
->data
[ISER_DIR_OUT
];
105 struct ib_sge
*tx_dsg
= &iser_task
->desc
.tx_sg
[1];
107 err
= iser_dma_map_task_data(iser_task
,
114 if (edtl
> iser_task
->data
[ISER_DIR_OUT
].data_len
) {
115 iser_err("Total data length: %ld, less than EDTL: %d, "
116 "in WRITE cmd BHS itt: %d, conn: 0x%p\n",
117 iser_task
->data
[ISER_DIR_OUT
].data_len
,
118 edtl
, task
->itt
, task
->conn
);
122 err
= iser_reg_rdma_mem(iser_task
,ISER_DIR_OUT
);
124 iser_err("Failed to register write cmd RDMA mem\n");
128 regd_buf
= &iser_task
->rdma_regd
[ISER_DIR_OUT
];
130 if (unsol_sz
< edtl
) {
131 hdr
->flags
|= ISER_WSV
;
132 hdr
->write_stag
= cpu_to_be32(regd_buf
->reg
.rkey
);
133 hdr
->write_va
= cpu_to_be64(regd_buf
->reg
.va
+ unsol_sz
);
135 iser_dbg("Cmd itt:%d, WRITE tags, RKEY:%#.4X "
136 "VA:%#llX + unsol:%d\n",
137 task
->itt
, regd_buf
->reg
.rkey
,
138 (unsigned long long)regd_buf
->reg
.va
, unsol_sz
);
142 iser_dbg("Cmd itt:%d, WRITE, adding imm.data sz: %d\n",
144 tx_dsg
->addr
= regd_buf
->reg
.va
;
145 tx_dsg
->length
= imm_sz
;
146 tx_dsg
->lkey
= regd_buf
->reg
.lkey
;
147 iser_task
->desc
.num_sge
= 2;
153 /* creates a new tx descriptor and adds header regd buffer */
154 static void iser_create_send_desc(struct iser_conn
*ib_conn
,
155 struct iser_tx_desc
*tx_desc
)
157 struct iser_device
*device
= ib_conn
->device
;
159 ib_dma_sync_single_for_cpu(device
->ib_device
,
160 tx_desc
->dma_addr
, ISER_HEADERS_LEN
, DMA_TO_DEVICE
);
162 memset(&tx_desc
->iser_header
, 0, sizeof(struct iser_hdr
));
163 tx_desc
->iser_header
.flags
= ISER_VER
;
165 tx_desc
->num_sge
= 1;
167 if (tx_desc
->tx_sg
[0].lkey
!= device
->mr
->lkey
) {
168 tx_desc
->tx_sg
[0].lkey
= device
->mr
->lkey
;
169 iser_dbg("sdesc %p lkey mismatch, fixing\n", tx_desc
);
/*
 * iser_alloc_rx_descriptors - allocate and DMA-map the rx descriptor array.
 *
 * As far as the visible lines show, the function:
 *  - kmalloc()s ISER_QP_MAX_RECV_DTOS descriptors with GFP_KERNEL into
 *    ib_conn->rx_descs;
 *  - maps ISER_RX_PAYLOAD_SIZE bytes of every descriptor with
 *    DMA_FROM_DEVICE, records the address in rx_desc->dma_addr and fills
 *    rx_desc->rx_sg (addr, length, and the device MR's lkey);
 *  - resets ib_conn->rx_desc_head to 0.
 *
 * When a mapping fails, only the first i descriptors (the ones already
 * mapped) are unmapped, the array is freed and ib_conn->rx_descs is set
 * to NULL.  An error message is logged on the failure path.
 *
 * NOTE(review): this listing carries stray leading numbers and has lost
 * lines - the declarations of i, j and dma_addr, the target label of
 * "goto rx_desc_alloc_fail", the braces and the return statements are not
 * visible.  Restore them from the pristine file; TODO confirm the value
 * returned on failure.
 * NOTE(review): the count * sizeof() multiplication passed to kmalloc()
 * could be expressed with kmalloc_array() - confirm it is available in
 * the target tree.
 */
174 int iser_alloc_rx_descriptors(struct iser_conn
*ib_conn
)
178 struct iser_rx_desc
*rx_desc
;
179 struct ib_sge
*rx_sg
;
180 struct iser_device
*device
= ib_conn
->device
;
182 ib_conn
->rx_descs
= kmalloc(ISER_QP_MAX_RECV_DTOS
*
183 sizeof(struct iser_rx_desc
), GFP_KERNEL
);
184 if (!ib_conn
->rx_descs
)
185 goto rx_desc_alloc_fail
;
187 rx_desc
= ib_conn
->rx_descs
;
189 for (i
= 0; i
< ISER_QP_MAX_RECV_DTOS
; i
++, rx_desc
++) {
190 dma_addr
= ib_dma_map_single(device
->ib_device
, (void *)rx_desc
,
191 ISER_RX_PAYLOAD_SIZE
, DMA_FROM_DEVICE
);
192 if (ib_dma_mapping_error(device
->ib_device
, dma_addr
))
193 goto rx_desc_dma_map_failed
;
195 rx_desc
->dma_addr
= dma_addr
;
197 rx_sg
= &rx_desc
->rx_sg
;
198 rx_sg
->addr
= rx_desc
->dma_addr
;
199 rx_sg
->length
= ISER_RX_PAYLOAD_SIZE
;
200 rx_sg
->lkey
= device
->mr
->lkey
;
203 ib_conn
->rx_desc_head
= 0;
206 rx_desc_dma_map_failed
:
207 rx_desc
= ib_conn
->rx_descs
;
208 for (j
= 0; j
< i
; j
++, rx_desc
++)
209 ib_dma_unmap_single(device
->ib_device
, rx_desc
->dma_addr
,
210 ISER_RX_PAYLOAD_SIZE
, DMA_FROM_DEVICE
);
211 kfree(ib_conn
->rx_descs
);
212 ib_conn
->rx_descs
= NULL
;
214 iser_err("failed allocating rx descriptors / data buffers\n");
218 void iser_free_rx_descriptors(struct iser_conn
*ib_conn
)
221 struct iser_rx_desc
*rx_desc
;
222 struct iser_device
*device
= ib_conn
->device
;
224 if (!ib_conn
->rx_descs
)
227 rx_desc
= ib_conn
->rx_descs
;
228 for (i
= 0; i
< ISER_QP_MAX_RECV_DTOS
; i
++, rx_desc
++)
229 ib_dma_unmap_single(device
->ib_device
, rx_desc
->dma_addr
,
230 ISER_RX_PAYLOAD_SIZE
, DMA_FROM_DEVICE
);
231 kfree(ib_conn
->rx_descs
);
/*
 * iser_post_rx_bufs - post the initial receive buffers after the last login.
 *
 * As far as the visible lines show, the function logs the request opcode
 * and flags, tests whether all ISCSI_FULL_FEATURE_PHASE bits are set in
 * req->flags (the statement taken when they are not is not visible;
 * presumably an early return), warns if post_recv_buf_count is not 1 or
 * post_send_buf_count is not 0, and then posts ISER_MIN_POSTED_RX receive
 * buffers through iser_post_recvm().
 *
 * NOTE(review): this listing carries stray leading numbers and has lost
 * lines - the braces, the comment delimiters around the "Check that..."
 * text and all return statements are not visible.  Restore them from the
 * pristine file; TODO confirm the value returned when iser_post_recvm()
 * fails.
 */
234 static int iser_post_rx_bufs(struct iscsi_conn
*conn
, struct iscsi_hdr
*req
)
236 struct iscsi_iser_conn
*iser_conn
= conn
->dd_data
;
238 iser_dbg("req op %x flags %x\n", req
->opcode
, req
->flags
);
239 /* check if this is the last login - going to full feature phase */
240 if ((req
->flags
& ISCSI_FULL_FEATURE_PHASE
) != ISCSI_FULL_FEATURE_PHASE
)
244 * Check that there is one posted recv buffer (for the last login
245 * response) and no posted send buffers left - they must have been
246 * consumed during previous login phases.
248 WARN_ON(iser_conn
->ib_conn
->post_recv_buf_count
!= 1);
249 WARN_ON(atomic_read(&iser_conn
->ib_conn
->post_send_buf_count
) != 0);
251 iser_dbg("Initially post: %d\n", ISER_MIN_POSTED_RX
);
252 /* Initial post receive buffers */
253 if (iser_post_recvm(iser_conn
->ib_conn
, ISER_MIN_POSTED_RX
))
/**
 * iser_send_command - send command PDU
 */
/*
 * Builds and posts the send descriptor for a SCSI command task.
 *
 * As far as the visible lines show, the function:
 *  - reads edtl from the command header's data_length (ntohl);
 *  - marks the task's descriptor ISCSI_TX_SCSI_COMMAND and resets it with
 *    iser_create_send_desc();
 *  - selects data[ISER_DIR_IN] when ISCSI_FLAG_CMD_READ is set in
 *    hdr->flags (the other assignment selects data[ISER_DIR_OUT]);
 *  - when the command has a scatterlist, records it and its entry count
 *    in data_buf->buf / data_buf->size, and stores scsi_bufflen(sc) in
 *    data_buf->data_len;
 *  - calls iser_prepare_read_cmd() for READ and iser_prepare_write_cmd()
 *    for WRITE commands, jumping to send_command_error on failure;
 *  - sets the task status to ISER_TASK_STATUS_STARTED and posts the
 *    descriptor with iser_post_send();
 *  - logs the connection, itt and err on the error path.
 *
 * NOTE(review): this listing carries stray leading numbers and has lost
 * lines - the declarations of err and edtl, the else keyword, most of the
 * iser_prepare_write_cmd() arguments, the send_command_error label, the
 * braces and the return statements are not visible.  Restore them from
 * the pristine file before building.
 */
262 int iser_send_command(struct iscsi_conn
*conn
,
263 struct iscsi_task
*task
)
265 struct iscsi_iser_conn
*iser_conn
= conn
->dd_data
;
266 struct iscsi_iser_task
*iser_task
= task
->dd_data
;
269 struct iser_data_buf
*data_buf
;
270 struct iscsi_scsi_req
*hdr
= (struct iscsi_scsi_req
*)task
->hdr
;
271 struct scsi_cmnd
*sc
= task
->sc
;
272 struct iser_tx_desc
*tx_desc
= &iser_task
->desc
;
274 edtl
= ntohl(hdr
->data_length
);
276 /* build the tx desc regd header and add it to the tx desc dto */
277 tx_desc
->type
= ISCSI_TX_SCSI_COMMAND
;
278 iser_create_send_desc(iser_conn
->ib_conn
, tx_desc
);
280 if (hdr
->flags
& ISCSI_FLAG_CMD_READ
)
281 data_buf
= &iser_task
->data
[ISER_DIR_IN
];
283 data_buf
= &iser_task
->data
[ISER_DIR_OUT
];
285 if (scsi_sg_count(sc
)) { /* using a scatter list */
286 data_buf
->buf
= scsi_sglist(sc
);
287 data_buf
->size
= scsi_sg_count(sc
);
290 data_buf
->data_len
= scsi_bufflen(sc
);
292 if (hdr
->flags
& ISCSI_FLAG_CMD_READ
) {
293 err
= iser_prepare_read_cmd(task
, edtl
);
295 goto send_command_error
;
297 if (hdr
->flags
& ISCSI_FLAG_CMD_WRITE
) {
298 err
= iser_prepare_write_cmd(task
,
301 task
->unsol_r2t
.data_length
,
304 goto send_command_error
;
307 iser_task
->status
= ISER_TASK_STATUS_STARTED
;
309 err
= iser_post_send(iser_conn
->ib_conn
, tx_desc
);
314 iser_err("conn %p failed task->itt %d err %d\n",conn
, task
->itt
, err
);
/**
 * iser_send_data_out - send data out PDU
 */
/*
 * Builds and posts a Data-Out PDU for part of a task's write buffer.
 *
 * As far as the visible lines show, the function:
 *  - extracts itt, the data segment length (ntoh24 of dlength) and the
 *    buffer offset (ntohl of offset) from the Data-Out header;
 *  - allocates a zeroed tx descriptor from ig.desc_cache with GFP_ATOMIC,
 *    logging an error when that fails;
 *  - marks it ISCSI_TX_DATAOUT, sets ISER_VER, copies the iSCSI header in
 *    and calls iser_initialize_task_headers();
 *  - points the second SGE at rdma_regd[ISER_DIR_OUT].reg.va + buf_offset
 *    with length data_seg_len and the registration's lkey, and sets
 *    num_sge to 2;
 *  - rejects a segment where buf_offset + data_seg_len exceeds
 *    data[ISER_DIR_OUT].data_len (goto send_data_out_error);
 *  - posts the descriptor with iser_post_send();
 *  - on the error path frees the descriptor back to ig.desc_cache and
 *    logs the connection and err.
 *
 * NOTE(review): iser_snd_completion() DMA-unmaps tx_desc->dma_addr for
 * Data-Out descriptors before freeing them, while the error path here only
 * frees.  If iser_initialize_task_headers() creates that mapping, it is
 * leaked on failure, and its result is not checked in the visible lines -
 * verify against its definition.
 * NOTE(review): the iser_err() format prints unsigned long values with
 * %ld; %lu would match the declared types.
 * NOTE(review): this listing carries stray leading numbers and has lost
 * lines - the declarations of itt and err, the error codes, the
 * send_data_out_error label, the braces and the return statements are not
 * visible.  Restore them from the pristine file before building.
 */
321 int iser_send_data_out(struct iscsi_conn
*conn
,
322 struct iscsi_task
*task
,
323 struct iscsi_data
*hdr
)
325 struct iscsi_iser_conn
*iser_conn
= conn
->dd_data
;
326 struct iscsi_iser_task
*iser_task
= task
->dd_data
;
327 struct iser_tx_desc
*tx_desc
= NULL
;
328 struct iser_regd_buf
*regd_buf
;
329 unsigned long buf_offset
;
330 unsigned long data_seg_len
;
333 struct ib_sge
*tx_dsg
;
335 itt
= (__force
uint32_t)hdr
->itt
;
336 data_seg_len
= ntoh24(hdr
->dlength
);
337 buf_offset
= ntohl(hdr
->offset
);
339 iser_dbg("%s itt %d dseg_len %d offset %d\n",
340 __func__
,(int)itt
,(int)data_seg_len
,(int)buf_offset
);
342 tx_desc
= kmem_cache_zalloc(ig
.desc_cache
, GFP_ATOMIC
);
343 if (tx_desc
== NULL
) {
344 iser_err("Failed to alloc desc for post dataout\n");
348 tx_desc
->type
= ISCSI_TX_DATAOUT
;
349 tx_desc
->iser_header
.flags
= ISER_VER
;
350 memcpy(&tx_desc
->iscsi_header
, hdr
, sizeof(struct iscsi_hdr
));
352 /* build the tx desc */
353 iser_initialize_task_headers(task
, tx_desc
);
355 regd_buf
= &iser_task
->rdma_regd
[ISER_DIR_OUT
];
356 tx_dsg
= &tx_desc
->tx_sg
[1];
357 tx_dsg
->addr
= regd_buf
->reg
.va
+ buf_offset
;
358 tx_dsg
->length
= data_seg_len
;
359 tx_dsg
->lkey
= regd_buf
->reg
.lkey
;
360 tx_desc
->num_sge
= 2;
362 if (buf_offset
+ data_seg_len
> iser_task
->data
[ISER_DIR_OUT
].data_len
) {
363 iser_err("Offset:%ld & DSL:%ld in Data-Out "
364 "inconsistent with total len:%ld, itt:%d\n",
365 buf_offset
, data_seg_len
,
366 iser_task
->data
[ISER_DIR_OUT
].data_len
, itt
);
368 goto send_data_out_error
;
370 iser_dbg("data-out itt: %d, offset: %ld, sz: %ld\n",
371 itt
, buf_offset
, data_seg_len
);
374 err
= iser_post_send(iser_conn
->ib_conn
, tx_desc
);
379 kmem_cache_free(ig
.desc_cache
, tx_desc
);
380 iser_err("conn %p failed err %d\n",conn
, err
);
/*
 * iser_send_control - build and post the send descriptor of a control PDU.
 *
 * As far as the visible lines show, the function:
 *  - marks the task's descriptor ISCSI_TX_CONTROL and resets it with
 *    iser_create_send_desc();
 *  - reads the data segment length from the task header (ntoh24);
 *  - when a data segment is present, accepts it only for
 *    conn->login_task (otherwise logs an error and jumps to
 *    send_control_error), copies task->data into the connection's
 *    login_req_buf between a sync-for-cpu and a sync-for-device on
 *    login_req_dma, and describes it in the second SGE (login_req_dma,
 *    task->data_count, the device MR's lkey);
 *  - for the login task, posts the login receive buffer with
 *    iser_post_recvl() and then calls iser_post_rx_bufs();
 *  - posts the descriptor with iser_post_send() and logs the connection
 *    and err on the error path.
 *
 * NOTE(review): no line raising mdesc->num_sge for the data segment is
 * visible; presumably it was lost with the other missing lines - verify.
 * NOTE(review): this listing carries stray leading numbers and has lost
 * lines - the declaration of err, the trailing arguments of the two sync
 * calls and of memcpy(), the send_control_error label, the braces and the
 * return statements are not visible.  Restore them from the pristine file
 * before building.
 */
384 int iser_send_control(struct iscsi_conn
*conn
,
385 struct iscsi_task
*task
)
387 struct iscsi_iser_conn
*iser_conn
= conn
->dd_data
;
388 struct iscsi_iser_task
*iser_task
= task
->dd_data
;
389 struct iser_tx_desc
*mdesc
= &iser_task
->desc
;
390 unsigned long data_seg_len
;
392 struct iser_device
*device
;
393 struct iser_conn
*ib_conn
= iser_conn
->ib_conn
;
395 /* build the tx desc regd header and add it to the tx desc dto */
396 mdesc
->type
= ISCSI_TX_CONTROL
;
397 iser_create_send_desc(iser_conn
->ib_conn
, mdesc
);
399 device
= iser_conn
->ib_conn
->device
;
401 data_seg_len
= ntoh24(task
->hdr
->dlength
);
403 if (data_seg_len
> 0) {
404 struct ib_sge
*tx_dsg
= &mdesc
->tx_sg
[1];
405 if (task
!= conn
->login_task
) {
406 iser_err("data present on non login task!!!\n");
407 goto send_control_error
;
410 ib_dma_sync_single_for_cpu(device
->ib_device
,
411 ib_conn
->login_req_dma
, task
->data_count
,
414 memcpy(iser_conn
->ib_conn
->login_req_buf
, task
->data
,
417 ib_dma_sync_single_for_device(device
->ib_device
,
418 ib_conn
->login_req_dma
, task
->data_count
,
421 tx_dsg
->addr
= iser_conn
->ib_conn
->login_req_dma
;
422 tx_dsg
->length
= task
->data_count
;
423 tx_dsg
->lkey
= device
->mr
->lkey
;
427 if (task
== conn
->login_task
) {
428 err
= iser_post_recvl(iser_conn
->ib_conn
);
430 goto send_control_error
;
431 err
= iser_post_rx_bufs(conn
, task
->hdr
);
433 goto send_control_error
;
436 err
= iser_post_send(iser_conn
->ib_conn
, mdesc
);
441 iser_err("conn %p failed err %d\n",conn
, err
);
/**
 * iser_rcv_completion - recv DTO completion
 */
/*
 * Handles a completed receive and replenishes the receive queue.
 *
 * As far as the visible lines show, the function:
 *  - treats the descriptor as the login response when its address equals
 *    ib_conn->login_resp_buf, using login_resp_dma and ISER_RX_LOGIN_SIZE;
 *    the other assignments use rx_desc->dma_addr and ISER_RX_PAYLOAD_SIZE;
 *  - syncs the buffer for CPU access, hands the iSCSI header and
 *    rx_desc->data with length (rx_xfer_len - ISER_HEADERS_LEN) to
 *    iscsi_iser_recv(), then syncs the buffer back for the device;
 *  - decrements post_recv_buf_count;
 *  - compares rx_dma with login_resp_dma (the statement taken on a match
 *    is not visible; presumably an early return);
 *  - when outstanding + ISER_MIN_POSTED_RX <= ISER_QP_MAX_RECV_DTOS, posts
 *    more receive buffers with iser_post_recvm() and logs an error if
 *    that fails.
 *
 * NOTE(review): rx_xfer_len is unsigned long and no lower bound check is
 * visible, so rx_xfer_len - ISER_HEADERS_LEN would wrap for a transfer
 * shorter than the headers - verify that callers guarantee the minimum.
 * NOTE(review): this listing carries stray leading numbers and has lost
 * lines - the declaration of rx_dma, the else keyword, the second operand
 * of min(), the braces and the return statement are not visible.  Restore
 * them from the pristine file before building.
 */
448 void iser_rcv_completion(struct iser_rx_desc
*rx_desc
,
449 unsigned long rx_xfer_len
,
450 struct iser_conn
*ib_conn
)
452 struct iscsi_iser_conn
*conn
= ib_conn
->iser_conn
;
453 struct iscsi_hdr
*hdr
;
455 int rx_buflen
, outstanding
, count
, err
;
457 /* differentiate between login to all other PDUs */
458 if ((char *)rx_desc
== ib_conn
->login_resp_buf
) {
459 rx_dma
= ib_conn
->login_resp_dma
;
460 rx_buflen
= ISER_RX_LOGIN_SIZE
;
462 rx_dma
= rx_desc
->dma_addr
;
463 rx_buflen
= ISER_RX_PAYLOAD_SIZE
;
466 ib_dma_sync_single_for_cpu(ib_conn
->device
->ib_device
, rx_dma
,
467 rx_buflen
, DMA_FROM_DEVICE
);
469 hdr
= &rx_desc
->iscsi_header
;
471 iser_dbg("op 0x%x itt 0x%x dlen %d\n", hdr
->opcode
,
472 hdr
->itt
, (int)(rx_xfer_len
- ISER_HEADERS_LEN
));
474 iscsi_iser_recv(conn
->iscsi_conn
, hdr
,
475 rx_desc
->data
, rx_xfer_len
- ISER_HEADERS_LEN
);
477 ib_dma_sync_single_for_device(ib_conn
->device
->ib_device
, rx_dma
,
478 rx_buflen
, DMA_FROM_DEVICE
);
480 /* decrementing conn->post_recv_buf_count only --after-- freeing the *
481 * task eliminates the need to worry on tasks which are completed in *
482 * parallel to the execution of iser_conn_term. So the code that waits *
483 * for the posted rx bufs refcount to become zero handles everything */
484 conn
->ib_conn
->post_recv_buf_count
--;
486 if (rx_dma
== ib_conn
->login_resp_dma
)
489 outstanding
= ib_conn
->post_recv_buf_count
;
490 if (outstanding
+ ISER_MIN_POSTED_RX
<= ISER_QP_MAX_RECV_DTOS
) {
491 count
= min(ISER_QP_MAX_RECV_DTOS
- outstanding
,
493 err
= iser_post_recvm(ib_conn
, count
);
495 iser_err("posting %d rx bufs err %d\n", count
, err
);
499 void iser_snd_completion(struct iser_tx_desc
*tx_desc
,
500 struct iser_conn
*ib_conn
)
502 struct iscsi_task
*task
;
503 struct iser_device
*device
= ib_conn
->device
;
505 if (tx_desc
->type
== ISCSI_TX_DATAOUT
) {
506 ib_dma_unmap_single(device
->ib_device
, tx_desc
->dma_addr
,
507 ISER_HEADERS_LEN
, DMA_TO_DEVICE
);
508 kmem_cache_free(ig
.desc_cache
, tx_desc
);
511 atomic_dec(&ib_conn
->post_send_buf_count
);
513 if (tx_desc
->type
== ISCSI_TX_CONTROL
) {
514 /* this arithmetic is legal by libiscsi dd_data allocation */
515 task
= (void *) ((long)(void *)tx_desc
-
516 sizeof(struct iscsi_task
));
517 if (task
->hdr
->itt
== RESERVED_ITT
)
518 iscsi_put_task(task
);
522 void iser_task_rdma_init(struct iscsi_iser_task
*iser_task
)
525 iser_task
->status
= ISER_TASK_STATUS_INIT
;
527 iser_task
->dir
[ISER_DIR_IN
] = 0;
528 iser_task
->dir
[ISER_DIR_OUT
] = 0;
530 iser_task
->data
[ISER_DIR_IN
].data_len
= 0;
531 iser_task
->data
[ISER_DIR_OUT
].data_len
= 0;
533 memset(&iser_task
->rdma_regd
[ISER_DIR_IN
], 0,
534 sizeof(struct iser_regd_buf
));
535 memset(&iser_task
->rdma_regd
[ISER_DIR_OUT
], 0,
536 sizeof(struct iser_regd_buf
));
539 void iser_task_rdma_finalize(struct iscsi_iser_task
*iser_task
)
541 int is_rdma_aligned
= 1;
542 struct iser_regd_buf
*regd
;
544 /* if we were reading, copy back to unaligned sglist,
545 * anyway dma_unmap and free the copy
547 if (iser_task
->data_copy
[ISER_DIR_IN
].copy_buf
!= NULL
) {
549 iser_finalize_rdma_unaligned_sg(iser_task
, ISER_DIR_IN
);
551 if (iser_task
->data_copy
[ISER_DIR_OUT
].copy_buf
!= NULL
) {
553 iser_finalize_rdma_unaligned_sg(iser_task
, ISER_DIR_OUT
);
556 if (iser_task
->dir
[ISER_DIR_IN
]) {
557 regd
= &iser_task
->rdma_regd
[ISER_DIR_IN
];
558 if (regd
->reg
.is_fmr
)
559 iser_unreg_mem(®d
->reg
);
562 if (iser_task
->dir
[ISER_DIR_OUT
]) {
563 regd
= &iser_task
->rdma_regd
[ISER_DIR_OUT
];
564 if (regd
->reg
.is_fmr
)
565 iser_unreg_mem(®d
->reg
);
568 /* if the data was unaligned, it was already unmapped and then copied */
570 iser_dma_unmap_task_data(iser_task
);