/*
 * Copyright (c) 2005 Cisco Systems.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/string.h>
#include <linux/parser.h>
#include <linux/random.h>
#include <linux/jiffies.h>
#include <linux/lockdep.h>
#include <linux/inet.h>
#include <rdma/ib_cache.h>

#include <linux/atomic.h>

#include <scsi/scsi.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi_dbg.h>
#include <scsi/scsi_tcq.h>
#include <scsi/srp.h>
#include <scsi/scsi_transport_srp.h>

#include "ib_srp.h"

#define DRV_NAME	"ib_srp"
#define PFX		DRV_NAME ": "
#define DRV_VERSION	"2.0"
#define DRV_RELDATE	"July 26, 2015"

MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_INFO(release_date, DRV_RELDATE);

#if !defined(CONFIG_DYNAMIC_DEBUG)
#define DEFINE_DYNAMIC_DEBUG_METADATA(name, fmt)
#define DYNAMIC_DEBUG_BRANCH(descriptor) false
#endif

static unsigned int srp_sg_tablesize;
static unsigned int cmd_sg_entries;
static unsigned int indirect_sg_entries;
static bool allow_ext_sg;
static bool prefer_fr = true;
static bool register_always = true;
static bool never_register;
static int topspin_workarounds = 1;

module_param(srp_sg_tablesize, uint, 0444);
MODULE_PARM_DESC(srp_sg_tablesize, "Deprecated name for cmd_sg_entries");

module_param(cmd_sg_entries, uint, 0444);
MODULE_PARM_DESC(cmd_sg_entries,
		 "Default number of gather/scatter entries in the SRP command (default is 12, max 255)");

module_param(indirect_sg_entries, uint, 0444);
MODULE_PARM_DESC(indirect_sg_entries,
		 "Default max number of gather/scatter entries (default is 12, max is " __stringify(SG_MAX_SEGMENTS) ")");

module_param(allow_ext_sg, bool, 0444);
MODULE_PARM_DESC(allow_ext_sg,
		 "Default behavior when there are more than cmd_sg_entries S/G entries after mapping; fails the request when false (default false)");

module_param(topspin_workarounds, int, 0444);
MODULE_PARM_DESC(topspin_workarounds,
		 "Enable workarounds for Topspin/Cisco SRP target bugs if != 0");

module_param(prefer_fr, bool, 0444);
MODULE_PARM_DESC(prefer_fr,
		 "Whether to use fast registration if both FMR and fast registration are supported");

module_param(register_always, bool, 0444);
MODULE_PARM_DESC(register_always,
		 "Use memory registration even for contiguous memory regions");

module_param(never_register, bool, 0444);
MODULE_PARM_DESC(never_register, "Never register memory");

static const struct kernel_param_ops srp_tmo_ops;

static int srp_reconnect_delay = 10;
module_param_cb(reconnect_delay, &srp_tmo_ops, &srp_reconnect_delay,
		S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(reconnect_delay, "Time between successive reconnect attempts");

static int srp_fast_io_fail_tmo = 15;
module_param_cb(fast_io_fail_tmo, &srp_tmo_ops, &srp_fast_io_fail_tmo,
		S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(fast_io_fail_tmo,
		 "Number of seconds between the observation of a transport"
		 " layer error and failing all I/O. \"off\" means that this"
		 " functionality is disabled.");

static int srp_dev_loss_tmo = 600;
module_param_cb(dev_loss_tmo, &srp_tmo_ops, &srp_dev_loss_tmo,
		S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(dev_loss_tmo,
		 "Maximum number of seconds that the SRP transport should"
		 " insulate transport layer errors. After this time has been"
		 " exceeded the SCSI host is removed. Should be"
		 " between 1 and " __stringify(SCSI_DEVICE_BLOCK_MAX_TIMEOUT)
		 " if fast_io_fail_tmo has not been set. \"off\" means that"
		 " this functionality is disabled.");

static unsigned ch_count;
module_param(ch_count, uint, 0444);
MODULE_PARM_DESC(ch_count,
		 "Number of RDMA channels to use for communication with an SRP target. Using more than one channel improves performance if the HCA supports multiple completion vectors. The default value is the minimum of four times the number of online CPU sockets and the number of completion vectors supported by the HCA.");

static void srp_add_one(struct ib_device *device);
static void srp_remove_one(struct ib_device *device, void *client_data);
static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc);
static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
		const char *opname);
static int srp_ib_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);
static int srp_rdma_cm_handler(struct rdma_cm_id *cm_id,
			       struct rdma_cm_event *event);

static struct scsi_transport_template *ib_srp_transport_template;
static struct workqueue_struct *srp_remove_wq;

static struct ib_client srp_client = {
	.name   = "srp",
	.add    = srp_add_one,
	.remove = srp_remove_one
};

static struct ib_sa_client srp_sa_client;

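/*
 * Module-parameter accessors for reconnect_delay, fast_io_fail_tmo and
 * dev_loss_tmo. A negative timeout is reported as "off"; srp_tmo_set()
 * validates a new value against the other two timeouts before storing it.
 */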
static int srp_tmo_get(char *buffer, const struct kernel_param *kp)
{
	int tmo = *(int *)kp->arg;

	if (tmo >= 0)
		return sprintf(buffer, "%d", tmo);
	else
		return sprintf(buffer, "off");
}

static int srp_tmo_set(const char *val, const struct kernel_param *kp)
{
	int tmo, res;

	res = srp_parse_tmo(&tmo, val);
	if (res)
		goto out;

	if (kp->arg == &srp_reconnect_delay)
		res = srp_tmo_valid(tmo, srp_fast_io_fail_tmo,
				    srp_dev_loss_tmo);
	else if (kp->arg == &srp_fast_io_fail_tmo)
		res = srp_tmo_valid(srp_reconnect_delay, tmo, srp_dev_loss_tmo);
	else
		res = srp_tmo_valid(srp_reconnect_delay, srp_fast_io_fail_tmo,
				    tmo);
	if (res)
		goto out;
	*(int *)kp->arg = tmo;

out:
	return res;
}

static const struct kernel_param_ops srp_tmo_ops = {
	.get = srp_tmo_get,
	.set = srp_tmo_set,
};

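/*
 * The SRP target state lives in the Scsi_Host private data, so the helpers
 * below simply translate between the SCSI midlayer's Scsi_Host and this
 * driver's srp_target_port.
 */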
static inline struct srp_target_port *host_to_target(struct Scsi_Host *host)
{
	return (struct srp_target_port *) host->hostdata;
}

static const char *srp_target_info(struct Scsi_Host *host)
{
	return host_to_target(host)->target_name;
}

static int srp_target_is_topspin(struct srp_target_port *target)
{
	static const u8 topspin_oui[3] = { 0x00, 0x05, 0xad };
	static const u8 cisco_oui[3]   = { 0x00, 0x1b, 0x0d };

	return topspin_workarounds &&
		(!memcmp(&target->ioc_guid, topspin_oui, sizeof topspin_oui) ||
		 !memcmp(&target->ioc_guid, cisco_oui, sizeof cisco_oui));
}

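/*
 * Information units (IUs) carry SRP requests and responses. srp_alloc_iu()
 * allocates the IU structure plus its data buffer and DMA-maps that buffer
 * for the given direction; srp_free_iu() undoes both steps.
 */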
static struct srp_iu *srp_alloc_iu(struct srp_host *host, size_t size,
				   gfp_t gfp_mask,
				   enum dma_data_direction direction)
{
	struct srp_iu *iu;

	iu = kmalloc(sizeof *iu, gfp_mask);
	if (!iu)
		goto out;

	iu->buf = kzalloc(size, gfp_mask);
	if (!iu->buf)
		goto out_free_iu;

	iu->dma = ib_dma_map_single(host->srp_dev->dev, iu->buf, size,
				    direction);
	if (ib_dma_mapping_error(host->srp_dev->dev, iu->dma))
		goto out_free_buf;

	iu->size      = size;
	iu->direction = direction;

	return iu;

out_free_buf:
	kfree(iu->buf);
out_free_iu:
	kfree(iu);
out:
	return NULL;
}

static void srp_free_iu(struct srp_host *host, struct srp_iu *iu)
{
	if (!iu)
		return;

	ib_dma_unmap_single(host->srp_dev->dev, iu->dma, iu->size,
			    iu->direction);
	kfree(iu->buf);
	kfree(iu);
}

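/* Asynchronous QP event handler; events are only logged for debugging. */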
static void srp_qp_event(struct ib_event *event, void *context)
{
	pr_debug("QP event %s (%d)\n",
		 ib_event_msg(event->event), event->event);
}

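/*
 * Move a freshly created IB CM queue pair to the INIT state: look up the
 * P_Key index for the target's P_Key, set the port number and enable remote
 * read/write access.
 */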
static int srp_init_ib_qp(struct srp_target_port *target,
			  struct ib_qp *qp)
{
	struct ib_qp_attr *attr;
	int ret;

	attr = kmalloc(sizeof *attr, GFP_KERNEL);
	if (!attr)
		return -ENOMEM;

	ret = ib_find_cached_pkey(target->srp_host->srp_dev->dev,
				  target->srp_host->port,
				  be16_to_cpu(target->ib_cm.pkey),
				  &attr->pkey_index);
	if (ret)
		goto out;

	attr->qp_state        = IB_QPS_INIT;
	attr->qp_access_flags = (IB_ACCESS_REMOTE_READ |
				 IB_ACCESS_REMOTE_WRITE);
	attr->port_num        = target->srp_host->port;

	ret = ib_modify_qp(qp, attr,
			   IB_QP_STATE		|
			   IB_QP_PKEY_INDEX	|
			   IB_QP_ACCESS_FLAGS	|
			   IB_QP_PORT);

out:
	kfree(attr);
	return ret;
}

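/*
 * Allocate a new IB CM ID for a channel and (re)initialize the path record
 * that will be used for the connection, replacing any previous CM ID.
 */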
static int srp_new_ib_cm_id(struct srp_rdma_ch *ch)
{
	struct srp_target_port *target = ch->target;
	struct ib_cm_id *new_cm_id;

	new_cm_id = ib_create_cm_id(target->srp_host->srp_dev->dev,
				    srp_ib_cm_handler, ch);
	if (IS_ERR(new_cm_id))
		return PTR_ERR(new_cm_id);

	if (ch->ib_cm.cm_id)
		ib_destroy_cm_id(ch->ib_cm.cm_id);
	ch->ib_cm.cm_id = new_cm_id;
	if (rdma_cap_opa_ah(target->srp_host->srp_dev->dev,
			    target->srp_host->port))
		ch->ib_cm.path.rec_type = SA_PATH_REC_TYPE_OPA;
	else
		ch->ib_cm.path.rec_type = SA_PATH_REC_TYPE_IB;
	ch->ib_cm.path.sgid = target->sgid;
	ch->ib_cm.path.dgid = target->ib_cm.orig_dgid;
	ch->ib_cm.path.pkey = target->ib_cm.pkey;
	ch->ib_cm.path.service_id = target->ib_cm.service_id;

	return 0;
}

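/*
 * Local helper that formats a struct sockaddr as a printable IPv4/IPv6
 * address; used by the RDMA/CM error messages below.
 */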
static const char *inet_ntop(const void *sa, char *dst, unsigned int size)
{
	switch (((struct sockaddr *)sa)->sa_family) {
	case AF_INET:
		snprintf(dst, size, "%pI4",
			 &((struct sockaddr_in *)sa)->sin_addr);
		break;
	case AF_INET6:
		snprintf(dst, size, "%pI6",
			 &((struct sockaddr_in6 *)sa)->sin6_addr);
		break;
	default:
		snprintf(dst, size, "???");
		break;
	}
	return dst;
}

static int srp_new_rdma_cm_id(struct srp_rdma_ch *ch)
{
	struct srp_target_port *target = ch->target;
	struct rdma_cm_id *new_cm_id;
	char src_addr[64], dst_addr[64];
	int ret;

	new_cm_id = rdma_create_id(target->net, srp_rdma_cm_handler, ch,
				   RDMA_PS_TCP, IB_QPT_RC);
	if (IS_ERR(new_cm_id)) {
		ret = PTR_ERR(new_cm_id);
		new_cm_id = NULL;
		goto out;
	}

	init_completion(&ch->done);
	ret = rdma_resolve_addr(new_cm_id, target->rdma_cm.src_specified ?
				(struct sockaddr *)&target->rdma_cm.src : NULL,
				(struct sockaddr *)&target->rdma_cm.dst,
				SRP_PATH_REC_TIMEOUT_MS);
	if (ret) {
		pr_err("No route available from %s to %s (%d)\n",
		       target->rdma_cm.src_specified ?
		       inet_ntop(&target->rdma_cm.src, src_addr,
				 sizeof(src_addr)) : "(any)",
		       inet_ntop(&target->rdma_cm.dst, dst_addr,
				 sizeof(dst_addr)),
		       ret);
		goto out;
	}
	ret = wait_for_completion_interruptible(&ch->done);
	if (ret < 0)
		goto out;

	ret = ch->status;
	if (ret) {
		pr_err("Resolving address %s failed (%d)\n",
		       inet_ntop(&target->rdma_cm.dst, dst_addr,
				 sizeof(dst_addr)),
		       ret);
		goto out;
	}

	swap(ch->rdma_cm.cm_id, new_cm_id);

out:
	if (new_cm_id)
		rdma_destroy_id(new_cm_id);

	return ret;
}

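/* Dispatch to the RDMA/CM or IB CM variant depending on the target mode. */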
static int srp_new_cm_id(struct srp_rdma_ch *ch)
{
	struct srp_target_port *target = ch->target;

	return target->using_rdma_cm ? srp_new_rdma_cm_id(ch) :
		srp_new_ib_cm_id(ch);
}

static struct ib_fmr_pool *srp_alloc_fmr_pool(struct srp_target_port *target)
{
	struct srp_device *dev = target->srp_host->srp_dev;
	struct ib_fmr_pool_param fmr_param;

	memset(&fmr_param, 0, sizeof(fmr_param));
	fmr_param.pool_size	    = target->mr_pool_size;
	fmr_param.dirty_watermark   = fmr_param.pool_size / 4;
	fmr_param.cache		    = 1;
	fmr_param.max_pages_per_fmr = dev->max_pages_per_mr;
	fmr_param.page_shift	    = ilog2(dev->mr_page_size);
	fmr_param.access	    = (IB_ACCESS_LOCAL_WRITE |
				       IB_ACCESS_REMOTE_WRITE |
				       IB_ACCESS_REMOTE_READ);

	return ib_create_fmr_pool(dev->pd, &fmr_param);
}

/**
 * srp_destroy_fr_pool() - free the resources owned by a pool
 * @pool: Fast registration pool to be destroyed.
 */
static void srp_destroy_fr_pool(struct srp_fr_pool *pool)
{
	int i;
	struct srp_fr_desc *d;

	if (!pool)
		return;

	for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
		if (d->mr)
			ib_dereg_mr(d->mr);
	}
	kfree(pool);
}

/**
 * srp_create_fr_pool() - allocate and initialize a pool for fast registration
 * @device:            IB device to allocate fast registration descriptors for.
 * @pd:                Protection domain associated with the FR descriptors.
 * @pool_size:         Number of descriptors to allocate.
 * @max_page_list_len: Maximum fast registration work request page list length.
 */
static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device,
					      struct ib_pd *pd, int pool_size,
					      int max_page_list_len)
{
	struct srp_fr_pool *pool;
	struct srp_fr_desc *d;
	struct ib_mr *mr;
	int i, ret = -EINVAL;

	if (pool_size <= 0)
		goto err;
	ret = -ENOMEM;
	pool = kzalloc(sizeof(struct srp_fr_pool) +
		       pool_size * sizeof(struct srp_fr_desc), GFP_KERNEL);
	if (!pool)
		goto err;
	pool->size = pool_size;
	pool->max_page_list_len = max_page_list_len;
	spin_lock_init(&pool->lock);
	INIT_LIST_HEAD(&pool->free_list);

	for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
		mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG,
				 max_page_list_len);
		if (IS_ERR(mr)) {
			ret = PTR_ERR(mr);
			if (ret == -ENOMEM)
				pr_info("%s: ib_alloc_mr() failed. Try to reduce max_cmd_per_lun, max_sect or ch_count\n",
					dev_name(&device->dev));
			goto destroy_pool;
		}
		d->mr = mr;
		list_add_tail(&d->entry, &pool->free_list);
	}

out:
	return pool;

destroy_pool:
	srp_destroy_fr_pool(pool);

err:
	pool = ERR_PTR(ret);
	goto out;
}

/**
 * srp_fr_pool_get() - obtain a descriptor suitable for fast registration
 * @pool: Pool to obtain descriptor from.
 */
static struct srp_fr_desc *srp_fr_pool_get(struct srp_fr_pool *pool)
{
	struct srp_fr_desc *d = NULL;
	unsigned long flags;

	spin_lock_irqsave(&pool->lock, flags);
	if (!list_empty(&pool->free_list)) {
		d = list_first_entry(&pool->free_list, typeof(*d), entry);
		list_del(&d->entry);
	}
	spin_unlock_irqrestore(&pool->lock, flags);

	return d;
}

/**
 * srp_fr_pool_put() - put an FR descriptor back in the free list
 * @pool: Pool the descriptor was allocated from.
 * @desc: Pointer to an array of fast registration descriptor pointers.
 * @n:    Number of descriptors to put back.
 *
 * Note: The caller must already have queued an invalidation request for
 * desc->mr->rkey before calling this function.
 */
static void srp_fr_pool_put(struct srp_fr_pool *pool, struct srp_fr_desc **desc,
			    int n)
{
	unsigned long flags;
	int i;

	spin_lock_irqsave(&pool->lock, flags);
	for (i = 0; i < n; i++)
		list_add(&desc[i]->entry, &pool->free_list);
	spin_unlock_irqrestore(&pool->lock, flags);
}

static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
{
	struct srp_device *dev = target->srp_host->srp_dev;

	return srp_create_fr_pool(dev->dev, dev->pd, target->mr_pool_size,
				  dev->max_pages_per_mr);
}

/**
 * srp_destroy_qp() - destroy an RDMA queue pair
 * @ch: SRP RDMA channel.
 *
 * Drain the qp before destroying it. This avoids that the receive
 * completion handler can access the queue pair while it is
 * being destroyed.
 */
static void srp_destroy_qp(struct srp_rdma_ch *ch)
{
	spin_lock_irq(&ch->lock);
	ib_process_cq_direct(ch->send_cq, -1);
	spin_unlock_irq(&ch->lock);

	ib_drain_qp(ch->qp);
	ib_destroy_qp(ch->qp);
}

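/*
 * srp_create_ch_ib() below (re)creates the IB resources of one RDMA channel:
 * it allocates the receive and send completion queues, creates an RC queue
 * pair either through the RDMA/CM or through the IB verbs/CM path, allocates
 * a fast-registration or FMR pool, and only then replaces and frees the
 * channel's previous QP, CQs and memory-registration pool.
 */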
564 static int srp_create_ch_ib(struct srp_rdma_ch
*ch
)
566 struct srp_target_port
*target
= ch
->target
;
567 struct srp_device
*dev
= target
->srp_host
->srp_dev
;
568 struct ib_qp_init_attr
*init_attr
;
569 struct ib_cq
*recv_cq
, *send_cq
;
571 struct ib_fmr_pool
*fmr_pool
= NULL
;
572 struct srp_fr_pool
*fr_pool
= NULL
;
573 const int m
= 1 + dev
->use_fast_reg
* target
->mr_per_cmd
* 2;
576 init_attr
= kzalloc(sizeof *init_attr
, GFP_KERNEL
);
580 /* queue_size + 1 for ib_drain_rq() */
581 recv_cq
= ib_alloc_cq(dev
->dev
, ch
, target
->queue_size
+ 1,
582 ch
->comp_vector
, IB_POLL_SOFTIRQ
);
583 if (IS_ERR(recv_cq
)) {
584 ret
= PTR_ERR(recv_cq
);
588 send_cq
= ib_alloc_cq(dev
->dev
, ch
, m
* target
->queue_size
,
589 ch
->comp_vector
, IB_POLL_DIRECT
);
590 if (IS_ERR(send_cq
)) {
591 ret
= PTR_ERR(send_cq
);
595 init_attr
->event_handler
= srp_qp_event
;
596 init_attr
->cap
.max_send_wr
= m
* target
->queue_size
;
597 init_attr
->cap
.max_recv_wr
= target
->queue_size
+ 1;
598 init_attr
->cap
.max_recv_sge
= 1;
599 init_attr
->cap
.max_send_sge
= 1;
600 init_attr
->sq_sig_type
= IB_SIGNAL_REQ_WR
;
601 init_attr
->qp_type
= IB_QPT_RC
;
602 init_attr
->send_cq
= send_cq
;
603 init_attr
->recv_cq
= recv_cq
;
605 if (target
->using_rdma_cm
) {
606 ret
= rdma_create_qp(ch
->rdma_cm
.cm_id
, dev
->pd
, init_attr
);
607 qp
= ch
->rdma_cm
.cm_id
->qp
;
609 qp
= ib_create_qp(dev
->pd
, init_attr
);
611 ret
= srp_init_ib_qp(target
, qp
);
619 pr_err("QP creation failed for dev %s: %d\n",
620 dev_name(&dev
->dev
->dev
), ret
);
624 if (dev
->use_fast_reg
) {
625 fr_pool
= srp_alloc_fr_pool(target
);
626 if (IS_ERR(fr_pool
)) {
627 ret
= PTR_ERR(fr_pool
);
628 shost_printk(KERN_WARNING
, target
->scsi_host
, PFX
629 "FR pool allocation failed (%d)\n", ret
);
632 } else if (dev
->use_fmr
) {
633 fmr_pool
= srp_alloc_fmr_pool(target
);
634 if (IS_ERR(fmr_pool
)) {
635 ret
= PTR_ERR(fmr_pool
);
636 shost_printk(KERN_WARNING
, target
->scsi_host
, PFX
637 "FMR pool allocation failed (%d)\n", ret
);
645 ib_free_cq(ch
->recv_cq
);
647 ib_free_cq(ch
->send_cq
);
650 ch
->recv_cq
= recv_cq
;
651 ch
->send_cq
= send_cq
;
653 if (dev
->use_fast_reg
) {
655 srp_destroy_fr_pool(ch
->fr_pool
);
656 ch
->fr_pool
= fr_pool
;
657 } else if (dev
->use_fmr
) {
659 ib_destroy_fmr_pool(ch
->fmr_pool
);
660 ch
->fmr_pool
= fmr_pool
;
667 if (target
->using_rdma_cm
)
668 rdma_destroy_qp(ch
->rdma_cm
.cm_id
);
684 * Note: this function may be called without srp_alloc_iu_bufs() having been
685 * invoked. Hence the ch->[rt]x_ring checks.
687 static void srp_free_ch_ib(struct srp_target_port
*target
,
688 struct srp_rdma_ch
*ch
)
690 struct srp_device
*dev
= target
->srp_host
->srp_dev
;
696 if (target
->using_rdma_cm
) {
697 if (ch
->rdma_cm
.cm_id
) {
698 rdma_destroy_id(ch
->rdma_cm
.cm_id
);
699 ch
->rdma_cm
.cm_id
= NULL
;
702 if (ch
->ib_cm
.cm_id
) {
703 ib_destroy_cm_id(ch
->ib_cm
.cm_id
);
704 ch
->ib_cm
.cm_id
= NULL
;
708 /* If srp_new_cm_id() succeeded but srp_create_ch_ib() not, return. */
712 if (dev
->use_fast_reg
) {
714 srp_destroy_fr_pool(ch
->fr_pool
);
715 } else if (dev
->use_fmr
) {
717 ib_destroy_fmr_pool(ch
->fmr_pool
);
721 ib_free_cq(ch
->send_cq
);
722 ib_free_cq(ch
->recv_cq
);
725 * Avoid that the SCSI error handler tries to use this channel after
726 * it has been freed. The SCSI error handler can namely continue
727 * trying to perform recovery actions after scsi_remove_host()
733 ch
->send_cq
= ch
->recv_cq
= NULL
;
736 for (i
= 0; i
< target
->queue_size
; ++i
)
737 srp_free_iu(target
->srp_host
, ch
->rx_ring
[i
]);
742 for (i
= 0; i
< target
->queue_size
; ++i
)
743 srp_free_iu(target
->srp_host
, ch
->tx_ring
[i
]);
749 static void srp_path_rec_completion(int status
,
750 struct sa_path_rec
*pathrec
,
753 struct srp_rdma_ch
*ch
= ch_ptr
;
754 struct srp_target_port
*target
= ch
->target
;
758 shost_printk(KERN_ERR
, target
->scsi_host
,
759 PFX
"Got failed path rec status %d\n", status
);
761 ch
->ib_cm
.path
= *pathrec
;
765 static int srp_ib_lookup_path(struct srp_rdma_ch
*ch
)
767 struct srp_target_port
*target
= ch
->target
;
770 ch
->ib_cm
.path
.numb_path
= 1;
772 init_completion(&ch
->done
);
775 * Avoid that the SCSI host can be removed by srp_remove_target()
776 * before srp_path_rec_completion() is called.
778 if (!scsi_host_get(target
->scsi_host
))
781 ch
->ib_cm
.path_query_id
= ib_sa_path_rec_get(&srp_sa_client
,
782 target
->srp_host
->srp_dev
->dev
,
783 target
->srp_host
->port
,
785 IB_SA_PATH_REC_SERVICE_ID
|
786 IB_SA_PATH_REC_DGID
|
787 IB_SA_PATH_REC_SGID
|
788 IB_SA_PATH_REC_NUMB_PATH
|
790 SRP_PATH_REC_TIMEOUT_MS
,
792 srp_path_rec_completion
,
793 ch
, &ch
->ib_cm
.path_query
);
794 ret
= ch
->ib_cm
.path_query_id
;
798 ret
= wait_for_completion_interruptible(&ch
->done
);
804 shost_printk(KERN_WARNING
, target
->scsi_host
,
805 PFX
"Path record query failed: sgid %pI6, dgid %pI6, pkey %#04x, service_id %#16llx\n",
806 ch
->ib_cm
.path
.sgid
.raw
, ch
->ib_cm
.path
.dgid
.raw
,
807 be16_to_cpu(target
->ib_cm
.pkey
),
808 be64_to_cpu(target
->ib_cm
.service_id
));
811 scsi_host_put(target
->scsi_host
);
817 static int srp_rdma_lookup_path(struct srp_rdma_ch
*ch
)
819 struct srp_target_port
*target
= ch
->target
;
822 init_completion(&ch
->done
);
824 ret
= rdma_resolve_route(ch
->rdma_cm
.cm_id
, SRP_PATH_REC_TIMEOUT_MS
);
828 wait_for_completion_interruptible(&ch
->done
);
831 shost_printk(KERN_WARNING
, target
->scsi_host
,
832 PFX
"Path resolution failed\n");
837 static int srp_lookup_path(struct srp_rdma_ch
*ch
)
839 struct srp_target_port
*target
= ch
->target
;
841 return target
->using_rdma_cm
? srp_rdma_lookup_path(ch
) :
842 srp_ib_lookup_path(ch
);
845 static u8
srp_get_subnet_timeout(struct srp_host
*host
)
847 struct ib_port_attr attr
;
849 u8 subnet_timeout
= 18;
851 ret
= ib_query_port(host
->srp_dev
->dev
, host
->port
, &attr
);
853 subnet_timeout
= attr
.subnet_timeout
;
855 if (unlikely(subnet_timeout
< 15))
856 pr_warn("%s: subnet timeout %d may cause SRP login to fail.\n",
857 dev_name(&host
->srp_dev
->dev
->dev
), subnet_timeout
);
859 return subnet_timeout
;
862 static int srp_send_req(struct srp_rdma_ch
*ch
, bool multich
)
864 struct srp_target_port
*target
= ch
->target
;
866 struct rdma_conn_param rdma_param
;
867 struct srp_login_req_rdma rdma_req
;
868 struct ib_cm_req_param ib_param
;
869 struct srp_login_req ib_req
;
874 req
= kzalloc(sizeof *req
, GFP_KERNEL
);
878 req
->ib_param
.flow_control
= 1;
879 req
->ib_param
.retry_count
= target
->tl_retry_count
;
882 * Pick some arbitrary defaults here; we could make these
883 * module parameters if anyone cared about setting them.
885 req
->ib_param
.responder_resources
= 4;
886 req
->ib_param
.rnr_retry_count
= 7;
887 req
->ib_param
.max_cm_retries
= 15;
889 req
->ib_req
.opcode
= SRP_LOGIN_REQ
;
891 req
->ib_req
.req_it_iu_len
= cpu_to_be32(target
->max_iu_len
);
892 req
->ib_req
.req_buf_fmt
= cpu_to_be16(SRP_BUF_FORMAT_DIRECT
|
893 SRP_BUF_FORMAT_INDIRECT
);
894 req
->ib_req
.req_flags
= (multich
? SRP_MULTICHAN_MULTI
:
895 SRP_MULTICHAN_SINGLE
);
897 if (target
->using_rdma_cm
) {
898 req
->rdma_param
.flow_control
= req
->ib_param
.flow_control
;
899 req
->rdma_param
.responder_resources
=
900 req
->ib_param
.responder_resources
;
901 req
->rdma_param
.initiator_depth
= req
->ib_param
.initiator_depth
;
902 req
->rdma_param
.retry_count
= req
->ib_param
.retry_count
;
903 req
->rdma_param
.rnr_retry_count
= req
->ib_param
.rnr_retry_count
;
904 req
->rdma_param
.private_data
= &req
->rdma_req
;
905 req
->rdma_param
.private_data_len
= sizeof(req
->rdma_req
);
907 req
->rdma_req
.opcode
= req
->ib_req
.opcode
;
908 req
->rdma_req
.tag
= req
->ib_req
.tag
;
909 req
->rdma_req
.req_it_iu_len
= req
->ib_req
.req_it_iu_len
;
910 req
->rdma_req
.req_buf_fmt
= req
->ib_req
.req_buf_fmt
;
911 req
->rdma_req
.req_flags
= req
->ib_req
.req_flags
;
913 ipi
= req
->rdma_req
.initiator_port_id
;
914 tpi
= req
->rdma_req
.target_port_id
;
918 subnet_timeout
= srp_get_subnet_timeout(target
->srp_host
);
920 req
->ib_param
.primary_path
= &ch
->ib_cm
.path
;
921 req
->ib_param
.alternate_path
= NULL
;
922 req
->ib_param
.service_id
= target
->ib_cm
.service_id
;
923 get_random_bytes(&req
->ib_param
.starting_psn
, 4);
924 req
->ib_param
.starting_psn
&= 0xffffff;
925 req
->ib_param
.qp_num
= ch
->qp
->qp_num
;
926 req
->ib_param
.qp_type
= ch
->qp
->qp_type
;
927 req
->ib_param
.local_cm_response_timeout
= subnet_timeout
+ 2;
928 req
->ib_param
.remote_cm_response_timeout
= subnet_timeout
+ 2;
929 req
->ib_param
.private_data
= &req
->ib_req
;
930 req
->ib_param
.private_data_len
= sizeof(req
->ib_req
);
932 ipi
= req
->ib_req
.initiator_port_id
;
933 tpi
= req
->ib_req
.target_port_id
;
937 * In the published SRP specification (draft rev. 16a), the
938 * port identifier format is 8 bytes of ID extension followed
939 * by 8 bytes of GUID. Older drafts put the two halves in the
940 * opposite order, so that the GUID comes first.
942 * Targets conforming to these obsolete drafts can be
943 * recognized by the I/O Class they report.
945 if (target
->io_class
== SRP_REV10_IB_IO_CLASS
) {
946 memcpy(ipi
, &target
->sgid
.global
.interface_id
, 8);
947 memcpy(ipi
+ 8, &target
->initiator_ext
, 8);
948 memcpy(tpi
, &target
->ioc_guid
, 8);
949 memcpy(tpi
+ 8, &target
->id_ext
, 8);
951 memcpy(ipi
, &target
->initiator_ext
, 8);
952 memcpy(ipi
+ 8, &target
->sgid
.global
.interface_id
, 8);
953 memcpy(tpi
, &target
->id_ext
, 8);
954 memcpy(tpi
+ 8, &target
->ioc_guid
, 8);
958 * Topspin/Cisco SRP targets will reject our login unless we
959 * zero out the first 8 bytes of our initiator port ID and set
960 * the second 8 bytes to the local node GUID.
962 if (srp_target_is_topspin(target
)) {
963 shost_printk(KERN_DEBUG
, target
->scsi_host
,
964 PFX
"Topspin/Cisco initiator port ID workaround "
965 "activated for target GUID %016llx\n",
966 be64_to_cpu(target
->ioc_guid
));
968 memcpy(ipi
+ 8, &target
->srp_host
->srp_dev
->dev
->node_guid
, 8);
971 if (target
->using_rdma_cm
)
972 status
= rdma_connect(ch
->rdma_cm
.cm_id
, &req
->rdma_param
);
974 status
= ib_send_cm_req(ch
->ib_cm
.cm_id
, &req
->ib_param
);
981 static bool srp_queue_remove_work(struct srp_target_port
*target
)
983 bool changed
= false;
985 spin_lock_irq(&target
->lock
);
986 if (target
->state
!= SRP_TARGET_REMOVED
) {
987 target
->state
= SRP_TARGET_REMOVED
;
990 spin_unlock_irq(&target
->lock
);
993 queue_work(srp_remove_wq
, &target
->remove_work
);
998 static void srp_disconnect_target(struct srp_target_port
*target
)
1000 struct srp_rdma_ch
*ch
;
1003 /* XXX should send SRP_I_LOGOUT request */
1005 for (i
= 0; i
< target
->ch_count
; i
++) {
1006 ch
= &target
->ch
[i
];
1007 ch
->connected
= false;
1009 if (target
->using_rdma_cm
) {
1010 if (ch
->rdma_cm
.cm_id
)
1011 rdma_disconnect(ch
->rdma_cm
.cm_id
);
1013 if (ch
->ib_cm
.cm_id
)
1014 ret
= ib_send_cm_dreq(ch
->ib_cm
.cm_id
,
1018 shost_printk(KERN_DEBUG
, target
->scsi_host
,
1019 PFX
"Sending CM DREQ failed\n");
1024 static void srp_free_req_data(struct srp_target_port
*target
,
1025 struct srp_rdma_ch
*ch
)
1027 struct srp_device
*dev
= target
->srp_host
->srp_dev
;
1028 struct ib_device
*ibdev
= dev
->dev
;
1029 struct srp_request
*req
;
1035 for (i
= 0; i
< target
->req_ring_size
; ++i
) {
1036 req
= &ch
->req_ring
[i
];
1037 if (dev
->use_fast_reg
) {
1038 kfree(req
->fr_list
);
1040 kfree(req
->fmr_list
);
1041 kfree(req
->map_page
);
1043 if (req
->indirect_dma_addr
) {
1044 ib_dma_unmap_single(ibdev
, req
->indirect_dma_addr
,
1045 target
->indirect_size
,
1048 kfree(req
->indirect_desc
);
1051 kfree(ch
->req_ring
);
1052 ch
->req_ring
= NULL
;
1055 static int srp_alloc_req_data(struct srp_rdma_ch
*ch
)
1057 struct srp_target_port
*target
= ch
->target
;
1058 struct srp_device
*srp_dev
= target
->srp_host
->srp_dev
;
1059 struct ib_device
*ibdev
= srp_dev
->dev
;
1060 struct srp_request
*req
;
1062 dma_addr_t dma_addr
;
1063 int i
, ret
= -ENOMEM
;
1065 ch
->req_ring
= kcalloc(target
->req_ring_size
, sizeof(*ch
->req_ring
),
1070 for (i
= 0; i
< target
->req_ring_size
; ++i
) {
1071 req
= &ch
->req_ring
[i
];
1072 mr_list
= kmalloc(target
->mr_per_cmd
* sizeof(void *),
1076 if (srp_dev
->use_fast_reg
) {
1077 req
->fr_list
= mr_list
;
1079 req
->fmr_list
= mr_list
;
1080 req
->map_page
= kmalloc(srp_dev
->max_pages_per_mr
*
1081 sizeof(void *), GFP_KERNEL
);
1085 req
->indirect_desc
= kmalloc(target
->indirect_size
, GFP_KERNEL
);
1086 if (!req
->indirect_desc
)
1089 dma_addr
= ib_dma_map_single(ibdev
, req
->indirect_desc
,
1090 target
->indirect_size
,
1092 if (ib_dma_mapping_error(ibdev
, dma_addr
))
1095 req
->indirect_dma_addr
= dma_addr
;
1104 * srp_del_scsi_host_attr() - Remove attributes defined in the host template.
1105 * @shost: SCSI host whose attributes to remove from sysfs.
1107 * Note: Any attributes defined in the host template and that did not exist
1108 * before invocation of this function will be ignored.
1110 static void srp_del_scsi_host_attr(struct Scsi_Host
*shost
)
1112 struct device_attribute
**attr
;
1114 for (attr
= shost
->hostt
->shost_attrs
; attr
&& *attr
; ++attr
)
1115 device_remove_file(&shost
->shost_dev
, *attr
);
1118 static void srp_remove_target(struct srp_target_port
*target
)
1120 struct srp_rdma_ch
*ch
;
1123 WARN_ON_ONCE(target
->state
!= SRP_TARGET_REMOVED
);
1125 srp_del_scsi_host_attr(target
->scsi_host
);
1126 srp_rport_get(target
->rport
);
1127 srp_remove_host(target
->scsi_host
);
1128 scsi_remove_host(target
->scsi_host
);
1129 srp_stop_rport_timers(target
->rport
);
1130 srp_disconnect_target(target
);
1131 kobj_ns_drop(KOBJ_NS_TYPE_NET
, target
->net
);
1132 for (i
= 0; i
< target
->ch_count
; i
++) {
1133 ch
= &target
->ch
[i
];
1134 srp_free_ch_ib(target
, ch
);
1136 cancel_work_sync(&target
->tl_err_work
);
1137 srp_rport_put(target
->rport
);
1138 for (i
= 0; i
< target
->ch_count
; i
++) {
1139 ch
= &target
->ch
[i
];
1140 srp_free_req_data(target
, ch
);
1145 spin_lock(&target
->srp_host
->target_lock
);
1146 list_del(&target
->list
);
1147 spin_unlock(&target
->srp_host
->target_lock
);
1149 scsi_host_put(target
->scsi_host
);
1152 static void srp_remove_work(struct work_struct
*work
)
1154 struct srp_target_port
*target
=
1155 container_of(work
, struct srp_target_port
, remove_work
);
1157 WARN_ON_ONCE(target
->state
!= SRP_TARGET_REMOVED
);
1159 srp_remove_target(target
);
1162 static void srp_rport_delete(struct srp_rport
*rport
)
1164 struct srp_target_port
*target
= rport
->lld_data
;
1166 srp_queue_remove_work(target
);
1170 * srp_connected_ch() - number of connected channels
1171 * @target: SRP target port.
1173 static int srp_connected_ch(struct srp_target_port
*target
)
1177 for (i
= 0; i
< target
->ch_count
; i
++)
1178 c
+= target
->ch
[i
].connected
;
1183 static int srp_connect_ch(struct srp_rdma_ch
*ch
, bool multich
)
1185 struct srp_target_port
*target
= ch
->target
;
1188 WARN_ON_ONCE(!multich
&& srp_connected_ch(target
) > 0);
1190 ret
= srp_lookup_path(ch
);
1195 init_completion(&ch
->done
);
1196 ret
= srp_send_req(ch
, multich
);
1199 ret
= wait_for_completion_interruptible(&ch
->done
);
1204 * The CM event handling code will set status to
1205 * SRP_PORT_REDIRECT if we get a port redirect REJ
1206 * back, or SRP_DLID_REDIRECT if we get a lid/qp
1207 * redirect REJ back.
1212 ch
->connected
= true;
1215 case SRP_PORT_REDIRECT
:
1216 ret
= srp_lookup_path(ch
);
1221 case SRP_DLID_REDIRECT
:
1224 case SRP_STALE_CONN
:
1225 shost_printk(KERN_ERR
, target
->scsi_host
, PFX
1226 "giving up on stale connection\n");
1236 return ret
<= 0 ? ret
: -ENODEV
;
1239 static void srp_inv_rkey_err_done(struct ib_cq
*cq
, struct ib_wc
*wc
)
1241 srp_handle_qp_err(cq
, wc
, "INV RKEY");
1244 static int srp_inv_rkey(struct srp_request
*req
, struct srp_rdma_ch
*ch
,
1247 struct ib_send_wr
*bad_wr
;
1248 struct ib_send_wr wr
= {
1249 .opcode
= IB_WR_LOCAL_INV
,
1253 .ex
.invalidate_rkey
= rkey
,
1256 wr
.wr_cqe
= &req
->reg_cqe
;
1257 req
->reg_cqe
.done
= srp_inv_rkey_err_done
;
1258 return ib_post_send(ch
->qp
, &wr
, &bad_wr
);
1261 static void srp_unmap_data(struct scsi_cmnd
*scmnd
,
1262 struct srp_rdma_ch
*ch
,
1263 struct srp_request
*req
)
1265 struct srp_target_port
*target
= ch
->target
;
1266 struct srp_device
*dev
= target
->srp_host
->srp_dev
;
1267 struct ib_device
*ibdev
= dev
->dev
;
1270 if (!scsi_sglist(scmnd
) ||
1271 (scmnd
->sc_data_direction
!= DMA_TO_DEVICE
&&
1272 scmnd
->sc_data_direction
!= DMA_FROM_DEVICE
))
1275 if (dev
->use_fast_reg
) {
1276 struct srp_fr_desc
**pfr
;
1278 for (i
= req
->nmdesc
, pfr
= req
->fr_list
; i
> 0; i
--, pfr
++) {
1279 res
= srp_inv_rkey(req
, ch
, (*pfr
)->mr
->rkey
);
1281 shost_printk(KERN_ERR
, target
->scsi_host
, PFX
1282 "Queueing INV WR for rkey %#x failed (%d)\n",
1283 (*pfr
)->mr
->rkey
, res
);
1284 queue_work(system_long_wq
,
1285 &target
->tl_err_work
);
1289 srp_fr_pool_put(ch
->fr_pool
, req
->fr_list
,
1291 } else if (dev
->use_fmr
) {
1292 struct ib_pool_fmr
**pfmr
;
1294 for (i
= req
->nmdesc
, pfmr
= req
->fmr_list
; i
> 0; i
--, pfmr
++)
1295 ib_fmr_pool_unmap(*pfmr
);
1298 ib_dma_unmap_sg(ibdev
, scsi_sglist(scmnd
), scsi_sg_count(scmnd
),
1299 scmnd
->sc_data_direction
);
1303 * srp_claim_req - Take ownership of the scmnd associated with a request.
1304 * @ch: SRP RDMA channel.
1305 * @req: SRP request.
1306 * @sdev: If not NULL, only take ownership for this SCSI device.
1307 * @scmnd: If NULL, take ownership of @req->scmnd. If not NULL, only take
1308 * ownership of @req->scmnd if it equals @scmnd.
1311 * Either NULL or a pointer to the SCSI command the caller became owner of.
1313 static struct scsi_cmnd
*srp_claim_req(struct srp_rdma_ch
*ch
,
1314 struct srp_request
*req
,
1315 struct scsi_device
*sdev
,
1316 struct scsi_cmnd
*scmnd
)
1318 unsigned long flags
;
1320 spin_lock_irqsave(&ch
->lock
, flags
);
1322 (!sdev
|| req
->scmnd
->device
== sdev
) &&
1323 (!scmnd
|| req
->scmnd
== scmnd
)) {
1329 spin_unlock_irqrestore(&ch
->lock
, flags
);
1335 * srp_free_req() - Unmap data and adjust ch->req_lim.
1336 * @ch: SRP RDMA channel.
1337 * @req: Request to be freed.
1338 * @scmnd: SCSI command associated with @req.
1339 * @req_lim_delta: Amount to be added to @target->req_lim.
1341 static void srp_free_req(struct srp_rdma_ch
*ch
, struct srp_request
*req
,
1342 struct scsi_cmnd
*scmnd
, s32 req_lim_delta
)
1344 unsigned long flags
;
1346 srp_unmap_data(scmnd
, ch
, req
);
1348 spin_lock_irqsave(&ch
->lock
, flags
);
1349 ch
->req_lim
+= req_lim_delta
;
1350 spin_unlock_irqrestore(&ch
->lock
, flags
);
1353 static void srp_finish_req(struct srp_rdma_ch
*ch
, struct srp_request
*req
,
1354 struct scsi_device
*sdev
, int result
)
1356 struct scsi_cmnd
*scmnd
= srp_claim_req(ch
, req
, sdev
, NULL
);
1359 srp_free_req(ch
, req
, scmnd
, 0);
1360 scmnd
->result
= result
;
1361 scmnd
->scsi_done(scmnd
);
1365 static void srp_terminate_io(struct srp_rport
*rport
)
1367 struct srp_target_port
*target
= rport
->lld_data
;
1368 struct srp_rdma_ch
*ch
;
1369 struct Scsi_Host
*shost
= target
->scsi_host
;
1370 struct scsi_device
*sdev
;
1374 * Invoking srp_terminate_io() while srp_queuecommand() is running
1375 * is not safe. Hence the warning statement below.
1377 shost_for_each_device(sdev
, shost
)
1378 WARN_ON_ONCE(sdev
->request_queue
->request_fn_active
);
1380 for (i
= 0; i
< target
->ch_count
; i
++) {
1381 ch
= &target
->ch
[i
];
1383 for (j
= 0; j
< target
->req_ring_size
; ++j
) {
1384 struct srp_request
*req
= &ch
->req_ring
[j
];
1386 srp_finish_req(ch
, req
, NULL
,
1387 DID_TRANSPORT_FAILFAST
<< 16);
1393 * It is up to the caller to ensure that srp_rport_reconnect() calls are
1394 * serialized and that no concurrent srp_queuecommand(), srp_abort(),
1395 * srp_reset_device() or srp_reset_host() calls will occur while this function
1396 * is in progress. One way to realize that is not to call this function
1397 * directly but to call srp_reconnect_rport() instead since that last function
1398 * serializes calls of this function via rport->mutex and also blocks
1399 * srp_queuecommand() calls before invoking this function.
1401 static int srp_rport_reconnect(struct srp_rport
*rport
)
1403 struct srp_target_port
*target
= rport
->lld_data
;
1404 struct srp_rdma_ch
*ch
;
1406 bool multich
= false;
1408 srp_disconnect_target(target
);
1410 if (target
->state
== SRP_TARGET_SCANNING
)
1414 * Now get a new local CM ID so that we avoid confusing the target in
1415 * case things are really fouled up. Doing so also ensures that all CM
1416 * callbacks will have finished before a new QP is allocated.
1418 for (i
= 0; i
< target
->ch_count
; i
++) {
1419 ch
= &target
->ch
[i
];
1420 ret
+= srp_new_cm_id(ch
);
1422 for (i
= 0; i
< target
->ch_count
; i
++) {
1423 ch
= &target
->ch
[i
];
1424 for (j
= 0; j
< target
->req_ring_size
; ++j
) {
1425 struct srp_request
*req
= &ch
->req_ring
[j
];
1427 srp_finish_req(ch
, req
, NULL
, DID_RESET
<< 16);
1430 for (i
= 0; i
< target
->ch_count
; i
++) {
1431 ch
= &target
->ch
[i
];
1433 * Whether or not creating a new CM ID succeeded, create a new
1434 * QP. This guarantees that all completion callback function
1435 * invocations have finished before request resetting starts.
1437 ret
+= srp_create_ch_ib(ch
);
1439 INIT_LIST_HEAD(&ch
->free_tx
);
1440 for (j
= 0; j
< target
->queue_size
; ++j
)
1441 list_add(&ch
->tx_ring
[j
]->list
, &ch
->free_tx
);
1444 target
->qp_in_error
= false;
1446 for (i
= 0; i
< target
->ch_count
; i
++) {
1447 ch
= &target
->ch
[i
];
1450 ret
= srp_connect_ch(ch
, multich
);
1455 shost_printk(KERN_INFO
, target
->scsi_host
,
1456 PFX
"reconnect succeeded\n");
1461 static void srp_map_desc(struct srp_map_state
*state
, dma_addr_t dma_addr
,
1462 unsigned int dma_len
, u32 rkey
)
1464 struct srp_direct_buf
*desc
= state
->desc
;
1466 WARN_ON_ONCE(!dma_len
);
1468 desc
->va
= cpu_to_be64(dma_addr
);
1469 desc
->key
= cpu_to_be32(rkey
);
1470 desc
->len
= cpu_to_be32(dma_len
);
1472 state
->total_len
+= dma_len
;
1477 static int srp_map_finish_fmr(struct srp_map_state
*state
,
1478 struct srp_rdma_ch
*ch
)
1480 struct srp_target_port
*target
= ch
->target
;
1481 struct srp_device
*dev
= target
->srp_host
->srp_dev
;
1482 struct ib_pool_fmr
*fmr
;
1485 if (state
->fmr
.next
>= state
->fmr
.end
) {
1486 shost_printk(KERN_ERR
, ch
->target
->scsi_host
,
1487 PFX
"Out of MRs (mr_per_cmd = %d)\n",
1488 ch
->target
->mr_per_cmd
);
1492 WARN_ON_ONCE(!dev
->use_fmr
);
1494 if (state
->npages
== 0)
1497 if (state
->npages
== 1 && target
->global_rkey
) {
1498 srp_map_desc(state
, state
->base_dma_addr
, state
->dma_len
,
1499 target
->global_rkey
);
1503 fmr
= ib_fmr_pool_map_phys(ch
->fmr_pool
, state
->pages
,
1504 state
->npages
, io_addr
);
1506 return PTR_ERR(fmr
);
1508 *state
->fmr
.next
++ = fmr
;
1511 srp_map_desc(state
, state
->base_dma_addr
& ~dev
->mr_page_mask
,
1512 state
->dma_len
, fmr
->fmr
->rkey
);
1521 static void srp_reg_mr_err_done(struct ib_cq
*cq
, struct ib_wc
*wc
)
1523 srp_handle_qp_err(cq
, wc
, "FAST REG");
1527 * Map up to sg_nents elements of state->sg where *sg_offset_p is the offset
1528 * where to start in the first element. If sg_offset_p != NULL then
1529 * *sg_offset_p is updated to the offset in state->sg[retval] of the first
1530 * byte that has not yet been mapped.
1532 static int srp_map_finish_fr(struct srp_map_state
*state
,
1533 struct srp_request
*req
,
1534 struct srp_rdma_ch
*ch
, int sg_nents
,
1535 unsigned int *sg_offset_p
)
1537 struct srp_target_port
*target
= ch
->target
;
1538 struct srp_device
*dev
= target
->srp_host
->srp_dev
;
1539 struct ib_send_wr
*bad_wr
;
1540 struct ib_reg_wr wr
;
1541 struct srp_fr_desc
*desc
;
1545 if (state
->fr
.next
>= state
->fr
.end
) {
1546 shost_printk(KERN_ERR
, ch
->target
->scsi_host
,
1547 PFX
"Out of MRs (mr_per_cmd = %d)\n",
1548 ch
->target
->mr_per_cmd
);
1552 WARN_ON_ONCE(!dev
->use_fast_reg
);
1554 if (sg_nents
== 1 && target
->global_rkey
) {
1555 unsigned int sg_offset
= sg_offset_p
? *sg_offset_p
: 0;
1557 srp_map_desc(state
, sg_dma_address(state
->sg
) + sg_offset
,
1558 sg_dma_len(state
->sg
) - sg_offset
,
1559 target
->global_rkey
);
1565 desc
= srp_fr_pool_get(ch
->fr_pool
);
1569 rkey
= ib_inc_rkey(desc
->mr
->rkey
);
1570 ib_update_fast_reg_key(desc
->mr
, rkey
);
1572 n
= ib_map_mr_sg(desc
->mr
, state
->sg
, sg_nents
, sg_offset_p
,
1574 if (unlikely(n
< 0)) {
1575 srp_fr_pool_put(ch
->fr_pool
, &desc
, 1);
1576 pr_debug("%s: ib_map_mr_sg(%d, %d) returned %d.\n",
1577 dev_name(&req
->scmnd
->device
->sdev_gendev
), sg_nents
,
1578 sg_offset_p
? *sg_offset_p
: -1, n
);
1582 WARN_ON_ONCE(desc
->mr
->length
== 0);
1584 req
->reg_cqe
.done
= srp_reg_mr_err_done
;
1587 wr
.wr
.opcode
= IB_WR_REG_MR
;
1588 wr
.wr
.wr_cqe
= &req
->reg_cqe
;
1590 wr
.wr
.send_flags
= 0;
1592 wr
.key
= desc
->mr
->rkey
;
1593 wr
.access
= (IB_ACCESS_LOCAL_WRITE
|
1594 IB_ACCESS_REMOTE_READ
|
1595 IB_ACCESS_REMOTE_WRITE
);
1597 *state
->fr
.next
++ = desc
;
1600 srp_map_desc(state
, desc
->mr
->iova
,
1601 desc
->mr
->length
, desc
->mr
->rkey
);
1603 err
= ib_post_send(ch
->qp
, &wr
.wr
, &bad_wr
);
1604 if (unlikely(err
)) {
1605 WARN_ON_ONCE(err
== -ENOMEM
);
1612 static int srp_map_sg_entry(struct srp_map_state
*state
,
1613 struct srp_rdma_ch
*ch
,
1614 struct scatterlist
*sg
)
1616 struct srp_target_port
*target
= ch
->target
;
1617 struct srp_device
*dev
= target
->srp_host
->srp_dev
;
1618 struct ib_device
*ibdev
= dev
->dev
;
1619 dma_addr_t dma_addr
= ib_sg_dma_address(ibdev
, sg
);
1620 unsigned int dma_len
= ib_sg_dma_len(ibdev
, sg
);
1621 unsigned int len
= 0;
1624 WARN_ON_ONCE(!dma_len
);
1627 unsigned offset
= dma_addr
& ~dev
->mr_page_mask
;
1629 if (state
->npages
== dev
->max_pages_per_mr
||
1630 (state
->npages
> 0 && offset
!= 0)) {
1631 ret
= srp_map_finish_fmr(state
, ch
);
1636 len
= min_t(unsigned int, dma_len
, dev
->mr_page_size
- offset
);
1639 state
->base_dma_addr
= dma_addr
;
1640 state
->pages
[state
->npages
++] = dma_addr
& dev
->mr_page_mask
;
1641 state
->dma_len
+= len
;
1647 * If the end of the MR is not on a page boundary then we need to
1648 * close it out and start a new one -- we can only merge at page
1652 if ((dma_addr
& ~dev
->mr_page_mask
) != 0)
1653 ret
= srp_map_finish_fmr(state
, ch
);
1657 static int srp_map_sg_fmr(struct srp_map_state
*state
, struct srp_rdma_ch
*ch
,
1658 struct srp_request
*req
, struct scatterlist
*scat
,
1661 struct scatterlist
*sg
;
1664 state
->pages
= req
->map_page
;
1665 state
->fmr
.next
= req
->fmr_list
;
1666 state
->fmr
.end
= req
->fmr_list
+ ch
->target
->mr_per_cmd
;
1668 for_each_sg(scat
, sg
, count
, i
) {
1669 ret
= srp_map_sg_entry(state
, ch
, sg
);
1674 ret
= srp_map_finish_fmr(state
, ch
);
1681 static int srp_map_sg_fr(struct srp_map_state
*state
, struct srp_rdma_ch
*ch
,
1682 struct srp_request
*req
, struct scatterlist
*scat
,
1685 unsigned int sg_offset
= 0;
1687 state
->fr
.next
= req
->fr_list
;
1688 state
->fr
.end
= req
->fr_list
+ ch
->target
->mr_per_cmd
;
1697 n
= srp_map_finish_fr(state
, req
, ch
, count
, &sg_offset
);
1698 if (unlikely(n
< 0))
1702 for (i
= 0; i
< n
; i
++)
1703 state
->sg
= sg_next(state
->sg
);
1709 static int srp_map_sg_dma(struct srp_map_state
*state
, struct srp_rdma_ch
*ch
,
1710 struct srp_request
*req
, struct scatterlist
*scat
,
1713 struct srp_target_port
*target
= ch
->target
;
1714 struct srp_device
*dev
= target
->srp_host
->srp_dev
;
1715 struct scatterlist
*sg
;
1718 for_each_sg(scat
, sg
, count
, i
) {
1719 srp_map_desc(state
, ib_sg_dma_address(dev
->dev
, sg
),
1720 ib_sg_dma_len(dev
->dev
, sg
),
1721 target
->global_rkey
);
1728 * Register the indirect data buffer descriptor with the HCA.
1730 * Note: since the indirect data buffer descriptor has been allocated with
1731 * kmalloc() it is guaranteed that this buffer is a physically contiguous
1734 static int srp_map_idb(struct srp_rdma_ch
*ch
, struct srp_request
*req
,
1735 void **next_mr
, void **end_mr
, u32 idb_len
,
1738 struct srp_target_port
*target
= ch
->target
;
1739 struct srp_device
*dev
= target
->srp_host
->srp_dev
;
1740 struct srp_map_state state
;
1741 struct srp_direct_buf idb_desc
;
1743 struct scatterlist idb_sg
[1];
1746 memset(&state
, 0, sizeof(state
));
1747 memset(&idb_desc
, 0, sizeof(idb_desc
));
1748 state
.gen
.next
= next_mr
;
1749 state
.gen
.end
= end_mr
;
1750 state
.desc
= &idb_desc
;
1751 state
.base_dma_addr
= req
->indirect_dma_addr
;
1752 state
.dma_len
= idb_len
;
1754 if (dev
->use_fast_reg
) {
1756 sg_init_one(idb_sg
, req
->indirect_desc
, idb_len
);
1757 idb_sg
->dma_address
= req
->indirect_dma_addr
; /* hack! */
1758 #ifdef CONFIG_NEED_SG_DMA_LENGTH
1759 idb_sg
->dma_length
= idb_sg
->length
; /* hack^2 */
1761 ret
= srp_map_finish_fr(&state
, req
, ch
, 1, NULL
);
1764 WARN_ON_ONCE(ret
< 1);
1765 } else if (dev
->use_fmr
) {
1766 state
.pages
= idb_pages
;
1767 state
.pages
[0] = (req
->indirect_dma_addr
&
1770 ret
= srp_map_finish_fmr(&state
, ch
);
1777 *idb_rkey
= idb_desc
.key
;
1782 static void srp_check_mapping(struct srp_map_state
*state
,
1783 struct srp_rdma_ch
*ch
, struct srp_request
*req
,
1784 struct scatterlist
*scat
, int count
)
1786 struct srp_device
*dev
= ch
->target
->srp_host
->srp_dev
;
1787 struct srp_fr_desc
**pfr
;
1788 u64 desc_len
= 0, mr_len
= 0;
1791 for (i
= 0; i
< state
->ndesc
; i
++)
1792 desc_len
+= be32_to_cpu(req
->indirect_desc
[i
].len
);
1793 if (dev
->use_fast_reg
)
1794 for (i
= 0, pfr
= req
->fr_list
; i
< state
->nmdesc
; i
++, pfr
++)
1795 mr_len
+= (*pfr
)->mr
->length
;
1796 else if (dev
->use_fmr
)
1797 for (i
= 0; i
< state
->nmdesc
; i
++)
1798 mr_len
+= be32_to_cpu(req
->indirect_desc
[i
].len
);
1799 if (desc_len
!= scsi_bufflen(req
->scmnd
) ||
1800 mr_len
> scsi_bufflen(req
->scmnd
))
1801 pr_err("Inconsistent: scsi len %d <> desc len %lld <> mr len %lld; ndesc %d; nmdesc = %d\n",
1802 scsi_bufflen(req
->scmnd
), desc_len
, mr_len
,
1803 state
->ndesc
, state
->nmdesc
);
1807 * srp_map_data() - map SCSI data buffer onto an SRP request
1808 * @scmnd: SCSI command to map
1809 * @ch: SRP RDMA channel
1812 * Returns the length in bytes of the SRP_CMD IU or a negative value if
1815 static int srp_map_data(struct scsi_cmnd
*scmnd
, struct srp_rdma_ch
*ch
,
1816 struct srp_request
*req
)
1818 struct srp_target_port
*target
= ch
->target
;
1819 struct scatterlist
*scat
;
1820 struct srp_cmd
*cmd
= req
->cmd
->buf
;
1821 int len
, nents
, count
, ret
;
1822 struct srp_device
*dev
;
1823 struct ib_device
*ibdev
;
1824 struct srp_map_state state
;
1825 struct srp_indirect_buf
*indirect_hdr
;
1826 u32 idb_len
, table_len
;
1830 if (!scsi_sglist(scmnd
) || scmnd
->sc_data_direction
== DMA_NONE
)
1831 return sizeof (struct srp_cmd
);
1833 if (scmnd
->sc_data_direction
!= DMA_FROM_DEVICE
&&
1834 scmnd
->sc_data_direction
!= DMA_TO_DEVICE
) {
1835 shost_printk(KERN_WARNING
, target
->scsi_host
,
1836 PFX
"Unhandled data direction %d\n",
1837 scmnd
->sc_data_direction
);
1841 nents
= scsi_sg_count(scmnd
);
1842 scat
= scsi_sglist(scmnd
);
1844 dev
= target
->srp_host
->srp_dev
;
1847 count
= ib_dma_map_sg(ibdev
, scat
, nents
, scmnd
->sc_data_direction
);
1848 if (unlikely(count
== 0))
1851 fmt
= SRP_DATA_DESC_DIRECT
;
1852 len
= sizeof (struct srp_cmd
) + sizeof (struct srp_direct_buf
);
1854 if (count
== 1 && target
->global_rkey
) {
1856 * The midlayer only generated a single gather/scatter
1857 * entry, or DMA mapping coalesced everything to a
1858 * single entry. So a direct descriptor along with
1859 * the DMA MR suffices.
1861 struct srp_direct_buf
*buf
= (void *) cmd
->add_data
;
1863 buf
->va
= cpu_to_be64(ib_sg_dma_address(ibdev
, scat
));
1864 buf
->key
= cpu_to_be32(target
->global_rkey
);
1865 buf
->len
= cpu_to_be32(ib_sg_dma_len(ibdev
, scat
));
1872 * We have more than one scatter/gather entry, so build our indirect
1873 * descriptor table, trying to merge as many entries as we can.
1875 indirect_hdr
= (void *) cmd
->add_data
;
1877 ib_dma_sync_single_for_cpu(ibdev
, req
->indirect_dma_addr
,
1878 target
->indirect_size
, DMA_TO_DEVICE
);
1880 memset(&state
, 0, sizeof(state
));
1881 state
.desc
= req
->indirect_desc
;
1882 if (dev
->use_fast_reg
)
1883 ret
= srp_map_sg_fr(&state
, ch
, req
, scat
, count
);
1884 else if (dev
->use_fmr
)
1885 ret
= srp_map_sg_fmr(&state
, ch
, req
, scat
, count
);
1887 ret
= srp_map_sg_dma(&state
, ch
, req
, scat
, count
);
1888 req
->nmdesc
= state
.nmdesc
;
1893 DEFINE_DYNAMIC_DEBUG_METADATA(ddm
,
1894 "Memory mapping consistency check");
1895 if (DYNAMIC_DEBUG_BRANCH(ddm
))
1896 srp_check_mapping(&state
, ch
, req
, scat
, count
);
1899 /* We've mapped the request, now pull as much of the indirect
1900 * descriptor table as we can into the command buffer. If this
1901 * target is not using an external indirect table, we are
1902 * guaranteed to fit into the command, as the SCSI layer won't
1903 * give us more S/G entries than we allow.
1905 if (state
.ndesc
== 1) {
1907 * Memory registration collapsed the sg-list into one entry,
1908 * so use a direct descriptor.
1910 struct srp_direct_buf
*buf
= (void *) cmd
->add_data
;
1912 *buf
= req
->indirect_desc
[0];
1916 if (unlikely(target
->cmd_sg_cnt
< state
.ndesc
&&
1917 !target
->allow_ext_sg
)) {
1918 shost_printk(KERN_ERR
, target
->scsi_host
,
1919 "Could not fit S/G list into SRP_CMD\n");
1924 count
= min(state
.ndesc
, target
->cmd_sg_cnt
);
1925 table_len
= state
.ndesc
* sizeof (struct srp_direct_buf
);
1926 idb_len
= sizeof(struct srp_indirect_buf
) + table_len
;
1928 fmt
= SRP_DATA_DESC_INDIRECT
;
1929 len
= sizeof(struct srp_cmd
) + sizeof (struct srp_indirect_buf
);
1930 len
+= count
* sizeof (struct srp_direct_buf
);
1932 memcpy(indirect_hdr
->desc_list
, req
->indirect_desc
,
1933 count
* sizeof (struct srp_direct_buf
));
1935 if (!target
->global_rkey
) {
1936 ret
= srp_map_idb(ch
, req
, state
.gen
.next
, state
.gen
.end
,
1937 idb_len
, &idb_rkey
);
1942 idb_rkey
= cpu_to_be32(target
->global_rkey
);
1945 indirect_hdr
->table_desc
.va
= cpu_to_be64(req
->indirect_dma_addr
);
1946 indirect_hdr
->table_desc
.key
= idb_rkey
;
1947 indirect_hdr
->table_desc
.len
= cpu_to_be32(table_len
);
1948 indirect_hdr
->len
= cpu_to_be32(state
.total_len
);
1950 if (scmnd
->sc_data_direction
== DMA_TO_DEVICE
)
1951 cmd
->data_out_desc_cnt
= count
;
1953 cmd
->data_in_desc_cnt
= count
;
1955 ib_dma_sync_single_for_device(ibdev
, req
->indirect_dma_addr
, table_len
,
1959 if (scmnd
->sc_data_direction
== DMA_TO_DEVICE
)
1960 cmd
->buf_fmt
= fmt
<< 4;
1967 srp_unmap_data(scmnd
, ch
, req
);
1968 if (ret
== -ENOMEM
&& req
->nmdesc
>= target
->mr_pool_size
)
1974 * Return an IU and possible credit to the free pool
1976 static void srp_put_tx_iu(struct srp_rdma_ch
*ch
, struct srp_iu
*iu
,
1977 enum srp_iu_type iu_type
)
1979 unsigned long flags
;
1981 spin_lock_irqsave(&ch
->lock
, flags
);
1982 list_add(&iu
->list
, &ch
->free_tx
);
1983 if (iu_type
!= SRP_IU_RSP
)
1985 spin_unlock_irqrestore(&ch
->lock
, flags
);
1989 * Must be called with ch->lock held to protect req_lim and free_tx.
1990 * If IU is not sent, it must be returned using srp_put_tx_iu().
1993 * An upper limit for the number of allocated information units for each
1995 * - SRP_IU_CMD: SRP_CMD_SQ_SIZE, since the SCSI mid-layer never queues
1996 * more than Scsi_Host.can_queue requests.
1997 * - SRP_IU_TSK_MGMT: SRP_TSK_MGMT_SQ_SIZE.
1998 * - SRP_IU_RSP: 1, since a conforming SRP target never sends more than
1999 * one unanswered SRP request to an initiator.
2001 static struct srp_iu
*__srp_get_tx_iu(struct srp_rdma_ch
*ch
,
2002 enum srp_iu_type iu_type
)
2004 struct srp_target_port
*target
= ch
->target
;
2005 s32 rsv
= (iu_type
== SRP_IU_TSK_MGMT
) ? 0 : SRP_TSK_MGMT_SQ_SIZE
;
2008 lockdep_assert_held(&ch
->lock
);
2010 ib_process_cq_direct(ch
->send_cq
, -1);
2012 if (list_empty(&ch
->free_tx
))
2015 /* Initiator responses to target requests do not consume credits */
2016 if (iu_type
!= SRP_IU_RSP
) {
2017 if (ch
->req_lim
<= rsv
) {
2018 ++target
->zero_req_lim
;
2025 iu
= list_first_entry(&ch
->free_tx
, struct srp_iu
, list
);
2026 list_del(&iu
->list
);
2031 * Note: if this function is called from inside ib_drain_sq() then it will
2032 * be called without ch->lock being held. If ib_drain_sq() dequeues a WQE
2033 * with status IB_WC_SUCCESS then that's a bug.
2035 static void srp_send_done(struct ib_cq
*cq
, struct ib_wc
*wc
)
2037 struct srp_iu
*iu
= container_of(wc
->wr_cqe
, struct srp_iu
, cqe
);
2038 struct srp_rdma_ch
*ch
= cq
->cq_context
;
2040 if (unlikely(wc
->status
!= IB_WC_SUCCESS
)) {
2041 srp_handle_qp_err(cq
, wc
, "SEND");
2045 lockdep_assert_held(&ch
->lock
);
2047 list_add(&iu
->list
, &ch
->free_tx
);
2050 static int srp_post_send(struct srp_rdma_ch
*ch
, struct srp_iu
*iu
, int len
)
2052 struct srp_target_port
*target
= ch
->target
;
2054 struct ib_send_wr wr
, *bad_wr
;
2056 list
.addr
= iu
->dma
;
2058 list
.lkey
= target
->lkey
;
2060 iu
->cqe
.done
= srp_send_done
;
2063 wr
.wr_cqe
= &iu
->cqe
;
2066 wr
.opcode
= IB_WR_SEND
;
2067 wr
.send_flags
= IB_SEND_SIGNALED
;
2069 return ib_post_send(ch
->qp
, &wr
, &bad_wr
);
2072 static int srp_post_recv(struct srp_rdma_ch
*ch
, struct srp_iu
*iu
)
2074 struct srp_target_port
*target
= ch
->target
;
2075 struct ib_recv_wr wr
, *bad_wr
;
2078 list
.addr
= iu
->dma
;
2079 list
.length
= iu
->size
;
2080 list
.lkey
= target
->lkey
;
2082 iu
->cqe
.done
= srp_recv_done
;
2085 wr
.wr_cqe
= &iu
->cqe
;
2089 return ib_post_recv(ch
->qp
, &wr
, &bad_wr
);
2092 static void srp_process_rsp(struct srp_rdma_ch
*ch
, struct srp_rsp
*rsp
)
2094 struct srp_target_port
*target
= ch
->target
;
2095 struct srp_request
*req
;
2096 struct scsi_cmnd
*scmnd
;
2097 unsigned long flags
;
2099 if (unlikely(rsp
->tag
& SRP_TAG_TSK_MGMT
)) {
2100 spin_lock_irqsave(&ch
->lock
, flags
);
2101 ch
->req_lim
+= be32_to_cpu(rsp
->req_lim_delta
);
2102 if (rsp
->tag
== ch
->tsk_mgmt_tag
) {
2103 ch
->tsk_mgmt_status
= -1;
2104 if (be32_to_cpu(rsp
->resp_data_len
) >= 4)
2105 ch
->tsk_mgmt_status
= rsp
->data
[3];
2106 complete(&ch
->tsk_mgmt_done
);
2108 shost_printk(KERN_ERR
, target
->scsi_host
,
2109 "Received tsk mgmt response too late for tag %#llx\n",
2112 spin_unlock_irqrestore(&ch
->lock
, flags
);
2114 scmnd
= scsi_host_find_tag(target
->scsi_host
, rsp
->tag
);
2115 if (scmnd
&& scmnd
->host_scribble
) {
2116 req
= (void *)scmnd
->host_scribble
;
2117 scmnd
= srp_claim_req(ch
, req
, NULL
, scmnd
);
2122 shost_printk(KERN_ERR
, target
->scsi_host
,
2123 "Null scmnd for RSP w/tag %#016llx received on ch %td / QP %#x\n",
2124 rsp
->tag
, ch
- target
->ch
, ch
->qp
->qp_num
);
2126 spin_lock_irqsave(&ch
->lock
, flags
);
2127 ch
->req_lim
+= be32_to_cpu(rsp
->req_lim_delta
);
2128 spin_unlock_irqrestore(&ch
->lock
, flags
);
2132 scmnd
->result
= rsp
->status
;
2134 if (rsp
->flags
& SRP_RSP_FLAG_SNSVALID
) {
2135 memcpy(scmnd
->sense_buffer
, rsp
->data
+
2136 be32_to_cpu(rsp
->resp_data_len
),
2137 min_t(int, be32_to_cpu(rsp
->sense_data_len
),
2138 SCSI_SENSE_BUFFERSIZE
));
2141 if (unlikely(rsp
->flags
& SRP_RSP_FLAG_DIUNDER
))
2142 scsi_set_resid(scmnd
, be32_to_cpu(rsp
->data_in_res_cnt
));
2143 else if (unlikely(rsp
->flags
& SRP_RSP_FLAG_DIOVER
))
2144 scsi_set_resid(scmnd
, -be32_to_cpu(rsp
->data_in_res_cnt
));
2145 else if (unlikely(rsp
->flags
& SRP_RSP_FLAG_DOUNDER
))
2146 scsi_set_resid(scmnd
, be32_to_cpu(rsp
->data_out_res_cnt
));
2147 else if (unlikely(rsp
->flags
& SRP_RSP_FLAG_DOOVER
))
2148 scsi_set_resid(scmnd
, -be32_to_cpu(rsp
->data_out_res_cnt
));
2150 srp_free_req(ch
, req
, scmnd
,
2151 be32_to_cpu(rsp
->req_lim_delta
));
2153 scmnd
->host_scribble
= NULL
;
2154 scmnd
->scsi_done(scmnd
);
2158 static int srp_response_common(struct srp_rdma_ch
*ch
, s32 req_delta
,
2161 struct srp_target_port
*target
= ch
->target
;
2162 struct ib_device
*dev
= target
->srp_host
->srp_dev
->dev
;
2163 unsigned long flags
;
2167 spin_lock_irqsave(&ch
->lock
, flags
);
2168 ch
->req_lim
+= req_delta
;
2169 iu
= __srp_get_tx_iu(ch
, SRP_IU_RSP
);
2170 spin_unlock_irqrestore(&ch
->lock
, flags
);
2173 shost_printk(KERN_ERR
, target
->scsi_host
, PFX
2174 "no IU available to send response\n");
2178 ib_dma_sync_single_for_cpu(dev
, iu
->dma
, len
, DMA_TO_DEVICE
);
2179 memcpy(iu
->buf
, rsp
, len
);
2180 ib_dma_sync_single_for_device(dev
, iu
->dma
, len
, DMA_TO_DEVICE
);
2182 err
= srp_post_send(ch
, iu
, len
);
2184 shost_printk(KERN_ERR
, target
->scsi_host
, PFX
2185 "unable to post response: %d\n", err
);
2186 srp_put_tx_iu(ch
, iu
, SRP_IU_RSP
);
2192 static void srp_process_cred_req(struct srp_rdma_ch
*ch
,
2193 struct srp_cred_req
*req
)
2195 struct srp_cred_rsp rsp
= {
2196 .opcode
= SRP_CRED_RSP
,
2199 s32 delta
= be32_to_cpu(req
->req_lim_delta
);
2201 if (srp_response_common(ch
, delta
, &rsp
, sizeof(rsp
)))
2202 shost_printk(KERN_ERR
, ch
->target
->scsi_host
, PFX
2203 "problems processing SRP_CRED_REQ\n");
2206 static void srp_process_aer_req(struct srp_rdma_ch
*ch
,
2207 struct srp_aer_req
*req
)
2209 struct srp_target_port
*target
= ch
->target
;
2210 struct srp_aer_rsp rsp
= {
2211 .opcode
= SRP_AER_RSP
,
2214 s32 delta
= be32_to_cpu(req
->req_lim_delta
);
2216 shost_printk(KERN_ERR
, target
->scsi_host
, PFX
2217 "ignoring AER for LUN %llu\n", scsilun_to_int(&req
->lun
));
2219 if (srp_response_common(ch
, delta
, &rsp
, sizeof(rsp
)))
2220 shost_printk(KERN_ERR
, target
->scsi_host
, PFX
2221 "problems processing SRP_AER_REQ\n");
static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
	struct srp_rdma_ch *ch = cq->cq_context;
	struct srp_target_port *target = ch->target;
	struct ib_device *dev = target->srp_host->srp_dev->dev;
	int res;
	u8 opcode;

	if (unlikely(wc->status != IB_WC_SUCCESS)) {
		srp_handle_qp_err(cq, wc, "RECV");
		return;
	}

	ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_ti_iu_len,
				   DMA_FROM_DEVICE);

	opcode = *(u8 *) iu->buf;

	if (0) {
		shost_printk(KERN_ERR, target->scsi_host,
			     PFX "recv completion, opcode 0x%02x\n", opcode);
		print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 8, 1,
			       iu->buf, wc->byte_len, true);
	}

	switch (opcode) {
	case SRP_RSP:
		srp_process_rsp(ch, iu->buf);
		break;

	case SRP_CRED_REQ:
		srp_process_cred_req(ch, iu->buf);
		break;

	case SRP_AER_REQ:
		srp_process_aer_req(ch, iu->buf);
		break;

	case SRP_T_LOGOUT:
		/* XXX Handle target logout */
		shost_printk(KERN_WARNING, target->scsi_host,
			     PFX "Got target logout request\n");
		break;

	default:
		shost_printk(KERN_WARNING, target->scsi_host,
			     PFX "Unhandled SRP opcode 0x%02x\n", opcode);
		break;
	}

	ib_dma_sync_single_for_device(dev, iu->dma, ch->max_ti_iu_len,
				      DMA_FROM_DEVICE);

	res = srp_post_recv(ch, iu);
	if (res != 0)
		shost_printk(KERN_ERR, target->scsi_host,
			     PFX "Recv failed with error code %d\n", res);
}
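/*
 * Added note: srp_recv_done() illustrates the receive-buffer recycling
 * pattern used throughout this driver - sync the IU for CPU access, dispatch
 * on the SRP opcode, sync the IU back for device access and repost it with
 * srp_post_recv() so that the receive queue stays filled.
 */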
/**
 * srp_tl_err_work() - handle a transport layer error
 * @work: Work structure embedded in an SRP target port.
 *
 * Note: This function may get invoked before the rport has been created,
 * hence the target->rport test.
 */
static void srp_tl_err_work(struct work_struct *work)
{
	struct srp_target_port *target;

	target = container_of(work, struct srp_target_port, tl_err_work);
	if (target->rport)
		srp_start_tl_fail_timers(target->rport);
}
static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
			      const char *opname)
{
	struct srp_rdma_ch *ch = cq->cq_context;
	struct srp_target_port *target = ch->target;

	if (ch->connected && !target->qp_in_error) {
		shost_printk(KERN_ERR, target->scsi_host,
			     PFX "failed %s status %s (%d) for CQE %p\n",
			     opname, ib_wc_status_msg(wc->status), wc->status,
			     wc->wr_cqe);
		queue_work(system_long_wq, &target->tl_err_work);
	}
	target->qp_in_error = true;
}
static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
{
	struct srp_target_port *target = host_to_target(shost);
	struct srp_rport *rport = target->rport;
	struct srp_rdma_ch *ch;
	struct srp_request *req;
	struct srp_iu *iu;
	struct srp_cmd *cmd;
	struct ib_device *dev;
	unsigned long flags;
	u32 tag;
	u16 idx;
	int len, ret;
	const bool in_scsi_eh = !in_interrupt() && current == shost->ehandler;

	/*
	 * The SCSI EH thread is the only context from which srp_queuecommand()
	 * can get invoked for blocked devices (SDEV_BLOCK /
	 * SDEV_CREATED_BLOCK). Avoid racing with srp_reconnect_rport() by
	 * locking the rport mutex if invoked from inside the SCSI EH.
	 */
	if (in_scsi_eh)
		mutex_lock(&rport->mutex);

	scmnd->result = srp_chkready(target->rport);
	if (unlikely(scmnd->result))
		goto err;

	WARN_ON_ONCE(scmnd->request->tag < 0);
	tag = blk_mq_unique_tag(scmnd->request);
	ch = &target->ch[blk_mq_unique_tag_to_hwq(tag)];
	idx = blk_mq_unique_tag_to_tag(tag);
	WARN_ONCE(idx >= target->req_ring_size, "%s: tag %#x: idx %d >= %d\n",
		  dev_name(&shost->shost_gendev), tag, idx,
		  target->req_ring_size);

	spin_lock_irqsave(&ch->lock, flags);
	iu = __srp_get_tx_iu(ch, SRP_IU_CMD);
	spin_unlock_irqrestore(&ch->lock, flags);

	if (!iu)
		goto err;

	req = &ch->req_ring[idx];
	dev = target->srp_host->srp_dev->dev;
	ib_dma_sync_single_for_cpu(dev, iu->dma, target->max_iu_len,
				   DMA_TO_DEVICE);

	scmnd->host_scribble = (void *) req;

	cmd = iu->buf;
	memset(cmd, 0, sizeof *cmd);

	cmd->opcode = SRP_CMD;
	int_to_scsilun(scmnd->device->lun, &cmd->lun);
	cmd->tag    = tag;
	memcpy(cmd->cdb, scmnd->cmnd, scmnd->cmd_len);

	req->scmnd = scmnd;
	req->cmd   = iu;

	len = srp_map_data(scmnd, ch, req);
	if (len < 0) {
		shost_printk(KERN_ERR, target->scsi_host,
			     PFX "Failed to map data (%d)\n", len);
		/*
		 * If we ran out of memory descriptors (-ENOMEM) because an
		 * application is queuing many requests with more than
		 * max_pages_per_mr sg-list elements, tell the SCSI mid-layer
		 * to reduce queue depth temporarily.
		 */
		scmnd->result = len == -ENOMEM ?
			DID_OK << 16 | QUEUE_FULL << 1 : DID_ERROR << 16;
		goto err_iu;
	}

	ib_dma_sync_single_for_device(dev, iu->dma, target->max_iu_len,
				      DMA_TO_DEVICE);

	if (srp_post_send(ch, iu, len)) {
		shost_printk(KERN_ERR, target->scsi_host, PFX "Send failed\n");
		goto err_unmap;
	}

	ret = 0;

unlock_rport:
	if (in_scsi_eh)
		mutex_unlock(&rport->mutex);

	return ret;

err_unmap:
	srp_unmap_data(scmnd, ch, req);

err_iu:
	srp_put_tx_iu(ch, iu, SRP_IU_CMD);

	/*
	 * Avoid that the loops that iterate over the request ring can
	 * encounter a dangling SCSI command pointer.
	 */
	req->scmnd = NULL;

err:
	if (scmnd->result) {
		scmnd->scsi_done(scmnd);
		ret = 0;
	} else {
		ret = SCSI_MLQUEUE_HOST_BUSY;
	}

	goto unlock_rport;
}
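/*
 * Added note on the tag decoding in srp_queuecommand(): blk_mq_unique_tag()
 * combines the hardware queue number and the per-queue tag into one 32-bit
 * value (hardware queue in the upper 16 bits, tag in the lower 16 bits).
 * As an illustrative example, a unique tag of 0x0002000a selects RDMA
 * channel 2 and request ring slot 10. This comment is commentary added for
 * clarity and is not part of the original source.
 */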
/*
 * Note: the resources allocated in this function are freed in
 * srp_free_ch_ib().
 */
static int srp_alloc_iu_bufs(struct srp_rdma_ch *ch)
{
	struct srp_target_port *target = ch->target;
	int i;

	ch->rx_ring = kcalloc(target->queue_size, sizeof(*ch->rx_ring),
			      GFP_KERNEL);
	if (!ch->rx_ring)
		goto err_no_ring;
	ch->tx_ring = kcalloc(target->queue_size, sizeof(*ch->tx_ring),
			      GFP_KERNEL);
	if (!ch->tx_ring)
		goto err_no_ring;

	for (i = 0; i < target->queue_size; ++i) {
		ch->rx_ring[i] = srp_alloc_iu(target->srp_host,
					      ch->max_ti_iu_len,
					      GFP_KERNEL, DMA_FROM_DEVICE);
		if (!ch->rx_ring[i])
			goto err;
	}

	for (i = 0; i < target->queue_size; ++i) {
		ch->tx_ring[i] = srp_alloc_iu(target->srp_host,
					      target->max_iu_len,
					      GFP_KERNEL, DMA_TO_DEVICE);
		if (!ch->tx_ring[i])
			goto err;

		list_add(&ch->tx_ring[i]->list, &ch->free_tx);
	}

	return 0;

err:
	for (i = 0; i < target->queue_size; ++i) {
		srp_free_iu(target->srp_host, ch->rx_ring[i]);
		srp_free_iu(target->srp_host, ch->tx_ring[i]);
	}

err_no_ring:
	kfree(ch->tx_ring);
	ch->tx_ring = NULL;
	kfree(ch->rx_ring);
	ch->rx_ring = NULL;

	return -ENOMEM;
}
static uint32_t srp_compute_rq_tmo(struct ib_qp_attr *qp_attr, int attr_mask)
{
	uint64_t T_tr_ns, max_compl_time_ms;
	uint32_t rq_tmo_jiffies;

	/*
	 * According to section 11.2.4.2 in the IBTA spec (Modify Queue Pair,
	 * table 91), both the QP timeout and the retry count have to be set
	 * for RC QP's during the RTR to RTS transition.
	 */
	WARN_ON_ONCE((attr_mask & (IB_QP_TIMEOUT | IB_QP_RETRY_CNT)) !=
		     (IB_QP_TIMEOUT | IB_QP_RETRY_CNT));

	/*
	 * Set target->rq_tmo_jiffies to one second more than the largest time
	 * it can take before an error completion is generated. See also
	 * C9-140..142 in the IBTA spec for more information about how to
	 * convert the QP Local ACK Timeout value to nanoseconds.
	 */
	T_tr_ns = 4096 * (1ULL << qp_attr->timeout);
	max_compl_time_ms = qp_attr->retry_cnt * 4 * T_tr_ns;
	do_div(max_compl_time_ms, NSEC_PER_MSEC);
	rq_tmo_jiffies = msecs_to_jiffies(max_compl_time_ms + 1000);

	return rq_tmo_jiffies;
}
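/*
 * Added worked example for srp_compute_rq_tmo() (illustrative values, not
 * taken from the code above): with a QP Local ACK Timeout of 19 and a retry
 * count of 7, T_tr = 4096 * 2^19 ns ~= 2.1 s, the largest completion time is
 * 7 * 4 * 2.1 s ~= 60 s, and the resulting rq_tmo_jiffies therefore
 * corresponds to roughly 61 seconds.
 */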
static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
			       const struct srp_login_rsp *lrsp,
			       struct srp_rdma_ch *ch)
{
	struct srp_target_port *target = ch->target;
	struct ib_qp_attr *qp_attr = NULL;
	int attr_mask = 0;
	int ret = 0;
	int i;

	if (lrsp->opcode == SRP_LOGIN_RSP) {
		ch->max_ti_iu_len = be32_to_cpu(lrsp->max_ti_iu_len);
		ch->req_lim       = be32_to_cpu(lrsp->req_lim_delta);

		/*
		 * Reserve credits for task management so we don't
		 * bounce requests back to the SCSI mid-layer.
		 */
		target->scsi_host->can_queue
			= min(ch->req_lim - SRP_TSK_MGMT_SQ_SIZE,
			      target->scsi_host->can_queue);
		target->scsi_host->cmd_per_lun
			= min_t(int, target->scsi_host->can_queue,
				target->scsi_host->cmd_per_lun);
	} else {
		shost_printk(KERN_WARNING, target->scsi_host,
			     PFX "Unhandled RSP opcode %#x\n", lrsp->opcode);
		ret = -ECONNRESET;
		goto error;
	}

	if (!ch->rx_ring) {
		ret = srp_alloc_iu_bufs(ch);
		if (ret)
			goto error;
	}

	for (i = 0; i < target->queue_size; i++) {
		struct srp_iu *iu = ch->rx_ring[i];

		ret = srp_post_recv(ch, iu);
		if (ret)
			goto error;
	}

	if (!target->using_rdma_cm) {
		ret = -ENOMEM;
		qp_attr = kmalloc(sizeof(*qp_attr), GFP_KERNEL);
		if (!qp_attr)
			goto error;

		qp_attr->qp_state = IB_QPS_RTR;
		ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
		if (ret)
			goto error_free;

		ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
		if (ret)
			goto error_free;

		qp_attr->qp_state = IB_QPS_RTS;
		ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
		if (ret)
			goto error_free;

		target->rq_tmo_jiffies = srp_compute_rq_tmo(qp_attr, attr_mask);

		ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
		if (ret)
			goto error_free;

		ret = ib_send_cm_rtu(cm_id, NULL, 0);
	}

error_free:
	kfree(qp_attr);

error:
	ch->status = ret;
}
static void srp_ib_cm_rej_handler(struct ib_cm_id *cm_id,
				  struct ib_cm_event *event,
				  struct srp_rdma_ch *ch)
{
	struct srp_target_port *target = ch->target;
	struct Scsi_Host *shost = target->scsi_host;
	struct ib_class_port_info *cpi;
	int opcode;
	u16 dlid;

	switch (event->param.rej_rcvd.reason) {
	case IB_CM_REJ_PORT_CM_REDIRECT:
		cpi = event->param.rej_rcvd.ari;
		dlid = be16_to_cpu(cpi->redirect_lid);
		sa_path_set_dlid(&ch->ib_cm.path, dlid);
		ch->ib_cm.path.pkey = cpi->redirect_pkey;
		cm_id->remote_cm_qpn = be32_to_cpu(cpi->redirect_qp) & 0x00ffffff;
		memcpy(ch->ib_cm.path.dgid.raw, cpi->redirect_gid, 16);

		ch->status = dlid ? SRP_DLID_REDIRECT : SRP_PORT_REDIRECT;
		break;

	case IB_CM_REJ_PORT_REDIRECT:
		if (srp_target_is_topspin(target)) {
			union ib_gid *dgid = &ch->ib_cm.path.dgid;

			/*
			 * Topspin/Cisco SRP gateways incorrectly send
			 * reject reason code 25 when they mean 24
			 * (port redirect).
			 */
			memcpy(dgid->raw, event->param.rej_rcvd.ari, 16);

			shost_printk(KERN_DEBUG, shost,
				     PFX "Topspin/Cisco redirect to target port GID %016llx%016llx\n",
				     be64_to_cpu(dgid->global.subnet_prefix),
				     be64_to_cpu(dgid->global.interface_id));

			ch->status = SRP_PORT_REDIRECT;
		} else {
			shost_printk(KERN_WARNING, shost,
				     "  REJ reason: IB_CM_REJ_PORT_REDIRECT\n");
			ch->status = -ECONNRESET;
		}
		break;

	case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID:
		shost_printk(KERN_WARNING, shost,
			     "  REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n");
		ch->status = -ECONNRESET;
		break;

	case IB_CM_REJ_CONSUMER_DEFINED:
		opcode = *(u8 *) event->private_data;
		if (opcode == SRP_LOGIN_REJ) {
			struct srp_login_rej *rej = event->private_data;
			u32 reason = be32_to_cpu(rej->reason);

			if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE)
				shost_printk(KERN_WARNING, shost,
					     PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n");
			else
				shost_printk(KERN_WARNING, shost, PFX
					     "SRP LOGIN from %pI6 to %pI6 REJECTED, reason 0x%08x\n",
					     target->sgid.raw,
					     target->ib_cm.orig_dgid.raw,
					     reason);
		} else
			shost_printk(KERN_WARNING, shost,
				     "  REJ reason: IB_CM_REJ_CONSUMER_DEFINED,"
				     " opcode 0x%02x\n", opcode);
		ch->status = -ECONNRESET;
		break;

	case IB_CM_REJ_STALE_CONN:
		shost_printk(KERN_WARNING, shost, "  REJ reason: stale connection\n");
		ch->status = SRP_STALE_CONN;
		break;

	default:
		shost_printk(KERN_WARNING, shost, "  REJ reason 0x%x\n",
			     event->param.rej_rcvd.reason);
		ch->status = -ECONNRESET;
	}
}
static int srp_ib_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
{
	struct srp_rdma_ch *ch = cm_id->context;
	struct srp_target_port *target = ch->target;
	int comp = 0;

	switch (event->event) {
	case IB_CM_REQ_ERROR:
		shost_printk(KERN_DEBUG, target->scsi_host,
			     PFX "Sending CM REQ failed\n");
		comp = 1;
		ch->status = -ECONNRESET;
		break;

	case IB_CM_REP_RECEIVED:
		comp = 1;
		srp_cm_rep_handler(cm_id, event->private_data, ch);
		break;

	case IB_CM_REJ_RECEIVED:
		shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n");
		comp = 1;

		srp_ib_cm_rej_handler(cm_id, event, ch);
		break;

	case IB_CM_DREQ_RECEIVED:
		shost_printk(KERN_WARNING, target->scsi_host,
			     PFX "DREQ received - connection closed\n");
		ch->connected = false;
		if (ib_send_cm_drep(cm_id, NULL, 0))
			shost_printk(KERN_ERR, target->scsi_host,
				     PFX "Sending CM DREP failed\n");
		queue_work(system_long_wq, &target->tl_err_work);
		break;

	case IB_CM_TIMEWAIT_EXIT:
		shost_printk(KERN_ERR, target->scsi_host,
			     PFX "connection closed\n");
		comp = 1;

		ch->status = 0;
		break;

	case IB_CM_MRA_RECEIVED:
	case IB_CM_DREQ_ERROR:
	case IB_CM_DREP_RECEIVED:
		break;

	default:
		shost_printk(KERN_WARNING, target->scsi_host,
			     PFX "Unhandled CM event %d\n", event->event);
		break;
	}

	if (comp)
		complete(&ch->done);

	return 0;
}
static void srp_rdma_cm_rej_handler(struct srp_rdma_ch *ch,
				    struct rdma_cm_event *event)
{
	struct srp_target_port *target = ch->target;
	struct Scsi_Host *shost = target->scsi_host;
	int opcode;

	switch (event->status) {
	case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID:
		shost_printk(KERN_WARNING, shost,
			     "  REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n");
		ch->status = -ECONNRESET;
		break;

	case IB_CM_REJ_CONSUMER_DEFINED:
		opcode = *(u8 *) event->param.conn.private_data;
		if (opcode == SRP_LOGIN_REJ) {
			struct srp_login_rej *rej =
				(struct srp_login_rej *)
				event->param.conn.private_data;
			u32 reason = be32_to_cpu(rej->reason);

			if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE)
				shost_printk(KERN_WARNING, shost,
					     PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n");
			else
				shost_printk(KERN_WARNING, shost,
					     PFX "SRP LOGIN REJECTED, reason 0x%08x\n", reason);
		} else {
			shost_printk(KERN_WARNING, shost,
				     "  REJ reason: IB_CM_REJ_CONSUMER_DEFINED, opcode 0x%02x\n",
				     opcode);
		}
		ch->status = -ECONNRESET;
		break;

	case IB_CM_REJ_STALE_CONN:
		shost_printk(KERN_WARNING, shost,
			     "  REJ reason: stale connection\n");
		ch->status = SRP_STALE_CONN;
		break;

	default:
		shost_printk(KERN_WARNING, shost, "  REJ reason 0x%x\n",
			     event->status);
		ch->status = -ECONNRESET;
		break;
	}
}
static int srp_rdma_cm_handler(struct rdma_cm_id *cm_id,
			       struct rdma_cm_event *event)
{
	struct srp_rdma_ch *ch = cm_id->context;
	struct srp_target_port *target = ch->target;
	int comp = 0;

	switch (event->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
		ch->status = 0;
		comp = 1;
		break;

	case RDMA_CM_EVENT_ADDR_ERROR:
		ch->status = -ENXIO;
		comp = 1;
		break;

	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		ch->status = 0;
		comp = 1;
		break;

	case RDMA_CM_EVENT_ROUTE_ERROR:
	case RDMA_CM_EVENT_UNREACHABLE:
		ch->status = -EHOSTUNREACH;
		comp = 1;
		break;

	case RDMA_CM_EVENT_CONNECT_ERROR:
		shost_printk(KERN_DEBUG, target->scsi_host,
			     PFX "Sending CM REQ failed\n");
		comp = 1;
		ch->status = -ECONNRESET;
		break;

	case RDMA_CM_EVENT_ESTABLISHED:
		comp = 1;
		srp_cm_rep_handler(NULL, event->param.conn.private_data, ch);
		break;

	case RDMA_CM_EVENT_REJECTED:
		shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n");
		comp = 1;

		srp_rdma_cm_rej_handler(ch, event);
		break;

	case RDMA_CM_EVENT_DISCONNECTED:
		if (ch->connected) {
			shost_printk(KERN_WARNING, target->scsi_host,
				     PFX "received DREQ\n");
			rdma_disconnect(ch->rdma_cm.cm_id);
			comp = 1;
			ch->status = 0;
			queue_work(system_long_wq, &target->tl_err_work);
		}
		break;

	case RDMA_CM_EVENT_TIMEWAIT_EXIT:
		shost_printk(KERN_ERR, target->scsi_host,
			     PFX "connection closed\n");
		comp = 1;
		ch->status = 0;
		break;

	default:
		shost_printk(KERN_WARNING, target->scsi_host,
			     PFX "Unhandled CM event %d\n", event->event);
		break;
	}

	if (comp)
		complete(&ch->done);

	return 0;
}
/**
 * srp_change_queue_depth - setting device queue depth
 * @sdev: scsi device struct
 * @qdepth: requested queue depth
 *
 * Returns queue depth.
 */
static int
srp_change_queue_depth(struct scsi_device *sdev, int qdepth)
{
	if (!sdev->tagged_supported)
		qdepth = 1;
	return scsi_change_queue_depth(sdev, qdepth);
}
static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun,
			     u8 func, u8 *status)
{
	struct srp_target_port *target = ch->target;
	struct srp_rport *rport = target->rport;
	struct ib_device *dev = target->srp_host->srp_dev->dev;
	struct srp_iu *iu;
	struct srp_tsk_mgmt *tsk_mgmt;
	int res;

	if (!ch->connected || target->qp_in_error)
		return -1;

	/*
	 * Lock the rport mutex to avoid that srp_create_ch_ib() is
	 * invoked while a task management function is being sent.
	 */
	mutex_lock(&rport->mutex);
	spin_lock_irq(&ch->lock);
	iu = __srp_get_tx_iu(ch, SRP_IU_TSK_MGMT);
	spin_unlock_irq(&ch->lock);

	if (!iu) {
		mutex_unlock(&rport->mutex);

		return -1;
	}

	ib_dma_sync_single_for_cpu(dev, iu->dma, sizeof *tsk_mgmt,
				   DMA_TO_DEVICE);
	tsk_mgmt = iu->buf;
	memset(tsk_mgmt, 0, sizeof *tsk_mgmt);

	tsk_mgmt->opcode	= SRP_TSK_MGMT;
	int_to_scsilun(lun, &tsk_mgmt->lun);
	tsk_mgmt->tsk_mgmt_func = func;
	tsk_mgmt->task_tag	= req_tag;

	spin_lock_irq(&ch->lock);
	ch->tsk_mgmt_tag = (ch->tsk_mgmt_tag + 1) | SRP_TAG_TSK_MGMT;
	tsk_mgmt->tag = ch->tsk_mgmt_tag;
	spin_unlock_irq(&ch->lock);

	init_completion(&ch->tsk_mgmt_done);

	ib_dma_sync_single_for_device(dev, iu->dma, sizeof *tsk_mgmt,
				      DMA_TO_DEVICE);
	if (srp_post_send(ch, iu, sizeof(*tsk_mgmt))) {
		srp_put_tx_iu(ch, iu, SRP_IU_TSK_MGMT);
		mutex_unlock(&rport->mutex);

		return -1;
	}
	res = wait_for_completion_timeout(&ch->tsk_mgmt_done,
					  msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS));
	if (res > 0 && status)
		*status = ch->tsk_mgmt_status;
	mutex_unlock(&rport->mutex);

	WARN_ON_ONCE(res < 0);

	return res > 0 ? 0 : -1;
}
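/*
 * Added note: task management requests use a tag space of their own.
 * srp_send_tsk_mgmt() sets the SRP_TAG_TSK_MGMT bit in ch->tsk_mgmt_tag so
 * that response processing can always distinguish these tags from the
 * block-layer tags used for regular SCSI commands in srp_queuecommand().
 */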
static int srp_abort(struct scsi_cmnd *scmnd)
{
	struct srp_target_port *target = host_to_target(scmnd->device->host);
	struct srp_request *req = (struct srp_request *) scmnd->host_scribble;
	u32 tag;
	u16 ch_idx;
	struct srp_rdma_ch *ch;
	int ret;

	shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n");

	if (!req)
		return SUCCESS;
	tag = blk_mq_unique_tag(scmnd->request);
	ch_idx = blk_mq_unique_tag_to_hwq(tag);
	if (WARN_ON_ONCE(ch_idx >= target->ch_count))
		return SUCCESS;
	ch = &target->ch[ch_idx];
	if (!srp_claim_req(ch, req, NULL, scmnd))
		return SUCCESS;
	shost_printk(KERN_ERR, target->scsi_host,
		     "Sending SRP abort for tag %#x\n", tag);
	if (srp_send_tsk_mgmt(ch, tag, scmnd->device->lun,
			      SRP_TSK_ABORT_TASK, NULL) == 0)
		ret = SUCCESS;
	else if (target->rport->state == SRP_RPORT_LOST)
		ret = FAST_IO_FAIL;
	else
		ret = FAILED;
	if (ret == SUCCESS) {
		srp_free_req(ch, req, scmnd, 0);
		scmnd->result = DID_ABORT << 16;
		scmnd->scsi_done(scmnd);
	}

	return ret;
}
static int srp_reset_device(struct scsi_cmnd *scmnd)
{
	struct srp_target_port *target = host_to_target(scmnd->device->host);
	struct srp_rdma_ch *ch;
	int i;
	u8 status;

	shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n");

	ch = &target->ch[0];
	if (srp_send_tsk_mgmt(ch, SRP_TAG_NO_REQ, scmnd->device->lun,
			      SRP_TSK_LUN_RESET, &status))
		return FAILED;
	if (status)
		return FAILED;

	for (i = 0; i < target->ch_count; i++) {
		ch = &target->ch[i];
		for (i = 0; i < target->req_ring_size; ++i) {
			struct srp_request *req = &ch->req_ring[i];

			srp_finish_req(ch, req, scmnd->device, DID_RESET << 16);
		}
	}

	return SUCCESS;
}
static int srp_reset_host(struct scsi_cmnd *scmnd)
{
	struct srp_target_port *target = host_to_target(scmnd->device->host);

	shost_printk(KERN_ERR, target->scsi_host, PFX "SRP reset_host called\n");

	return srp_reconnect_rport(target->rport) == 0 ? SUCCESS : FAILED;
}
static int srp_target_alloc(struct scsi_target *starget)
{
	struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
	struct srp_target_port *target = host_to_target(shost);

	if (target->target_can_queue)
		starget->can_queue = target->target_can_queue;
	return 0;
}
static int srp_slave_alloc(struct scsi_device *sdev)
{
	struct Scsi_Host *shost = sdev->host;
	struct srp_target_port *target = host_to_target(shost);
	struct srp_device *srp_dev = target->srp_host->srp_dev;

	blk_queue_virt_boundary(sdev->request_queue,
				~srp_dev->mr_page_mask);

	return 0;
}
static int srp_slave_configure(struct scsi_device *sdev)
{
	struct Scsi_Host *shost = sdev->host;
	struct srp_target_port *target = host_to_target(shost);
	struct request_queue *q = sdev->request_queue;
	unsigned long timeout;

	if (sdev->type == TYPE_DISK) {
		timeout = max_t(unsigned, 30 * HZ, target->rq_tmo_jiffies);
		blk_queue_rq_timeout(q, timeout);
	}

	return 0;
}
static ssize_t show_id_ext(struct device *dev, struct device_attribute *attr,
			   char *buf)
{
	struct srp_target_port *target = host_to_target(class_to_shost(dev));

	return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->id_ext));
}

static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr,
			     char *buf)
{
	struct srp_target_port *target = host_to_target(class_to_shost(dev));

	return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->ioc_guid));
}

static ssize_t show_service_id(struct device *dev,
			       struct device_attribute *attr, char *buf)
{
	struct srp_target_port *target = host_to_target(class_to_shost(dev));

	if (target->using_rdma_cm)
		return -ENOENT;
	return sprintf(buf, "0x%016llx\n",
		       be64_to_cpu(target->ib_cm.service_id));
}

static ssize_t show_pkey(struct device *dev, struct device_attribute *attr,
			 char *buf)
{
	struct srp_target_port *target = host_to_target(class_to_shost(dev));

	if (target->using_rdma_cm)
		return -ENOENT;
	return sprintf(buf, "0x%04x\n", be16_to_cpu(target->ib_cm.pkey));
}

static ssize_t show_sgid(struct device *dev, struct device_attribute *attr,
			 char *buf)
{
	struct srp_target_port *target = host_to_target(class_to_shost(dev));

	return sprintf(buf, "%pI6\n", target->sgid.raw);
}

static ssize_t show_dgid(struct device *dev, struct device_attribute *attr,
			 char *buf)
{
	struct srp_target_port *target = host_to_target(class_to_shost(dev));
	struct srp_rdma_ch *ch = &target->ch[0];

	if (target->using_rdma_cm)
		return -ENOENT;
	return sprintf(buf, "%pI6\n", ch->ib_cm.path.dgid.raw);
}

static ssize_t show_orig_dgid(struct device *dev,
			      struct device_attribute *attr, char *buf)
{
	struct srp_target_port *target = host_to_target(class_to_shost(dev));

	if (target->using_rdma_cm)
		return -ENOENT;
	return sprintf(buf, "%pI6\n", target->ib_cm.orig_dgid.raw);
}

static ssize_t show_req_lim(struct device *dev,
			    struct device_attribute *attr, char *buf)
{
	struct srp_target_port *target = host_to_target(class_to_shost(dev));
	struct srp_rdma_ch *ch;
	int i, req_lim = INT_MAX;

	for (i = 0; i < target->ch_count; i++) {
		ch = &target->ch[i];
		req_lim = min(req_lim, ch->req_lim);
	}
	return sprintf(buf, "%d\n", req_lim);
}

static ssize_t show_zero_req_lim(struct device *dev,
				 struct device_attribute *attr, char *buf)
{
	struct srp_target_port *target = host_to_target(class_to_shost(dev));

	return sprintf(buf, "%d\n", target->zero_req_lim);
}

static ssize_t show_local_ib_port(struct device *dev,
				  struct device_attribute *attr, char *buf)
{
	struct srp_target_port *target = host_to_target(class_to_shost(dev));

	return sprintf(buf, "%d\n", target->srp_host->port);
}

static ssize_t show_local_ib_device(struct device *dev,
				    struct device_attribute *attr, char *buf)
{
	struct srp_target_port *target = host_to_target(class_to_shost(dev));

	return sprintf(buf, "%s\n", target->srp_host->srp_dev->dev->name);
}

static ssize_t show_ch_count(struct device *dev, struct device_attribute *attr,
			     char *buf)
{
	struct srp_target_port *target = host_to_target(class_to_shost(dev));

	return sprintf(buf, "%d\n", target->ch_count);
}

static ssize_t show_comp_vector(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	struct srp_target_port *target = host_to_target(class_to_shost(dev));

	return sprintf(buf, "%d\n", target->comp_vector);
}

static ssize_t show_tl_retry_count(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	struct srp_target_port *target = host_to_target(class_to_shost(dev));

	return sprintf(buf, "%d\n", target->tl_retry_count);
}

static ssize_t show_cmd_sg_entries(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	struct srp_target_port *target = host_to_target(class_to_shost(dev));

	return sprintf(buf, "%u\n", target->cmd_sg_cnt);
}

static ssize_t show_allow_ext_sg(struct device *dev,
				 struct device_attribute *attr, char *buf)
{
	struct srp_target_port *target = host_to_target(class_to_shost(dev));

	return sprintf(buf, "%s\n", target->allow_ext_sg ? "true" : "false");
}
static DEVICE_ATTR(id_ext,	    S_IRUGO, show_id_ext,	   NULL);
static DEVICE_ATTR(ioc_guid,	    S_IRUGO, show_ioc_guid,	   NULL);
static DEVICE_ATTR(service_id,	    S_IRUGO, show_service_id,	   NULL);
static DEVICE_ATTR(pkey,	    S_IRUGO, show_pkey,		   NULL);
static DEVICE_ATTR(sgid,	    S_IRUGO, show_sgid,		   NULL);
static DEVICE_ATTR(dgid,	    S_IRUGO, show_dgid,		   NULL);
static DEVICE_ATTR(orig_dgid,	    S_IRUGO, show_orig_dgid,	   NULL);
static DEVICE_ATTR(req_lim,	    S_IRUGO, show_req_lim,	   NULL);
static DEVICE_ATTR(zero_req_lim,    S_IRUGO, show_zero_req_lim,	   NULL);
static DEVICE_ATTR(local_ib_port,   S_IRUGO, show_local_ib_port,   NULL);
static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL);
static DEVICE_ATTR(ch_count,	    S_IRUGO, show_ch_count,	   NULL);
static DEVICE_ATTR(comp_vector,	    S_IRUGO, show_comp_vector,	   NULL);
static DEVICE_ATTR(tl_retry_count,  S_IRUGO, show_tl_retry_count,  NULL);
static DEVICE_ATTR(cmd_sg_entries,  S_IRUGO, show_cmd_sg_entries,  NULL);
static DEVICE_ATTR(allow_ext_sg,    S_IRUGO, show_allow_ext_sg,    NULL);
static struct device_attribute *srp_host_attrs[] = {
	&dev_attr_id_ext,
	&dev_attr_ioc_guid,
	&dev_attr_service_id,
	&dev_attr_pkey,
	&dev_attr_sgid,
	&dev_attr_dgid,
	&dev_attr_orig_dgid,
	&dev_attr_req_lim,
	&dev_attr_zero_req_lim,
	&dev_attr_local_ib_port,
	&dev_attr_local_ib_device,
	&dev_attr_ch_count,
	&dev_attr_comp_vector,
	&dev_attr_tl_retry_count,
	&dev_attr_cmd_sg_entries,
	&dev_attr_allow_ext_sg,
	NULL
};
static struct scsi_host_template srp_template = {
	.module				= THIS_MODULE,
	.name				= "InfiniBand SRP initiator",
	.proc_name			= DRV_NAME,
	.target_alloc			= srp_target_alloc,
	.slave_alloc			= srp_slave_alloc,
	.slave_configure		= srp_slave_configure,
	.info				= srp_target_info,
	.queuecommand			= srp_queuecommand,
	.change_queue_depth		= srp_change_queue_depth,
	.eh_timed_out			= srp_timed_out,
	.eh_abort_handler		= srp_abort,
	.eh_device_reset_handler	= srp_reset_device,
	.eh_host_reset_handler		= srp_reset_host,
	.skip_settle_delay		= true,
	.sg_tablesize			= SRP_DEF_SG_TABLESIZE,
	.can_queue			= SRP_DEFAULT_CMD_SQ_SIZE,
	.this_id			= -1,
	.cmd_per_lun			= SRP_DEFAULT_CMD_SQ_SIZE,
	.use_clustering			= ENABLE_CLUSTERING,
	.shost_attrs			= srp_host_attrs,
	.track_queue_depth		= 1,
};
static int srp_sdev_count(struct Scsi_Host *host)
{
	struct scsi_device *sdev;
	int c = 0;

	shost_for_each_device(sdev, host)
		c++;

	return c;
}
/*
 * Return values:
 * < 0 upon failure. Caller is responsible for SRP target port cleanup.
 * 0 and target->state == SRP_TARGET_REMOVED if asynchronous target port
 *    removal has been scheduled.
 * 0 and target->state != SRP_TARGET_REMOVED upon success.
 */
static int srp_add_target(struct srp_host *host, struct srp_target_port *target)
{
	struct srp_rport_identifiers ids;
	struct srp_rport *rport;

	target->state = SRP_TARGET_SCANNING;
	sprintf(target->target_name, "SRP.T10:%016llX",
		be64_to_cpu(target->id_ext));

	if (scsi_add_host(target->scsi_host, host->srp_dev->dev->dev.parent))
		return -ENODEV;

	memcpy(ids.port_id, &target->id_ext, 8);
	memcpy(ids.port_id + 8, &target->ioc_guid, 8);
	ids.roles = SRP_RPORT_ROLE_TARGET;
	rport = srp_rport_add(target->scsi_host, &ids);
	if (IS_ERR(rport)) {
		scsi_remove_host(target->scsi_host);
		return PTR_ERR(rport);
	}

	rport->lld_data = target;
	target->rport = rport;

	spin_lock(&host->target_lock);
	list_add_tail(&target->list, &host->target_list);
	spin_unlock(&host->target_lock);

	scsi_scan_target(&target->scsi_host->shost_gendev,
			 0, target->scsi_id, SCAN_WILD_CARD, SCSI_SCAN_INITIAL);

	if (srp_connected_ch(target) < target->ch_count ||
	    target->qp_in_error) {
		shost_printk(KERN_INFO, target->scsi_host,
			     PFX "SCSI scan failed - removing SCSI host\n");
		srp_queue_remove_work(target);
		goto out;
	}

	pr_debug("%s: SCSI scan succeeded - detected %d LUNs\n",
		 dev_name(&target->scsi_host->shost_gendev),
		 srp_sdev_count(target->scsi_host));

	spin_lock_irq(&target->lock);
	if (target->state == SRP_TARGET_SCANNING)
		target->state = SRP_TARGET_LIVE;
	spin_unlock_irq(&target->lock);

out:
	return 0;
}
static void srp_release_dev(struct device *dev)
{
	struct srp_host *host =
		container_of(dev, struct srp_host, dev);

	complete(&host->released);
}
static struct class srp_class = {
	.name    = "infiniband_srp",
	.dev_release = srp_release_dev
};
/**
 * srp_conn_unique() - check whether the connection to a target is unique
 * @host:   SRP host.
 * @target: SRP target port.
 */
static bool srp_conn_unique(struct srp_host *host,
			    struct srp_target_port *target)
{
	struct srp_target_port *t;
	bool ret = false;

	if (target->state == SRP_TARGET_REMOVED)
		goto out;

	ret = true;

	spin_lock(&host->target_lock);
	list_for_each_entry(t, &host->target_list, list) {
		if (t != target &&
		    target->id_ext == t->id_ext &&
		    target->ioc_guid == t->ioc_guid &&
		    (!target->using_rdma_cm ||
		     memcmp(&target->rdma_cm.dst, &t->rdma_cm.dst,
			    sizeof(target->rdma_cm.dst)) == 0) &&
		    target->initiator_ext == t->initiator_ext) {
			ret = false;
			break;
		}
	}
	spin_unlock(&host->target_lock);

out:
	return ret;
}
/*
 * Target ports are added by writing
 *
 * id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>,dgid=<dest GID>,
 * pkey=<P_Key>,service_id=<service ID>
 * or
 * id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>,
 * [src=<IPv4 address>,]dest=<IPv4 address>:<port number>
 *
 * to the add_target sysfs attribute.
 */
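/*
 * Example (illustrative only - the id_ext, ioc_guid, dgid and service_id
 * values and the "srp-<hca>-<port>" device name are placeholders, not values
 * taken from this file):
 *
 * echo "id_ext=0x...,ioc_guid=0x...,dgid=<32 hex digits>,pkey=ffff,service_id=0x..." \
 *	> /sys/class/infiniband_srp/srp-<hca>-<port>/add_target
 */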
enum {
	SRP_OPT_ERR		= 0,
	SRP_OPT_ID_EXT		= 1 << 0,
	SRP_OPT_IOC_GUID	= 1 << 1,
	SRP_OPT_DGID		= 1 << 2,
	SRP_OPT_PKEY		= 1 << 3,
	SRP_OPT_SERVICE_ID	= 1 << 4,
	SRP_OPT_MAX_SECT	= 1 << 5,
	SRP_OPT_MAX_CMD_PER_LUN	= 1 << 6,
	SRP_OPT_IO_CLASS	= 1 << 7,
	SRP_OPT_INITIATOR_EXT	= 1 << 8,
	SRP_OPT_CMD_SG_ENTRIES	= 1 << 9,
	SRP_OPT_ALLOW_EXT_SG	= 1 << 10,
	SRP_OPT_SG_TABLESIZE	= 1 << 11,
	SRP_OPT_COMP_VECTOR	= 1 << 12,
	SRP_OPT_TL_RETRY_COUNT	= 1 << 13,
	SRP_OPT_QUEUE_SIZE	= 1 << 14,
	SRP_OPT_IP_SRC		= 1 << 15,
	SRP_OPT_IP_DEST		= 1 << 16,
	SRP_OPT_TARGET_CAN_QUEUE = 1 << 17,
};

static unsigned int srp_opt_mandatory[] = {
	SRP_OPT_ID_EXT		|
	SRP_OPT_IOC_GUID	|
	SRP_OPT_DGID		|
	SRP_OPT_PKEY		|
	SRP_OPT_SERVICE_ID,
	SRP_OPT_ID_EXT		|
	SRP_OPT_IOC_GUID	|
	SRP_OPT_IP_DEST,
};

static const match_table_t srp_opt_tokens = {
	{ SRP_OPT_ID_EXT,		"id_ext=%s"		},
	{ SRP_OPT_IOC_GUID,		"ioc_guid=%s"		},
	{ SRP_OPT_DGID,			"dgid=%s"		},
	{ SRP_OPT_PKEY,			"pkey=%x"		},
	{ SRP_OPT_SERVICE_ID,		"service_id=%s"		},
	{ SRP_OPT_MAX_SECT,		"max_sect=%d"		},
	{ SRP_OPT_MAX_CMD_PER_LUN,	"max_cmd_per_lun=%d"	},
	{ SRP_OPT_TARGET_CAN_QUEUE,	"target_can_queue=%d"	},
	{ SRP_OPT_IO_CLASS,		"io_class=%x"		},
	{ SRP_OPT_INITIATOR_EXT,	"initiator_ext=%s"	},
	{ SRP_OPT_CMD_SG_ENTRIES,	"cmd_sg_entries=%u"	},
	{ SRP_OPT_ALLOW_EXT_SG,		"allow_ext_sg=%u"	},
	{ SRP_OPT_SG_TABLESIZE,		"sg_tablesize=%u"	},
	{ SRP_OPT_COMP_VECTOR,		"comp_vector=%u"	},
	{ SRP_OPT_TL_RETRY_COUNT,	"tl_retry_count=%u"	},
	{ SRP_OPT_QUEUE_SIZE,		"queue_size=%d"		},
	{ SRP_OPT_IP_SRC,		"src=%s"		},
	{ SRP_OPT_IP_DEST,		"dest=%s"		},
	{ SRP_OPT_ERR,			NULL			}
};
static int srp_parse_in(struct net *net, struct sockaddr_storage *sa,
			const char *addr_port_str)
{
	char *addr = kstrdup(addr_port_str, GFP_KERNEL);
	char *port_str = addr;
	int ret;

	if (!addr)
		return -ENOMEM;
	strsep(&port_str, ":");
	ret = inet_pton_with_scope(net, AF_UNSPEC, addr, port_str, sa);
	kfree(addr);
	return ret;
}

static int srp_parse_options(struct net *net, const char *buf,
			     struct srp_target_port *target)
{
	char *options, *sep_opt;
	char *p;
	substring_t args[MAX_OPT_ARGS];
	unsigned long long ull;
	int opt_mask = 0;
	int token;
	int ret = -EINVAL;
	int i;

	options = kstrdup(buf, GFP_KERNEL);
	if (!options)
		return -ENOMEM;

	sep_opt = options;
	while ((p = strsep(&sep_opt, ",\n")) != NULL) {
		if (!*p)
			continue;

		token = match_token(p, srp_opt_tokens, args);
		opt_mask |= token;

		switch (token) {
		case SRP_OPT_ID_EXT:
			p = match_strdup(args);
			if (!p) {
				ret = -ENOMEM;
				goto out;
			}
			ret = kstrtoull(p, 16, &ull);
			if (ret) {
				pr_warn("invalid id_ext parameter '%s'\n", p);
				kfree(p);
				goto out;
			}
			target->id_ext = cpu_to_be64(ull);
			kfree(p);
			break;

		case SRP_OPT_IOC_GUID:
			p = match_strdup(args);
			if (!p) {
				ret = -ENOMEM;
				goto out;
			}
			ret = kstrtoull(p, 16, &ull);
			if (ret) {
				pr_warn("invalid ioc_guid parameter '%s'\n", p);
				kfree(p);
				goto out;
			}
			target->ioc_guid = cpu_to_be64(ull);
			kfree(p);
			break;

		case SRP_OPT_DGID:
			p = match_strdup(args);
			if (!p) {
				ret = -ENOMEM;
				goto out;
			}
			if (strlen(p) != 32) {
				pr_warn("bad dest GID parameter '%s'\n", p);
				kfree(p);
				goto out;
			}

			ret = hex2bin(target->ib_cm.orig_dgid.raw, p, 16);
			kfree(p);
			if (ret < 0)
				goto out;
			break;

		case SRP_OPT_PKEY:
			if (match_hex(args, &token)) {
				pr_warn("bad P_Key parameter '%s'\n", p);
				goto out;
			}
			target->ib_cm.pkey = cpu_to_be16(token);
			break;

		case SRP_OPT_SERVICE_ID:
			p = match_strdup(args);
			if (!p) {
				ret = -ENOMEM;
				goto out;
			}
			ret = kstrtoull(p, 16, &ull);
			if (ret) {
				pr_warn("bad service_id parameter '%s'\n", p);
				kfree(p);
				goto out;
			}
			target->ib_cm.service_id = cpu_to_be64(ull);
			kfree(p);
			break;

		case SRP_OPT_IP_SRC:
			p = match_strdup(args);
			if (!p) {
				ret = -ENOMEM;
				goto out;
			}
			ret = srp_parse_in(net, &target->rdma_cm.src.ss, p);
			if (ret < 0) {
				pr_warn("bad source parameter '%s'\n", p);
				kfree(p);
				goto out;
			}
			target->rdma_cm.src_specified = true;
			kfree(p);
			break;

		case SRP_OPT_IP_DEST:
			p = match_strdup(args);
			if (!p) {
				ret = -ENOMEM;
				goto out;
			}
			ret = srp_parse_in(net, &target->rdma_cm.dst.ss, p);
			if (ret < 0) {
				pr_warn("bad dest parameter '%s'\n", p);
				kfree(p);
				goto out;
			}
			target->using_rdma_cm = true;
			kfree(p);
			break;

		case SRP_OPT_MAX_SECT:
			if (match_int(args, &token)) {
				pr_warn("bad max sect parameter '%s'\n", p);
				goto out;
			}
			target->scsi_host->max_sectors = token;
			break;

		case SRP_OPT_QUEUE_SIZE:
			if (match_int(args, &token) || token < 1) {
				pr_warn("bad queue_size parameter '%s'\n", p);
				goto out;
			}
			target->scsi_host->can_queue = token;
			target->queue_size = token + SRP_RSP_SQ_SIZE +
					     SRP_TSK_MGMT_SQ_SIZE;
			if (!(opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
				target->scsi_host->cmd_per_lun = token;
			break;

		case SRP_OPT_MAX_CMD_PER_LUN:
			if (match_int(args, &token) || token < 1) {
				pr_warn("bad max cmd_per_lun parameter '%s'\n",
					p);
				goto out;
			}
			target->scsi_host->cmd_per_lun = token;
			break;

		case SRP_OPT_TARGET_CAN_QUEUE:
			if (match_int(args, &token) || token < 1) {
				pr_warn("bad max target_can_queue parameter '%s'\n",
					p);
				goto out;
			}
			target->target_can_queue = token;
			break;

		case SRP_OPT_IO_CLASS:
			if (match_hex(args, &token)) {
				pr_warn("bad IO class parameter '%s'\n", p);
				goto out;
			}
			if (token != SRP_REV10_IB_IO_CLASS &&
			    token != SRP_REV16A_IB_IO_CLASS) {
				pr_warn("unknown IO class parameter value %x specified (use %x or %x).\n",
					token, SRP_REV10_IB_IO_CLASS,
					SRP_REV16A_IB_IO_CLASS);
				goto out;
			}
			target->io_class = token;
			break;

		case SRP_OPT_INITIATOR_EXT:
			p = match_strdup(args);
			if (!p) {
				ret = -ENOMEM;
				goto out;
			}
			ret = kstrtoull(p, 16, &ull);
			if (ret) {
				pr_warn("bad initiator_ext value '%s'\n", p);
				kfree(p);
				goto out;
			}
			target->initiator_ext = cpu_to_be64(ull);
			kfree(p);
			break;

		case SRP_OPT_CMD_SG_ENTRIES:
			if (match_int(args, &token) || token < 1 || token > 255) {
				pr_warn("bad max cmd_sg_entries parameter '%s'\n",
					p);
				goto out;
			}
			target->cmd_sg_cnt = token;
			break;

		case SRP_OPT_ALLOW_EXT_SG:
			if (match_int(args, &token)) {
				pr_warn("bad allow_ext_sg parameter '%s'\n", p);
				goto out;
			}
			target->allow_ext_sg = !!token;
			break;

		case SRP_OPT_SG_TABLESIZE:
			if (match_int(args, &token) || token < 1 ||
			    token > SG_MAX_SEGMENTS) {
				pr_warn("bad max sg_tablesize parameter '%s'\n",
					p);
				goto out;
			}
			target->sg_tablesize = token;
			break;

		case SRP_OPT_COMP_VECTOR:
			if (match_int(args, &token) || token < 0) {
				pr_warn("bad comp_vector parameter '%s'\n", p);
				goto out;
			}
			target->comp_vector = token;
			break;

		case SRP_OPT_TL_RETRY_COUNT:
			if (match_int(args, &token) || token < 2 || token > 7) {
				pr_warn("bad tl_retry_count parameter '%s' (must be a number between 2 and 7)\n",
					p);
				goto out;
			}
			target->tl_retry_count = token;
			break;

		default:
			pr_warn("unknown parameter or missing value '%s' in target creation request\n",
				p);
			goto out;
		}
	}

	for (i = 0; i < ARRAY_SIZE(srp_opt_mandatory); i++) {
		if ((opt_mask & srp_opt_mandatory[i]) == srp_opt_mandatory[i]) {
			ret = 0;
			break;
		}
	}
	if (ret)
		pr_warn("target creation request is missing one or more parameters\n");

	if (target->scsi_host->cmd_per_lun > target->scsi_host->can_queue
	    && (opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
		pr_warn("cmd_per_lun = %d > queue_size = %d\n",
			target->scsi_host->cmd_per_lun,
			target->scsi_host->can_queue);

out:
	kfree(options);

	return ret;
}

static ssize_t srp_create_target(struct device *dev,
				 struct device_attribute *attr,
				 const char *buf, size_t count)
{
	struct srp_host *host =
		container_of(dev, struct srp_host, dev);
	struct Scsi_Host *target_host;
	struct srp_target_port *target;
	struct srp_rdma_ch *ch;
	struct srp_device *srp_dev = host->srp_dev;
	struct ib_device *ibdev = srp_dev->dev;
	int ret, node_idx, node, cpu, i;
	unsigned int max_sectors_per_mr, mr_per_cmd = 0;
	bool multich = false;

	target_host = scsi_host_alloc(&srp_template,
				      sizeof (struct srp_target_port));
	if (!target_host)
		return -ENOMEM;

	target_host->transportt  = ib_srp_transport_template;
	target_host->max_channel = 0;
	target_host->max_id      = 1;
	target_host->max_lun     = -1LL;
	target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb;

	target = host_to_target(target_host);

	target->net		= kobj_ns_grab_current(KOBJ_NS_TYPE_NET);
	target->io_class	= SRP_REV16A_IB_IO_CLASS;
	target->scsi_host	= target_host;
	target->srp_host	= host;
	target->lkey		= host->srp_dev->pd->local_dma_lkey;
	target->global_rkey	= host->srp_dev->global_rkey;
	target->cmd_sg_cnt	= cmd_sg_entries;
	target->sg_tablesize	= indirect_sg_entries ? : cmd_sg_entries;
	target->allow_ext_sg	= allow_ext_sg;
	target->tl_retry_count	= 7;
	target->queue_size	= SRP_DEFAULT_QUEUE_SIZE;

	/*
	 * Avoid that the SCSI host can be removed by srp_remove_target()
	 * before this function returns.
	 */
	scsi_host_get(target->scsi_host);

	ret = mutex_lock_interruptible(&host->add_target_mutex);
	if (ret < 0)
		goto put;

	ret = srp_parse_options(target->net, buf, target);
	if (ret)
		goto out;

	target->req_ring_size = target->queue_size - SRP_TSK_MGMT_SQ_SIZE;

	if (!srp_conn_unique(target->srp_host, target)) {
		if (target->using_rdma_cm) {
			char dst_addr[64];

			shost_printk(KERN_INFO, target->scsi_host,
				     PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;dest=%s\n",
				     be64_to_cpu(target->id_ext),
				     be64_to_cpu(target->ioc_guid),
				     inet_ntop(&target->rdma_cm.dst, dst_addr,
					       sizeof(dst_addr)));
		} else {
			shost_printk(KERN_INFO, target->scsi_host,
				     PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n",
				     be64_to_cpu(target->id_ext),
				     be64_to_cpu(target->ioc_guid),
				     be64_to_cpu(target->initiator_ext));
		}
		ret = -EEXIST;
		goto out;
	}

	if (!srp_dev->has_fmr && !srp_dev->has_fr && !target->allow_ext_sg &&
	    target->cmd_sg_cnt < target->sg_tablesize) {
		pr_warn("No MR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n");
		target->sg_tablesize = target->cmd_sg_cnt;
	}

	if (srp_dev->use_fast_reg || srp_dev->use_fmr) {
		/*
		 * FR and FMR can only map one HCA page per entry. If the
		 * start address is not aligned on a HCA page boundary two
		 * entries will be used for the head and the tail although
		 * these two entries combined contain at most one HCA page of
		 * data. Hence the "+ 1" in the calculation below.
		 *
		 * The indirect data buffer descriptor is contiguous so the
		 * memory for that buffer will only be registered if
		 * register_always is true. Hence add one to mr_per_cmd if
		 * register_always has been set.
		 */
		max_sectors_per_mr = srp_dev->max_pages_per_mr <<
				  (ilog2(srp_dev->mr_page_size) - 9);
		mr_per_cmd = register_always +
			(target->scsi_host->max_sectors + 1 +
			 max_sectors_per_mr - 1) / max_sectors_per_mr;
		pr_debug("max_sectors = %u; max_pages_per_mr = %u; mr_page_size = %u; max_sectors_per_mr = %u; mr_per_cmd = %u\n",
			 target->scsi_host->max_sectors,
			 srp_dev->max_pages_per_mr, srp_dev->mr_page_size,
			 max_sectors_per_mr, mr_per_cmd);
	}

	target_host->sg_tablesize = target->sg_tablesize;
	target->mr_pool_size = target->scsi_host->can_queue * mr_per_cmd;
	target->mr_per_cmd = mr_per_cmd;
	target->indirect_size = target->sg_tablesize *
				sizeof (struct srp_direct_buf);
	target->max_iu_len = sizeof (struct srp_cmd) +
			     sizeof (struct srp_indirect_buf) +
			     target->cmd_sg_cnt * sizeof (struct srp_direct_buf);

	INIT_WORK(&target->tl_err_work, srp_tl_err_work);
	INIT_WORK(&target->remove_work, srp_remove_work);
	spin_lock_init(&target->lock);
	ret = ib_query_gid(ibdev, host->port, 0, &target->sgid, NULL);
	if (ret)
		goto out;

	ret = -ENOMEM;
	target->ch_count = max_t(unsigned, num_online_nodes(),
				 min(ch_count ? :
				     min(4 * num_online_nodes(),
					 ibdev->num_comp_vectors),
				     num_online_cpus()));
	target->ch = kcalloc(target->ch_count, sizeof(*target->ch),
			     GFP_KERNEL);
	if (!target->ch)
		goto out;

	node_idx = 0;
	for_each_online_node(node) {
		const int ch_start = (node_idx * target->ch_count /
				      num_online_nodes());
		const int ch_end = ((node_idx + 1) * target->ch_count /
				    num_online_nodes());
		const int cv_start = (node_idx * ibdev->num_comp_vectors /
				      num_online_nodes() + target->comp_vector)
				     % ibdev->num_comp_vectors;
		const int cv_end = ((node_idx + 1) * ibdev->num_comp_vectors /
				    num_online_nodes() + target->comp_vector)
				   % ibdev->num_comp_vectors;
		int cpu_idx = 0;

		for_each_online_cpu(cpu) {
			if (cpu_to_node(cpu) != node)
				continue;
			if (ch_start + cpu_idx >= ch_end)
				continue;
			ch = &target->ch[ch_start + cpu_idx];
			ch->target = target;
			ch->comp_vector = cv_start == cv_end ? cv_start :
				cv_start + cpu_idx % (cv_end - cv_start);
			spin_lock_init(&ch->lock);
			INIT_LIST_HEAD(&ch->free_tx);
			ret = srp_new_cm_id(ch);
			if (ret)
				goto err_disconnect;

			ret = srp_create_ch_ib(ch);
			if (ret)
				goto err_disconnect;

			ret = srp_alloc_req_data(ch);
			if (ret)
				goto err_disconnect;

			ret = srp_connect_ch(ch, multich);
			if (ret) {
				char dst[64];

				if (target->using_rdma_cm)
					inet_ntop(&target->rdma_cm.dst, dst,
						  sizeof(dst));
				else
					snprintf(dst, sizeof(dst), "%pI6",
						 target->ib_cm.orig_dgid.raw);
				shost_printk(KERN_ERR, target->scsi_host,
					     PFX "Connection %d/%d to %s failed\n",
					     ch_start + cpu_idx,
					     target->ch_count, dst);
				if (node_idx == 0 && cpu_idx == 0) {
					goto free_ch;
				} else {
					srp_free_ch_ib(target, ch);
					srp_free_req_data(target, ch);
					target->ch_count = ch - target->ch;
					goto connected;
				}
			}

			multich = true;
			cpu_idx++;
		}
		node_idx++;
	}

connected:
	target->scsi_host->nr_hw_queues = target->ch_count;

	ret = srp_add_target(host, target);
	if (ret)
		goto err_disconnect;

	if (target->state != SRP_TARGET_REMOVED) {
		if (target->using_rdma_cm) {
			char dst[64];

			inet_ntop(&target->rdma_cm.dst, dst, sizeof(dst));
			shost_printk(KERN_DEBUG, target->scsi_host, PFX
				     "new target: id_ext %016llx ioc_guid %016llx sgid %pI6 dest %s\n",
				     be64_to_cpu(target->id_ext),
				     be64_to_cpu(target->ioc_guid),
				     target->sgid.raw, dst);
		} else {
			shost_printk(KERN_DEBUG, target->scsi_host, PFX
				     "new target: id_ext %016llx ioc_guid %016llx pkey %04x service_id %016llx sgid %pI6 dgid %pI6\n",
				     be64_to_cpu(target->id_ext),
				     be64_to_cpu(target->ioc_guid),
				     be16_to_cpu(target->ib_cm.pkey),
				     be64_to_cpu(target->ib_cm.service_id),
				     target->sgid.raw,
				     target->ib_cm.orig_dgid.raw);
		}
	}

	ret = count;

out:
	mutex_unlock(&host->add_target_mutex);

put:
	scsi_host_put(target->scsi_host);
	if (ret < 0) {
		/*
		 * If a call to srp_remove_target() has not been scheduled,
		 * drop the network namespace reference now that was obtained
		 * earlier in this function.
		 */
		if (target->state != SRP_TARGET_REMOVED)
			kobj_ns_drop(KOBJ_NS_TYPE_NET, target->net);
		scsi_host_put(target->scsi_host);
	}

	return ret;

err_disconnect:
	srp_disconnect_target(target);

free_ch:
	for (i = 0; i < target->ch_count; i++) {
		ch = &target->ch[i];
		srp_free_ch_ib(target, ch);
		srp_free_req_data(target, ch);
	}

	kfree(target->ch);
	goto out;
}

static DEVICE_ATTR(add_target, S_IWUSR, NULL, srp_create_target);

static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
			  char *buf)
{
	struct srp_host *host = container_of(dev, struct srp_host, dev);

	return sprintf(buf, "%s\n", host->srp_dev->dev->name);
}

static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);

static ssize_t show_port(struct device *dev, struct device_attribute *attr,
			 char *buf)
{
	struct srp_host *host = container_of(dev, struct srp_host, dev);

	return sprintf(buf, "%d\n", host->port);
}

static DEVICE_ATTR(port, S_IRUGO, show_port, NULL);

static struct srp_host *srp_add_port(struct srp_device *device, u8 port)
{
	struct srp_host *host;

	host = kzalloc(sizeof *host, GFP_KERNEL);
	if (!host)
		return NULL;

	INIT_LIST_HEAD(&host->target_list);
	spin_lock_init(&host->target_lock);
	init_completion(&host->released);
	mutex_init(&host->add_target_mutex);
	host->srp_dev = device;
	host->port = port;

	host->dev.class = &srp_class;
	host->dev.parent = device->dev->dev.parent;
	dev_set_name(&host->dev, "srp-%s-%d", device->dev->name, port);

	if (device_register(&host->dev))
		goto free_host;
	if (device_create_file(&host->dev, &dev_attr_add_target))
		goto err_class;
	if (device_create_file(&host->dev, &dev_attr_ibdev))
		goto err_class;
	if (device_create_file(&host->dev, &dev_attr_port))
		goto err_class;

	return host;

err_class:
	device_unregister(&host->dev);

free_host:
	kfree(host);

	return NULL;
}

static void srp_add_one(struct ib_device *device)
{
	struct srp_device *srp_dev;
	struct ib_device_attr *attr = &device->attrs;
	struct srp_host *host;
	int mr_page_shift, p;
	u64 max_pages_per_mr;
	unsigned int flags = 0;

	srp_dev = kzalloc(sizeof(*srp_dev), GFP_KERNEL);
	if (!srp_dev)
		return;

	/*
	 * Use the smallest page size supported by the HCA, down to a
	 * minimum of 4096 bytes. We're unlikely to build large sglists
	 * out of smaller entries.
	 */
	mr_page_shift		= max(12, ffs(attr->page_size_cap) - 1);
	srp_dev->mr_page_size	= 1 << mr_page_shift;
	srp_dev->mr_page_mask	= ~((u64) srp_dev->mr_page_size - 1);
	max_pages_per_mr	= attr->max_mr_size;
	do_div(max_pages_per_mr, srp_dev->mr_page_size);
	pr_debug("%s: %llu / %u = %llu <> %u\n", __func__,
		 attr->max_mr_size, srp_dev->mr_page_size,
		 max_pages_per_mr, SRP_MAX_PAGES_PER_MR);
	srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR,
					  max_pages_per_mr);

	srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
			    device->map_phys_fmr && device->unmap_fmr);
	srp_dev->has_fr = (attr->device_cap_flags &
			   IB_DEVICE_MEM_MGT_EXTENSIONS);
	if (!never_register && !srp_dev->has_fmr && !srp_dev->has_fr) {
		dev_warn(&device->dev, "neither FMR nor FR is supported\n");
	} else if (!never_register &&
		   attr->max_mr_size >= 2 * srp_dev->mr_page_size) {
		srp_dev->use_fast_reg = (srp_dev->has_fr &&
					 (!srp_dev->has_fmr || prefer_fr));
		srp_dev->use_fmr = !srp_dev->use_fast_reg && srp_dev->has_fmr;
	}

	if (never_register || !register_always ||
	    (!srp_dev->has_fmr && !srp_dev->has_fr))
		flags |= IB_PD_UNSAFE_GLOBAL_RKEY;

	if (srp_dev->use_fast_reg) {
		srp_dev->max_pages_per_mr =
			min_t(u32, srp_dev->max_pages_per_mr,
			      attr->max_fast_reg_page_list_len);
	}
	srp_dev->mr_max_size = srp_dev->mr_page_size *
			       srp_dev->max_pages_per_mr;
	pr_debug("%s: mr_page_shift = %d, device->max_mr_size = %#llx, device->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
		 device->name, mr_page_shift, attr->max_mr_size,
		 attr->max_fast_reg_page_list_len,
		 srp_dev->max_pages_per_mr, srp_dev->mr_max_size);

	INIT_LIST_HEAD(&srp_dev->dev_list);

	srp_dev->dev = device;
	srp_dev->pd  = ib_alloc_pd(device, flags);
	if (IS_ERR(srp_dev->pd))
		goto free_dev;

	if (flags & IB_PD_UNSAFE_GLOBAL_RKEY) {
		srp_dev->global_rkey = srp_dev->pd->unsafe_global_rkey;
		WARN_ON_ONCE(srp_dev->global_rkey == 0);
	}

	for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) {
		host = srp_add_port(srp_dev, p);
		if (host)
			list_add_tail(&host->list, &srp_dev->dev_list);
	}

	ib_set_client_data(device, &srp_client, srp_dev);
	return;

free_dev:
	kfree(srp_dev);
}

static void srp_remove_one(struct ib_device *device, void *client_data)
{
	struct srp_device *srp_dev;
	struct srp_host *host, *tmp_host;
	struct srp_target_port *target;

	srp_dev = client_data;
	if (!srp_dev)
		return;

	list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) {
		device_unregister(&host->dev);
		/*
		 * Wait for the sysfs entry to go away, so that no new
		 * target ports can be created.
		 */
		wait_for_completion(&host->released);

		/*
		 * Remove all target ports.
		 */
		spin_lock(&host->target_lock);
		list_for_each_entry(target, &host->target_list, list)
			srp_queue_remove_work(target);
		spin_unlock(&host->target_lock);

		/*
		 * Wait for tl_err and target port removal tasks.
		 */
		flush_workqueue(system_long_wq);
		flush_workqueue(srp_remove_wq);

		kfree(host);
	}

	ib_dealloc_pd(srp_dev->pd);

	kfree(srp_dev);
}

static struct srp_function_template ib_srp_transport_functions = {
	.has_rport_state	 = true,
	.reset_timer_if_blocked	 = true,
	.reconnect_delay	 = &srp_reconnect_delay,
	.fast_io_fail_tmo	 = &srp_fast_io_fail_tmo,
	.dev_loss_tmo		 = &srp_dev_loss_tmo,
	.reconnect		 = srp_rport_reconnect,
	.rport_delete		 = srp_rport_delete,
	.terminate_rport_io	 = srp_terminate_io,
};

static int __init srp_init_module(void)
{
	int ret;

	if (srp_sg_tablesize) {
		pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n");
		if (!cmd_sg_entries)
			cmd_sg_entries = srp_sg_tablesize;
	}

	if (!cmd_sg_entries)
		cmd_sg_entries = SRP_DEF_SG_TABLESIZE;

	if (cmd_sg_entries > 255) {
		pr_warn("Clamping cmd_sg_entries to 255\n");
		cmd_sg_entries = 255;
	}

	if (!indirect_sg_entries)
		indirect_sg_entries = cmd_sg_entries;
	else if (indirect_sg_entries < cmd_sg_entries) {
		pr_warn("Bumping up indirect_sg_entries to match cmd_sg_entries (%u)\n",
			cmd_sg_entries);
		indirect_sg_entries = cmd_sg_entries;
	}

	if (indirect_sg_entries > SG_MAX_SEGMENTS) {
		pr_warn("Clamping indirect_sg_entries to %u\n",
			SG_MAX_SEGMENTS);
		indirect_sg_entries = SG_MAX_SEGMENTS;
	}

	srp_remove_wq = create_workqueue("srp_remove");
	if (!srp_remove_wq) {
		ret = -ENOMEM;
		goto out;
	}

	ret = -ENOMEM;
	ib_srp_transport_template =
		srp_attach_transport(&ib_srp_transport_functions);
	if (!ib_srp_transport_template)
		goto destroy_wq;

	ret = class_register(&srp_class);
	if (ret) {
		pr_err("couldn't register class infiniband_srp\n");
		goto release_tr;
	}

	ib_sa_register_client(&srp_sa_client);

	ret = ib_register_client(&srp_client);
	if (ret) {
		pr_err("couldn't register IB client\n");
		goto unreg_sa;
	}

out:
	return ret;

unreg_sa:
	ib_sa_unregister_client(&srp_sa_client);
	class_unregister(&srp_class);

release_tr:
	srp_release_transport(ib_srp_transport_template);

destroy_wq:
	destroy_workqueue(srp_remove_wq);
	goto out;
}

static void __exit srp_cleanup_module(void)
{
	ib_unregister_client(&srp_client);
	ib_sa_unregister_client(&srp_sa_client);
	class_unregister(&srp_class);
	srp_release_transport(ib_srp_transport_template);
	destroy_workqueue(srp_remove_wq);
}

module_init(srp_init_module);
module_exit(srp_cleanup_module);