drivers/infiniband/ulp/srp/ib_srp.c (blob b48843833d699210832fd50c927355b2f769dd7c)
1 /*
2 * Copyright (c) 2005 Cisco Systems. All rights reserved.
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
33 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
35 #include <linux/module.h>
36 #include <linux/init.h>
37 #include <linux/slab.h>
38 #include <linux/err.h>
39 #include <linux/string.h>
40 #include <linux/parser.h>
41 #include <linux/random.h>
42 #include <linux/jiffies.h>
43 #include <linux/lockdep.h>
44 #include <linux/inet.h>
45 #include <rdma/ib_cache.h>
47 #include <linux/atomic.h>
49 #include <scsi/scsi.h>
50 #include <scsi/scsi_device.h>
51 #include <scsi/scsi_dbg.h>
52 #include <scsi/scsi_tcq.h>
53 #include <scsi/srp.h>
54 #include <scsi/scsi_transport_srp.h>
56 #include "ib_srp.h"
58 #define DRV_NAME "ib_srp"
59 #define PFX DRV_NAME ": "
60 #define DRV_VERSION "2.0"
61 #define DRV_RELDATE "July 26, 2015"
63 MODULE_AUTHOR("Roland Dreier");
64 MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator");
65 MODULE_LICENSE("Dual BSD/GPL");
66 MODULE_INFO(release_date, DRV_RELDATE);
68 #if !defined(CONFIG_DYNAMIC_DEBUG)
69 #define DEFINE_DYNAMIC_DEBUG_METADATA(name, fmt)
70 #define DYNAMIC_DEBUG_BRANCH(descriptor) false
71 #endif
73 static unsigned int srp_sg_tablesize;
74 static unsigned int cmd_sg_entries;
75 static unsigned int indirect_sg_entries;
76 static bool allow_ext_sg;
77 static bool prefer_fr = true;
78 static bool register_always = true;
79 static bool never_register;
80 static int topspin_workarounds = 1;
82 module_param(srp_sg_tablesize, uint, 0444);
83 MODULE_PARM_DESC(srp_sg_tablesize, "Deprecated name for cmd_sg_entries");
85 module_param(cmd_sg_entries, uint, 0444);
86 MODULE_PARM_DESC(cmd_sg_entries,
87 "Default number of gather/scatter entries in the SRP command (default is 12, max 255)");
89 module_param(indirect_sg_entries, uint, 0444);
90 MODULE_PARM_DESC(indirect_sg_entries,
91 "Default max number of gather/scatter entries (default is 12, max is " __stringify(SG_MAX_SEGMENTS) ")");
93 module_param(allow_ext_sg, bool, 0444);
94 MODULE_PARM_DESC(allow_ext_sg,
95 "Default behavior when there are more than cmd_sg_entries S/G entries after mapping; fails the request when false (default false)");
97 module_param(topspin_workarounds, int, 0444);
98 MODULE_PARM_DESC(topspin_workarounds,
99 "Enable workarounds for Topspin/Cisco SRP target bugs if != 0");
101 module_param(prefer_fr, bool, 0444);
102 MODULE_PARM_DESC(prefer_fr,
103 "Whether to use fast registration if both FMR and fast registration are supported");
105 module_param(register_always, bool, 0444);
106 MODULE_PARM_DESC(register_always,
107 "Use memory registration even for contiguous memory regions");
109 module_param(never_register, bool, 0444);
110 MODULE_PARM_DESC(never_register, "Never register memory");
112 static const struct kernel_param_ops srp_tmo_ops;
114 static int srp_reconnect_delay = 10;
115 module_param_cb(reconnect_delay, &srp_tmo_ops, &srp_reconnect_delay,
116 S_IRUGO | S_IWUSR);
117 MODULE_PARM_DESC(reconnect_delay, "Time between successive reconnect attempts");
119 static int srp_fast_io_fail_tmo = 15;
120 module_param_cb(fast_io_fail_tmo, &srp_tmo_ops, &srp_fast_io_fail_tmo,
121 S_IRUGO | S_IWUSR);
122 MODULE_PARM_DESC(fast_io_fail_tmo,
123 "Number of seconds between the observation of a transport"
124 " layer error and failing all I/O. \"off\" means that this"
125 " functionality is disabled.");
127 static int srp_dev_loss_tmo = 600;
128 module_param_cb(dev_loss_tmo, &srp_tmo_ops, &srp_dev_loss_tmo,
129 S_IRUGO | S_IWUSR);
130 MODULE_PARM_DESC(dev_loss_tmo,
131 "Maximum number of seconds that the SRP transport should"
132 " insulate transport layer errors. After this time has been"
133 " exceeded the SCSI host is removed. Should be"
134 " between 1 and " __stringify(SCSI_DEVICE_BLOCK_MAX_TIMEOUT)
135 " if fast_io_fail_tmo has not been set. \"off\" means that"
136 " this functionality is disabled.");
138 static unsigned ch_count;
139 module_param(ch_count, uint, 0444);
140 MODULE_PARM_DESC(ch_count,
141 "Number of RDMA channels to use for communication with an SRP target. Using more than one channel improves performance if the HCA supports multiple completion vectors. The default value is the minimum of four times the number of online CPU sockets and the number of completion vectors supported by the HCA.");
143 static void srp_add_one(struct ib_device *device);
144 static void srp_remove_one(struct ib_device *device, void *client_data);
145 static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc);
146 static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
147 const char *opname);
148 static int srp_ib_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);
149 static int srp_rdma_cm_handler(struct rdma_cm_id *cm_id,
150 struct rdma_cm_event *event);
152 static struct scsi_transport_template *ib_srp_transport_template;
153 static struct workqueue_struct *srp_remove_wq;
155 static struct ib_client srp_client = {
156 .name = "srp",
157 .add = srp_add_one,
158 .remove = srp_remove_one
161 static struct ib_sa_client srp_sa_client;
163 static int srp_tmo_get(char *buffer, const struct kernel_param *kp)
165 int tmo = *(int *)kp->arg;
167 if (tmo >= 0)
168 return sprintf(buffer, "%d", tmo);
169 else
170 return sprintf(buffer, "off");
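/*
 * Note: negative timeout values are reported as "off", matching the "off"
 * wording in the parameter descriptions above. srp_tmo_set() below uses
 * srp_parse_tmo(), provided by the SRP transport class, which presumably
 * accepts the same "off" spelling when a value is written.
 */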
173 static int srp_tmo_set(const char *val, const struct kernel_param *kp)
175 int tmo, res;
177 res = srp_parse_tmo(&tmo, val);
178 if (res)
179 goto out;
181 if (kp->arg == &srp_reconnect_delay)
182 res = srp_tmo_valid(tmo, srp_fast_io_fail_tmo,
183 srp_dev_loss_tmo);
184 else if (kp->arg == &srp_fast_io_fail_tmo)
185 res = srp_tmo_valid(srp_reconnect_delay, tmo, srp_dev_loss_tmo);
186 else
187 res = srp_tmo_valid(srp_reconnect_delay, srp_fast_io_fail_tmo,
188 tmo);
189 if (res)
190 goto out;
191 *(int *)kp->arg = tmo;
193 out:
194 return res;
197 static const struct kernel_param_ops srp_tmo_ops = {
198 .get = srp_tmo_get,
199 .set = srp_tmo_set,
202 static inline struct srp_target_port *host_to_target(struct Scsi_Host *host)
204 return (struct srp_target_port *) host->hostdata;
207 static const char *srp_target_info(struct Scsi_Host *host)
209 return host_to_target(host)->target_name;
212 static int srp_target_is_topspin(struct srp_target_port *target)
214 static const u8 topspin_oui[3] = { 0x00, 0x05, 0xad };
215 static const u8 cisco_oui[3] = { 0x00, 0x1b, 0x0d };
217 return topspin_workarounds &&
218 (!memcmp(&target->ioc_guid, topspin_oui, sizeof topspin_oui) ||
219 !memcmp(&target->ioc_guid, cisco_oui, sizeof cisco_oui));
222 static struct srp_iu *srp_alloc_iu(struct srp_host *host, size_t size,
223 gfp_t gfp_mask,
224 enum dma_data_direction direction)
226 struct srp_iu *iu;
228 iu = kmalloc(sizeof *iu, gfp_mask);
229 if (!iu)
230 goto out;
232 iu->buf = kzalloc(size, gfp_mask);
233 if (!iu->buf)
234 goto out_free_iu;
236 iu->dma = ib_dma_map_single(host->srp_dev->dev, iu->buf, size,
237 direction);
238 if (ib_dma_mapping_error(host->srp_dev->dev, iu->dma))
239 goto out_free_buf;
241 iu->size = size;
242 iu->direction = direction;
244 return iu;
246 out_free_buf:
247 kfree(iu->buf);
248 out_free_iu:
249 kfree(iu);
250 out:
251 return NULL;
254 static void srp_free_iu(struct srp_host *host, struct srp_iu *iu)
256 if (!iu)
257 return;
259 ib_dma_unmap_single(host->srp_dev->dev, iu->dma, iu->size,
260 iu->direction);
261 kfree(iu->buf);
262 kfree(iu);
265 static void srp_qp_event(struct ib_event *event, void *context)
267 pr_debug("QP event %s (%d)\n",
268 ib_event_msg(event->event), event->event);
271 static int srp_init_ib_qp(struct srp_target_port *target,
272 struct ib_qp *qp)
274 struct ib_qp_attr *attr;
275 int ret;
277 attr = kmalloc(sizeof *attr, GFP_KERNEL);
278 if (!attr)
279 return -ENOMEM;
281 ret = ib_find_cached_pkey(target->srp_host->srp_dev->dev,
282 target->srp_host->port,
283 be16_to_cpu(target->ib_cm.pkey),
284 &attr->pkey_index);
285 if (ret)
286 goto out;
288 attr->qp_state = IB_QPS_INIT;
289 attr->qp_access_flags = (IB_ACCESS_REMOTE_READ |
290 IB_ACCESS_REMOTE_WRITE);
291 attr->port_num = target->srp_host->port;
293 ret = ib_modify_qp(qp, attr,
294 IB_QP_STATE |
295 IB_QP_PKEY_INDEX |
296 IB_QP_ACCESS_FLAGS |
297 IB_QP_PORT);
299 out:
300 kfree(attr);
301 return ret;
304 static int srp_new_ib_cm_id(struct srp_rdma_ch *ch)
306 struct srp_target_port *target = ch->target;
307 struct ib_cm_id *new_cm_id;
309 new_cm_id = ib_create_cm_id(target->srp_host->srp_dev->dev,
310 srp_ib_cm_handler, ch);
311 if (IS_ERR(new_cm_id))
312 return PTR_ERR(new_cm_id);
314 if (ch->ib_cm.cm_id)
315 ib_destroy_cm_id(ch->ib_cm.cm_id);
316 ch->ib_cm.cm_id = new_cm_id;
317 if (rdma_cap_opa_ah(target->srp_host->srp_dev->dev,
318 target->srp_host->port))
319 ch->ib_cm.path.rec_type = SA_PATH_REC_TYPE_OPA;
320 else
321 ch->ib_cm.path.rec_type = SA_PATH_REC_TYPE_IB;
322 ch->ib_cm.path.sgid = target->sgid;
323 ch->ib_cm.path.dgid = target->ib_cm.orig_dgid;
324 ch->ib_cm.path.pkey = target->ib_cm.pkey;
325 ch->ib_cm.path.service_id = target->ib_cm.service_id;
327 return 0;
330 static const char *inet_ntop(const void *sa, char *dst, unsigned int size)
332 switch (((struct sockaddr *)sa)->sa_family) {
333 case AF_INET:
334 snprintf(dst, size, "%pI4",
335 &((struct sockaddr_in *)sa)->sin_addr);
336 break;
337 case AF_INET6:
338 snprintf(dst, size, "%pI6",
339 &((struct sockaddr_in6 *)sa)->sin6_addr);
340 break;
341 default:
342 snprintf(dst, size, "???");
343 break;
345 return dst;
348 static int srp_new_rdma_cm_id(struct srp_rdma_ch *ch)
350 struct srp_target_port *target = ch->target;
351 struct rdma_cm_id *new_cm_id;
352 char src_addr[64], dst_addr[64];
353 int ret;
355 new_cm_id = rdma_create_id(target->net, srp_rdma_cm_handler, ch,
356 RDMA_PS_TCP, IB_QPT_RC);
357 if (IS_ERR(new_cm_id)) {
358 ret = PTR_ERR(new_cm_id);
359 new_cm_id = NULL;
360 goto out;
363 init_completion(&ch->done);
364 ret = rdma_resolve_addr(new_cm_id, target->rdma_cm.src_specified ?
365 (struct sockaddr *)&target->rdma_cm.src : NULL,
366 (struct sockaddr *)&target->rdma_cm.dst,
367 SRP_PATH_REC_TIMEOUT_MS);
368 if (ret) {
369 pr_err("No route available from %s to %s (%d)\n",
370 target->rdma_cm.src_specified ?
371 inet_ntop(&target->rdma_cm.src, src_addr,
372 sizeof(src_addr)) : "(any)",
373 inet_ntop(&target->rdma_cm.dst, dst_addr,
374 sizeof(dst_addr)),
375 ret);
376 goto out;
378 ret = wait_for_completion_interruptible(&ch->done);
379 if (ret < 0)
380 goto out;
382 ret = ch->status;
383 if (ret) {
384 pr_err("Resolving address %s failed (%d)\n",
385 inet_ntop(&target->rdma_cm.dst, dst_addr,
386 sizeof(dst_addr)),
387 ret);
388 goto out;
391 swap(ch->rdma_cm.cm_id, new_cm_id);
393 out:
394 if (new_cm_id)
395 rdma_destroy_id(new_cm_id);
397 return ret;
400 static int srp_new_cm_id(struct srp_rdma_ch *ch)
402 struct srp_target_port *target = ch->target;
404 return target->using_rdma_cm ? srp_new_rdma_cm_id(ch) :
405 srp_new_ib_cm_id(ch);
408 static struct ib_fmr_pool *srp_alloc_fmr_pool(struct srp_target_port *target)
410 struct srp_device *dev = target->srp_host->srp_dev;
411 struct ib_fmr_pool_param fmr_param;
413 memset(&fmr_param, 0, sizeof(fmr_param));
414 fmr_param.pool_size = target->mr_pool_size;
415 fmr_param.dirty_watermark = fmr_param.pool_size / 4;
416 fmr_param.cache = 1;
417 fmr_param.max_pages_per_fmr = dev->max_pages_per_mr;
418 fmr_param.page_shift = ilog2(dev->mr_page_size);
419 fmr_param.access = (IB_ACCESS_LOCAL_WRITE |
420 IB_ACCESS_REMOTE_WRITE |
421 IB_ACCESS_REMOTE_READ);
423 return ib_create_fmr_pool(dev->pd, &fmr_param);
427 * srp_destroy_fr_pool() - free the resources owned by a pool
428 * @pool: Fast registration pool to be destroyed.
430 static void srp_destroy_fr_pool(struct srp_fr_pool *pool)
432 int i;
433 struct srp_fr_desc *d;
435 if (!pool)
436 return;
438 for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
439 if (d->mr)
440 ib_dereg_mr(d->mr);
442 kfree(pool);
446 * srp_create_fr_pool() - allocate and initialize a pool for fast registration
447 * @device: IB device to allocate fast registration descriptors for.
448 * @pd: Protection domain associated with the FR descriptors.
449 * @pool_size: Number of descriptors to allocate.
450 * @max_page_list_len: Maximum fast registration work request page list length.
452 static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device,
453 struct ib_pd *pd, int pool_size,
454 int max_page_list_len)
456 struct srp_fr_pool *pool;
457 struct srp_fr_desc *d;
458 struct ib_mr *mr;
459 int i, ret = -EINVAL;
461 if (pool_size <= 0)
462 goto err;
463 ret = -ENOMEM;
464 pool = kzalloc(sizeof(struct srp_fr_pool) +
465 pool_size * sizeof(struct srp_fr_desc), GFP_KERNEL);
466 if (!pool)
467 goto err;
468 pool->size = pool_size;
469 pool->max_page_list_len = max_page_list_len;
470 spin_lock_init(&pool->lock);
471 INIT_LIST_HEAD(&pool->free_list);
473 for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
474 mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG,
475 max_page_list_len);
476 if (IS_ERR(mr)) {
477 ret = PTR_ERR(mr);
478 if (ret == -ENOMEM)
479 pr_info("%s: ib_alloc_mr() failed. Try to reduce max_cmd_per_lun, max_sect or ch_count\n",
480 dev_name(&device->dev));
481 goto destroy_pool;
483 d->mr = mr;
484 list_add_tail(&d->entry, &pool->free_list);
487 out:
488 return pool;
490 destroy_pool:
491 srp_destroy_fr_pool(pool);
493 err:
494 pool = ERR_PTR(ret);
495 goto out;
499 * srp_fr_pool_get() - obtain a descriptor suitable for fast registration
500 * @pool: Pool to obtain descriptor from.
502 static struct srp_fr_desc *srp_fr_pool_get(struct srp_fr_pool *pool)
504 struct srp_fr_desc *d = NULL;
505 unsigned long flags;
507 spin_lock_irqsave(&pool->lock, flags);
508 if (!list_empty(&pool->free_list)) {
509 d = list_first_entry(&pool->free_list, typeof(*d), entry);
510 list_del(&d->entry);
512 spin_unlock_irqrestore(&pool->lock, flags);
514 return d;
518 * srp_fr_pool_put() - put an FR descriptor back in the free list
519 * @pool: Pool the descriptor was allocated from.
520 * @desc: Pointer to an array of fast registration descriptor pointers.
521 * @n: Number of descriptors to put back.
523 * Note: The caller must already have queued an invalidation request for
524 * desc->mr->rkey before calling this function.
526 static void srp_fr_pool_put(struct srp_fr_pool *pool, struct srp_fr_desc **desc,
527 int n)
529 unsigned long flags;
530 int i;
532 spin_lock_irqsave(&pool->lock, flags);
533 for (i = 0; i < n; i++)
534 list_add(&desc[i]->entry, &pool->free_list);
535 spin_unlock_irqrestore(&pool->lock, flags);
538 static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
540 struct srp_device *dev = target->srp_host->srp_dev;
542 return srp_create_fr_pool(dev->dev, dev->pd, target->mr_pool_size,
543 dev->max_pages_per_mr);
547 * srp_destroy_qp() - destroy an RDMA queue pair
548 * @ch: SRP RDMA channel.
550  * Drain the qp before destroying it. This prevents the receive
551  * completion handler from accessing the queue pair while it is
552  * being destroyed.
554 static void srp_destroy_qp(struct srp_rdma_ch *ch)
556 spin_lock_irq(&ch->lock);
557 ib_process_cq_direct(ch->send_cq, -1);
558 spin_unlock_irq(&ch->lock);
560 ib_drain_qp(ch->qp);
561 ib_destroy_qp(ch->qp);
564 static int srp_create_ch_ib(struct srp_rdma_ch *ch)
566 struct srp_target_port *target = ch->target;
567 struct srp_device *dev = target->srp_host->srp_dev;
568 struct ib_qp_init_attr *init_attr;
569 struct ib_cq *recv_cq, *send_cq;
570 struct ib_qp *qp;
571 struct ib_fmr_pool *fmr_pool = NULL;
572 struct srp_fr_pool *fr_pool = NULL;
573 const int m = 1 + dev->use_fast_reg * target->mr_per_cmd * 2;
574 int ret;
576 init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
577 if (!init_attr)
578 return -ENOMEM;
580 /* queue_size + 1 for ib_drain_rq() */
581 recv_cq = ib_alloc_cq(dev->dev, ch, target->queue_size + 1,
582 ch->comp_vector, IB_POLL_SOFTIRQ);
583 if (IS_ERR(recv_cq)) {
584 ret = PTR_ERR(recv_cq);
585 goto err;
588 send_cq = ib_alloc_cq(dev->dev, ch, m * target->queue_size,
589 ch->comp_vector, IB_POLL_DIRECT);
590 if (IS_ERR(send_cq)) {
591 ret = PTR_ERR(send_cq);
592 goto err_recv_cq;
595 init_attr->event_handler = srp_qp_event;
596 init_attr->cap.max_send_wr = m * target->queue_size;
597 init_attr->cap.max_recv_wr = target->queue_size + 1;
598 init_attr->cap.max_recv_sge = 1;
599 init_attr->cap.max_send_sge = 1;
600 init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
601 init_attr->qp_type = IB_QPT_RC;
602 init_attr->send_cq = send_cq;
603 init_attr->recv_cq = recv_cq;
605 if (target->using_rdma_cm) {
606 ret = rdma_create_qp(ch->rdma_cm.cm_id, dev->pd, init_attr);
607 qp = ch->rdma_cm.cm_id->qp;
608 } else {
609 qp = ib_create_qp(dev->pd, init_attr);
610 if (!IS_ERR(qp)) {
611 ret = srp_init_ib_qp(target, qp);
612 if (ret)
613 ib_destroy_qp(qp);
614 } else {
615 ret = PTR_ERR(qp);
618 if (ret) {
619 pr_err("QP creation failed for dev %s: %d\n",
620 dev_name(&dev->dev->dev), ret);
621 goto err_send_cq;
624 if (dev->use_fast_reg) {
625 fr_pool = srp_alloc_fr_pool(target);
626 if (IS_ERR(fr_pool)) {
627 ret = PTR_ERR(fr_pool);
628 shost_printk(KERN_WARNING, target->scsi_host, PFX
629 "FR pool allocation failed (%d)\n", ret);
630 goto err_qp;
632 } else if (dev->use_fmr) {
633 fmr_pool = srp_alloc_fmr_pool(target);
634 if (IS_ERR(fmr_pool)) {
635 ret = PTR_ERR(fmr_pool);
636 shost_printk(KERN_WARNING, target->scsi_host, PFX
637 "FMR pool allocation failed (%d)\n", ret);
638 goto err_qp;
642 if (ch->qp)
643 srp_destroy_qp(ch);
644 if (ch->recv_cq)
645 ib_free_cq(ch->recv_cq);
646 if (ch->send_cq)
647 ib_free_cq(ch->send_cq);
649 ch->qp = qp;
650 ch->recv_cq = recv_cq;
651 ch->send_cq = send_cq;
653 if (dev->use_fast_reg) {
654 if (ch->fr_pool)
655 srp_destroy_fr_pool(ch->fr_pool);
656 ch->fr_pool = fr_pool;
657 } else if (dev->use_fmr) {
658 if (ch->fmr_pool)
659 ib_destroy_fmr_pool(ch->fmr_pool);
660 ch->fmr_pool = fmr_pool;
663 kfree(init_attr);
664 return 0;
666 err_qp:
667 if (target->using_rdma_cm)
668 rdma_destroy_qp(ch->rdma_cm.cm_id);
669 else
670 ib_destroy_qp(qp);
672 err_send_cq:
673 ib_free_cq(send_cq);
675 err_recv_cq:
676 ib_free_cq(recv_cq);
678 err:
679 kfree(init_attr);
680 return ret;
684 * Note: this function may be called without srp_alloc_iu_bufs() having been
685 * invoked. Hence the ch->[rt]x_ring checks.
687 static void srp_free_ch_ib(struct srp_target_port *target,
688 struct srp_rdma_ch *ch)
690 struct srp_device *dev = target->srp_host->srp_dev;
691 int i;
693 if (!ch->target)
694 return;
696 if (target->using_rdma_cm) {
697 if (ch->rdma_cm.cm_id) {
698 rdma_destroy_id(ch->rdma_cm.cm_id);
699 ch->rdma_cm.cm_id = NULL;
701 } else {
702 if (ch->ib_cm.cm_id) {
703 ib_destroy_cm_id(ch->ib_cm.cm_id);
704 ch->ib_cm.cm_id = NULL;
708 /* If srp_new_cm_id() succeeded but srp_create_ch_ib() did not, return. */
709 if (!ch->qp)
710 return;
712 if (dev->use_fast_reg) {
713 if (ch->fr_pool)
714 srp_destroy_fr_pool(ch->fr_pool);
715 } else if (dev->use_fmr) {
716 if (ch->fmr_pool)
717 ib_destroy_fmr_pool(ch->fmr_pool);
720 srp_destroy_qp(ch);
721 ib_free_cq(ch->send_cq);
722 ib_free_cq(ch->recv_cq);
725  * Prevent the SCSI error handler from using this channel after it
726  * has been freed. The SCSI error handler may keep trying to
727  * perform recovery actions after scsi_remove_host() has
728  * returned.
730 ch->target = NULL;
732 ch->qp = NULL;
733 ch->send_cq = ch->recv_cq = NULL;
735 if (ch->rx_ring) {
736 for (i = 0; i < target->queue_size; ++i)
737 srp_free_iu(target->srp_host, ch->rx_ring[i]);
738 kfree(ch->rx_ring);
739 ch->rx_ring = NULL;
741 if (ch->tx_ring) {
742 for (i = 0; i < target->queue_size; ++i)
743 srp_free_iu(target->srp_host, ch->tx_ring[i]);
744 kfree(ch->tx_ring);
745 ch->tx_ring = NULL;
749 static void srp_path_rec_completion(int status,
750 struct sa_path_rec *pathrec,
751 void *ch_ptr)
753 struct srp_rdma_ch *ch = ch_ptr;
754 struct srp_target_port *target = ch->target;
756 ch->status = status;
757 if (status)
758 shost_printk(KERN_ERR, target->scsi_host,
759 PFX "Got failed path rec status %d\n", status);
760 else
761 ch->ib_cm.path = *pathrec;
762 complete(&ch->done);
765 static int srp_ib_lookup_path(struct srp_rdma_ch *ch)
767 struct srp_target_port *target = ch->target;
768 int ret = -ENODEV;
770 ch->ib_cm.path.numb_path = 1;
772 init_completion(&ch->done);
775  * Prevent the SCSI host from being removed by srp_remove_target()
776  * before srp_path_rec_completion() is called.
778 if (!scsi_host_get(target->scsi_host))
779 goto out;
781 ch->ib_cm.path_query_id = ib_sa_path_rec_get(&srp_sa_client,
782 target->srp_host->srp_dev->dev,
783 target->srp_host->port,
784 &ch->ib_cm.path,
785 IB_SA_PATH_REC_SERVICE_ID |
786 IB_SA_PATH_REC_DGID |
787 IB_SA_PATH_REC_SGID |
788 IB_SA_PATH_REC_NUMB_PATH |
789 IB_SA_PATH_REC_PKEY,
790 SRP_PATH_REC_TIMEOUT_MS,
791 GFP_KERNEL,
792 srp_path_rec_completion,
793 ch, &ch->ib_cm.path_query);
794 ret = ch->ib_cm.path_query_id;
795 if (ret < 0)
796 goto put;
798 ret = wait_for_completion_interruptible(&ch->done);
799 if (ret < 0)
800 goto put;
802 ret = ch->status;
803 if (ret < 0)
804 shost_printk(KERN_WARNING, target->scsi_host,
805 PFX "Path record query failed: sgid %pI6, dgid %pI6, pkey %#04x, service_id %#16llx\n",
806 ch->ib_cm.path.sgid.raw, ch->ib_cm.path.dgid.raw,
807 be16_to_cpu(target->ib_cm.pkey),
808 be64_to_cpu(target->ib_cm.service_id));
810 put:
811 scsi_host_put(target->scsi_host);
813 out:
814 return ret;
817 static int srp_rdma_lookup_path(struct srp_rdma_ch *ch)
819 struct srp_target_port *target = ch->target;
820 int ret;
822 init_completion(&ch->done);
824 ret = rdma_resolve_route(ch->rdma_cm.cm_id, SRP_PATH_REC_TIMEOUT_MS);
825 if (ret)
826 return ret;
828 wait_for_completion_interruptible(&ch->done);
830 if (ch->status != 0)
831 shost_printk(KERN_WARNING, target->scsi_host,
832 PFX "Path resolution failed\n");
834 return ch->status;
837 static int srp_lookup_path(struct srp_rdma_ch *ch)
839 struct srp_target_port *target = ch->target;
841 return target->using_rdma_cm ? srp_rdma_lookup_path(ch) :
842 srp_ib_lookup_path(ch);
845 static u8 srp_get_subnet_timeout(struct srp_host *host)
847 struct ib_port_attr attr;
848 int ret;
849 u8 subnet_timeout = 18;
851 ret = ib_query_port(host->srp_dev->dev, host->port, &attr);
852 if (ret == 0)
853 subnet_timeout = attr.subnet_timeout;
855 if (unlikely(subnet_timeout < 15))
856 pr_warn("%s: subnet timeout %d may cause SRP login to fail.\n",
857 dev_name(&host->srp_dev->dev->dev), subnet_timeout);
859 return subnet_timeout;
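/*
 * Note: the subnet timeout returned here is used by srp_send_req() to derive
 * the CM response timeouts (subnet_timeout + 2), which is why the warning
 * above flags subnet timeouts below 15 as a likely cause of SRP login
 * failures.
 */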
862 static int srp_send_req(struct srp_rdma_ch *ch, bool multich)
864 struct srp_target_port *target = ch->target;
865 struct {
866 struct rdma_conn_param rdma_param;
867 struct srp_login_req_rdma rdma_req;
868 struct ib_cm_req_param ib_param;
869 struct srp_login_req ib_req;
870 } *req = NULL;
871 char *ipi, *tpi;
872 int status;
874 req = kzalloc(sizeof *req, GFP_KERNEL);
875 if (!req)
876 return -ENOMEM;
878 req->ib_param.flow_control = 1;
879 req->ib_param.retry_count = target->tl_retry_count;
882 * Pick some arbitrary defaults here; we could make these
883 * module parameters if anyone cared about setting them.
885 req->ib_param.responder_resources = 4;
886 req->ib_param.rnr_retry_count = 7;
887 req->ib_param.max_cm_retries = 15;
889 req->ib_req.opcode = SRP_LOGIN_REQ;
890 req->ib_req.tag = 0;
891 req->ib_req.req_it_iu_len = cpu_to_be32(target->max_iu_len);
892 req->ib_req.req_buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
893 SRP_BUF_FORMAT_INDIRECT);
894 req->ib_req.req_flags = (multich ? SRP_MULTICHAN_MULTI :
895 SRP_MULTICHAN_SINGLE);
897 if (target->using_rdma_cm) {
898 req->rdma_param.flow_control = req->ib_param.flow_control;
899 req->rdma_param.responder_resources =
900 req->ib_param.responder_resources;
901 req->rdma_param.initiator_depth = req->ib_param.initiator_depth;
902 req->rdma_param.retry_count = req->ib_param.retry_count;
903 req->rdma_param.rnr_retry_count = req->ib_param.rnr_retry_count;
904 req->rdma_param.private_data = &req->rdma_req;
905 req->rdma_param.private_data_len = sizeof(req->rdma_req);
907 req->rdma_req.opcode = req->ib_req.opcode;
908 req->rdma_req.tag = req->ib_req.tag;
909 req->rdma_req.req_it_iu_len = req->ib_req.req_it_iu_len;
910 req->rdma_req.req_buf_fmt = req->ib_req.req_buf_fmt;
911 req->rdma_req.req_flags = req->ib_req.req_flags;
913 ipi = req->rdma_req.initiator_port_id;
914 tpi = req->rdma_req.target_port_id;
915 } else {
916 u8 subnet_timeout;
918 subnet_timeout = srp_get_subnet_timeout(target->srp_host);
920 req->ib_param.primary_path = &ch->ib_cm.path;
921 req->ib_param.alternate_path = NULL;
922 req->ib_param.service_id = target->ib_cm.service_id;
923 get_random_bytes(&req->ib_param.starting_psn, 4);
924 req->ib_param.starting_psn &= 0xffffff;
925 req->ib_param.qp_num = ch->qp->qp_num;
926 req->ib_param.qp_type = ch->qp->qp_type;
927 req->ib_param.local_cm_response_timeout = subnet_timeout + 2;
928 req->ib_param.remote_cm_response_timeout = subnet_timeout + 2;
929 req->ib_param.private_data = &req->ib_req;
930 req->ib_param.private_data_len = sizeof(req->ib_req);
932 ipi = req->ib_req.initiator_port_id;
933 tpi = req->ib_req.target_port_id;
937 * In the published SRP specification (draft rev. 16a), the
938 * port identifier format is 8 bytes of ID extension followed
939 * by 8 bytes of GUID. Older drafts put the two halves in the
940 * opposite order, so that the GUID comes first.
942 * Targets conforming to these obsolete drafts can be
943 * recognized by the I/O Class they report.
945 if (target->io_class == SRP_REV10_IB_IO_CLASS) {
946 memcpy(ipi, &target->sgid.global.interface_id, 8);
947 memcpy(ipi + 8, &target->initiator_ext, 8);
948 memcpy(tpi, &target->ioc_guid, 8);
949 memcpy(tpi + 8, &target->id_ext, 8);
950 } else {
951 memcpy(ipi, &target->initiator_ext, 8);
952 memcpy(ipi + 8, &target->sgid.global.interface_id, 8);
953 memcpy(tpi, &target->id_ext, 8);
954 memcpy(tpi + 8, &target->ioc_guid, 8);
958 * Topspin/Cisco SRP targets will reject our login unless we
959 * zero out the first 8 bytes of our initiator port ID and set
960 * the second 8 bytes to the local node GUID.
962 if (srp_target_is_topspin(target)) {
963 shost_printk(KERN_DEBUG, target->scsi_host,
964 PFX "Topspin/Cisco initiator port ID workaround "
965 "activated for target GUID %016llx\n",
966 be64_to_cpu(target->ioc_guid));
967 memset(ipi, 0, 8);
968 memcpy(ipi + 8, &target->srp_host->srp_dev->dev->node_guid, 8);
971 if (target->using_rdma_cm)
972 status = rdma_connect(ch->rdma_cm.cm_id, &req->rdma_param);
973 else
974 status = ib_send_cm_req(ch->ib_cm.cm_id, &req->ib_param);
976 kfree(req);
978 return status;
981 static bool srp_queue_remove_work(struct srp_target_port *target)
983 bool changed = false;
985 spin_lock_irq(&target->lock);
986 if (target->state != SRP_TARGET_REMOVED) {
987 target->state = SRP_TARGET_REMOVED;
988 changed = true;
990 spin_unlock_irq(&target->lock);
992 if (changed)
993 queue_work(srp_remove_wq, &target->remove_work);
995 return changed;
998 static void srp_disconnect_target(struct srp_target_port *target)
1000 struct srp_rdma_ch *ch;
1001 int i, ret;
1003 /* XXX should send SRP_I_LOGOUT request */
1005 for (i = 0; i < target->ch_count; i++) {
1006 ch = &target->ch[i];
1007 ch->connected = false;
1008 ret = 0;
1009 if (target->using_rdma_cm) {
1010 if (ch->rdma_cm.cm_id)
1011 rdma_disconnect(ch->rdma_cm.cm_id);
1012 } else {
1013 if (ch->ib_cm.cm_id)
1014 ret = ib_send_cm_dreq(ch->ib_cm.cm_id,
1015 NULL, 0);
1017 if (ret < 0) {
1018 shost_printk(KERN_DEBUG, target->scsi_host,
1019 PFX "Sending CM DREQ failed\n");
1024 static void srp_free_req_data(struct srp_target_port *target,
1025 struct srp_rdma_ch *ch)
1027 struct srp_device *dev = target->srp_host->srp_dev;
1028 struct ib_device *ibdev = dev->dev;
1029 struct srp_request *req;
1030 int i;
1032 if (!ch->req_ring)
1033 return;
1035 for (i = 0; i < target->req_ring_size; ++i) {
1036 req = &ch->req_ring[i];
1037 if (dev->use_fast_reg) {
1038 kfree(req->fr_list);
1039 } else {
1040 kfree(req->fmr_list);
1041 kfree(req->map_page);
1043 if (req->indirect_dma_addr) {
1044 ib_dma_unmap_single(ibdev, req->indirect_dma_addr,
1045 target->indirect_size,
1046 DMA_TO_DEVICE);
1048 kfree(req->indirect_desc);
1051 kfree(ch->req_ring);
1052 ch->req_ring = NULL;
1055 static int srp_alloc_req_data(struct srp_rdma_ch *ch)
1057 struct srp_target_port *target = ch->target;
1058 struct srp_device *srp_dev = target->srp_host->srp_dev;
1059 struct ib_device *ibdev = srp_dev->dev;
1060 struct srp_request *req;
1061 void *mr_list;
1062 dma_addr_t dma_addr;
1063 int i, ret = -ENOMEM;
1065 ch->req_ring = kcalloc(target->req_ring_size, sizeof(*ch->req_ring),
1066 GFP_KERNEL);
1067 if (!ch->req_ring)
1068 goto out;
1070 for (i = 0; i < target->req_ring_size; ++i) {
1071 req = &ch->req_ring[i];
1072 mr_list = kmalloc(target->mr_per_cmd * sizeof(void *),
1073 GFP_KERNEL);
1074 if (!mr_list)
1075 goto out;
1076 if (srp_dev->use_fast_reg) {
1077 req->fr_list = mr_list;
1078 } else {
1079 req->fmr_list = mr_list;
1080 req->map_page = kmalloc(srp_dev->max_pages_per_mr *
1081 sizeof(void *), GFP_KERNEL);
1082 if (!req->map_page)
1083 goto out;
1085 req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL);
1086 if (!req->indirect_desc)
1087 goto out;
1089 dma_addr = ib_dma_map_single(ibdev, req->indirect_desc,
1090 target->indirect_size,
1091 DMA_TO_DEVICE);
1092 if (ib_dma_mapping_error(ibdev, dma_addr))
1093 goto out;
1095 req->indirect_dma_addr = dma_addr;
1097 ret = 0;
1099 out:
1100 return ret;
1104 * srp_del_scsi_host_attr() - Remove attributes defined in the host template.
1105 * @shost: SCSI host whose attributes to remove from sysfs.
1107  * Note: Any attributes defined in the host template that did not exist
1108  * before this function was invoked are ignored.
1110 static void srp_del_scsi_host_attr(struct Scsi_Host *shost)
1112 struct device_attribute **attr;
1114 for (attr = shost->hostt->shost_attrs; attr && *attr; ++attr)
1115 device_remove_file(&shost->shost_dev, *attr);
1118 static void srp_remove_target(struct srp_target_port *target)
1120 struct srp_rdma_ch *ch;
1121 int i;
1123 WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
1125 srp_del_scsi_host_attr(target->scsi_host);
1126 srp_rport_get(target->rport);
1127 srp_remove_host(target->scsi_host);
1128 scsi_remove_host(target->scsi_host);
1129 srp_stop_rport_timers(target->rport);
1130 srp_disconnect_target(target);
1131 kobj_ns_drop(KOBJ_NS_TYPE_NET, target->net);
1132 for (i = 0; i < target->ch_count; i++) {
1133 ch = &target->ch[i];
1134 srp_free_ch_ib(target, ch);
1136 cancel_work_sync(&target->tl_err_work);
1137 srp_rport_put(target->rport);
1138 for (i = 0; i < target->ch_count; i++) {
1139 ch = &target->ch[i];
1140 srp_free_req_data(target, ch);
1142 kfree(target->ch);
1143 target->ch = NULL;
1145 spin_lock(&target->srp_host->target_lock);
1146 list_del(&target->list);
1147 spin_unlock(&target->srp_host->target_lock);
1149 scsi_host_put(target->scsi_host);
1152 static void srp_remove_work(struct work_struct *work)
1154 struct srp_target_port *target =
1155 container_of(work, struct srp_target_port, remove_work);
1157 WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
1159 srp_remove_target(target);
1162 static void srp_rport_delete(struct srp_rport *rport)
1164 struct srp_target_port *target = rport->lld_data;
1166 srp_queue_remove_work(target);
1170 * srp_connected_ch() - number of connected channels
1171 * @target: SRP target port.
1173 static int srp_connected_ch(struct srp_target_port *target)
1175 int i, c = 0;
1177 for (i = 0; i < target->ch_count; i++)
1178 c += target->ch[i].connected;
1180 return c;
1183 static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich)
1185 struct srp_target_port *target = ch->target;
1186 int ret;
1188 WARN_ON_ONCE(!multich && srp_connected_ch(target) > 0);
1190 ret = srp_lookup_path(ch);
1191 if (ret)
1192 goto out;
1194 while (1) {
1195 init_completion(&ch->done);
1196 ret = srp_send_req(ch, multich);
1197 if (ret)
1198 goto out;
1199 ret = wait_for_completion_interruptible(&ch->done);
1200 if (ret < 0)
1201 goto out;
1204 * The CM event handling code will set status to
1205 * SRP_PORT_REDIRECT if we get a port redirect REJ
1206 * back, or SRP_DLID_REDIRECT if we get a lid/qp
1207 * redirect REJ back.
1209 ret = ch->status;
1210 switch (ret) {
1211 case 0:
1212 ch->connected = true;
1213 goto out;
1215 case SRP_PORT_REDIRECT:
1216 ret = srp_lookup_path(ch);
1217 if (ret)
1218 goto out;
1219 break;
1221 case SRP_DLID_REDIRECT:
1222 break;
1224 case SRP_STALE_CONN:
1225 shost_printk(KERN_ERR, target->scsi_host, PFX
1226 "giving up on stale connection\n");
1227 ret = -ECONNRESET;
1228 goto out;
1230 default:
1231 goto out;
1235 out:
1236 return ret <= 0 ? ret : -ENODEV;
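/*
 * Note: the statement above maps any positive ch->status value (such as a
 * redirect status) that escapes the retry loop onto -ENODEV, so callers of
 * srp_connect_ch() only ever see zero or a negative error code.
 */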
1239 static void srp_inv_rkey_err_done(struct ib_cq *cq, struct ib_wc *wc)
1241 srp_handle_qp_err(cq, wc, "INV RKEY");
1244 static int srp_inv_rkey(struct srp_request *req, struct srp_rdma_ch *ch,
1245 u32 rkey)
1247 struct ib_send_wr *bad_wr;
1248 struct ib_send_wr wr = {
1249 .opcode = IB_WR_LOCAL_INV,
1250 .next = NULL,
1251 .num_sge = 0,
1252 .send_flags = 0,
1253 .ex.invalidate_rkey = rkey,
1256 wr.wr_cqe = &req->reg_cqe;
1257 req->reg_cqe.done = srp_inv_rkey_err_done;
1258 return ib_post_send(ch->qp, &wr, &bad_wr);
1261 static void srp_unmap_data(struct scsi_cmnd *scmnd,
1262 struct srp_rdma_ch *ch,
1263 struct srp_request *req)
1265 struct srp_target_port *target = ch->target;
1266 struct srp_device *dev = target->srp_host->srp_dev;
1267 struct ib_device *ibdev = dev->dev;
1268 int i, res;
1270 if (!scsi_sglist(scmnd) ||
1271 (scmnd->sc_data_direction != DMA_TO_DEVICE &&
1272 scmnd->sc_data_direction != DMA_FROM_DEVICE))
1273 return;
1275 if (dev->use_fast_reg) {
1276 struct srp_fr_desc **pfr;
1278 for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) {
1279 res = srp_inv_rkey(req, ch, (*pfr)->mr->rkey);
1280 if (res < 0) {
1281 shost_printk(KERN_ERR, target->scsi_host, PFX
1282 "Queueing INV WR for rkey %#x failed (%d)\n",
1283 (*pfr)->mr->rkey, res);
1284 queue_work(system_long_wq,
1285 &target->tl_err_work);
1288 if (req->nmdesc)
1289 srp_fr_pool_put(ch->fr_pool, req->fr_list,
1290 req->nmdesc);
1291 } else if (dev->use_fmr) {
1292 struct ib_pool_fmr **pfmr;
1294 for (i = req->nmdesc, pfmr = req->fmr_list; i > 0; i--, pfmr++)
1295 ib_fmr_pool_unmap(*pfmr);
1298 ib_dma_unmap_sg(ibdev, scsi_sglist(scmnd), scsi_sg_count(scmnd),
1299 scmnd->sc_data_direction);
1303 * srp_claim_req - Take ownership of the scmnd associated with a request.
1304 * @ch: SRP RDMA channel.
1305 * @req: SRP request.
1306 * @sdev: If not NULL, only take ownership for this SCSI device.
1307 * @scmnd: If NULL, take ownership of @req->scmnd. If not NULL, only take
1308 * ownership of @req->scmnd if it equals @scmnd.
1310 * Return value:
1311 * Either NULL or a pointer to the SCSI command the caller became owner of.
1313 static struct scsi_cmnd *srp_claim_req(struct srp_rdma_ch *ch,
1314 struct srp_request *req,
1315 struct scsi_device *sdev,
1316 struct scsi_cmnd *scmnd)
1318 unsigned long flags;
1320 spin_lock_irqsave(&ch->lock, flags);
1321 if (req->scmnd &&
1322 (!sdev || req->scmnd->device == sdev) &&
1323 (!scmnd || req->scmnd == scmnd)) {
1324 scmnd = req->scmnd;
1325 req->scmnd = NULL;
1326 } else {
1327 scmnd = NULL;
1329 spin_unlock_irqrestore(&ch->lock, flags);
1331 return scmnd;
1335 * srp_free_req() - Unmap data and adjust ch->req_lim.
1336 * @ch: SRP RDMA channel.
1337 * @req: Request to be freed.
1338 * @scmnd: SCSI command associated with @req.
1339 * @req_lim_delta: Amount to be added to @target->req_lim.
1341 static void srp_free_req(struct srp_rdma_ch *ch, struct srp_request *req,
1342 struct scsi_cmnd *scmnd, s32 req_lim_delta)
1344 unsigned long flags;
1346 srp_unmap_data(scmnd, ch, req);
1348 spin_lock_irqsave(&ch->lock, flags);
1349 ch->req_lim += req_lim_delta;
1350 spin_unlock_irqrestore(&ch->lock, flags);
1353 static void srp_finish_req(struct srp_rdma_ch *ch, struct srp_request *req,
1354 struct scsi_device *sdev, int result)
1356 struct scsi_cmnd *scmnd = srp_claim_req(ch, req, sdev, NULL);
1358 if (scmnd) {
1359 srp_free_req(ch, req, scmnd, 0);
1360 scmnd->result = result;
1361 scmnd->scsi_done(scmnd);
1365 static void srp_terminate_io(struct srp_rport *rport)
1367 struct srp_target_port *target = rport->lld_data;
1368 struct srp_rdma_ch *ch;
1369 struct Scsi_Host *shost = target->scsi_host;
1370 struct scsi_device *sdev;
1371 int i, j;
1374 * Invoking srp_terminate_io() while srp_queuecommand() is running
1375 * is not safe. Hence the warning statement below.
1377 shost_for_each_device(sdev, shost)
1378 WARN_ON_ONCE(sdev->request_queue->request_fn_active);
1380 for (i = 0; i < target->ch_count; i++) {
1381 ch = &target->ch[i];
1383 for (j = 0; j < target->req_ring_size; ++j) {
1384 struct srp_request *req = &ch->req_ring[j];
1386 srp_finish_req(ch, req, NULL,
1387 DID_TRANSPORT_FAILFAST << 16);
1393 * It is up to the caller to ensure that srp_rport_reconnect() calls are
1394 * serialized and that no concurrent srp_queuecommand(), srp_abort(),
1395 * srp_reset_device() or srp_reset_host() calls will occur while this function
1396  * is in progress. One way to achieve this is to call srp_reconnect_rport()
1397  * instead of calling this function directly, since srp_reconnect_rport()
1398 * serializes calls of this function via rport->mutex and also blocks
1399 * srp_queuecommand() calls before invoking this function.
1401 static int srp_rport_reconnect(struct srp_rport *rport)
1403 struct srp_target_port *target = rport->lld_data;
1404 struct srp_rdma_ch *ch;
1405 int i, j, ret = 0;
1406 bool multich = false;
1408 srp_disconnect_target(target);
1410 if (target->state == SRP_TARGET_SCANNING)
1411 return -ENODEV;
1414 * Now get a new local CM ID so that we avoid confusing the target in
1415 * case things are really fouled up. Doing so also ensures that all CM
1416 * callbacks will have finished before a new QP is allocated.
1418 for (i = 0; i < target->ch_count; i++) {
1419 ch = &target->ch[i];
1420 ret += srp_new_cm_id(ch);
1422 for (i = 0; i < target->ch_count; i++) {
1423 ch = &target->ch[i];
1424 for (j = 0; j < target->req_ring_size; ++j) {
1425 struct srp_request *req = &ch->req_ring[j];
1427 srp_finish_req(ch, req, NULL, DID_RESET << 16);
1430 for (i = 0; i < target->ch_count; i++) {
1431 ch = &target->ch[i];
1433 * Whether or not creating a new CM ID succeeded, create a new
1434 * QP. This guarantees that all completion callback function
1435 * invocations have finished before request resetting starts.
1437 ret += srp_create_ch_ib(ch);
1439 INIT_LIST_HEAD(&ch->free_tx);
1440 for (j = 0; j < target->queue_size; ++j)
1441 list_add(&ch->tx_ring[j]->list, &ch->free_tx);
1444 target->qp_in_error = false;
1446 for (i = 0; i < target->ch_count; i++) {
1447 ch = &target->ch[i];
1448 if (ret)
1449 break;
1450 ret = srp_connect_ch(ch, multich);
1451 multich = true;
1454 if (ret == 0)
1455 shost_printk(KERN_INFO, target->scsi_host,
1456 PFX "reconnect succeeded\n");
1458 return ret;
1461 static void srp_map_desc(struct srp_map_state *state, dma_addr_t dma_addr,
1462 unsigned int dma_len, u32 rkey)
1464 struct srp_direct_buf *desc = state->desc;
1466 WARN_ON_ONCE(!dma_len);
1468 desc->va = cpu_to_be64(dma_addr);
1469 desc->key = cpu_to_be32(rkey);
1470 desc->len = cpu_to_be32(dma_len);
1472 state->total_len += dma_len;
1473 state->desc++;
1474 state->ndesc++;
1477 static int srp_map_finish_fmr(struct srp_map_state *state,
1478 struct srp_rdma_ch *ch)
1480 struct srp_target_port *target = ch->target;
1481 struct srp_device *dev = target->srp_host->srp_dev;
1482 struct ib_pool_fmr *fmr;
1483 u64 io_addr = 0;
1485 if (state->fmr.next >= state->fmr.end) {
1486 shost_printk(KERN_ERR, ch->target->scsi_host,
1487 PFX "Out of MRs (mr_per_cmd = %d)\n",
1488 ch->target->mr_per_cmd);
1489 return -ENOMEM;
1492 WARN_ON_ONCE(!dev->use_fmr);
1494 if (state->npages == 0)
1495 return 0;
1497 if (state->npages == 1 && target->global_rkey) {
1498 srp_map_desc(state, state->base_dma_addr, state->dma_len,
1499 target->global_rkey);
1500 goto reset_state;
1503 fmr = ib_fmr_pool_map_phys(ch->fmr_pool, state->pages,
1504 state->npages, io_addr);
1505 if (IS_ERR(fmr))
1506 return PTR_ERR(fmr);
1508 *state->fmr.next++ = fmr;
1509 state->nmdesc++;
1511 srp_map_desc(state, state->base_dma_addr & ~dev->mr_page_mask,
1512 state->dma_len, fmr->fmr->rkey);
1514 reset_state:
1515 state->npages = 0;
1516 state->dma_len = 0;
1518 return 0;
1521 static void srp_reg_mr_err_done(struct ib_cq *cq, struct ib_wc *wc)
1523 srp_handle_qp_err(cq, wc, "FAST REG");
1527  * Map up to sg_nents elements of state->sg, where *sg_offset_p is the offset
1528  * at which to start in the first element. If sg_offset_p != NULL then
1529 * *sg_offset_p is updated to the offset in state->sg[retval] of the first
1530 * byte that has not yet been mapped.
1532 static int srp_map_finish_fr(struct srp_map_state *state,
1533 struct srp_request *req,
1534 struct srp_rdma_ch *ch, int sg_nents,
1535 unsigned int *sg_offset_p)
1537 struct srp_target_port *target = ch->target;
1538 struct srp_device *dev = target->srp_host->srp_dev;
1539 struct ib_send_wr *bad_wr;
1540 struct ib_reg_wr wr;
1541 struct srp_fr_desc *desc;
1542 u32 rkey;
1543 int n, err;
1545 if (state->fr.next >= state->fr.end) {
1546 shost_printk(KERN_ERR, ch->target->scsi_host,
1547 PFX "Out of MRs (mr_per_cmd = %d)\n",
1548 ch->target->mr_per_cmd);
1549 return -ENOMEM;
1552 WARN_ON_ONCE(!dev->use_fast_reg);
1554 if (sg_nents == 1 && target->global_rkey) {
1555 unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
1557 srp_map_desc(state, sg_dma_address(state->sg) + sg_offset,
1558 sg_dma_len(state->sg) - sg_offset,
1559 target->global_rkey);
1560 if (sg_offset_p)
1561 *sg_offset_p = 0;
1562 return 1;
1565 desc = srp_fr_pool_get(ch->fr_pool);
1566 if (!desc)
1567 return -ENOMEM;
1569 rkey = ib_inc_rkey(desc->mr->rkey);
1570 ib_update_fast_reg_key(desc->mr, rkey);
1572 n = ib_map_mr_sg(desc->mr, state->sg, sg_nents, sg_offset_p,
1573 dev->mr_page_size);
1574 if (unlikely(n < 0)) {
1575 srp_fr_pool_put(ch->fr_pool, &desc, 1);
1576 pr_debug("%s: ib_map_mr_sg(%d, %d) returned %d.\n",
1577 dev_name(&req->scmnd->device->sdev_gendev), sg_nents,
1578 sg_offset_p ? *sg_offset_p : -1, n);
1579 return n;
1582 WARN_ON_ONCE(desc->mr->length == 0);
1584 req->reg_cqe.done = srp_reg_mr_err_done;
1586 wr.wr.next = NULL;
1587 wr.wr.opcode = IB_WR_REG_MR;
1588 wr.wr.wr_cqe = &req->reg_cqe;
1589 wr.wr.num_sge = 0;
1590 wr.wr.send_flags = 0;
1591 wr.mr = desc->mr;
1592 wr.key = desc->mr->rkey;
1593 wr.access = (IB_ACCESS_LOCAL_WRITE |
1594 IB_ACCESS_REMOTE_READ |
1595 IB_ACCESS_REMOTE_WRITE);
1597 *state->fr.next++ = desc;
1598 state->nmdesc++;
1600 srp_map_desc(state, desc->mr->iova,
1601 desc->mr->length, desc->mr->rkey);
1603 err = ib_post_send(ch->qp, &wr.wr, &bad_wr);
1604 if (unlikely(err)) {
1605 WARN_ON_ONCE(err == -ENOMEM);
1606 return err;
1609 return n;
1612 static int srp_map_sg_entry(struct srp_map_state *state,
1613 struct srp_rdma_ch *ch,
1614 struct scatterlist *sg)
1616 struct srp_target_port *target = ch->target;
1617 struct srp_device *dev = target->srp_host->srp_dev;
1618 struct ib_device *ibdev = dev->dev;
1619 dma_addr_t dma_addr = ib_sg_dma_address(ibdev, sg);
1620 unsigned int dma_len = ib_sg_dma_len(ibdev, sg);
1621 unsigned int len = 0;
1622 int ret;
1624 WARN_ON_ONCE(!dma_len);
1626 while (dma_len) {
1627 unsigned offset = dma_addr & ~dev->mr_page_mask;
1629 if (state->npages == dev->max_pages_per_mr ||
1630 (state->npages > 0 && offset != 0)) {
1631 ret = srp_map_finish_fmr(state, ch);
1632 if (ret)
1633 return ret;
1636 len = min_t(unsigned int, dma_len, dev->mr_page_size - offset);
1638 if (!state->npages)
1639 state->base_dma_addr = dma_addr;
1640 state->pages[state->npages++] = dma_addr & dev->mr_page_mask;
1641 state->dma_len += len;
1642 dma_addr += len;
1643 dma_len -= len;
1647 * If the end of the MR is not on a page boundary then we need to
1648 * close it out and start a new one -- we can only merge at page
1649 * boundaries.
1651 ret = 0;
1652 if ((dma_addr & ~dev->mr_page_mask) != 0)
1653 ret = srp_map_finish_fmr(state, ch);
1654 return ret;
1657 static int srp_map_sg_fmr(struct srp_map_state *state, struct srp_rdma_ch *ch,
1658 struct srp_request *req, struct scatterlist *scat,
1659 int count)
1661 struct scatterlist *sg;
1662 int i, ret;
1664 state->pages = req->map_page;
1665 state->fmr.next = req->fmr_list;
1666 state->fmr.end = req->fmr_list + ch->target->mr_per_cmd;
1668 for_each_sg(scat, sg, count, i) {
1669 ret = srp_map_sg_entry(state, ch, sg);
1670 if (ret)
1671 return ret;
1674 ret = srp_map_finish_fmr(state, ch);
1675 if (ret)
1676 return ret;
1678 return 0;
1681 static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch,
1682 struct srp_request *req, struct scatterlist *scat,
1683 int count)
1685 unsigned int sg_offset = 0;
1687 state->fr.next = req->fr_list;
1688 state->fr.end = req->fr_list + ch->target->mr_per_cmd;
1689 state->sg = scat;
1691 if (count == 0)
1692 return 0;
1694 while (count) {
1695 int i, n;
1697 n = srp_map_finish_fr(state, req, ch, count, &sg_offset);
1698 if (unlikely(n < 0))
1699 return n;
1701 count -= n;
1702 for (i = 0; i < n; i++)
1703 state->sg = sg_next(state->sg);
1706 return 0;
1709 static int srp_map_sg_dma(struct srp_map_state *state, struct srp_rdma_ch *ch,
1710 struct srp_request *req, struct scatterlist *scat,
1711 int count)
1713 struct srp_target_port *target = ch->target;
1714 struct srp_device *dev = target->srp_host->srp_dev;
1715 struct scatterlist *sg;
1716 int i;
1718 for_each_sg(scat, sg, count, i) {
1719 srp_map_desc(state, ib_sg_dma_address(dev->dev, sg),
1720 ib_sg_dma_len(dev->dev, sg),
1721 target->global_rkey);
1724 return 0;
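/*
 * Note: srp_map_data() below picks one of the three scatterlist mapping
 * strategies above: srp_map_sg_fr() when fast registration is in use,
 * srp_map_sg_fmr() when FMR is in use, and srp_map_sg_dma(), which relies
 * on the global rkey, when no memory registration is performed.
 */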
1728 * Register the indirect data buffer descriptor with the HCA.
1730 * Note: since the indirect data buffer descriptor has been allocated with
1731 * kmalloc() it is guaranteed that this buffer is a physically contiguous
1732 * memory buffer.
1734 static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req,
1735 void **next_mr, void **end_mr, u32 idb_len,
1736 __be32 *idb_rkey)
1738 struct srp_target_port *target = ch->target;
1739 struct srp_device *dev = target->srp_host->srp_dev;
1740 struct srp_map_state state;
1741 struct srp_direct_buf idb_desc;
1742 u64 idb_pages[1];
1743 struct scatterlist idb_sg[1];
1744 int ret;
1746 memset(&state, 0, sizeof(state));
1747 memset(&idb_desc, 0, sizeof(idb_desc));
1748 state.gen.next = next_mr;
1749 state.gen.end = end_mr;
1750 state.desc = &idb_desc;
1751 state.base_dma_addr = req->indirect_dma_addr;
1752 state.dma_len = idb_len;
1754 if (dev->use_fast_reg) {
1755 state.sg = idb_sg;
1756 sg_init_one(idb_sg, req->indirect_desc, idb_len);
1757 idb_sg->dma_address = req->indirect_dma_addr; /* hack! */
1758 #ifdef CONFIG_NEED_SG_DMA_LENGTH
1759 idb_sg->dma_length = idb_sg->length; /* hack^2 */
1760 #endif
1761 ret = srp_map_finish_fr(&state, req, ch, 1, NULL);
1762 if (ret < 0)
1763 return ret;
1764 WARN_ON_ONCE(ret < 1);
1765 } else if (dev->use_fmr) {
1766 state.pages = idb_pages;
1767 state.pages[0] = (req->indirect_dma_addr &
1768 dev->mr_page_mask);
1769 state.npages = 1;
1770 ret = srp_map_finish_fmr(&state, ch);
1771 if (ret < 0)
1772 return ret;
1773 } else {
1774 return -EINVAL;
1777 *idb_rkey = idb_desc.key;
1779 return 0;
1782 static void srp_check_mapping(struct srp_map_state *state,
1783 struct srp_rdma_ch *ch, struct srp_request *req,
1784 struct scatterlist *scat, int count)
1786 struct srp_device *dev = ch->target->srp_host->srp_dev;
1787 struct srp_fr_desc **pfr;
1788 u64 desc_len = 0, mr_len = 0;
1789 int i;
1791 for (i = 0; i < state->ndesc; i++)
1792 desc_len += be32_to_cpu(req->indirect_desc[i].len);
1793 if (dev->use_fast_reg)
1794 for (i = 0, pfr = req->fr_list; i < state->nmdesc; i++, pfr++)
1795 mr_len += (*pfr)->mr->length;
1796 else if (dev->use_fmr)
1797 for (i = 0; i < state->nmdesc; i++)
1798 mr_len += be32_to_cpu(req->indirect_desc[i].len);
1799 if (desc_len != scsi_bufflen(req->scmnd) ||
1800 mr_len > scsi_bufflen(req->scmnd))
1801 pr_err("Inconsistent: scsi len %d <> desc len %lld <> mr len %lld; ndesc %d; nmdesc = %d\n",
1802 scsi_bufflen(req->scmnd), desc_len, mr_len,
1803 state->ndesc, state->nmdesc);
1807 * srp_map_data() - map SCSI data buffer onto an SRP request
1808 * @scmnd: SCSI command to map
1809 * @ch: SRP RDMA channel
1810 * @req: SRP request
1812 * Returns the length in bytes of the SRP_CMD IU or a negative value if
1813 * mapping failed.
1815 static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
1816 struct srp_request *req)
1818 struct srp_target_port *target = ch->target;
1819 struct scatterlist *scat;
1820 struct srp_cmd *cmd = req->cmd->buf;
1821 int len, nents, count, ret;
1822 struct srp_device *dev;
1823 struct ib_device *ibdev;
1824 struct srp_map_state state;
1825 struct srp_indirect_buf *indirect_hdr;
1826 u32 idb_len, table_len;
1827 __be32 idb_rkey;
1828 u8 fmt;
1830 if (!scsi_sglist(scmnd) || scmnd->sc_data_direction == DMA_NONE)
1831 return sizeof (struct srp_cmd);
1833 if (scmnd->sc_data_direction != DMA_FROM_DEVICE &&
1834 scmnd->sc_data_direction != DMA_TO_DEVICE) {
1835 shost_printk(KERN_WARNING, target->scsi_host,
1836 PFX "Unhandled data direction %d\n",
1837 scmnd->sc_data_direction);
1838 return -EINVAL;
1841 nents = scsi_sg_count(scmnd);
1842 scat = scsi_sglist(scmnd);
1844 dev = target->srp_host->srp_dev;
1845 ibdev = dev->dev;
1847 count = ib_dma_map_sg(ibdev, scat, nents, scmnd->sc_data_direction);
1848 if (unlikely(count == 0))
1849 return -EIO;
1851 fmt = SRP_DATA_DESC_DIRECT;
1852 len = sizeof (struct srp_cmd) + sizeof (struct srp_direct_buf);
1854 if (count == 1 && target->global_rkey) {
1856 * The midlayer only generated a single gather/scatter
1857 * entry, or DMA mapping coalesced everything to a
1858 * single entry. So a direct descriptor along with
1859 * the DMA MR suffices.
1861 struct srp_direct_buf *buf = (void *) cmd->add_data;
1863 buf->va = cpu_to_be64(ib_sg_dma_address(ibdev, scat));
1864 buf->key = cpu_to_be32(target->global_rkey);
1865 buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat));
1867 req->nmdesc = 0;
1868 goto map_complete;
1872 * We have more than one scatter/gather entry, so build our indirect
1873 * descriptor table, trying to merge as many entries as we can.
1875 indirect_hdr = (void *) cmd->add_data;
1877 ib_dma_sync_single_for_cpu(ibdev, req->indirect_dma_addr,
1878 target->indirect_size, DMA_TO_DEVICE);
1880 memset(&state, 0, sizeof(state));
1881 state.desc = req->indirect_desc;
1882 if (dev->use_fast_reg)
1883 ret = srp_map_sg_fr(&state, ch, req, scat, count);
1884 else if (dev->use_fmr)
1885 ret = srp_map_sg_fmr(&state, ch, req, scat, count);
1886 else
1887 ret = srp_map_sg_dma(&state, ch, req, scat, count);
1888 req->nmdesc = state.nmdesc;
1889 if (ret < 0)
1890 goto unmap;
1893 DEFINE_DYNAMIC_DEBUG_METADATA(ddm,
1894 "Memory mapping consistency check");
1895 if (DYNAMIC_DEBUG_BRANCH(ddm))
1896 srp_check_mapping(&state, ch, req, scat, count);
1899 /* We've mapped the request, now pull as much of the indirect
1900 * descriptor table as we can into the command buffer. If this
1901 * target is not using an external indirect table, we are
1902 * guaranteed to fit into the command, as the SCSI layer won't
1903 * give us more S/G entries than we allow.
1905 if (state.ndesc == 1) {
1907 * Memory registration collapsed the sg-list into one entry,
1908 * so use a direct descriptor.
1910 struct srp_direct_buf *buf = (void *) cmd->add_data;
1912 *buf = req->indirect_desc[0];
1913 goto map_complete;
1916 if (unlikely(target->cmd_sg_cnt < state.ndesc &&
1917 !target->allow_ext_sg)) {
1918 shost_printk(KERN_ERR, target->scsi_host,
1919 "Could not fit S/G list into SRP_CMD\n");
1920 ret = -EIO;
1921 goto unmap;
1924 count = min(state.ndesc, target->cmd_sg_cnt);
1925 table_len = state.ndesc * sizeof (struct srp_direct_buf);
1926 idb_len = sizeof(struct srp_indirect_buf) + table_len;
1928 fmt = SRP_DATA_DESC_INDIRECT;
1929 len = sizeof(struct srp_cmd) + sizeof (struct srp_indirect_buf);
1930 len += count * sizeof (struct srp_direct_buf);
1932 memcpy(indirect_hdr->desc_list, req->indirect_desc,
1933 count * sizeof (struct srp_direct_buf));
1935 if (!target->global_rkey) {
1936 ret = srp_map_idb(ch, req, state.gen.next, state.gen.end,
1937 idb_len, &idb_rkey);
1938 if (ret < 0)
1939 goto unmap;
1940 req->nmdesc++;
1941 } else {
1942 idb_rkey = cpu_to_be32(target->global_rkey);
1945 indirect_hdr->table_desc.va = cpu_to_be64(req->indirect_dma_addr);
1946 indirect_hdr->table_desc.key = idb_rkey;
1947 indirect_hdr->table_desc.len = cpu_to_be32(table_len);
1948 indirect_hdr->len = cpu_to_be32(state.total_len);
1950 if (scmnd->sc_data_direction == DMA_TO_DEVICE)
1951 cmd->data_out_desc_cnt = count;
1952 else
1953 cmd->data_in_desc_cnt = count;
1955 ib_dma_sync_single_for_device(ibdev, req->indirect_dma_addr, table_len,
1956 DMA_TO_DEVICE);
1958 map_complete:
1959 if (scmnd->sc_data_direction == DMA_TO_DEVICE)
1960 cmd->buf_fmt = fmt << 4;
1961 else
1962 cmd->buf_fmt = fmt;
1964 return len;
1966 unmap:
1967 srp_unmap_data(scmnd, ch, req);
1968 if (ret == -ENOMEM && req->nmdesc >= target->mr_pool_size)
1969 ret = -E2BIG;
1970 return ret;
1974 * Return an IU and possible credit to the free pool
1976 static void srp_put_tx_iu(struct srp_rdma_ch *ch, struct srp_iu *iu,
1977 enum srp_iu_type iu_type)
1979 unsigned long flags;
1981 spin_lock_irqsave(&ch->lock, flags);
1982 list_add(&iu->list, &ch->free_tx);
1983 if (iu_type != SRP_IU_RSP)
1984 ++ch->req_lim;
1985 spin_unlock_irqrestore(&ch->lock, flags);
1989 * Must be called with ch->lock held to protect req_lim and free_tx.
1990 * If IU is not sent, it must be returned using srp_put_tx_iu().
1992 * Note:
1993 * An upper limit for the number of allocated information units for each
1994 * request type is:
1995 * - SRP_IU_CMD: SRP_CMD_SQ_SIZE, since the SCSI mid-layer never queues
1996 * more than Scsi_Host.can_queue requests.
1997 * - SRP_IU_TSK_MGMT: SRP_TSK_MGMT_SQ_SIZE.
1998 * - SRP_IU_RSP: 1, since a conforming SRP target never sends more than
1999 * one unanswered SRP request to an initiator.
2001 static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch,
2002 enum srp_iu_type iu_type)
2004 struct srp_target_port *target = ch->target;
2005 s32 rsv = (iu_type == SRP_IU_TSK_MGMT) ? 0 : SRP_TSK_MGMT_SQ_SIZE;
2006 struct srp_iu *iu;
2008 lockdep_assert_held(&ch->lock);
2010 ib_process_cq_direct(ch->send_cq, -1);
2012 if (list_empty(&ch->free_tx))
2013 return NULL;
2015 /* Initiator responses to target requests do not consume credits */
2016 if (iu_type != SRP_IU_RSP) {
2017 if (ch->req_lim <= rsv) {
2018 ++target->zero_req_lim;
2019 return NULL;
2022 --ch->req_lim;
2025 iu = list_first_entry(&ch->free_tx, struct srp_iu, list);
2026 list_del(&iu->list);
2027 return iu;
2031 * Note: if this function is called from inside ib_drain_sq() then it will
2032 * be called without ch->lock being held. If ib_drain_sq() dequeues a WQE
2033 * with status IB_WC_SUCCESS then that's a bug.
2035 static void srp_send_done(struct ib_cq *cq, struct ib_wc *wc)
2037 struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
2038 struct srp_rdma_ch *ch = cq->cq_context;
2040 if (unlikely(wc->status != IB_WC_SUCCESS)) {
2041 srp_handle_qp_err(cq, wc, "SEND");
2042 return;
2045 lockdep_assert_held(&ch->lock);
2047 list_add(&iu->list, &ch->free_tx);
2050 static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len)
2052 struct srp_target_port *target = ch->target;
2053 struct ib_sge list;
2054 struct ib_send_wr wr, *bad_wr;
2056 list.addr = iu->dma;
2057 list.length = len;
2058 list.lkey = target->lkey;
2060 iu->cqe.done = srp_send_done;
2062 wr.next = NULL;
2063 wr.wr_cqe = &iu->cqe;
2064 wr.sg_list = &list;
2065 wr.num_sge = 1;
2066 wr.opcode = IB_WR_SEND;
2067 wr.send_flags = IB_SEND_SIGNALED;
2069 return ib_post_send(ch->qp, &wr, &bad_wr);
2072 static int srp_post_recv(struct srp_rdma_ch *ch, struct srp_iu *iu)
2074 struct srp_target_port *target = ch->target;
2075 struct ib_recv_wr wr, *bad_wr;
2076 struct ib_sge list;
2078 list.addr = iu->dma;
2079 list.length = iu->size;
2080 list.lkey = target->lkey;
2082 iu->cqe.done = srp_recv_done;
2084 wr.next = NULL;
2085 wr.wr_cqe = &iu->cqe;
2086 wr.sg_list = &list;
2087 wr.num_sge = 1;
2089 return ib_post_recv(ch->qp, &wr, &bad_wr);
2092 static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp)
2094 struct srp_target_port *target = ch->target;
2095 struct srp_request *req;
2096 struct scsi_cmnd *scmnd;
2097 unsigned long flags;
2099 if (unlikely(rsp->tag & SRP_TAG_TSK_MGMT)) {
2100 spin_lock_irqsave(&ch->lock, flags);
2101 ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
2102 if (rsp->tag == ch->tsk_mgmt_tag) {
2103 ch->tsk_mgmt_status = -1;
2104 if (be32_to_cpu(rsp->resp_data_len) >= 4)
2105 ch->tsk_mgmt_status = rsp->data[3];
2106 complete(&ch->tsk_mgmt_done);
2107 } else {
2108 shost_printk(KERN_ERR, target->scsi_host,
2109 "Received tsk mgmt response too late for tag %#llx\n",
2110 rsp->tag);
2112 spin_unlock_irqrestore(&ch->lock, flags);
2113 } else {
2114 scmnd = scsi_host_find_tag(target->scsi_host, rsp->tag);
2115 if (scmnd && scmnd->host_scribble) {
2116 req = (void *)scmnd->host_scribble;
2117 scmnd = srp_claim_req(ch, req, NULL, scmnd);
2118 } else {
2119 scmnd = NULL;
2121 if (!scmnd) {
2122 shost_printk(KERN_ERR, target->scsi_host,
2123 "Null scmnd for RSP w/tag %#016llx received on ch %td / QP %#x\n",
2124 rsp->tag, ch - target->ch, ch->qp->qp_num);
2126 spin_lock_irqsave(&ch->lock, flags);
2127 ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
2128 spin_unlock_irqrestore(&ch->lock, flags);
2130 return;
2132 scmnd->result = rsp->status;
2134 if (rsp->flags & SRP_RSP_FLAG_SNSVALID) {
2135 memcpy(scmnd->sense_buffer, rsp->data +
2136 be32_to_cpu(rsp->resp_data_len),
2137 min_t(int, be32_to_cpu(rsp->sense_data_len),
2138 SCSI_SENSE_BUFFERSIZE));
2141 if (unlikely(rsp->flags & SRP_RSP_FLAG_DIUNDER))
2142 scsi_set_resid(scmnd, be32_to_cpu(rsp->data_in_res_cnt));
2143 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DIOVER))
2144 scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_in_res_cnt));
2145 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOUNDER))
2146 scsi_set_resid(scmnd, be32_to_cpu(rsp->data_out_res_cnt));
2147 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOOVER))
2148 scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_out_res_cnt));
2150 srp_free_req(ch, req, scmnd,
2151 be32_to_cpu(rsp->req_lim_delta));
2153 scmnd->host_scribble = NULL;
2154 scmnd->scsi_done(scmnd);
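/*
 * Residual handling sketch for the flag checks above: if a 4 KiB read
 * only transfers 3 KiB, the target sets SRP_RSP_FLAG_DIUNDER with
 * data_in_res_cnt == 1024 and scsi_set_resid(scmnd, 1024) reports 1024
 * untransferred bytes to the SCSI mid-layer; the *OVER flags describe
 * the opposite case and are reported as a negative residual.
 */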
2158 static int srp_response_common(struct srp_rdma_ch *ch, s32 req_delta,
2159 void *rsp, int len)
2161 struct srp_target_port *target = ch->target;
2162 struct ib_device *dev = target->srp_host->srp_dev->dev;
2163 unsigned long flags;
2164 struct srp_iu *iu;
2165 int err;
2167 spin_lock_irqsave(&ch->lock, flags);
2168 ch->req_lim += req_delta;
2169 iu = __srp_get_tx_iu(ch, SRP_IU_RSP);
2170 spin_unlock_irqrestore(&ch->lock, flags);
2172 if (!iu) {
2173 shost_printk(KERN_ERR, target->scsi_host, PFX
2174 "no IU available to send response\n");
2175 return 1;
2178 ib_dma_sync_single_for_cpu(dev, iu->dma, len, DMA_TO_DEVICE);
2179 memcpy(iu->buf, rsp, len);
2180 ib_dma_sync_single_for_device(dev, iu->dma, len, DMA_TO_DEVICE);
2182 err = srp_post_send(ch, iu, len);
2183 if (err) {
2184 shost_printk(KERN_ERR, target->scsi_host, PFX
2185 "unable to post response: %d\n", err);
2186 srp_put_tx_iu(ch, iu, SRP_IU_RSP);
2189 return err;
2192 static void srp_process_cred_req(struct srp_rdma_ch *ch,
2193 struct srp_cred_req *req)
2195 struct srp_cred_rsp rsp = {
2196 .opcode = SRP_CRED_RSP,
2197 .tag = req->tag,
2199 s32 delta = be32_to_cpu(req->req_lim_delta);
2201 if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
2202 shost_printk(KERN_ERR, ch->target->scsi_host, PFX
2203 "problems processing SRP_CRED_REQ\n");
2206 static void srp_process_aer_req(struct srp_rdma_ch *ch,
2207 struct srp_aer_req *req)
2209 struct srp_target_port *target = ch->target;
2210 struct srp_aer_rsp rsp = {
2211 .opcode = SRP_AER_RSP,
2212 .tag = req->tag,
2214 s32 delta = be32_to_cpu(req->req_lim_delta);
2216 shost_printk(KERN_ERR, target->scsi_host, PFX
2217 "ignoring AER for LUN %llu\n", scsilun_to_int(&req->lun));
2219 if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
2220 shost_printk(KERN_ERR, target->scsi_host, PFX
2221 "problems processing SRP_AER_REQ\n");
2224 static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc)
2226 struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
2227 struct srp_rdma_ch *ch = cq->cq_context;
2228 struct srp_target_port *target = ch->target;
2229 struct ib_device *dev = target->srp_host->srp_dev->dev;
2230 int res;
2231 u8 opcode;
2233 if (unlikely(wc->status != IB_WC_SUCCESS)) {
2234 srp_handle_qp_err(cq, wc, "RECV");
2235 return;
2238 ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_ti_iu_len,
2239 DMA_FROM_DEVICE);
2241 opcode = *(u8 *) iu->buf;
2243 if (0) {
2244 shost_printk(KERN_ERR, target->scsi_host,
2245 PFX "recv completion, opcode 0x%02x\n", opcode);
2246 print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 8, 1,
2247 iu->buf, wc->byte_len, true);
2250 switch (opcode) {
2251 case SRP_RSP:
2252 srp_process_rsp(ch, iu->buf);
2253 break;
2255 case SRP_CRED_REQ:
2256 srp_process_cred_req(ch, iu->buf);
2257 break;
2259 case SRP_AER_REQ:
2260 srp_process_aer_req(ch, iu->buf);
2261 break;
2263 case SRP_T_LOGOUT:
2264 /* XXX Handle target logout */
2265 shost_printk(KERN_WARNING, target->scsi_host,
2266 PFX "Got target logout request\n");
2267 break;
2269 default:
2270 shost_printk(KERN_WARNING, target->scsi_host,
2271 PFX "Unhandled SRP opcode 0x%02x\n", opcode);
2272 break;
2275 ib_dma_sync_single_for_device(dev, iu->dma, ch->max_ti_iu_len,
2276 DMA_FROM_DEVICE);
2278 res = srp_post_recv(ch, iu);
2279 if (res != 0)
2280 shost_printk(KERN_ERR, target->scsi_host,
2281 PFX "Recv failed with error code %d\n", res);
2285 * srp_tl_err_work() - handle a transport layer error
2286 * @work: Work structure embedded in an SRP target port.
2288 * Note: This function may get invoked before the rport has been created,
2289 * hence the target->rport test.
2291 static void srp_tl_err_work(struct work_struct *work)
2293 struct srp_target_port *target;
2295 target = container_of(work, struct srp_target_port, tl_err_work);
2296 if (target->rport)
2297 srp_start_tl_fail_timers(target->rport);
2300 static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
2301 const char *opname)
2303 struct srp_rdma_ch *ch = cq->cq_context;
2304 struct srp_target_port *target = ch->target;
2306 if (ch->connected && !target->qp_in_error) {
2307 shost_printk(KERN_ERR, target->scsi_host,
2308 PFX "failed %s status %s (%d) for CQE %p\n",
2309 opname, ib_wc_status_msg(wc->status), wc->status,
2310 wc->wr_cqe);
2311 queue_work(system_long_wq, &target->tl_err_work);
2313 target->qp_in_error = true;
2316 static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
2318 struct srp_target_port *target = host_to_target(shost);
2319 struct srp_rport *rport = target->rport;
2320 struct srp_rdma_ch *ch;
2321 struct srp_request *req;
2322 struct srp_iu *iu;
2323 struct srp_cmd *cmd;
2324 struct ib_device *dev;
2325 unsigned long flags;
2326 u32 tag;
2327 u16 idx;
2328 int len, ret;
2329 const bool in_scsi_eh = !in_interrupt() && current == shost->ehandler;
2332 * The SCSI EH thread is the only context from which srp_queuecommand()
2333 * can get invoked for blocked devices (SDEV_BLOCK /
2334 * SDEV_CREATED_BLOCK). Avoid racing with srp_reconnect_rport() by
2335 * locking the rport mutex if invoked from inside the SCSI EH.
2337 if (in_scsi_eh)
2338 mutex_lock(&rport->mutex);
2340 scmnd->result = srp_chkready(target->rport);
2341 if (unlikely(scmnd->result))
2342 goto err;
2344 WARN_ON_ONCE(scmnd->request->tag < 0);
2345 tag = blk_mq_unique_tag(scmnd->request);
2346 ch = &target->ch[blk_mq_unique_tag_to_hwq(tag)];
2347 idx = blk_mq_unique_tag_to_tag(tag);
2348 WARN_ONCE(idx >= target->req_ring_size, "%s: tag %#x: idx %d >= %d\n",
2349 dev_name(&shost->shost_gendev), tag, idx,
2350 target->req_ring_size);
2352 spin_lock_irqsave(&ch->lock, flags);
2353 iu = __srp_get_tx_iu(ch, SRP_IU_CMD);
2354 spin_unlock_irqrestore(&ch->lock, flags);
2356 if (!iu)
2357 goto err;
2359 req = &ch->req_ring[idx];
2360 dev = target->srp_host->srp_dev->dev;
2361 ib_dma_sync_single_for_cpu(dev, iu->dma, target->max_iu_len,
2362 DMA_TO_DEVICE);
2364 scmnd->host_scribble = (void *) req;
2366 cmd = iu->buf;
2367 memset(cmd, 0, sizeof *cmd);
2369 cmd->opcode = SRP_CMD;
2370 int_to_scsilun(scmnd->device->lun, &cmd->lun);
2371 cmd->tag = tag;
2372 memcpy(cmd->cdb, scmnd->cmnd, scmnd->cmd_len);
2374 req->scmnd = scmnd;
2375 req->cmd = iu;
2377 len = srp_map_data(scmnd, ch, req);
2378 if (len < 0) {
2379 shost_printk(KERN_ERR, target->scsi_host,
2380 PFX "Failed to map data (%d)\n", len);
2382 * If we ran out of memory descriptors (-ENOMEM) because an
2383 * application is queuing many requests with more than
2384 * max_pages_per_mr sg-list elements, tell the SCSI mid-layer
2385 * to reduce queue depth temporarily.
2387 scmnd->result = len == -ENOMEM ?
2388 DID_OK << 16 | QUEUE_FULL << 1 : DID_ERROR << 16;
2389 goto err_iu;
2392 ib_dma_sync_single_for_device(dev, iu->dma, target->max_iu_len,
2393 DMA_TO_DEVICE);
2395 if (srp_post_send(ch, iu, len)) {
2396 shost_printk(KERN_ERR, target->scsi_host, PFX "Send failed\n");
2397 goto err_unmap;
2400 ret = 0;
2402 unlock_rport:
2403 if (in_scsi_eh)
2404 mutex_unlock(&rport->mutex);
2406 return ret;
2408 err_unmap:
2409 srp_unmap_data(scmnd, ch, req);
2411 err_iu:
2412 srp_put_tx_iu(ch, iu, SRP_IU_CMD);
2415 * Prevent the loops that iterate over the request ring from
2416 * encountering a dangling SCSI command pointer.
2418 req->scmnd = NULL;
2420 err:
2421 if (scmnd->result) {
2422 scmnd->scsi_done(scmnd);
2423 ret = 0;
2424 } else {
2425 ret = SCSI_MLQUEUE_HOST_BUSY;
2428 goto unlock_rport;
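/*
 * Illustration of the tag decoding above, assuming the usual blk-mq
 * encoding (hardware queue index in the upper 16 bits, per-queue tag in
 * the lower 16 bits): a unique tag of 0x0002000a decodes to hwq 2 and
 * tag 10, i.e. ch = &target->ch[2] and req = &ch->req_ring[10], so each
 * hardware queue maps onto one RDMA channel and the per-queue tag
 * indexes that channel's request ring.
 */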
2432 * Note: the resources allocated in this function are freed in
2433 * srp_free_ch_ib().
2435 static int srp_alloc_iu_bufs(struct srp_rdma_ch *ch)
2437 struct srp_target_port *target = ch->target;
2438 int i;
2440 ch->rx_ring = kcalloc(target->queue_size, sizeof(*ch->rx_ring),
2441 GFP_KERNEL);
2442 if (!ch->rx_ring)
2443 goto err_no_ring;
2444 ch->tx_ring = kcalloc(target->queue_size, sizeof(*ch->tx_ring),
2445 GFP_KERNEL);
2446 if (!ch->tx_ring)
2447 goto err_no_ring;
2449 for (i = 0; i < target->queue_size; ++i) {
2450 ch->rx_ring[i] = srp_alloc_iu(target->srp_host,
2451 ch->max_ti_iu_len,
2452 GFP_KERNEL, DMA_FROM_DEVICE);
2453 if (!ch->rx_ring[i])
2454 goto err;
2457 for (i = 0; i < target->queue_size; ++i) {
2458 ch->tx_ring[i] = srp_alloc_iu(target->srp_host,
2459 target->max_iu_len,
2460 GFP_KERNEL, DMA_TO_DEVICE);
2461 if (!ch->tx_ring[i])
2462 goto err;
2464 list_add(&ch->tx_ring[i]->list, &ch->free_tx);
2467 return 0;
2469 err:
2470 for (i = 0; i < target->queue_size; ++i) {
2471 srp_free_iu(target->srp_host, ch->rx_ring[i]);
2472 srp_free_iu(target->srp_host, ch->tx_ring[i]);
2476 err_no_ring:
2477 kfree(ch->tx_ring);
2478 ch->tx_ring = NULL;
2479 kfree(ch->rx_ring);
2480 ch->rx_ring = NULL;
2482 return -ENOMEM;
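/*
 * Sizing note: both rings hold target->queue_size IUs; RX IUs are sized
 * to ch->max_ti_iu_len (the maximum target-to-initiator IU length from
 * the login response) and TX IUs to target->max_iu_len (SRP command +
 * indirect header + cmd_sg_cnt direct descriptors, computed in
 * srp_create_target()).
 */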
2485 static uint32_t srp_compute_rq_tmo(struct ib_qp_attr *qp_attr, int attr_mask)
2487 uint64_t T_tr_ns, max_compl_time_ms;
2488 uint32_t rq_tmo_jiffies;
2491 * According to section 11.2.4.2 in the IBTA spec (Modify Queue Pair,
2492 * table 91), both the QP timeout and the retry count have to be set
2493 * for RC QP's during the RTR to RTS transition.
2495 WARN_ON_ONCE((attr_mask & (IB_QP_TIMEOUT | IB_QP_RETRY_CNT)) !=
2496 (IB_QP_TIMEOUT | IB_QP_RETRY_CNT));
2499 * Set target->rq_tmo_jiffies to one second more than the largest time
2500 * it can take before an error completion is generated. See also
2501 * C9-140..142 in the IBTA spec for more information about how to
2502 * convert the QP Local ACK Timeout value to nanoseconds.
2504 T_tr_ns = 4096 * (1ULL << qp_attr->timeout);
2505 max_compl_time_ms = qp_attr->retry_cnt * 4 * T_tr_ns;
2506 do_div(max_compl_time_ms, NSEC_PER_MSEC);
2507 rq_tmo_jiffies = msecs_to_jiffies(max_compl_time_ms + 1000);
2509 return rq_tmo_jiffies;
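/*
 * Worked example with hypothetical but typical values: for
 * qp_attr->timeout == 14 and qp_attr->retry_cnt == 7,
 *
 *	T_tr_ns           = 4096 * 2^14 ns                ~= 67.1 ms
 *	max_compl_time_ms = 7 * 4 * 67.1 ms               ~= 1879 ms
 *	rq_tmo_jiffies    = msecs_to_jiffies(1879 + 1000) ~= 2.9 s
 *
 * i.e. the block layer request timeout ends up roughly one second above
 * the worst-case HCA retry time.
 */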
2512 static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
2513 const struct srp_login_rsp *lrsp,
2514 struct srp_rdma_ch *ch)
2516 struct srp_target_port *target = ch->target;
2517 struct ib_qp_attr *qp_attr = NULL;
2518 int attr_mask = 0;
2519 int ret = 0;
2520 int i;
2522 if (lrsp->opcode == SRP_LOGIN_RSP) {
2523 ch->max_ti_iu_len = be32_to_cpu(lrsp->max_ti_iu_len);
2524 ch->req_lim = be32_to_cpu(lrsp->req_lim_delta);
2527 * Reserve credits for task management so we don't
2528 * bounce requests back to the SCSI mid-layer.
2530 target->scsi_host->can_queue
2531 = min(ch->req_lim - SRP_TSK_MGMT_SQ_SIZE,
2532 target->scsi_host->can_queue);
2533 target->scsi_host->cmd_per_lun
2534 = min_t(int, target->scsi_host->can_queue,
2535 target->scsi_host->cmd_per_lun);
2536 } else {
2537 shost_printk(KERN_WARNING, target->scsi_host,
2538 PFX "Unhandled RSP opcode %#x\n", lrsp->opcode);
2539 ret = -ECONNRESET;
2540 goto error;
2543 if (!ch->rx_ring) {
2544 ret = srp_alloc_iu_bufs(ch);
2545 if (ret)
2546 goto error;
2549 for (i = 0; i < target->queue_size; i++) {
2550 struct srp_iu *iu = ch->rx_ring[i];
2552 ret = srp_post_recv(ch, iu);
2553 if (ret)
2554 goto error;
2557 if (!target->using_rdma_cm) {
2558 ret = -ENOMEM;
2559 qp_attr = kmalloc(sizeof(*qp_attr), GFP_KERNEL);
2560 if (!qp_attr)
2561 goto error;
2563 qp_attr->qp_state = IB_QPS_RTR;
2564 ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2565 if (ret)
2566 goto error_free;
2568 ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
2569 if (ret)
2570 goto error_free;
2572 qp_attr->qp_state = IB_QPS_RTS;
2573 ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2574 if (ret)
2575 goto error_free;
2577 target->rq_tmo_jiffies = srp_compute_rq_tmo(qp_attr, attr_mask);
2579 ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
2580 if (ret)
2581 goto error_free;
2583 ret = ib_send_cm_rtu(cm_id, NULL, 0);
2586 error_free:
2587 kfree(qp_attr);
2589 error:
2590 ch->status = ret;
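/*
 * Example of the credit-based clamping above, assuming
 * SRP_TSK_MGMT_SQ_SIZE == 1: if the login response grants
 * req_lim_delta == 64 and the host was created with can_queue == 62,
 * can_queue stays at min(64 - 1, 62) == 62; a grant of 32 lowers it to
 * 31, so one credit always remains available for task management IUs.
 */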
2593 static void srp_ib_cm_rej_handler(struct ib_cm_id *cm_id,
2594 struct ib_cm_event *event,
2595 struct srp_rdma_ch *ch)
2597 struct srp_target_port *target = ch->target;
2598 struct Scsi_Host *shost = target->scsi_host;
2599 struct ib_class_port_info *cpi;
2600 int opcode;
2601 u16 dlid;
2603 switch (event->param.rej_rcvd.reason) {
2604 case IB_CM_REJ_PORT_CM_REDIRECT:
2605 cpi = event->param.rej_rcvd.ari;
2606 dlid = be16_to_cpu(cpi->redirect_lid);
2607 sa_path_set_dlid(&ch->ib_cm.path, dlid);
2608 ch->ib_cm.path.pkey = cpi->redirect_pkey;
2609 cm_id->remote_cm_qpn = be32_to_cpu(cpi->redirect_qp) & 0x00ffffff;
2610 memcpy(ch->ib_cm.path.dgid.raw, cpi->redirect_gid, 16);
2612 ch->status = dlid ? SRP_DLID_REDIRECT : SRP_PORT_REDIRECT;
2613 break;
2615 case IB_CM_REJ_PORT_REDIRECT:
2616 if (srp_target_is_topspin(target)) {
2617 union ib_gid *dgid = &ch->ib_cm.path.dgid;
2620 * Topspin/Cisco SRP gateways incorrectly send
2621 * reject reason code 25 when they mean 24
2622 * (port redirect).
2624 memcpy(dgid->raw, event->param.rej_rcvd.ari, 16);
2626 shost_printk(KERN_DEBUG, shost,
2627 PFX "Topspin/Cisco redirect to target port GID %016llx%016llx\n",
2628 be64_to_cpu(dgid->global.subnet_prefix),
2629 be64_to_cpu(dgid->global.interface_id));
2631 ch->status = SRP_PORT_REDIRECT;
2632 } else {
2633 shost_printk(KERN_WARNING, shost,
2634 " REJ reason: IB_CM_REJ_PORT_REDIRECT\n");
2635 ch->status = -ECONNRESET;
2637 break;
2639 case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID:
2640 shost_printk(KERN_WARNING, shost,
2641 " REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n");
2642 ch->status = -ECONNRESET;
2643 break;
2645 case IB_CM_REJ_CONSUMER_DEFINED:
2646 opcode = *(u8 *) event->private_data;
2647 if (opcode == SRP_LOGIN_REJ) {
2648 struct srp_login_rej *rej = event->private_data;
2649 u32 reason = be32_to_cpu(rej->reason);
2651 if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE)
2652 shost_printk(KERN_WARNING, shost,
2653 PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n");
2654 else
2655 shost_printk(KERN_WARNING, shost, PFX
2656 "SRP LOGIN from %pI6 to %pI6 REJECTED, reason 0x%08x\n",
2657 target->sgid.raw,
2658 target->ib_cm.orig_dgid.raw,
2659 reason);
2660 } else
2661 shost_printk(KERN_WARNING, shost,
2662 " REJ reason: IB_CM_REJ_CONSUMER_DEFINED,"
2663 " opcode 0x%02x\n", opcode);
2664 ch->status = -ECONNRESET;
2665 break;
2667 case IB_CM_REJ_STALE_CONN:
2668 shost_printk(KERN_WARNING, shost, " REJ reason: stale connection\n");
2669 ch->status = SRP_STALE_CONN;
2670 break;
2672 default:
2673 shost_printk(KERN_WARNING, shost, " REJ reason 0x%x\n",
2674 event->param.rej_rcvd.reason);
2675 ch->status = -ECONNRESET;
2679 static int srp_ib_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
2681 struct srp_rdma_ch *ch = cm_id->context;
2682 struct srp_target_port *target = ch->target;
2683 int comp = 0;
2685 switch (event->event) {
2686 case IB_CM_REQ_ERROR:
2687 shost_printk(KERN_DEBUG, target->scsi_host,
2688 PFX "Sending CM REQ failed\n");
2689 comp = 1;
2690 ch->status = -ECONNRESET;
2691 break;
2693 case IB_CM_REP_RECEIVED:
2694 comp = 1;
2695 srp_cm_rep_handler(cm_id, event->private_data, ch);
2696 break;
2698 case IB_CM_REJ_RECEIVED:
2699 shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n");
2700 comp = 1;
2702 srp_ib_cm_rej_handler(cm_id, event, ch);
2703 break;
2705 case IB_CM_DREQ_RECEIVED:
2706 shost_printk(KERN_WARNING, target->scsi_host,
2707 PFX "DREQ received - connection closed\n");
2708 ch->connected = false;
2709 if (ib_send_cm_drep(cm_id, NULL, 0))
2710 shost_printk(KERN_ERR, target->scsi_host,
2711 PFX "Sending CM DREP failed\n");
2712 queue_work(system_long_wq, &target->tl_err_work);
2713 break;
2715 case IB_CM_TIMEWAIT_EXIT:
2716 shost_printk(KERN_ERR, target->scsi_host,
2717 PFX "connection closed\n");
2718 comp = 1;
2720 ch->status = 0;
2721 break;
2723 case IB_CM_MRA_RECEIVED:
2724 case IB_CM_DREQ_ERROR:
2725 case IB_CM_DREP_RECEIVED:
2726 break;
2728 default:
2729 shost_printk(KERN_WARNING, target->scsi_host,
2730 PFX "Unhandled CM event %d\n", event->event);
2731 break;
2734 if (comp)
2735 complete(&ch->done);
2737 return 0;
2740 static void srp_rdma_cm_rej_handler(struct srp_rdma_ch *ch,
2741 struct rdma_cm_event *event)
2743 struct srp_target_port *target = ch->target;
2744 struct Scsi_Host *shost = target->scsi_host;
2745 int opcode;
2747 switch (event->status) {
2748 case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID:
2749 shost_printk(KERN_WARNING, shost,
2750 " REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n");
2751 ch->status = -ECONNRESET;
2752 break;
2754 case IB_CM_REJ_CONSUMER_DEFINED:
2755 opcode = *(u8 *) event->param.conn.private_data;
2756 if (opcode == SRP_LOGIN_REJ) {
2757 struct srp_login_rej *rej =
2758 (struct srp_login_rej *)
2759 event->param.conn.private_data;
2760 u32 reason = be32_to_cpu(rej->reason);
2762 if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE)
2763 shost_printk(KERN_WARNING, shost,
2764 PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n");
2765 else
2766 shost_printk(KERN_WARNING, shost,
2767 PFX "SRP LOGIN REJECTED, reason 0x%08x\n", reason);
2768 } else {
2769 shost_printk(KERN_WARNING, shost,
2770 " REJ reason: IB_CM_REJ_CONSUMER_DEFINED, opcode 0x%02x\n",
2771 opcode);
2773 ch->status = -ECONNRESET;
2774 break;
2776 case IB_CM_REJ_STALE_CONN:
2777 shost_printk(KERN_WARNING, shost,
2778 " REJ reason: stale connection\n");
2779 ch->status = SRP_STALE_CONN;
2780 break;
2782 default:
2783 shost_printk(KERN_WARNING, shost, " REJ reason 0x%x\n",
2784 event->status);
2785 ch->status = -ECONNRESET;
2786 break;
2790 static int srp_rdma_cm_handler(struct rdma_cm_id *cm_id,
2791 struct rdma_cm_event *event)
2793 struct srp_rdma_ch *ch = cm_id->context;
2794 struct srp_target_port *target = ch->target;
2795 int comp = 0;
2797 switch (event->event) {
2798 case RDMA_CM_EVENT_ADDR_RESOLVED:
2799 ch->status = 0;
2800 comp = 1;
2801 break;
2803 case RDMA_CM_EVENT_ADDR_ERROR:
2804 ch->status = -ENXIO;
2805 comp = 1;
2806 break;
2808 case RDMA_CM_EVENT_ROUTE_RESOLVED:
2809 ch->status = 0;
2810 comp = 1;
2811 break;
2813 case RDMA_CM_EVENT_ROUTE_ERROR:
2814 case RDMA_CM_EVENT_UNREACHABLE:
2815 ch->status = -EHOSTUNREACH;
2816 comp = 1;
2817 break;
2819 case RDMA_CM_EVENT_CONNECT_ERROR:
2820 shost_printk(KERN_DEBUG, target->scsi_host,
2821 PFX "Sending CM REQ failed\n");
2822 comp = 1;
2823 ch->status = -ECONNRESET;
2824 break;
2826 case RDMA_CM_EVENT_ESTABLISHED:
2827 comp = 1;
2828 srp_cm_rep_handler(NULL, event->param.conn.private_data, ch);
2829 break;
2831 case RDMA_CM_EVENT_REJECTED:
2832 shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n");
2833 comp = 1;
2835 srp_rdma_cm_rej_handler(ch, event);
2836 break;
2838 case RDMA_CM_EVENT_DISCONNECTED:
2839 if (ch->connected) {
2840 shost_printk(KERN_WARNING, target->scsi_host,
2841 PFX "received DREQ\n");
2842 rdma_disconnect(ch->rdma_cm.cm_id);
2843 comp = 1;
2844 ch->status = 0;
2845 queue_work(system_long_wq, &target->tl_err_work);
2847 break;
2849 case RDMA_CM_EVENT_TIMEWAIT_EXIT:
2850 shost_printk(KERN_ERR, target->scsi_host,
2851 PFX "connection closed\n");
2853 comp = 1;
2854 ch->status = 0;
2855 break;
2857 default:
2858 shost_printk(KERN_WARNING, target->scsi_host,
2859 PFX "Unhandled CM event %d\n", event->event);
2860 break;
2863 if (comp)
2864 complete(&ch->done);
2866 return 0;
2870 * srp_change_queue_depth - set the device queue depth
2871 * @sdev: scsi device struct
2872 * @qdepth: requested queue depth
2874 * Returns queue depth.
2876 static int
2877 srp_change_queue_depth(struct scsi_device *sdev, int qdepth)
2879 if (!sdev->tagged_supported)
2880 qdepth = 1;
2881 return scsi_change_queue_depth(sdev, qdepth);
2884 static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun,
2885 u8 func, u8 *status)
2887 struct srp_target_port *target = ch->target;
2888 struct srp_rport *rport = target->rport;
2889 struct ib_device *dev = target->srp_host->srp_dev->dev;
2890 struct srp_iu *iu;
2891 struct srp_tsk_mgmt *tsk_mgmt;
2892 int res;
2894 if (!ch->connected || target->qp_in_error)
2895 return -1;
2898 * Lock the rport mutex to prevent srp_create_ch_ib() from being
2899 * invoked while a task management function is being sent.
2901 mutex_lock(&rport->mutex);
2902 spin_lock_irq(&ch->lock);
2903 iu = __srp_get_tx_iu(ch, SRP_IU_TSK_MGMT);
2904 spin_unlock_irq(&ch->lock);
2906 if (!iu) {
2907 mutex_unlock(&rport->mutex);
2909 return -1;
2912 ib_dma_sync_single_for_cpu(dev, iu->dma, sizeof *tsk_mgmt,
2913 DMA_TO_DEVICE);
2914 tsk_mgmt = iu->buf;
2915 memset(tsk_mgmt, 0, sizeof *tsk_mgmt);
2917 tsk_mgmt->opcode = SRP_TSK_MGMT;
2918 int_to_scsilun(lun, &tsk_mgmt->lun);
2919 tsk_mgmt->tsk_mgmt_func = func;
2920 tsk_mgmt->task_tag = req_tag;
2922 spin_lock_irq(&ch->lock);
2923 ch->tsk_mgmt_tag = (ch->tsk_mgmt_tag + 1) | SRP_TAG_TSK_MGMT;
2924 tsk_mgmt->tag = ch->tsk_mgmt_tag;
2925 spin_unlock_irq(&ch->lock);
2927 init_completion(&ch->tsk_mgmt_done);
2929 ib_dma_sync_single_for_device(dev, iu->dma, sizeof *tsk_mgmt,
2930 DMA_TO_DEVICE);
2931 if (srp_post_send(ch, iu, sizeof(*tsk_mgmt))) {
2932 srp_put_tx_iu(ch, iu, SRP_IU_TSK_MGMT);
2933 mutex_unlock(&rport->mutex);
2935 return -1;
2937 res = wait_for_completion_timeout(&ch->tsk_mgmt_done,
2938 msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS));
2939 if (res > 0 && status)
2940 *status = ch->tsk_mgmt_status;
2941 mutex_unlock(&rport->mutex);
2943 WARN_ON_ONCE(res < 0);
2945 return res > 0 ? 0 : -1;
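/*
 * Tag sketch: task management IUs carry a private tag with the
 * SRP_TAG_TSK_MGMT bit set (incremented under ch->lock above), which is
 * how srp_process_rsp() distinguishes a task management response from a
 * normal command response and completes ch->tsk_mgmt_done instead of
 * calling back into the SCSI mid-layer.
 */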
2948 static int srp_abort(struct scsi_cmnd *scmnd)
2950 struct srp_target_port *target = host_to_target(scmnd->device->host);
2951 struct srp_request *req = (struct srp_request *) scmnd->host_scribble;
2952 u32 tag;
2953 u16 ch_idx;
2954 struct srp_rdma_ch *ch;
2955 int ret;
2957 shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n");
2959 if (!req)
2960 return SUCCESS;
2961 tag = blk_mq_unique_tag(scmnd->request);
2962 ch_idx = blk_mq_unique_tag_to_hwq(tag);
2963 if (WARN_ON_ONCE(ch_idx >= target->ch_count))
2964 return SUCCESS;
2965 ch = &target->ch[ch_idx];
2966 if (!srp_claim_req(ch, req, NULL, scmnd))
2967 return SUCCESS;
2968 shost_printk(KERN_ERR, target->scsi_host,
2969 "Sending SRP abort for tag %#x\n", tag);
2970 if (srp_send_tsk_mgmt(ch, tag, scmnd->device->lun,
2971 SRP_TSK_ABORT_TASK, NULL) == 0)
2972 ret = SUCCESS;
2973 else if (target->rport->state == SRP_RPORT_LOST)
2974 ret = FAST_IO_FAIL;
2975 else
2976 ret = FAILED;
2977 srp_free_req(ch, req, scmnd, 0);
2978 scmnd->result = DID_ABORT << 16;
2979 scmnd->scsi_done(scmnd);
2981 return ret;
2984 static int srp_reset_device(struct scsi_cmnd *scmnd)
2986 struct srp_target_port *target = host_to_target(scmnd->device->host);
2987 struct srp_rdma_ch *ch;
2988 int i, j;
2989 u8 status;
2991 shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n");
2993 ch = &target->ch[0];
2994 if (srp_send_tsk_mgmt(ch, SRP_TAG_NO_REQ, scmnd->device->lun,
2995 SRP_TSK_LUN_RESET, &status))
2996 return FAILED;
2997 if (status)
2998 return FAILED;
3000 for (i = 0; i < target->ch_count; i++) {
3001 ch = &target->ch[i];
3002 for (j = 0; j < target->req_ring_size; ++j) {
3003 struct srp_request *req = &ch->req_ring[j];
3005 srp_finish_req(ch, req, scmnd->device, DID_RESET << 16);
3009 return SUCCESS;
3012 static int srp_reset_host(struct scsi_cmnd *scmnd)
3014 struct srp_target_port *target = host_to_target(scmnd->device->host);
3016 shost_printk(KERN_ERR, target->scsi_host, PFX "SRP reset_host called\n");
3018 return srp_reconnect_rport(target->rport) == 0 ? SUCCESS : FAILED;
3021 static int srp_target_alloc(struct scsi_target *starget)
3023 struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
3024 struct srp_target_port *target = host_to_target(shost);
3026 if (target->target_can_queue)
3027 starget->can_queue = target->target_can_queue;
3028 return 0;
3031 static int srp_slave_alloc(struct scsi_device *sdev)
3033 struct Scsi_Host *shost = sdev->host;
3034 struct srp_target_port *target = host_to_target(shost);
3035 struct srp_device *srp_dev = target->srp_host->srp_dev;
3037 if (true)
3038 blk_queue_virt_boundary(sdev->request_queue,
3039 ~srp_dev->mr_page_mask);
3041 return 0;
3044 static int srp_slave_configure(struct scsi_device *sdev)
3046 struct Scsi_Host *shost = sdev->host;
3047 struct srp_target_port *target = host_to_target(shost);
3048 struct request_queue *q = sdev->request_queue;
3049 unsigned long timeout;
3051 if (sdev->type == TYPE_DISK) {
3052 timeout = max_t(unsigned, 30 * HZ, target->rq_tmo_jiffies);
3053 blk_queue_rq_timeout(q, timeout);
3056 return 0;
3059 static ssize_t show_id_ext(struct device *dev, struct device_attribute *attr,
3060 char *buf)
3062 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3064 return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->id_ext));
3067 static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr,
3068 char *buf)
3070 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3072 return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->ioc_guid));
3075 static ssize_t show_service_id(struct device *dev,
3076 struct device_attribute *attr, char *buf)
3078 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3080 if (target->using_rdma_cm)
3081 return -ENOENT;
3082 return sprintf(buf, "0x%016llx\n",
3083 be64_to_cpu(target->ib_cm.service_id));
3086 static ssize_t show_pkey(struct device *dev, struct device_attribute *attr,
3087 char *buf)
3089 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3091 if (target->using_rdma_cm)
3092 return -ENOENT;
3093 return sprintf(buf, "0x%04x\n", be16_to_cpu(target->ib_cm.pkey));
3096 static ssize_t show_sgid(struct device *dev, struct device_attribute *attr,
3097 char *buf)
3099 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3101 return sprintf(buf, "%pI6\n", target->sgid.raw);
3104 static ssize_t show_dgid(struct device *dev, struct device_attribute *attr,
3105 char *buf)
3107 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3108 struct srp_rdma_ch *ch = &target->ch[0];
3110 if (target->using_rdma_cm)
3111 return -ENOENT;
3112 return sprintf(buf, "%pI6\n", ch->ib_cm.path.dgid.raw);
3115 static ssize_t show_orig_dgid(struct device *dev,
3116 struct device_attribute *attr, char *buf)
3118 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3120 if (target->using_rdma_cm)
3121 return -ENOENT;
3122 return sprintf(buf, "%pI6\n", target->ib_cm.orig_dgid.raw);
3125 static ssize_t show_req_lim(struct device *dev,
3126 struct device_attribute *attr, char *buf)
3128 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3129 struct srp_rdma_ch *ch;
3130 int i, req_lim = INT_MAX;
3132 for (i = 0; i < target->ch_count; i++) {
3133 ch = &target->ch[i];
3134 req_lim = min(req_lim, ch->req_lim);
3136 return sprintf(buf, "%d\n", req_lim);
3139 static ssize_t show_zero_req_lim(struct device *dev,
3140 struct device_attribute *attr, char *buf)
3142 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3144 return sprintf(buf, "%d\n", target->zero_req_lim);
3147 static ssize_t show_local_ib_port(struct device *dev,
3148 struct device_attribute *attr, char *buf)
3150 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3152 return sprintf(buf, "%d\n", target->srp_host->port);
3155 static ssize_t show_local_ib_device(struct device *dev,
3156 struct device_attribute *attr, char *buf)
3158 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3160 return sprintf(buf, "%s\n", target->srp_host->srp_dev->dev->name);
3163 static ssize_t show_ch_count(struct device *dev, struct device_attribute *attr,
3164 char *buf)
3166 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3168 return sprintf(buf, "%d\n", target->ch_count);
3171 static ssize_t show_comp_vector(struct device *dev,
3172 struct device_attribute *attr, char *buf)
3174 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3176 return sprintf(buf, "%d\n", target->comp_vector);
3179 static ssize_t show_tl_retry_count(struct device *dev,
3180 struct device_attribute *attr, char *buf)
3182 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3184 return sprintf(buf, "%d\n", target->tl_retry_count);
3187 static ssize_t show_cmd_sg_entries(struct device *dev,
3188 struct device_attribute *attr, char *buf)
3190 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3192 return sprintf(buf, "%u\n", target->cmd_sg_cnt);
3195 static ssize_t show_allow_ext_sg(struct device *dev,
3196 struct device_attribute *attr, char *buf)
3198 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3200 return sprintf(buf, "%s\n", target->allow_ext_sg ? "true" : "false");
3203 static DEVICE_ATTR(id_ext, S_IRUGO, show_id_ext, NULL);
3204 static DEVICE_ATTR(ioc_guid, S_IRUGO, show_ioc_guid, NULL);
3205 static DEVICE_ATTR(service_id, S_IRUGO, show_service_id, NULL);
3206 static DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL);
3207 static DEVICE_ATTR(sgid, S_IRUGO, show_sgid, NULL);
3208 static DEVICE_ATTR(dgid, S_IRUGO, show_dgid, NULL);
3209 static DEVICE_ATTR(orig_dgid, S_IRUGO, show_orig_dgid, NULL);
3210 static DEVICE_ATTR(req_lim, S_IRUGO, show_req_lim, NULL);
3211 static DEVICE_ATTR(zero_req_lim, S_IRUGO, show_zero_req_lim, NULL);
3212 static DEVICE_ATTR(local_ib_port, S_IRUGO, show_local_ib_port, NULL);
3213 static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL);
3214 static DEVICE_ATTR(ch_count, S_IRUGO, show_ch_count, NULL);
3215 static DEVICE_ATTR(comp_vector, S_IRUGO, show_comp_vector, NULL);
3216 static DEVICE_ATTR(tl_retry_count, S_IRUGO, show_tl_retry_count, NULL);
3217 static DEVICE_ATTR(cmd_sg_entries, S_IRUGO, show_cmd_sg_entries, NULL);
3218 static DEVICE_ATTR(allow_ext_sg, S_IRUGO, show_allow_ext_sg, NULL);
3220 static struct device_attribute *srp_host_attrs[] = {
3221 &dev_attr_id_ext,
3222 &dev_attr_ioc_guid,
3223 &dev_attr_service_id,
3224 &dev_attr_pkey,
3225 &dev_attr_sgid,
3226 &dev_attr_dgid,
3227 &dev_attr_orig_dgid,
3228 &dev_attr_req_lim,
3229 &dev_attr_zero_req_lim,
3230 &dev_attr_local_ib_port,
3231 &dev_attr_local_ib_device,
3232 &dev_attr_ch_count,
3233 &dev_attr_comp_vector,
3234 &dev_attr_tl_retry_count,
3235 &dev_attr_cmd_sg_entries,
3236 &dev_attr_allow_ext_sg,
3237 NULL
3240 static struct scsi_host_template srp_template = {
3241 .module = THIS_MODULE,
3242 .name = "InfiniBand SRP initiator",
3243 .proc_name = DRV_NAME,
3244 .target_alloc = srp_target_alloc,
3245 .slave_alloc = srp_slave_alloc,
3246 .slave_configure = srp_slave_configure,
3247 .info = srp_target_info,
3248 .queuecommand = srp_queuecommand,
3249 .change_queue_depth = srp_change_queue_depth,
3250 .eh_timed_out = srp_timed_out,
3251 .eh_abort_handler = srp_abort,
3252 .eh_device_reset_handler = srp_reset_device,
3253 .eh_host_reset_handler = srp_reset_host,
3254 .skip_settle_delay = true,
3255 .sg_tablesize = SRP_DEF_SG_TABLESIZE,
3256 .can_queue = SRP_DEFAULT_CMD_SQ_SIZE,
3257 .this_id = -1,
3258 .cmd_per_lun = SRP_DEFAULT_CMD_SQ_SIZE,
3259 .use_clustering = ENABLE_CLUSTERING,
3260 .shost_attrs = srp_host_attrs,
3261 .track_queue_depth = 1,
3264 static int srp_sdev_count(struct Scsi_Host *host)
3266 struct scsi_device *sdev;
3267 int c = 0;
3269 shost_for_each_device(sdev, host)
3270 c++;
3272 return c;
3276 * Return values:
3277 * < 0 upon failure. Caller is responsible for SRP target port cleanup.
3278 * 0 and target->state == SRP_TARGET_REMOVED if asynchronous target port
3279 * removal has been scheduled.
3280 * 0 and target->state != SRP_TARGET_REMOVED upon success.
3282 static int srp_add_target(struct srp_host *host, struct srp_target_port *target)
3284 struct srp_rport_identifiers ids;
3285 struct srp_rport *rport;
3287 target->state = SRP_TARGET_SCANNING;
3288 sprintf(target->target_name, "SRP.T10:%016llX",
3289 be64_to_cpu(target->id_ext));
3291 if (scsi_add_host(target->scsi_host, host->srp_dev->dev->dev.parent))
3292 return -ENODEV;
3294 memcpy(ids.port_id, &target->id_ext, 8);
3295 memcpy(ids.port_id + 8, &target->ioc_guid, 8);
3296 ids.roles = SRP_RPORT_ROLE_TARGET;
3297 rport = srp_rport_add(target->scsi_host, &ids);
3298 if (IS_ERR(rport)) {
3299 scsi_remove_host(target->scsi_host);
3300 return PTR_ERR(rport);
3303 rport->lld_data = target;
3304 target->rport = rport;
3306 spin_lock(&host->target_lock);
3307 list_add_tail(&target->list, &host->target_list);
3308 spin_unlock(&host->target_lock);
3310 scsi_scan_target(&target->scsi_host->shost_gendev,
3311 0, target->scsi_id, SCAN_WILD_CARD, SCSI_SCAN_INITIAL);
3313 if (srp_connected_ch(target) < target->ch_count ||
3314 target->qp_in_error) {
3315 shost_printk(KERN_INFO, target->scsi_host,
3316 PFX "SCSI scan failed - removing SCSI host\n");
3317 srp_queue_remove_work(target);
3318 goto out;
3321 pr_debug("%s: SCSI scan succeeded - detected %d LUNs\n",
3322 dev_name(&target->scsi_host->shost_gendev),
3323 srp_sdev_count(target->scsi_host));
3325 spin_lock_irq(&target->lock);
3326 if (target->state == SRP_TARGET_SCANNING)
3327 target->state = SRP_TARGET_LIVE;
3328 spin_unlock_irq(&target->lock);
3330 out:
3331 return 0;
3334 static void srp_release_dev(struct device *dev)
3336 struct srp_host *host =
3337 container_of(dev, struct srp_host, dev);
3339 complete(&host->released);
3342 static struct class srp_class = {
3343 .name = "infiniband_srp",
3344 .dev_release = srp_release_dev
3348 * srp_conn_unique() - check whether the connection to a target is unique
3349 * @host: SRP host.
3350 * @target: SRP target port.
3352 static bool srp_conn_unique(struct srp_host *host,
3353 struct srp_target_port *target)
3355 struct srp_target_port *t;
3356 bool ret = false;
3358 if (target->state == SRP_TARGET_REMOVED)
3359 goto out;
3361 ret = true;
3363 spin_lock(&host->target_lock);
3364 list_for_each_entry(t, &host->target_list, list) {
3365 if (t != target &&
3366 target->id_ext == t->id_ext &&
3367 target->ioc_guid == t->ioc_guid &&
3368 (!target->using_rdma_cm ||
3369 memcmp(&target->rdma_cm.dst, &t->rdma_cm.dst,
3370 sizeof(target->rdma_cm.dst)) == 0) &&
3371 target->initiator_ext == t->initiator_ext) {
3372 ret = false;
3373 break;
3376 spin_unlock(&host->target_lock);
3378 out:
3379 return ret;
3383 * Target ports are added by writing
3385 * id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>,dgid=<dest GID>,
3386 * pkey=<P_Key>,service_id=<service ID>
3387 * or
3388 * id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>,
3389 * [src=<IPv4 address>,]dest=<IPv4 address>:<port number>
3391 * to the add_target sysfs attribute.
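/*
 * Hypothetical examples (all values are placeholders; the string is
 * written as a single line to this host port's add_target file under
 * /sys/class/infiniband_srp/):
 *
 *   id_ext=200100a0b8000000,ioc_guid=0002c90300000001,
 *     dgid=fe800000000000000002c90300000001,pkey=ffff,
 *     service_id=200100a0b8000000
 *
 * or, using the RDMA/CM based connection setup:
 *
 *   id_ext=200100a0b8000000,ioc_guid=0002c90300000001,dest=192.168.1.5:5555
 */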
3393 enum {
3394 SRP_OPT_ERR = 0,
3395 SRP_OPT_ID_EXT = 1 << 0,
3396 SRP_OPT_IOC_GUID = 1 << 1,
3397 SRP_OPT_DGID = 1 << 2,
3398 SRP_OPT_PKEY = 1 << 3,
3399 SRP_OPT_SERVICE_ID = 1 << 4,
3400 SRP_OPT_MAX_SECT = 1 << 5,
3401 SRP_OPT_MAX_CMD_PER_LUN = 1 << 6,
3402 SRP_OPT_IO_CLASS = 1 << 7,
3403 SRP_OPT_INITIATOR_EXT = 1 << 8,
3404 SRP_OPT_CMD_SG_ENTRIES = 1 << 9,
3405 SRP_OPT_ALLOW_EXT_SG = 1 << 10,
3406 SRP_OPT_SG_TABLESIZE = 1 << 11,
3407 SRP_OPT_COMP_VECTOR = 1 << 12,
3408 SRP_OPT_TL_RETRY_COUNT = 1 << 13,
3409 SRP_OPT_QUEUE_SIZE = 1 << 14,
3410 SRP_OPT_IP_SRC = 1 << 15,
3411 SRP_OPT_IP_DEST = 1 << 16,
3412 SRP_OPT_TARGET_CAN_QUEUE= 1 << 17,
3415 static unsigned int srp_opt_mandatory[] = {
3416 SRP_OPT_ID_EXT |
3417 SRP_OPT_IOC_GUID |
3418 SRP_OPT_DGID |
3419 SRP_OPT_PKEY |
3420 SRP_OPT_SERVICE_ID,
3421 SRP_OPT_ID_EXT |
3422 SRP_OPT_IOC_GUID |
3423 SRP_OPT_IP_DEST,
3426 static const match_table_t srp_opt_tokens = {
3427 { SRP_OPT_ID_EXT, "id_ext=%s" },
3428 { SRP_OPT_IOC_GUID, "ioc_guid=%s" },
3429 { SRP_OPT_DGID, "dgid=%s" },
3430 { SRP_OPT_PKEY, "pkey=%x" },
3431 { SRP_OPT_SERVICE_ID, "service_id=%s" },
3432 { SRP_OPT_MAX_SECT, "max_sect=%d" },
3433 { SRP_OPT_MAX_CMD_PER_LUN, "max_cmd_per_lun=%d" },
3434 { SRP_OPT_TARGET_CAN_QUEUE, "target_can_queue=%d" },
3435 { SRP_OPT_IO_CLASS, "io_class=%x" },
3436 { SRP_OPT_INITIATOR_EXT, "initiator_ext=%s" },
3437 { SRP_OPT_CMD_SG_ENTRIES, "cmd_sg_entries=%u" },
3438 { SRP_OPT_ALLOW_EXT_SG, "allow_ext_sg=%u" },
3439 { SRP_OPT_SG_TABLESIZE, "sg_tablesize=%u" },
3440 { SRP_OPT_COMP_VECTOR, "comp_vector=%u" },
3441 { SRP_OPT_TL_RETRY_COUNT, "tl_retry_count=%u" },
3442 { SRP_OPT_QUEUE_SIZE, "queue_size=%d" },
3443 { SRP_OPT_IP_SRC, "src=%s" },
3444 { SRP_OPT_IP_DEST, "dest=%s" },
3445 { SRP_OPT_ERR, NULL }
3448 static int srp_parse_in(struct net *net, struct sockaddr_storage *sa,
3449 const char *addr_port_str)
3451 char *addr = kstrdup(addr_port_str, GFP_KERNEL);
3452 char *port_str = addr;
3453 int ret;
3455 if (!addr)
3456 return -ENOMEM;
3457 strsep(&port_str, ":");
3458 ret = inet_pton_with_scope(net, AF_UNSPEC, addr, port_str, sa);
3459 kfree(addr);
3460 return ret;
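/*
 * Parsing sketch with a hypothetical argument: for "dest=192.168.1.5:5555"
 * this function receives "192.168.1.5:5555"; strsep() cuts the string at
 * the first ':' so addr is "192.168.1.5" and port_str points at "5555",
 * and inet_pton_with_scope() then fills the sockaddr_storage with that
 * IPv4 address and port.
 */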
3463 static int srp_parse_options(struct net *net, const char *buf,
3464 struct srp_target_port *target)
3466 char *options, *sep_opt;
3467 char *p;
3468 substring_t args[MAX_OPT_ARGS];
3469 unsigned long long ull;
3470 int opt_mask = 0;
3471 int token;
3472 int ret = -EINVAL;
3473 int i;
3475 options = kstrdup(buf, GFP_KERNEL);
3476 if (!options)
3477 return -ENOMEM;
3479 sep_opt = options;
3480 while ((p = strsep(&sep_opt, ",\n")) != NULL) {
3481 if (!*p)
3482 continue;
3484 token = match_token(p, srp_opt_tokens, args);
3485 opt_mask |= token;
3487 switch (token) {
3488 case SRP_OPT_ID_EXT:
3489 p = match_strdup(args);
3490 if (!p) {
3491 ret = -ENOMEM;
3492 goto out;
3494 ret = kstrtoull(p, 16, &ull);
3495 if (ret) {
3496 pr_warn("invalid id_ext parameter '%s'\n", p);
3497 kfree(p);
3498 goto out;
3500 target->id_ext = cpu_to_be64(ull);
3501 kfree(p);
3502 break;
3504 case SRP_OPT_IOC_GUID:
3505 p = match_strdup(args);
3506 if (!p) {
3507 ret = -ENOMEM;
3508 goto out;
3510 ret = kstrtoull(p, 16, &ull);
3511 if (ret) {
3512 pr_warn("invalid ioc_guid parameter '%s'\n", p);
3513 kfree(p);
3514 goto out;
3516 target->ioc_guid = cpu_to_be64(ull);
3517 kfree(p);
3518 break;
3520 case SRP_OPT_DGID:
3521 p = match_strdup(args);
3522 if (!p) {
3523 ret = -ENOMEM;
3524 goto out;
3526 if (strlen(p) != 32) {
3527 pr_warn("bad dest GID parameter '%s'\n", p);
3528 kfree(p);
3529 goto out;
3532 ret = hex2bin(target->ib_cm.orig_dgid.raw, p, 16);
3533 kfree(p);
3534 if (ret < 0)
3535 goto out;
3536 break;
3538 case SRP_OPT_PKEY:
3539 if (match_hex(args, &token)) {
3540 pr_warn("bad P_Key parameter '%s'\n", p);
3541 goto out;
3543 target->ib_cm.pkey = cpu_to_be16(token);
3544 break;
3546 case SRP_OPT_SERVICE_ID:
3547 p = match_strdup(args);
3548 if (!p) {
3549 ret = -ENOMEM;
3550 goto out;
3552 ret = kstrtoull(p, 16, &ull);
3553 if (ret) {
3554 pr_warn("bad service_id parameter '%s'\n", p);
3555 kfree(p);
3556 goto out;
3558 target->ib_cm.service_id = cpu_to_be64(ull);
3559 kfree(p);
3560 break;
3562 case SRP_OPT_IP_SRC:
3563 p = match_strdup(args);
3564 if (!p) {
3565 ret = -ENOMEM;
3566 goto out;
3568 ret = srp_parse_in(net, &target->rdma_cm.src.ss, p);
3569 if (ret < 0) {
3570 pr_warn("bad source parameter '%s'\n", p);
3571 kfree(p);
3572 goto out;
3574 target->rdma_cm.src_specified = true;
3575 kfree(p);
3576 break;
3578 case SRP_OPT_IP_DEST:
3579 p = match_strdup(args);
3580 if (!p) {
3581 ret = -ENOMEM;
3582 goto out;
3584 ret = srp_parse_in(net, &target->rdma_cm.dst.ss, p);
3585 if (ret < 0) {
3586 pr_warn("bad dest parameter '%s'\n", p);
3587 kfree(p);
3588 goto out;
3590 target->using_rdma_cm = true;
3591 kfree(p);
3592 break;
3594 case SRP_OPT_MAX_SECT:
3595 if (match_int(args, &token)) {
3596 pr_warn("bad max sect parameter '%s'\n", p);
3597 goto out;
3599 target->scsi_host->max_sectors = token;
3600 break;
3602 case SRP_OPT_QUEUE_SIZE:
3603 if (match_int(args, &token) || token < 1) {
3604 pr_warn("bad queue_size parameter '%s'\n", p);
3605 goto out;
3607 target->scsi_host->can_queue = token;
3608 target->queue_size = token + SRP_RSP_SQ_SIZE +
3609 SRP_TSK_MGMT_SQ_SIZE;
3610 if (!(opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
3611 target->scsi_host->cmd_per_lun = token;
3612 break;
3614 case SRP_OPT_MAX_CMD_PER_LUN:
3615 if (match_int(args, &token) || token < 1) {
3616 pr_warn("bad max cmd_per_lun parameter '%s'\n",
3618 goto out;
3620 target->scsi_host->cmd_per_lun = token;
3621 break;
3623 case SRP_OPT_TARGET_CAN_QUEUE:
3624 if (match_int(args, &token) || token < 1) {
3625 pr_warn("bad max target_can_queue parameter '%s'\n",
3627 goto out;
3629 target->target_can_queue = token;
3630 break;
3632 case SRP_OPT_IO_CLASS:
3633 if (match_hex(args, &token)) {
3634 pr_warn("bad IO class parameter '%s'\n", p);
3635 goto out;
3637 if (token != SRP_REV10_IB_IO_CLASS &&
3638 token != SRP_REV16A_IB_IO_CLASS) {
3639 pr_warn("unknown IO class parameter value %x specified (use %x or %x).\n",
3640 token, SRP_REV10_IB_IO_CLASS,
3641 SRP_REV16A_IB_IO_CLASS);
3642 goto out;
3644 target->io_class = token;
3645 break;
3647 case SRP_OPT_INITIATOR_EXT:
3648 p = match_strdup(args);
3649 if (!p) {
3650 ret = -ENOMEM;
3651 goto out;
3653 ret = kstrtoull(p, 16, &ull);
3654 if (ret) {
3655 pr_warn("bad initiator_ext value '%s'\n", p);
3656 kfree(p);
3657 goto out;
3659 target->initiator_ext = cpu_to_be64(ull);
3660 kfree(p);
3661 break;
3663 case SRP_OPT_CMD_SG_ENTRIES:
3664 if (match_int(args, &token) || token < 1 || token > 255) {
3665 pr_warn("bad max cmd_sg_entries parameter '%s'\n",
3667 goto out;
3669 target->cmd_sg_cnt = token;
3670 break;
3672 case SRP_OPT_ALLOW_EXT_SG:
3673 if (match_int(args, &token)) {
3674 pr_warn("bad allow_ext_sg parameter '%s'\n", p);
3675 goto out;
3677 target->allow_ext_sg = !!token;
3678 break;
3680 case SRP_OPT_SG_TABLESIZE:
3681 if (match_int(args, &token) || token < 1 ||
3682 token > SG_MAX_SEGMENTS) {
3683 pr_warn("bad max sg_tablesize parameter '%s'\n",
3685 goto out;
3687 target->sg_tablesize = token;
3688 break;
3690 case SRP_OPT_COMP_VECTOR:
3691 if (match_int(args, &token) || token < 0) {
3692 pr_warn("bad comp_vector parameter '%s'\n", p);
3693 goto out;
3695 target->comp_vector = token;
3696 break;
3698 case SRP_OPT_TL_RETRY_COUNT:
3699 if (match_int(args, &token) || token < 2 || token > 7) {
3700 pr_warn("bad tl_retry_count parameter '%s' (must be a number between 2 and 7)\n",
3702 goto out;
3704 target->tl_retry_count = token;
3705 break;
3707 default:
3708 pr_warn("unknown parameter or missing value '%s' in target creation request\n",
3710 goto out;
3714 for (i = 0; i < ARRAY_SIZE(srp_opt_mandatory); i++) {
3715 if ((opt_mask & srp_opt_mandatory[i]) == srp_opt_mandatory[i]) {
3716 ret = 0;
3717 break;
3720 if (ret)
3721 pr_warn("target creation request is missing one or more parameters\n");
3723 if (target->scsi_host->cmd_per_lun > target->scsi_host->can_queue
3724 && (opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
3725 pr_warn("cmd_per_lun = %d > queue_size = %d\n",
3726 target->scsi_host->cmd_per_lun,
3727 target->scsi_host->can_queue);
3729 out:
3730 kfree(options);
3731 return ret;
3734 static ssize_t srp_create_target(struct device *dev,
3735 struct device_attribute *attr,
3736 const char *buf, size_t count)
3738 struct srp_host *host =
3739 container_of(dev, struct srp_host, dev);
3740 struct Scsi_Host *target_host;
3741 struct srp_target_port *target;
3742 struct srp_rdma_ch *ch;
3743 struct srp_device *srp_dev = host->srp_dev;
3744 struct ib_device *ibdev = srp_dev->dev;
3745 int ret, node_idx, node, cpu, i;
3746 unsigned int max_sectors_per_mr, mr_per_cmd = 0;
3747 bool multich = false;
3749 target_host = scsi_host_alloc(&srp_template,
3750 sizeof (struct srp_target_port));
3751 if (!target_host)
3752 return -ENOMEM;
3754 target_host->transportt = ib_srp_transport_template;
3755 target_host->max_channel = 0;
3756 target_host->max_id = 1;
3757 target_host->max_lun = -1LL;
3758 target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb;
3760 target = host_to_target(target_host);
3762 target->net = kobj_ns_grab_current(KOBJ_NS_TYPE_NET);
3763 target->io_class = SRP_REV16A_IB_IO_CLASS;
3764 target->scsi_host = target_host;
3765 target->srp_host = host;
3766 target->lkey = host->srp_dev->pd->local_dma_lkey;
3767 target->global_rkey = host->srp_dev->global_rkey;
3768 target->cmd_sg_cnt = cmd_sg_entries;
3769 target->sg_tablesize = indirect_sg_entries ? : cmd_sg_entries;
3770 target->allow_ext_sg = allow_ext_sg;
3771 target->tl_retry_count = 7;
3772 target->queue_size = SRP_DEFAULT_QUEUE_SIZE;
3775 * Prevent the SCSI host from being removed by srp_remove_target()
3776 * before this function returns.
3778 scsi_host_get(target->scsi_host);
3780 ret = mutex_lock_interruptible(&host->add_target_mutex);
3781 if (ret < 0)
3782 goto put;
3784 ret = srp_parse_options(target->net, buf, target);
3785 if (ret)
3786 goto out;
3788 target->req_ring_size = target->queue_size - SRP_TSK_MGMT_SQ_SIZE;
3790 if (!srp_conn_unique(target->srp_host, target)) {
3791 if (target->using_rdma_cm) {
3792 char dst_addr[64];
3794 shost_printk(KERN_INFO, target->scsi_host,
3795 PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;dest=%s\n",
3796 be64_to_cpu(target->id_ext),
3797 be64_to_cpu(target->ioc_guid),
3798 inet_ntop(&target->rdma_cm.dst, dst_addr,
3799 sizeof(dst_addr)));
3800 } else {
3801 shost_printk(KERN_INFO, target->scsi_host,
3802 PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n",
3803 be64_to_cpu(target->id_ext),
3804 be64_to_cpu(target->ioc_guid),
3805 be64_to_cpu(target->initiator_ext));
3807 ret = -EEXIST;
3808 goto out;
3811 if (!srp_dev->has_fmr && !srp_dev->has_fr && !target->allow_ext_sg &&
3812 target->cmd_sg_cnt < target->sg_tablesize) {
3813 pr_warn("No MR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n");
3814 target->sg_tablesize = target->cmd_sg_cnt;
3817 if (srp_dev->use_fast_reg || srp_dev->use_fmr) {
3819 * FR and FMR can only map one HCA page per entry. If the
3820 * start address is not aligned on a HCA page boundary two
3821 * entries will be used for the head and the tail although
3822 * these two entries combined contain at most one HCA page of
3823 * data. Hence the "+ 1" in the calculation below.
3825 * The indirect data buffer descriptor is contiguous so the
3826 * memory for that buffer will only be registered if
3827 * register_always is true. Hence add one to mr_per_cmd if
3828 * register_always has been set.
3830 max_sectors_per_mr = srp_dev->max_pages_per_mr <<
3831 (ilog2(srp_dev->mr_page_size) - 9);
3832 mr_per_cmd = register_always +
3833 (target->scsi_host->max_sectors + 1 +
3834 max_sectors_per_mr - 1) / max_sectors_per_mr;
3835 pr_debug("max_sectors = %u; max_pages_per_mr = %u; mr_page_size = %u; max_sectors_per_mr = %u; mr_per_cmd = %u\n",
3836 target->scsi_host->max_sectors,
3837 srp_dev->max_pages_per_mr, srp_dev->mr_page_size,
3838 max_sectors_per_mr, mr_per_cmd);
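/*
 * Worked example with hypothetical device limits: max_pages_per_mr == 256
 * and mr_page_size == 4096 give max_sectors_per_mr == 256 << 3 == 2048
 * sectors (1 MiB per MR); with max_sectors == 1024 and register_always
 * set, mr_per_cmd == 1 + (1024 + 1 + 2047) / 2048 == 2: one MR for the
 * data and one for the indirect descriptor buffer.
 */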
3841 target_host->sg_tablesize = target->sg_tablesize;
3842 target->mr_pool_size = target->scsi_host->can_queue * mr_per_cmd;
3843 target->mr_per_cmd = mr_per_cmd;
3844 target->indirect_size = target->sg_tablesize *
3845 sizeof (struct srp_direct_buf);
3846 target->max_iu_len = sizeof (struct srp_cmd) +
3847 sizeof (struct srp_indirect_buf) +
3848 target->cmd_sg_cnt * sizeof (struct srp_direct_buf);
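/*
 * Size sketch, assuming the usual SRP wire layout (sizeof(struct srp_cmd)
 * == 48, sizeof(struct srp_indirect_buf) == 20 and 16 bytes per struct
 * srp_direct_buf): with the default cmd_sg_cnt of 12, max_iu_len becomes
 * 48 + 20 + 12 * 16 == 260 bytes.
 */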
3850 INIT_WORK(&target->tl_err_work, srp_tl_err_work);
3851 INIT_WORK(&target->remove_work, srp_remove_work);
3852 spin_lock_init(&target->lock);
3853 ret = ib_query_gid(ibdev, host->port, 0, &target->sgid, NULL);
3854 if (ret)
3855 goto out;
3857 ret = -ENOMEM;
3858 target->ch_count = max_t(unsigned, num_online_nodes(),
3859 min(ch_count ? :
3860 min(4 * num_online_nodes(),
3861 ibdev->num_comp_vectors),
3862 num_online_cpus()));
3863 target->ch = kcalloc(target->ch_count, sizeof(*target->ch),
3864 GFP_KERNEL);
3865 if (!target->ch)
3866 goto out;
3868 node_idx = 0;
3869 for_each_online_node(node) {
3870 const int ch_start = (node_idx * target->ch_count /
3871 num_online_nodes());
3872 const int ch_end = ((node_idx + 1) * target->ch_count /
3873 num_online_nodes());
3874 const int cv_start = (node_idx * ibdev->num_comp_vectors /
3875 num_online_nodes() + target->comp_vector)
3876 % ibdev->num_comp_vectors;
3877 const int cv_end = ((node_idx + 1) * ibdev->num_comp_vectors /
3878 num_online_nodes() + target->comp_vector)
3879 % ibdev->num_comp_vectors;
3880 int cpu_idx = 0;
3882 for_each_online_cpu(cpu) {
3883 if (cpu_to_node(cpu) != node)
3884 continue;
3885 if (ch_start + cpu_idx >= ch_end)
3886 continue;
3887 ch = &target->ch[ch_start + cpu_idx];
3888 ch->target = target;
3889 ch->comp_vector = cv_start == cv_end ? cv_start :
3890 cv_start + cpu_idx % (cv_end - cv_start);
3891 spin_lock_init(&ch->lock);
3892 INIT_LIST_HEAD(&ch->free_tx);
3893 ret = srp_new_cm_id(ch);
3894 if (ret)
3895 goto err_disconnect;
3897 ret = srp_create_ch_ib(ch);
3898 if (ret)
3899 goto err_disconnect;
3901 ret = srp_alloc_req_data(ch);
3902 if (ret)
3903 goto err_disconnect;
3905 ret = srp_connect_ch(ch, multich);
3906 if (ret) {
3907 char dst[64];
3909 if (target->using_rdma_cm)
3910 inet_ntop(&target->rdma_cm.dst, dst,
3911 sizeof(dst));
3912 else
3913 snprintf(dst, sizeof(dst), "%pI6",
3914 target->ib_cm.orig_dgid.raw);
3915 shost_printk(KERN_ERR, target->scsi_host,
3916 PFX "Connection %d/%d to %s failed\n",
3917 ch_start + cpu_idx,
3918 target->ch_count, dst);
3919 if (node_idx == 0 && cpu_idx == 0) {
3920 goto free_ch;
3921 } else {
3922 srp_free_ch_ib(target, ch);
3923 srp_free_req_data(target, ch);
3924 target->ch_count = ch - target->ch;
3925 goto connected;
3929 multich = true;
3930 cpu_idx++;
3932 node_idx++;
3935 connected:
3936 target->scsi_host->nr_hw_queues = target->ch_count;
3938 ret = srp_add_target(host, target);
3939 if (ret)
3940 goto err_disconnect;
3942 if (target->state != SRP_TARGET_REMOVED) {
3943 if (target->using_rdma_cm) {
3944 char dst[64];
3946 inet_ntop(&target->rdma_cm.dst, dst, sizeof(dst));
3947 shost_printk(KERN_DEBUG, target->scsi_host, PFX
3948 "new target: id_ext %016llx ioc_guid %016llx sgid %pI6 dest %s\n",
3949 be64_to_cpu(target->id_ext),
3950 be64_to_cpu(target->ioc_guid),
3951 target->sgid.raw, dst);
3952 } else {
3953 shost_printk(KERN_DEBUG, target->scsi_host, PFX
3954 "new target: id_ext %016llx ioc_guid %016llx pkey %04x service_id %016llx sgid %pI6 dgid %pI6\n",
3955 be64_to_cpu(target->id_ext),
3956 be64_to_cpu(target->ioc_guid),
3957 be16_to_cpu(target->ib_cm.pkey),
3958 be64_to_cpu(target->ib_cm.service_id),
3959 target->sgid.raw,
3960 target->ib_cm.orig_dgid.raw);
3964 ret = count;
3966 out:
3967 mutex_unlock(&host->add_target_mutex);
3969 put:
3970 scsi_host_put(target->scsi_host);
3971 if (ret < 0) {
3973 * If a call to srp_remove_target() has not been scheduled,
3974 * drop the network namespace reference that was obtained
3975 * earlier in this function.
3977 if (target->state != SRP_TARGET_REMOVED)
3978 kobj_ns_drop(KOBJ_NS_TYPE_NET, target->net);
3979 scsi_host_put(target->scsi_host);
3982 return ret;
3984 err_disconnect:
3985 srp_disconnect_target(target);
3987 free_ch:
3988 for (i = 0; i < target->ch_count; i++) {
3989 ch = &target->ch[i];
3990 srp_free_ch_ib(target, ch);
3991 srp_free_req_data(target, ch);
3994 kfree(target->ch);
3995 goto out;
3998 static DEVICE_ATTR(add_target, S_IWUSR, NULL, srp_create_target);
4000 static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
4001 char *buf)
4003 struct srp_host *host = container_of(dev, struct srp_host, dev);
4005 return sprintf(buf, "%s\n", host->srp_dev->dev->name);
4008 static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
4010 static ssize_t show_port(struct device *dev, struct device_attribute *attr,
4011 char *buf)
4013 struct srp_host *host = container_of(dev, struct srp_host, dev);
4015 return sprintf(buf, "%d\n", host->port);
4018 static DEVICE_ATTR(port, S_IRUGO, show_port, NULL);
4020 static struct srp_host *srp_add_port(struct srp_device *device, u8 port)
4022 struct srp_host *host;
4024 host = kzalloc(sizeof *host, GFP_KERNEL);
4025 if (!host)
4026 return NULL;
4028 INIT_LIST_HEAD(&host->target_list);
4029 spin_lock_init(&host->target_lock);
4030 init_completion(&host->released);
4031 mutex_init(&host->add_target_mutex);
4032 host->srp_dev = device;
4033 host->port = port;
4035 host->dev.class = &srp_class;
4036 host->dev.parent = device->dev->dev.parent;
4037 dev_set_name(&host->dev, "srp-%s-%d", device->dev->name, port);
4039 if (device_register(&host->dev))
4040 goto free_host;
4041 if (device_create_file(&host->dev, &dev_attr_add_target))
4042 goto err_class;
4043 if (device_create_file(&host->dev, &dev_attr_ibdev))
4044 goto err_class;
4045 if (device_create_file(&host->dev, &dev_attr_port))
4046 goto err_class;
4048 return host;
4050 err_class:
4051 device_unregister(&host->dev);
4053 free_host:
4054 kfree(host);
4056 return NULL;
4059 static void srp_add_one(struct ib_device *device)
4061 struct srp_device *srp_dev;
4062 struct ib_device_attr *attr = &device->attrs;
4063 struct srp_host *host;
4064 int mr_page_shift, p;
4065 u64 max_pages_per_mr;
4066 unsigned int flags = 0;
4068 srp_dev = kzalloc(sizeof(*srp_dev), GFP_KERNEL);
4069 if (!srp_dev)
4070 return;
4073 * Use the smallest page size supported by the HCA, down to a
4074 * minimum of 4096 bytes. We're unlikely to build large sglists
4075 * out of smaller entries.
4077 mr_page_shift = max(12, ffs(attr->page_size_cap) - 1);
4078 srp_dev->mr_page_size = 1 << mr_page_shift;
4079 srp_dev->mr_page_mask = ~((u64) srp_dev->mr_page_size - 1);
4080 max_pages_per_mr = attr->max_mr_size;
4081 do_div(max_pages_per_mr, srp_dev->mr_page_size);
4082 pr_debug("%s: %llu / %u = %llu <> %u\n", __func__,
4083 attr->max_mr_size, srp_dev->mr_page_size,
4084 max_pages_per_mr, SRP_MAX_PAGES_PER_MR);
4085 srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR,
4086 max_pages_per_mr);
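/*
 * Example with hypothetical HCA attributes: page_size_cap == 0xfffff000
 * (4 KiB pages and larger) yields mr_page_shift == max(12, 12) and
 * mr_page_size == 4096; a max_mr_size of 8 GiB then allows 2097152 pages
 * per MR, which the min_t() above caps to SRP_MAX_PAGES_PER_MR.
 */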
4088 srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
4089 device->map_phys_fmr && device->unmap_fmr);
4090 srp_dev->has_fr = (attr->device_cap_flags &
4091 IB_DEVICE_MEM_MGT_EXTENSIONS);
4092 if (!never_register && !srp_dev->has_fmr && !srp_dev->has_fr) {
4093 dev_warn(&device->dev, "neither FMR nor FR is supported\n");
4094 } else if (!never_register &&
4095 attr->max_mr_size >= 2 * srp_dev->mr_page_size) {
4096 srp_dev->use_fast_reg = (srp_dev->has_fr &&
4097 (!srp_dev->has_fmr || prefer_fr));
4098 srp_dev->use_fmr = !srp_dev->use_fast_reg && srp_dev->has_fmr;
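	/*
	 * If memory registration is disabled (never_register), not enforced
	 * for all requests (!register_always), or simply unsupported by the
	 * HCA, fall back to a PD with an unsafe global rkey so that data
	 * buffers can still be described to the target without per-command
	 * memory registrations.
	 */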
	if (never_register || !register_always ||
	    (!srp_dev->has_fmr && !srp_dev->has_fr))
		flags |= IB_PD_UNSAFE_GLOBAL_RKEY;

	if (srp_dev->use_fast_reg) {
		srp_dev->max_pages_per_mr =
			min_t(u32, srp_dev->max_pages_per_mr,
			      attr->max_fast_reg_page_list_len);
	}
	srp_dev->mr_max_size = srp_dev->mr_page_size *
			       srp_dev->max_pages_per_mr;
	pr_debug("%s: mr_page_shift = %d, device->max_mr_size = %#llx, device->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
		 device->name, mr_page_shift, attr->max_mr_size,
		 attr->max_fast_reg_page_list_len,
		 srp_dev->max_pages_per_mr, srp_dev->mr_max_size);

	INIT_LIST_HEAD(&srp_dev->dev_list);

	srp_dev->dev = device;
	srp_dev->pd = ib_alloc_pd(device, flags);
	if (IS_ERR(srp_dev->pd))
		goto free_dev;

	if (flags & IB_PD_UNSAFE_GLOBAL_RKEY) {
		srp_dev->global_rkey = srp_dev->pd->unsafe_global_rkey;
		WARN_ON_ONCE(srp_dev->global_rkey == 0);
	}

	for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) {
		host = srp_add_port(srp_dev, p);
		if (host)
			list_add_tail(&host->list, &srp_dev->dev_list);
	}

	ib_set_client_data(device, &srp_client, srp_dev);
	return;

free_dev:
	kfree(srp_dev);
}
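
/*
 * srp_remove_one() - IB client "remove" callback, the inverse of
 * srp_add_one().
 *
 * For every port host: unregister the sysfs device so no further targets
 * can be added, wait until the last sysfs reference is gone, queue removal
 * work for all existing target ports, and flush the workqueues that run the
 * transport-error and removal handlers before freeing the host. The PD and
 * the srp_device itself are released last.
 */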
static void srp_remove_one(struct ib_device *device, void *client_data)
{
	struct srp_device *srp_dev;
	struct srp_host *host, *tmp_host;
	struct srp_target_port *target;

	srp_dev = client_data;
	if (!srp_dev)
		return;

	list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) {
		device_unregister(&host->dev);
		/*
		 * Wait for the sysfs entry to go away, so that no new
		 * target ports can be created.
		 */
		wait_for_completion(&host->released);

		/*
		 * Remove all target ports.
		 */
		spin_lock(&host->target_lock);
		list_for_each_entry(target, &host->target_list, list)
			srp_queue_remove_work(target);
		spin_unlock(&host->target_lock);

		/*
		 * Wait for tl_err and target port removal tasks.
		 */
		flush_workqueue(system_long_wq);
		flush_workqueue(srp_remove_wq);

		kfree(host);
	}

	ib_dealloc_pd(srp_dev->pd);

	kfree(srp_dev);
}
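
/*
 * Hooks for the SCSI SRP transport class (scsi_transport_srp). These let the
 * transport core drive reconnect, fast_io_fail and dev_loss handling for
 * each rport through the driver's srp_rport_reconnect(), srp_rport_delete()
 * and srp_terminate_io() callbacks, using the driver's reconnect and timeout
 * settings.
 */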
static struct srp_function_template ib_srp_transport_functions = {
	.has_rport_state	= true,
	.reset_timer_if_blocked	= true,
	.reconnect_delay	= &srp_reconnect_delay,
	.fast_io_fail_tmo	= &srp_fast_io_fail_tmo,
	.dev_loss_tmo		= &srp_dev_loss_tmo,
	.reconnect		= srp_rport_reconnect,
	.rport_delete		= srp_rport_delete,
	.terminate_rport_io	= srp_terminate_io,
};
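
/*
 * Module initialization: validate and clamp the scatter/gather module
 * parameters, then bring the pieces up in dependency order: removal
 * workqueue, SRP transport template, sysfs class, SA client and finally the
 * IB client whose add/remove callbacks are srp_add_one() and
 * srp_remove_one(). The error labels below unwind in the reverse order.
 */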
static int __init srp_init_module(void)
{
	int ret;

	if (srp_sg_tablesize) {
		pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n");
		if (!cmd_sg_entries)
			cmd_sg_entries = srp_sg_tablesize;
	}

	if (!cmd_sg_entries)
		cmd_sg_entries = SRP_DEF_SG_TABLESIZE;

	if (cmd_sg_entries > 255) {
		pr_warn("Clamping cmd_sg_entries to 255\n");
		cmd_sg_entries = 255;
	}

	if (!indirect_sg_entries)
		indirect_sg_entries = cmd_sg_entries;
	else if (indirect_sg_entries < cmd_sg_entries) {
		pr_warn("Bumping up indirect_sg_entries to match cmd_sg_entries (%u)\n",
			cmd_sg_entries);
		indirect_sg_entries = cmd_sg_entries;
	}

	if (indirect_sg_entries > SG_MAX_SEGMENTS) {
		pr_warn("Clamping indirect_sg_entries to %u\n",
			SG_MAX_SEGMENTS);
		indirect_sg_entries = SG_MAX_SEGMENTS;
	}

	srp_remove_wq = create_workqueue("srp_remove");
	if (!srp_remove_wq) {
		ret = -ENOMEM;
		goto out;
	}

	ret = -ENOMEM;
	ib_srp_transport_template =
		srp_attach_transport(&ib_srp_transport_functions);
	if (!ib_srp_transport_template)
		goto destroy_wq;

	ret = class_register(&srp_class);
	if (ret) {
		pr_err("couldn't register class infiniband_srp\n");
		goto release_tr;
	}

	ib_sa_register_client(&srp_sa_client);

	ret = ib_register_client(&srp_client);
	if (ret) {
		pr_err("couldn't register IB client\n");
		goto unreg_sa;
	}

out:
	return ret;

unreg_sa:
	ib_sa_unregister_client(&srp_sa_client);
	class_unregister(&srp_class);

release_tr:
	srp_release_transport(ib_srp_transport_template);

destroy_wq:
	destroy_workqueue(srp_remove_wq);
	goto out;
}
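
/*
 * Module teardown: undo srp_init_module() in reverse, starting with the IB
 * client (which triggers srp_remove_one() for every device) and ending with
 * the removal workqueue.
 */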
static void __exit srp_cleanup_module(void)
{
	ib_unregister_client(&srp_client);
	ib_sa_unregister_client(&srp_sa_client);
	class_unregister(&srp_class);
	srp_release_transport(ib_srp_transport_template);
	destroy_workqueue(srp_remove_wq);
}

module_init(srp_init_module);
module_exit(srp_cleanup_module);