mm: hugetlb: fix hugepage memory leak caused by wrong reserve count
[linux/fpc-iii.git] / drivers / infiniband / ulp / srp / ib_srp.c
blob3db9a659719b0f6283af610bf01b8f65d27292a9
1 /*
2 * Copyright (c) 2005 Cisco Systems. All rights reserved.
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
33 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
35 #include <linux/module.h>
36 #include <linux/init.h>
37 #include <linux/slab.h>
38 #include <linux/err.h>
39 #include <linux/string.h>
40 #include <linux/parser.h>
41 #include <linux/random.h>
42 #include <linux/jiffies.h>
43 #include <rdma/ib_cache.h>
45 #include <linux/atomic.h>
47 #include <scsi/scsi.h>
48 #include <scsi/scsi_device.h>
49 #include <scsi/scsi_dbg.h>
50 #include <scsi/scsi_tcq.h>
51 #include <scsi/srp.h>
52 #include <scsi/scsi_transport_srp.h>
54 #include "ib_srp.h"
56 #define DRV_NAME "ib_srp"
57 #define PFX DRV_NAME ": "
58 #define DRV_VERSION "2.0"
59 #define DRV_RELDATE "July 26, 2015"
61 MODULE_AUTHOR("Roland Dreier");
62 MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator");
63 MODULE_LICENSE("Dual BSD/GPL");
64 MODULE_VERSION(DRV_VERSION);
65 MODULE_INFO(release_date, DRV_RELDATE);
67 static unsigned int srp_sg_tablesize;
68 static unsigned int cmd_sg_entries;
69 static unsigned int indirect_sg_entries;
70 static bool allow_ext_sg;
71 static bool prefer_fr = true;
72 static bool register_always = true;
73 static int topspin_workarounds = 1;
75 module_param(srp_sg_tablesize, uint, 0444);
76 MODULE_PARM_DESC(srp_sg_tablesize, "Deprecated name for cmd_sg_entries");
78 module_param(cmd_sg_entries, uint, 0444);
79 MODULE_PARM_DESC(cmd_sg_entries,
80 "Default number of gather/scatter entries in the SRP command (default is 12, max 255)");
82 module_param(indirect_sg_entries, uint, 0444);
83 MODULE_PARM_DESC(indirect_sg_entries,
84 "Default max number of gather/scatter entries (default is 12, max is " __stringify(SCSI_MAX_SG_CHAIN_SEGMENTS) ")");
86 module_param(allow_ext_sg, bool, 0444);
87 MODULE_PARM_DESC(allow_ext_sg,
88 "Default behavior when there are more than cmd_sg_entries S/G entries after mapping; fails the request when false (default false)");
90 module_param(topspin_workarounds, int, 0444);
91 MODULE_PARM_DESC(topspin_workarounds,
92 "Enable workarounds for Topspin/Cisco SRP target bugs if != 0");
94 module_param(prefer_fr, bool, 0444);
95 MODULE_PARM_DESC(prefer_fr,
96 "Whether to use fast registration if both FMR and fast registration are supported");
98 module_param(register_always, bool, 0444);
99 MODULE_PARM_DESC(register_always,
100 "Use memory registration even for contiguous memory regions");
102 static const struct kernel_param_ops srp_tmo_ops;
104 static int srp_reconnect_delay = 10;
105 module_param_cb(reconnect_delay, &srp_tmo_ops, &srp_reconnect_delay,
106 S_IRUGO | S_IWUSR);
107 MODULE_PARM_DESC(reconnect_delay, "Time between successive reconnect attempts");
109 static int srp_fast_io_fail_tmo = 15;
110 module_param_cb(fast_io_fail_tmo, &srp_tmo_ops, &srp_fast_io_fail_tmo,
111 S_IRUGO | S_IWUSR);
112 MODULE_PARM_DESC(fast_io_fail_tmo,
113 "Number of seconds between the observation of a transport"
114 " layer error and failing all I/O. \"off\" means that this"
115 " functionality is disabled.");
117 static int srp_dev_loss_tmo = 600;
118 module_param_cb(dev_loss_tmo, &srp_tmo_ops, &srp_dev_loss_tmo,
119 S_IRUGO | S_IWUSR);
120 MODULE_PARM_DESC(dev_loss_tmo,
121 "Maximum number of seconds that the SRP transport should"
122 " insulate transport layer errors. After this time has been"
123 " exceeded the SCSI host is removed. Should be"
124 " between 1 and " __stringify(SCSI_DEVICE_BLOCK_MAX_TIMEOUT)
125 " if fast_io_fail_tmo has not been set. \"off\" means that"
126 " this functionality is disabled.");
128 static unsigned ch_count;
129 module_param(ch_count, uint, 0444);
130 MODULE_PARM_DESC(ch_count,
131 "Number of RDMA channels to use for communication with an SRP target. Using more than one channel improves performance if the HCA supports multiple completion vectors. The default value is the minimum of four times the number of online CPU sockets and the number of completion vectors supported by the HCA.");
133 static void srp_add_one(struct ib_device *device);
134 static void srp_remove_one(struct ib_device *device, void *client_data);
135 static void srp_recv_completion(struct ib_cq *cq, void *ch_ptr);
136 static void srp_send_completion(struct ib_cq *cq, void *ch_ptr);
137 static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);
139 static struct scsi_transport_template *ib_srp_transport_template;
140 static struct workqueue_struct *srp_remove_wq;
142 static struct ib_client srp_client = {
143 .name = "srp",
144 .add = srp_add_one,
145 .remove = srp_remove_one
148 static struct ib_sa_client srp_sa_client;
150 static int srp_tmo_get(char *buffer, const struct kernel_param *kp)
152 int tmo = *(int *)kp->arg;
154 if (tmo >= 0)
155 return sprintf(buffer, "%d", tmo);
156 else
157 return sprintf(buffer, "off");
160 static int srp_tmo_set(const char *val, const struct kernel_param *kp)
162 int tmo, res;
164 res = srp_parse_tmo(&tmo, val);
165 if (res)
166 goto out;
168 if (kp->arg == &srp_reconnect_delay)
169 res = srp_tmo_valid(tmo, srp_fast_io_fail_tmo,
170 srp_dev_loss_tmo);
171 else if (kp->arg == &srp_fast_io_fail_tmo)
172 res = srp_tmo_valid(srp_reconnect_delay, tmo, srp_dev_loss_tmo);
173 else
174 res = srp_tmo_valid(srp_reconnect_delay, srp_fast_io_fail_tmo,
175 tmo);
176 if (res)
177 goto out;
178 *(int *)kp->arg = tmo;
180 out:
181 return res;
184 static const struct kernel_param_ops srp_tmo_ops = {
185 .get = srp_tmo_get,
186 .set = srp_tmo_set,
189 static inline struct srp_target_port *host_to_target(struct Scsi_Host *host)
191 return (struct srp_target_port *) host->hostdata;
194 static const char *srp_target_info(struct Scsi_Host *host)
196 return host_to_target(host)->target_name;
199 static int srp_target_is_topspin(struct srp_target_port *target)
201 static const u8 topspin_oui[3] = { 0x00, 0x05, 0xad };
202 static const u8 cisco_oui[3] = { 0x00, 0x1b, 0x0d };
204 return topspin_workarounds &&
205 (!memcmp(&target->ioc_guid, topspin_oui, sizeof topspin_oui) ||
206 !memcmp(&target->ioc_guid, cisco_oui, sizeof cisco_oui));
209 static struct srp_iu *srp_alloc_iu(struct srp_host *host, size_t size,
210 gfp_t gfp_mask,
211 enum dma_data_direction direction)
213 struct srp_iu *iu;
215 iu = kmalloc(sizeof *iu, gfp_mask);
216 if (!iu)
217 goto out;
219 iu->buf = kzalloc(size, gfp_mask);
220 if (!iu->buf)
221 goto out_free_iu;
223 iu->dma = ib_dma_map_single(host->srp_dev->dev, iu->buf, size,
224 direction);
225 if (ib_dma_mapping_error(host->srp_dev->dev, iu->dma))
226 goto out_free_buf;
228 iu->size = size;
229 iu->direction = direction;
231 return iu;
233 out_free_buf:
234 kfree(iu->buf);
235 out_free_iu:
236 kfree(iu);
237 out:
238 return NULL;
241 static void srp_free_iu(struct srp_host *host, struct srp_iu *iu)
243 if (!iu)
244 return;
246 ib_dma_unmap_single(host->srp_dev->dev, iu->dma, iu->size,
247 iu->direction);
248 kfree(iu->buf);
249 kfree(iu);
252 static void srp_qp_event(struct ib_event *event, void *context)
254 pr_debug("QP event %s (%d)\n",
255 ib_event_msg(event->event), event->event);
258 static int srp_init_qp(struct srp_target_port *target,
259 struct ib_qp *qp)
261 struct ib_qp_attr *attr;
262 int ret;
264 attr = kmalloc(sizeof *attr, GFP_KERNEL);
265 if (!attr)
266 return -ENOMEM;
268 ret = ib_find_cached_pkey(target->srp_host->srp_dev->dev,
269 target->srp_host->port,
270 be16_to_cpu(target->pkey),
271 &attr->pkey_index);
272 if (ret)
273 goto out;
275 attr->qp_state = IB_QPS_INIT;
276 attr->qp_access_flags = (IB_ACCESS_REMOTE_READ |
277 IB_ACCESS_REMOTE_WRITE);
278 attr->port_num = target->srp_host->port;
280 ret = ib_modify_qp(qp, attr,
281 IB_QP_STATE |
282 IB_QP_PKEY_INDEX |
283 IB_QP_ACCESS_FLAGS |
284 IB_QP_PORT);
286 out:
287 kfree(attr);
288 return ret;
291 static int srp_new_cm_id(struct srp_rdma_ch *ch)
293 struct srp_target_port *target = ch->target;
294 struct ib_cm_id *new_cm_id;
296 new_cm_id = ib_create_cm_id(target->srp_host->srp_dev->dev,
297 srp_cm_handler, ch);
298 if (IS_ERR(new_cm_id))
299 return PTR_ERR(new_cm_id);
301 if (ch->cm_id)
302 ib_destroy_cm_id(ch->cm_id);
303 ch->cm_id = new_cm_id;
304 ch->path.sgid = target->sgid;
305 ch->path.dgid = target->orig_dgid;
306 ch->path.pkey = target->pkey;
307 ch->path.service_id = target->service_id;
309 return 0;
312 static struct ib_fmr_pool *srp_alloc_fmr_pool(struct srp_target_port *target)
314 struct srp_device *dev = target->srp_host->srp_dev;
315 struct ib_fmr_pool_param fmr_param;
317 memset(&fmr_param, 0, sizeof(fmr_param));
318 fmr_param.pool_size = target->scsi_host->can_queue;
319 fmr_param.dirty_watermark = fmr_param.pool_size / 4;
320 fmr_param.cache = 1;
321 fmr_param.max_pages_per_fmr = dev->max_pages_per_mr;
322 fmr_param.page_shift = ilog2(dev->mr_page_size);
323 fmr_param.access = (IB_ACCESS_LOCAL_WRITE |
324 IB_ACCESS_REMOTE_WRITE |
325 IB_ACCESS_REMOTE_READ);
327 return ib_create_fmr_pool(dev->pd, &fmr_param);
331 * srp_destroy_fr_pool() - free the resources owned by a pool
332 * @pool: Fast registration pool to be destroyed.
334 static void srp_destroy_fr_pool(struct srp_fr_pool *pool)
336 int i;
337 struct srp_fr_desc *d;
339 if (!pool)
340 return;
342 for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
343 if (d->mr)
344 ib_dereg_mr(d->mr);
346 kfree(pool);
350 * srp_create_fr_pool() - allocate and initialize a pool for fast registration
351 * @device: IB device to allocate fast registration descriptors for.
352 * @pd: Protection domain associated with the FR descriptors.
353 * @pool_size: Number of descriptors to allocate.
354 * @max_page_list_len: Maximum fast registration work request page list length.
356 static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device,
357 struct ib_pd *pd, int pool_size,
358 int max_page_list_len)
360 struct srp_fr_pool *pool;
361 struct srp_fr_desc *d;
362 struct ib_mr *mr;
363 int i, ret = -EINVAL;
365 if (pool_size <= 0)
366 goto err;
367 ret = -ENOMEM;
368 pool = kzalloc(sizeof(struct srp_fr_pool) +
369 pool_size * sizeof(struct srp_fr_desc), GFP_KERNEL);
370 if (!pool)
371 goto err;
372 pool->size = pool_size;
373 pool->max_page_list_len = max_page_list_len;
374 spin_lock_init(&pool->lock);
375 INIT_LIST_HEAD(&pool->free_list);
377 for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
378 mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG,
379 max_page_list_len);
380 if (IS_ERR(mr)) {
381 ret = PTR_ERR(mr);
382 goto destroy_pool;
384 d->mr = mr;
385 list_add_tail(&d->entry, &pool->free_list);
388 out:
389 return pool;
391 destroy_pool:
392 srp_destroy_fr_pool(pool);
394 err:
395 pool = ERR_PTR(ret);
396 goto out;
400 * srp_fr_pool_get() - obtain a descriptor suitable for fast registration
401 * @pool: Pool to obtain descriptor from.
403 static struct srp_fr_desc *srp_fr_pool_get(struct srp_fr_pool *pool)
405 struct srp_fr_desc *d = NULL;
406 unsigned long flags;
408 spin_lock_irqsave(&pool->lock, flags);
409 if (!list_empty(&pool->free_list)) {
410 d = list_first_entry(&pool->free_list, typeof(*d), entry);
411 list_del(&d->entry);
413 spin_unlock_irqrestore(&pool->lock, flags);
415 return d;
419 * srp_fr_pool_put() - put an FR descriptor back in the free list
420 * @pool: Pool the descriptor was allocated from.
421 * @desc: Pointer to an array of fast registration descriptor pointers.
422 * @n: Number of descriptors to put back.
424 * Note: The caller must already have queued an invalidation request for
425 * desc->mr->rkey before calling this function.
427 static void srp_fr_pool_put(struct srp_fr_pool *pool, struct srp_fr_desc **desc,
428 int n)
430 unsigned long flags;
431 int i;
433 spin_lock_irqsave(&pool->lock, flags);
434 for (i = 0; i < n; i++)
435 list_add(&desc[i]->entry, &pool->free_list);
436 spin_unlock_irqrestore(&pool->lock, flags);
439 static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
441 struct srp_device *dev = target->srp_host->srp_dev;
443 return srp_create_fr_pool(dev->dev, dev->pd,
444 target->scsi_host->can_queue,
445 dev->max_pages_per_mr);
449 * srp_destroy_qp() - destroy an RDMA queue pair
450 * @ch: SRP RDMA channel.
452 * Change a queue pair into the error state and wait until all receive
453 * completions have been processed before destroying it. This avoids that
454 * the receive completion handler can access the queue pair while it is
455 * being destroyed.
457 static void srp_destroy_qp(struct srp_rdma_ch *ch)
459 static struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
460 static struct ib_recv_wr wr = { .wr_id = SRP_LAST_WR_ID };
461 struct ib_recv_wr *bad_wr;
462 int ret;
464 /* Destroying a QP and reusing ch->done is only safe if not connected */
465 WARN_ON_ONCE(ch->connected);
467 ret = ib_modify_qp(ch->qp, &attr, IB_QP_STATE);
468 WARN_ONCE(ret, "ib_cm_init_qp_attr() returned %d\n", ret);
469 if (ret)
470 goto out;
472 init_completion(&ch->done);
473 ret = ib_post_recv(ch->qp, &wr, &bad_wr);
474 WARN_ONCE(ret, "ib_post_recv() returned %d\n", ret);
475 if (ret == 0)
476 wait_for_completion(&ch->done);
478 out:
479 ib_destroy_qp(ch->qp);
482 static int srp_create_ch_ib(struct srp_rdma_ch *ch)
484 struct srp_target_port *target = ch->target;
485 struct srp_device *dev = target->srp_host->srp_dev;
486 struct ib_qp_init_attr *init_attr;
487 struct ib_cq *recv_cq, *send_cq;
488 struct ib_qp *qp;
489 struct ib_fmr_pool *fmr_pool = NULL;
490 struct srp_fr_pool *fr_pool = NULL;
491 const int m = dev->use_fast_reg ? 3 : 1;
492 struct ib_cq_init_attr cq_attr = {};
493 int ret;
495 init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
496 if (!init_attr)
497 return -ENOMEM;
499 /* + 1 for SRP_LAST_WR_ID */
500 cq_attr.cqe = target->queue_size + 1;
501 cq_attr.comp_vector = ch->comp_vector;
502 recv_cq = ib_create_cq(dev->dev, srp_recv_completion, NULL, ch,
503 &cq_attr);
504 if (IS_ERR(recv_cq)) {
505 ret = PTR_ERR(recv_cq);
506 goto err;
509 cq_attr.cqe = m * target->queue_size;
510 cq_attr.comp_vector = ch->comp_vector;
511 send_cq = ib_create_cq(dev->dev, srp_send_completion, NULL, ch,
512 &cq_attr);
513 if (IS_ERR(send_cq)) {
514 ret = PTR_ERR(send_cq);
515 goto err_recv_cq;
518 ib_req_notify_cq(recv_cq, IB_CQ_NEXT_COMP);
520 init_attr->event_handler = srp_qp_event;
521 init_attr->cap.max_send_wr = m * target->queue_size;
522 init_attr->cap.max_recv_wr = target->queue_size + 1;
523 init_attr->cap.max_recv_sge = 1;
524 init_attr->cap.max_send_sge = 1;
525 init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
526 init_attr->qp_type = IB_QPT_RC;
527 init_attr->send_cq = send_cq;
528 init_attr->recv_cq = recv_cq;
530 qp = ib_create_qp(dev->pd, init_attr);
531 if (IS_ERR(qp)) {
532 ret = PTR_ERR(qp);
533 goto err_send_cq;
536 ret = srp_init_qp(target, qp);
537 if (ret)
538 goto err_qp;
540 if (dev->use_fast_reg) {
541 fr_pool = srp_alloc_fr_pool(target);
542 if (IS_ERR(fr_pool)) {
543 ret = PTR_ERR(fr_pool);
544 shost_printk(KERN_WARNING, target->scsi_host, PFX
545 "FR pool allocation failed (%d)\n", ret);
546 goto err_qp;
548 } else if (dev->use_fmr) {
549 fmr_pool = srp_alloc_fmr_pool(target);
550 if (IS_ERR(fmr_pool)) {
551 ret = PTR_ERR(fmr_pool);
552 shost_printk(KERN_WARNING, target->scsi_host, PFX
553 "FMR pool allocation failed (%d)\n", ret);
554 goto err_qp;
558 if (ch->qp)
559 srp_destroy_qp(ch);
560 if (ch->recv_cq)
561 ib_destroy_cq(ch->recv_cq);
562 if (ch->send_cq)
563 ib_destroy_cq(ch->send_cq);
565 ch->qp = qp;
566 ch->recv_cq = recv_cq;
567 ch->send_cq = send_cq;
569 if (dev->use_fast_reg) {
570 if (ch->fr_pool)
571 srp_destroy_fr_pool(ch->fr_pool);
572 ch->fr_pool = fr_pool;
573 } else if (dev->use_fmr) {
574 if (ch->fmr_pool)
575 ib_destroy_fmr_pool(ch->fmr_pool);
576 ch->fmr_pool = fmr_pool;
579 kfree(init_attr);
580 return 0;
582 err_qp:
583 ib_destroy_qp(qp);
585 err_send_cq:
586 ib_destroy_cq(send_cq);
588 err_recv_cq:
589 ib_destroy_cq(recv_cq);
591 err:
592 kfree(init_attr);
593 return ret;
597 * Note: this function may be called without srp_alloc_iu_bufs() having been
598 * invoked. Hence the ch->[rt]x_ring checks.
600 static void srp_free_ch_ib(struct srp_target_port *target,
601 struct srp_rdma_ch *ch)
603 struct srp_device *dev = target->srp_host->srp_dev;
604 int i;
606 if (!ch->target)
607 return;
609 if (ch->cm_id) {
610 ib_destroy_cm_id(ch->cm_id);
611 ch->cm_id = NULL;
614 /* If srp_new_cm_id() succeeded but srp_create_ch_ib() not, return. */
615 if (!ch->qp)
616 return;
618 if (dev->use_fast_reg) {
619 if (ch->fr_pool)
620 srp_destroy_fr_pool(ch->fr_pool);
621 } else if (dev->use_fmr) {
622 if (ch->fmr_pool)
623 ib_destroy_fmr_pool(ch->fmr_pool);
625 srp_destroy_qp(ch);
626 ib_destroy_cq(ch->send_cq);
627 ib_destroy_cq(ch->recv_cq);
630 * Avoid that the SCSI error handler tries to use this channel after
631 * it has been freed. The SCSI error handler can namely continue
632 * trying to perform recovery actions after scsi_remove_host()
633 * returned.
635 ch->target = NULL;
637 ch->qp = NULL;
638 ch->send_cq = ch->recv_cq = NULL;
640 if (ch->rx_ring) {
641 for (i = 0; i < target->queue_size; ++i)
642 srp_free_iu(target->srp_host, ch->rx_ring[i]);
643 kfree(ch->rx_ring);
644 ch->rx_ring = NULL;
646 if (ch->tx_ring) {
647 for (i = 0; i < target->queue_size; ++i)
648 srp_free_iu(target->srp_host, ch->tx_ring[i]);
649 kfree(ch->tx_ring);
650 ch->tx_ring = NULL;
654 static void srp_path_rec_completion(int status,
655 struct ib_sa_path_rec *pathrec,
656 void *ch_ptr)
658 struct srp_rdma_ch *ch = ch_ptr;
659 struct srp_target_port *target = ch->target;
661 ch->status = status;
662 if (status)
663 shost_printk(KERN_ERR, target->scsi_host,
664 PFX "Got failed path rec status %d\n", status);
665 else
666 ch->path = *pathrec;
667 complete(&ch->done);
670 static int srp_lookup_path(struct srp_rdma_ch *ch)
672 struct srp_target_port *target = ch->target;
673 int ret;
675 ch->path.numb_path = 1;
677 init_completion(&ch->done);
679 ch->path_query_id = ib_sa_path_rec_get(&srp_sa_client,
680 target->srp_host->srp_dev->dev,
681 target->srp_host->port,
682 &ch->path,
683 IB_SA_PATH_REC_SERVICE_ID |
684 IB_SA_PATH_REC_DGID |
685 IB_SA_PATH_REC_SGID |
686 IB_SA_PATH_REC_NUMB_PATH |
687 IB_SA_PATH_REC_PKEY,
688 SRP_PATH_REC_TIMEOUT_MS,
689 GFP_KERNEL,
690 srp_path_rec_completion,
691 ch, &ch->path_query);
692 if (ch->path_query_id < 0)
693 return ch->path_query_id;
695 ret = wait_for_completion_interruptible(&ch->done);
696 if (ret < 0)
697 return ret;
699 if (ch->status < 0)
700 shost_printk(KERN_WARNING, target->scsi_host,
701 PFX "Path record query failed\n");
703 return ch->status;
706 static int srp_send_req(struct srp_rdma_ch *ch, bool multich)
708 struct srp_target_port *target = ch->target;
709 struct {
710 struct ib_cm_req_param param;
711 struct srp_login_req priv;
712 } *req = NULL;
713 int status;
715 req = kzalloc(sizeof *req, GFP_KERNEL);
716 if (!req)
717 return -ENOMEM;
719 req->param.primary_path = &ch->path;
720 req->param.alternate_path = NULL;
721 req->param.service_id = target->service_id;
722 req->param.qp_num = ch->qp->qp_num;
723 req->param.qp_type = ch->qp->qp_type;
724 req->param.private_data = &req->priv;
725 req->param.private_data_len = sizeof req->priv;
726 req->param.flow_control = 1;
728 get_random_bytes(&req->param.starting_psn, 4);
729 req->param.starting_psn &= 0xffffff;
732 * Pick some arbitrary defaults here; we could make these
733 * module parameters if anyone cared about setting them.
735 req->param.responder_resources = 4;
736 req->param.remote_cm_response_timeout = 20;
737 req->param.local_cm_response_timeout = 20;
738 req->param.retry_count = target->tl_retry_count;
739 req->param.rnr_retry_count = 7;
740 req->param.max_cm_retries = 15;
742 req->priv.opcode = SRP_LOGIN_REQ;
743 req->priv.tag = 0;
744 req->priv.req_it_iu_len = cpu_to_be32(target->max_iu_len);
745 req->priv.req_buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
746 SRP_BUF_FORMAT_INDIRECT);
747 req->priv.req_flags = (multich ? SRP_MULTICHAN_MULTI :
748 SRP_MULTICHAN_SINGLE);
750 * In the published SRP specification (draft rev. 16a), the
751 * port identifier format is 8 bytes of ID extension followed
752 * by 8 bytes of GUID. Older drafts put the two halves in the
753 * opposite order, so that the GUID comes first.
755 * Targets conforming to these obsolete drafts can be
756 * recognized by the I/O Class they report.
758 if (target->io_class == SRP_REV10_IB_IO_CLASS) {
759 memcpy(req->priv.initiator_port_id,
760 &target->sgid.global.interface_id, 8);
761 memcpy(req->priv.initiator_port_id + 8,
762 &target->initiator_ext, 8);
763 memcpy(req->priv.target_port_id, &target->ioc_guid, 8);
764 memcpy(req->priv.target_port_id + 8, &target->id_ext, 8);
765 } else {
766 memcpy(req->priv.initiator_port_id,
767 &target->initiator_ext, 8);
768 memcpy(req->priv.initiator_port_id + 8,
769 &target->sgid.global.interface_id, 8);
770 memcpy(req->priv.target_port_id, &target->id_ext, 8);
771 memcpy(req->priv.target_port_id + 8, &target->ioc_guid, 8);
775 * Topspin/Cisco SRP targets will reject our login unless we
776 * zero out the first 8 bytes of our initiator port ID and set
777 * the second 8 bytes to the local node GUID.
779 if (srp_target_is_topspin(target)) {
780 shost_printk(KERN_DEBUG, target->scsi_host,
781 PFX "Topspin/Cisco initiator port ID workaround "
782 "activated for target GUID %016llx\n",
783 be64_to_cpu(target->ioc_guid));
784 memset(req->priv.initiator_port_id, 0, 8);
785 memcpy(req->priv.initiator_port_id + 8,
786 &target->srp_host->srp_dev->dev->node_guid, 8);
789 status = ib_send_cm_req(ch->cm_id, &req->param);
791 kfree(req);
793 return status;
796 static bool srp_queue_remove_work(struct srp_target_port *target)
798 bool changed = false;
800 spin_lock_irq(&target->lock);
801 if (target->state != SRP_TARGET_REMOVED) {
802 target->state = SRP_TARGET_REMOVED;
803 changed = true;
805 spin_unlock_irq(&target->lock);
807 if (changed)
808 queue_work(srp_remove_wq, &target->remove_work);
810 return changed;
813 static void srp_disconnect_target(struct srp_target_port *target)
815 struct srp_rdma_ch *ch;
816 int i;
818 /* XXX should send SRP_I_LOGOUT request */
820 for (i = 0; i < target->ch_count; i++) {
821 ch = &target->ch[i];
822 ch->connected = false;
823 if (ch->cm_id && ib_send_cm_dreq(ch->cm_id, NULL, 0)) {
824 shost_printk(KERN_DEBUG, target->scsi_host,
825 PFX "Sending CM DREQ failed\n");
830 static void srp_free_req_data(struct srp_target_port *target,
831 struct srp_rdma_ch *ch)
833 struct srp_device *dev = target->srp_host->srp_dev;
834 struct ib_device *ibdev = dev->dev;
835 struct srp_request *req;
836 int i;
838 if (!ch->req_ring)
839 return;
841 for (i = 0; i < target->req_ring_size; ++i) {
842 req = &ch->req_ring[i];
843 if (dev->use_fast_reg) {
844 kfree(req->fr_list);
845 } else {
846 kfree(req->fmr_list);
847 kfree(req->map_page);
849 if (req->indirect_dma_addr) {
850 ib_dma_unmap_single(ibdev, req->indirect_dma_addr,
851 target->indirect_size,
852 DMA_TO_DEVICE);
854 kfree(req->indirect_desc);
857 kfree(ch->req_ring);
858 ch->req_ring = NULL;
861 static int srp_alloc_req_data(struct srp_rdma_ch *ch)
863 struct srp_target_port *target = ch->target;
864 struct srp_device *srp_dev = target->srp_host->srp_dev;
865 struct ib_device *ibdev = srp_dev->dev;
866 struct srp_request *req;
867 void *mr_list;
868 dma_addr_t dma_addr;
869 int i, ret = -ENOMEM;
871 ch->req_ring = kcalloc(target->req_ring_size, sizeof(*ch->req_ring),
872 GFP_KERNEL);
873 if (!ch->req_ring)
874 goto out;
876 for (i = 0; i < target->req_ring_size; ++i) {
877 req = &ch->req_ring[i];
878 mr_list = kmalloc(target->cmd_sg_cnt * sizeof(void *),
879 GFP_KERNEL);
880 if (!mr_list)
881 goto out;
882 if (srp_dev->use_fast_reg) {
883 req->fr_list = mr_list;
884 } else {
885 req->fmr_list = mr_list;
886 req->map_page = kmalloc(srp_dev->max_pages_per_mr *
887 sizeof(void *), GFP_KERNEL);
888 if (!req->map_page)
889 goto out;
891 req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL);
892 if (!req->indirect_desc)
893 goto out;
895 dma_addr = ib_dma_map_single(ibdev, req->indirect_desc,
896 target->indirect_size,
897 DMA_TO_DEVICE);
898 if (ib_dma_mapping_error(ibdev, dma_addr))
899 goto out;
901 req->indirect_dma_addr = dma_addr;
903 ret = 0;
905 out:
906 return ret;
910 * srp_del_scsi_host_attr() - Remove attributes defined in the host template.
911 * @shost: SCSI host whose attributes to remove from sysfs.
913 * Note: Any attributes defined in the host template and that did not exist
914 * before invocation of this function will be ignored.
916 static void srp_del_scsi_host_attr(struct Scsi_Host *shost)
918 struct device_attribute **attr;
920 for (attr = shost->hostt->shost_attrs; attr && *attr; ++attr)
921 device_remove_file(&shost->shost_dev, *attr);
924 static void srp_remove_target(struct srp_target_port *target)
926 struct srp_rdma_ch *ch;
927 int i;
929 WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
931 srp_del_scsi_host_attr(target->scsi_host);
932 srp_rport_get(target->rport);
933 srp_remove_host(target->scsi_host);
934 scsi_remove_host(target->scsi_host);
935 srp_stop_rport_timers(target->rport);
936 srp_disconnect_target(target);
937 for (i = 0; i < target->ch_count; i++) {
938 ch = &target->ch[i];
939 srp_free_ch_ib(target, ch);
941 cancel_work_sync(&target->tl_err_work);
942 srp_rport_put(target->rport);
943 for (i = 0; i < target->ch_count; i++) {
944 ch = &target->ch[i];
945 srp_free_req_data(target, ch);
947 kfree(target->ch);
948 target->ch = NULL;
950 spin_lock(&target->srp_host->target_lock);
951 list_del(&target->list);
952 spin_unlock(&target->srp_host->target_lock);
954 scsi_host_put(target->scsi_host);
957 static void srp_remove_work(struct work_struct *work)
959 struct srp_target_port *target =
960 container_of(work, struct srp_target_port, remove_work);
962 WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
964 srp_remove_target(target);
967 static void srp_rport_delete(struct srp_rport *rport)
969 struct srp_target_port *target = rport->lld_data;
971 srp_queue_remove_work(target);
975 * srp_connected_ch() - number of connected channels
976 * @target: SRP target port.
978 static int srp_connected_ch(struct srp_target_port *target)
980 int i, c = 0;
982 for (i = 0; i < target->ch_count; i++)
983 c += target->ch[i].connected;
985 return c;
988 static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich)
990 struct srp_target_port *target = ch->target;
991 int ret;
993 WARN_ON_ONCE(!multich && srp_connected_ch(target) > 0);
995 ret = srp_lookup_path(ch);
996 if (ret)
997 goto out;
999 while (1) {
1000 init_completion(&ch->done);
1001 ret = srp_send_req(ch, multich);
1002 if (ret)
1003 goto out;
1004 ret = wait_for_completion_interruptible(&ch->done);
1005 if (ret < 0)
1006 goto out;
1009 * The CM event handling code will set status to
1010 * SRP_PORT_REDIRECT if we get a port redirect REJ
1011 * back, or SRP_DLID_REDIRECT if we get a lid/qp
1012 * redirect REJ back.
1014 ret = ch->status;
1015 switch (ret) {
1016 case 0:
1017 ch->connected = true;
1018 goto out;
1020 case SRP_PORT_REDIRECT:
1021 ret = srp_lookup_path(ch);
1022 if (ret)
1023 goto out;
1024 break;
1026 case SRP_DLID_REDIRECT:
1027 break;
1029 case SRP_STALE_CONN:
1030 shost_printk(KERN_ERR, target->scsi_host, PFX
1031 "giving up on stale connection\n");
1032 ret = -ECONNRESET;
1033 goto out;
1035 default:
1036 goto out;
1040 out:
1041 return ret <= 0 ? ret : -ENODEV;
1044 static int srp_inv_rkey(struct srp_rdma_ch *ch, u32 rkey)
1046 struct ib_send_wr *bad_wr;
1047 struct ib_send_wr wr = {
1048 .opcode = IB_WR_LOCAL_INV,
1049 .wr_id = LOCAL_INV_WR_ID_MASK,
1050 .next = NULL,
1051 .num_sge = 0,
1052 .send_flags = 0,
1053 .ex.invalidate_rkey = rkey,
1056 return ib_post_send(ch->qp, &wr, &bad_wr);
1059 static void srp_unmap_data(struct scsi_cmnd *scmnd,
1060 struct srp_rdma_ch *ch,
1061 struct srp_request *req)
1063 struct srp_target_port *target = ch->target;
1064 struct srp_device *dev = target->srp_host->srp_dev;
1065 struct ib_device *ibdev = dev->dev;
1066 int i, res;
1068 if (!scsi_sglist(scmnd) ||
1069 (scmnd->sc_data_direction != DMA_TO_DEVICE &&
1070 scmnd->sc_data_direction != DMA_FROM_DEVICE))
1071 return;
1073 if (dev->use_fast_reg) {
1074 struct srp_fr_desc **pfr;
1076 for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) {
1077 res = srp_inv_rkey(ch, (*pfr)->mr->rkey);
1078 if (res < 0) {
1079 shost_printk(KERN_ERR, target->scsi_host, PFX
1080 "Queueing INV WR for rkey %#x failed (%d)\n",
1081 (*pfr)->mr->rkey, res);
1082 queue_work(system_long_wq,
1083 &target->tl_err_work);
1086 if (req->nmdesc)
1087 srp_fr_pool_put(ch->fr_pool, req->fr_list,
1088 req->nmdesc);
1089 } else if (dev->use_fmr) {
1090 struct ib_pool_fmr **pfmr;
1092 for (i = req->nmdesc, pfmr = req->fmr_list; i > 0; i--, pfmr++)
1093 ib_fmr_pool_unmap(*pfmr);
1096 ib_dma_unmap_sg(ibdev, scsi_sglist(scmnd), scsi_sg_count(scmnd),
1097 scmnd->sc_data_direction);
1101 * srp_claim_req - Take ownership of the scmnd associated with a request.
1102 * @ch: SRP RDMA channel.
1103 * @req: SRP request.
1104 * @sdev: If not NULL, only take ownership for this SCSI device.
1105 * @scmnd: If NULL, take ownership of @req->scmnd. If not NULL, only take
1106 * ownership of @req->scmnd if it equals @scmnd.
1108 * Return value:
1109 * Either NULL or a pointer to the SCSI command the caller became owner of.
1111 static struct scsi_cmnd *srp_claim_req(struct srp_rdma_ch *ch,
1112 struct srp_request *req,
1113 struct scsi_device *sdev,
1114 struct scsi_cmnd *scmnd)
1116 unsigned long flags;
1118 spin_lock_irqsave(&ch->lock, flags);
1119 if (req->scmnd &&
1120 (!sdev || req->scmnd->device == sdev) &&
1121 (!scmnd || req->scmnd == scmnd)) {
1122 scmnd = req->scmnd;
1123 req->scmnd = NULL;
1124 } else {
1125 scmnd = NULL;
1127 spin_unlock_irqrestore(&ch->lock, flags);
1129 return scmnd;
1133 * srp_free_req() - Unmap data and add request to the free request list.
1134 * @ch: SRP RDMA channel.
1135 * @req: Request to be freed.
1136 * @scmnd: SCSI command associated with @req.
1137 * @req_lim_delta: Amount to be added to @target->req_lim.
1139 static void srp_free_req(struct srp_rdma_ch *ch, struct srp_request *req,
1140 struct scsi_cmnd *scmnd, s32 req_lim_delta)
1142 unsigned long flags;
1144 srp_unmap_data(scmnd, ch, req);
1146 spin_lock_irqsave(&ch->lock, flags);
1147 ch->req_lim += req_lim_delta;
1148 spin_unlock_irqrestore(&ch->lock, flags);
1151 static void srp_finish_req(struct srp_rdma_ch *ch, struct srp_request *req,
1152 struct scsi_device *sdev, int result)
1154 struct scsi_cmnd *scmnd = srp_claim_req(ch, req, sdev, NULL);
1156 if (scmnd) {
1157 srp_free_req(ch, req, scmnd, 0);
1158 scmnd->result = result;
1159 scmnd->scsi_done(scmnd);
1163 static void srp_terminate_io(struct srp_rport *rport)
1165 struct srp_target_port *target = rport->lld_data;
1166 struct srp_rdma_ch *ch;
1167 struct Scsi_Host *shost = target->scsi_host;
1168 struct scsi_device *sdev;
1169 int i, j;
1172 * Invoking srp_terminate_io() while srp_queuecommand() is running
1173 * is not safe. Hence the warning statement below.
1175 shost_for_each_device(sdev, shost)
1176 WARN_ON_ONCE(sdev->request_queue->request_fn_active);
1178 for (i = 0; i < target->ch_count; i++) {
1179 ch = &target->ch[i];
1181 for (j = 0; j < target->req_ring_size; ++j) {
1182 struct srp_request *req = &ch->req_ring[j];
1184 srp_finish_req(ch, req, NULL,
1185 DID_TRANSPORT_FAILFAST << 16);
1191 * It is up to the caller to ensure that srp_rport_reconnect() calls are
1192 * serialized and that no concurrent srp_queuecommand(), srp_abort(),
1193 * srp_reset_device() or srp_reset_host() calls will occur while this function
1194 * is in progress. One way to realize that is not to call this function
1195 * directly but to call srp_reconnect_rport() instead since that last function
1196 * serializes calls of this function via rport->mutex and also blocks
1197 * srp_queuecommand() calls before invoking this function.
1199 static int srp_rport_reconnect(struct srp_rport *rport)
1201 struct srp_target_port *target = rport->lld_data;
1202 struct srp_rdma_ch *ch;
1203 int i, j, ret = 0;
1204 bool multich = false;
1206 srp_disconnect_target(target);
1208 if (target->state == SRP_TARGET_SCANNING)
1209 return -ENODEV;
1212 * Now get a new local CM ID so that we avoid confusing the target in
1213 * case things are really fouled up. Doing so also ensures that all CM
1214 * callbacks will have finished before a new QP is allocated.
1216 for (i = 0; i < target->ch_count; i++) {
1217 ch = &target->ch[i];
1218 ret += srp_new_cm_id(ch);
1220 for (i = 0; i < target->ch_count; i++) {
1221 ch = &target->ch[i];
1222 for (j = 0; j < target->req_ring_size; ++j) {
1223 struct srp_request *req = &ch->req_ring[j];
1225 srp_finish_req(ch, req, NULL, DID_RESET << 16);
1228 for (i = 0; i < target->ch_count; i++) {
1229 ch = &target->ch[i];
1231 * Whether or not creating a new CM ID succeeded, create a new
1232 * QP. This guarantees that all completion callback function
1233 * invocations have finished before request resetting starts.
1235 ret += srp_create_ch_ib(ch);
1237 INIT_LIST_HEAD(&ch->free_tx);
1238 for (j = 0; j < target->queue_size; ++j)
1239 list_add(&ch->tx_ring[j]->list, &ch->free_tx);
1242 target->qp_in_error = false;
1244 for (i = 0; i < target->ch_count; i++) {
1245 ch = &target->ch[i];
1246 if (ret)
1247 break;
1248 ret = srp_connect_ch(ch, multich);
1249 multich = true;
1252 if (ret == 0)
1253 shost_printk(KERN_INFO, target->scsi_host,
1254 PFX "reconnect succeeded\n");
1256 return ret;
1259 static void srp_map_desc(struct srp_map_state *state, dma_addr_t dma_addr,
1260 unsigned int dma_len, u32 rkey)
1262 struct srp_direct_buf *desc = state->desc;
1264 WARN_ON_ONCE(!dma_len);
1266 desc->va = cpu_to_be64(dma_addr);
1267 desc->key = cpu_to_be32(rkey);
1268 desc->len = cpu_to_be32(dma_len);
1270 state->total_len += dma_len;
1271 state->desc++;
1272 state->ndesc++;
1275 static int srp_map_finish_fmr(struct srp_map_state *state,
1276 struct srp_rdma_ch *ch)
1278 struct srp_target_port *target = ch->target;
1279 struct srp_device *dev = target->srp_host->srp_dev;
1280 struct ib_pool_fmr *fmr;
1281 u64 io_addr = 0;
1283 if (state->fmr.next >= state->fmr.end)
1284 return -ENOMEM;
1286 WARN_ON_ONCE(!dev->use_fmr);
1288 if (state->npages == 0)
1289 return 0;
1291 if (state->npages == 1 && target->global_mr) {
1292 srp_map_desc(state, state->base_dma_addr, state->dma_len,
1293 target->global_mr->rkey);
1294 goto reset_state;
1297 fmr = ib_fmr_pool_map_phys(ch->fmr_pool, state->pages,
1298 state->npages, io_addr);
1299 if (IS_ERR(fmr))
1300 return PTR_ERR(fmr);
1302 *state->fmr.next++ = fmr;
1303 state->nmdesc++;
1305 srp_map_desc(state, state->base_dma_addr & ~dev->mr_page_mask,
1306 state->dma_len, fmr->fmr->rkey);
1308 reset_state:
1309 state->npages = 0;
1310 state->dma_len = 0;
1312 return 0;
1315 static int srp_map_finish_fr(struct srp_map_state *state,
1316 struct srp_rdma_ch *ch, int sg_nents)
1318 struct srp_target_port *target = ch->target;
1319 struct srp_device *dev = target->srp_host->srp_dev;
1320 struct ib_send_wr *bad_wr;
1321 struct ib_reg_wr wr;
1322 struct srp_fr_desc *desc;
1323 u32 rkey;
1324 int n, err;
1326 if (state->fr.next >= state->fr.end)
1327 return -ENOMEM;
1329 WARN_ON_ONCE(!dev->use_fast_reg);
1331 if (sg_nents == 0)
1332 return 0;
1334 if (sg_nents == 1 && target->global_mr) {
1335 srp_map_desc(state, sg_dma_address(state->sg),
1336 sg_dma_len(state->sg),
1337 target->global_mr->rkey);
1338 return 1;
1341 desc = srp_fr_pool_get(ch->fr_pool);
1342 if (!desc)
1343 return -ENOMEM;
1345 rkey = ib_inc_rkey(desc->mr->rkey);
1346 ib_update_fast_reg_key(desc->mr, rkey);
1348 n = ib_map_mr_sg(desc->mr, state->sg, sg_nents, dev->mr_page_size);
1349 if (unlikely(n < 0))
1350 return n;
1352 wr.wr.next = NULL;
1353 wr.wr.opcode = IB_WR_REG_MR;
1354 wr.wr.wr_id = FAST_REG_WR_ID_MASK;
1355 wr.wr.num_sge = 0;
1356 wr.wr.send_flags = 0;
1357 wr.mr = desc->mr;
1358 wr.key = desc->mr->rkey;
1359 wr.access = (IB_ACCESS_LOCAL_WRITE |
1360 IB_ACCESS_REMOTE_READ |
1361 IB_ACCESS_REMOTE_WRITE);
1363 *state->fr.next++ = desc;
1364 state->nmdesc++;
1366 srp_map_desc(state, desc->mr->iova,
1367 desc->mr->length, desc->mr->rkey);
1369 err = ib_post_send(ch->qp, &wr.wr, &bad_wr);
1370 if (unlikely(err))
1371 return err;
1373 return n;
1376 static int srp_map_sg_entry(struct srp_map_state *state,
1377 struct srp_rdma_ch *ch,
1378 struct scatterlist *sg, int sg_index)
1380 struct srp_target_port *target = ch->target;
1381 struct srp_device *dev = target->srp_host->srp_dev;
1382 struct ib_device *ibdev = dev->dev;
1383 dma_addr_t dma_addr = ib_sg_dma_address(ibdev, sg);
1384 unsigned int dma_len = ib_sg_dma_len(ibdev, sg);
1385 unsigned int len = 0;
1386 int ret;
1388 WARN_ON_ONCE(!dma_len);
1390 while (dma_len) {
1391 unsigned offset = dma_addr & ~dev->mr_page_mask;
1392 if (state->npages == dev->max_pages_per_mr || offset != 0) {
1393 ret = srp_map_finish_fmr(state, ch);
1394 if (ret)
1395 return ret;
1398 len = min_t(unsigned int, dma_len, dev->mr_page_size - offset);
1400 if (!state->npages)
1401 state->base_dma_addr = dma_addr;
1402 state->pages[state->npages++] = dma_addr & dev->mr_page_mask;
1403 state->dma_len += len;
1404 dma_addr += len;
1405 dma_len -= len;
1409 * If the last entry of the MR wasn't a full page, then we need to
1410 * close it out and start a new one -- we can only merge at page
1411 * boundries.
1413 ret = 0;
1414 if (len != dev->mr_page_size)
1415 ret = srp_map_finish_fmr(state, ch);
1416 return ret;
1419 static int srp_map_sg_fmr(struct srp_map_state *state, struct srp_rdma_ch *ch,
1420 struct srp_request *req, struct scatterlist *scat,
1421 int count)
1423 struct scatterlist *sg;
1424 int i, ret;
1426 state->desc = req->indirect_desc;
1427 state->pages = req->map_page;
1428 state->fmr.next = req->fmr_list;
1429 state->fmr.end = req->fmr_list + ch->target->cmd_sg_cnt;
1431 for_each_sg(scat, sg, count, i) {
1432 ret = srp_map_sg_entry(state, ch, sg, i);
1433 if (ret)
1434 return ret;
1437 ret = srp_map_finish_fmr(state, ch);
1438 if (ret)
1439 return ret;
1441 req->nmdesc = state->nmdesc;
1443 return 0;
1446 static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch,
1447 struct srp_request *req, struct scatterlist *scat,
1448 int count)
1450 state->desc = req->indirect_desc;
1451 state->fr.next = req->fr_list;
1452 state->fr.end = req->fr_list + ch->target->cmd_sg_cnt;
1453 state->sg = scat;
1455 while (count) {
1456 int i, n;
1458 n = srp_map_finish_fr(state, ch, count);
1459 if (unlikely(n < 0))
1460 return n;
1462 count -= n;
1463 for (i = 0; i < n; i++)
1464 state->sg = sg_next(state->sg);
1467 req->nmdesc = state->nmdesc;
1469 return 0;
1472 static int srp_map_sg_dma(struct srp_map_state *state, struct srp_rdma_ch *ch,
1473 struct srp_request *req, struct scatterlist *scat,
1474 int count)
1476 struct srp_target_port *target = ch->target;
1477 struct srp_device *dev = target->srp_host->srp_dev;
1478 struct scatterlist *sg;
1479 int i;
1481 state->desc = req->indirect_desc;
1482 for_each_sg(scat, sg, count, i) {
1483 srp_map_desc(state, ib_sg_dma_address(dev->dev, sg),
1484 ib_sg_dma_len(dev->dev, sg),
1485 target->global_mr->rkey);
1488 req->nmdesc = state->nmdesc;
1490 return 0;
1494 * Register the indirect data buffer descriptor with the HCA.
1496 * Note: since the indirect data buffer descriptor has been allocated with
1497 * kmalloc() it is guaranteed that this buffer is a physically contiguous
1498 * memory buffer.
1500 static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req,
1501 void **next_mr, void **end_mr, u32 idb_len,
1502 __be32 *idb_rkey)
1504 struct srp_target_port *target = ch->target;
1505 struct srp_device *dev = target->srp_host->srp_dev;
1506 struct srp_map_state state;
1507 struct srp_direct_buf idb_desc;
1508 u64 idb_pages[1];
1509 struct scatterlist idb_sg[1];
1510 int ret;
1512 memset(&state, 0, sizeof(state));
1513 memset(&idb_desc, 0, sizeof(idb_desc));
1514 state.gen.next = next_mr;
1515 state.gen.end = end_mr;
1516 state.desc = &idb_desc;
1517 state.base_dma_addr = req->indirect_dma_addr;
1518 state.dma_len = idb_len;
1520 if (dev->use_fast_reg) {
1521 state.sg = idb_sg;
1522 sg_set_buf(idb_sg, req->indirect_desc, idb_len);
1523 idb_sg->dma_address = req->indirect_dma_addr; /* hack! */
1524 #ifdef CONFIG_NEED_SG_DMA_LENGTH
1525 idb_sg->dma_length = idb_sg->length; /* hack^2 */
1526 #endif
1527 ret = srp_map_finish_fr(&state, ch, 1);
1528 if (ret < 0)
1529 return ret;
1530 } else if (dev->use_fmr) {
1531 state.pages = idb_pages;
1532 state.pages[0] = (req->indirect_dma_addr &
1533 dev->mr_page_mask);
1534 state.npages = 1;
1535 ret = srp_map_finish_fmr(&state, ch);
1536 if (ret < 0)
1537 return ret;
1538 } else {
1539 return -EINVAL;
1542 *idb_rkey = idb_desc.key;
1544 return 0;
1547 static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
1548 struct srp_request *req)
1550 struct srp_target_port *target = ch->target;
1551 struct scatterlist *scat;
1552 struct srp_cmd *cmd = req->cmd->buf;
1553 int len, nents, count, ret;
1554 struct srp_device *dev;
1555 struct ib_device *ibdev;
1556 struct srp_map_state state;
1557 struct srp_indirect_buf *indirect_hdr;
1558 u32 idb_len, table_len;
1559 __be32 idb_rkey;
1560 u8 fmt;
1562 if (!scsi_sglist(scmnd) || scmnd->sc_data_direction == DMA_NONE)
1563 return sizeof (struct srp_cmd);
1565 if (scmnd->sc_data_direction != DMA_FROM_DEVICE &&
1566 scmnd->sc_data_direction != DMA_TO_DEVICE) {
1567 shost_printk(KERN_WARNING, target->scsi_host,
1568 PFX "Unhandled data direction %d\n",
1569 scmnd->sc_data_direction);
1570 return -EINVAL;
1573 nents = scsi_sg_count(scmnd);
1574 scat = scsi_sglist(scmnd);
1576 dev = target->srp_host->srp_dev;
1577 ibdev = dev->dev;
1579 count = ib_dma_map_sg(ibdev, scat, nents, scmnd->sc_data_direction);
1580 if (unlikely(count == 0))
1581 return -EIO;
1583 fmt = SRP_DATA_DESC_DIRECT;
1584 len = sizeof (struct srp_cmd) + sizeof (struct srp_direct_buf);
1586 if (count == 1 && target->global_mr) {
1588 * The midlayer only generated a single gather/scatter
1589 * entry, or DMA mapping coalesced everything to a
1590 * single entry. So a direct descriptor along with
1591 * the DMA MR suffices.
1593 struct srp_direct_buf *buf = (void *) cmd->add_data;
1595 buf->va = cpu_to_be64(ib_sg_dma_address(ibdev, scat));
1596 buf->key = cpu_to_be32(target->global_mr->rkey);
1597 buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat));
1599 req->nmdesc = 0;
1600 goto map_complete;
1604 * We have more than one scatter/gather entry, so build our indirect
1605 * descriptor table, trying to merge as many entries as we can.
1607 indirect_hdr = (void *) cmd->add_data;
1609 ib_dma_sync_single_for_cpu(ibdev, req->indirect_dma_addr,
1610 target->indirect_size, DMA_TO_DEVICE);
1612 memset(&state, 0, sizeof(state));
1613 if (dev->use_fast_reg)
1614 srp_map_sg_fr(&state, ch, req, scat, count);
1615 else if (dev->use_fmr)
1616 srp_map_sg_fmr(&state, ch, req, scat, count);
1617 else
1618 srp_map_sg_dma(&state, ch, req, scat, count);
1620 /* We've mapped the request, now pull as much of the indirect
1621 * descriptor table as we can into the command buffer. If this
1622 * target is not using an external indirect table, we are
1623 * guaranteed to fit into the command, as the SCSI layer won't
1624 * give us more S/G entries than we allow.
1626 if (state.ndesc == 1) {
1628 * Memory registration collapsed the sg-list into one entry,
1629 * so use a direct descriptor.
1631 struct srp_direct_buf *buf = (void *) cmd->add_data;
1633 *buf = req->indirect_desc[0];
1634 goto map_complete;
1637 if (unlikely(target->cmd_sg_cnt < state.ndesc &&
1638 !target->allow_ext_sg)) {
1639 shost_printk(KERN_ERR, target->scsi_host,
1640 "Could not fit S/G list into SRP_CMD\n");
1641 return -EIO;
1644 count = min(state.ndesc, target->cmd_sg_cnt);
1645 table_len = state.ndesc * sizeof (struct srp_direct_buf);
1646 idb_len = sizeof(struct srp_indirect_buf) + table_len;
1648 fmt = SRP_DATA_DESC_INDIRECT;
1649 len = sizeof(struct srp_cmd) + sizeof (struct srp_indirect_buf);
1650 len += count * sizeof (struct srp_direct_buf);
1652 memcpy(indirect_hdr->desc_list, req->indirect_desc,
1653 count * sizeof (struct srp_direct_buf));
1655 if (!target->global_mr) {
1656 ret = srp_map_idb(ch, req, state.gen.next, state.gen.end,
1657 idb_len, &idb_rkey);
1658 if (ret < 0)
1659 return ret;
1660 req->nmdesc++;
1661 } else {
1662 idb_rkey = cpu_to_be32(target->global_mr->rkey);
1665 indirect_hdr->table_desc.va = cpu_to_be64(req->indirect_dma_addr);
1666 indirect_hdr->table_desc.key = idb_rkey;
1667 indirect_hdr->table_desc.len = cpu_to_be32(table_len);
1668 indirect_hdr->len = cpu_to_be32(state.total_len);
1670 if (scmnd->sc_data_direction == DMA_TO_DEVICE)
1671 cmd->data_out_desc_cnt = count;
1672 else
1673 cmd->data_in_desc_cnt = count;
1675 ib_dma_sync_single_for_device(ibdev, req->indirect_dma_addr, table_len,
1676 DMA_TO_DEVICE);
1678 map_complete:
1679 if (scmnd->sc_data_direction == DMA_TO_DEVICE)
1680 cmd->buf_fmt = fmt << 4;
1681 else
1682 cmd->buf_fmt = fmt;
1684 return len;
1688 * Return an IU and possible credit to the free pool
1690 static void srp_put_tx_iu(struct srp_rdma_ch *ch, struct srp_iu *iu,
1691 enum srp_iu_type iu_type)
1693 unsigned long flags;
1695 spin_lock_irqsave(&ch->lock, flags);
1696 list_add(&iu->list, &ch->free_tx);
1697 if (iu_type != SRP_IU_RSP)
1698 ++ch->req_lim;
1699 spin_unlock_irqrestore(&ch->lock, flags);
1703 * Must be called with ch->lock held to protect req_lim and free_tx.
1704 * If IU is not sent, it must be returned using srp_put_tx_iu().
1706 * Note:
1707 * An upper limit for the number of allocated information units for each
1708 * request type is:
1709 * - SRP_IU_CMD: SRP_CMD_SQ_SIZE, since the SCSI mid-layer never queues
1710 * more than Scsi_Host.can_queue requests.
1711 * - SRP_IU_TSK_MGMT: SRP_TSK_MGMT_SQ_SIZE.
1712 * - SRP_IU_RSP: 1, since a conforming SRP target never sends more than
1713 * one unanswered SRP request to an initiator.
1715 static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch,
1716 enum srp_iu_type iu_type)
1718 struct srp_target_port *target = ch->target;
1719 s32 rsv = (iu_type == SRP_IU_TSK_MGMT) ? 0 : SRP_TSK_MGMT_SQ_SIZE;
1720 struct srp_iu *iu;
1722 srp_send_completion(ch->send_cq, ch);
1724 if (list_empty(&ch->free_tx))
1725 return NULL;
1727 /* Initiator responses to target requests do not consume credits */
1728 if (iu_type != SRP_IU_RSP) {
1729 if (ch->req_lim <= rsv) {
1730 ++target->zero_req_lim;
1731 return NULL;
1734 --ch->req_lim;
1737 iu = list_first_entry(&ch->free_tx, struct srp_iu, list);
1738 list_del(&iu->list);
1739 return iu;
1742 static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len)
1744 struct srp_target_port *target = ch->target;
1745 struct ib_sge list;
1746 struct ib_send_wr wr, *bad_wr;
1748 list.addr = iu->dma;
1749 list.length = len;
1750 list.lkey = target->lkey;
1752 wr.next = NULL;
1753 wr.wr_id = (uintptr_t) iu;
1754 wr.sg_list = &list;
1755 wr.num_sge = 1;
1756 wr.opcode = IB_WR_SEND;
1757 wr.send_flags = IB_SEND_SIGNALED;
1759 return ib_post_send(ch->qp, &wr, &bad_wr);
1762 static int srp_post_recv(struct srp_rdma_ch *ch, struct srp_iu *iu)
1764 struct srp_target_port *target = ch->target;
1765 struct ib_recv_wr wr, *bad_wr;
1766 struct ib_sge list;
1768 list.addr = iu->dma;
1769 list.length = iu->size;
1770 list.lkey = target->lkey;
1772 wr.next = NULL;
1773 wr.wr_id = (uintptr_t) iu;
1774 wr.sg_list = &list;
1775 wr.num_sge = 1;
1777 return ib_post_recv(ch->qp, &wr, &bad_wr);
1780 static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp)
1782 struct srp_target_port *target = ch->target;
1783 struct srp_request *req;
1784 struct scsi_cmnd *scmnd;
1785 unsigned long flags;
1787 if (unlikely(rsp->tag & SRP_TAG_TSK_MGMT)) {
1788 spin_lock_irqsave(&ch->lock, flags);
1789 ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
1790 spin_unlock_irqrestore(&ch->lock, flags);
1792 ch->tsk_mgmt_status = -1;
1793 if (be32_to_cpu(rsp->resp_data_len) >= 4)
1794 ch->tsk_mgmt_status = rsp->data[3];
1795 complete(&ch->tsk_mgmt_done);
1796 } else {
1797 scmnd = scsi_host_find_tag(target->scsi_host, rsp->tag);
1798 if (scmnd) {
1799 req = (void *)scmnd->host_scribble;
1800 scmnd = srp_claim_req(ch, req, NULL, scmnd);
1802 if (!scmnd) {
1803 shost_printk(KERN_ERR, target->scsi_host,
1804 "Null scmnd for RSP w/tag %#016llx received on ch %td / QP %#x\n",
1805 rsp->tag, ch - target->ch, ch->qp->qp_num);
1807 spin_lock_irqsave(&ch->lock, flags);
1808 ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
1809 spin_unlock_irqrestore(&ch->lock, flags);
1811 return;
1813 scmnd->result = rsp->status;
1815 if (rsp->flags & SRP_RSP_FLAG_SNSVALID) {
1816 memcpy(scmnd->sense_buffer, rsp->data +
1817 be32_to_cpu(rsp->resp_data_len),
1818 min_t(int, be32_to_cpu(rsp->sense_data_len),
1819 SCSI_SENSE_BUFFERSIZE));
1822 if (unlikely(rsp->flags & SRP_RSP_FLAG_DIUNDER))
1823 scsi_set_resid(scmnd, be32_to_cpu(rsp->data_in_res_cnt));
1824 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DIOVER))
1825 scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_in_res_cnt));
1826 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOUNDER))
1827 scsi_set_resid(scmnd, be32_to_cpu(rsp->data_out_res_cnt));
1828 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOOVER))
1829 scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_out_res_cnt));
1831 srp_free_req(ch, req, scmnd,
1832 be32_to_cpu(rsp->req_lim_delta));
1834 scmnd->host_scribble = NULL;
1835 scmnd->scsi_done(scmnd);
1839 static int srp_response_common(struct srp_rdma_ch *ch, s32 req_delta,
1840 void *rsp, int len)
1842 struct srp_target_port *target = ch->target;
1843 struct ib_device *dev = target->srp_host->srp_dev->dev;
1844 unsigned long flags;
1845 struct srp_iu *iu;
1846 int err;
1848 spin_lock_irqsave(&ch->lock, flags);
1849 ch->req_lim += req_delta;
1850 iu = __srp_get_tx_iu(ch, SRP_IU_RSP);
1851 spin_unlock_irqrestore(&ch->lock, flags);
1853 if (!iu) {
1854 shost_printk(KERN_ERR, target->scsi_host, PFX
1855 "no IU available to send response\n");
1856 return 1;
1859 ib_dma_sync_single_for_cpu(dev, iu->dma, len, DMA_TO_DEVICE);
1860 memcpy(iu->buf, rsp, len);
1861 ib_dma_sync_single_for_device(dev, iu->dma, len, DMA_TO_DEVICE);
1863 err = srp_post_send(ch, iu, len);
1864 if (err) {
1865 shost_printk(KERN_ERR, target->scsi_host, PFX
1866 "unable to post response: %d\n", err);
1867 srp_put_tx_iu(ch, iu, SRP_IU_RSP);
1870 return err;
1873 static void srp_process_cred_req(struct srp_rdma_ch *ch,
1874 struct srp_cred_req *req)
1876 struct srp_cred_rsp rsp = {
1877 .opcode = SRP_CRED_RSP,
1878 .tag = req->tag,
1880 s32 delta = be32_to_cpu(req->req_lim_delta);
1882 if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
1883 shost_printk(KERN_ERR, ch->target->scsi_host, PFX
1884 "problems processing SRP_CRED_REQ\n");
1887 static void srp_process_aer_req(struct srp_rdma_ch *ch,
1888 struct srp_aer_req *req)
1890 struct srp_target_port *target = ch->target;
1891 struct srp_aer_rsp rsp = {
1892 .opcode = SRP_AER_RSP,
1893 .tag = req->tag,
1895 s32 delta = be32_to_cpu(req->req_lim_delta);
1897 shost_printk(KERN_ERR, target->scsi_host, PFX
1898 "ignoring AER for LUN %llu\n", scsilun_to_int(&req->lun));
1900 if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
1901 shost_printk(KERN_ERR, target->scsi_host, PFX
1902 "problems processing SRP_AER_REQ\n");
1905 static void srp_handle_recv(struct srp_rdma_ch *ch, struct ib_wc *wc)
1907 struct srp_target_port *target = ch->target;
1908 struct ib_device *dev = target->srp_host->srp_dev->dev;
1909 struct srp_iu *iu = (struct srp_iu *) (uintptr_t) wc->wr_id;
1910 int res;
1911 u8 opcode;
1913 ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_ti_iu_len,
1914 DMA_FROM_DEVICE);
1916 opcode = *(u8 *) iu->buf;
1918 if (0) {
1919 shost_printk(KERN_ERR, target->scsi_host,
1920 PFX "recv completion, opcode 0x%02x\n", opcode);
1921 print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 8, 1,
1922 iu->buf, wc->byte_len, true);
1925 switch (opcode) {
1926 case SRP_RSP:
1927 srp_process_rsp(ch, iu->buf);
1928 break;
1930 case SRP_CRED_REQ:
1931 srp_process_cred_req(ch, iu->buf);
1932 break;
1934 case SRP_AER_REQ:
1935 srp_process_aer_req(ch, iu->buf);
1936 break;
1938 case SRP_T_LOGOUT:
1939 /* XXX Handle target logout */
1940 shost_printk(KERN_WARNING, target->scsi_host,
1941 PFX "Got target logout request\n");
1942 break;
1944 default:
1945 shost_printk(KERN_WARNING, target->scsi_host,
1946 PFX "Unhandled SRP opcode 0x%02x\n", opcode);
1947 break;
1950 ib_dma_sync_single_for_device(dev, iu->dma, ch->max_ti_iu_len,
1951 DMA_FROM_DEVICE);
1953 res = srp_post_recv(ch, iu);
1954 if (res != 0)
1955 shost_printk(KERN_ERR, target->scsi_host,
1956 PFX "Recv failed with error code %d\n", res);
1960 * srp_tl_err_work() - handle a transport layer error
1961 * @work: Work structure embedded in an SRP target port.
1963 * Note: This function may get invoked before the rport has been created,
1964 * hence the target->rport test.
1966 static void srp_tl_err_work(struct work_struct *work)
1968 struct srp_target_port *target;
1970 target = container_of(work, struct srp_target_port, tl_err_work);
1971 if (target->rport)
1972 srp_start_tl_fail_timers(target->rport);
1975 static void srp_handle_qp_err(u64 wr_id, enum ib_wc_status wc_status,
1976 bool send_err, struct srp_rdma_ch *ch)
1978 struct srp_target_port *target = ch->target;
1980 if (wr_id == SRP_LAST_WR_ID) {
1981 complete(&ch->done);
1982 return;
1985 if (ch->connected && !target->qp_in_error) {
1986 if (wr_id & LOCAL_INV_WR_ID_MASK) {
1987 shost_printk(KERN_ERR, target->scsi_host, PFX
1988 "LOCAL_INV failed with status %s (%d)\n",
1989 ib_wc_status_msg(wc_status), wc_status);
1990 } else if (wr_id & FAST_REG_WR_ID_MASK) {
1991 shost_printk(KERN_ERR, target->scsi_host, PFX
1992 "FAST_REG_MR failed status %s (%d)\n",
1993 ib_wc_status_msg(wc_status), wc_status);
1994 } else {
1995 shost_printk(KERN_ERR, target->scsi_host,
1996 PFX "failed %s status %s (%d) for iu %p\n",
1997 send_err ? "send" : "receive",
1998 ib_wc_status_msg(wc_status), wc_status,
1999 (void *)(uintptr_t)wr_id);
2001 queue_work(system_long_wq, &target->tl_err_work);
2003 target->qp_in_error = true;
2006 static void srp_recv_completion(struct ib_cq *cq, void *ch_ptr)
2008 struct srp_rdma_ch *ch = ch_ptr;
2009 struct ib_wc wc;
2011 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
2012 while (ib_poll_cq(cq, 1, &wc) > 0) {
2013 if (likely(wc.status == IB_WC_SUCCESS)) {
2014 srp_handle_recv(ch, &wc);
2015 } else {
2016 srp_handle_qp_err(wc.wr_id, wc.status, false, ch);
2021 static void srp_send_completion(struct ib_cq *cq, void *ch_ptr)
2023 struct srp_rdma_ch *ch = ch_ptr;
2024 struct ib_wc wc;
2025 struct srp_iu *iu;
2027 while (ib_poll_cq(cq, 1, &wc) > 0) {
2028 if (likely(wc.status == IB_WC_SUCCESS)) {
2029 iu = (struct srp_iu *) (uintptr_t) wc.wr_id;
2030 list_add(&iu->list, &ch->free_tx);
2031 } else {
2032 srp_handle_qp_err(wc.wr_id, wc.status, true, ch);
2037 static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
2039 struct srp_target_port *target = host_to_target(shost);
2040 struct srp_rport *rport = target->rport;
2041 struct srp_rdma_ch *ch;
2042 struct srp_request *req;
2043 struct srp_iu *iu;
2044 struct srp_cmd *cmd;
2045 struct ib_device *dev;
2046 unsigned long flags;
2047 u32 tag;
2048 u16 idx;
2049 int len, ret;
2050 const bool in_scsi_eh = !in_interrupt() && current == shost->ehandler;
2053 * The SCSI EH thread is the only context from which srp_queuecommand()
2054 * can get invoked for blocked devices (SDEV_BLOCK /
2055 * SDEV_CREATED_BLOCK). Avoid racing with srp_reconnect_rport() by
2056 * locking the rport mutex if invoked from inside the SCSI EH.
2058 if (in_scsi_eh)
2059 mutex_lock(&rport->mutex);
2061 scmnd->result = srp_chkready(target->rport);
2062 if (unlikely(scmnd->result))
2063 goto err;
2065 WARN_ON_ONCE(scmnd->request->tag < 0);
2066 tag = blk_mq_unique_tag(scmnd->request);
2067 ch = &target->ch[blk_mq_unique_tag_to_hwq(tag)];
2068 idx = blk_mq_unique_tag_to_tag(tag);
2069 WARN_ONCE(idx >= target->req_ring_size, "%s: tag %#x: idx %d >= %d\n",
2070 dev_name(&shost->shost_gendev), tag, idx,
2071 target->req_ring_size);
2073 spin_lock_irqsave(&ch->lock, flags);
2074 iu = __srp_get_tx_iu(ch, SRP_IU_CMD);
2075 spin_unlock_irqrestore(&ch->lock, flags);
2077 if (!iu)
2078 goto err;
2080 req = &ch->req_ring[idx];
2081 dev = target->srp_host->srp_dev->dev;
2082 ib_dma_sync_single_for_cpu(dev, iu->dma, target->max_iu_len,
2083 DMA_TO_DEVICE);
2085 scmnd->host_scribble = (void *) req;
2087 cmd = iu->buf;
2088 memset(cmd, 0, sizeof *cmd);
2090 cmd->opcode = SRP_CMD;
2091 int_to_scsilun(scmnd->device->lun, &cmd->lun);
2092 cmd->tag = tag;
2093 memcpy(cmd->cdb, scmnd->cmnd, scmnd->cmd_len);
2095 req->scmnd = scmnd;
2096 req->cmd = iu;
2098 len = srp_map_data(scmnd, ch, req);
2099 if (len < 0) {
2100 shost_printk(KERN_ERR, target->scsi_host,
2101 PFX "Failed to map data (%d)\n", len);
2103 * If we ran out of memory descriptors (-ENOMEM) because an
2104 * application is queuing many requests with more than
2105 * max_pages_per_mr sg-list elements, tell the SCSI mid-layer
2106 * to reduce queue depth temporarily.
2108 scmnd->result = len == -ENOMEM ?
2109 DID_OK << 16 | QUEUE_FULL << 1 : DID_ERROR << 16;
2110 goto err_iu;
2113 ib_dma_sync_single_for_device(dev, iu->dma, target->max_iu_len,
2114 DMA_TO_DEVICE);
2116 if (srp_post_send(ch, iu, len)) {
2117 shost_printk(KERN_ERR, target->scsi_host, PFX "Send failed\n");
2118 goto err_unmap;
2121 ret = 0;
2123 unlock_rport:
2124 if (in_scsi_eh)
2125 mutex_unlock(&rport->mutex);
2127 return ret;
2129 err_unmap:
2130 srp_unmap_data(scmnd, ch, req);
2132 err_iu:
2133 srp_put_tx_iu(ch, iu, SRP_IU_CMD);
2136 * Avoid that the loops that iterate over the request ring can
2137 * encounter a dangling SCSI command pointer.
2139 req->scmnd = NULL;
2141 err:
2142 if (scmnd->result) {
2143 scmnd->scsi_done(scmnd);
2144 ret = 0;
2145 } else {
2146 ret = SCSI_MLQUEUE_HOST_BUSY;
2149 goto unlock_rport;
2153 * Note: the resources allocated in this function are freed in
2154 * srp_free_ch_ib().
2156 static int srp_alloc_iu_bufs(struct srp_rdma_ch *ch)
2158 struct srp_target_port *target = ch->target;
2159 int i;
2161 ch->rx_ring = kcalloc(target->queue_size, sizeof(*ch->rx_ring),
2162 GFP_KERNEL);
2163 if (!ch->rx_ring)
2164 goto err_no_ring;
2165 ch->tx_ring = kcalloc(target->queue_size, sizeof(*ch->tx_ring),
2166 GFP_KERNEL);
2167 if (!ch->tx_ring)
2168 goto err_no_ring;
2170 for (i = 0; i < target->queue_size; ++i) {
2171 ch->rx_ring[i] = srp_alloc_iu(target->srp_host,
2172 ch->max_ti_iu_len,
2173 GFP_KERNEL, DMA_FROM_DEVICE);
2174 if (!ch->rx_ring[i])
2175 goto err;
2178 for (i = 0; i < target->queue_size; ++i) {
2179 ch->tx_ring[i] = srp_alloc_iu(target->srp_host,
2180 target->max_iu_len,
2181 GFP_KERNEL, DMA_TO_DEVICE);
2182 if (!ch->tx_ring[i])
2183 goto err;
2185 list_add(&ch->tx_ring[i]->list, &ch->free_tx);
2188 return 0;
2190 err:
2191 for (i = 0; i < target->queue_size; ++i) {
2192 srp_free_iu(target->srp_host, ch->rx_ring[i]);
2193 srp_free_iu(target->srp_host, ch->tx_ring[i]);
2197 err_no_ring:
2198 kfree(ch->tx_ring);
2199 ch->tx_ring = NULL;
2200 kfree(ch->rx_ring);
2201 ch->rx_ring = NULL;
2203 return -ENOMEM;
2206 static uint32_t srp_compute_rq_tmo(struct ib_qp_attr *qp_attr, int attr_mask)
2208 uint64_t T_tr_ns, max_compl_time_ms;
2209 uint32_t rq_tmo_jiffies;
2212 * According to section 11.2.4.2 in the IBTA spec (Modify Queue Pair,
2213 * table 91), both the QP timeout and the retry count have to be set
2214 * for RC QP's during the RTR to RTS transition.
2216 WARN_ON_ONCE((attr_mask & (IB_QP_TIMEOUT | IB_QP_RETRY_CNT)) !=
2217 (IB_QP_TIMEOUT | IB_QP_RETRY_CNT));
2220 * Set target->rq_tmo_jiffies to one second more than the largest time
2221 * it can take before an error completion is generated. See also
2222 * C9-140..142 in the IBTA spec for more information about how to
2223 * convert the QP Local ACK Timeout value to nanoseconds.
2225 T_tr_ns = 4096 * (1ULL << qp_attr->timeout);
2226 max_compl_time_ms = qp_attr->retry_cnt * 4 * T_tr_ns;
2227 do_div(max_compl_time_ms, NSEC_PER_MSEC);
2228 rq_tmo_jiffies = msecs_to_jiffies(max_compl_time_ms + 1000);
2230 return rq_tmo_jiffies;
2233 static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
2234 const struct srp_login_rsp *lrsp,
2235 struct srp_rdma_ch *ch)
2237 struct srp_target_port *target = ch->target;
2238 struct ib_qp_attr *qp_attr = NULL;
2239 int attr_mask = 0;
2240 int ret;
2241 int i;
2243 if (lrsp->opcode == SRP_LOGIN_RSP) {
2244 ch->max_ti_iu_len = be32_to_cpu(lrsp->max_ti_iu_len);
2245 ch->req_lim = be32_to_cpu(lrsp->req_lim_delta);
2248 * Reserve credits for task management so we don't
2249 * bounce requests back to the SCSI mid-layer.
2251 target->scsi_host->can_queue
2252 = min(ch->req_lim - SRP_TSK_MGMT_SQ_SIZE,
2253 target->scsi_host->can_queue);
2254 target->scsi_host->cmd_per_lun
2255 = min_t(int, target->scsi_host->can_queue,
2256 target->scsi_host->cmd_per_lun);
2257 } else {
2258 shost_printk(KERN_WARNING, target->scsi_host,
2259 PFX "Unhandled RSP opcode %#x\n", lrsp->opcode);
2260 ret = -ECONNRESET;
2261 goto error;
2264 if (!ch->rx_ring) {
2265 ret = srp_alloc_iu_bufs(ch);
2266 if (ret)
2267 goto error;
2270 ret = -ENOMEM;
2271 qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL);
2272 if (!qp_attr)
2273 goto error;
2275 qp_attr->qp_state = IB_QPS_RTR;
2276 ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2277 if (ret)
2278 goto error_free;
2280 ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
2281 if (ret)
2282 goto error_free;
2284 for (i = 0; i < target->queue_size; i++) {
2285 struct srp_iu *iu = ch->rx_ring[i];
2287 ret = srp_post_recv(ch, iu);
2288 if (ret)
2289 goto error_free;
2292 qp_attr->qp_state = IB_QPS_RTS;
2293 ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2294 if (ret)
2295 goto error_free;
2297 target->rq_tmo_jiffies = srp_compute_rq_tmo(qp_attr, attr_mask);
2299 ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
2300 if (ret)
2301 goto error_free;
2303 ret = ib_send_cm_rtu(cm_id, NULL, 0);
2305 error_free:
2306 kfree(qp_attr);
2308 error:
2309 ch->status = ret;
2312 static void srp_cm_rej_handler(struct ib_cm_id *cm_id,
2313 struct ib_cm_event *event,
2314 struct srp_rdma_ch *ch)
2316 struct srp_target_port *target = ch->target;
2317 struct Scsi_Host *shost = target->scsi_host;
2318 struct ib_class_port_info *cpi;
2319 int opcode;
2321 switch (event->param.rej_rcvd.reason) {
2322 case IB_CM_REJ_PORT_CM_REDIRECT:
2323 cpi = event->param.rej_rcvd.ari;
2324 ch->path.dlid = cpi->redirect_lid;
2325 ch->path.pkey = cpi->redirect_pkey;
2326 cm_id->remote_cm_qpn = be32_to_cpu(cpi->redirect_qp) & 0x00ffffff;
2327 memcpy(ch->path.dgid.raw, cpi->redirect_gid, 16);
2329 ch->status = ch->path.dlid ?
2330 SRP_DLID_REDIRECT : SRP_PORT_REDIRECT;
2331 break;
2333 case IB_CM_REJ_PORT_REDIRECT:
2334 if (srp_target_is_topspin(target)) {
2336 * Topspin/Cisco SRP gateways incorrectly send
2337 * reject reason code 25 when they mean 24
2338 * (port redirect).
2340 memcpy(ch->path.dgid.raw,
2341 event->param.rej_rcvd.ari, 16);
2343 shost_printk(KERN_DEBUG, shost,
2344 PFX "Topspin/Cisco redirect to target port GID %016llx%016llx\n",
2345 be64_to_cpu(ch->path.dgid.global.subnet_prefix),
2346 be64_to_cpu(ch->path.dgid.global.interface_id));
2348 ch->status = SRP_PORT_REDIRECT;
2349 } else {
2350 shost_printk(KERN_WARNING, shost,
2351 " REJ reason: IB_CM_REJ_PORT_REDIRECT\n");
2352 ch->status = -ECONNRESET;
2354 break;
2356 case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID:
2357 shost_printk(KERN_WARNING, shost,
2358 " REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n");
2359 ch->status = -ECONNRESET;
2360 break;
2362 case IB_CM_REJ_CONSUMER_DEFINED:
2363 opcode = *(u8 *) event->private_data;
2364 if (opcode == SRP_LOGIN_REJ) {
2365 struct srp_login_rej *rej = event->private_data;
2366 u32 reason = be32_to_cpu(rej->reason);
2368 if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE)
2369 shost_printk(KERN_WARNING, shost,
2370 PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n");
2371 else
2372 shost_printk(KERN_WARNING, shost, PFX
2373 "SRP LOGIN from %pI6 to %pI6 REJECTED, reason 0x%08x\n",
2374 target->sgid.raw,
2375 target->orig_dgid.raw, reason);
2376 } else
2377 shost_printk(KERN_WARNING, shost,
2378 " REJ reason: IB_CM_REJ_CONSUMER_DEFINED,"
2379 " opcode 0x%02x\n", opcode);
2380 ch->status = -ECONNRESET;
2381 break;
2383 case IB_CM_REJ_STALE_CONN:
2384 shost_printk(KERN_WARNING, shost, " REJ reason: stale connection\n");
2385 ch->status = SRP_STALE_CONN;
2386 break;
2388 default:
2389 shost_printk(KERN_WARNING, shost, " REJ reason 0x%x\n",
2390 event->param.rej_rcvd.reason);
2391 ch->status = -ECONNRESET;
2395 static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
2397 struct srp_rdma_ch *ch = cm_id->context;
2398 struct srp_target_port *target = ch->target;
2399 int comp = 0;
2401 switch (event->event) {
2402 case IB_CM_REQ_ERROR:
2403 shost_printk(KERN_DEBUG, target->scsi_host,
2404 PFX "Sending CM REQ failed\n");
2405 comp = 1;
2406 ch->status = -ECONNRESET;
2407 break;
2409 case IB_CM_REP_RECEIVED:
2410 comp = 1;
2411 srp_cm_rep_handler(cm_id, event->private_data, ch);
2412 break;
2414 case IB_CM_REJ_RECEIVED:
2415 shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n");
2416 comp = 1;
2418 srp_cm_rej_handler(cm_id, event, ch);
2419 break;
2421 case IB_CM_DREQ_RECEIVED:
2422 shost_printk(KERN_WARNING, target->scsi_host,
2423 PFX "DREQ received - connection closed\n");
2424 ch->connected = false;
2425 if (ib_send_cm_drep(cm_id, NULL, 0))
2426 shost_printk(KERN_ERR, target->scsi_host,
2427 PFX "Sending CM DREP failed\n");
2428 queue_work(system_long_wq, &target->tl_err_work);
2429 break;
2431 case IB_CM_TIMEWAIT_EXIT:
2432 shost_printk(KERN_ERR, target->scsi_host,
2433 PFX "connection closed\n");
2434 comp = 1;
2436 ch->status = 0;
2437 break;
2439 case IB_CM_MRA_RECEIVED:
2440 case IB_CM_DREQ_ERROR:
2441 case IB_CM_DREP_RECEIVED:
2442 break;
2444 default:
2445 shost_printk(KERN_WARNING, target->scsi_host,
2446 PFX "Unhandled CM event %d\n", event->event);
2447 break;
2450 if (comp)
2451 complete(&ch->done);
2453 return 0;
2457 * srp_change_queue_depth - setting device queue depth
2458 * @sdev: scsi device struct
2459 * @qdepth: requested queue depth
2461 * Returns queue depth.
2463 static int
2464 srp_change_queue_depth(struct scsi_device *sdev, int qdepth)
2466 if (!sdev->tagged_supported)
2467 qdepth = 1;
2468 return scsi_change_queue_depth(sdev, qdepth);
2471 static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun,
2472 u8 func)
2474 struct srp_target_port *target = ch->target;
2475 struct srp_rport *rport = target->rport;
2476 struct ib_device *dev = target->srp_host->srp_dev->dev;
2477 struct srp_iu *iu;
2478 struct srp_tsk_mgmt *tsk_mgmt;
2480 if (!ch->connected || target->qp_in_error)
2481 return -1;
2483 init_completion(&ch->tsk_mgmt_done);
2486 * Lock the rport mutex to avoid that srp_create_ch_ib() is
2487 * invoked while a task management function is being sent.
2489 mutex_lock(&rport->mutex);
2490 spin_lock_irq(&ch->lock);
2491 iu = __srp_get_tx_iu(ch, SRP_IU_TSK_MGMT);
2492 spin_unlock_irq(&ch->lock);
2494 if (!iu) {
2495 mutex_unlock(&rport->mutex);
2497 return -1;
2500 ib_dma_sync_single_for_cpu(dev, iu->dma, sizeof *tsk_mgmt,
2501 DMA_TO_DEVICE);
2502 tsk_mgmt = iu->buf;
2503 memset(tsk_mgmt, 0, sizeof *tsk_mgmt);
2505 tsk_mgmt->opcode = SRP_TSK_MGMT;
2506 int_to_scsilun(lun, &tsk_mgmt->lun);
2507 tsk_mgmt->tag = req_tag | SRP_TAG_TSK_MGMT;
2508 tsk_mgmt->tsk_mgmt_func = func;
2509 tsk_mgmt->task_tag = req_tag;
2511 ib_dma_sync_single_for_device(dev, iu->dma, sizeof *tsk_mgmt,
2512 DMA_TO_DEVICE);
2513 if (srp_post_send(ch, iu, sizeof(*tsk_mgmt))) {
2514 srp_put_tx_iu(ch, iu, SRP_IU_TSK_MGMT);
2515 mutex_unlock(&rport->mutex);
2517 return -1;
2519 mutex_unlock(&rport->mutex);
2521 if (!wait_for_completion_timeout(&ch->tsk_mgmt_done,
2522 msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS)))
2523 return -1;
2525 return 0;
2528 static int srp_abort(struct scsi_cmnd *scmnd)
2530 struct srp_target_port *target = host_to_target(scmnd->device->host);
2531 struct srp_request *req = (struct srp_request *) scmnd->host_scribble;
2532 u32 tag;
2533 u16 ch_idx;
2534 struct srp_rdma_ch *ch;
2535 int ret;
2537 shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n");
2539 if (!req)
2540 return SUCCESS;
2541 tag = blk_mq_unique_tag(scmnd->request);
2542 ch_idx = blk_mq_unique_tag_to_hwq(tag);
2543 if (WARN_ON_ONCE(ch_idx >= target->ch_count))
2544 return SUCCESS;
2545 ch = &target->ch[ch_idx];
2546 if (!srp_claim_req(ch, req, NULL, scmnd))
2547 return SUCCESS;
2548 shost_printk(KERN_ERR, target->scsi_host,
2549 "Sending SRP abort for tag %#x\n", tag);
2550 if (srp_send_tsk_mgmt(ch, tag, scmnd->device->lun,
2551 SRP_TSK_ABORT_TASK) == 0)
2552 ret = SUCCESS;
2553 else if (target->rport->state == SRP_RPORT_LOST)
2554 ret = FAST_IO_FAIL;
2555 else
2556 ret = FAILED;
2557 srp_free_req(ch, req, scmnd, 0);
2558 scmnd->result = DID_ABORT << 16;
2559 scmnd->scsi_done(scmnd);
2561 return ret;
2564 static int srp_reset_device(struct scsi_cmnd *scmnd)
2566 struct srp_target_port *target = host_to_target(scmnd->device->host);
2567 struct srp_rdma_ch *ch;
2568 int i;
2570 shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n");
2572 ch = &target->ch[0];
2573 if (srp_send_tsk_mgmt(ch, SRP_TAG_NO_REQ, scmnd->device->lun,
2574 SRP_TSK_LUN_RESET))
2575 return FAILED;
2576 if (ch->tsk_mgmt_status)
2577 return FAILED;
2579 for (i = 0; i < target->ch_count; i++) {
2580 ch = &target->ch[i];
2581 for (i = 0; i < target->req_ring_size; ++i) {
2582 struct srp_request *req = &ch->req_ring[i];
2584 srp_finish_req(ch, req, scmnd->device, DID_RESET << 16);
2588 return SUCCESS;
2591 static int srp_reset_host(struct scsi_cmnd *scmnd)
2593 struct srp_target_port *target = host_to_target(scmnd->device->host);
2595 shost_printk(KERN_ERR, target->scsi_host, PFX "SRP reset_host called\n");
2597 return srp_reconnect_rport(target->rport) == 0 ? SUCCESS : FAILED;
2600 static int srp_slave_configure(struct scsi_device *sdev)
2602 struct Scsi_Host *shost = sdev->host;
2603 struct srp_target_port *target = host_to_target(shost);
2604 struct request_queue *q = sdev->request_queue;
2605 unsigned long timeout;
2607 if (sdev->type == TYPE_DISK) {
2608 timeout = max_t(unsigned, 30 * HZ, target->rq_tmo_jiffies);
2609 blk_queue_rq_timeout(q, timeout);
2612 return 0;
2615 static ssize_t show_id_ext(struct device *dev, struct device_attribute *attr,
2616 char *buf)
2618 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2620 return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->id_ext));
2623 static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr,
2624 char *buf)
2626 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2628 return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->ioc_guid));
2631 static ssize_t show_service_id(struct device *dev,
2632 struct device_attribute *attr, char *buf)
2634 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2636 return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->service_id));
2639 static ssize_t show_pkey(struct device *dev, struct device_attribute *attr,
2640 char *buf)
2642 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2644 return sprintf(buf, "0x%04x\n", be16_to_cpu(target->pkey));
2647 static ssize_t show_sgid(struct device *dev, struct device_attribute *attr,
2648 char *buf)
2650 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2652 return sprintf(buf, "%pI6\n", target->sgid.raw);
2655 static ssize_t show_dgid(struct device *dev, struct device_attribute *attr,
2656 char *buf)
2658 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2659 struct srp_rdma_ch *ch = &target->ch[0];
2661 return sprintf(buf, "%pI6\n", ch->path.dgid.raw);
2664 static ssize_t show_orig_dgid(struct device *dev,
2665 struct device_attribute *attr, char *buf)
2667 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2669 return sprintf(buf, "%pI6\n", target->orig_dgid.raw);
2672 static ssize_t show_req_lim(struct device *dev,
2673 struct device_attribute *attr, char *buf)
2675 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2676 struct srp_rdma_ch *ch;
2677 int i, req_lim = INT_MAX;
2679 for (i = 0; i < target->ch_count; i++) {
2680 ch = &target->ch[i];
2681 req_lim = min(req_lim, ch->req_lim);
2683 return sprintf(buf, "%d\n", req_lim);
2686 static ssize_t show_zero_req_lim(struct device *dev,
2687 struct device_attribute *attr, char *buf)
2689 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2691 return sprintf(buf, "%d\n", target->zero_req_lim);
2694 static ssize_t show_local_ib_port(struct device *dev,
2695 struct device_attribute *attr, char *buf)
2697 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2699 return sprintf(buf, "%d\n", target->srp_host->port);
2702 static ssize_t show_local_ib_device(struct device *dev,
2703 struct device_attribute *attr, char *buf)
2705 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2707 return sprintf(buf, "%s\n", target->srp_host->srp_dev->dev->name);
2710 static ssize_t show_ch_count(struct device *dev, struct device_attribute *attr,
2711 char *buf)
2713 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2715 return sprintf(buf, "%d\n", target->ch_count);
2718 static ssize_t show_comp_vector(struct device *dev,
2719 struct device_attribute *attr, char *buf)
2721 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2723 return sprintf(buf, "%d\n", target->comp_vector);
2726 static ssize_t show_tl_retry_count(struct device *dev,
2727 struct device_attribute *attr, char *buf)
2729 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2731 return sprintf(buf, "%d\n", target->tl_retry_count);
2734 static ssize_t show_cmd_sg_entries(struct device *dev,
2735 struct device_attribute *attr, char *buf)
2737 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2739 return sprintf(buf, "%u\n", target->cmd_sg_cnt);
2742 static ssize_t show_allow_ext_sg(struct device *dev,
2743 struct device_attribute *attr, char *buf)
2745 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2747 return sprintf(buf, "%s\n", target->allow_ext_sg ? "true" : "false");
/* Read-only (S_IRUGO) per-target attributes; none of them has a store method. */
static DEVICE_ATTR(id_ext,	    S_IRUGO, show_id_ext,	   NULL);
static DEVICE_ATTR(ioc_guid,	    S_IRUGO, show_ioc_guid,	   NULL);
static DEVICE_ATTR(service_id,	    S_IRUGO, show_service_id,	   NULL);
static DEVICE_ATTR(pkey,	    S_IRUGO, show_pkey,		   NULL);
static DEVICE_ATTR(sgid,	    S_IRUGO, show_sgid,		   NULL);
static DEVICE_ATTR(dgid,	    S_IRUGO, show_dgid,		   NULL);
static DEVICE_ATTR(orig_dgid,	    S_IRUGO, show_orig_dgid,	   NULL);
static DEVICE_ATTR(req_lim,         S_IRUGO, show_req_lim,         NULL);
static DEVICE_ATTR(zero_req_lim,    S_IRUGO, show_zero_req_lim,	   NULL);
static DEVICE_ATTR(local_ib_port,   S_IRUGO, show_local_ib_port,   NULL);
static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL);
static DEVICE_ATTR(ch_count,        S_IRUGO, show_ch_count,        NULL);
static DEVICE_ATTR(comp_vector,     S_IRUGO, show_comp_vector,     NULL);
static DEVICE_ATTR(tl_retry_count,  S_IRUGO, show_tl_retry_count,  NULL);
static DEVICE_ATTR(cmd_sg_entries,  S_IRUGO, show_cmd_sg_entries,  NULL);
static DEVICE_ATTR(allow_ext_sg,    S_IRUGO, show_allow_ext_sg,    NULL);
2767 static struct device_attribute *srp_host_attrs[] = {
2768 &dev_attr_id_ext,
2769 &dev_attr_ioc_guid,
2770 &dev_attr_service_id,
2771 &dev_attr_pkey,
2772 &dev_attr_sgid,
2773 &dev_attr_dgid,
2774 &dev_attr_orig_dgid,
2775 &dev_attr_req_lim,
2776 &dev_attr_zero_req_lim,
2777 &dev_attr_local_ib_port,
2778 &dev_attr_local_ib_device,
2779 &dev_attr_ch_count,
2780 &dev_attr_comp_vector,
2781 &dev_attr_tl_retry_count,
2782 &dev_attr_cmd_sg_entries,
2783 &dev_attr_allow_ext_sg,
2784 NULL
2787 static struct scsi_host_template srp_template = {
2788 .module = THIS_MODULE,
2789 .name = "InfiniBand SRP initiator",
2790 .proc_name = DRV_NAME,
2791 .slave_configure = srp_slave_configure,
2792 .info = srp_target_info,
2793 .queuecommand = srp_queuecommand,
2794 .change_queue_depth = srp_change_queue_depth,
2795 .eh_abort_handler = srp_abort,
2796 .eh_device_reset_handler = srp_reset_device,
2797 .eh_host_reset_handler = srp_reset_host,
2798 .skip_settle_delay = true,
2799 .sg_tablesize = SRP_DEF_SG_TABLESIZE,
2800 .can_queue = SRP_DEFAULT_CMD_SQ_SIZE,
2801 .this_id = -1,
2802 .cmd_per_lun = SRP_DEFAULT_CMD_SQ_SIZE,
2803 .use_clustering = ENABLE_CLUSTERING,
2804 .shost_attrs = srp_host_attrs,
2805 .track_queue_depth = 1,
2808 static int srp_sdev_count(struct Scsi_Host *host)
2810 struct scsi_device *sdev;
2811 int c = 0;
2813 shost_for_each_device(sdev, host)
2814 c++;
2816 return c;
2820 * Return values:
2821 * < 0 upon failure. Caller is responsible for SRP target port cleanup.
2822 * 0 and target->state == SRP_TARGET_REMOVED if asynchronous target port
2823 * removal has been scheduled.
2824 * 0 and target->state != SRP_TARGET_REMOVED upon success.
2826 static int srp_add_target(struct srp_host *host, struct srp_target_port *target)
2828 struct srp_rport_identifiers ids;
2829 struct srp_rport *rport;
2831 target->state = SRP_TARGET_SCANNING;
2832 sprintf(target->target_name, "SRP.T10:%016llX",
2833 be64_to_cpu(target->id_ext));
2835 if (scsi_add_host(target->scsi_host, host->srp_dev->dev->dma_device))
2836 return -ENODEV;
2838 memcpy(ids.port_id, &target->id_ext, 8);
2839 memcpy(ids.port_id + 8, &target->ioc_guid, 8);
2840 ids.roles = SRP_RPORT_ROLE_TARGET;
2841 rport = srp_rport_add(target->scsi_host, &ids);
2842 if (IS_ERR(rport)) {
2843 scsi_remove_host(target->scsi_host);
2844 return PTR_ERR(rport);
2847 rport->lld_data = target;
2848 target->rport = rport;
2850 spin_lock(&host->target_lock);
2851 list_add_tail(&target->list, &host->target_list);
2852 spin_unlock(&host->target_lock);
2854 scsi_scan_target(&target->scsi_host->shost_gendev,
2855 0, target->scsi_id, SCAN_WILD_CARD, 0);
2857 if (srp_connected_ch(target) < target->ch_count ||
2858 target->qp_in_error) {
2859 shost_printk(KERN_INFO, target->scsi_host,
2860 PFX "SCSI scan failed - removing SCSI host\n");
2861 srp_queue_remove_work(target);
2862 goto out;
2865 pr_debug(PFX "%s: SCSI scan succeeded - detected %d LUNs\n",
2866 dev_name(&target->scsi_host->shost_gendev),
2867 srp_sdev_count(target->scsi_host));
2869 spin_lock_irq(&target->lock);
2870 if (target->state == SRP_TARGET_SCANNING)
2871 target->state = SRP_TARGET_LIVE;
2872 spin_unlock_irq(&target->lock);
2874 out:
2875 return 0;
2878 static void srp_release_dev(struct device *dev)
2880 struct srp_host *host =
2881 container_of(dev, struct srp_host, dev);
2883 complete(&host->released);
2886 static struct class srp_class = {
2887 .name = "infiniband_srp",
2888 .dev_release = srp_release_dev
2892 * srp_conn_unique() - check whether the connection to a target is unique
2893 * @host: SRP host.
2894 * @target: SRP target port.
2896 static bool srp_conn_unique(struct srp_host *host,
2897 struct srp_target_port *target)
2899 struct srp_target_port *t;
2900 bool ret = false;
2902 if (target->state == SRP_TARGET_REMOVED)
2903 goto out;
2905 ret = true;
2907 spin_lock(&host->target_lock);
2908 list_for_each_entry(t, &host->target_list, list) {
2909 if (t != target &&
2910 target->id_ext == t->id_ext &&
2911 target->ioc_guid == t->ioc_guid &&
2912 target->initiator_ext == t->initiator_ext) {
2913 ret = false;
2914 break;
2917 spin_unlock(&host->target_lock);
2919 out:
2920 return ret;
/*
 * Target ports are added by writing
 *
 *     id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>,dgid=<dest GID>,
 *     pkey=<P_Key>,service_id=<service ID>
 *
 * to the add_target sysfs attribute.
 *
 * Each option owns one bit so that the parser can accumulate the options
 * it has seen in a mask; SRP_OPT_ALL is the set of mandatory options.
 */
enum {
	SRP_OPT_ERR		= 0,
	SRP_OPT_ID_EXT		= 1 <<  0,
	SRP_OPT_IOC_GUID	= 1 <<  1,
	SRP_OPT_DGID		= 1 <<  2,
	SRP_OPT_PKEY		= 1 <<  3,
	SRP_OPT_SERVICE_ID	= 1 <<  4,
	SRP_OPT_MAX_SECT	= 1 <<  5,
	SRP_OPT_MAX_CMD_PER_LUN	= 1 <<  6,
	SRP_OPT_IO_CLASS	= 1 <<  7,
	SRP_OPT_INITIATOR_EXT	= 1 <<  8,
	SRP_OPT_CMD_SG_ENTRIES	= 1 <<  9,
	SRP_OPT_ALLOW_EXT_SG	= 1 << 10,
	SRP_OPT_SG_TABLESIZE	= 1 << 11,
	SRP_OPT_COMP_VECTOR	= 1 << 12,
	SRP_OPT_TL_RETRY_COUNT	= 1 << 13,
	SRP_OPT_QUEUE_SIZE	= 1 << 14,
	SRP_OPT_ALL		= (SRP_OPT_ID_EXT	|
				   SRP_OPT_IOC_GUID	|
				   SRP_OPT_DGID		|
				   SRP_OPT_PKEY		|
				   SRP_OPT_SERVICE_ID),
};
2955 static const match_table_t srp_opt_tokens = {
2956 { SRP_OPT_ID_EXT, "id_ext=%s" },
2957 { SRP_OPT_IOC_GUID, "ioc_guid=%s" },
2958 { SRP_OPT_DGID, "dgid=%s" },
2959 { SRP_OPT_PKEY, "pkey=%x" },
2960 { SRP_OPT_SERVICE_ID, "service_id=%s" },
2961 { SRP_OPT_MAX_SECT, "max_sect=%d" },
2962 { SRP_OPT_MAX_CMD_PER_LUN, "max_cmd_per_lun=%d" },
2963 { SRP_OPT_IO_CLASS, "io_class=%x" },
2964 { SRP_OPT_INITIATOR_EXT, "initiator_ext=%s" },
2965 { SRP_OPT_CMD_SG_ENTRIES, "cmd_sg_entries=%u" },
2966 { SRP_OPT_ALLOW_EXT_SG, "allow_ext_sg=%u" },
2967 { SRP_OPT_SG_TABLESIZE, "sg_tablesize=%u" },
2968 { SRP_OPT_COMP_VECTOR, "comp_vector=%u" },
2969 { SRP_OPT_TL_RETRY_COUNT, "tl_retry_count=%u" },
2970 { SRP_OPT_QUEUE_SIZE, "queue_size=%d" },
2971 { SRP_OPT_ERR, NULL }
2974 static int srp_parse_options(const char *buf, struct srp_target_port *target)
2976 char *options, *sep_opt;
2977 char *p;
2978 char dgid[3];
2979 substring_t args[MAX_OPT_ARGS];
2980 int opt_mask = 0;
2981 int token;
2982 int ret = -EINVAL;
2983 int i;
2985 options = kstrdup(buf, GFP_KERNEL);
2986 if (!options)
2987 return -ENOMEM;
2989 sep_opt = options;
2990 while ((p = strsep(&sep_opt, ",\n")) != NULL) {
2991 if (!*p)
2992 continue;
2994 token = match_token(p, srp_opt_tokens, args);
2995 opt_mask |= token;
2997 switch (token) {
2998 case SRP_OPT_ID_EXT:
2999 p = match_strdup(args);
3000 if (!p) {
3001 ret = -ENOMEM;
3002 goto out;
3004 target->id_ext = cpu_to_be64(simple_strtoull(p, NULL, 16));
3005 kfree(p);
3006 break;
3008 case SRP_OPT_IOC_GUID:
3009 p = match_strdup(args);
3010 if (!p) {
3011 ret = -ENOMEM;
3012 goto out;
3014 target->ioc_guid = cpu_to_be64(simple_strtoull(p, NULL, 16));
3015 kfree(p);
3016 break;
3018 case SRP_OPT_DGID:
3019 p = match_strdup(args);
3020 if (!p) {
3021 ret = -ENOMEM;
3022 goto out;
3024 if (strlen(p) != 32) {
3025 pr_warn("bad dest GID parameter '%s'\n", p);
3026 kfree(p);
3027 goto out;
3030 for (i = 0; i < 16; ++i) {
3031 strlcpy(dgid, p + i * 2, sizeof(dgid));
3032 if (sscanf(dgid, "%hhx",
3033 &target->orig_dgid.raw[i]) < 1) {
3034 ret = -EINVAL;
3035 kfree(p);
3036 goto out;
3039 kfree(p);
3040 break;
3042 case SRP_OPT_PKEY:
3043 if (match_hex(args, &token)) {
3044 pr_warn("bad P_Key parameter '%s'\n", p);
3045 goto out;
3047 target->pkey = cpu_to_be16(token);
3048 break;
3050 case SRP_OPT_SERVICE_ID:
3051 p = match_strdup(args);
3052 if (!p) {
3053 ret = -ENOMEM;
3054 goto out;
3056 target->service_id = cpu_to_be64(simple_strtoull(p, NULL, 16));
3057 kfree(p);
3058 break;
3060 case SRP_OPT_MAX_SECT:
3061 if (match_int(args, &token)) {
3062 pr_warn("bad max sect parameter '%s'\n", p);
3063 goto out;
3065 target->scsi_host->max_sectors = token;
3066 break;
3068 case SRP_OPT_QUEUE_SIZE:
3069 if (match_int(args, &token) || token < 1) {
3070 pr_warn("bad queue_size parameter '%s'\n", p);
3071 goto out;
3073 target->scsi_host->can_queue = token;
3074 target->queue_size = token + SRP_RSP_SQ_SIZE +
3075 SRP_TSK_MGMT_SQ_SIZE;
3076 if (!(opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
3077 target->scsi_host->cmd_per_lun = token;
3078 break;
3080 case SRP_OPT_MAX_CMD_PER_LUN:
3081 if (match_int(args, &token) || token < 1) {
3082 pr_warn("bad max cmd_per_lun parameter '%s'\n",
3084 goto out;
3086 target->scsi_host->cmd_per_lun = token;
3087 break;
3089 case SRP_OPT_IO_CLASS:
3090 if (match_hex(args, &token)) {
3091 pr_warn("bad IO class parameter '%s'\n", p);
3092 goto out;
3094 if (token != SRP_REV10_IB_IO_CLASS &&
3095 token != SRP_REV16A_IB_IO_CLASS) {
3096 pr_warn("unknown IO class parameter value %x specified (use %x or %x).\n",
3097 token, SRP_REV10_IB_IO_CLASS,
3098 SRP_REV16A_IB_IO_CLASS);
3099 goto out;
3101 target->io_class = token;
3102 break;
3104 case SRP_OPT_INITIATOR_EXT:
3105 p = match_strdup(args);
3106 if (!p) {
3107 ret = -ENOMEM;
3108 goto out;
3110 target->initiator_ext = cpu_to_be64(simple_strtoull(p, NULL, 16));
3111 kfree(p);
3112 break;
3114 case SRP_OPT_CMD_SG_ENTRIES:
3115 if (match_int(args, &token) || token < 1 || token > 255) {
3116 pr_warn("bad max cmd_sg_entries parameter '%s'\n",
3118 goto out;
3120 target->cmd_sg_cnt = token;
3121 break;
3123 case SRP_OPT_ALLOW_EXT_SG:
3124 if (match_int(args, &token)) {
3125 pr_warn("bad allow_ext_sg parameter '%s'\n", p);
3126 goto out;
3128 target->allow_ext_sg = !!token;
3129 break;
3131 case SRP_OPT_SG_TABLESIZE:
3132 if (match_int(args, &token) || token < 1 ||
3133 token > SCSI_MAX_SG_CHAIN_SEGMENTS) {
3134 pr_warn("bad max sg_tablesize parameter '%s'\n",
3136 goto out;
3138 target->sg_tablesize = token;
3139 break;
3141 case SRP_OPT_COMP_VECTOR:
3142 if (match_int(args, &token) || token < 0) {
3143 pr_warn("bad comp_vector parameter '%s'\n", p);
3144 goto out;
3146 target->comp_vector = token;
3147 break;
3149 case SRP_OPT_TL_RETRY_COUNT:
3150 if (match_int(args, &token) || token < 2 || token > 7) {
3151 pr_warn("bad tl_retry_count parameter '%s' (must be a number between 2 and 7)\n",
3153 goto out;
3155 target->tl_retry_count = token;
3156 break;
3158 default:
3159 pr_warn("unknown parameter or missing value '%s' in target creation request\n",
3161 goto out;
3165 if ((opt_mask & SRP_OPT_ALL) == SRP_OPT_ALL)
3166 ret = 0;
3167 else
3168 for (i = 0; i < ARRAY_SIZE(srp_opt_tokens); ++i)
3169 if ((srp_opt_tokens[i].token & SRP_OPT_ALL) &&
3170 !(srp_opt_tokens[i].token & opt_mask))
3171 pr_warn("target creation request is missing parameter '%s'\n",
3172 srp_opt_tokens[i].pattern);
3174 if (target->scsi_host->cmd_per_lun > target->scsi_host->can_queue
3175 && (opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
3176 pr_warn("cmd_per_lun = %d > queue_size = %d\n",
3177 target->scsi_host->cmd_per_lun,
3178 target->scsi_host->can_queue);
3180 out:
3181 kfree(options);
3182 return ret;
3185 static ssize_t srp_create_target(struct device *dev,
3186 struct device_attribute *attr,
3187 const char *buf, size_t count)
3189 struct srp_host *host =
3190 container_of(dev, struct srp_host, dev);
3191 struct Scsi_Host *target_host;
3192 struct srp_target_port *target;
3193 struct srp_rdma_ch *ch;
3194 struct srp_device *srp_dev = host->srp_dev;
3195 struct ib_device *ibdev = srp_dev->dev;
3196 int ret, node_idx, node, cpu, i;
3197 bool multich = false;
3199 target_host = scsi_host_alloc(&srp_template,
3200 sizeof (struct srp_target_port));
3201 if (!target_host)
3202 return -ENOMEM;
3204 target_host->transportt = ib_srp_transport_template;
3205 target_host->max_channel = 0;
3206 target_host->max_id = 1;
3207 target_host->max_lun = -1LL;
3208 target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb;
3210 target = host_to_target(target_host);
3212 target->io_class = SRP_REV16A_IB_IO_CLASS;
3213 target->scsi_host = target_host;
3214 target->srp_host = host;
3215 target->lkey = host->srp_dev->pd->local_dma_lkey;
3216 target->global_mr = host->srp_dev->global_mr;
3217 target->cmd_sg_cnt = cmd_sg_entries;
3218 target->sg_tablesize = indirect_sg_entries ? : cmd_sg_entries;
3219 target->allow_ext_sg = allow_ext_sg;
3220 target->tl_retry_count = 7;
3221 target->queue_size = SRP_DEFAULT_QUEUE_SIZE;
3224 * Avoid that the SCSI host can be removed by srp_remove_target()
3225 * before this function returns.
3227 scsi_host_get(target->scsi_host);
3229 mutex_lock(&host->add_target_mutex);
3231 ret = srp_parse_options(buf, target);
3232 if (ret)
3233 goto out;
3235 target->req_ring_size = target->queue_size - SRP_TSK_MGMT_SQ_SIZE;
3237 if (!srp_conn_unique(target->srp_host, target)) {
3238 shost_printk(KERN_INFO, target->scsi_host,
3239 PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n",
3240 be64_to_cpu(target->id_ext),
3241 be64_to_cpu(target->ioc_guid),
3242 be64_to_cpu(target->initiator_ext));
3243 ret = -EEXIST;
3244 goto out;
3247 if (!srp_dev->has_fmr && !srp_dev->has_fr && !target->allow_ext_sg &&
3248 target->cmd_sg_cnt < target->sg_tablesize) {
3249 pr_warn("No MR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n");
3250 target->sg_tablesize = target->cmd_sg_cnt;
3253 target_host->sg_tablesize = target->sg_tablesize;
3254 target->indirect_size = target->sg_tablesize *
3255 sizeof (struct srp_direct_buf);
3256 target->max_iu_len = sizeof (struct srp_cmd) +
3257 sizeof (struct srp_indirect_buf) +
3258 target->cmd_sg_cnt * sizeof (struct srp_direct_buf);
3260 INIT_WORK(&target->tl_err_work, srp_tl_err_work);
3261 INIT_WORK(&target->remove_work, srp_remove_work);
3262 spin_lock_init(&target->lock);
3263 ret = ib_query_gid(ibdev, host->port, 0, &target->sgid, NULL);
3264 if (ret)
3265 goto out;
3267 ret = -ENOMEM;
3268 target->ch_count = max_t(unsigned, num_online_nodes(),
3269 min(ch_count ? :
3270 min(4 * num_online_nodes(),
3271 ibdev->num_comp_vectors),
3272 num_online_cpus()));
3273 target->ch = kcalloc(target->ch_count, sizeof(*target->ch),
3274 GFP_KERNEL);
3275 if (!target->ch)
3276 goto out;
3278 node_idx = 0;
3279 for_each_online_node(node) {
3280 const int ch_start = (node_idx * target->ch_count /
3281 num_online_nodes());
3282 const int ch_end = ((node_idx + 1) * target->ch_count /
3283 num_online_nodes());
3284 const int cv_start = (node_idx * ibdev->num_comp_vectors /
3285 num_online_nodes() + target->comp_vector)
3286 % ibdev->num_comp_vectors;
3287 const int cv_end = ((node_idx + 1) * ibdev->num_comp_vectors /
3288 num_online_nodes() + target->comp_vector)
3289 % ibdev->num_comp_vectors;
3290 int cpu_idx = 0;
3292 for_each_online_cpu(cpu) {
3293 if (cpu_to_node(cpu) != node)
3294 continue;
3295 if (ch_start + cpu_idx >= ch_end)
3296 continue;
3297 ch = &target->ch[ch_start + cpu_idx];
3298 ch->target = target;
3299 ch->comp_vector = cv_start == cv_end ? cv_start :
3300 cv_start + cpu_idx % (cv_end - cv_start);
3301 spin_lock_init(&ch->lock);
3302 INIT_LIST_HEAD(&ch->free_tx);
3303 ret = srp_new_cm_id(ch);
3304 if (ret)
3305 goto err_disconnect;
3307 ret = srp_create_ch_ib(ch);
3308 if (ret)
3309 goto err_disconnect;
3311 ret = srp_alloc_req_data(ch);
3312 if (ret)
3313 goto err_disconnect;
3315 ret = srp_connect_ch(ch, multich);
3316 if (ret) {
3317 shost_printk(KERN_ERR, target->scsi_host,
3318 PFX "Connection %d/%d failed\n",
3319 ch_start + cpu_idx,
3320 target->ch_count);
3321 if (node_idx == 0 && cpu_idx == 0) {
3322 goto err_disconnect;
3323 } else {
3324 srp_free_ch_ib(target, ch);
3325 srp_free_req_data(target, ch);
3326 target->ch_count = ch - target->ch;
3327 goto connected;
3331 multich = true;
3332 cpu_idx++;
3334 node_idx++;
3337 connected:
3338 target->scsi_host->nr_hw_queues = target->ch_count;
3340 ret = srp_add_target(host, target);
3341 if (ret)
3342 goto err_disconnect;
3344 if (target->state != SRP_TARGET_REMOVED) {
3345 shost_printk(KERN_DEBUG, target->scsi_host, PFX
3346 "new target: id_ext %016llx ioc_guid %016llx pkey %04x service_id %016llx sgid %pI6 dgid %pI6\n",
3347 be64_to_cpu(target->id_ext),
3348 be64_to_cpu(target->ioc_guid),
3349 be16_to_cpu(target->pkey),
3350 be64_to_cpu(target->service_id),
3351 target->sgid.raw, target->orig_dgid.raw);
3354 ret = count;
3356 out:
3357 mutex_unlock(&host->add_target_mutex);
3359 scsi_host_put(target->scsi_host);
3360 if (ret < 0)
3361 scsi_host_put(target->scsi_host);
3363 return ret;
3365 err_disconnect:
3366 srp_disconnect_target(target);
3368 for (i = 0; i < target->ch_count; i++) {
3369 ch = &target->ch[i];
3370 srp_free_ch_ib(target, ch);
3371 srp_free_req_data(target, ch);
3374 kfree(target->ch);
3375 goto out;
3378 static DEVICE_ATTR(add_target, S_IWUSR, NULL, srp_create_target);
3380 static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
3381 char *buf)
3383 struct srp_host *host = container_of(dev, struct srp_host, dev);
3385 return sprintf(buf, "%s\n", host->srp_dev->dev->name);
3388 static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
3390 static ssize_t show_port(struct device *dev, struct device_attribute *attr,
3391 char *buf)
3393 struct srp_host *host = container_of(dev, struct srp_host, dev);
3395 return sprintf(buf, "%d\n", host->port);
3398 static DEVICE_ATTR(port, S_IRUGO, show_port, NULL);
3400 static struct srp_host *srp_add_port(struct srp_device *device, u8 port)
3402 struct srp_host *host;
3404 host = kzalloc(sizeof *host, GFP_KERNEL);
3405 if (!host)
3406 return NULL;
3408 INIT_LIST_HEAD(&host->target_list);
3409 spin_lock_init(&host->target_lock);
3410 init_completion(&host->released);
3411 mutex_init(&host->add_target_mutex);
3412 host->srp_dev = device;
3413 host->port = port;
3415 host->dev.class = &srp_class;
3416 host->dev.parent = device->dev->dma_device;
3417 dev_set_name(&host->dev, "srp-%s-%d", device->dev->name, port);
3419 if (device_register(&host->dev))
3420 goto free_host;
3421 if (device_create_file(&host->dev, &dev_attr_add_target))
3422 goto err_class;
3423 if (device_create_file(&host->dev, &dev_attr_ibdev))
3424 goto err_class;
3425 if (device_create_file(&host->dev, &dev_attr_port))
3426 goto err_class;
3428 return host;
3430 err_class:
3431 device_unregister(&host->dev);
3433 free_host:
3434 kfree(host);
3436 return NULL;
3439 static void srp_add_one(struct ib_device *device)
3441 struct srp_device *srp_dev;
3442 struct ib_device_attr *dev_attr;
3443 struct srp_host *host;
3444 int mr_page_shift, p;
3445 u64 max_pages_per_mr;
3447 dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL);
3448 if (!dev_attr)
3449 return;
3451 if (ib_query_device(device, dev_attr)) {
3452 pr_warn("Query device failed for %s\n", device->name);
3453 goto free_attr;
3456 srp_dev = kmalloc(sizeof *srp_dev, GFP_KERNEL);
3457 if (!srp_dev)
3458 goto free_attr;
3460 srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
3461 device->map_phys_fmr && device->unmap_fmr);
3462 srp_dev->has_fr = (dev_attr->device_cap_flags &
3463 IB_DEVICE_MEM_MGT_EXTENSIONS);
3464 if (!srp_dev->has_fmr && !srp_dev->has_fr)
3465 dev_warn(&device->dev, "neither FMR nor FR is supported\n");
3467 srp_dev->use_fast_reg = (srp_dev->has_fr &&
3468 (!srp_dev->has_fmr || prefer_fr));
3469 srp_dev->use_fmr = !srp_dev->use_fast_reg && srp_dev->has_fmr;
3472 * Use the smallest page size supported by the HCA, down to a
3473 * minimum of 4096 bytes. We're unlikely to build large sglists
3474 * out of smaller entries.
3476 mr_page_shift = max(12, ffs(dev_attr->page_size_cap) - 1);
3477 srp_dev->mr_page_size = 1 << mr_page_shift;
3478 srp_dev->mr_page_mask = ~((u64) srp_dev->mr_page_size - 1);
3479 max_pages_per_mr = dev_attr->max_mr_size;
3480 do_div(max_pages_per_mr, srp_dev->mr_page_size);
3481 srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR,
3482 max_pages_per_mr);
3483 if (srp_dev->use_fast_reg) {
3484 srp_dev->max_pages_per_mr =
3485 min_t(u32, srp_dev->max_pages_per_mr,
3486 dev_attr->max_fast_reg_page_list_len);
3488 srp_dev->mr_max_size = srp_dev->mr_page_size *
3489 srp_dev->max_pages_per_mr;
3490 pr_debug("%s: mr_page_shift = %d, dev_attr->max_mr_size = %#llx, dev_attr->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
3491 device->name, mr_page_shift, dev_attr->max_mr_size,
3492 dev_attr->max_fast_reg_page_list_len,
3493 srp_dev->max_pages_per_mr, srp_dev->mr_max_size);
3495 INIT_LIST_HEAD(&srp_dev->dev_list);
3497 srp_dev->dev = device;
3498 srp_dev->pd = ib_alloc_pd(device);
3499 if (IS_ERR(srp_dev->pd))
3500 goto free_dev;
3502 if (!register_always || (!srp_dev->has_fmr && !srp_dev->has_fr)) {
3503 srp_dev->global_mr = ib_get_dma_mr(srp_dev->pd,
3504 IB_ACCESS_LOCAL_WRITE |
3505 IB_ACCESS_REMOTE_READ |
3506 IB_ACCESS_REMOTE_WRITE);
3507 if (IS_ERR(srp_dev->global_mr))
3508 goto err_pd;
3509 } else {
3510 srp_dev->global_mr = NULL;
3513 for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) {
3514 host = srp_add_port(srp_dev, p);
3515 if (host)
3516 list_add_tail(&host->list, &srp_dev->dev_list);
3519 ib_set_client_data(device, &srp_client, srp_dev);
3521 goto free_attr;
3523 err_pd:
3524 ib_dealloc_pd(srp_dev->pd);
3526 free_dev:
3527 kfree(srp_dev);
3529 free_attr:
3530 kfree(dev_attr);
3533 static void srp_remove_one(struct ib_device *device, void *client_data)
3535 struct srp_device *srp_dev;
3536 struct srp_host *host, *tmp_host;
3537 struct srp_target_port *target;
3539 srp_dev = client_data;
3540 if (!srp_dev)
3541 return;
3543 list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) {
3544 device_unregister(&host->dev);
3546 * Wait for the sysfs entry to go away, so that no new
3547 * target ports can be created.
3549 wait_for_completion(&host->released);
3552 * Remove all target ports.
3554 spin_lock(&host->target_lock);
3555 list_for_each_entry(target, &host->target_list, list)
3556 srp_queue_remove_work(target);
3557 spin_unlock(&host->target_lock);
3560 * Wait for tl_err and target port removal tasks.
3562 flush_workqueue(system_long_wq);
3563 flush_workqueue(srp_remove_wq);
3565 kfree(host);
3568 if (srp_dev->global_mr)
3569 ib_dereg_mr(srp_dev->global_mr);
3570 ib_dealloc_pd(srp_dev->pd);
3572 kfree(srp_dev);
3575 static struct srp_function_template ib_srp_transport_functions = {
3576 .has_rport_state = true,
3577 .reset_timer_if_blocked = true,
3578 .reconnect_delay = &srp_reconnect_delay,
3579 .fast_io_fail_tmo = &srp_fast_io_fail_tmo,
3580 .dev_loss_tmo = &srp_dev_loss_tmo,
3581 .reconnect = srp_rport_reconnect,
3582 .rport_delete = srp_rport_delete,
3583 .terminate_rport_io = srp_terminate_io,
3586 static int __init srp_init_module(void)
3588 int ret;
3590 BUILD_BUG_ON(FIELD_SIZEOF(struct ib_wc, wr_id) < sizeof(void *));
3592 if (srp_sg_tablesize) {
3593 pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n");
3594 if (!cmd_sg_entries)
3595 cmd_sg_entries = srp_sg_tablesize;
3598 if (!cmd_sg_entries)
3599 cmd_sg_entries = SRP_DEF_SG_TABLESIZE;
3601 if (cmd_sg_entries > 255) {
3602 pr_warn("Clamping cmd_sg_entries to 255\n");
3603 cmd_sg_entries = 255;
3606 if (!indirect_sg_entries)
3607 indirect_sg_entries = cmd_sg_entries;
3608 else if (indirect_sg_entries < cmd_sg_entries) {
3609 pr_warn("Bumping up indirect_sg_entries to match cmd_sg_entries (%u)\n",
3610 cmd_sg_entries);
3611 indirect_sg_entries = cmd_sg_entries;
3614 srp_remove_wq = create_workqueue("srp_remove");
3615 if (!srp_remove_wq) {
3616 ret = -ENOMEM;
3617 goto out;
3620 ret = -ENOMEM;
3621 ib_srp_transport_template =
3622 srp_attach_transport(&ib_srp_transport_functions);
3623 if (!ib_srp_transport_template)
3624 goto destroy_wq;
3626 ret = class_register(&srp_class);
3627 if (ret) {
3628 pr_err("couldn't register class infiniband_srp\n");
3629 goto release_tr;
3632 ib_sa_register_client(&srp_sa_client);
3634 ret = ib_register_client(&srp_client);
3635 if (ret) {
3636 pr_err("couldn't register IB client\n");
3637 goto unreg_sa;
3640 out:
3641 return ret;
3643 unreg_sa:
3644 ib_sa_unregister_client(&srp_sa_client);
3645 class_unregister(&srp_class);
3647 release_tr:
3648 srp_release_transport(ib_srp_transport_template);
3650 destroy_wq:
3651 destroy_workqueue(srp_remove_wq);
3652 goto out;
3655 static void __exit srp_cleanup_module(void)
3657 ib_unregister_client(&srp_client);
3658 ib_sa_unregister_client(&srp_sa_client);
3659 class_unregister(&srp_class);
3660 srp_release_transport(ib_srp_transport_template);
3661 destroy_workqueue(srp_remove_wq);
/* Register the module's load and unload handlers. */
module_init(srp_init_module);
module_exit(srp_cleanup_module);