drivers/infiniband/ulp/srp/ib_srp.c
1 /*
2 * Copyright (c) 2005 Cisco Systems. All rights reserved.
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
33 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
35 #include <linux/module.h>
36 #include <linux/init.h>
37 #include <linux/slab.h>
38 #include <linux/err.h>
39 #include <linux/string.h>
40 #include <linux/parser.h>
41 #include <linux/random.h>
42 #include <linux/jiffies.h>
43 #include <rdma/ib_cache.h>
45 #include <linux/atomic.h>
47 #include <scsi/scsi.h>
48 #include <scsi/scsi_device.h>
49 #include <scsi/scsi_dbg.h>
50 #include <scsi/scsi_tcq.h>
51 #include <scsi/srp.h>
52 #include <scsi/scsi_transport_srp.h>
54 #include "ib_srp.h"
56 #define DRV_NAME "ib_srp"
57 #define PFX DRV_NAME ": "
58 #define DRV_VERSION "2.0"
59 #define DRV_RELDATE "July 26, 2015"
61 MODULE_AUTHOR("Roland Dreier");
62 MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator");
63 MODULE_LICENSE("Dual BSD/GPL");
64 MODULE_VERSION(DRV_VERSION);
65 MODULE_INFO(release_date, DRV_RELDATE);
67 static unsigned int srp_sg_tablesize;
68 static unsigned int cmd_sg_entries;
69 static unsigned int indirect_sg_entries;
70 static bool allow_ext_sg;
71 static bool prefer_fr = true;
72 static bool register_always = true;
73 static bool never_register;
74 static int topspin_workarounds = 1;
76 module_param(srp_sg_tablesize, uint, 0444);
77 MODULE_PARM_DESC(srp_sg_tablesize, "Deprecated name for cmd_sg_entries");
79 module_param(cmd_sg_entries, uint, 0444);
80 MODULE_PARM_DESC(cmd_sg_entries,
81 "Default number of gather/scatter entries in the SRP command (default is 12, max 255)");
83 module_param(indirect_sg_entries, uint, 0444);
84 MODULE_PARM_DESC(indirect_sg_entries,
85 "Default max number of gather/scatter entries (default is 12, max is " __stringify(SG_MAX_SEGMENTS) ")");
87 module_param(allow_ext_sg, bool, 0444);
88 MODULE_PARM_DESC(allow_ext_sg,
89 "Default behavior when there are more than cmd_sg_entries S/G entries after mapping; fails the request when false (default false)");
91 module_param(topspin_workarounds, int, 0444);
92 MODULE_PARM_DESC(topspin_workarounds,
93 "Enable workarounds for Topspin/Cisco SRP target bugs if != 0");
95 module_param(prefer_fr, bool, 0444);
96 MODULE_PARM_DESC(prefer_fr,
97 "Whether to use fast registration if both FMR and fast registration are supported");
99 module_param(register_always, bool, 0444);
100 MODULE_PARM_DESC(register_always,
101 "Use memory registration even for contiguous memory regions");
103 module_param(never_register, bool, 0444);
104 MODULE_PARM_DESC(never_register, "Never register memory");
106 static const struct kernel_param_ops srp_tmo_ops;
108 static int srp_reconnect_delay = 10;
109 module_param_cb(reconnect_delay, &srp_tmo_ops, &srp_reconnect_delay,
110 S_IRUGO | S_IWUSR);
111 MODULE_PARM_DESC(reconnect_delay, "Time between successive reconnect attempts");
113 static int srp_fast_io_fail_tmo = 15;
114 module_param_cb(fast_io_fail_tmo, &srp_tmo_ops, &srp_fast_io_fail_tmo,
115 S_IRUGO | S_IWUSR);
116 MODULE_PARM_DESC(fast_io_fail_tmo,
117 "Number of seconds between the observation of a transport"
118 " layer error and failing all I/O. \"off\" means that this"
119 " functionality is disabled.");
121 static int srp_dev_loss_tmo = 600;
122 module_param_cb(dev_loss_tmo, &srp_tmo_ops, &srp_dev_loss_tmo,
123 S_IRUGO | S_IWUSR);
124 MODULE_PARM_DESC(dev_loss_tmo,
125 "Maximum number of seconds that the SRP transport should"
126 " insulate transport layer errors. After this time has been"
127 " exceeded the SCSI host is removed. Should be"
128 " between 1 and " __stringify(SCSI_DEVICE_BLOCK_MAX_TIMEOUT)
129 " if fast_io_fail_tmo has not been set. \"off\" means that"
130 " this functionality is disabled.");
132 static unsigned ch_count;
133 module_param(ch_count, uint, 0444);
134 MODULE_PARM_DESC(ch_count,
135 "Number of RDMA channels to use for communication with an SRP target. Using more than one channel improves performance if the HCA supports multiple completion vectors. The default value is the minimum of four times the number of online CPU sockets and the number of completion vectors supported by the HCA.");
137 static void srp_add_one(struct ib_device *device);
138 static void srp_remove_one(struct ib_device *device, void *client_data);
139 static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc);
140 static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
141 const char *opname);
142 static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);
144 static struct scsi_transport_template *ib_srp_transport_template;
145 static struct workqueue_struct *srp_remove_wq;
147 static struct ib_client srp_client = {
148 .name = "srp",
149 .add = srp_add_one,
150 .remove = srp_remove_one
153 static struct ib_sa_client srp_sa_client;
155 static int srp_tmo_get(char *buffer, const struct kernel_param *kp)
157 int tmo = *(int *)kp->arg;
159 if (tmo >= 0)
160 return sprintf(buffer, "%d", tmo);
161 else
162 return sprintf(buffer, "off");
165 static int srp_tmo_set(const char *val, const struct kernel_param *kp)
167 int tmo, res;
169 res = srp_parse_tmo(&tmo, val);
170 if (res)
171 goto out;
173 if (kp->arg == &srp_reconnect_delay)
174 res = srp_tmo_valid(tmo, srp_fast_io_fail_tmo,
175 srp_dev_loss_tmo);
176 else if (kp->arg == &srp_fast_io_fail_tmo)
177 res = srp_tmo_valid(srp_reconnect_delay, tmo, srp_dev_loss_tmo);
178 else
179 res = srp_tmo_valid(srp_reconnect_delay, srp_fast_io_fail_tmo,
180 tmo);
181 if (res)
182 goto out;
183 *(int *)kp->arg = tmo;
185 out:
186 return res;
189 static const struct kernel_param_ops srp_tmo_ops = {
190 .get = srp_tmo_get,
191 .set = srp_tmo_set,
194 static inline struct srp_target_port *host_to_target(struct Scsi_Host *host)
196 return (struct srp_target_port *) host->hostdata;
199 static const char *srp_target_info(struct Scsi_Host *host)
201 return host_to_target(host)->target_name;
204 static int srp_target_is_topspin(struct srp_target_port *target)
206 static const u8 topspin_oui[3] = { 0x00, 0x05, 0xad };
207 static const u8 cisco_oui[3] = { 0x00, 0x1b, 0x0d };
209 return topspin_workarounds &&
210 (!memcmp(&target->ioc_guid, topspin_oui, sizeof topspin_oui) ||
211 !memcmp(&target->ioc_guid, cisco_oui, sizeof cisco_oui));
214 static struct srp_iu *srp_alloc_iu(struct srp_host *host, size_t size,
215 gfp_t gfp_mask,
216 enum dma_data_direction direction)
218 struct srp_iu *iu;
220 iu = kmalloc(sizeof *iu, gfp_mask);
221 if (!iu)
222 goto out;
224 iu->buf = kzalloc(size, gfp_mask);
225 if (!iu->buf)
226 goto out_free_iu;
228 iu->dma = ib_dma_map_single(host->srp_dev->dev, iu->buf, size,
229 direction);
230 if (ib_dma_mapping_error(host->srp_dev->dev, iu->dma))
231 goto out_free_buf;
233 iu->size = size;
234 iu->direction = direction;
236 return iu;
238 out_free_buf:
239 kfree(iu->buf);
240 out_free_iu:
241 kfree(iu);
242 out:
243 return NULL;
246 static void srp_free_iu(struct srp_host *host, struct srp_iu *iu)
248 if (!iu)
249 return;
251 ib_dma_unmap_single(host->srp_dev->dev, iu->dma, iu->size,
252 iu->direction);
253 kfree(iu->buf);
254 kfree(iu);
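/*
 * Illustrative pairing of the two helpers above (sketch, not part of the
 * driver; "host" and "len" are placeholders). srp_alloc_iu() allocates the
 * payload buffer and DMA-maps it, so every successful allocation must be
 * released with srp_free_iu() against the same host:
 *
 *	struct srp_iu *iu;
 *
 *	iu = srp_alloc_iu(host, len, GFP_KERNEL, DMA_FROM_DEVICE);
 *	if (!iu)
 *		return -ENOMEM;
 *	...
 *	srp_free_iu(host, iu);
 */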
257 static void srp_qp_event(struct ib_event *event, void *context)
259 pr_debug("QP event %s (%d)\n",
260 ib_event_msg(event->event), event->event);
263 static int srp_init_qp(struct srp_target_port *target,
264 struct ib_qp *qp)
266 struct ib_qp_attr *attr;
267 int ret;
269 attr = kmalloc(sizeof *attr, GFP_KERNEL);
270 if (!attr)
271 return -ENOMEM;
273 ret = ib_find_cached_pkey(target->srp_host->srp_dev->dev,
274 target->srp_host->port,
275 be16_to_cpu(target->pkey),
276 &attr->pkey_index);
277 if (ret)
278 goto out;
280 attr->qp_state = IB_QPS_INIT;
281 attr->qp_access_flags = (IB_ACCESS_REMOTE_READ |
282 IB_ACCESS_REMOTE_WRITE);
283 attr->port_num = target->srp_host->port;
285 ret = ib_modify_qp(qp, attr,
286 IB_QP_STATE |
287 IB_QP_PKEY_INDEX |
288 IB_QP_ACCESS_FLAGS |
289 IB_QP_PORT);
291 out:
292 kfree(attr);
293 return ret;
296 static int srp_new_cm_id(struct srp_rdma_ch *ch)
298 struct srp_target_port *target = ch->target;
299 struct ib_cm_id *new_cm_id;
301 new_cm_id = ib_create_cm_id(target->srp_host->srp_dev->dev,
302 srp_cm_handler, ch);
303 if (IS_ERR(new_cm_id))
304 return PTR_ERR(new_cm_id);
306 if (ch->cm_id)
307 ib_destroy_cm_id(ch->cm_id);
308 ch->cm_id = new_cm_id;
309 ch->path.sgid = target->sgid;
310 ch->path.dgid = target->orig_dgid;
311 ch->path.pkey = target->pkey;
312 ch->path.service_id = target->service_id;
314 return 0;
317 static struct ib_fmr_pool *srp_alloc_fmr_pool(struct srp_target_port *target)
319 struct srp_device *dev = target->srp_host->srp_dev;
320 struct ib_fmr_pool_param fmr_param;
322 memset(&fmr_param, 0, sizeof(fmr_param));
323 fmr_param.pool_size = target->mr_pool_size;
324 fmr_param.dirty_watermark = fmr_param.pool_size / 4;
325 fmr_param.cache = 1;
326 fmr_param.max_pages_per_fmr = dev->max_pages_per_mr;
327 fmr_param.page_shift = ilog2(dev->mr_page_size);
328 fmr_param.access = (IB_ACCESS_LOCAL_WRITE |
329 IB_ACCESS_REMOTE_WRITE |
330 IB_ACCESS_REMOTE_READ);
332 return ib_create_fmr_pool(dev->pd, &fmr_param);
336 * srp_destroy_fr_pool() - free the resources owned by a pool
337 * @pool: Fast registration pool to be destroyed.
339 static void srp_destroy_fr_pool(struct srp_fr_pool *pool)
341 int i;
342 struct srp_fr_desc *d;
344 if (!pool)
345 return;
347 for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
348 if (d->mr)
349 ib_dereg_mr(d->mr);
351 kfree(pool);
355 * srp_create_fr_pool() - allocate and initialize a pool for fast registration
356 * @device: IB device to allocate fast registration descriptors for.
357 * @pd: Protection domain associated with the FR descriptors.
358 * @pool_size: Number of descriptors to allocate.
359 * @max_page_list_len: Maximum fast registration work request page list length.
361 static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device,
362 struct ib_pd *pd, int pool_size,
363 int max_page_list_len)
365 struct srp_fr_pool *pool;
366 struct srp_fr_desc *d;
367 struct ib_mr *mr;
368 int i, ret = -EINVAL;
370 if (pool_size <= 0)
371 goto err;
372 ret = -ENOMEM;
373 pool = kzalloc(sizeof(struct srp_fr_pool) +
374 pool_size * sizeof(struct srp_fr_desc), GFP_KERNEL);
375 if (!pool)
376 goto err;
377 pool->size = pool_size;
378 pool->max_page_list_len = max_page_list_len;
379 spin_lock_init(&pool->lock);
380 INIT_LIST_HEAD(&pool->free_list);
382 for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
383 mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG,
384 max_page_list_len);
385 if (IS_ERR(mr)) {
386 ret = PTR_ERR(mr);
387 goto destroy_pool;
389 d->mr = mr;
390 list_add_tail(&d->entry, &pool->free_list);
393 out:
394 return pool;
396 destroy_pool:
397 srp_destroy_fr_pool(pool);
399 err:
400 pool = ERR_PTR(ret);
401 goto out;
405 * srp_fr_pool_get() - obtain a descriptor suitable for fast registration
406 * @pool: Pool to obtain descriptor from.
408 static struct srp_fr_desc *srp_fr_pool_get(struct srp_fr_pool *pool)
410 struct srp_fr_desc *d = NULL;
411 unsigned long flags;
413 spin_lock_irqsave(&pool->lock, flags);
414 if (!list_empty(&pool->free_list)) {
415 d = list_first_entry(&pool->free_list, typeof(*d), entry);
416 list_del(&d->entry);
418 spin_unlock_irqrestore(&pool->lock, flags);
420 return d;
424 * srp_fr_pool_put() - put an FR descriptor back in the free list
425 * @pool: Pool the descriptor was allocated from.
426 * @desc: Pointer to an array of fast registration descriptor pointers.
427 * @n: Number of descriptors to put back.
429 * Note: The caller must already have queued an invalidation request for
430 * desc->mr->rkey before calling this function.
432 static void srp_fr_pool_put(struct srp_fr_pool *pool, struct srp_fr_desc **desc,
433 int n)
435 unsigned long flags;
436 int i;
438 spin_lock_irqsave(&pool->lock, flags);
439 for (i = 0; i < n; i++)
440 list_add(&desc[i]->entry, &pool->free_list);
441 spin_unlock_irqrestore(&pool->lock, flags);
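/*
 * Illustrative life cycle of a fast registration descriptor (sketch, not
 * part of the driver): take a descriptor with srp_fr_pool_get(), register it
 * through an IB_WR_REG_MR work request (see srp_map_finish_fr() below), and
 * only return it with srp_fr_pool_put() after an IB_WR_LOCAL_INV for
 * desc->mr->rkey has been queued (see srp_unmap_data() below):
 *
 *	struct srp_fr_desc *desc = srp_fr_pool_get(ch->fr_pool);
 *
 *	if (!desc)
 *		return -ENOMEM;
 *	... post IB_WR_REG_MR for desc->mr, use desc->mr->rkey ...
 *	... queue IB_WR_LOCAL_INV for desc->mr->rkey ...
 *	srp_fr_pool_put(ch->fr_pool, &desc, 1);
 */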
444 static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
446 struct srp_device *dev = target->srp_host->srp_dev;
448 return srp_create_fr_pool(dev->dev, dev->pd, target->mr_pool_size,
449 dev->max_pages_per_mr);
453 * srp_destroy_qp() - destroy an RDMA queue pair
454 * @qp: RDMA queue pair.
456 * Drain the qp before destroying it. This prevents the receive
457 * completion handler from accessing the queue pair while it is
458 * being destroyed.
460 static void srp_destroy_qp(struct ib_qp *qp)
462 ib_drain_rq(qp);
463 ib_destroy_qp(qp);
466 static int srp_create_ch_ib(struct srp_rdma_ch *ch)
468 struct srp_target_port *target = ch->target;
469 struct srp_device *dev = target->srp_host->srp_dev;
470 struct ib_qp_init_attr *init_attr;
471 struct ib_cq *recv_cq, *send_cq;
472 struct ib_qp *qp;
473 struct ib_fmr_pool *fmr_pool = NULL;
474 struct srp_fr_pool *fr_pool = NULL;
475 const int m = 1 + dev->use_fast_reg * target->mr_per_cmd * 2;
476 int ret;
478 init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
479 if (!init_attr)
480 return -ENOMEM;
482 /* queue_size + 1 for ib_drain_rq() */
483 recv_cq = ib_alloc_cq(dev->dev, ch, target->queue_size + 1,
484 ch->comp_vector, IB_POLL_SOFTIRQ);
485 if (IS_ERR(recv_cq)) {
486 ret = PTR_ERR(recv_cq);
487 goto err;
490 send_cq = ib_alloc_cq(dev->dev, ch, m * target->queue_size,
491 ch->comp_vector, IB_POLL_DIRECT);
492 if (IS_ERR(send_cq)) {
493 ret = PTR_ERR(send_cq);
494 goto err_recv_cq;
497 init_attr->event_handler = srp_qp_event;
498 init_attr->cap.max_send_wr = m * target->queue_size;
499 init_attr->cap.max_recv_wr = target->queue_size + 1;
500 init_attr->cap.max_recv_sge = 1;
501 init_attr->cap.max_send_sge = 1;
502 init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
503 init_attr->qp_type = IB_QPT_RC;
504 init_attr->send_cq = send_cq;
505 init_attr->recv_cq = recv_cq;
507 qp = ib_create_qp(dev->pd, init_attr);
508 if (IS_ERR(qp)) {
509 ret = PTR_ERR(qp);
510 goto err_send_cq;
513 ret = srp_init_qp(target, qp);
514 if (ret)
515 goto err_qp;
517 if (dev->use_fast_reg) {
518 fr_pool = srp_alloc_fr_pool(target);
519 if (IS_ERR(fr_pool)) {
520 ret = PTR_ERR(fr_pool);
521 shost_printk(KERN_WARNING, target->scsi_host, PFX
522 "FR pool allocation failed (%d)\n", ret);
523 goto err_qp;
525 } else if (dev->use_fmr) {
526 fmr_pool = srp_alloc_fmr_pool(target);
527 if (IS_ERR(fmr_pool)) {
528 ret = PTR_ERR(fmr_pool);
529 shost_printk(KERN_WARNING, target->scsi_host, PFX
530 "FMR pool allocation failed (%d)\n", ret);
531 goto err_qp;
535 if (ch->qp)
536 srp_destroy_qp(ch->qp);
537 if (ch->recv_cq)
538 ib_free_cq(ch->recv_cq);
539 if (ch->send_cq)
540 ib_free_cq(ch->send_cq);
542 ch->qp = qp;
543 ch->recv_cq = recv_cq;
544 ch->send_cq = send_cq;
546 if (dev->use_fast_reg) {
547 if (ch->fr_pool)
548 srp_destroy_fr_pool(ch->fr_pool);
549 ch->fr_pool = fr_pool;
550 } else if (dev->use_fmr) {
551 if (ch->fmr_pool)
552 ib_destroy_fmr_pool(ch->fmr_pool);
553 ch->fmr_pool = fmr_pool;
556 kfree(init_attr);
557 return 0;
559 err_qp:
560 srp_destroy_qp(qp);
562 err_send_cq:
563 ib_free_cq(send_cq);
565 err_recv_cq:
566 ib_free_cq(recv_cq);
568 err:
569 kfree(init_attr);
570 return ret;
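/*
 * Note on the sizing above (illustrative numbers): each SRP command takes one
 * send work request plus, with fast registration, up to mr_per_cmd pairs of
 * registration and invalidation work requests, hence
 * m = 1 + use_fast_reg * mr_per_cmd * 2. For example, with fast registration
 * and mr_per_cmd = 2, m = 5, so queue_size = 64 gives a send queue/CQ of
 * 5 * 64 = 320 entries and a receive CQ of 64 + 1 = 65 entries (the extra
 * entry is for ib_drain_rq(), as noted above).
 */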
574 * Note: this function may be called without srp_alloc_iu_bufs() having been
575 * invoked. Hence the ch->[rt]x_ring checks.
577 static void srp_free_ch_ib(struct srp_target_port *target,
578 struct srp_rdma_ch *ch)
580 struct srp_device *dev = target->srp_host->srp_dev;
581 int i;
583 if (!ch->target)
584 return;
586 if (ch->cm_id) {
587 ib_destroy_cm_id(ch->cm_id);
588 ch->cm_id = NULL;
591 /* If srp_new_cm_id() succeeded but srp_create_ch_ib() did not, return. */
592 if (!ch->qp)
593 return;
595 if (dev->use_fast_reg) {
596 if (ch->fr_pool)
597 srp_destroy_fr_pool(ch->fr_pool);
598 } else if (dev->use_fmr) {
599 if (ch->fmr_pool)
600 ib_destroy_fmr_pool(ch->fmr_pool);
603 srp_destroy_qp(ch->qp);
604 ib_free_cq(ch->send_cq);
605 ib_free_cq(ch->recv_cq);
608 * Prevent the SCSI error handler from using this channel after it
609 * has been freed: the SCSI error handler may continue trying to
610 * perform recovery actions even after scsi_remove_host() has
611 * returned.
613 ch->target = NULL;
615 ch->qp = NULL;
616 ch->send_cq = ch->recv_cq = NULL;
618 if (ch->rx_ring) {
619 for (i = 0; i < target->queue_size; ++i)
620 srp_free_iu(target->srp_host, ch->rx_ring[i]);
621 kfree(ch->rx_ring);
622 ch->rx_ring = NULL;
624 if (ch->tx_ring) {
625 for (i = 0; i < target->queue_size; ++i)
626 srp_free_iu(target->srp_host, ch->tx_ring[i]);
627 kfree(ch->tx_ring);
628 ch->tx_ring = NULL;
632 static void srp_path_rec_completion(int status,
633 struct ib_sa_path_rec *pathrec,
634 void *ch_ptr)
636 struct srp_rdma_ch *ch = ch_ptr;
637 struct srp_target_port *target = ch->target;
639 ch->status = status;
640 if (status)
641 shost_printk(KERN_ERR, target->scsi_host,
642 PFX "Got failed path rec status %d\n", status);
643 else
644 ch->path = *pathrec;
645 complete(&ch->done);
648 static int srp_lookup_path(struct srp_rdma_ch *ch)
650 struct srp_target_port *target = ch->target;
651 int ret = -ENODEV;
653 ch->path.numb_path = 1;
655 init_completion(&ch->done);
658 * Prevent the SCSI host from being removed by srp_remove_target()
659 * before srp_path_rec_completion() is called.
661 if (!scsi_host_get(target->scsi_host))
662 goto out;
664 ch->path_query_id = ib_sa_path_rec_get(&srp_sa_client,
665 target->srp_host->srp_dev->dev,
666 target->srp_host->port,
667 &ch->path,
668 IB_SA_PATH_REC_SERVICE_ID |
669 IB_SA_PATH_REC_DGID |
670 IB_SA_PATH_REC_SGID |
671 IB_SA_PATH_REC_NUMB_PATH |
672 IB_SA_PATH_REC_PKEY,
673 SRP_PATH_REC_TIMEOUT_MS,
674 GFP_KERNEL,
675 srp_path_rec_completion,
676 ch, &ch->path_query);
677 ret = ch->path_query_id;
678 if (ret < 0)
679 goto put;
681 ret = wait_for_completion_interruptible(&ch->done);
682 if (ret < 0)
683 goto put;
685 ret = ch->status;
686 if (ret < 0)
687 shost_printk(KERN_WARNING, target->scsi_host,
688 PFX "Path record query failed\n");
690 put:
691 scsi_host_put(target->scsi_host);
693 out:
694 return ret;
697 static int srp_send_req(struct srp_rdma_ch *ch, bool multich)
699 struct srp_target_port *target = ch->target;
700 struct {
701 struct ib_cm_req_param param;
702 struct srp_login_req priv;
703 } *req = NULL;
704 int status;
706 req = kzalloc(sizeof *req, GFP_KERNEL);
707 if (!req)
708 return -ENOMEM;
710 req->param.primary_path = &ch->path;
711 req->param.alternate_path = NULL;
712 req->param.service_id = target->service_id;
713 req->param.qp_num = ch->qp->qp_num;
714 req->param.qp_type = ch->qp->qp_type;
715 req->param.private_data = &req->priv;
716 req->param.private_data_len = sizeof req->priv;
717 req->param.flow_control = 1;
719 get_random_bytes(&req->param.starting_psn, 4);
720 req->param.starting_psn &= 0xffffff;
723 * Pick some arbitrary defaults here; we could make these
724 * module parameters if anyone cared about setting them.
726 req->param.responder_resources = 4;
727 req->param.remote_cm_response_timeout = 20;
728 req->param.local_cm_response_timeout = 20;
729 req->param.retry_count = target->tl_retry_count;
730 req->param.rnr_retry_count = 7;
731 req->param.max_cm_retries = 15;
733 req->priv.opcode = SRP_LOGIN_REQ;
734 req->priv.tag = 0;
735 req->priv.req_it_iu_len = cpu_to_be32(target->max_iu_len);
736 req->priv.req_buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
737 SRP_BUF_FORMAT_INDIRECT);
738 req->priv.req_flags = (multich ? SRP_MULTICHAN_MULTI :
739 SRP_MULTICHAN_SINGLE);
741 * In the published SRP specification (draft rev. 16a), the
742 * port identifier format is 8 bytes of ID extension followed
743 * by 8 bytes of GUID. Older drafts put the two halves in the
744 * opposite order, so that the GUID comes first.
746 * Targets conforming to these obsolete drafts can be
747 * recognized by the I/O Class they report.
749 if (target->io_class == SRP_REV10_IB_IO_CLASS) {
750 memcpy(req->priv.initiator_port_id,
751 &target->sgid.global.interface_id, 8);
752 memcpy(req->priv.initiator_port_id + 8,
753 &target->initiator_ext, 8);
754 memcpy(req->priv.target_port_id, &target->ioc_guid, 8);
755 memcpy(req->priv.target_port_id + 8, &target->id_ext, 8);
756 } else {
757 memcpy(req->priv.initiator_port_id,
758 &target->initiator_ext, 8);
759 memcpy(req->priv.initiator_port_id + 8,
760 &target->sgid.global.interface_id, 8);
761 memcpy(req->priv.target_port_id, &target->id_ext, 8);
762 memcpy(req->priv.target_port_id + 8, &target->ioc_guid, 8);
766 * Topspin/Cisco SRP targets will reject our login unless we
767 * zero out the first 8 bytes of our initiator port ID and set
768 * the second 8 bytes to the local node GUID.
770 if (srp_target_is_topspin(target)) {
771 shost_printk(KERN_DEBUG, target->scsi_host,
772 PFX "Topspin/Cisco initiator port ID workaround "
773 "activated for target GUID %016llx\n",
774 be64_to_cpu(target->ioc_guid));
775 memset(req->priv.initiator_port_id, 0, 8);
776 memcpy(req->priv.initiator_port_id + 8,
777 &target->srp_host->srp_dev->dev->node_guid, 8);
780 status = ib_send_cm_req(ch->cm_id, &req->param);
782 kfree(req);
784 return status;
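/*
 * Byte layout of the 16-byte port identifiers built above (illustrative
 * summary of the two code paths in srp_send_req()):
 *
 *	current spec (draft rev. 16a):	bytes 0-7   ID extension
 *					bytes 8-15  GUID
 *	SRP_REV10_IB_IO_CLASS targets:	bytes 0-7   GUID
 *					bytes 8-15  ID extension
 */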
787 static bool srp_queue_remove_work(struct srp_target_port *target)
789 bool changed = false;
791 spin_lock_irq(&target->lock);
792 if (target->state != SRP_TARGET_REMOVED) {
793 target->state = SRP_TARGET_REMOVED;
794 changed = true;
796 spin_unlock_irq(&target->lock);
798 if (changed)
799 queue_work(srp_remove_wq, &target->remove_work);
801 return changed;
804 static void srp_disconnect_target(struct srp_target_port *target)
806 struct srp_rdma_ch *ch;
807 int i;
809 /* XXX should send SRP_I_LOGOUT request */
811 for (i = 0; i < target->ch_count; i++) {
812 ch = &target->ch[i];
813 ch->connected = false;
814 if (ch->cm_id && ib_send_cm_dreq(ch->cm_id, NULL, 0)) {
815 shost_printk(KERN_DEBUG, target->scsi_host,
816 PFX "Sending CM DREQ failed\n");
821 static void srp_free_req_data(struct srp_target_port *target,
822 struct srp_rdma_ch *ch)
824 struct srp_device *dev = target->srp_host->srp_dev;
825 struct ib_device *ibdev = dev->dev;
826 struct srp_request *req;
827 int i;
829 if (!ch->req_ring)
830 return;
832 for (i = 0; i < target->req_ring_size; ++i) {
833 req = &ch->req_ring[i];
834 if (dev->use_fast_reg) {
835 kfree(req->fr_list);
836 } else {
837 kfree(req->fmr_list);
838 kfree(req->map_page);
840 if (req->indirect_dma_addr) {
841 ib_dma_unmap_single(ibdev, req->indirect_dma_addr,
842 target->indirect_size,
843 DMA_TO_DEVICE);
845 kfree(req->indirect_desc);
848 kfree(ch->req_ring);
849 ch->req_ring = NULL;
852 static int srp_alloc_req_data(struct srp_rdma_ch *ch)
854 struct srp_target_port *target = ch->target;
855 struct srp_device *srp_dev = target->srp_host->srp_dev;
856 struct ib_device *ibdev = srp_dev->dev;
857 struct srp_request *req;
858 void *mr_list;
859 dma_addr_t dma_addr;
860 int i, ret = -ENOMEM;
862 ch->req_ring = kcalloc(target->req_ring_size, sizeof(*ch->req_ring),
863 GFP_KERNEL);
864 if (!ch->req_ring)
865 goto out;
867 for (i = 0; i < target->req_ring_size; ++i) {
868 req = &ch->req_ring[i];
869 mr_list = kmalloc(target->mr_per_cmd * sizeof(void *),
870 GFP_KERNEL);
871 if (!mr_list)
872 goto out;
873 if (srp_dev->use_fast_reg) {
874 req->fr_list = mr_list;
875 } else {
876 req->fmr_list = mr_list;
877 req->map_page = kmalloc(srp_dev->max_pages_per_mr *
878 sizeof(void *), GFP_KERNEL);
879 if (!req->map_page)
880 goto out;
882 req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL);
883 if (!req->indirect_desc)
884 goto out;
886 dma_addr = ib_dma_map_single(ibdev, req->indirect_desc,
887 target->indirect_size,
888 DMA_TO_DEVICE);
889 if (ib_dma_mapping_error(ibdev, dma_addr))
890 goto out;
892 req->indirect_dma_addr = dma_addr;
894 ret = 0;
896 out:
897 return ret;
901 * srp_del_scsi_host_attr() - Remove attributes defined in the host template.
902 * @shost: SCSI host whose attributes to remove from sysfs.
904 * Note: Any attributes defined in the host template that did not exist
905 * before this function was invoked will be ignored.
907 static void srp_del_scsi_host_attr(struct Scsi_Host *shost)
909 struct device_attribute **attr;
911 for (attr = shost->hostt->shost_attrs; attr && *attr; ++attr)
912 device_remove_file(&shost->shost_dev, *attr);
915 static void srp_remove_target(struct srp_target_port *target)
917 struct srp_rdma_ch *ch;
918 int i;
920 WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
922 srp_del_scsi_host_attr(target->scsi_host);
923 srp_rport_get(target->rport);
924 srp_remove_host(target->scsi_host);
925 scsi_remove_host(target->scsi_host);
926 srp_stop_rport_timers(target->rport);
927 srp_disconnect_target(target);
928 for (i = 0; i < target->ch_count; i++) {
929 ch = &target->ch[i];
930 srp_free_ch_ib(target, ch);
932 cancel_work_sync(&target->tl_err_work);
933 srp_rport_put(target->rport);
934 for (i = 0; i < target->ch_count; i++) {
935 ch = &target->ch[i];
936 srp_free_req_data(target, ch);
938 kfree(target->ch);
939 target->ch = NULL;
941 spin_lock(&target->srp_host->target_lock);
942 list_del(&target->list);
943 spin_unlock(&target->srp_host->target_lock);
945 scsi_host_put(target->scsi_host);
948 static void srp_remove_work(struct work_struct *work)
950 struct srp_target_port *target =
951 container_of(work, struct srp_target_port, remove_work);
953 WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
955 srp_remove_target(target);
958 static void srp_rport_delete(struct srp_rport *rport)
960 struct srp_target_port *target = rport->lld_data;
962 srp_queue_remove_work(target);
966 * srp_connected_ch() - number of connected channels
967 * @target: SRP target port.
969 static int srp_connected_ch(struct srp_target_port *target)
971 int i, c = 0;
973 for (i = 0; i < target->ch_count; i++)
974 c += target->ch[i].connected;
976 return c;
979 static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich)
981 struct srp_target_port *target = ch->target;
982 int ret;
984 WARN_ON_ONCE(!multich && srp_connected_ch(target) > 0);
986 ret = srp_lookup_path(ch);
987 if (ret)
988 goto out;
990 while (1) {
991 init_completion(&ch->done);
992 ret = srp_send_req(ch, multich);
993 if (ret)
994 goto out;
995 ret = wait_for_completion_interruptible(&ch->done);
996 if (ret < 0)
997 goto out;
1000 * The CM event handling code will set status to
1001 * SRP_PORT_REDIRECT if we get a port redirect REJ
1002 * back, or SRP_DLID_REDIRECT if we get a lid/qp
1003 * redirect REJ back.
1005 ret = ch->status;
1006 switch (ret) {
1007 case 0:
1008 ch->connected = true;
1009 goto out;
1011 case SRP_PORT_REDIRECT:
1012 ret = srp_lookup_path(ch);
1013 if (ret)
1014 goto out;
1015 break;
1017 case SRP_DLID_REDIRECT:
1018 break;
1020 case SRP_STALE_CONN:
1021 shost_printk(KERN_ERR, target->scsi_host, PFX
1022 "giving up on stale connection\n");
1023 ret = -ECONNRESET;
1024 goto out;
1026 default:
1027 goto out;
1031 out:
1032 return ret <= 0 ? ret : -ENODEV;
1035 static void srp_inv_rkey_err_done(struct ib_cq *cq, struct ib_wc *wc)
1037 srp_handle_qp_err(cq, wc, "INV RKEY");
1040 static int srp_inv_rkey(struct srp_request *req, struct srp_rdma_ch *ch,
1041 u32 rkey)
1043 struct ib_send_wr *bad_wr;
1044 struct ib_send_wr wr = {
1045 .opcode = IB_WR_LOCAL_INV,
1046 .next = NULL,
1047 .num_sge = 0,
1048 .send_flags = 0,
1049 .ex.invalidate_rkey = rkey,
1052 wr.wr_cqe = &req->reg_cqe;
1053 req->reg_cqe.done = srp_inv_rkey_err_done;
1054 return ib_post_send(ch->qp, &wr, &bad_wr);
1057 static void srp_unmap_data(struct scsi_cmnd *scmnd,
1058 struct srp_rdma_ch *ch,
1059 struct srp_request *req)
1061 struct srp_target_port *target = ch->target;
1062 struct srp_device *dev = target->srp_host->srp_dev;
1063 struct ib_device *ibdev = dev->dev;
1064 int i, res;
1066 if (!scsi_sglist(scmnd) ||
1067 (scmnd->sc_data_direction != DMA_TO_DEVICE &&
1068 scmnd->sc_data_direction != DMA_FROM_DEVICE))
1069 return;
1071 if (dev->use_fast_reg) {
1072 struct srp_fr_desc **pfr;
1074 for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) {
1075 res = srp_inv_rkey(req, ch, (*pfr)->mr->rkey);
1076 if (res < 0) {
1077 shost_printk(KERN_ERR, target->scsi_host, PFX
1078 "Queueing INV WR for rkey %#x failed (%d)\n",
1079 (*pfr)->mr->rkey, res);
1080 queue_work(system_long_wq,
1081 &target->tl_err_work);
1084 if (req->nmdesc)
1085 srp_fr_pool_put(ch->fr_pool, req->fr_list,
1086 req->nmdesc);
1087 } else if (dev->use_fmr) {
1088 struct ib_pool_fmr **pfmr;
1090 for (i = req->nmdesc, pfmr = req->fmr_list; i > 0; i--, pfmr++)
1091 ib_fmr_pool_unmap(*pfmr);
1094 ib_dma_unmap_sg(ibdev, scsi_sglist(scmnd), scsi_sg_count(scmnd),
1095 scmnd->sc_data_direction);
1099 * srp_claim_req - Take ownership of the scmnd associated with a request.
1100 * @ch: SRP RDMA channel.
1101 * @req: SRP request.
1102 * @sdev: If not NULL, only take ownership for this SCSI device.
1103 * @scmnd: If NULL, take ownership of @req->scmnd. If not NULL, only take
1104 * ownership of @req->scmnd if it equals @scmnd.
1106 * Return value:
1107 * Either NULL or a pointer to the SCSI command the caller became owner of.
1109 static struct scsi_cmnd *srp_claim_req(struct srp_rdma_ch *ch,
1110 struct srp_request *req,
1111 struct scsi_device *sdev,
1112 struct scsi_cmnd *scmnd)
1114 unsigned long flags;
1116 spin_lock_irqsave(&ch->lock, flags);
1117 if (req->scmnd &&
1118 (!sdev || req->scmnd->device == sdev) &&
1119 (!scmnd || req->scmnd == scmnd)) {
1120 scmnd = req->scmnd;
1121 req->scmnd = NULL;
1122 } else {
1123 scmnd = NULL;
1125 spin_unlock_irqrestore(&ch->lock, flags);
1127 return scmnd;
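/*
 * Illustrative use of srp_claim_req() (sketch; mirrors srp_finish_req()
 * below; "result" is a placeholder status): the caller that receives a
 * non-NULL pointer owns the command and must unmap its data and complete it
 * exactly once:
 *
 *	struct scsi_cmnd *scmnd = srp_claim_req(ch, req, NULL, NULL);
 *
 *	if (scmnd) {
 *		srp_free_req(ch, req, scmnd, 0);
 *		scmnd->result = result;
 *		scmnd->scsi_done(scmnd);
 *	}
 */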
1131 * srp_free_req() - Unmap data and adjust ch->req_lim.
1132 * @ch: SRP RDMA channel.
1133 * @req: Request to be freed.
1134 * @scmnd: SCSI command associated with @req.
1135 * @req_lim_delta: Amount to be added to @target->req_lim.
1137 static void srp_free_req(struct srp_rdma_ch *ch, struct srp_request *req,
1138 struct scsi_cmnd *scmnd, s32 req_lim_delta)
1140 unsigned long flags;
1142 srp_unmap_data(scmnd, ch, req);
1144 spin_lock_irqsave(&ch->lock, flags);
1145 ch->req_lim += req_lim_delta;
1146 spin_unlock_irqrestore(&ch->lock, flags);
1149 static void srp_finish_req(struct srp_rdma_ch *ch, struct srp_request *req,
1150 struct scsi_device *sdev, int result)
1152 struct scsi_cmnd *scmnd = srp_claim_req(ch, req, sdev, NULL);
1154 if (scmnd) {
1155 srp_free_req(ch, req, scmnd, 0);
1156 scmnd->result = result;
1157 scmnd->scsi_done(scmnd);
1161 static void srp_terminate_io(struct srp_rport *rport)
1163 struct srp_target_port *target = rport->lld_data;
1164 struct srp_rdma_ch *ch;
1165 struct Scsi_Host *shost = target->scsi_host;
1166 struct scsi_device *sdev;
1167 int i, j;
1170 * Invoking srp_terminate_io() while srp_queuecommand() is running
1171 * is not safe. Hence the warning statement below.
1173 shost_for_each_device(sdev, shost)
1174 WARN_ON_ONCE(sdev->request_queue->request_fn_active);
1176 for (i = 0; i < target->ch_count; i++) {
1177 ch = &target->ch[i];
1179 for (j = 0; j < target->req_ring_size; ++j) {
1180 struct srp_request *req = &ch->req_ring[j];
1182 srp_finish_req(ch, req, NULL,
1183 DID_TRANSPORT_FAILFAST << 16);
1189 * It is up to the caller to ensure that srp_rport_reconnect() calls are
1190 * serialized and that no concurrent srp_queuecommand(), srp_abort(),
1191 * srp_reset_device() or srp_reset_host() calls will occur while this function
1192 * is in progress. One way to realize that is not to call this function
1193 * directly but to call srp_reconnect_rport() instead since that last function
1194 * serializes calls of this function via rport->mutex and also blocks
1195 * srp_queuecommand() calls before invoking this function.
1197 static int srp_rport_reconnect(struct srp_rport *rport)
1199 struct srp_target_port *target = rport->lld_data;
1200 struct srp_rdma_ch *ch;
1201 int i, j, ret = 0;
1202 bool multich = false;
1204 srp_disconnect_target(target);
1206 if (target->state == SRP_TARGET_SCANNING)
1207 return -ENODEV;
1210 * Now get a new local CM ID so that we avoid confusing the target in
1211 * case things are really fouled up. Doing so also ensures that all CM
1212 * callbacks will have finished before a new QP is allocated.
1214 for (i = 0; i < target->ch_count; i++) {
1215 ch = &target->ch[i];
1216 ret += srp_new_cm_id(ch);
1218 for (i = 0; i < target->ch_count; i++) {
1219 ch = &target->ch[i];
1220 for (j = 0; j < target->req_ring_size; ++j) {
1221 struct srp_request *req = &ch->req_ring[j];
1223 srp_finish_req(ch, req, NULL, DID_RESET << 16);
1226 for (i = 0; i < target->ch_count; i++) {
1227 ch = &target->ch[i];
1229 * Whether or not creating a new CM ID succeeded, create a new
1230 * QP. This guarantees that all completion callback function
1231 * invocations have finished before request resetting starts.
1233 ret += srp_create_ch_ib(ch);
1235 INIT_LIST_HEAD(&ch->free_tx);
1236 for (j = 0; j < target->queue_size; ++j)
1237 list_add(&ch->tx_ring[j]->list, &ch->free_tx);
1240 target->qp_in_error = false;
1242 for (i = 0; i < target->ch_count; i++) {
1243 ch = &target->ch[i];
1244 if (ret)
1245 break;
1246 ret = srp_connect_ch(ch, multich);
1247 multich = true;
1250 if (ret == 0)
1251 shost_printk(KERN_INFO, target->scsi_host,
1252 PFX "reconnect succeeded\n");
1254 return ret;
1257 static void srp_map_desc(struct srp_map_state *state, dma_addr_t dma_addr,
1258 unsigned int dma_len, u32 rkey)
1260 struct srp_direct_buf *desc = state->desc;
1262 WARN_ON_ONCE(!dma_len);
1264 desc->va = cpu_to_be64(dma_addr);
1265 desc->key = cpu_to_be32(rkey);
1266 desc->len = cpu_to_be32(dma_len);
1268 state->total_len += dma_len;
1269 state->desc++;
1270 state->ndesc++;
1273 static int srp_map_finish_fmr(struct srp_map_state *state,
1274 struct srp_rdma_ch *ch)
1276 struct srp_target_port *target = ch->target;
1277 struct srp_device *dev = target->srp_host->srp_dev;
1278 struct ib_pd *pd = target->pd;
1279 struct ib_pool_fmr *fmr;
1280 u64 io_addr = 0;
1282 if (state->fmr.next >= state->fmr.end)
1283 return -ENOMEM;
1285 WARN_ON_ONCE(!dev->use_fmr);
1287 if (state->npages == 0)
1288 return 0;
1290 if (state->npages == 1 && (pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) {
1291 srp_map_desc(state, state->base_dma_addr, state->dma_len,
1292 pd->unsafe_global_rkey);
1293 goto reset_state;
1296 fmr = ib_fmr_pool_map_phys(ch->fmr_pool, state->pages,
1297 state->npages, io_addr);
1298 if (IS_ERR(fmr))
1299 return PTR_ERR(fmr);
1301 *state->fmr.next++ = fmr;
1302 state->nmdesc++;
1304 srp_map_desc(state, state->base_dma_addr & ~dev->mr_page_mask,
1305 state->dma_len, fmr->fmr->rkey);
1307 reset_state:
1308 state->npages = 0;
1309 state->dma_len = 0;
1311 return 0;
1314 static void srp_reg_mr_err_done(struct ib_cq *cq, struct ib_wc *wc)
1316 srp_handle_qp_err(cq, wc, "FAST REG");
1320 * Map up to sg_nents elements of state->sg, where *sg_offset_p is the
1321 * offset at which to start in the first element. If sg_offset_p != NULL then
1322 * *sg_offset_p is updated to the offset in state->sg[retval] of the first
1323 * byte that has not yet been mapped.
1325 static int srp_map_finish_fr(struct srp_map_state *state,
1326 struct srp_request *req,
1327 struct srp_rdma_ch *ch, int sg_nents,
1328 unsigned int *sg_offset_p)
1330 struct srp_target_port *target = ch->target;
1331 struct srp_device *dev = target->srp_host->srp_dev;
1332 struct ib_pd *pd = target->pd;
1333 struct ib_send_wr *bad_wr;
1334 struct ib_reg_wr wr;
1335 struct srp_fr_desc *desc;
1336 u32 rkey;
1337 int n, err;
1339 if (state->fr.next >= state->fr.end)
1340 return -ENOMEM;
1342 WARN_ON_ONCE(!dev->use_fast_reg);
1344 if (sg_nents == 1 && (pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) {
1345 unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
1347 srp_map_desc(state, sg_dma_address(state->sg) + sg_offset,
1348 sg_dma_len(state->sg) - sg_offset,
1349 pd->unsafe_global_rkey);
1350 if (sg_offset_p)
1351 *sg_offset_p = 0;
1352 return 1;
1355 desc = srp_fr_pool_get(ch->fr_pool);
1356 if (!desc)
1357 return -ENOMEM;
1359 rkey = ib_inc_rkey(desc->mr->rkey);
1360 ib_update_fast_reg_key(desc->mr, rkey);
1362 n = ib_map_mr_sg(desc->mr, state->sg, sg_nents, sg_offset_p,
1363 dev->mr_page_size);
1364 if (unlikely(n < 0)) {
1365 srp_fr_pool_put(ch->fr_pool, &desc, 1);
1366 pr_debug("%s: ib_map_mr_sg(%d, %d) returned %d.\n",
1367 dev_name(&req->scmnd->device->sdev_gendev), sg_nents,
1368 sg_offset_p ? *sg_offset_p : -1, n);
1369 return n;
1372 WARN_ON_ONCE(desc->mr->length == 0);
1374 req->reg_cqe.done = srp_reg_mr_err_done;
1376 wr.wr.next = NULL;
1377 wr.wr.opcode = IB_WR_REG_MR;
1378 wr.wr.wr_cqe = &req->reg_cqe;
1379 wr.wr.num_sge = 0;
1380 wr.wr.send_flags = 0;
1381 wr.mr = desc->mr;
1382 wr.key = desc->mr->rkey;
1383 wr.access = (IB_ACCESS_LOCAL_WRITE |
1384 IB_ACCESS_REMOTE_READ |
1385 IB_ACCESS_REMOTE_WRITE);
1387 *state->fr.next++ = desc;
1388 state->nmdesc++;
1390 srp_map_desc(state, desc->mr->iova,
1391 desc->mr->length, desc->mr->rkey);
1393 err = ib_post_send(ch->qp, &wr.wr, &bad_wr);
1394 if (unlikely(err)) {
1395 WARN_ON_ONCE(err == -ENOMEM);
1396 return err;
1399 return n;
1402 static int srp_map_sg_entry(struct srp_map_state *state,
1403 struct srp_rdma_ch *ch,
1404 struct scatterlist *sg)
1406 struct srp_target_port *target = ch->target;
1407 struct srp_device *dev = target->srp_host->srp_dev;
1408 struct ib_device *ibdev = dev->dev;
1409 dma_addr_t dma_addr = ib_sg_dma_address(ibdev, sg);
1410 unsigned int dma_len = ib_sg_dma_len(ibdev, sg);
1411 unsigned int len = 0;
1412 int ret;
1414 WARN_ON_ONCE(!dma_len);
1416 while (dma_len) {
1417 unsigned offset = dma_addr & ~dev->mr_page_mask;
1419 if (state->npages == dev->max_pages_per_mr ||
1420 (state->npages > 0 && offset != 0)) {
1421 ret = srp_map_finish_fmr(state, ch);
1422 if (ret)
1423 return ret;
1426 len = min_t(unsigned int, dma_len, dev->mr_page_size - offset);
1428 if (!state->npages)
1429 state->base_dma_addr = dma_addr;
1430 state->pages[state->npages++] = dma_addr & dev->mr_page_mask;
1431 state->dma_len += len;
1432 dma_addr += len;
1433 dma_len -= len;
1437 * If the end of the MR is not on a page boundary then we need to
1438 * close it out and start a new one -- we can only merge at page
1439 * boundaries.
1441 ret = 0;
1442 if ((dma_addr & ~dev->mr_page_mask) != 0)
1443 ret = srp_map_finish_fmr(state, ch);
1444 return ret;
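/*
 * Worked example for the splitting logic above (hypothetical numbers): with
 * mr_page_size = 4096, an S/G element of 10000 bytes starting at DMA address
 * 0x12340100 is recorded as the pages 0x12340000, 0x12341000 and 0x12342000.
 * Because the element starts at a non-zero page offset, any descriptor state
 * left over from a previous element is closed out first, and because it does
 * not end on a page boundary it is closed out again afterwards: FMR mappings
 * can only be merged at page boundaries.
 */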
1447 static int srp_map_sg_fmr(struct srp_map_state *state, struct srp_rdma_ch *ch,
1448 struct srp_request *req, struct scatterlist *scat,
1449 int count)
1451 struct scatterlist *sg;
1452 int i, ret;
1454 state->pages = req->map_page;
1455 state->fmr.next = req->fmr_list;
1456 state->fmr.end = req->fmr_list + ch->target->mr_per_cmd;
1458 for_each_sg(scat, sg, count, i) {
1459 ret = srp_map_sg_entry(state, ch, sg);
1460 if (ret)
1461 return ret;
1464 ret = srp_map_finish_fmr(state, ch);
1465 if (ret)
1466 return ret;
1468 return 0;
1471 static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch,
1472 struct srp_request *req, struct scatterlist *scat,
1473 int count)
1475 unsigned int sg_offset = 0;
1477 state->fr.next = req->fr_list;
1478 state->fr.end = req->fr_list + ch->target->mr_per_cmd;
1479 state->sg = scat;
1481 if (count == 0)
1482 return 0;
1484 while (count) {
1485 int i, n;
1487 n = srp_map_finish_fr(state, req, ch, count, &sg_offset);
1488 if (unlikely(n < 0))
1489 return n;
1491 count -= n;
1492 for (i = 0; i < n; i++)
1493 state->sg = sg_next(state->sg);
1496 return 0;
1499 static int srp_map_sg_dma(struct srp_map_state *state, struct srp_rdma_ch *ch,
1500 struct srp_request *req, struct scatterlist *scat,
1501 int count)
1503 struct srp_target_port *target = ch->target;
1504 struct srp_device *dev = target->srp_host->srp_dev;
1505 struct scatterlist *sg;
1506 int i;
1508 for_each_sg(scat, sg, count, i) {
1509 srp_map_desc(state, ib_sg_dma_address(dev->dev, sg),
1510 ib_sg_dma_len(dev->dev, sg),
1511 target->pd->unsafe_global_rkey);
1514 return 0;
1518 * Register the indirect data buffer descriptor with the HCA.
1520 * Note: since the indirect data buffer descriptor has been allocated with
1521 * kmalloc() it is guaranteed that this buffer is a physically contiguous
1522 * memory buffer.
1524 static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req,
1525 void **next_mr, void **end_mr, u32 idb_len,
1526 __be32 *idb_rkey)
1528 struct srp_target_port *target = ch->target;
1529 struct srp_device *dev = target->srp_host->srp_dev;
1530 struct srp_map_state state;
1531 struct srp_direct_buf idb_desc;
1532 u64 idb_pages[1];
1533 struct scatterlist idb_sg[1];
1534 int ret;
1536 memset(&state, 0, sizeof(state));
1537 memset(&idb_desc, 0, sizeof(idb_desc));
1538 state.gen.next = next_mr;
1539 state.gen.end = end_mr;
1540 state.desc = &idb_desc;
1541 state.base_dma_addr = req->indirect_dma_addr;
1542 state.dma_len = idb_len;
1544 if (dev->use_fast_reg) {
1545 state.sg = idb_sg;
1546 sg_init_one(idb_sg, req->indirect_desc, idb_len);
1547 idb_sg->dma_address = req->indirect_dma_addr; /* hack! */
1548 #ifdef CONFIG_NEED_SG_DMA_LENGTH
1549 idb_sg->dma_length = idb_sg->length; /* hack^2 */
1550 #endif
1551 ret = srp_map_finish_fr(&state, req, ch, 1, NULL);
1552 if (ret < 0)
1553 return ret;
1554 WARN_ON_ONCE(ret < 1);
1555 } else if (dev->use_fmr) {
1556 state.pages = idb_pages;
1557 state.pages[0] = (req->indirect_dma_addr &
1558 dev->mr_page_mask);
1559 state.npages = 1;
1560 ret = srp_map_finish_fmr(&state, ch);
1561 if (ret < 0)
1562 return ret;
1563 } else {
1564 return -EINVAL;
1567 *idb_rkey = idb_desc.key;
1569 return 0;
1572 #if defined(DYNAMIC_DATA_DEBUG)
1573 static void srp_check_mapping(struct srp_map_state *state,
1574 struct srp_rdma_ch *ch, struct srp_request *req,
1575 struct scatterlist *scat, int count)
1577 struct srp_device *dev = ch->target->srp_host->srp_dev;
1578 struct srp_fr_desc **pfr;
1579 u64 desc_len = 0, mr_len = 0;
1580 int i;
1582 for (i = 0; i < state->ndesc; i++)
1583 desc_len += be32_to_cpu(req->indirect_desc[i].len);
1584 if (dev->use_fast_reg)
1585 for (i = 0, pfr = req->fr_list; i < state->nmdesc; i++, pfr++)
1586 mr_len += (*pfr)->mr->length;
1587 else if (dev->use_fmr)
1588 for (i = 0; i < state->nmdesc; i++)
1589 mr_len += be32_to_cpu(req->indirect_desc[i].len);
1590 if (desc_len != scsi_bufflen(req->scmnd) ||
1591 mr_len > scsi_bufflen(req->scmnd))
1592 pr_err("Inconsistent: scsi len %d <> desc len %lld <> mr len %lld; ndesc %d; nmdesc = %d\n",
1593 scsi_bufflen(req->scmnd), desc_len, mr_len,
1594 state->ndesc, state->nmdesc);
1596 #endif
1599 * srp_map_data() - map SCSI data buffer onto an SRP request
1600 * @scmnd: SCSI command to map
1601 * @ch: SRP RDMA channel
1602 * @req: SRP request
1604 * Returns the length in bytes of the SRP_CMD IU or a negative value if
1605 * mapping failed.
1607 static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
1608 struct srp_request *req)
1610 struct srp_target_port *target = ch->target;
1611 struct ib_pd *pd = target->pd;
1612 struct scatterlist *scat;
1613 struct srp_cmd *cmd = req->cmd->buf;
1614 int len, nents, count, ret;
1615 struct srp_device *dev;
1616 struct ib_device *ibdev;
1617 struct srp_map_state state;
1618 struct srp_indirect_buf *indirect_hdr;
1619 u32 idb_len, table_len;
1620 __be32 idb_rkey;
1621 u8 fmt;
1623 if (!scsi_sglist(scmnd) || scmnd->sc_data_direction == DMA_NONE)
1624 return sizeof (struct srp_cmd);
1626 if (scmnd->sc_data_direction != DMA_FROM_DEVICE &&
1627 scmnd->sc_data_direction != DMA_TO_DEVICE) {
1628 shost_printk(KERN_WARNING, target->scsi_host,
1629 PFX "Unhandled data direction %d\n",
1630 scmnd->sc_data_direction);
1631 return -EINVAL;
1634 nents = scsi_sg_count(scmnd);
1635 scat = scsi_sglist(scmnd);
1637 dev = target->srp_host->srp_dev;
1638 ibdev = dev->dev;
1640 count = ib_dma_map_sg(ibdev, scat, nents, scmnd->sc_data_direction);
1641 if (unlikely(count == 0))
1642 return -EIO;
1644 fmt = SRP_DATA_DESC_DIRECT;
1645 len = sizeof (struct srp_cmd) + sizeof (struct srp_direct_buf);
1647 if (count == 1 && (pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) {
1649 * The midlayer only generated a single gather/scatter
1650 * entry, or DMA mapping coalesced everything to a
1651 * single entry. So a direct descriptor along with
1652 * the DMA MR suffices.
1654 struct srp_direct_buf *buf = (void *) cmd->add_data;
1656 buf->va = cpu_to_be64(ib_sg_dma_address(ibdev, scat));
1657 buf->key = cpu_to_be32(pd->unsafe_global_rkey);
1658 buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat));
1660 req->nmdesc = 0;
1661 goto map_complete;
1665 * We have more than one scatter/gather entry, so build our indirect
1666 * descriptor table, trying to merge as many entries as we can.
1668 indirect_hdr = (void *) cmd->add_data;
1670 ib_dma_sync_single_for_cpu(ibdev, req->indirect_dma_addr,
1671 target->indirect_size, DMA_TO_DEVICE);
1673 memset(&state, 0, sizeof(state));
1674 state.desc = req->indirect_desc;
1675 if (dev->use_fast_reg)
1676 ret = srp_map_sg_fr(&state, ch, req, scat, count);
1677 else if (dev->use_fmr)
1678 ret = srp_map_sg_fmr(&state, ch, req, scat, count);
1679 else
1680 ret = srp_map_sg_dma(&state, ch, req, scat, count);
1681 req->nmdesc = state.nmdesc;
1682 if (ret < 0)
1683 goto unmap;
1685 #if defined(DYNAMIC_DEBUG)
1687 DEFINE_DYNAMIC_DEBUG_METADATA(ddm,
1688 "Memory mapping consistency check");
1689 if (unlikely(ddm.flags & _DPRINTK_FLAGS_PRINT))
1690 srp_check_mapping(&state, ch, req, scat, count);
1692 #endif
1694 /* We've mapped the request, now pull as much of the indirect
1695 * descriptor table as we can into the command buffer. If this
1696 * target is not using an external indirect table, we are
1697 * guaranteed to fit into the command, as the SCSI layer won't
1698 * give us more S/G entries than we allow.
1700 if (state.ndesc == 1) {
1702 * Memory registration collapsed the sg-list into one entry,
1703 * so use a direct descriptor.
1705 struct srp_direct_buf *buf = (void *) cmd->add_data;
1707 *buf = req->indirect_desc[0];
1708 goto map_complete;
1711 if (unlikely(target->cmd_sg_cnt < state.ndesc &&
1712 !target->allow_ext_sg)) {
1713 shost_printk(KERN_ERR, target->scsi_host,
1714 "Could not fit S/G list into SRP_CMD\n");
1715 ret = -EIO;
1716 goto unmap;
1719 count = min(state.ndesc, target->cmd_sg_cnt);
1720 table_len = state.ndesc * sizeof (struct srp_direct_buf);
1721 idb_len = sizeof(struct srp_indirect_buf) + table_len;
1723 fmt = SRP_DATA_DESC_INDIRECT;
1724 len = sizeof(struct srp_cmd) + sizeof (struct srp_indirect_buf);
1725 len += count * sizeof (struct srp_direct_buf);
1727 memcpy(indirect_hdr->desc_list, req->indirect_desc,
1728 count * sizeof (struct srp_direct_buf));
1730 if (!(pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) {
1731 ret = srp_map_idb(ch, req, state.gen.next, state.gen.end,
1732 idb_len, &idb_rkey);
1733 if (ret < 0)
1734 goto unmap;
1735 req->nmdesc++;
1736 } else {
1737 idb_rkey = cpu_to_be32(pd->unsafe_global_rkey);
1740 indirect_hdr->table_desc.va = cpu_to_be64(req->indirect_dma_addr);
1741 indirect_hdr->table_desc.key = idb_rkey;
1742 indirect_hdr->table_desc.len = cpu_to_be32(table_len);
1743 indirect_hdr->len = cpu_to_be32(state.total_len);
1745 if (scmnd->sc_data_direction == DMA_TO_DEVICE)
1746 cmd->data_out_desc_cnt = count;
1747 else
1748 cmd->data_in_desc_cnt = count;
1750 ib_dma_sync_single_for_device(ibdev, req->indirect_dma_addr, table_len,
1751 DMA_TO_DEVICE);
1753 map_complete:
1754 if (scmnd->sc_data_direction == DMA_TO_DEVICE)
1755 cmd->buf_fmt = fmt << 4;
1756 else
1757 cmd->buf_fmt = fmt;
1759 return len;
1761 unmap:
1762 srp_unmap_data(scmnd, ch, req);
1763 if (ret == -ENOMEM && req->nmdesc >= target->mr_pool_size)
1764 ret = -E2BIG;
1765 return ret;
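/*
 * Worked example for the returned IU length (hypothetical numbers): if
 * registration collapses the S/G list into a single descriptor, len is
 * sizeof(struct srp_cmd) + sizeof(struct srp_direct_buf). With
 * state.ndesc = 3 and cmd_sg_cnt >= 3, the indirect format is used instead
 * and len = sizeof(struct srp_cmd) + sizeof(struct srp_indirect_buf) +
 * 3 * sizeof(struct srp_direct_buf), with the full descriptor table written
 * to req->indirect_desc and referenced through indirect_hdr->table_desc.
 */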
1769 * Return an IU and possible credit to the free pool
1771 static void srp_put_tx_iu(struct srp_rdma_ch *ch, struct srp_iu *iu,
1772 enum srp_iu_type iu_type)
1774 unsigned long flags;
1776 spin_lock_irqsave(&ch->lock, flags);
1777 list_add(&iu->list, &ch->free_tx);
1778 if (iu_type != SRP_IU_RSP)
1779 ++ch->req_lim;
1780 spin_unlock_irqrestore(&ch->lock, flags);
1784 * Must be called with ch->lock held to protect req_lim and free_tx.
1785 * If IU is not sent, it must be returned using srp_put_tx_iu().
1787 * Note:
1788 * An upper limit for the number of allocated information units for each
1789 * request type is:
1790 * - SRP_IU_CMD: SRP_CMD_SQ_SIZE, since the SCSI mid-layer never queues
1791 * more than Scsi_Host.can_queue requests.
1792 * - SRP_IU_TSK_MGMT: SRP_TSK_MGMT_SQ_SIZE.
1793 * - SRP_IU_RSP: 1, since a conforming SRP target never sends more than
1794 * one unanswered SRP request to an initiator.
1796 static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch,
1797 enum srp_iu_type iu_type)
1799 struct srp_target_port *target = ch->target;
1800 s32 rsv = (iu_type == SRP_IU_TSK_MGMT) ? 0 : SRP_TSK_MGMT_SQ_SIZE;
1801 struct srp_iu *iu;
1803 ib_process_cq_direct(ch->send_cq, -1);
1805 if (list_empty(&ch->free_tx))
1806 return NULL;
1808 /* Initiator responses to target requests do not consume credits */
1809 if (iu_type != SRP_IU_RSP) {
1810 if (ch->req_lim <= rsv) {
1811 ++target->zero_req_lim;
1812 return NULL;
1815 --ch->req_lim;
1818 iu = list_first_entry(&ch->free_tx, struct srp_iu, list);
1819 list_del(&iu->list);
1820 return iu;
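/*
 * Example of the credit reservation above (illustrative): SRP_TSK_MGMT_SQ_SIZE
 * credits are held back from normal commands, so an SRP_IU_CMD allocation
 * fails once ch->req_lim drops to that reserve while a task management
 * request can still be sent; SRP_IU_RSP allocations skip the check entirely
 * because initiator responses do not consume credits.
 */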
1823 static void srp_send_done(struct ib_cq *cq, struct ib_wc *wc)
1825 struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
1826 struct srp_rdma_ch *ch = cq->cq_context;
1828 if (unlikely(wc->status != IB_WC_SUCCESS)) {
1829 srp_handle_qp_err(cq, wc, "SEND");
1830 return;
1833 list_add(&iu->list, &ch->free_tx);
1836 static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len)
1838 struct srp_target_port *target = ch->target;
1839 struct ib_sge list;
1840 struct ib_send_wr wr, *bad_wr;
1842 list.addr = iu->dma;
1843 list.length = len;
1844 list.lkey = target->lkey;
1846 iu->cqe.done = srp_send_done;
1848 wr.next = NULL;
1849 wr.wr_cqe = &iu->cqe;
1850 wr.sg_list = &list;
1851 wr.num_sge = 1;
1852 wr.opcode = IB_WR_SEND;
1853 wr.send_flags = IB_SEND_SIGNALED;
1855 return ib_post_send(ch->qp, &wr, &bad_wr);
1858 static int srp_post_recv(struct srp_rdma_ch *ch, struct srp_iu *iu)
1860 struct srp_target_port *target = ch->target;
1861 struct ib_recv_wr wr, *bad_wr;
1862 struct ib_sge list;
1864 list.addr = iu->dma;
1865 list.length = iu->size;
1866 list.lkey = target->lkey;
1868 iu->cqe.done = srp_recv_done;
1870 wr.next = NULL;
1871 wr.wr_cqe = &iu->cqe;
1872 wr.sg_list = &list;
1873 wr.num_sge = 1;
1875 return ib_post_recv(ch->qp, &wr, &bad_wr);
1878 static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp)
1880 struct srp_target_port *target = ch->target;
1881 struct srp_request *req;
1882 struct scsi_cmnd *scmnd;
1883 unsigned long flags;
1885 if (unlikely(rsp->tag & SRP_TAG_TSK_MGMT)) {
1886 spin_lock_irqsave(&ch->lock, flags);
1887 ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
1888 if (rsp->tag == ch->tsk_mgmt_tag) {
1889 ch->tsk_mgmt_status = -1;
1890 if (be32_to_cpu(rsp->resp_data_len) >= 4)
1891 ch->tsk_mgmt_status = rsp->data[3];
1892 complete(&ch->tsk_mgmt_done);
1893 } else {
1894 shost_printk(KERN_ERR, target->scsi_host,
1895 "Received tsk mgmt response too late for tag %#llx\n",
1896 rsp->tag);
1898 spin_unlock_irqrestore(&ch->lock, flags);
1899 } else {
1900 scmnd = scsi_host_find_tag(target->scsi_host, rsp->tag);
1901 if (scmnd && scmnd->host_scribble) {
1902 req = (void *)scmnd->host_scribble;
1903 scmnd = srp_claim_req(ch, req, NULL, scmnd);
1904 } else {
1905 scmnd = NULL;
1907 if (!scmnd) {
1908 shost_printk(KERN_ERR, target->scsi_host,
1909 "Null scmnd for RSP w/tag %#016llx received on ch %td / QP %#x\n",
1910 rsp->tag, ch - target->ch, ch->qp->qp_num);
1912 spin_lock_irqsave(&ch->lock, flags);
1913 ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
1914 spin_unlock_irqrestore(&ch->lock, flags);
1916 return;
1918 scmnd->result = rsp->status;
1920 if (rsp->flags & SRP_RSP_FLAG_SNSVALID) {
1921 memcpy(scmnd->sense_buffer, rsp->data +
1922 be32_to_cpu(rsp->resp_data_len),
1923 min_t(int, be32_to_cpu(rsp->sense_data_len),
1924 SCSI_SENSE_BUFFERSIZE));
1927 if (unlikely(rsp->flags & SRP_RSP_FLAG_DIUNDER))
1928 scsi_set_resid(scmnd, be32_to_cpu(rsp->data_in_res_cnt));
1929 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DIOVER))
1930 scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_in_res_cnt));
1931 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOUNDER))
1932 scsi_set_resid(scmnd, be32_to_cpu(rsp->data_out_res_cnt));
1933 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOOVER))
1934 scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_out_res_cnt));
1936 srp_free_req(ch, req, scmnd,
1937 be32_to_cpu(rsp->req_lim_delta));
1939 scmnd->host_scribble = NULL;
1940 scmnd->scsi_done(scmnd);
1944 static int srp_response_common(struct srp_rdma_ch *ch, s32 req_delta,
1945 void *rsp, int len)
1947 struct srp_target_port *target = ch->target;
1948 struct ib_device *dev = target->srp_host->srp_dev->dev;
1949 unsigned long flags;
1950 struct srp_iu *iu;
1951 int err;
1953 spin_lock_irqsave(&ch->lock, flags);
1954 ch->req_lim += req_delta;
1955 iu = __srp_get_tx_iu(ch, SRP_IU_RSP);
1956 spin_unlock_irqrestore(&ch->lock, flags);
1958 if (!iu) {
1959 shost_printk(KERN_ERR, target->scsi_host, PFX
1960 "no IU available to send response\n");
1961 return 1;
1964 ib_dma_sync_single_for_cpu(dev, iu->dma, len, DMA_TO_DEVICE);
1965 memcpy(iu->buf, rsp, len);
1966 ib_dma_sync_single_for_device(dev, iu->dma, len, DMA_TO_DEVICE);
1968 err = srp_post_send(ch, iu, len);
1969 if (err) {
1970 shost_printk(KERN_ERR, target->scsi_host, PFX
1971 "unable to post response: %d\n", err);
1972 srp_put_tx_iu(ch, iu, SRP_IU_RSP);
1975 return err;
1978 static void srp_process_cred_req(struct srp_rdma_ch *ch,
1979 struct srp_cred_req *req)
1981 struct srp_cred_rsp rsp = {
1982 .opcode = SRP_CRED_RSP,
1983 .tag = req->tag,
1985 s32 delta = be32_to_cpu(req->req_lim_delta);
1987 if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
1988 shost_printk(KERN_ERR, ch->target->scsi_host, PFX
1989 "problems processing SRP_CRED_REQ\n");
1992 static void srp_process_aer_req(struct srp_rdma_ch *ch,
1993 struct srp_aer_req *req)
1995 struct srp_target_port *target = ch->target;
1996 struct srp_aer_rsp rsp = {
1997 .opcode = SRP_AER_RSP,
1998 .tag = req->tag,
2000 s32 delta = be32_to_cpu(req->req_lim_delta);
2002 shost_printk(KERN_ERR, target->scsi_host, PFX
2003 "ignoring AER for LUN %llu\n", scsilun_to_int(&req->lun));
2005 if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
2006 shost_printk(KERN_ERR, target->scsi_host, PFX
2007 "problems processing SRP_AER_REQ\n");
2010 static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc)
2012 struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
2013 struct srp_rdma_ch *ch = cq->cq_context;
2014 struct srp_target_port *target = ch->target;
2015 struct ib_device *dev = target->srp_host->srp_dev->dev;
2016 int res;
2017 u8 opcode;
2019 if (unlikely(wc->status != IB_WC_SUCCESS)) {
2020 srp_handle_qp_err(cq, wc, "RECV");
2021 return;
2024 ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_ti_iu_len,
2025 DMA_FROM_DEVICE);
2027 opcode = *(u8 *) iu->buf;
2029 if (0) {
2030 shost_printk(KERN_ERR, target->scsi_host,
2031 PFX "recv completion, opcode 0x%02x\n", opcode);
2032 print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 8, 1,
2033 iu->buf, wc->byte_len, true);
2036 switch (opcode) {
2037 case SRP_RSP:
2038 srp_process_rsp(ch, iu->buf);
2039 break;
2041 case SRP_CRED_REQ:
2042 srp_process_cred_req(ch, iu->buf);
2043 break;
2045 case SRP_AER_REQ:
2046 srp_process_aer_req(ch, iu->buf);
2047 break;
2049 case SRP_T_LOGOUT:
2050 /* XXX Handle target logout */
2051 shost_printk(KERN_WARNING, target->scsi_host,
2052 PFX "Got target logout request\n");
2053 break;
2055 default:
2056 shost_printk(KERN_WARNING, target->scsi_host,
2057 PFX "Unhandled SRP opcode 0x%02x\n", opcode);
2058 break;
2061 ib_dma_sync_single_for_device(dev, iu->dma, ch->max_ti_iu_len,
2062 DMA_FROM_DEVICE);
2064 res = srp_post_recv(ch, iu);
2065 if (res != 0)
2066 shost_printk(KERN_ERR, target->scsi_host,
2067 PFX "Recv failed with error code %d\n", res);
2071 * srp_tl_err_work() - handle a transport layer error
2072 * @work: Work structure embedded in an SRP target port.
2074 * Note: This function may get invoked before the rport has been created,
2075 * hence the target->rport test.
2077 static void srp_tl_err_work(struct work_struct *work)
2079 struct srp_target_port *target;
2081 target = container_of(work, struct srp_target_port, tl_err_work);
2082 if (target->rport)
2083 srp_start_tl_fail_timers(target->rport);
2086 static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
2087 const char *opname)
2089 struct srp_rdma_ch *ch = cq->cq_context;
2090 struct srp_target_port *target = ch->target;
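/*
 * Report only the first error on a connected channel; later errors are
 * suppressed via qp_in_error. Transport layer failure handling is deferred
 * to srp_tl_err_work() on system_long_wq.
 */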
2092 if (ch->connected && !target->qp_in_error) {
2093 shost_printk(KERN_ERR, target->scsi_host,
2094 PFX "failed %s status %s (%d) for CQE %p\n",
2095 opname, ib_wc_status_msg(wc->status), wc->status,
2096 wc->wr_cqe);
2097 queue_work(system_long_wq, &target->tl_err_work);
2099 target->qp_in_error = true;
2102 static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
2104 struct srp_target_port *target = host_to_target(shost);
2105 struct srp_rport *rport = target->rport;
2106 struct srp_rdma_ch *ch;
2107 struct srp_request *req;
2108 struct srp_iu *iu;
2109 struct srp_cmd *cmd;
2110 struct ib_device *dev;
2111 unsigned long flags;
2112 u32 tag;
2113 u16 idx;
2114 int len, ret;
2115 const bool in_scsi_eh = !in_interrupt() && current == shost->ehandler;
2118 * The SCSI EH thread is the only context from which srp_queuecommand()
2119 * can get invoked for blocked devices (SDEV_BLOCK /
2120 * SDEV_CREATED_BLOCK). Avoid racing with srp_reconnect_rport() by
2121 * locking the rport mutex if invoked from inside the SCSI EH.
2123 if (in_scsi_eh)
2124 mutex_lock(&rport->mutex);
2126 scmnd->result = srp_chkready(target->rport);
2127 if (unlikely(scmnd->result))
2128 goto err;
2130 WARN_ON_ONCE(scmnd->request->tag < 0);
2131 tag = blk_mq_unique_tag(scmnd->request);
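/*
 * The unique tag encodes the hardware queue index in its upper 16 bits and
 * the per-queue tag in its lower 16 bits: the former selects the RDMA
 * channel, the latter indexes that channel's request ring.
 */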
2132 ch = &target->ch[blk_mq_unique_tag_to_hwq(tag)];
2133 idx = blk_mq_unique_tag_to_tag(tag);
2134 WARN_ONCE(idx >= target->req_ring_size, "%s: tag %#x: idx %d >= %d\n",
2135 dev_name(&shost->shost_gendev), tag, idx,
2136 target->req_ring_size);
2138 spin_lock_irqsave(&ch->lock, flags);
2139 iu = __srp_get_tx_iu(ch, SRP_IU_CMD);
2140 spin_unlock_irqrestore(&ch->lock, flags);
2142 if (!iu)
2143 goto err;
2145 req = &ch->req_ring[idx];
2146 dev = target->srp_host->srp_dev->dev;
2147 ib_dma_sync_single_for_cpu(dev, iu->dma, target->max_iu_len,
2148 DMA_TO_DEVICE);
2150 scmnd->host_scribble = (void *) req;
2152 cmd = iu->buf;
2153 memset(cmd, 0, sizeof *cmd);
2155 cmd->opcode = SRP_CMD;
2156 int_to_scsilun(scmnd->device->lun, &cmd->lun);
2157 cmd->tag = tag;
2158 memcpy(cmd->cdb, scmnd->cmnd, scmnd->cmd_len);
2160 req->scmnd = scmnd;
2161 req->cmd = iu;
2163 len = srp_map_data(scmnd, ch, req);
2164 if (len < 0) {
2165 shost_printk(KERN_ERR, target->scsi_host,
2166 PFX "Failed to map data (%d)\n", len);
2168 * If we ran out of memory descriptors (-ENOMEM) because an
2169 * application is queuing many requests with more than
2170 * max_pages_per_mr sg-list elements, tell the SCSI mid-layer
2171 * to reduce queue depth temporarily.
2173 scmnd->result = len == -ENOMEM ?
2174 DID_OK << 16 | QUEUE_FULL << 1 : DID_ERROR << 16;
2175 goto err_iu;
2178 ib_dma_sync_single_for_device(dev, iu->dma, target->max_iu_len,
2179 DMA_TO_DEVICE);
2181 if (srp_post_send(ch, iu, len)) {
2182 shost_printk(KERN_ERR, target->scsi_host, PFX "Send failed\n");
2183 goto err_unmap;
2186 ret = 0;
2188 unlock_rport:
2189 if (in_scsi_eh)
2190 mutex_unlock(&rport->mutex);
2192 return ret;
2194 err_unmap:
2195 srp_unmap_data(scmnd, ch, req);
2197 err_iu:
2198 srp_put_tx_iu(ch, iu, SRP_IU_CMD);
2201 * Make sure that the loops that iterate over the request ring do not
2202 * encounter a dangling SCSI command pointer.
2204 req->scmnd = NULL;
2206 err:
2207 if (scmnd->result) {
2208 scmnd->scsi_done(scmnd);
2209 ret = 0;
2210 } else {
2211 ret = SCSI_MLQUEUE_HOST_BUSY;
2214 goto unlock_rport;
2218 * Note: the resources allocated in this function are freed in
2219 * srp_free_ch_ib().
2221 static int srp_alloc_iu_bufs(struct srp_rdma_ch *ch)
2223 struct srp_target_port *target = ch->target;
2224 int i;
2226 ch->rx_ring = kcalloc(target->queue_size, sizeof(*ch->rx_ring),
2227 GFP_KERNEL);
2228 if (!ch->rx_ring)
2229 goto err_no_ring;
2230 ch->tx_ring = kcalloc(target->queue_size, sizeof(*ch->tx_ring),
2231 GFP_KERNEL);
2232 if (!ch->tx_ring)
2233 goto err_no_ring;
2235 for (i = 0; i < target->queue_size; ++i) {
2236 ch->rx_ring[i] = srp_alloc_iu(target->srp_host,
2237 ch->max_ti_iu_len,
2238 GFP_KERNEL, DMA_FROM_DEVICE);
2239 if (!ch->rx_ring[i])
2240 goto err;
2243 for (i = 0; i < target->queue_size; ++i) {
2244 ch->tx_ring[i] = srp_alloc_iu(target->srp_host,
2245 target->max_iu_len,
2246 GFP_KERNEL, DMA_TO_DEVICE);
2247 if (!ch->tx_ring[i])
2248 goto err;
2250 list_add(&ch->tx_ring[i]->list, &ch->free_tx);
2253 return 0;
2255 err:
2256 for (i = 0; i < target->queue_size; ++i) {
2257 srp_free_iu(target->srp_host, ch->rx_ring[i]);
2258 srp_free_iu(target->srp_host, ch->tx_ring[i]);
2262 err_no_ring:
2263 kfree(ch->tx_ring);
2264 ch->tx_ring = NULL;
2265 kfree(ch->rx_ring);
2266 ch->rx_ring = NULL;
2268 return -ENOMEM;
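/*
 * srp_compute_rq_tmo() - derive the block layer request timeout (see
 * srp_slave_configure()) from the QP local ACK timeout and retry count.
 *
 * Worked example (illustrative values only): with qp_attr->timeout == 19 and
 * qp_attr->retry_cnt == 7, T_tr = 4096 * 2^19 ns ~= 2.15 s, the largest
 * completion time is 7 * 4 * 2.15 s ~= 60 s and the returned value
 * corresponds to roughly 61 seconds.
 */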
2271 static uint32_t srp_compute_rq_tmo(struct ib_qp_attr *qp_attr, int attr_mask)
2273 uint64_t T_tr_ns, max_compl_time_ms;
2274 uint32_t rq_tmo_jiffies;
2277 * According to section 11.2.4.2 in the IBTA spec (Modify Queue Pair,
2278 * table 91), both the QP timeout and the retry count have to be set
2279 * for RC QPs during the RTR to RTS transition.
2281 WARN_ON_ONCE((attr_mask & (IB_QP_TIMEOUT | IB_QP_RETRY_CNT)) !=
2282 (IB_QP_TIMEOUT | IB_QP_RETRY_CNT));
2285 * Set target->rq_tmo_jiffies to one second more than the largest time
2286 * it can take before an error completion is generated. See also
2287 * C9-140..142 in the IBTA spec for more information about how to
2288 * convert the QP Local ACK Timeout value to nanoseconds.
2290 T_tr_ns = 4096 * (1ULL << qp_attr->timeout);
2291 max_compl_time_ms = qp_attr->retry_cnt * 4 * T_tr_ns;
2292 do_div(max_compl_time_ms, NSEC_PER_MSEC);
2293 rq_tmo_jiffies = msecs_to_jiffies(max_compl_time_ms + 1000);
2295 return rq_tmo_jiffies;
2298 static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
2299 const struct srp_login_rsp *lrsp,
2300 struct srp_rdma_ch *ch)
2302 struct srp_target_port *target = ch->target;
2303 struct ib_qp_attr *qp_attr = NULL;
2304 int attr_mask = 0;
2305 int ret;
2306 int i;
2308 if (lrsp->opcode == SRP_LOGIN_RSP) {
2309 ch->max_ti_iu_len = be32_to_cpu(lrsp->max_ti_iu_len);
2310 ch->req_lim = be32_to_cpu(lrsp->req_lim_delta);
2313 * Reserve credits for task management so we don't
2314 * bounce requests back to the SCSI mid-layer.
2316 target->scsi_host->can_queue
2317 = min(ch->req_lim - SRP_TSK_MGMT_SQ_SIZE,
2318 target->scsi_host->can_queue);
2319 target->scsi_host->cmd_per_lun
2320 = min_t(int, target->scsi_host->can_queue,
2321 target->scsi_host->cmd_per_lun);
2322 } else {
2323 shost_printk(KERN_WARNING, target->scsi_host,
2324 PFX "Unhandled RSP opcode %#x\n", lrsp->opcode);
2325 ret = -ECONNRESET;
2326 goto error;
2329 if (!ch->rx_ring) {
2330 ret = srp_alloc_iu_bufs(ch);
2331 if (ret)
2332 goto error;
2335 ret = -ENOMEM;
2336 qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL);
2337 if (!qp_attr)
2338 goto error;
2340 qp_attr->qp_state = IB_QPS_RTR;
2341 ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2342 if (ret)
2343 goto error_free;
2345 ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
2346 if (ret)
2347 goto error_free;
2349 for (i = 0; i < target->queue_size; i++) {
2350 struct srp_iu *iu = ch->rx_ring[i];
2352 ret = srp_post_recv(ch, iu);
2353 if (ret)
2354 goto error_free;
2357 qp_attr->qp_state = IB_QPS_RTS;
2358 ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2359 if (ret)
2360 goto error_free;
2362 target->rq_tmo_jiffies = srp_compute_rq_tmo(qp_attr, attr_mask);
2364 ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
2365 if (ret)
2366 goto error_free;
2368 ret = ib_send_cm_rtu(cm_id, NULL, 0);
2370 error_free:
2371 kfree(qp_attr);
2373 error:
2374 ch->status = ret;
2377 static void srp_cm_rej_handler(struct ib_cm_id *cm_id,
2378 struct ib_cm_event *event,
2379 struct srp_rdma_ch *ch)
2381 struct srp_target_port *target = ch->target;
2382 struct Scsi_Host *shost = target->scsi_host;
2383 struct ib_class_port_info *cpi;
2384 int opcode;
2386 switch (event->param.rej_rcvd.reason) {
2387 case IB_CM_REJ_PORT_CM_REDIRECT:
2388 cpi = event->param.rej_rcvd.ari;
2389 ch->path.dlid = cpi->redirect_lid;
2390 ch->path.pkey = cpi->redirect_pkey;
2391 cm_id->remote_cm_qpn = be32_to_cpu(cpi->redirect_qp) & 0x00ffffff;
2392 memcpy(ch->path.dgid.raw, cpi->redirect_gid, 16);
2394 ch->status = ch->path.dlid ?
2395 SRP_DLID_REDIRECT : SRP_PORT_REDIRECT;
2396 break;
2398 case IB_CM_REJ_PORT_REDIRECT:
2399 if (srp_target_is_topspin(target)) {
2401 * Topspin/Cisco SRP gateways incorrectly send
2402 * reject reason code 25 when they mean 24
2403 * (port redirect).
2405 memcpy(ch->path.dgid.raw,
2406 event->param.rej_rcvd.ari, 16);
2408 shost_printk(KERN_DEBUG, shost,
2409 PFX "Topspin/Cisco redirect to target port GID %016llx%016llx\n",
2410 be64_to_cpu(ch->path.dgid.global.subnet_prefix),
2411 be64_to_cpu(ch->path.dgid.global.interface_id));
2413 ch->status = SRP_PORT_REDIRECT;
2414 } else {
2415 shost_printk(KERN_WARNING, shost,
2416 " REJ reason: IB_CM_REJ_PORT_REDIRECT\n");
2417 ch->status = -ECONNRESET;
2419 break;
2421 case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID:
2422 shost_printk(KERN_WARNING, shost,
2423 " REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n");
2424 ch->status = -ECONNRESET;
2425 break;
2427 case IB_CM_REJ_CONSUMER_DEFINED:
2428 opcode = *(u8 *) event->private_data;
2429 if (opcode == SRP_LOGIN_REJ) {
2430 struct srp_login_rej *rej = event->private_data;
2431 u32 reason = be32_to_cpu(rej->reason);
2433 if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE)
2434 shost_printk(KERN_WARNING, shost,
2435 PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n");
2436 else
2437 shost_printk(KERN_WARNING, shost, PFX
2438 "SRP LOGIN from %pI6 to %pI6 REJECTED, reason 0x%08x\n",
2439 target->sgid.raw,
2440 target->orig_dgid.raw, reason);
2441 } else
2442 shost_printk(KERN_WARNING, shost,
2443 " REJ reason: IB_CM_REJ_CONSUMER_DEFINED,"
2444 " opcode 0x%02x\n", opcode);
2445 ch->status = -ECONNRESET;
2446 break;
2448 case IB_CM_REJ_STALE_CONN:
2449 shost_printk(KERN_WARNING, shost, " REJ reason: stale connection\n");
2450 ch->status = SRP_STALE_CONN;
2451 break;
2453 default:
2454 shost_printk(KERN_WARNING, shost, " REJ reason 0x%x\n",
2455 event->param.rej_rcvd.reason);
2456 ch->status = -ECONNRESET;
2460 static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
2462 struct srp_rdma_ch *ch = cm_id->context;
2463 struct srp_target_port *target = ch->target;
2464 int comp = 0;
2466 switch (event->event) {
2467 case IB_CM_REQ_ERROR:
2468 shost_printk(KERN_DEBUG, target->scsi_host,
2469 PFX "Sending CM REQ failed\n");
2470 comp = 1;
2471 ch->status = -ECONNRESET;
2472 break;
2474 case IB_CM_REP_RECEIVED:
2475 comp = 1;
2476 srp_cm_rep_handler(cm_id, event->private_data, ch);
2477 break;
2479 case IB_CM_REJ_RECEIVED:
2480 shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n");
2481 comp = 1;
2483 srp_cm_rej_handler(cm_id, event, ch);
2484 break;
2486 case IB_CM_DREQ_RECEIVED:
2487 shost_printk(KERN_WARNING, target->scsi_host,
2488 PFX "DREQ received - connection closed\n");
2489 ch->connected = false;
2490 if (ib_send_cm_drep(cm_id, NULL, 0))
2491 shost_printk(KERN_ERR, target->scsi_host,
2492 PFX "Sending CM DREP failed\n");
2493 queue_work(system_long_wq, &target->tl_err_work);
2494 break;
2496 case IB_CM_TIMEWAIT_EXIT:
2497 shost_printk(KERN_ERR, target->scsi_host,
2498 PFX "connection closed\n");
2499 comp = 1;
2501 ch->status = 0;
2502 break;
2504 case IB_CM_MRA_RECEIVED:
2505 case IB_CM_DREQ_ERROR:
2506 case IB_CM_DREP_RECEIVED:
2507 break;
2509 default:
2510 shost_printk(KERN_WARNING, target->scsi_host,
2511 PFX "Unhandled CM event %d\n", event->event);
2512 break;
2515 if (comp)
2516 complete(&ch->done);
2518 return 0;
2522 * srp_change_queue_depth() - set the device queue depth
2523 * @sdev: scsi device struct
2524 * @qdepth: requested queue depth
2526 * Returns queue depth.
2528 static int
2529 srp_change_queue_depth(struct scsi_device *sdev, int qdepth)
2531 if (!sdev->tagged_supported)
2532 qdepth = 1;
2533 return scsi_change_queue_depth(sdev, qdepth);
2536 static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun,
2537 u8 func, u8 *status)
2539 struct srp_target_port *target = ch->target;
2540 struct srp_rport *rport = target->rport;
2541 struct ib_device *dev = target->srp_host->srp_dev->dev;
2542 struct srp_iu *iu;
2543 struct srp_tsk_mgmt *tsk_mgmt;
2544 int res;
2546 if (!ch->connected || target->qp_in_error)
2547 return -1;
2550 * Lock the rport mutex to prevent srp_create_ch_ib() from being
2551 * invoked while a task management function is being sent.
2553 mutex_lock(&rport->mutex);
2554 spin_lock_irq(&ch->lock);
2555 iu = __srp_get_tx_iu(ch, SRP_IU_TSK_MGMT);
2556 spin_unlock_irq(&ch->lock);
2558 if (!iu) {
2559 mutex_unlock(&rport->mutex);
2561 return -1;
2564 ib_dma_sync_single_for_cpu(dev, iu->dma, sizeof *tsk_mgmt,
2565 DMA_TO_DEVICE);
2566 tsk_mgmt = iu->buf;
2567 memset(tsk_mgmt, 0, sizeof *tsk_mgmt);
2569 tsk_mgmt->opcode = SRP_TSK_MGMT;
2570 int_to_scsilun(lun, &tsk_mgmt->lun);
2571 tsk_mgmt->tsk_mgmt_func = func;
2572 tsk_mgmt->task_tag = req_tag;
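/*
 * Generate a tag in the task management tag space; the SRP_TAG_TSK_MGMT
 * bit keeps these tags distinct from regular SCSI command tags.
 */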
2574 spin_lock_irq(&ch->lock);
2575 ch->tsk_mgmt_tag = (ch->tsk_mgmt_tag + 1) | SRP_TAG_TSK_MGMT;
2576 tsk_mgmt->tag = ch->tsk_mgmt_tag;
2577 spin_unlock_irq(&ch->lock);
2579 init_completion(&ch->tsk_mgmt_done);
2581 ib_dma_sync_single_for_device(dev, iu->dma, sizeof *tsk_mgmt,
2582 DMA_TO_DEVICE);
2583 if (srp_post_send(ch, iu, sizeof(*tsk_mgmt))) {
2584 srp_put_tx_iu(ch, iu, SRP_IU_TSK_MGMT);
2585 mutex_unlock(&rport->mutex);
2587 return -1;
2589 res = wait_for_completion_timeout(&ch->tsk_mgmt_done,
2590 msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS));
2591 if (res > 0 && status)
2592 *status = ch->tsk_mgmt_status;
2593 mutex_unlock(&rport->mutex);
2595 WARN_ON_ONCE(res < 0);
2597 return res > 0 ? 0 : -1;
2600 static int srp_abort(struct scsi_cmnd *scmnd)
2602 struct srp_target_port *target = host_to_target(scmnd->device->host);
2603 struct srp_request *req = (struct srp_request *) scmnd->host_scribble;
2604 u32 tag;
2605 u16 ch_idx;
2606 struct srp_rdma_ch *ch;
2607 int ret;
2609 shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n");
2611 if (!req)
2612 return SUCCESS;
2613 tag = blk_mq_unique_tag(scmnd->request);
2614 ch_idx = blk_mq_unique_tag_to_hwq(tag);
2615 if (WARN_ON_ONCE(ch_idx >= target->ch_count))
2616 return SUCCESS;
2617 ch = &target->ch[ch_idx];
2618 if (!srp_claim_req(ch, req, NULL, scmnd))
2619 return SUCCESS;
2620 shost_printk(KERN_ERR, target->scsi_host,
2621 "Sending SRP abort for tag %#x\n", tag);
2622 if (srp_send_tsk_mgmt(ch, tag, scmnd->device->lun,
2623 SRP_TSK_ABORT_TASK, NULL) == 0)
2624 ret = SUCCESS;
2625 else if (target->rport->state == SRP_RPORT_LOST)
2626 ret = FAST_IO_FAIL;
2627 else
2628 ret = FAILED;
2629 if (ret == SUCCESS) {
2630 srp_free_req(ch, req, scmnd, 0);
2631 scmnd->result = DID_ABORT << 16;
2632 scmnd->scsi_done(scmnd);
2635 return ret;
2638 static int srp_reset_device(struct scsi_cmnd *scmnd)
2640 struct srp_target_port *target = host_to_target(scmnd->device->host);
2641 struct srp_rdma_ch *ch;
2642 int i, j;
2643 u8 status;
2645 shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n");
2647 ch = &target->ch[0];
2648 if (srp_send_tsk_mgmt(ch, SRP_TAG_NO_REQ, scmnd->device->lun,
2649 SRP_TSK_LUN_RESET, &status))
2650 return FAILED;
2651 if (status)
2652 return FAILED;
2654 for (i = 0; i < target->ch_count; i++) {
2655 ch = &target->ch[i];
2656 for (j = 0; j < target->req_ring_size; ++j) {
2657 struct srp_request *req = &ch->req_ring[j];
2659 srp_finish_req(ch, req, scmnd->device, DID_RESET << 16);
2663 return SUCCESS;
2666 static int srp_reset_host(struct scsi_cmnd *scmnd)
2668 struct srp_target_port *target = host_to_target(scmnd->device->host);
2670 shost_printk(KERN_ERR, target->scsi_host, PFX "SRP reset_host called\n");
2672 return srp_reconnect_rport(target->rport) == 0 ? SUCCESS : FAILED;
2675 static int srp_slave_alloc(struct scsi_device *sdev)
2677 struct Scsi_Host *shost = sdev->host;
2678 struct srp_target_port *target = host_to_target(shost);
2679 struct srp_device *srp_dev = target->srp_host->srp_dev;
2682 blk_queue_virt_boundary(sdev->request_queue,
2683 ~srp_dev->mr_page_mask);
2685 return 0;
2688 static int srp_slave_configure(struct scsi_device *sdev)
2690 struct Scsi_Host *shost = sdev->host;
2691 struct srp_target_port *target = host_to_target(shost);
2692 struct request_queue *q = sdev->request_queue;
2693 unsigned long timeout;
2695 if (sdev->type == TYPE_DISK) {
2696 timeout = max_t(unsigned, 30 * HZ, target->rq_tmo_jiffies);
2697 blk_queue_rq_timeout(q, timeout);
2700 return 0;
2703 static ssize_t show_id_ext(struct device *dev, struct device_attribute *attr,
2704 char *buf)
2706 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2708 return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->id_ext));
2711 static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr,
2712 char *buf)
2714 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2716 return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->ioc_guid));
2719 static ssize_t show_service_id(struct device *dev,
2720 struct device_attribute *attr, char *buf)
2722 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2724 return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->service_id));
2727 static ssize_t show_pkey(struct device *dev, struct device_attribute *attr,
2728 char *buf)
2730 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2732 return sprintf(buf, "0x%04x\n", be16_to_cpu(target->pkey));
2735 static ssize_t show_sgid(struct device *dev, struct device_attribute *attr,
2736 char *buf)
2738 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2740 return sprintf(buf, "%pI6\n", target->sgid.raw);
2743 static ssize_t show_dgid(struct device *dev, struct device_attribute *attr,
2744 char *buf)
2746 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2747 struct srp_rdma_ch *ch = &target->ch[0];
2749 return sprintf(buf, "%pI6\n", ch->path.dgid.raw);
2752 static ssize_t show_orig_dgid(struct device *dev,
2753 struct device_attribute *attr, char *buf)
2755 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2757 return sprintf(buf, "%pI6\n", target->orig_dgid.raw);
2760 static ssize_t show_req_lim(struct device *dev,
2761 struct device_attribute *attr, char *buf)
2763 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2764 struct srp_rdma_ch *ch;
2765 int i, req_lim = INT_MAX;
2767 for (i = 0; i < target->ch_count; i++) {
2768 ch = &target->ch[i];
2769 req_lim = min(req_lim, ch->req_lim);
2771 return sprintf(buf, "%d\n", req_lim);
2774 static ssize_t show_zero_req_lim(struct device *dev,
2775 struct device_attribute *attr, char *buf)
2777 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2779 return sprintf(buf, "%d\n", target->zero_req_lim);
2782 static ssize_t show_local_ib_port(struct device *dev,
2783 struct device_attribute *attr, char *buf)
2785 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2787 return sprintf(buf, "%d\n", target->srp_host->port);
2790 static ssize_t show_local_ib_device(struct device *dev,
2791 struct device_attribute *attr, char *buf)
2793 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2795 return sprintf(buf, "%s\n", target->srp_host->srp_dev->dev->name);
2798 static ssize_t show_ch_count(struct device *dev, struct device_attribute *attr,
2799 char *buf)
2801 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2803 return sprintf(buf, "%d\n", target->ch_count);
2806 static ssize_t show_comp_vector(struct device *dev,
2807 struct device_attribute *attr, char *buf)
2809 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2811 return sprintf(buf, "%d\n", target->comp_vector);
2814 static ssize_t show_tl_retry_count(struct device *dev,
2815 struct device_attribute *attr, char *buf)
2817 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2819 return sprintf(buf, "%d\n", target->tl_retry_count);
2822 static ssize_t show_cmd_sg_entries(struct device *dev,
2823 struct device_attribute *attr, char *buf)
2825 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2827 return sprintf(buf, "%u\n", target->cmd_sg_cnt);
2830 static ssize_t show_allow_ext_sg(struct device *dev,
2831 struct device_attribute *attr, char *buf)
2833 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2835 return sprintf(buf, "%s\n", target->allow_ext_sg ? "true" : "false");
2838 static DEVICE_ATTR(id_ext, S_IRUGO, show_id_ext, NULL);
2839 static DEVICE_ATTR(ioc_guid, S_IRUGO, show_ioc_guid, NULL);
2840 static DEVICE_ATTR(service_id, S_IRUGO, show_service_id, NULL);
2841 static DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL);
2842 static DEVICE_ATTR(sgid, S_IRUGO, show_sgid, NULL);
2843 static DEVICE_ATTR(dgid, S_IRUGO, show_dgid, NULL);
2844 static DEVICE_ATTR(orig_dgid, S_IRUGO, show_orig_dgid, NULL);
2845 static DEVICE_ATTR(req_lim, S_IRUGO, show_req_lim, NULL);
2846 static DEVICE_ATTR(zero_req_lim, S_IRUGO, show_zero_req_lim, NULL);
2847 static DEVICE_ATTR(local_ib_port, S_IRUGO, show_local_ib_port, NULL);
2848 static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL);
2849 static DEVICE_ATTR(ch_count, S_IRUGO, show_ch_count, NULL);
2850 static DEVICE_ATTR(comp_vector, S_IRUGO, show_comp_vector, NULL);
2851 static DEVICE_ATTR(tl_retry_count, S_IRUGO, show_tl_retry_count, NULL);
2852 static DEVICE_ATTR(cmd_sg_entries, S_IRUGO, show_cmd_sg_entries, NULL);
2853 static DEVICE_ATTR(allow_ext_sg, S_IRUGO, show_allow_ext_sg, NULL);
2855 static struct device_attribute *srp_host_attrs[] = {
2856 &dev_attr_id_ext,
2857 &dev_attr_ioc_guid,
2858 &dev_attr_service_id,
2859 &dev_attr_pkey,
2860 &dev_attr_sgid,
2861 &dev_attr_dgid,
2862 &dev_attr_orig_dgid,
2863 &dev_attr_req_lim,
2864 &dev_attr_zero_req_lim,
2865 &dev_attr_local_ib_port,
2866 &dev_attr_local_ib_device,
2867 &dev_attr_ch_count,
2868 &dev_attr_comp_vector,
2869 &dev_attr_tl_retry_count,
2870 &dev_attr_cmd_sg_entries,
2871 &dev_attr_allow_ext_sg,
2872 NULL
2875 static struct scsi_host_template srp_template = {
2876 .module = THIS_MODULE,
2877 .name = "InfiniBand SRP initiator",
2878 .proc_name = DRV_NAME,
2879 .slave_alloc = srp_slave_alloc,
2880 .slave_configure = srp_slave_configure,
2881 .info = srp_target_info,
2882 .queuecommand = srp_queuecommand,
2883 .change_queue_depth = srp_change_queue_depth,
2884 .eh_abort_handler = srp_abort,
2885 .eh_device_reset_handler = srp_reset_device,
2886 .eh_host_reset_handler = srp_reset_host,
2887 .skip_settle_delay = true,
2888 .sg_tablesize = SRP_DEF_SG_TABLESIZE,
2889 .can_queue = SRP_DEFAULT_CMD_SQ_SIZE,
2890 .this_id = -1,
2891 .cmd_per_lun = SRP_DEFAULT_CMD_SQ_SIZE,
2892 .use_clustering = ENABLE_CLUSTERING,
2893 .shost_attrs = srp_host_attrs,
2894 .track_queue_depth = 1,
2897 static int srp_sdev_count(struct Scsi_Host *host)
2899 struct scsi_device *sdev;
2900 int c = 0;
2902 shost_for_each_device(sdev, host)
2903 c++;
2905 return c;
2909 * Return values:
2910 * < 0 upon failure. Caller is responsible for SRP target port cleanup.
2911 * 0 and target->state == SRP_TARGET_REMOVED if asynchronous target port
2912 * removal has been scheduled.
2913 * 0 and target->state != SRP_TARGET_REMOVED upon success.
2915 static int srp_add_target(struct srp_host *host, struct srp_target_port *target)
2917 struct srp_rport_identifiers ids;
2918 struct srp_rport *rport;
2920 target->state = SRP_TARGET_SCANNING;
2921 sprintf(target->target_name, "SRP.T10:%016llX",
2922 be64_to_cpu(target->id_ext));
2924 if (scsi_add_host(target->scsi_host, host->srp_dev->dev->dma_device))
2925 return -ENODEV;
2927 memcpy(ids.port_id, &target->id_ext, 8);
2928 memcpy(ids.port_id + 8, &target->ioc_guid, 8);
2929 ids.roles = SRP_RPORT_ROLE_TARGET;
2930 rport = srp_rport_add(target->scsi_host, &ids);
2931 if (IS_ERR(rport)) {
2932 scsi_remove_host(target->scsi_host);
2933 return PTR_ERR(rport);
2936 rport->lld_data = target;
2937 target->rport = rport;
2939 spin_lock(&host->target_lock);
2940 list_add_tail(&target->list, &host->target_list);
2941 spin_unlock(&host->target_lock);
2943 scsi_scan_target(&target->scsi_host->shost_gendev,
2944 0, target->scsi_id, SCAN_WILD_CARD, SCSI_SCAN_INITIAL);
2946 if (srp_connected_ch(target) < target->ch_count ||
2947 target->qp_in_error) {
2948 shost_printk(KERN_INFO, target->scsi_host,
2949 PFX "SCSI scan failed - removing SCSI host\n");
2950 srp_queue_remove_work(target);
2951 goto out;
2954 pr_debug("%s: SCSI scan succeeded - detected %d LUNs\n",
2955 dev_name(&target->scsi_host->shost_gendev),
2956 srp_sdev_count(target->scsi_host));
2958 spin_lock_irq(&target->lock);
2959 if (target->state == SRP_TARGET_SCANNING)
2960 target->state = SRP_TARGET_LIVE;
2961 spin_unlock_irq(&target->lock);
2963 out:
2964 return 0;
2967 static void srp_release_dev(struct device *dev)
2969 struct srp_host *host =
2970 container_of(dev, struct srp_host, dev);
2972 complete(&host->released);
2975 static struct class srp_class = {
2976 .name = "infiniband_srp",
2977 .dev_release = srp_release_dev
2981 * srp_conn_unique() - check whether the connection to a target is unique
2982 * @host: SRP host.
2983 * @target: SRP target port.
2985 static bool srp_conn_unique(struct srp_host *host,
2986 struct srp_target_port *target)
2988 struct srp_target_port *t;
2989 bool ret = false;
2991 if (target->state == SRP_TARGET_REMOVED)
2992 goto out;
2994 ret = true;
2996 spin_lock(&host->target_lock);
2997 list_for_each_entry(t, &host->target_list, list) {
2998 if (t != target &&
2999 target->id_ext == t->id_ext &&
3000 target->ioc_guid == t->ioc_guid &&
3001 target->initiator_ext == t->initiator_ext) {
3002 ret = false;
3003 break;
3006 spin_unlock(&host->target_lock);
3008 out:
3009 return ret;
3013 * Target ports are added by writing
3015 * id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>,dgid=<dest GID>,
3016 * pkey=<P_Key>,service_id=<service ID>
3018 * to the add_target sysfs attribute.
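 *
 * Example (illustrative values), written to
 * /sys/class/infiniband_srp/srp-<device>-<port>/add_target:
 *
 *   id_ext=200100e08b000000,ioc_guid=0002c90200402bd4,dgid=fe800000000000000002c90200402bd5,pkey=ffff,service_id=0002c90200402bd4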
3020 enum {
3021 SRP_OPT_ERR = 0,
3022 SRP_OPT_ID_EXT = 1 << 0,
3023 SRP_OPT_IOC_GUID = 1 << 1,
3024 SRP_OPT_DGID = 1 << 2,
3025 SRP_OPT_PKEY = 1 << 3,
3026 SRP_OPT_SERVICE_ID = 1 << 4,
3027 SRP_OPT_MAX_SECT = 1 << 5,
3028 SRP_OPT_MAX_CMD_PER_LUN = 1 << 6,
3029 SRP_OPT_IO_CLASS = 1 << 7,
3030 SRP_OPT_INITIATOR_EXT = 1 << 8,
3031 SRP_OPT_CMD_SG_ENTRIES = 1 << 9,
3032 SRP_OPT_ALLOW_EXT_SG = 1 << 10,
3033 SRP_OPT_SG_TABLESIZE = 1 << 11,
3034 SRP_OPT_COMP_VECTOR = 1 << 12,
3035 SRP_OPT_TL_RETRY_COUNT = 1 << 13,
3036 SRP_OPT_QUEUE_SIZE = 1 << 14,
3037 SRP_OPT_ALL = (SRP_OPT_ID_EXT |
3038 SRP_OPT_IOC_GUID |
3039 SRP_OPT_DGID |
3040 SRP_OPT_PKEY |
3041 SRP_OPT_SERVICE_ID),
3044 static const match_table_t srp_opt_tokens = {
3045 { SRP_OPT_ID_EXT, "id_ext=%s" },
3046 { SRP_OPT_IOC_GUID, "ioc_guid=%s" },
3047 { SRP_OPT_DGID, "dgid=%s" },
3048 { SRP_OPT_PKEY, "pkey=%x" },
3049 { SRP_OPT_SERVICE_ID, "service_id=%s" },
3050 { SRP_OPT_MAX_SECT, "max_sect=%d" },
3051 { SRP_OPT_MAX_CMD_PER_LUN, "max_cmd_per_lun=%d" },
3052 { SRP_OPT_IO_CLASS, "io_class=%x" },
3053 { SRP_OPT_INITIATOR_EXT, "initiator_ext=%s" },
3054 { SRP_OPT_CMD_SG_ENTRIES, "cmd_sg_entries=%u" },
3055 { SRP_OPT_ALLOW_EXT_SG, "allow_ext_sg=%u" },
3056 { SRP_OPT_SG_TABLESIZE, "sg_tablesize=%u" },
3057 { SRP_OPT_COMP_VECTOR, "comp_vector=%u" },
3058 { SRP_OPT_TL_RETRY_COUNT, "tl_retry_count=%u" },
3059 { SRP_OPT_QUEUE_SIZE, "queue_size=%d" },
3060 { SRP_OPT_ERR, NULL }
3063 static int srp_parse_options(const char *buf, struct srp_target_port *target)
3065 char *options, *sep_opt;
3066 char *p;
3067 char dgid[3];
3068 substring_t args[MAX_OPT_ARGS];
3069 int opt_mask = 0;
3070 int token;
3071 int ret = -EINVAL;
3072 int i;
3074 options = kstrdup(buf, GFP_KERNEL);
3075 if (!options)
3076 return -ENOMEM;
3078 sep_opt = options;
3079 while ((p = strsep(&sep_opt, ",\n")) != NULL) {
3080 if (!*p)
3081 continue;
3083 token = match_token(p, srp_opt_tokens, args);
3084 opt_mask |= token;
3086 switch (token) {
3087 case SRP_OPT_ID_EXT:
3088 p = match_strdup(args);
3089 if (!p) {
3090 ret = -ENOMEM;
3091 goto out;
3093 target->id_ext = cpu_to_be64(simple_strtoull(p, NULL, 16));
3094 kfree(p);
3095 break;
3097 case SRP_OPT_IOC_GUID:
3098 p = match_strdup(args);
3099 if (!p) {
3100 ret = -ENOMEM;
3101 goto out;
3103 target->ioc_guid = cpu_to_be64(simple_strtoull(p, NULL, 16));
3104 kfree(p);
3105 break;
3107 case SRP_OPT_DGID:
3108 p = match_strdup(args);
3109 if (!p) {
3110 ret = -ENOMEM;
3111 goto out;
3113 if (strlen(p) != 32) {
3114 pr_warn("bad dest GID parameter '%s'\n", p);
3115 kfree(p);
3116 goto out;
3119 for (i = 0; i < 16; ++i) {
3120 strlcpy(dgid, p + i * 2, sizeof(dgid));
3121 if (sscanf(dgid, "%hhx",
3122 &target->orig_dgid.raw[i]) < 1) {
3123 ret = -EINVAL;
3124 kfree(p);
3125 goto out;
3128 kfree(p);
3129 break;
3131 case SRP_OPT_PKEY:
3132 if (match_hex(args, &token)) {
3133 pr_warn("bad P_Key parameter '%s'\n", p);
3134 goto out;
3136 target->pkey = cpu_to_be16(token);
3137 break;
3139 case SRP_OPT_SERVICE_ID:
3140 p = match_strdup(args);
3141 if (!p) {
3142 ret = -ENOMEM;
3143 goto out;
3145 target->service_id = cpu_to_be64(simple_strtoull(p, NULL, 16));
3146 kfree(p);
3147 break;
3149 case SRP_OPT_MAX_SECT:
3150 if (match_int(args, &token)) {
3151 pr_warn("bad max sect parameter '%s'\n", p);
3152 goto out;
3154 target->scsi_host->max_sectors = token;
3155 break;
3157 case SRP_OPT_QUEUE_SIZE:
3158 if (match_int(args, &token) || token < 1) {
3159 pr_warn("bad queue_size parameter '%s'\n", p);
3160 goto out;
3162 target->scsi_host->can_queue = token;
3163 target->queue_size = token + SRP_RSP_SQ_SIZE +
3164 SRP_TSK_MGMT_SQ_SIZE;
3165 if (!(opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
3166 target->scsi_host->cmd_per_lun = token;
3167 break;
3169 case SRP_OPT_MAX_CMD_PER_LUN:
3170 if (match_int(args, &token) || token < 1) {
3171 pr_warn("bad max cmd_per_lun parameter '%s'\n",
3173 goto out;
3175 target->scsi_host->cmd_per_lun = token;
3176 break;
3178 case SRP_OPT_IO_CLASS:
3179 if (match_hex(args, &token)) {
3180 pr_warn("bad IO class parameter '%s'\n", p);
3181 goto out;
3183 if (token != SRP_REV10_IB_IO_CLASS &&
3184 token != SRP_REV16A_IB_IO_CLASS) {
3185 pr_warn("unknown IO class parameter value %x specified (use %x or %x).\n",
3186 token, SRP_REV10_IB_IO_CLASS,
3187 SRP_REV16A_IB_IO_CLASS);
3188 goto out;
3190 target->io_class = token;
3191 break;
3193 case SRP_OPT_INITIATOR_EXT:
3194 p = match_strdup(args);
3195 if (!p) {
3196 ret = -ENOMEM;
3197 goto out;
3199 target->initiator_ext = cpu_to_be64(simple_strtoull(p, NULL, 16));
3200 kfree(p);
3201 break;
3203 case SRP_OPT_CMD_SG_ENTRIES:
3204 if (match_int(args, &token) || token < 1 || token > 255) {
3205 pr_warn("bad max cmd_sg_entries parameter '%s'\n",
3207 goto out;
3209 target->cmd_sg_cnt = token;
3210 break;
3212 case SRP_OPT_ALLOW_EXT_SG:
3213 if (match_int(args, &token)) {
3214 pr_warn("bad allow_ext_sg parameter '%s'\n", p);
3215 goto out;
3217 target->allow_ext_sg = !!token;
3218 break;
3220 case SRP_OPT_SG_TABLESIZE:
3221 if (match_int(args, &token) || token < 1 ||
3222 token > SG_MAX_SEGMENTS) {
3223 pr_warn("bad max sg_tablesize parameter '%s'\n",
3225 goto out;
3227 target->sg_tablesize = token;
3228 break;
3230 case SRP_OPT_COMP_VECTOR:
3231 if (match_int(args, &token) || token < 0) {
3232 pr_warn("bad comp_vector parameter '%s'\n", p);
3233 goto out;
3235 target->comp_vector = token;
3236 break;
3238 case SRP_OPT_TL_RETRY_COUNT:
3239 if (match_int(args, &token) || token < 2 || token > 7) {
3240 pr_warn("bad tl_retry_count parameter '%s' (must be a number between 2 and 7)\n",
3242 goto out;
3244 target->tl_retry_count = token;
3245 break;
3247 default:
3248 pr_warn("unknown parameter or missing value '%s' in target creation request\n",
3250 goto out;
3254 if ((opt_mask & SRP_OPT_ALL) == SRP_OPT_ALL)
3255 ret = 0;
3256 else
3257 for (i = 0; i < ARRAY_SIZE(srp_opt_tokens); ++i)
3258 if ((srp_opt_tokens[i].token & SRP_OPT_ALL) &&
3259 !(srp_opt_tokens[i].token & opt_mask))
3260 pr_warn("target creation request is missing parameter '%s'\n",
3261 srp_opt_tokens[i].pattern);
3263 if (target->scsi_host->cmd_per_lun > target->scsi_host->can_queue
3264 && (opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
3265 pr_warn("cmd_per_lun = %d > queue_size = %d\n",
3266 target->scsi_host->cmd_per_lun,
3267 target->scsi_host->can_queue);
3269 out:
3270 kfree(options);
3271 return ret;
3274 static ssize_t srp_create_target(struct device *dev,
3275 struct device_attribute *attr,
3276 const char *buf, size_t count)
3278 struct srp_host *host =
3279 container_of(dev, struct srp_host, dev);
3280 struct Scsi_Host *target_host;
3281 struct srp_target_port *target;
3282 struct srp_rdma_ch *ch;
3283 struct srp_device *srp_dev = host->srp_dev;
3284 struct ib_device *ibdev = srp_dev->dev;
3285 int ret, node_idx, node, cpu, i;
3286 unsigned int max_sectors_per_mr, mr_per_cmd = 0;
3287 bool multich = false;
3289 target_host = scsi_host_alloc(&srp_template,
3290 sizeof (struct srp_target_port));
3291 if (!target_host)
3292 return -ENOMEM;
3294 target_host->transportt = ib_srp_transport_template;
3295 target_host->max_channel = 0;
3296 target_host->max_id = 1;
3297 target_host->max_lun = -1LL;
3298 target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb;
3300 target = host_to_target(target_host);
3302 target->io_class = SRP_REV16A_IB_IO_CLASS;
3303 target->scsi_host = target_host;
3304 target->srp_host = host;
3305 target->pd = host->srp_dev->pd;
3306 target->lkey = host->srp_dev->pd->local_dma_lkey;
3307 target->cmd_sg_cnt = cmd_sg_entries;
3308 target->sg_tablesize = indirect_sg_entries ? : cmd_sg_entries;
3309 target->allow_ext_sg = allow_ext_sg;
3310 target->tl_retry_count = 7;
3311 target->queue_size = SRP_DEFAULT_QUEUE_SIZE;
3314 * Prevent the SCSI host from being removed by srp_remove_target()
3315 * before this function returns.
3317 scsi_host_get(target->scsi_host);
3319 mutex_lock(&host->add_target_mutex);
3321 ret = srp_parse_options(buf, target);
3322 if (ret)
3323 goto out;
3325 target->req_ring_size = target->queue_size - SRP_TSK_MGMT_SQ_SIZE;
3327 if (!srp_conn_unique(target->srp_host, target)) {
3328 shost_printk(KERN_INFO, target->scsi_host,
3329 PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n",
3330 be64_to_cpu(target->id_ext),
3331 be64_to_cpu(target->ioc_guid),
3332 be64_to_cpu(target->initiator_ext));
3333 ret = -EEXIST;
3334 goto out;
3337 if (!srp_dev->has_fmr && !srp_dev->has_fr && !target->allow_ext_sg &&
3338 target->cmd_sg_cnt < target->sg_tablesize) {
3339 pr_warn("No MR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n");
3340 target->sg_tablesize = target->cmd_sg_cnt;
3343 if (srp_dev->use_fast_reg || srp_dev->use_fmr) {
3345 * FR and FMR can only map one HCA page per entry. If the
3346 * start address is not aligned on a HCA page boundary, two
3347 * entries will be used for the head and the tail, although
3348 * these two entries combined contain at most one HCA page of
3349 * data. Hence the "+ 1" in the calculation below.
3351 * The indirect data buffer descriptor is contiguous so the
3352 * memory for that buffer will only be registered if
3353 * register_always is true. Hence add one to mr_per_cmd if
3354 * register_always has been set.
3356 max_sectors_per_mr = srp_dev->max_pages_per_mr <<
3357 (ilog2(srp_dev->mr_page_size) - 9);
3358 mr_per_cmd = register_always +
3359 (target->scsi_host->max_sectors + 1 +
3360 max_sectors_per_mr - 1) / max_sectors_per_mr;
3361 pr_debug("max_sectors = %u; max_pages_per_mr = %u; mr_page_size = %u; max_sectors_per_mr = %u; mr_per_cmd = %u\n",
3362 target->scsi_host->max_sectors,
3363 srp_dev->max_pages_per_mr, srp_dev->mr_page_size,
3364 max_sectors_per_mr, mr_per_cmd);
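/*
 * Illustrative example (values not taken from this code): with
 * mr_page_size = 4096, max_pages_per_mr = 512 and max_sectors = 1024,
 * max_sectors_per_mr = 512 << 3 = 4096 and, with register_always enabled,
 * mr_per_cmd = 1 + (1024 + 1 + 4095) / 4096 = 2.
 */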
3367 target_host->sg_tablesize = target->sg_tablesize;
3368 target->mr_pool_size = target->scsi_host->can_queue * mr_per_cmd;
3369 target->mr_per_cmd = mr_per_cmd;
3370 target->indirect_size = target->sg_tablesize *
3371 sizeof (struct srp_direct_buf);
3372 target->max_iu_len = sizeof (struct srp_cmd) +
3373 sizeof (struct srp_indirect_buf) +
3374 target->cmd_sg_cnt * sizeof (struct srp_direct_buf);
3376 INIT_WORK(&target->tl_err_work, srp_tl_err_work);
3377 INIT_WORK(&target->remove_work, srp_remove_work);
3378 spin_lock_init(&target->lock);
3379 ret = ib_query_gid(ibdev, host->port, 0, &target->sgid, NULL);
3380 if (ret)
3381 goto out;
3383 ret = -ENOMEM;
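/*
 * Use at least one channel per NUMA node and at most one channel per
 * online CPU; within those bounds, use a previously configured ch_count
 * if set, and min(4 * nr_online_nodes, num_comp_vectors) otherwise.
 */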
3384 target->ch_count = max_t(unsigned, num_online_nodes(),
3385 min(ch_count ? :
3386 min(4 * num_online_nodes(),
3387 ibdev->num_comp_vectors),
3388 num_online_cpus()));
3389 target->ch = kcalloc(target->ch_count, sizeof(*target->ch),
3390 GFP_KERNEL);
3391 if (!target->ch)
3392 goto out;
3394 node_idx = 0;
3395 for_each_online_node(node) {
3396 const int ch_start = (node_idx * target->ch_count /
3397 num_online_nodes());
3398 const int ch_end = ((node_idx + 1) * target->ch_count /
3399 num_online_nodes());
3400 const int cv_start = node_idx * ibdev->num_comp_vectors /
3401 num_online_nodes();
3402 const int cv_end = (node_idx + 1) * ibdev->num_comp_vectors /
3403 num_online_nodes();
3404 int cpu_idx = 0;
3406 for_each_online_cpu(cpu) {
3407 if (cpu_to_node(cpu) != node)
3408 continue;
3409 if (ch_start + cpu_idx >= ch_end)
3410 continue;
3411 ch = &target->ch[ch_start + cpu_idx];
3412 ch->target = target;
3413 ch->comp_vector = cv_start == cv_end ? cv_start :
3414 cv_start + cpu_idx % (cv_end - cv_start);
3415 spin_lock_init(&ch->lock);
3416 INIT_LIST_HEAD(&ch->free_tx);
3417 ret = srp_new_cm_id(ch);
3418 if (ret)
3419 goto err_disconnect;
3421 ret = srp_create_ch_ib(ch);
3422 if (ret)
3423 goto err_disconnect;
3425 ret = srp_alloc_req_data(ch);
3426 if (ret)
3427 goto err_disconnect;
3429 ret = srp_connect_ch(ch, multich);
3430 if (ret) {
3431 shost_printk(KERN_ERR, target->scsi_host,
3432 PFX "Connection %d/%d failed\n",
3433 ch_start + cpu_idx,
3434 target->ch_count);
3435 if (node_idx == 0 && cpu_idx == 0) {
3436 goto err_disconnect;
3437 } else {
3438 srp_free_ch_ib(target, ch);
3439 srp_free_req_data(target, ch);
3440 target->ch_count = ch - target->ch;
3441 goto connected;
3445 multich = true;
3446 cpu_idx++;
3448 node_idx++;
3451 connected:
3452 target->scsi_host->nr_hw_queues = target->ch_count;
3454 ret = srp_add_target(host, target);
3455 if (ret)
3456 goto err_disconnect;
3458 if (target->state != SRP_TARGET_REMOVED) {
3459 shost_printk(KERN_DEBUG, target->scsi_host, PFX
3460 "new target: id_ext %016llx ioc_guid %016llx pkey %04x service_id %016llx sgid %pI6 dgid %pI6\n",
3461 be64_to_cpu(target->id_ext),
3462 be64_to_cpu(target->ioc_guid),
3463 be16_to_cpu(target->pkey),
3464 be64_to_cpu(target->service_id),
3465 target->sgid.raw, target->orig_dgid.raw);
3468 ret = count;
3470 out:
3471 mutex_unlock(&host->add_target_mutex);
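/*
 * The first scsi_host_put() balances the scsi_host_get() call above; the
 * extra put on failure presumably drops the reference obtained by
 * scsi_host_alloc() so that an unused SCSI host gets freed.
 */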
3473 scsi_host_put(target->scsi_host);
3474 if (ret < 0)
3475 scsi_host_put(target->scsi_host);
3477 return ret;
3479 err_disconnect:
3480 srp_disconnect_target(target);
3482 for (i = 0; i < target->ch_count; i++) {
3483 ch = &target->ch[i];
3484 srp_free_ch_ib(target, ch);
3485 srp_free_req_data(target, ch);
3488 kfree(target->ch);
3489 goto out;
3492 static DEVICE_ATTR(add_target, S_IWUSR, NULL, srp_create_target);
3494 static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
3495 char *buf)
3497 struct srp_host *host = container_of(dev, struct srp_host, dev);
3499 return sprintf(buf, "%s\n", host->srp_dev->dev->name);
3502 static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
3504 static ssize_t show_port(struct device *dev, struct device_attribute *attr,
3505 char *buf)
3507 struct srp_host *host = container_of(dev, struct srp_host, dev);
3509 return sprintf(buf, "%d\n", host->port);
3512 static DEVICE_ATTR(port, S_IRUGO, show_port, NULL);
3514 static struct srp_host *srp_add_port(struct srp_device *device, u8 port)
3516 struct srp_host *host;
3518 host = kzalloc(sizeof *host, GFP_KERNEL);
3519 if (!host)
3520 return NULL;
3522 INIT_LIST_HEAD(&host->target_list);
3523 spin_lock_init(&host->target_lock);
3524 init_completion(&host->released);
3525 mutex_init(&host->add_target_mutex);
3526 host->srp_dev = device;
3527 host->port = port;
3529 host->dev.class = &srp_class;
3530 host->dev.parent = device->dev->dma_device;
3531 dev_set_name(&host->dev, "srp-%s-%d", device->dev->name, port);
3533 if (device_register(&host->dev))
3534 goto free_host;
3535 if (device_create_file(&host->dev, &dev_attr_add_target))
3536 goto err_class;
3537 if (device_create_file(&host->dev, &dev_attr_ibdev))
3538 goto err_class;
3539 if (device_create_file(&host->dev, &dev_attr_port))
3540 goto err_class;
3542 return host;
3544 err_class:
3545 device_unregister(&host->dev);
3547 free_host:
3548 kfree(host);
3550 return NULL;
3553 static void srp_add_one(struct ib_device *device)
3555 struct srp_device *srp_dev;
3556 struct srp_host *host;
3557 int mr_page_shift, p;
3558 u64 max_pages_per_mr;
3559 unsigned int flags = 0;
3561 srp_dev = kzalloc(sizeof(*srp_dev), GFP_KERNEL);
3562 if (!srp_dev)
3563 return;
3566 * Use the smallest page size supported by the HCA, down to a
3567 * minimum of 4096 bytes. We're unlikely to build large sglists
3568 * out of smaller entries.
3570 mr_page_shift = max(12, ffs(device->attrs.page_size_cap) - 1);
3571 srp_dev->mr_page_size = 1 << mr_page_shift;
3572 srp_dev->mr_page_mask = ~((u64) srp_dev->mr_page_size - 1);
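/*
 * Example: an HCA whose smallest supported page size is 4 KiB yields
 * mr_page_shift = 12, mr_page_size = 4096 and mr_page_mask = ~0xfffULL.
 */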
3573 max_pages_per_mr = device->attrs.max_mr_size;
3574 do_div(max_pages_per_mr, srp_dev->mr_page_size);
3575 pr_debug("%s: %llu / %u = %llu <> %u\n", __func__,
3576 device->attrs.max_mr_size, srp_dev->mr_page_size,
3577 max_pages_per_mr, SRP_MAX_PAGES_PER_MR);
3578 srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR,
3579 max_pages_per_mr);
3581 srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
3582 device->map_phys_fmr && device->unmap_fmr);
3583 srp_dev->has_fr = (device->attrs.device_cap_flags &
3584 IB_DEVICE_MEM_MGT_EXTENSIONS);
3585 if (!never_register && !srp_dev->has_fmr && !srp_dev->has_fr) {
3586 dev_warn(&device->dev, "neither FMR nor FR is supported\n");
3587 } else if (!never_register &&
3588 device->attrs.max_mr_size >= 2 * srp_dev->mr_page_size) {
3589 srp_dev->use_fast_reg = (srp_dev->has_fr &&
3590 (!srp_dev->has_fmr || prefer_fr));
3591 srp_dev->use_fmr = !srp_dev->use_fast_reg && srp_dev->has_fmr;
3594 if (never_register || !register_always ||
3595 (!srp_dev->has_fmr && !srp_dev->has_fr))
3596 flags |= IB_PD_UNSAFE_GLOBAL_RKEY;
3598 if (srp_dev->use_fast_reg) {
3599 srp_dev->max_pages_per_mr =
3600 min_t(u32, srp_dev->max_pages_per_mr,
3601 device->attrs.max_fast_reg_page_list_len);
3603 srp_dev->mr_max_size = srp_dev->mr_page_size *
3604 srp_dev->max_pages_per_mr;
3605 pr_debug("%s: mr_page_shift = %d, device->max_mr_size = %#llx, device->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
3606 device->name, mr_page_shift, device->attrs.max_mr_size,
3607 device->attrs.max_fast_reg_page_list_len,
3608 srp_dev->max_pages_per_mr, srp_dev->mr_max_size);
3610 INIT_LIST_HEAD(&srp_dev->dev_list);
3612 srp_dev->dev = device;
3613 srp_dev->pd = ib_alloc_pd(device, flags);
3614 if (IS_ERR(srp_dev->pd))
3615 goto free_dev;
3618 for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) {
3619 host = srp_add_port(srp_dev, p);
3620 if (host)
3621 list_add_tail(&host->list, &srp_dev->dev_list);
3624 ib_set_client_data(device, &srp_client, srp_dev);
3625 return;
3627 free_dev:
3628 kfree(srp_dev);
3631 static void srp_remove_one(struct ib_device *device, void *client_data)
3633 struct srp_device *srp_dev;
3634 struct srp_host *host, *tmp_host;
3635 struct srp_target_port *target;
3637 srp_dev = client_data;
3638 if (!srp_dev)
3639 return;
3641 list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) {
3642 device_unregister(&host->dev);
3644 * Wait for the sysfs entry to go away, so that no new
3645 * target ports can be created.
3647 wait_for_completion(&host->released);
3650 * Remove all target ports.
3652 spin_lock(&host->target_lock);
3653 list_for_each_entry(target, &host->target_list, list)
3654 srp_queue_remove_work(target);
3655 spin_unlock(&host->target_lock);
3658 * Wait for tl_err and target port removal tasks.
3660 flush_workqueue(system_long_wq);
3661 flush_workqueue(srp_remove_wq);
3663 kfree(host);
3666 ib_dealloc_pd(srp_dev->pd);
3668 kfree(srp_dev);
3671 static struct srp_function_template ib_srp_transport_functions = {
3672 .has_rport_state = true,
3673 .reset_timer_if_blocked = true,
3674 .reconnect_delay = &srp_reconnect_delay,
3675 .fast_io_fail_tmo = &srp_fast_io_fail_tmo,
3676 .dev_loss_tmo = &srp_dev_loss_tmo,
3677 .reconnect = srp_rport_reconnect,
3678 .rport_delete = srp_rport_delete,
3679 .terminate_rport_io = srp_terminate_io,
3682 static int __init srp_init_module(void)
3684 int ret;
3686 if (srp_sg_tablesize) {
3687 pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n");
3688 if (!cmd_sg_entries)
3689 cmd_sg_entries = srp_sg_tablesize;
3692 if (!cmd_sg_entries)
3693 cmd_sg_entries = SRP_DEF_SG_TABLESIZE;
3695 if (cmd_sg_entries > 255) {
3696 pr_warn("Clamping cmd_sg_entries to 255\n");
3697 cmd_sg_entries = 255;
3700 if (!indirect_sg_entries)
3701 indirect_sg_entries = cmd_sg_entries;
3702 else if (indirect_sg_entries < cmd_sg_entries) {
3703 pr_warn("Bumping up indirect_sg_entries to match cmd_sg_entries (%u)\n",
3704 cmd_sg_entries);
3705 indirect_sg_entries = cmd_sg_entries;
3708 if (indirect_sg_entries > SG_MAX_SEGMENTS) {
3709 pr_warn("Clamping indirect_sg_entries to %u\n",
3710 SG_MAX_SEGMENTS);
3711 indirect_sg_entries = SG_MAX_SEGMENTS;
3714 srp_remove_wq = create_workqueue("srp_remove");
3715 if (!srp_remove_wq) {
3716 ret = -ENOMEM;
3717 goto out;
3720 ret = -ENOMEM;
3721 ib_srp_transport_template =
3722 srp_attach_transport(&ib_srp_transport_functions);
3723 if (!ib_srp_transport_template)
3724 goto destroy_wq;
3726 ret = class_register(&srp_class);
3727 if (ret) {
3728 pr_err("couldn't register class infiniband_srp\n");
3729 goto release_tr;
3732 ib_sa_register_client(&srp_sa_client);
3734 ret = ib_register_client(&srp_client);
3735 if (ret) {
3736 pr_err("couldn't register IB client\n");
3737 goto unreg_sa;
3740 out:
3741 return ret;
3743 unreg_sa:
3744 ib_sa_unregister_client(&srp_sa_client);
3745 class_unregister(&srp_class);
3747 release_tr:
3748 srp_release_transport(ib_srp_transport_template);
3750 destroy_wq:
3751 destroy_workqueue(srp_remove_wq);
3752 goto out;
3755 static void __exit srp_cleanup_module(void)
3757 ib_unregister_client(&srp_client);
3758 ib_sa_unregister_client(&srp_sa_client);
3759 class_unregister(&srp_class);
3760 srp_release_transport(ib_srp_transport_template);
3761 destroy_workqueue(srp_remove_wq);
3764 module_init(srp_init_module);
3765 module_exit(srp_cleanup_module);