/*
 * Common code for the NVMe target.
 * Copyright (c) 2015-2016 HGST, a Western Digital Company.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/random.h>
#include <linux/rculist.h>

#include "nvmet.h"

static struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX];
static DEFINE_IDA(cntlid_ida);

/*
 * This read/write semaphore is used to synchronize access to configuration
 * information on a target system that will result in discovery log page
 * information change for at least one host.
 * The full list of resources to be protected by this semaphore is:
 *
 *  - subsystems list
 *  - per-subsystem allowed hosts list
 *  - allow_any_host subsystem attribute
 *  - nvmet_genctr
 *  - the nvmet_transports array
 *
 * When updating any of those lists/structures the write lock is taken, while
 * the read lock is taken when reading (populating the discovery log page or
 * checking a host-subsystem link) to allow concurrent readers.
 */
DECLARE_RWSEM(nvmet_config_sem);

static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
                const char *subsysnqn);

u16 nvmet_copy_to_sgl(struct nvmet_req *req, off_t off, const void *buf,
                size_t len)
{
        if (sg_pcopy_from_buffer(req->sg, req->sg_cnt, buf, len, off) != len)
                return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
        return 0;
}

u16 nvmet_copy_from_sgl(struct nvmet_req *req, off_t off, void *buf, size_t len)
{
        if (sg_pcopy_to_buffer(req->sg, req->sg_cnt, buf, len, off) != len)
                return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
        return 0;
}

static unsigned int nvmet_max_nsid(struct nvmet_subsys *subsys)
{
        struct nvmet_ns *ns;

        if (list_empty(&subsys->namespaces))
                return 0;

        ns = list_last_entry(&subsys->namespaces, struct nvmet_ns, dev_link);
        return ns->nsid;
}

static u32 nvmet_async_event_result(struct nvmet_async_event *aen)
{
        return aen->event_type | (aen->event_info << 8) | (aen->log_page << 16);
}
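
/*
 * Fail any Asynchronous Event Request commands still queued on this
 * controller with an internal error; used when the admin queue goes away.
 */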
static void nvmet_async_events_free(struct nvmet_ctrl *ctrl)
{
        struct nvmet_req *req;

        while (1) {
                mutex_lock(&ctrl->lock);
                if (!ctrl->nr_async_event_cmds) {
                        mutex_unlock(&ctrl->lock);
                        return;
                }

                req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
                mutex_unlock(&ctrl->lock);
                nvmet_req_complete(req, NVME_SC_INTERNAL | NVME_SC_DNR);
        }
}
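
/*
 * Pair queued AENs with outstanding AER commands and complete them, one
 * event per command, until either list runs dry.
 */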
static void nvmet_async_event_work(struct work_struct *work)
{
        struct nvmet_ctrl *ctrl =
                container_of(work, struct nvmet_ctrl, async_event_work);
        struct nvmet_async_event *aen;
        struct nvmet_req *req;

        while (1) {
                mutex_lock(&ctrl->lock);
                aen = list_first_entry_or_null(&ctrl->async_events,
                                struct nvmet_async_event, entry);
                if (!aen || !ctrl->nr_async_event_cmds) {
                        mutex_unlock(&ctrl->lock);
                        return;
                }

                req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
                nvmet_set_result(req, nvmet_async_event_result(aen));

                list_del(&aen->entry);
                kfree(aen);

                mutex_unlock(&ctrl->lock);
                nvmet_req_complete(req, 0);
        }
}

static void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type,
                u8 event_info, u8 log_page)
{
        struct nvmet_async_event *aen;

        aen = kmalloc(sizeof(*aen), GFP_KERNEL);
        if (!aen)
                return;

        aen->event_type = event_type;
        aen->event_info = event_info;
        aen->log_page = log_page;

        mutex_lock(&ctrl->lock);
        list_add_tail(&aen->entry, &ctrl->async_events);
        mutex_unlock(&ctrl->lock);

        schedule_work(&ctrl->async_event_work);
}

int nvmet_register_transport(struct nvmet_fabrics_ops *ops)
{
        int ret = 0;

        down_write(&nvmet_config_sem);
        if (nvmet_transports[ops->type])
                ret = -EINVAL;
        else
                nvmet_transports[ops->type] = ops;
        up_write(&nvmet_config_sem);

        return ret;
}
EXPORT_SYMBOL_GPL(nvmet_register_transport);

void nvmet_unregister_transport(struct nvmet_fabrics_ops *ops)
{
        down_write(&nvmet_config_sem);
        nvmet_transports[ops->type] = NULL;
        up_write(&nvmet_config_sem);
}
EXPORT_SYMBOL_GPL(nvmet_unregister_transport);
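
/*
 * Called with nvmet_config_sem held for writing.  If the transport is not
 * loaded yet, the semaphore is dropped around request_module() so the
 * transport module can register itself.
 */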
int nvmet_enable_port(struct nvmet_port *port)
{
        struct nvmet_fabrics_ops *ops;
        int ret;

        lockdep_assert_held(&nvmet_config_sem);

        ops = nvmet_transports[port->disc_addr.trtype];
        if (!ops) {
                up_write(&nvmet_config_sem);
                request_module("nvmet-transport-%d", port->disc_addr.trtype);
                down_write(&nvmet_config_sem);
                ops = nvmet_transports[port->disc_addr.trtype];
                if (!ops) {
                        pr_err("transport type %d not supported\n",
                                port->disc_addr.trtype);
                        return -EINVAL;
                }
        }

        if (!try_module_get(ops->owner))
                return -EINVAL;

        ret = ops->add_port(port);
        if (ret) {
                module_put(ops->owner);
                return ret;
        }

        port->enabled = true;
        return 0;
}

void nvmet_disable_port(struct nvmet_port *port)
{
        struct nvmet_fabrics_ops *ops;

        lockdep_assert_held(&nvmet_config_sem);

        port->enabled = false;

        ops = nvmet_transports[port->disc_addr.trtype];
        ops->remove_port(port);
        module_put(ops->owner);
}

static void nvmet_keep_alive_timer(struct work_struct *work)
{
        struct nvmet_ctrl *ctrl = container_of(to_delayed_work(work),
                        struct nvmet_ctrl, ka_work);

        pr_err("ctrl %d keep-alive timer (%d seconds) expired!\n",
                ctrl->cntlid, ctrl->kato);

        nvmet_ctrl_fatal_error(ctrl);
}

static void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl)
{
        pr_debug("ctrl %d start keep-alive timer for %d secs\n",
                ctrl->cntlid, ctrl->kato);

        INIT_DELAYED_WORK(&ctrl->ka_work, nvmet_keep_alive_timer);
        schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
}

static void nvmet_stop_keep_alive_timer(struct nvmet_ctrl *ctrl)
{
        pr_debug("ctrl %d stop keep-alive\n", ctrl->cntlid);

        cancel_delayed_work_sync(&ctrl->ka_work);
}
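
/*
 * Namespace lookup walks the subsystem's RCU-protected namespaces list;
 * nvmet_find_namespace() additionally grabs a percpu reference so the
 * namespace stays alive for the duration of the request.
 */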
static struct nvmet_ns *__nvmet_find_namespace(struct nvmet_ctrl *ctrl,
                __le32 nsid)
{
        struct nvmet_ns *ns;

        list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link) {
                if (ns->nsid == le32_to_cpu(nsid))
                        return ns;
        }

        return NULL;
}

struct nvmet_ns *nvmet_find_namespace(struct nvmet_ctrl *ctrl, __le32 nsid)
{
        struct nvmet_ns *ns;

        rcu_read_lock();
        ns = __nvmet_find_namespace(ctrl, nsid);
        if (ns)
                percpu_ref_get(&ns->ref);
        rcu_read_unlock();

        return ns;
}

static void nvmet_destroy_namespace(struct percpu_ref *ref)
{
        struct nvmet_ns *ns = container_of(ref, struct nvmet_ns, ref);

        complete(&ns->disable_done);
}

void nvmet_put_namespace(struct nvmet_ns *ns)
{
        percpu_ref_put(&ns->ref);
}
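
/*
 * Enabling a namespace opens the backing block device, initializes its
 * percpu reference, inserts it into the subsystem's sorted namespaces list
 * and sends a notice AEN to every connected controller.
 */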
int nvmet_ns_enable(struct nvmet_ns *ns)
{
        struct nvmet_subsys *subsys = ns->subsys;
        struct nvmet_ctrl *ctrl;
        int ret = 0;

        mutex_lock(&subsys->lock);
        if (ns->enabled)
                goto out_unlock;

        ns->bdev = blkdev_get_by_path(ns->device_path, FMODE_READ | FMODE_WRITE,
                        NULL);
        if (IS_ERR(ns->bdev)) {
                pr_err("failed to open block device %s: (%ld)\n",
                        ns->device_path, PTR_ERR(ns->bdev));
                ret = PTR_ERR(ns->bdev);
                ns->bdev = NULL;
                goto out_unlock;
        }

        ns->size = i_size_read(ns->bdev->bd_inode);
        ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev));

        ret = percpu_ref_init(&ns->ref, nvmet_destroy_namespace,
                        0, GFP_KERNEL);
        if (ret)
                goto out_blkdev_put;

        if (ns->nsid > subsys->max_nsid)
                subsys->max_nsid = ns->nsid;

        /*
         * The namespaces list needs to be sorted to simplify the
         * implementation of the Identify Namespace List subcommand.
         */
        if (list_empty(&subsys->namespaces)) {
                list_add_tail_rcu(&ns->dev_link, &subsys->namespaces);
        } else {
                struct nvmet_ns *old;

                list_for_each_entry_rcu(old, &subsys->namespaces, dev_link) {
                        BUG_ON(ns->nsid == old->nsid);
                        if (ns->nsid < old->nsid)
                                break;
                }

                list_add_tail_rcu(&ns->dev_link, &old->dev_link);
        }

        list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
                nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE, 0, 0);

        ns->enabled = true;
        ret = 0;
out_unlock:
        mutex_unlock(&subsys->lock);
        return ret;
out_blkdev_put:
        blkdev_put(ns->bdev, FMODE_WRITE | FMODE_READ);
        ns->bdev = NULL;
        goto out_unlock;
}
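
/*
 * Disabling a namespace unlinks it from the subsystem, drains all
 * outstanding references, notifies the connected controllers and finally
 * releases the backing block device.
 */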
void nvmet_ns_disable(struct nvmet_ns *ns)
{
        struct nvmet_subsys *subsys = ns->subsys;
        struct nvmet_ctrl *ctrl;

        mutex_lock(&subsys->lock);
        if (!ns->enabled)
                goto out_unlock;

        ns->enabled = false;
        list_del_rcu(&ns->dev_link);
        if (ns->nsid == subsys->max_nsid)
                subsys->max_nsid = nvmet_max_nsid(subsys);
        mutex_unlock(&subsys->lock);

        /*
         * Now that we removed the namespace from the lookup list, we
         * can kill the percpu ref and wait for any remaining references
         * to be dropped, as well as an RCU grace period for anyone only
         * using the namespace under rcu_read_lock().  Note that we can't
         * use call_rcu here as we need to ensure the namespaces have
         * been fully destroyed before unloading the module.
         */
        percpu_ref_kill(&ns->ref);
        synchronize_rcu();
        wait_for_completion(&ns->disable_done);
        percpu_ref_exit(&ns->ref);

        mutex_lock(&subsys->lock);
        list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
                nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE, 0, 0);

        if (ns->bdev)
                blkdev_put(ns->bdev, FMODE_WRITE | FMODE_READ);
out_unlock:
        mutex_unlock(&subsys->lock);
}

void nvmet_ns_free(struct nvmet_ns *ns)
{
        nvmet_ns_disable(ns);

        kfree(ns->device_path);
        kfree(ns);
}

struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid)
{
        struct nvmet_ns *ns;

        ns = kzalloc(sizeof(*ns), GFP_KERNEL);
        if (!ns)
                return NULL;

        INIT_LIST_HEAD(&ns->dev_link);
        init_completion(&ns->disable_done);

        ns->nsid = nsid;
        ns->subsys = subsys;

        return ns;
}
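
/*
 * Advance the submission queue head with a cmpxchg loop (completions may
 * race on the same queue) and fill in the CQE fields before handing the
 * response back to the transport.
 */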
static void __nvmet_req_complete(struct nvmet_req *req, u16 status)
{
        u32 old_sqhd, new_sqhd;
        u16 sqhd;

        if (status)
                nvmet_set_status(req, status);

        if (req->sq->size) {
                do {
                        old_sqhd = req->sq->sqhd;
                        new_sqhd = (old_sqhd + 1) % req->sq->size;
                } while (cmpxchg(&req->sq->sqhd, old_sqhd, new_sqhd) !=
                                old_sqhd);
        }
        sqhd = req->sq->sqhd & 0x0000FFFF;
        req->rsp->sq_head = cpu_to_le16(sqhd);
        req->rsp->sq_id = cpu_to_le16(req->sq->qid);
        req->rsp->command_id = req->cmd->common.command_id;

        if (req->ns)
                nvmet_put_namespace(req->ns);
        req->ops->queue_response(req);
}

void nvmet_req_complete(struct nvmet_req *req, u16 status)
{
        __nvmet_req_complete(req, status);
        percpu_ref_put(&req->sq->ref);
}
EXPORT_SYMBOL_GPL(nvmet_req_complete);

void nvmet_cq_setup(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq,
                u16 qid, u16 size)
{
        cq->qid = qid;
        cq->size = size;

        ctrl->cqs[qid] = cq;
}

void nvmet_sq_setup(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq,
                u16 qid, u16 size)
{
        sq->sqhd = 0;
        sq->qid = qid;
        sq->size = size;

        ctrl->sqs[qid] = sq;
}

static void nvmet_confirm_sq(struct percpu_ref *ref)
{
        struct nvmet_sq *sq = container_of(ref, struct nvmet_sq, ref);

        complete(&sq->confirm_done);
}
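
/*
 * Queue teardown waits on two completions: ->confirm_done fires from
 * nvmet_confirm_sq() once the percpu ref has been killed, and ->free_done
 * fires from nvmet_sq_free() when the last request reference is dropped.
 */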
void nvmet_sq_destroy(struct nvmet_sq *sq)
{
        /*
         * If this is the admin queue, complete all AERs so that our
         * queue doesn't have outstanding requests on it.
         */
        if (sq->ctrl && sq->ctrl->sqs && sq->ctrl->sqs[0] == sq)
                nvmet_async_events_free(sq->ctrl);
        percpu_ref_kill_and_confirm(&sq->ref, nvmet_confirm_sq);
        wait_for_completion(&sq->confirm_done);
        wait_for_completion(&sq->free_done);
        percpu_ref_exit(&sq->ref);

        if (sq->ctrl) {
                nvmet_ctrl_put(sq->ctrl);
                sq->ctrl = NULL; /* allows reusing the queue later */
        }
}
EXPORT_SYMBOL_GPL(nvmet_sq_destroy);

static void nvmet_sq_free(struct percpu_ref *ref)
{
        struct nvmet_sq *sq = container_of(ref, struct nvmet_sq, ref);

        complete(&sq->free_done);
}

int nvmet_sq_init(struct nvmet_sq *sq)
{
        int ret;

        ret = percpu_ref_init(&sq->ref, nvmet_sq_free, 0, GFP_KERNEL);
        if (ret) {
                pr_err("percpu_ref init failed!\n");
                return ret;
        }
        init_completion(&sq->free_done);
        init_completion(&sq->confirm_done);

        return 0;
}
EXPORT_SYMBOL_GPL(nvmet_sq_init);
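
/*
 * Validate the incoming command and dispatch it to the right parser:
 * connect commands before a controller exists, I/O commands on non-zero
 * qids, then fabrics, discovery or admin commands on the admin queue.
 */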
bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
                struct nvmet_sq *sq, struct nvmet_fabrics_ops *ops)
{
        u8 flags = req->cmd->common.flags;
        u16 status;

        req->cq = cq;
        req->sq = sq;
        req->ops = ops;
        req->sg = NULL;
        req->sg_cnt = 0;
        req->transfer_len = 0;
        req->rsp->status = 0;
        req->ns = NULL;

        /* no support for fused commands yet */
        if (unlikely(flags & (NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND))) {
                status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
                goto fail;
        }

        /*
         * For fabrics, PSDT field shall describe metadata pointer (MPTR) that
         * contains an address of a single contiguous physical buffer that is
         * byte aligned.
         */
        if (unlikely((flags & NVME_CMD_SGL_ALL) != NVME_CMD_SGL_METABUF)) {
                status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
                goto fail;
        }

        if (unlikely(!req->sq->ctrl))
                /* will return an error for any Non-connect command: */
                status = nvmet_parse_connect_cmd(req);
        else if (likely(req->sq->qid != 0))
                status = nvmet_parse_io_cmd(req);
        else if (req->cmd->common.opcode == nvme_fabrics_command)
                status = nvmet_parse_fabrics_cmd(req);
        else if (req->sq->ctrl->subsys->type == NVME_NQN_DISC)
                status = nvmet_parse_discovery_cmd(req);
        else
                status = nvmet_parse_admin_cmd(req);

        if (status)
                goto fail;

        if (unlikely(!percpu_ref_tryget_live(&sq->ref))) {
                status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
                goto fail;
        }

        return true;

fail:
        __nvmet_req_complete(req, status);
        return false;
}
EXPORT_SYMBOL_GPL(nvmet_req_init);

void nvmet_req_uninit(struct nvmet_req *req)
{
        percpu_ref_put(&req->sq->ref);
        if (req->ns)
                nvmet_put_namespace(req->ns);
}
EXPORT_SYMBOL_GPL(nvmet_req_uninit);

void nvmet_req_execute(struct nvmet_req *req)
{
        if (unlikely(req->data_len != req->transfer_len))
                nvmet_req_complete(req, NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR);
        else
                req->execute(req);
}
EXPORT_SYMBOL_GPL(nvmet_req_execute);
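
/*
 * Helpers to extract the individual fields of the Controller Configuration
 * (CC) register value written by the host via Property Set.
 */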
static inline bool nvmet_cc_en(u32 cc)
{
        return (cc >> NVME_CC_EN_SHIFT) & 0x1;
}

static inline u8 nvmet_cc_css(u32 cc)
{
        return (cc >> NVME_CC_CSS_SHIFT) & 0x7;
}

static inline u8 nvmet_cc_mps(u32 cc)
{
        return (cc >> NVME_CC_MPS_SHIFT) & 0xf;
}

static inline u8 nvmet_cc_ams(u32 cc)
{
        return (cc >> NVME_CC_AMS_SHIFT) & 0x7;
}

static inline u8 nvmet_cc_shn(u32 cc)
{
        return (cc >> NVME_CC_SHN_SHIFT) & 0x3;
}

static inline u8 nvmet_cc_iosqes(u32 cc)
{
        return (cc >> NVME_CC_IOSQES_SHIFT) & 0xf;
}

static inline u8 nvmet_cc_iocqes(u32 cc)
{
        return (cc >> NVME_CC_IOCQES_SHIFT) & 0xf;
}

static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl)
{
        lockdep_assert_held(&ctrl->lock);

        if (nvmet_cc_iosqes(ctrl->cc) != NVME_NVM_IOSQES ||
            nvmet_cc_iocqes(ctrl->cc) != NVME_NVM_IOCQES ||
            nvmet_cc_mps(ctrl->cc) != 0 ||
            nvmet_cc_ams(ctrl->cc) != 0 ||
            nvmet_cc_css(ctrl->cc) != 0) {
                ctrl->csts = NVME_CSTS_CFS;
                return;
        }

        ctrl->csts = NVME_CSTS_RDY;
}

static void nvmet_clear_ctrl(struct nvmet_ctrl *ctrl)
{
        lockdep_assert_held(&ctrl->lock);

        /* XXX: tear down queues? */
        ctrl->csts &= ~NVME_CSTS_RDY;
        ctrl->cc = 0;
}

void nvmet_update_cc(struct nvmet_ctrl *ctrl, u32 new)
{
        u32 old;

        mutex_lock(&ctrl->lock);
        old = ctrl->cc;
        ctrl->cc = new;

        if (nvmet_cc_en(new) && !nvmet_cc_en(old))
                nvmet_start_ctrl(ctrl);
        if (!nvmet_cc_en(new) && nvmet_cc_en(old))
                nvmet_clear_ctrl(ctrl);
        if (nvmet_cc_shn(new) && !nvmet_cc_shn(old)) {
                nvmet_clear_ctrl(ctrl);
                ctrl->csts |= NVME_CSTS_SHST_CMPLT;
        }
        if (!nvmet_cc_shn(new) && nvmet_cc_shn(old))
                ctrl->csts &= ~NVME_CSTS_SHST_CMPLT;
        mutex_unlock(&ctrl->lock);
}

static void nvmet_init_cap(struct nvmet_ctrl *ctrl)
{
        /* command sets supported: NVMe command set: */
        ctrl->cap = (1ULL << 37);
        /* CC.EN timeout in 500msec units: */
        ctrl->cap |= (15ULL << 24);
        /* maximum queue entries supported: */
        ctrl->cap |= NVMET_QUEUE_SIZE - 1;
}
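
/*
 * Look up an existing controller by cntlid for an I/O queue connect and
 * take a reference on it; the host NQN must match the one that created
 * the controller.
 */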
u16 nvmet_ctrl_find_get(const char *subsysnqn, const char *hostnqn, u16 cntlid,
                struct nvmet_req *req, struct nvmet_ctrl **ret)
{
        struct nvmet_subsys *subsys;
        struct nvmet_ctrl *ctrl;
        u16 status = 0;

        subsys = nvmet_find_get_subsys(req->port, subsysnqn);
        if (!subsys) {
                pr_warn("connect request for invalid subsystem %s!\n",
                        subsysnqn);
                req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn);
                return NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
        }

        mutex_lock(&subsys->lock);
        list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
                if (ctrl->cntlid == cntlid) {
                        if (strncmp(hostnqn, ctrl->hostnqn, NVMF_NQN_SIZE)) {
                                pr_warn("hostnqn mismatch.\n");
                                continue;
                        }
                        if (!kref_get_unless_zero(&ctrl->ref))
                                continue;

                        *ret = ctrl;
                        goto out;
                }
        }

        pr_warn("could not find controller %d for subsys %s / host %s\n",
                cntlid, subsysnqn, hostnqn);
        req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(cntlid);
        status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;

out:
        mutex_unlock(&subsys->lock);
        nvmet_subsys_put(subsys);
        return status;
}

u16 nvmet_check_ctrl_status(struct nvmet_req *req, struct nvme_command *cmd)
{
        if (unlikely(!(req->sq->ctrl->cc & NVME_CC_ENABLE))) {
                pr_err("got io cmd %d while CC.EN == 0 on qid = %d\n",
                        cmd->common.opcode, req->sq->qid);
                return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
        }

        if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) {
                pr_err("got io cmd %d while CSTS.RDY == 0 on qid = %d\n",
                        cmd->common.opcode, req->sq->qid);
                return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
        }
        return 0;
}

static bool __nvmet_host_allowed(struct nvmet_subsys *subsys,
                const char *hostnqn)
{
        struct nvmet_host_link *p;

        if (subsys->allow_any_host)
                return true;

        list_for_each_entry(p, &subsys->hosts, entry) {
                if (!strcmp(nvmet_host_name(p->host), hostnqn))
                        return true;
        }

        return false;
}

static bool nvmet_host_discovery_allowed(struct nvmet_req *req,
                const char *hostnqn)
{
        struct nvmet_subsys_link *s;

        list_for_each_entry(s, &req->port->subsystems, entry) {
                if (__nvmet_host_allowed(s->subsys, hostnqn))
                        return true;
        }

        return false;
}

bool nvmet_host_allowed(struct nvmet_req *req, struct nvmet_subsys *subsys,
                const char *hostnqn)
{
        lockdep_assert_held(&nvmet_config_sem);

        if (subsys->type == NVME_NQN_DISC)
                return nvmet_host_discovery_allowed(req, hostnqn);
        else
                return __nvmet_host_allowed(subsys, hostnqn);
}
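
/*
 * Create a controller for an admin queue connect: check that the host may
 * access the subsystem, allocate the per-queue arrays, assign a cntlid and
 * start the keep-alive timer.
 */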
u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
                struct nvmet_req *req, u32 kato, struct nvmet_ctrl **ctrlp)
{
        struct nvmet_subsys *subsys;
        struct nvmet_ctrl *ctrl;
        int ret;
        u16 status;

        status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
        subsys = nvmet_find_get_subsys(req->port, subsysnqn);
        if (!subsys) {
                pr_warn("connect request for invalid subsystem %s!\n",
                        subsysnqn);
                req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn);
                goto out;
        }

        status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
        down_read(&nvmet_config_sem);
        if (!nvmet_host_allowed(req, subsys, hostnqn)) {
                pr_info("connect by host %s for subsystem %s not allowed\n",
                        hostnqn, subsysnqn);
                req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(hostnqn);
                up_read(&nvmet_config_sem);
                status = NVME_SC_CONNECT_INVALID_HOST | NVME_SC_DNR;
                goto out_put_subsystem;
        }
        up_read(&nvmet_config_sem);

        status = NVME_SC_INTERNAL;
        ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
        if (!ctrl)
                goto out_put_subsystem;
        mutex_init(&ctrl->lock);

        nvmet_init_cap(ctrl);

        INIT_WORK(&ctrl->async_event_work, nvmet_async_event_work);
        INIT_LIST_HEAD(&ctrl->async_events);

        memcpy(ctrl->subsysnqn, subsysnqn, NVMF_NQN_SIZE);
        memcpy(ctrl->hostnqn, hostnqn, NVMF_NQN_SIZE);

        kref_init(&ctrl->ref);
        ctrl->subsys = subsys;

        ctrl->cqs = kcalloc(subsys->max_qid + 1,
                        sizeof(struct nvmet_cq *),
                        GFP_KERNEL);
        if (!ctrl->cqs)
                goto out_free_ctrl;

        ctrl->sqs = kcalloc(subsys->max_qid + 1,
                        sizeof(struct nvmet_sq *),
                        GFP_KERNEL);
        if (!ctrl->sqs)
                goto out_free_cqs;

        ret = ida_simple_get(&cntlid_ida,
                        NVME_CNTLID_MIN, NVME_CNTLID_MAX,
                        GFP_KERNEL);
        if (ret < 0) {
                status = NVME_SC_CONNECT_CTRL_BUSY | NVME_SC_DNR;
                goto out_free_sqs;
        }
        ctrl->cntlid = ret;

        ctrl->ops = req->ops;
        if (ctrl->subsys->type == NVME_NQN_DISC) {
                /* Don't accept keep-alive timeout for discovery controllers */
                if (kato) {
                        status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
                        goto out_remove_ida;
                }

                /*
                 * Discovery controllers use some arbitrary high value in order
                 * to cleanup stale discovery sessions
                 *
                 * From the latest base diff RC:
                 * "The Keep Alive command is not supported by
                 * Discovery controllers. A transport may specify a
                 * fixed Discovery controller activity timeout value
                 * (e.g., 2 minutes). If no commands are received
                 * by a Discovery controller within that time
                 * period, the controller may perform the
                 * actions for Keep Alive Timer expiration".
                 */
                ctrl->kato = NVMET_DISC_KATO;
        } else {
                /* keep-alive timeout in seconds */
                ctrl->kato = DIV_ROUND_UP(kato, 1000);
        }
        nvmet_start_keep_alive_timer(ctrl);

        mutex_lock(&subsys->lock);
        list_add_tail(&ctrl->subsys_entry, &subsys->ctrls);
        mutex_unlock(&subsys->lock);

        *ctrlp = ctrl;
        return 0;

out_remove_ida:
        ida_simple_remove(&cntlid_ida, ctrl->cntlid);
out_free_sqs:
        kfree(ctrl->sqs);
out_free_cqs:
        kfree(ctrl->cqs);
out_free_ctrl:
        kfree(ctrl);
out_put_subsystem:
        nvmet_subsys_put(subsys);
out:
        return status;
}

static void nvmet_ctrl_free(struct kref *ref)
{
        struct nvmet_ctrl *ctrl = container_of(ref, struct nvmet_ctrl, ref);
        struct nvmet_subsys *subsys = ctrl->subsys;

        mutex_lock(&subsys->lock);
        list_del(&ctrl->subsys_entry);
        mutex_unlock(&subsys->lock);

        nvmet_stop_keep_alive_timer(ctrl);

        flush_work(&ctrl->async_event_work);
        cancel_work_sync(&ctrl->fatal_err_work);

        ida_simple_remove(&cntlid_ida, ctrl->cntlid);

        kfree(ctrl->sqs);
        kfree(ctrl->cqs);
        kfree(ctrl);

        nvmet_subsys_put(subsys);
}

void nvmet_ctrl_put(struct nvmet_ctrl *ctrl)
{
        kref_put(&ctrl->ref, nvmet_ctrl_free);
}

static void nvmet_fatal_error_handler(struct work_struct *work)
{
        struct nvmet_ctrl *ctrl =
                container_of(work, struct nvmet_ctrl, fatal_err_work);

        pr_err("ctrl %d fatal error occurred!\n", ctrl->cntlid);
        ctrl->ops->delete_ctrl(ctrl);
}

void nvmet_ctrl_fatal_error(struct nvmet_ctrl *ctrl)
{
        mutex_lock(&ctrl->lock);
        if (!(ctrl->csts & NVME_CSTS_CFS)) {
                ctrl->csts |= NVME_CSTS_CFS;
                INIT_WORK(&ctrl->fatal_err_work, nvmet_fatal_error_handler);
                schedule_work(&ctrl->fatal_err_work);
        }
        mutex_unlock(&ctrl->lock);
}
EXPORT_SYMBOL_GPL(nvmet_ctrl_fatal_error);
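
/*
 * Resolve a subsystem NQN for the given port.  The well-known discovery
 * NQN maps to the static discovery subsystem; anything else is looked up
 * in the port's configured subsystem list under nvmet_config_sem.
 */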
static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
                const char *subsysnqn)
{
        struct nvmet_subsys_link *p;

        if (!port)
                return NULL;

        if (!strncmp(NVME_DISC_SUBSYS_NAME, subsysnqn,
                        NVMF_NQN_SIZE)) {
                if (!kref_get_unless_zero(&nvmet_disc_subsys->ref))
                        return NULL;
                return nvmet_disc_subsys;
        }

        down_read(&nvmet_config_sem);
        list_for_each_entry(p, &port->subsystems, entry) {
                if (!strncmp(p->subsys->subsysnqn, subsysnqn,
                                NVMF_NQN_SIZE)) {
                        if (!kref_get_unless_zero(&p->subsys->ref))
                                break;
                        up_read(&nvmet_config_sem);
                        return p->subsys;
                }
        }
        up_read(&nvmet_config_sem);
        return NULL;
}

struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
                enum nvme_subsys_type type)
{
        struct nvmet_subsys *subsys;

        subsys = kzalloc(sizeof(*subsys), GFP_KERNEL);
        if (!subsys)
                return NULL;

        subsys->ver = NVME_VS(1, 3, 0); /* NVMe 1.3.0 */
        /* generate a random serial number as our controllers are ephemeral: */
        get_random_bytes(&subsys->serial, sizeof(subsys->serial));

        switch (type) {
        case NVME_NQN_NVME:
                subsys->max_qid = NVMET_NR_QUEUES;
                break;
        case NVME_NQN_DISC:
                subsys->max_qid = 0;
                break;
        default:
                pr_err("%s: Unknown Subsystem type - %d\n", __func__, type);
                kfree(subsys);
                return NULL;
        }
        subsys->type = type;
        subsys->subsysnqn = kstrndup(subsysnqn, NVMF_NQN_SIZE,
                        GFP_KERNEL);
        if (!subsys->subsysnqn) {
                kfree(subsys);
                return NULL;
        }

        kref_init(&subsys->ref);

        mutex_init(&subsys->lock);
        INIT_LIST_HEAD(&subsys->namespaces);
        INIT_LIST_HEAD(&subsys->ctrls);
        INIT_LIST_HEAD(&subsys->hosts);

        return subsys;
}

static void nvmet_subsys_free(struct kref *ref)
{
        struct nvmet_subsys *subsys =
                container_of(ref, struct nvmet_subsys, ref);

        WARN_ON_ONCE(!list_empty(&subsys->namespaces));

        kfree(subsys->subsysnqn);
        kfree(subsys);
}

void nvmet_subsys_del_ctrls(struct nvmet_subsys *subsys)
{
        struct nvmet_ctrl *ctrl;

        mutex_lock(&subsys->lock);
        list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
                ctrl->ops->delete_ctrl(ctrl);
        mutex_unlock(&subsys->lock);
}

void nvmet_subsys_put(struct nvmet_subsys *subsys)
{
        kref_put(&subsys->ref, nvmet_subsys_free);
}

static int __init nvmet_init(void)
{
        int error;

        error = nvmet_init_discovery();
        if (error)
                goto out;

        error = nvmet_init_configfs();
        if (error)
                goto out_exit_discovery;
        return 0;

out_exit_discovery:
        nvmet_exit_discovery();
out:
        return error;
}

static void __exit nvmet_exit(void)
{
        nvmet_exit_configfs();
        nvmet_exit_discovery();
        ida_destroy(&cntlid_ida);

        BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_entry) != 1024);
        BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_hdr) != 1024);
}

module_init(nvmet_init);
module_exit(nvmet_exit);

MODULE_LICENSE("GPL v2");