2 * Common code for the NVMe target.
3 * Copyright (c) 2015-2016 HGST, a Western Digital Company.
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
14 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
15 #include <linux/module.h>
16 #include <linux/random.h>
17 #include <linux/rculist.h>
21 struct workqueue_struct
*buffered_io_wq
;
22 static const struct nvmet_fabrics_ops
*nvmet_transports
[NVMF_TRTYPE_MAX
];
23 static DEFINE_IDA(cntlid_ida
);
26 * This read/write semaphore is used to synchronize access to configuration
27 * information on a target system that will result in discovery log page
28 * information change for at least one host.
29 * The full list of resources protected by this semaphore is:
32 * - per-subsystem allowed hosts list
33 * - allow_any_host subsystem attribute
35 * - the nvmet_transports array
37 * When updating any of those lists/structures write lock should be obtained,
38 * while when reading (populating discovery log page or checking host-subsystem
39 * link) read lock is obtained to allow concurrent reads.
41 DECLARE_RWSEM(nvmet_config_sem
);
43 u32 nvmet_ana_group_enabled
[NVMET_MAX_ANAGRPS
+ 1];
45 DECLARE_RWSEM(nvmet_ana_sem
);
47 static struct nvmet_subsys
*nvmet_find_get_subsys(struct nvmet_port
*port
,
48 const char *subsysnqn
);
50 u16
nvmet_copy_to_sgl(struct nvmet_req
*req
, off_t off
, const void *buf
,
53 if (sg_pcopy_from_buffer(req
->sg
, req
->sg_cnt
, buf
, len
, off
) != len
)
54 return NVME_SC_SGL_INVALID_DATA
| NVME_SC_DNR
;
58 u16
nvmet_copy_from_sgl(struct nvmet_req
*req
, off_t off
, void *buf
, size_t len
)
60 if (sg_pcopy_to_buffer(req
->sg
, req
->sg_cnt
, buf
, len
, off
) != len
)
61 return NVME_SC_SGL_INVALID_DATA
| NVME_SC_DNR
;
65 u16
nvmet_zero_sgl(struct nvmet_req
*req
, off_t off
, size_t len
)
67 if (sg_zero_buffer(req
->sg
, req
->sg_cnt
, len
, off
) != len
)
68 return NVME_SC_SGL_INVALID_DATA
| NVME_SC_DNR
;
72 static unsigned int nvmet_max_nsid(struct nvmet_subsys
*subsys
)
76 if (list_empty(&subsys
->namespaces
))
79 ns
= list_last_entry(&subsys
->namespaces
, struct nvmet_ns
, dev_link
);
83 static u32
nvmet_async_event_result(struct nvmet_async_event
*aen
)
85 return aen
->event_type
| (aen
->event_info
<< 8) | (aen
->log_page
<< 16);
88 static void nvmet_async_events_free(struct nvmet_ctrl
*ctrl
)
90 struct nvmet_req
*req
;
93 mutex_lock(&ctrl
->lock
);
94 if (!ctrl
->nr_async_event_cmds
) {
95 mutex_unlock(&ctrl
->lock
);
99 req
= ctrl
->async_event_cmds
[--ctrl
->nr_async_event_cmds
];
100 mutex_unlock(&ctrl
->lock
);
101 nvmet_req_complete(req
, NVME_SC_INTERNAL
| NVME_SC_DNR
);
105 static void nvmet_async_event_work(struct work_struct
*work
)
107 struct nvmet_ctrl
*ctrl
=
108 container_of(work
, struct nvmet_ctrl
, async_event_work
);
109 struct nvmet_async_event
*aen
;
110 struct nvmet_req
*req
;
113 mutex_lock(&ctrl
->lock
);
114 aen
= list_first_entry_or_null(&ctrl
->async_events
,
115 struct nvmet_async_event
, entry
);
116 if (!aen
|| !ctrl
->nr_async_event_cmds
) {
117 mutex_unlock(&ctrl
->lock
);
121 req
= ctrl
->async_event_cmds
[--ctrl
->nr_async_event_cmds
];
122 nvmet_set_result(req
, nvmet_async_event_result(aen
));
124 list_del(&aen
->entry
);
127 mutex_unlock(&ctrl
->lock
);
128 nvmet_req_complete(req
, 0);
132 static void nvmet_add_async_event(struct nvmet_ctrl
*ctrl
, u8 event_type
,
133 u8 event_info
, u8 log_page
)
135 struct nvmet_async_event
*aen
;
137 aen
= kmalloc(sizeof(*aen
), GFP_KERNEL
);
141 aen
->event_type
= event_type
;
142 aen
->event_info
= event_info
;
143 aen
->log_page
= log_page
;
145 mutex_lock(&ctrl
->lock
);
146 list_add_tail(&aen
->entry
, &ctrl
->async_events
);
147 mutex_unlock(&ctrl
->lock
);
149 schedule_work(&ctrl
->async_event_work
);
152 static bool nvmet_aen_disabled(struct nvmet_ctrl
*ctrl
, u32 aen
)
154 if (!(READ_ONCE(ctrl
->aen_enabled
) & aen
))
156 return test_and_set_bit(aen
, &ctrl
->aen_masked
);
159 static void nvmet_add_to_changed_ns_log(struct nvmet_ctrl
*ctrl
, __le32 nsid
)
163 mutex_lock(&ctrl
->lock
);
164 if (ctrl
->nr_changed_ns
> NVME_MAX_CHANGED_NAMESPACES
)
167 for (i
= 0; i
< ctrl
->nr_changed_ns
; i
++) {
168 if (ctrl
->changed_ns_list
[i
] == nsid
)
172 if (ctrl
->nr_changed_ns
== NVME_MAX_CHANGED_NAMESPACES
) {
173 ctrl
->changed_ns_list
[0] = cpu_to_le32(0xffffffff);
174 ctrl
->nr_changed_ns
= U32_MAX
;
178 ctrl
->changed_ns_list
[ctrl
->nr_changed_ns
++] = nsid
;
180 mutex_unlock(&ctrl
->lock
);
183 void nvmet_ns_changed(struct nvmet_subsys
*subsys
, u32 nsid
)
185 struct nvmet_ctrl
*ctrl
;
187 list_for_each_entry(ctrl
, &subsys
->ctrls
, subsys_entry
) {
188 nvmet_add_to_changed_ns_log(ctrl
, cpu_to_le32(nsid
));
189 if (nvmet_aen_disabled(ctrl
, NVME_AEN_CFG_NS_ATTR
))
191 nvmet_add_async_event(ctrl
, NVME_AER_TYPE_NOTICE
,
192 NVME_AER_NOTICE_NS_CHANGED
,
193 NVME_LOG_CHANGED_NS
);
197 void nvmet_send_ana_event(struct nvmet_subsys
*subsys
,
198 struct nvmet_port
*port
)
200 struct nvmet_ctrl
*ctrl
;
202 mutex_lock(&subsys
->lock
);
203 list_for_each_entry(ctrl
, &subsys
->ctrls
, subsys_entry
) {
204 if (port
&& ctrl
->port
!= port
)
206 if (nvmet_aen_disabled(ctrl
, NVME_AEN_CFG_ANA_CHANGE
))
208 nvmet_add_async_event(ctrl
, NVME_AER_TYPE_NOTICE
,
209 NVME_AER_NOTICE_ANA
, NVME_LOG_ANA
);
211 mutex_unlock(&subsys
->lock
);
214 void nvmet_port_send_ana_event(struct nvmet_port
*port
)
216 struct nvmet_subsys_link
*p
;
218 down_read(&nvmet_config_sem
);
219 list_for_each_entry(p
, &port
->subsystems
, entry
)
220 nvmet_send_ana_event(p
->subsys
, port
);
221 up_read(&nvmet_config_sem
);
224 int nvmet_register_transport(const struct nvmet_fabrics_ops
*ops
)
228 down_write(&nvmet_config_sem
);
229 if (nvmet_transports
[ops
->type
])
232 nvmet_transports
[ops
->type
] = ops
;
233 up_write(&nvmet_config_sem
);
237 EXPORT_SYMBOL_GPL(nvmet_register_transport
);
239 void nvmet_unregister_transport(const struct nvmet_fabrics_ops
*ops
)
241 down_write(&nvmet_config_sem
);
242 nvmet_transports
[ops
->type
] = NULL
;
243 up_write(&nvmet_config_sem
);
245 EXPORT_SYMBOL_GPL(nvmet_unregister_transport
);
247 int nvmet_enable_port(struct nvmet_port
*port
)
249 const struct nvmet_fabrics_ops
*ops
;
252 lockdep_assert_held(&nvmet_config_sem
);
254 ops
= nvmet_transports
[port
->disc_addr
.trtype
];
256 up_write(&nvmet_config_sem
);
257 request_module("nvmet-transport-%d", port
->disc_addr
.trtype
);
258 down_write(&nvmet_config_sem
);
259 ops
= nvmet_transports
[port
->disc_addr
.trtype
];
261 pr_err("transport type %d not supported\n",
262 port
->disc_addr
.trtype
);
267 if (!try_module_get(ops
->owner
))
270 ret
= ops
->add_port(port
);
272 module_put(ops
->owner
);
276 /* If the transport didn't set inline_data_size, then disable it. */
277 if (port
->inline_data_size
< 0)
278 port
->inline_data_size
= 0;
280 port
->enabled
= true;
284 void nvmet_disable_port(struct nvmet_port
*port
)
286 const struct nvmet_fabrics_ops
*ops
;
288 lockdep_assert_held(&nvmet_config_sem
);
290 port
->enabled
= false;
292 ops
= nvmet_transports
[port
->disc_addr
.trtype
];
293 ops
->remove_port(port
);
294 module_put(ops
->owner
);
297 static void nvmet_keep_alive_timer(struct work_struct
*work
)
299 struct nvmet_ctrl
*ctrl
= container_of(to_delayed_work(work
),
300 struct nvmet_ctrl
, ka_work
);
302 pr_err("ctrl %d keep-alive timer (%d seconds) expired!\n",
303 ctrl
->cntlid
, ctrl
->kato
);
305 nvmet_ctrl_fatal_error(ctrl
);
308 static void nvmet_start_keep_alive_timer(struct nvmet_ctrl
*ctrl
)
310 pr_debug("ctrl %d start keep-alive timer for %d secs\n",
311 ctrl
->cntlid
, ctrl
->kato
);
313 INIT_DELAYED_WORK(&ctrl
->ka_work
, nvmet_keep_alive_timer
);
314 schedule_delayed_work(&ctrl
->ka_work
, ctrl
->kato
* HZ
);
317 static void nvmet_stop_keep_alive_timer(struct nvmet_ctrl
*ctrl
)
319 pr_debug("ctrl %d stop keep-alive\n", ctrl
->cntlid
);
321 cancel_delayed_work_sync(&ctrl
->ka_work
);
324 static struct nvmet_ns
*__nvmet_find_namespace(struct nvmet_ctrl
*ctrl
,
329 list_for_each_entry_rcu(ns
, &ctrl
->subsys
->namespaces
, dev_link
) {
330 if (ns
->nsid
== le32_to_cpu(nsid
))
337 struct nvmet_ns
*nvmet_find_namespace(struct nvmet_ctrl
*ctrl
, __le32 nsid
)
342 ns
= __nvmet_find_namespace(ctrl
, nsid
);
344 percpu_ref_get(&ns
->ref
);
350 static void nvmet_destroy_namespace(struct percpu_ref
*ref
)
352 struct nvmet_ns
*ns
= container_of(ref
, struct nvmet_ns
, ref
);
354 complete(&ns
->disable_done
);
357 void nvmet_put_namespace(struct nvmet_ns
*ns
)
359 percpu_ref_put(&ns
->ref
);
/*
 * Disable the backing device of a namespace.
 *
 * Both backend disable helpers are invoked unconditionally, block device
 * first and file second.
 */
static void nvmet_ns_dev_disable(struct nvmet_ns *ns)
{
	nvmet_bdev_ns_disable(ns);
	nvmet_file_ns_disable(ns);
}
368 int nvmet_ns_enable(struct nvmet_ns
*ns
)
370 struct nvmet_subsys
*subsys
= ns
->subsys
;
373 mutex_lock(&subsys
->lock
);
375 if (subsys
->nr_namespaces
== NVMET_MAX_NAMESPACES
)
381 ret
= nvmet_bdev_ns_enable(ns
);
383 ret
= nvmet_file_ns_enable(ns
);
387 ret
= percpu_ref_init(&ns
->ref
, nvmet_destroy_namespace
,
392 if (ns
->nsid
> subsys
->max_nsid
)
393 subsys
->max_nsid
= ns
->nsid
;
396 * The namespaces list needs to be sorted to simplify the implementation
397 * of the Identify Namespace List subcommand.
399 if (list_empty(&subsys
->namespaces
)) {
400 list_add_tail_rcu(&ns
->dev_link
, &subsys
->namespaces
);
402 struct nvmet_ns
*old
;
404 list_for_each_entry_rcu(old
, &subsys
->namespaces
, dev_link
) {
405 BUG_ON(ns
->nsid
== old
->nsid
);
406 if (ns
->nsid
< old
->nsid
)
410 list_add_tail_rcu(&ns
->dev_link
, &old
->dev_link
);
412 subsys
->nr_namespaces
++;
414 nvmet_ns_changed(subsys
, ns
->nsid
);
418 mutex_unlock(&subsys
->lock
);
421 nvmet_ns_dev_disable(ns
);
425 void nvmet_ns_disable(struct nvmet_ns
*ns
)
427 struct nvmet_subsys
*subsys
= ns
->subsys
;
429 mutex_lock(&subsys
->lock
);
434 list_del_rcu(&ns
->dev_link
);
435 if (ns
->nsid
== subsys
->max_nsid
)
436 subsys
->max_nsid
= nvmet_max_nsid(subsys
);
437 mutex_unlock(&subsys
->lock
);
440 * Now that we removed the namespaces from the lookup list, we
441 * can kill the per_cpu ref and wait for any remaining references
442 * to be dropped, as well as a RCU grace period for anyone only
443 * using the namespace under rcu_read_lock(). Note that we can't
444 * use call_rcu here as we need to ensure the namespaces have
445 * been fully destroyed before unloading the module.
447 percpu_ref_kill(&ns
->ref
);
449 wait_for_completion(&ns
->disable_done
);
450 percpu_ref_exit(&ns
->ref
);
452 mutex_lock(&subsys
->lock
);
453 subsys
->nr_namespaces
--;
454 nvmet_ns_changed(subsys
, ns
->nsid
);
455 nvmet_ns_dev_disable(ns
);
457 mutex_unlock(&subsys
->lock
);
460 void nvmet_ns_free(struct nvmet_ns
*ns
)
462 nvmet_ns_disable(ns
);
464 down_write(&nvmet_ana_sem
);
465 nvmet_ana_group_enabled
[ns
->anagrpid
]--;
466 up_write(&nvmet_ana_sem
);
468 kfree(ns
->device_path
);
472 struct nvmet_ns
*nvmet_ns_alloc(struct nvmet_subsys
*subsys
, u32 nsid
)
476 ns
= kzalloc(sizeof(*ns
), GFP_KERNEL
);
480 INIT_LIST_HEAD(&ns
->dev_link
);
481 init_completion(&ns
->disable_done
);
486 down_write(&nvmet_ana_sem
);
487 ns
->anagrpid
= NVMET_DEFAULT_ANA_GRPID
;
488 nvmet_ana_group_enabled
[ns
->anagrpid
]++;
489 up_write(&nvmet_ana_sem
);
492 ns
->buffered_io
= false;
497 static void __nvmet_req_complete(struct nvmet_req
*req
, u16 status
)
499 u32 old_sqhd
, new_sqhd
;
503 nvmet_set_status(req
, status
);
507 old_sqhd
= req
->sq
->sqhd
;
508 new_sqhd
= (old_sqhd
+ 1) % req
->sq
->size
;
509 } while (cmpxchg(&req
->sq
->sqhd
, old_sqhd
, new_sqhd
) !=
512 sqhd
= req
->sq
->sqhd
& 0x0000FFFF;
513 req
->rsp
->sq_head
= cpu_to_le16(sqhd
);
514 req
->rsp
->sq_id
= cpu_to_le16(req
->sq
->qid
);
515 req
->rsp
->command_id
= req
->cmd
->common
.command_id
;
518 nvmet_put_namespace(req
->ns
);
519 req
->ops
->queue_response(req
);
522 void nvmet_req_complete(struct nvmet_req
*req
, u16 status
)
524 __nvmet_req_complete(req
, status
);
525 percpu_ref_put(&req
->sq
->ref
);
527 EXPORT_SYMBOL_GPL(nvmet_req_complete
);
529 void nvmet_cq_setup(struct nvmet_ctrl
*ctrl
, struct nvmet_cq
*cq
,
538 void nvmet_sq_setup(struct nvmet_ctrl
*ctrl
, struct nvmet_sq
*sq
,
548 static void nvmet_confirm_sq(struct percpu_ref
*ref
)
550 struct nvmet_sq
*sq
= container_of(ref
, struct nvmet_sq
, ref
);
552 complete(&sq
->confirm_done
);
555 void nvmet_sq_destroy(struct nvmet_sq
*sq
)
558 * If this is the admin queue, complete all AERs so that our
559 * queue doesn't have outstanding requests on it.
561 if (sq
->ctrl
&& sq
->ctrl
->sqs
&& sq
->ctrl
->sqs
[0] == sq
)
562 nvmet_async_events_free(sq
->ctrl
);
563 percpu_ref_kill_and_confirm(&sq
->ref
, nvmet_confirm_sq
);
564 wait_for_completion(&sq
->confirm_done
);
565 wait_for_completion(&sq
->free_done
);
566 percpu_ref_exit(&sq
->ref
);
569 nvmet_ctrl_put(sq
->ctrl
);
570 sq
->ctrl
= NULL
; /* allows reusing the queue later */
573 EXPORT_SYMBOL_GPL(nvmet_sq_destroy
);
575 static void nvmet_sq_free(struct percpu_ref
*ref
)
577 struct nvmet_sq
*sq
= container_of(ref
, struct nvmet_sq
, ref
);
579 complete(&sq
->free_done
);
582 int nvmet_sq_init(struct nvmet_sq
*sq
)
586 ret
= percpu_ref_init(&sq
->ref
, nvmet_sq_free
, 0, GFP_KERNEL
);
588 pr_err("percpu_ref init failed!\n");
591 init_completion(&sq
->free_done
);
592 init_completion(&sq
->confirm_done
);
596 EXPORT_SYMBOL_GPL(nvmet_sq_init
);
598 static inline u16
nvmet_check_ana_state(struct nvmet_port
*port
,
601 enum nvme_ana_state state
= port
->ana_state
[ns
->anagrpid
];
603 if (unlikely(state
== NVME_ANA_INACCESSIBLE
))
604 return NVME_SC_ANA_INACCESSIBLE
;
605 if (unlikely(state
== NVME_ANA_PERSISTENT_LOSS
))
606 return NVME_SC_ANA_PERSISTENT_LOSS
;
607 if (unlikely(state
== NVME_ANA_CHANGE
))
608 return NVME_SC_ANA_TRANSITION
;
612 static inline u16
nvmet_io_cmd_check_access(struct nvmet_req
*req
)
614 if (unlikely(req
->ns
->readonly
)) {
615 switch (req
->cmd
->common
.opcode
) {
620 return NVME_SC_NS_WRITE_PROTECTED
;
627 static u16
nvmet_parse_io_cmd(struct nvmet_req
*req
)
629 struct nvme_command
*cmd
= req
->cmd
;
632 ret
= nvmet_check_ctrl_status(req
, cmd
);
636 req
->ns
= nvmet_find_namespace(req
->sq
->ctrl
, cmd
->rw
.nsid
);
637 if (unlikely(!req
->ns
))
638 return NVME_SC_INVALID_NS
| NVME_SC_DNR
;
639 ret
= nvmet_check_ana_state(req
->port
, req
->ns
);
642 ret
= nvmet_io_cmd_check_access(req
);
647 return nvmet_file_parse_io_cmd(req
);
649 return nvmet_bdev_parse_io_cmd(req
);
652 bool nvmet_req_init(struct nvmet_req
*req
, struct nvmet_cq
*cq
,
653 struct nvmet_sq
*sq
, const struct nvmet_fabrics_ops
*ops
)
655 u8 flags
= req
->cmd
->common
.flags
;
663 req
->transfer_len
= 0;
664 req
->rsp
->status
= 0;
667 /* no support for fused commands yet */
668 if (unlikely(flags
& (NVME_CMD_FUSE_FIRST
| NVME_CMD_FUSE_SECOND
))) {
669 status
= NVME_SC_INVALID_FIELD
| NVME_SC_DNR
;
674 * For fabrics, PSDT field shall describe metadata pointer (MPTR) that
675 * contains an address of a single contiguous physical buffer that is
678 if (unlikely((flags
& NVME_CMD_SGL_ALL
) != NVME_CMD_SGL_METABUF
)) {
679 status
= NVME_SC_INVALID_FIELD
| NVME_SC_DNR
;
683 if (unlikely(!req
->sq
->ctrl
))
684 /* will return an error for any Non-connect command: */
685 status
= nvmet_parse_connect_cmd(req
);
686 else if (likely(req
->sq
->qid
!= 0))
687 status
= nvmet_parse_io_cmd(req
);
688 else if (req
->cmd
->common
.opcode
== nvme_fabrics_command
)
689 status
= nvmet_parse_fabrics_cmd(req
);
690 else if (req
->sq
->ctrl
->subsys
->type
== NVME_NQN_DISC
)
691 status
= nvmet_parse_discovery_cmd(req
);
693 status
= nvmet_parse_admin_cmd(req
);
698 if (unlikely(!percpu_ref_tryget_live(&sq
->ref
))) {
699 status
= NVME_SC_INVALID_FIELD
| NVME_SC_DNR
;
706 __nvmet_req_complete(req
, status
);
709 EXPORT_SYMBOL_GPL(nvmet_req_init
);
711 void nvmet_req_uninit(struct nvmet_req
*req
)
713 percpu_ref_put(&req
->sq
->ref
);
715 nvmet_put_namespace(req
->ns
);
717 EXPORT_SYMBOL_GPL(nvmet_req_uninit
);
719 void nvmet_req_execute(struct nvmet_req
*req
)
721 if (unlikely(req
->data_len
!= req
->transfer_len
))
722 nvmet_req_complete(req
, NVME_SC_SGL_INVALID_DATA
| NVME_SC_DNR
);
726 EXPORT_SYMBOL_GPL(nvmet_req_execute
);
728 static inline bool nvmet_cc_en(u32 cc
)
730 return (cc
>> NVME_CC_EN_SHIFT
) & 0x1;
733 static inline u8
nvmet_cc_css(u32 cc
)
735 return (cc
>> NVME_CC_CSS_SHIFT
) & 0x7;
738 static inline u8
nvmet_cc_mps(u32 cc
)
740 return (cc
>> NVME_CC_MPS_SHIFT
) & 0xf;
743 static inline u8
nvmet_cc_ams(u32 cc
)
745 return (cc
>> NVME_CC_AMS_SHIFT
) & 0x7;
748 static inline u8
nvmet_cc_shn(u32 cc
)
750 return (cc
>> NVME_CC_SHN_SHIFT
) & 0x3;
753 static inline u8
nvmet_cc_iosqes(u32 cc
)
755 return (cc
>> NVME_CC_IOSQES_SHIFT
) & 0xf;
758 static inline u8
nvmet_cc_iocqes(u32 cc
)
760 return (cc
>> NVME_CC_IOCQES_SHIFT
) & 0xf;
763 static void nvmet_start_ctrl(struct nvmet_ctrl
*ctrl
)
765 lockdep_assert_held(&ctrl
->lock
);
767 if (nvmet_cc_iosqes(ctrl
->cc
) != NVME_NVM_IOSQES
||
768 nvmet_cc_iocqes(ctrl
->cc
) != NVME_NVM_IOCQES
||
769 nvmet_cc_mps(ctrl
->cc
) != 0 ||
770 nvmet_cc_ams(ctrl
->cc
) != 0 ||
771 nvmet_cc_css(ctrl
->cc
) != 0) {
772 ctrl
->csts
= NVME_CSTS_CFS
;
776 ctrl
->csts
= NVME_CSTS_RDY
;
779 * Controllers that are not yet enabled should not really enforce the
780 * keep alive timeout, but we still want to track a timeout and cleanup
781 * in case a host died before it enabled the controller. Hence, simply
782 * reset the keep alive timer when the controller is enabled.
784 mod_delayed_work(system_wq
, &ctrl
->ka_work
, ctrl
->kato
* HZ
);
787 static void nvmet_clear_ctrl(struct nvmet_ctrl
*ctrl
)
789 lockdep_assert_held(&ctrl
->lock
);
791 /* XXX: tear down queues? */
792 ctrl
->csts
&= ~NVME_CSTS_RDY
;
796 void nvmet_update_cc(struct nvmet_ctrl
*ctrl
, u32
new)
800 mutex_lock(&ctrl
->lock
);
804 if (nvmet_cc_en(new) && !nvmet_cc_en(old
))
805 nvmet_start_ctrl(ctrl
);
806 if (!nvmet_cc_en(new) && nvmet_cc_en(old
))
807 nvmet_clear_ctrl(ctrl
);
808 if (nvmet_cc_shn(new) && !nvmet_cc_shn(old
)) {
809 nvmet_clear_ctrl(ctrl
);
810 ctrl
->csts
|= NVME_CSTS_SHST_CMPLT
;
812 if (!nvmet_cc_shn(new) && nvmet_cc_shn(old
))
813 ctrl
->csts
&= ~NVME_CSTS_SHST_CMPLT
;
814 mutex_unlock(&ctrl
->lock
);
817 static void nvmet_init_cap(struct nvmet_ctrl
*ctrl
)
819 /* command sets supported: NVMe command set: */
820 ctrl
->cap
= (1ULL << 37);
821 /* CC.EN timeout in 500msec units: */
822 ctrl
->cap
|= (15ULL << 24);
823 /* maximum queue entries supported: */
824 ctrl
->cap
|= NVMET_QUEUE_SIZE
- 1;
827 u16
nvmet_ctrl_find_get(const char *subsysnqn
, const char *hostnqn
, u16 cntlid
,
828 struct nvmet_req
*req
, struct nvmet_ctrl
**ret
)
830 struct nvmet_subsys
*subsys
;
831 struct nvmet_ctrl
*ctrl
;
834 subsys
= nvmet_find_get_subsys(req
->port
, subsysnqn
);
836 pr_warn("connect request for invalid subsystem %s!\n",
838 req
->rsp
->result
.u32
= IPO_IATTR_CONNECT_DATA(subsysnqn
);
839 return NVME_SC_CONNECT_INVALID_PARAM
| NVME_SC_DNR
;
842 mutex_lock(&subsys
->lock
);
843 list_for_each_entry(ctrl
, &subsys
->ctrls
, subsys_entry
) {
844 if (ctrl
->cntlid
== cntlid
) {
845 if (strncmp(hostnqn
, ctrl
->hostnqn
, NVMF_NQN_SIZE
)) {
846 pr_warn("hostnqn mismatch.\n");
849 if (!kref_get_unless_zero(&ctrl
->ref
))
857 pr_warn("could not find controller %d for subsys %s / host %s\n",
858 cntlid
, subsysnqn
, hostnqn
);
859 req
->rsp
->result
.u32
= IPO_IATTR_CONNECT_DATA(cntlid
);
860 status
= NVME_SC_CONNECT_INVALID_PARAM
| NVME_SC_DNR
;
863 mutex_unlock(&subsys
->lock
);
864 nvmet_subsys_put(subsys
);
868 u16
nvmet_check_ctrl_status(struct nvmet_req
*req
, struct nvme_command
*cmd
)
870 if (unlikely(!(req
->sq
->ctrl
->cc
& NVME_CC_ENABLE
))) {
871 pr_err("got cmd %d while CC.EN == 0 on qid = %d\n",
872 cmd
->common
.opcode
, req
->sq
->qid
);
873 return NVME_SC_CMD_SEQ_ERROR
| NVME_SC_DNR
;
876 if (unlikely(!(req
->sq
->ctrl
->csts
& NVME_CSTS_RDY
))) {
877 pr_err("got cmd %d while CSTS.RDY == 0 on qid = %d\n",
878 cmd
->common
.opcode
, req
->sq
->qid
);
879 return NVME_SC_CMD_SEQ_ERROR
| NVME_SC_DNR
;
884 static bool __nvmet_host_allowed(struct nvmet_subsys
*subsys
,
887 struct nvmet_host_link
*p
;
889 if (subsys
->allow_any_host
)
892 list_for_each_entry(p
, &subsys
->hosts
, entry
) {
893 if (!strcmp(nvmet_host_name(p
->host
), hostnqn
))
900 static bool nvmet_host_discovery_allowed(struct nvmet_req
*req
,
903 struct nvmet_subsys_link
*s
;
905 list_for_each_entry(s
, &req
->port
->subsystems
, entry
) {
906 if (__nvmet_host_allowed(s
->subsys
, hostnqn
))
913 bool nvmet_host_allowed(struct nvmet_req
*req
, struct nvmet_subsys
*subsys
,
916 lockdep_assert_held(&nvmet_config_sem
);
918 if (subsys
->type
== NVME_NQN_DISC
)
919 return nvmet_host_discovery_allowed(req
, hostnqn
);
921 return __nvmet_host_allowed(subsys
, hostnqn
);
924 u16
nvmet_alloc_ctrl(const char *subsysnqn
, const char *hostnqn
,
925 struct nvmet_req
*req
, u32 kato
, struct nvmet_ctrl
**ctrlp
)
927 struct nvmet_subsys
*subsys
;
928 struct nvmet_ctrl
*ctrl
;
932 status
= NVME_SC_CONNECT_INVALID_PARAM
| NVME_SC_DNR
;
933 subsys
= nvmet_find_get_subsys(req
->port
, subsysnqn
);
935 pr_warn("connect request for invalid subsystem %s!\n",
937 req
->rsp
->result
.u32
= IPO_IATTR_CONNECT_DATA(subsysnqn
);
941 status
= NVME_SC_CONNECT_INVALID_PARAM
| NVME_SC_DNR
;
942 down_read(&nvmet_config_sem
);
943 if (!nvmet_host_allowed(req
, subsys
, hostnqn
)) {
944 pr_info("connect by host %s for subsystem %s not allowed\n",
946 req
->rsp
->result
.u32
= IPO_IATTR_CONNECT_DATA(hostnqn
);
947 up_read(&nvmet_config_sem
);
948 status
= NVME_SC_CONNECT_INVALID_HOST
| NVME_SC_DNR
;
949 goto out_put_subsystem
;
951 up_read(&nvmet_config_sem
);
953 status
= NVME_SC_INTERNAL
;
954 ctrl
= kzalloc(sizeof(*ctrl
), GFP_KERNEL
);
956 goto out_put_subsystem
;
957 mutex_init(&ctrl
->lock
);
959 nvmet_init_cap(ctrl
);
961 ctrl
->port
= req
->port
;
963 INIT_WORK(&ctrl
->async_event_work
, nvmet_async_event_work
);
964 INIT_LIST_HEAD(&ctrl
->async_events
);
966 memcpy(ctrl
->subsysnqn
, subsysnqn
, NVMF_NQN_SIZE
);
967 memcpy(ctrl
->hostnqn
, hostnqn
, NVMF_NQN_SIZE
);
969 kref_init(&ctrl
->ref
);
970 ctrl
->subsys
= subsys
;
971 WRITE_ONCE(ctrl
->aen_enabled
, NVMET_AEN_CFG_OPTIONAL
);
973 ctrl
->changed_ns_list
= kmalloc_array(NVME_MAX_CHANGED_NAMESPACES
,
974 sizeof(__le32
), GFP_KERNEL
);
975 if (!ctrl
->changed_ns_list
)
978 ctrl
->cqs
= kcalloc(subsys
->max_qid
+ 1,
979 sizeof(struct nvmet_cq
*),
982 goto out_free_changed_ns_list
;
984 ctrl
->sqs
= kcalloc(subsys
->max_qid
+ 1,
985 sizeof(struct nvmet_sq
*),
990 ret
= ida_simple_get(&cntlid_ida
,
991 NVME_CNTLID_MIN
, NVME_CNTLID_MAX
,
994 status
= NVME_SC_CONNECT_CTRL_BUSY
| NVME_SC_DNR
;
999 ctrl
->ops
= req
->ops
;
1000 if (ctrl
->subsys
->type
== NVME_NQN_DISC
) {
1001 /* Don't accept keep-alive timeout for discovery controllers */
1003 status
= NVME_SC_INVALID_FIELD
| NVME_SC_DNR
;
1004 goto out_remove_ida
;
1008 * Discovery controllers use some arbitrary high value in order
1009 * to cleanup stale discovery sessions
1011 * From the latest base diff RC:
1012 * "The Keep Alive command is not supported by
1013 * Discovery controllers. A transport may specify a
1014 * fixed Discovery controller activity timeout value
1015 * (e.g., 2 minutes). If no commands are received
1016 * by a Discovery controller within that time
1017 * period, the controller may perform the
1018 * actions for Keep Alive Timer expiration".
1020 ctrl
->kato
= NVMET_DISC_KATO
;
1022 /* keep-alive timeout in seconds */
1023 ctrl
->kato
= DIV_ROUND_UP(kato
, 1000);
1025 nvmet_start_keep_alive_timer(ctrl
);
1027 mutex_lock(&subsys
->lock
);
1028 list_add_tail(&ctrl
->subsys_entry
, &subsys
->ctrls
);
1029 mutex_unlock(&subsys
->lock
);
1035 ida_simple_remove(&cntlid_ida
, ctrl
->cntlid
);
1040 out_free_changed_ns_list
:
1041 kfree(ctrl
->changed_ns_list
);
1045 nvmet_subsys_put(subsys
);
1050 static void nvmet_ctrl_free(struct kref
*ref
)
1052 struct nvmet_ctrl
*ctrl
= container_of(ref
, struct nvmet_ctrl
, ref
);
1053 struct nvmet_subsys
*subsys
= ctrl
->subsys
;
1055 mutex_lock(&subsys
->lock
);
1056 list_del(&ctrl
->subsys_entry
);
1057 mutex_unlock(&subsys
->lock
);
1059 nvmet_stop_keep_alive_timer(ctrl
);
1061 flush_work(&ctrl
->async_event_work
);
1062 cancel_work_sync(&ctrl
->fatal_err_work
);
1064 ida_simple_remove(&cntlid_ida
, ctrl
->cntlid
);
1068 kfree(ctrl
->changed_ns_list
);
1071 nvmet_subsys_put(subsys
);
1074 void nvmet_ctrl_put(struct nvmet_ctrl
*ctrl
)
1076 kref_put(&ctrl
->ref
, nvmet_ctrl_free
);
1079 static void nvmet_fatal_error_handler(struct work_struct
*work
)
1081 struct nvmet_ctrl
*ctrl
=
1082 container_of(work
, struct nvmet_ctrl
, fatal_err_work
);
1084 pr_err("ctrl %d fatal error occurred!\n", ctrl
->cntlid
);
1085 ctrl
->ops
->delete_ctrl(ctrl
);
1088 void nvmet_ctrl_fatal_error(struct nvmet_ctrl
*ctrl
)
1090 mutex_lock(&ctrl
->lock
);
1091 if (!(ctrl
->csts
& NVME_CSTS_CFS
)) {
1092 ctrl
->csts
|= NVME_CSTS_CFS
;
1093 INIT_WORK(&ctrl
->fatal_err_work
, nvmet_fatal_error_handler
);
1094 schedule_work(&ctrl
->fatal_err_work
);
1096 mutex_unlock(&ctrl
->lock
);
1098 EXPORT_SYMBOL_GPL(nvmet_ctrl_fatal_error
);
1100 static struct nvmet_subsys
*nvmet_find_get_subsys(struct nvmet_port
*port
,
1101 const char *subsysnqn
)
1103 struct nvmet_subsys_link
*p
;
1108 if (!strncmp(NVME_DISC_SUBSYS_NAME
, subsysnqn
,
1110 if (!kref_get_unless_zero(&nvmet_disc_subsys
->ref
))
1112 return nvmet_disc_subsys
;
1115 down_read(&nvmet_config_sem
);
1116 list_for_each_entry(p
, &port
->subsystems
, entry
) {
1117 if (!strncmp(p
->subsys
->subsysnqn
, subsysnqn
,
1119 if (!kref_get_unless_zero(&p
->subsys
->ref
))
1121 up_read(&nvmet_config_sem
);
1125 up_read(&nvmet_config_sem
);
1129 struct nvmet_subsys
*nvmet_subsys_alloc(const char *subsysnqn
,
1130 enum nvme_subsys_type type
)
1132 struct nvmet_subsys
*subsys
;
1134 subsys
= kzalloc(sizeof(*subsys
), GFP_KERNEL
);
1138 subsys
->ver
= NVME_VS(1, 3, 0); /* NVMe 1.3.0 */
1139 /* generate a random serial number as our controllers are ephemeral: */
1140 get_random_bytes(&subsys
->serial
, sizeof(subsys
->serial
));
1144 subsys
->max_qid
= NVMET_NR_QUEUES
;
1147 subsys
->max_qid
= 0;
1150 pr_err("%s: Unknown Subsystem type - %d\n", __func__
, type
);
1154 subsys
->type
= type
;
1155 subsys
->subsysnqn
= kstrndup(subsysnqn
, NVMF_NQN_SIZE
,
1157 if (!subsys
->subsysnqn
) {
1162 kref_init(&subsys
->ref
);
1164 mutex_init(&subsys
->lock
);
1165 INIT_LIST_HEAD(&subsys
->namespaces
);
1166 INIT_LIST_HEAD(&subsys
->ctrls
);
1167 INIT_LIST_HEAD(&subsys
->hosts
);
1172 static void nvmet_subsys_free(struct kref
*ref
)
1174 struct nvmet_subsys
*subsys
=
1175 container_of(ref
, struct nvmet_subsys
, ref
);
1177 WARN_ON_ONCE(!list_empty(&subsys
->namespaces
));
1179 kfree(subsys
->subsysnqn
);
1183 void nvmet_subsys_del_ctrls(struct nvmet_subsys
*subsys
)
1185 struct nvmet_ctrl
*ctrl
;
1187 mutex_lock(&subsys
->lock
);
1188 list_for_each_entry(ctrl
, &subsys
->ctrls
, subsys_entry
)
1189 ctrl
->ops
->delete_ctrl(ctrl
);
1190 mutex_unlock(&subsys
->lock
);
1193 void nvmet_subsys_put(struct nvmet_subsys
*subsys
)
1195 kref_put(&subsys
->ref
, nvmet_subsys_free
);
1198 static int __init
nvmet_init(void)
1202 nvmet_ana_group_enabled
[NVMET_DEFAULT_ANA_GRPID
] = 1;
1204 buffered_io_wq
= alloc_workqueue("nvmet-buffered-io-wq",
1206 if (!buffered_io_wq
) {
1211 error
= nvmet_init_discovery();
1215 error
= nvmet_init_configfs();
1217 goto out_exit_discovery
;
1221 nvmet_exit_discovery();
1226 static void __exit
nvmet_exit(void)
1228 nvmet_exit_configfs();
1229 nvmet_exit_discovery();
1230 ida_destroy(&cntlid_ida
);
1231 destroy_workqueue(buffered_io_wq
);
1233 BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_entry
) != 1024);
1234 BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_hdr
) != 1024);
1237 module_init(nvmet_init
);
1238 module_exit(nvmet_exit
);
1240 MODULE_LICENSE("GPL v2");