// SPDX-License-Identifier: GPL-2.0
/*
 * NVMe Over Fabrics Target Passthrough command implementation.
 *
 * Copyright (c) 2017-2018 Western Digital Corporation or its
 * affiliates.
 * Copyright (c) 2019-2020, Eideticom Inc.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>

#include "../host/nvme.h"
#include "nvmet.h"

MODULE_IMPORT_NS("NVME_TARGET_PASSTHRU");

/*
 * xarray to maintain one passthru subsystem per nvme controller.
 */
static DEFINE_XARRAY(passthru_subsystems);

void nvmet_passthrough_override_cap(struct nvmet_ctrl *ctrl)
{
	/*
	 * Multiple command set support can only be declared if the underlying
	 * controller actually supports it.
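	 *
	 * Bit 43 of CAP is bit 6 of the CAP.CSS field (bits 44:37), i.e. the
	 * "one or more I/O command sets supported" flag through which multi
	 * command set (CSI) support is advertised.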
	 */
	if (!nvme_multi_css(ctrl->subsys->passthru_ctrl))
		ctrl->cap &= ~(1ULL << 43);
}

static u16 nvmet_passthru_override_id_descs(struct nvmet_req *req)
{
	struct nvmet_ctrl *ctrl = req->sq->ctrl;
	u16 status = NVME_SC_SUCCESS;
	int pos, len;
	bool csi_seen = false;
	void *data;
	u8 csi;

	if (!ctrl->subsys->clear_ids)
		return status;

	data = kzalloc(NVME_IDENTIFY_DATA_SIZE, GFP_KERNEL);
	if (!data)
		return NVME_SC_INTERNAL;

	status = nvmet_copy_from_sgl(req, 0, data, NVME_IDENTIFY_DATA_SIZE);
	if (status)
		goto out_free;

	for (pos = 0; pos < NVME_IDENTIFY_DATA_SIZE; pos += len) {
		struct nvme_ns_id_desc *cur = data + pos;

		if (cur->nidl == 0)
			break;
		if (cur->nidt == NVME_NIDT_CSI) {
			memcpy(&csi, cur + 1, NVME_NIDT_CSI_LEN);
			csi_seen = true;
			break;
		}
		len = sizeof(struct nvme_ns_id_desc) + cur->nidl;
	}

	memset(data, 0, NVME_IDENTIFY_DATA_SIZE);
	if (csi_seen) {
		struct nvme_ns_id_desc *cur = data;

		cur->nidt = NVME_NIDT_CSI;
		cur->nidl = NVME_NIDT_CSI_LEN;
		memcpy(cur + 1, &csi, NVME_NIDT_CSI_LEN);
	}
	status = nvmet_copy_to_sgl(req, 0, data, NVME_IDENTIFY_DATA_SIZE);

out_free:
	kfree(data);
	return status;
}

static u16 nvmet_passthru_override_id_ctrl(struct nvmet_req *req)
{
	struct nvmet_ctrl *ctrl = req->sq->ctrl;
	struct nvme_ctrl *pctrl = ctrl->subsys->passthru_ctrl;
	u16 status = NVME_SC_SUCCESS;
	struct nvme_id_ctrl *id;
	unsigned int max_hw_sectors;
	int page_shift;

	id = kzalloc(sizeof(*id), GFP_KERNEL);
	if (!id)
		return NVME_SC_INTERNAL;

	status = nvmet_copy_from_sgl(req, 0, id, sizeof(*id));
	if (status)
		goto out_free;

	id->cntlid = cpu_to_le16(ctrl->cntlid);
	id->ver = cpu_to_le32(ctrl->subsys->ver);

	/*
	 * The passthru NVMe driver may have a limit on the number of segments
	 * which depends on the host's memory fragmentation. To solve this,
	 * cap mdts at a number of pages equal to the number of segments.
	 */
	max_hw_sectors = min_not_zero(pctrl->max_segments << PAGE_SECTORS_SHIFT,
				      pctrl->max_hw_sectors);

	/*
	 * nvmet_passthru_map_sg is limited to using a single bio so limit
	 * the mdts based on BIO_MAX_VECS as well.
	 */
	max_hw_sectors = min_not_zero(BIO_MAX_VECS << PAGE_SECTORS_SHIFT,
				      max_hw_sectors);

	page_shift = NVME_CAP_MPSMIN(ctrl->cap) + 12;
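
	/*
	 * MDTS is a power-of-two multiple of the minimum memory page size
	 * (page_shift = 12 + CAP.MPSMIN), while max_hw_sectors counts
	 * 512-byte sectors. For example, max_hw_sectors = 1024 (512 KiB)
	 * with a 4 KiB MPSMIN gives mdts = ilog2(1024) + 9 - 12 = 7,
	 * i.e. 2^7 * 4 KiB = 512 KiB.
	 */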
	id->mdts = ilog2(max_hw_sectors) + 9 - page_shift;

	/*
	 * We export the aerl limit for the fabrics controller, update this
	 * when passthru based aerl support is added.
	 */
	id->aerl = NVMET_ASYNC_EVENTS - 1;

	/* emulate kas as most of the PCIe ctrls don't have support for kas */
	id->kas = cpu_to_le16(NVMET_KAS);

	/* don't support host memory buffer */
	id->hmpre = 0;
	id->hmmin = 0;
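
	/*
	 * The high and low nibbles of SQES/CQES encode the maximum and
	 * required queue entry sizes as powers of two: 0x66 below caps SQEs
	 * at 64 bytes (2^6) and 0x44 caps CQEs at 16 bytes (2^4), clamping
	 * whatever the passthru controller reported to the fabrics limits.
	 */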
	id->sqes = min_t(__u8, ((0x6 << 4) | 0x6), id->sqes);
	id->cqes = min_t(__u8, ((0x4 << 4) | 0x4), id->cqes);
	id->maxcmd = cpu_to_le16(NVMET_MAX_CMD(ctrl));

	/* don't support fused commands */
	id->fuses = 0;

	id->sgls = cpu_to_le32(1 << 0); /* we always support SGLs */
	if (ctrl->ops->flags & NVMF_KEYED_SGLS)
		id->sgls |= cpu_to_le32(1 << 2);
	if (req->port->inline_data_size)
		id->sgls |= cpu_to_le32(1 << 20);
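
	/*
	 * SGLS bit 0 advertises basic SGL support, bit 2 the keyed SGL data
	 * block descriptor used by RDMA transports, and bit 20 SGL data
	 * block descriptors whose Address field carries an offset, which is
	 * how in-capsule (inline) data is addressed.
	 */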

	/*
	 * When the passthru controller is set up using the nvme-loop
	 * transport it will export the passthru ctrl subsysnqn (PCIe NVMe
	 * ctrl) and will fail in nvme/host/core.c in the
	 * nvme_init_subsystem()->nvme_active_ctrl() code path with a
	 * duplicate ctrl subsysnqn. To prevent that, mask the passthru-ctrl
	 * subsysnqn with the target ctrl subsysnqn.
	 */
	memcpy(id->subnqn, ctrl->subsysnqn, sizeof(id->subnqn));

	/* use fabric id-ctrl values */
	id->ioccsz = cpu_to_le32((sizeof(struct nvme_command) +
				req->port->inline_data_size) / 16);
	id->iorcsz = cpu_to_le32(sizeof(struct nvme_completion) / 16);
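
	/*
	 * IOCCSZ/IORCSZ are in 16-byte units: a bare 64-byte SQE gives an
	 * I/O command capsule of 4 units plus any in-capsule data, and the
	 * 16-byte CQE gives a response capsule size of 1.
	 */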

	id->msdbd = ctrl->ops->msdbd;

	/* Support multipath connections with fabrics */
	id->cmic |= 1 << 1;

	/* Disable reservations, see nvmet_parse_passthru_io_cmd() */
	id->oncs &= cpu_to_le16(~NVME_CTRL_ONCS_RESERVATIONS);

	status = nvmet_copy_to_sgl(req, 0, id, sizeof(struct nvme_id_ctrl));

out_free:
	kfree(id);
	return status;
}

static u16 nvmet_passthru_override_id_ns(struct nvmet_req *req)
{
	u16 status = NVME_SC_SUCCESS;
	struct nvme_id_ns *id;
	int i;

	id = kzalloc(sizeof(*id), GFP_KERNEL);
	if (!id)
		return NVME_SC_INTERNAL;

	status = nvmet_copy_from_sgl(req, 0, id, sizeof(struct nvme_id_ns));
	if (status)
		goto out_free;

	for (i = 0; i < (id->nlbaf + 1); i++)
		if (i != (id->flbas & NVME_NS_FLBAS_LBA_MASK))
			memset(&id->lbaf[i], 0, sizeof(id->lbaf[i]));
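
	/* FLBAS bit 4 selects extended (in-band) metadata LBAs; clear it. */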
	id->flbas = id->flbas & ~(1 << 4);

	/*
	 * Presently the NVMEoF target code does not support sending
	 * metadata, so we must disable it here. This should be updated
	 * once the target starts supporting metadata.
	 */
	id->mc = 0;

	if (req->sq->ctrl->subsys->clear_ids) {
		memset(id->nguid, 0, NVME_NIDT_NGUID_LEN);
		memset(id->eui64, 0, NVME_NIDT_EUI64_LEN);
	}

	status = nvmet_copy_to_sgl(req, 0, id, sizeof(*id));

out_free:
	kfree(id);
	return status;
}

static void nvmet_passthru_execute_cmd_work(struct work_struct *w)
{
	struct nvmet_req *req = container_of(w, struct nvmet_req, p.work);
	struct request *rq = req->p.rq;
	struct nvme_ctrl *ctrl = nvme_req(rq)->ctrl;
	struct nvme_ns *ns = rq->q->queuedata;
	u32 effects;
	int status;

	effects = nvme_passthru_start(ctrl, ns, req->cmd->common.opcode);
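	/*
	 * nvme_passthru_start() reports the command's effects and, for
	 * commands whose effects require it, freezes I/O on the passthru
	 * controller; nvme_passthru_end() below undoes that, which is why
	 * such commands are executed synchronously from this work item
	 * rather than completed from interrupt context.
	 */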
	status = nvme_execute_rq(rq, false);
	if (status == NVME_SC_SUCCESS &&
	    req->cmd->common.opcode == nvme_admin_identify) {
		switch (req->cmd->identify.cns) {
		case NVME_ID_CNS_CTRL:
			status = nvmet_passthru_override_id_ctrl(req);
			break;
		case NVME_ID_CNS_NS:
			status = nvmet_passthru_override_id_ns(req);
			break;
		case NVME_ID_CNS_NS_DESC_LIST:
			status = nvmet_passthru_override_id_descs(req);
			break;
		}
	} else if (status < 0)
		status = NVME_SC_INTERNAL;

	req->cqe->result = nvme_req(rq)->result;
	nvmet_req_complete(req, status);
	blk_mq_free_request(rq);

	if (effects)
		nvme_passthru_end(ctrl, ns, effects, req->cmd, status);
}

static enum rq_end_io_ret nvmet_passthru_req_done(struct request *rq,
						  blk_status_t blk_status)
{
	struct nvmet_req *req = rq->end_io_data;

	req->cqe->result = nvme_req(rq)->result;
	nvmet_req_complete(req, nvme_req(rq)->status);
	blk_mq_free_request(rq);
	return RQ_END_IO_NONE;
}

static int nvmet_passthru_map_sg(struct nvmet_req *req, struct request *rq)
{
	struct scatterlist *sg;
	struct bio *bio;
	int i;
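
	/*
	 * The whole nvmet scatterlist is packed into a single bio, which is
	 * why nvmet_passthru_override_id_ctrl() caps mdts at BIO_MAX_VECS
	 * pages; anything larger is rejected here.
	 */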
	if (req->sg_cnt > BIO_MAX_VECS)
		return -EINVAL;

	if (nvmet_use_inline_bvec(req)) {
		bio = &req->p.inline_bio;
		bio_init(bio, NULL, req->inline_bvec,
			 ARRAY_SIZE(req->inline_bvec), req_op(rq));
	} else {
		bio = bio_alloc(NULL, bio_max_segs(req->sg_cnt), req_op(rq),
				GFP_KERNEL);
		bio->bi_end_io = bio_put;
	}

	for_each_sg(req->sg, sg, req->sg_cnt, i) {
		if (bio_add_pc_page(rq->q, bio, sg_page(sg), sg->length,
				    sg->offset) < sg->length) {
			nvmet_req_bio_put(req, bio);
			return -EINVAL;
		}
	}

	blk_rq_bio_prep(rq, bio, req->sg_cnt);

	return 0;
}

static void nvmet_passthru_execute_cmd(struct nvmet_req *req)
{
	struct nvme_ctrl *ctrl = nvmet_req_subsys(req)->passthru_ctrl;
	struct request_queue *q = ctrl->admin_q;
	struct nvme_ns *ns = NULL;
	struct request *rq = NULL;
	unsigned int timeout;
	u32 effects;
	u16 status;
	int ret;

	if (likely(req->sq->qid != 0)) {
		u32 nsid = le32_to_cpu(req->cmd->common.nsid);

		ns = nvme_find_get_ns(ctrl, nsid);
		if (unlikely(!ns)) {
			pr_err("failed to get passthru ns nsid:%u\n", nsid);
			status = NVME_SC_INVALID_NS | NVME_STATUS_DNR;
			goto out;
		}

		q = ns->queue;
		timeout = nvmet_req_subsys(req)->io_timeout;
	} else {
		timeout = nvmet_req_subsys(req)->admin_timeout;
	}

	rq = blk_mq_alloc_request(q, nvme_req_op(req->cmd), 0);
	if (IS_ERR(rq)) {
		status = NVME_SC_INTERNAL;
		goto out_put_ns;
	}
	nvme_init_request(rq, req->cmd);

	if (timeout)
		rq->timeout = timeout;

	if (req->sg_cnt) {
		ret = nvmet_passthru_map_sg(req, rq);
		if (unlikely(ret)) {
			status = NVME_SC_INTERNAL;
			goto out_put_req;
		}
	}

	/*
	 * If a command needs post-execution fixups, or there are any
	 * non-trivial effects, make sure to execute the command synchronously
	 * in a workqueue so that nvme_passthru_end gets called.
	 */
	effects = nvme_command_effects(ctrl, ns, req->cmd->common.opcode);
	if (req->p.use_workqueue ||
	    (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC))) {
		INIT_WORK(&req->p.work, nvmet_passthru_execute_cmd_work);
		req->p.rq = rq;
		queue_work(nvmet_wq, &req->p.work);
	} else {
		rq->end_io = nvmet_passthru_req_done;
		rq->end_io_data = req;
		blk_execute_rq_nowait(rq, false);
	}
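
	/*
	 * On the asynchronous path the request completes through
	 * nvmet_passthru_req_done(); commands with non-trivial effects took
	 * the workqueue path above so that nvme_passthru_end() runs in
	 * process context.
	 */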

	if (ns)
		nvme_put_ns(ns);

	return;

out_put_req:
	blk_mq_free_request(rq);
out_put_ns:
	if (ns)
		nvme_put_ns(ns);
out:
	nvmet_req_complete(req, status);
}

/*
 * We need to emulate set host behaviour to ensure that any requested
 * behaviour of the target's host matches the requested behaviour
 * of the device's host and fail otherwise.
 */
static void nvmet_passthru_set_host_behaviour(struct nvmet_req *req)
{
	struct nvme_ctrl *ctrl = nvmet_req_subsys(req)->passthru_ctrl;
	struct nvme_feat_host_behavior *host;
	u16 status = NVME_SC_INTERNAL;
	int ret;

	host = kzalloc(sizeof(*host) * 2, GFP_KERNEL);
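	/*
	 * Two copies are allocated back to back: host[0] receives the
	 * passthru device's current host behaviour via nvme_get_features()
	 * and host[1] the value the fabrics host is trying to set; the two
	 * must match for the request to be accepted.
	 */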
	if (!host)
		goto out_complete_req;

	ret = nvme_get_features(ctrl, NVME_FEAT_HOST_BEHAVIOR, 0,
				host, sizeof(*host), NULL);
	if (ret)
		goto out_free_host;

	status = nvmet_copy_from_sgl(req, 0, &host[1], sizeof(*host));
	if (status)
		goto out_free_host;

	if (memcmp(&host[0], &host[1], sizeof(host[0]))) {
		pr_warn("target host has requested different behaviour from the local host\n");
		status = NVME_SC_INTERNAL;
	}

out_free_host:
	kfree(host);
out_complete_req:
	nvmet_req_complete(req, status);
}

static u16 nvmet_setup_passthru_command(struct nvmet_req *req)
{
	req->p.use_workqueue = false;
	req->execute = nvmet_passthru_execute_cmd;
	return NVME_SC_SUCCESS;
}

u16 nvmet_parse_passthru_io_cmd(struct nvmet_req *req)
{
	/* Reject any commands with non-sgl flags set (i.e. fused commands) */
	if (req->cmd->common.flags & ~NVME_CMD_SGL_ALL)
		return NVME_SC_INVALID_FIELD;

	switch (req->cmd->common.opcode) {
	case nvme_cmd_resv_register:
	case nvme_cmd_resv_report:
	case nvme_cmd_resv_acquire:
	case nvme_cmd_resv_release:
		/*
		 * Reservations cannot be supported properly because the
		 * underlying device has no way of differentiating different
		 * hosts that connect via fabrics. This could potentially be
		 * emulated in the future if regular targets grow support for
		 * reservations.
		 */
		return NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR;
	}

	return nvmet_setup_passthru_command(req);
}

/*
 * Only features that are emulated or specifically allowed in the list are
 * passed down to the controller. This function implements the allow list for
 * both get and set features.
 */
static u16 nvmet_passthru_get_set_features(struct nvmet_req *req)
{
	switch (le32_to_cpu(req->cmd->features.fid)) {
	case NVME_FEAT_ARBITRATION:
	case NVME_FEAT_POWER_MGMT:
	case NVME_FEAT_LBA_RANGE:
	case NVME_FEAT_TEMP_THRESH:
	case NVME_FEAT_ERR_RECOVERY:
	case NVME_FEAT_VOLATILE_WC:
	case NVME_FEAT_WRITE_ATOMIC:
	case NVME_FEAT_AUTO_PST:
	case NVME_FEAT_TIMESTAMP:
	case NVME_FEAT_HCTM:
	case NVME_FEAT_NOPSC:
	case NVME_FEAT_RRL:
	case NVME_FEAT_PLM_CONFIG:
	case NVME_FEAT_PLM_WINDOW:
	case NVME_FEAT_HOST_BEHAVIOR:
	case NVME_FEAT_SANITIZE:
	case NVME_FEAT_VENDOR_START ... NVME_FEAT_VENDOR_END:
		return nvmet_setup_passthru_command(req);

	case NVME_FEAT_ASYNC_EVENT:
		/* There is no support for forwarding ASYNC events */
	case NVME_FEAT_IRQ_COALESCE:
	case NVME_FEAT_IRQ_CONFIG:
		/* The IRQ settings will not apply to the target controller */
	case NVME_FEAT_HOST_MEM_BUF:
		/*
		 * Any HMB that's set will not be passed through and will
		 * not work as expected
		 */
	case NVME_FEAT_SW_PROGRESS:
		/*
		 * The Pre-Boot Software Load Count doesn't make much
		 * sense for a target to export
		 */
	case NVME_FEAT_RESV_MASK:
	case NVME_FEAT_RESV_PERSIST:
		/* No reservations, see nvmet_parse_passthru_io_cmd() */
	default:
		return NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR;
	}
}

u16 nvmet_parse_passthru_admin_cmd(struct nvmet_req *req)
{
	/* Reject any commands with non-sgl flags set (i.e. fused commands) */
	if (req->cmd->common.flags & ~NVME_CMD_SGL_ALL)
		return NVME_SC_INVALID_FIELD;

	/*
	 * Passthru all vendor specific commands
	 */
	if (req->cmd->common.opcode >= nvme_admin_vendor_start)
		return nvmet_setup_passthru_command(req);

	switch (req->cmd->common.opcode) {
	case nvme_admin_async_event:
		req->execute = nvmet_execute_async_event;
		return NVME_SC_SUCCESS;
	case nvme_admin_keep_alive:
		/*
		 * Most PCIe ctrls don't support the keep alive cmd, so route
		 * keep alive to the non-passthru mode. Change this code once
		 * PCIe ctrls with keep alive support become available.
		 */
		req->execute = nvmet_execute_keep_alive;
		return NVME_SC_SUCCESS;
	case nvme_admin_set_features:
		switch (le32_to_cpu(req->cmd->features.fid)) {
		case NVME_FEAT_ASYNC_EVENT:
		case NVME_FEAT_KATO:
		case NVME_FEAT_NUM_QUEUES:
		case NVME_FEAT_HOST_ID:
			req->execute = nvmet_execute_set_features;
			return NVME_SC_SUCCESS;
		case NVME_FEAT_HOST_BEHAVIOR:
			req->execute = nvmet_passthru_set_host_behaviour;
			return NVME_SC_SUCCESS;
		default:
			return nvmet_passthru_get_set_features(req);
		}
		break;
	case nvme_admin_get_features:
		switch (le32_to_cpu(req->cmd->features.fid)) {
		case NVME_FEAT_ASYNC_EVENT:
		case NVME_FEAT_KATO:
		case NVME_FEAT_NUM_QUEUES:
		case NVME_FEAT_HOST_ID:
			req->execute = nvmet_execute_get_features;
			return NVME_SC_SUCCESS;
		default:
			return nvmet_passthru_get_set_features(req);
		}
		break;
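	/*
	 * Identify results for the CNS values handled below are rewritten by
	 * the nvmet_passthru_override_id_*() helpers after the device
	 * responds, so these commands are forced onto the workqueue path
	 * (p.use_workqueue = true) instead of completing asynchronously.
	 */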
	case nvme_admin_identify:
		switch (req->cmd->identify.cns) {
		case NVME_ID_CNS_CS_CTRL:
			switch (req->cmd->identify.csi) {
			case NVME_CSI_ZNS:
				req->execute = nvmet_passthru_execute_cmd;
				req->p.use_workqueue = true;
				return NVME_SC_SUCCESS;
			}
			return NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR;
		case NVME_ID_CNS_CTRL:
		case NVME_ID_CNS_NS:
		case NVME_ID_CNS_NS_DESC_LIST:
			req->execute = nvmet_passthru_execute_cmd;
			req->p.use_workqueue = true;
			return NVME_SC_SUCCESS;
		case NVME_ID_CNS_CS_NS:
			switch (req->cmd->identify.csi) {
			case NVME_CSI_ZNS:
				req->execute = nvmet_passthru_execute_cmd;
				req->p.use_workqueue = true;
				return NVME_SC_SUCCESS;
			}
			return NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR;
		default:
			return nvmet_setup_passthru_command(req);
		}
	case nvme_admin_get_log_page:
		return nvmet_setup_passthru_command(req);
	default:
		/* Reject commands not in the allowlist above */
		return nvmet_report_invalid_opcode(req);
	}
}

int nvmet_passthru_ctrl_enable(struct nvmet_subsys *subsys)
{
	struct nvme_ctrl *ctrl;
	struct file *file;
	int ret = -EINVAL;
	void *old;

	mutex_lock(&subsys->lock);
	if (!subsys->passthru_ctrl_path)
		goto out_unlock;
	if (subsys->passthru_ctrl)
		goto out_unlock;

	if (subsys->nr_namespaces) {
		pr_info("cannot enable both passthru and regular namespaces for a single subsystem");
		goto out_unlock;
	}

	file = filp_open(subsys->passthru_ctrl_path, O_RDWR, 0);
	if (IS_ERR(file)) {
		ret = PTR_ERR(file);
		goto out_unlock;
	}

	ctrl = nvme_ctrl_from_file(file);
	if (!ctrl) {
		pr_err("failed to open nvme controller %s\n",
		       subsys->passthru_ctrl_path);

		goto out_put_file;
	}

	old = xa_cmpxchg(&passthru_subsystems, ctrl->instance, NULL,
			 subsys, GFP_KERNEL);
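	/*
	 * The xarray is keyed by the bottom controller's instance, so a
	 * given NVMe controller can back at most one passthru subsystem at a
	 * time; a non-NULL old entry means it is already claimed.
	 */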
	if (xa_is_err(old)) {
		ret = xa_err(old);
		goto out_put_file;
	}

	if (old)
		goto out_put_file;

	subsys->passthru_ctrl = ctrl;
	subsys->ver = ctrl->vs;
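
	/*
	 * NVMe over Fabrics is specified against NVMe 1.2.1 and later, so
	 * older passthru devices are advertised as 1.2.1 below.
	 */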
	if (subsys->ver < NVME_VS(1, 2, 1)) {
		pr_warn("nvme controller version is too old: %llu.%llu.%llu, advertising 1.2.1\n",
			NVME_MAJOR(subsys->ver), NVME_MINOR(subsys->ver),
			NVME_TERTIARY(subsys->ver));
		subsys->ver = NVME_VS(1, 2, 1);
	}
	nvme_get_ctrl(ctrl);
	__module_get(subsys->passthru_ctrl->ops->module);
	ret = 0;

out_put_file:
	filp_close(file, NULL);
out_unlock:
	mutex_unlock(&subsys->lock);
	return ret;
}

static void __nvmet_passthru_ctrl_disable(struct nvmet_subsys *subsys)
{
	if (subsys->passthru_ctrl) {
		xa_erase(&passthru_subsystems, subsys->passthru_ctrl->instance);
		module_put(subsys->passthru_ctrl->ops->module);
		nvme_put_ctrl(subsys->passthru_ctrl);
	}
	subsys->passthru_ctrl = NULL;
	subsys->ver = NVMET_DEFAULT_VS;
}

void nvmet_passthru_ctrl_disable(struct nvmet_subsys *subsys)
{
	mutex_lock(&subsys->lock);
	__nvmet_passthru_ctrl_disable(subsys);
	mutex_unlock(&subsys->lock);
}

void nvmet_passthru_subsys_free(struct nvmet_subsys *subsys)
{
	mutex_lock(&subsys->lock);
	__nvmet_passthru_ctrl_disable(subsys);
	mutex_unlock(&subsys->lock);
	kfree(subsys->passthru_ctrl_path);
}