// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2011-2014, Intel Corporation.
 * Copyright (c) 2017-2021 Christoph Hellwig.
 */
#include <linux/blk-integrity.h>
#include <linux/ptrace.h>	/* for force_successful_syscall_return */
#include <linux/nvme_ioctl.h>
#include <linux/io_uring/cmd.h>
#include "nvme.h"
enum {
        NVME_IOCTL_VEC          = (1 << 0),
        NVME_IOCTL_PARTITION    = (1 << 1),
};
static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c,
                unsigned int flags, bool open_for_write)
{
        u32 effects;

        /*
         * Do not allow unprivileged passthrough on partitions, as that allows an
         * escape from the containment of the partition.
         */
        if (flags & NVME_IOCTL_PARTITION)
                goto admin;

        /*
         * Do not allow unprivileged processes to send vendor specific or fabrics
         * commands as we can't be sure about their effects.
         */
        if (c->common.opcode >= nvme_cmd_vendor_start ||
            c->common.opcode == nvme_fabrics_command)
                goto admin;

        /*
         * Do not allow unprivileged passthrough of admin commands except
         * for a subset of identify commands that contain information required
         * to form proper I/O commands in userspace and do not expose any
         * potentially sensitive information.
         */
        if (!ns) {
                if (c->common.opcode == nvme_admin_identify) {
                        switch (c->identify.cns) {
                        case NVME_ID_CNS_NS:
                        case NVME_ID_CNS_CS_NS:
                        case NVME_ID_CNS_NS_CS_INDEP:
                        case NVME_ID_CNS_CS_CTRL:
                        case NVME_ID_CNS_CTRL:
                                return true;
                        }
                }
                goto admin;
        }

        /*
         * Check if the controller provides a Commands Supported and Effects log
         * and marks this command as supported.  If not, reject unprivileged
         * passthrough.
         */
        effects = nvme_command_effects(ns->ctrl, ns, c->common.opcode);
        if (!(effects & NVME_CMD_EFFECTS_CSUPP))
                goto admin;

        /*
         * Don't allow passthrough for commands that have intrusive (or unknown)
         * effects.
         */
        if (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC |
                        NVME_CMD_EFFECTS_UUID_SEL |
                        NVME_CMD_EFFECTS_SCOPE_MASK))
                goto admin;

        /*
         * Only allow I/O commands that transfer data to the controller or that
         * change the logical block contents if the file descriptor is open for
         * writing.
         */
        if ((nvme_is_write(c) || (effects & NVME_CMD_EFFECTS_LBCC)) &&
            !open_for_write)
                goto admin;

        return true;
admin:
        return capable(CAP_SYS_ADMIN);
}
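/*
 * Illustrative userspace sketch of a command that passes the unprivileged
 * checks above (assumes a program built against <linux/nvme_ioctl.h> with a
 * namespace or controller node open read-only; fd, nsid and id_buf are
 * hypothetical names, error handling is minimal):
 *
 *	struct nvme_passthru_cmd cmd = {
 *		.opcode   = 0x06,			// nvme_admin_identify
 *		.nsid     = nsid,
 *		.addr     = (__u64)(uintptr_t)id_buf,	// 4096-byte buffer
 *		.data_len = 4096,
 *		.cdw10    = 0,				// CNS 0: Identify Namespace
 *	};
 *	if (ioctl(fd, NVME_IOCTL_ADMIN_CMD, &cmd) < 0)
 *		perror("NVME_IOCTL_ADMIN_CMD");
 *
 * Any opcode that is not on the allow-list above falls through to the
 * CAP_SYS_ADMIN check at the "admin" label.
 */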
/*
 * Convert integer values from ioctl structures to user pointers, silently
 * ignoring the upper bits in the compat case to match behaviour of 32-bit
 * kernels.
 */
static void __user *nvme_to_user_ptr(uintptr_t ptrval)
{
        if (in_compat_syscall())
                ptrval = (compat_uptr_t)ptrval;
        return (void __user *)ptrval;
}
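/*
 * Example: for a compat (32-bit) task the cast through compat_uptr_t drops
 * the upper 32 bits, so a value such as 0xdeadbeef00001000 stored in a
 * 64-bit ioctl field is treated as the 32-bit user pointer 0x00001000.
 */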
static struct request *nvme_alloc_user_request(struct request_queue *q,
                struct nvme_command *cmd, blk_opf_t rq_flags,
                blk_mq_req_flags_t blk_flags)
{
        struct request *req;

        req = blk_mq_alloc_request(q, nvme_req_op(cmd) | rq_flags, blk_flags);
        if (IS_ERR(req))
                return req;
        nvme_init_request(req, cmd);
        nvme_req(req)->flags |= NVME_REQ_USERCMD;
        return req;
}
static int nvme_map_user_request(struct request *req, u64 ubuffer,
                unsigned bufflen, void __user *meta_buffer, unsigned meta_len,
                struct io_uring_cmd *ioucmd, unsigned int flags)
{
        struct request_queue *q = req->q;
        struct nvme_ns *ns = q->queuedata;
        struct block_device *bdev = ns ? ns->disk->part0 : NULL;
        bool supports_metadata = bdev && blk_get_integrity(bdev->bd_disk);
        struct nvme_ctrl *ctrl = nvme_req(req)->ctrl;
        bool has_metadata = meta_buffer && meta_len;
        struct bio *bio = NULL;
        int ret;

        if (!nvme_ctrl_sgl_supported(ctrl))
                dev_warn_once(ctrl->device, "using unchecked data buffer\n");
        if (has_metadata) {
                if (!supports_metadata)
                        return -EINVAL;
                if (!nvme_ctrl_meta_sgl_supported(ctrl))
                        dev_warn_once(ctrl->device,
                                      "using unchecked metadata buffer\n");
        }

        if (ioucmd && (ioucmd->flags & IORING_URING_CMD_FIXED)) {
                struct iov_iter iter;

                /* fixedbufs is only for non-vectored io */
                if (WARN_ON_ONCE(flags & NVME_IOCTL_VEC))
                        return -EINVAL;
                ret = io_uring_cmd_import_fixed(ubuffer, bufflen,
                                rq_data_dir(req), &iter, ioucmd);
                if (ret < 0)
                        goto out;
                ret = blk_rq_map_user_iov(q, req, NULL, &iter, GFP_KERNEL);
        } else {
                ret = blk_rq_map_user_io(req, NULL, nvme_to_user_ptr(ubuffer),
                                bufflen, GFP_KERNEL, flags & NVME_IOCTL_VEC, 0,
                                0, rq_data_dir(req));
        }

        if (ret)
                goto out;

        bio = req->bio;
        if (bdev)
                bio_set_dev(bio, bdev);

        if (has_metadata) {
                ret = blk_rq_integrity_map_user(req, meta_buffer, meta_len);
                if (ret)
                        goto out_unmap;
        }

        return ret;

out_unmap:
        if (bio)
                blk_rq_unmap_user(bio);
out:
        blk_mq_free_request(req);
        return ret;
}
static int nvme_submit_user_cmd(struct request_queue *q,
                struct nvme_command *cmd, u64 ubuffer, unsigned bufflen,
                void __user *meta_buffer, unsigned meta_len,
                u64 *result, unsigned timeout, unsigned int flags)
{
        struct nvme_ns *ns = q->queuedata;
        struct nvme_ctrl *ctrl;
        struct request *req;
        struct bio *bio;
        u32 effects;
        int ret;

        req = nvme_alloc_user_request(q, cmd, 0, 0);
        if (IS_ERR(req))
                return PTR_ERR(req);

        req->timeout = timeout;
        if (ubuffer && bufflen) {
                ret = nvme_map_user_request(req, ubuffer, bufflen, meta_buffer,
                                meta_len, NULL, flags);
                if (ret)
                        return ret;
        }

        bio = req->bio;
        ctrl = nvme_req(req)->ctrl;

        effects = nvme_passthru_start(ctrl, ns, cmd->common.opcode);
        ret = nvme_execute_rq(req, false);
        if (result)
                *result = le64_to_cpu(nvme_req(req)->result.u64);
        if (bio)
                blk_rq_unmap_user(bio);
        blk_mq_free_request(req);

        if (effects)
                nvme_passthru_end(ctrl, ns, effects, cmd, ret);

        return ret;
}
static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
{
        struct nvme_user_io io;
        struct nvme_command c;
        unsigned length, meta_len;
        void __user *metadata;

        if (copy_from_user(&io, uio, sizeof(io)))
                return -EFAULT;
        if (io.flags)
                return -EINVAL;

        switch (io.opcode) {
        case nvme_cmd_write:
        case nvme_cmd_read:
        case nvme_cmd_compare:
                break;
        default:
                return -EINVAL;
        }

        length = (io.nblocks + 1) << ns->head->lba_shift;

        if ((io.control & NVME_RW_PRINFO_PRACT) &&
            (ns->head->ms == ns->head->pi_size)) {
                /*
                 * Protection information is stripped/inserted by the
                 * controller.
                 */
                if (nvme_to_user_ptr(io.metadata))
                        return -EINVAL;
                meta_len = 0;
                metadata = NULL;
        } else {
                meta_len = (io.nblocks + 1) * ns->head->ms;
                metadata = nvme_to_user_ptr(io.metadata);
        }

        if (ns->head->features & NVME_NS_EXT_LBAS) {
                length += meta_len;
                meta_len = 0;
        } else if (meta_len) {
                if ((io.metadata & 3) || !io.metadata)
                        return -EINVAL;
        }

        memset(&c, 0, sizeof(c));
        c.rw.opcode = io.opcode;
        c.rw.flags = io.flags;
        c.rw.nsid = cpu_to_le32(ns->head->ns_id);
        c.rw.slba = cpu_to_le64(io.slba);
        c.rw.length = cpu_to_le16(io.nblocks);
        c.rw.control = cpu_to_le16(io.control);
        c.rw.dsmgmt = cpu_to_le32(io.dsmgmt);
        c.rw.reftag = cpu_to_le32(io.reftag);
        c.rw.lbat = cpu_to_le16(io.apptag);
        c.rw.lbatm = cpu_to_le16(io.appmask);

        return nvme_submit_user_cmd(ns->queue, &c, io.addr, length, metadata,
                        meta_len, NULL, 0, 0);
}
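/*
 * Illustrative userspace sketch of the legacy NVME_IOCTL_SUBMIT_IO path
 * handled above (assumes a 512-byte-LBA namespace without separate metadata;
 * nsfd and buf are hypothetical names):
 *
 *	struct nvme_user_io io = {
 *		.opcode  = 0x02,			// nvme_cmd_read
 *		.slba    = 0,
 *		.nblocks = 7,				// 0's based: 8 LBAs
 *		.addr    = (__u64)(uintptr_t)buf,	// 8 * 512 bytes
 *	};
 *	if (ioctl(nsfd, NVME_IOCTL_SUBMIT_IO, &io) < 0)
 *		perror("NVME_IOCTL_SUBMIT_IO");
 *
 * Note that the transfer length is derived from nblocks and the namespace
 * LBA shift above, not passed in by the caller.
 */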
static bool nvme_validate_passthru_nsid(struct nvme_ctrl *ctrl,
                                        struct nvme_ns *ns, __u32 nsid)
{
        if (ns && nsid != ns->head->ns_id) {
                dev_err(ctrl->device,
                        "%s: nsid (%u) in cmd does not match nsid (%u) of namespace\n",
                        current->comm, nsid, ns->head->ns_id);
                return false;
        }

        return true;
}
static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
                struct nvme_passthru_cmd __user *ucmd, unsigned int flags,
                bool open_for_write)
{
        struct nvme_passthru_cmd cmd;
        struct nvme_command c;
        unsigned timeout = 0;
        u64 result;
        int status;

        if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
                return -EFAULT;
        if (cmd.flags)
                return -EINVAL;
        if (!nvme_validate_passthru_nsid(ctrl, ns, cmd.nsid))
                return -EINVAL;

        memset(&c, 0, sizeof(c));
        c.common.opcode = cmd.opcode;
        c.common.flags = cmd.flags;
        c.common.nsid = cpu_to_le32(cmd.nsid);
        c.common.cdw2[0] = cpu_to_le32(cmd.cdw2);
        c.common.cdw2[1] = cpu_to_le32(cmd.cdw3);
        c.common.cdw10 = cpu_to_le32(cmd.cdw10);
        c.common.cdw11 = cpu_to_le32(cmd.cdw11);
        c.common.cdw12 = cpu_to_le32(cmd.cdw12);
        c.common.cdw13 = cpu_to_le32(cmd.cdw13);
        c.common.cdw14 = cpu_to_le32(cmd.cdw14);
        c.common.cdw15 = cpu_to_le32(cmd.cdw15);

        if (!nvme_cmd_allowed(ns, &c, 0, open_for_write))
                return -EACCES;

        if (cmd.timeout_ms)
                timeout = msecs_to_jiffies(cmd.timeout_ms);

        status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
                        cmd.addr, cmd.data_len, nvme_to_user_ptr(cmd.metadata),
                        cmd.metadata_len, &result, timeout, 0);

        if (status >= 0) {
                if (put_user(result, &ucmd->result))
                        return -EFAULT;
        }

        return status;
}
static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
                struct nvme_passthru_cmd64 __user *ucmd, unsigned int flags,
                bool open_for_write)
{
        struct nvme_passthru_cmd64 cmd;
        struct nvme_command c;
        unsigned timeout = 0;
        int status;

        if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
                return -EFAULT;
        if (cmd.flags)
                return -EINVAL;
        if (!nvme_validate_passthru_nsid(ctrl, ns, cmd.nsid))
                return -EINVAL;

        memset(&c, 0, sizeof(c));
        c.common.opcode = cmd.opcode;
        c.common.flags = cmd.flags;
        c.common.nsid = cpu_to_le32(cmd.nsid);
        c.common.cdw2[0] = cpu_to_le32(cmd.cdw2);
        c.common.cdw2[1] = cpu_to_le32(cmd.cdw3);
        c.common.cdw10 = cpu_to_le32(cmd.cdw10);
        c.common.cdw11 = cpu_to_le32(cmd.cdw11);
        c.common.cdw12 = cpu_to_le32(cmd.cdw12);
        c.common.cdw13 = cpu_to_le32(cmd.cdw13);
        c.common.cdw14 = cpu_to_le32(cmd.cdw14);
        c.common.cdw15 = cpu_to_le32(cmd.cdw15);

        if (!nvme_cmd_allowed(ns, &c, flags, open_for_write))
                return -EACCES;

        if (cmd.timeout_ms)
                timeout = msecs_to_jiffies(cmd.timeout_ms);

        status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
                        cmd.addr, cmd.data_len, nvme_to_user_ptr(cmd.metadata),
                        cmd.metadata_len, &cmd.result, timeout, flags);

        if (status >= 0) {
                if (put_user(cmd.result, &ucmd->result))
                        return -EFAULT;
        }

        return status;
}
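/*
 * Unlike nvme_user_cmd() above, which truncates the completion result to the
 * 32-bit nvme_passthru_cmd::result field, this variant copies the full
 * 64-bit CQE dword0/dword1 value back to userspace, e.g. the NSID allocated
 * by a Namespace Management create command.
 */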
struct nvme_uring_data {
        __u64   metadata;
        __u64   addr;
        __u32   data_len;
        __u32   metadata_len;
        __u32   timeout_ms;
};

/*
 * This overlays struct io_uring_cmd pdu.
 * Expect build errors if this grows larger than that.
 */
struct nvme_uring_cmd_pdu {
        struct request *req;
        struct bio *bio;
        u64 result;
        int status;
};

static inline struct nvme_uring_cmd_pdu *nvme_uring_cmd_pdu(
                struct io_uring_cmd *ioucmd)
{
        return io_uring_cmd_to_pdu(ioucmd, struct nvme_uring_cmd_pdu);
}
static void nvme_uring_task_cb(struct io_uring_cmd *ioucmd,
                               unsigned issue_flags)
{
        struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);

        if (pdu->bio)
                blk_rq_unmap_user(pdu->bio);
        io_uring_cmd_done(ioucmd, pdu->status, pdu->result, issue_flags);
}
static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req,
                                                blk_status_t err)
{
        struct io_uring_cmd *ioucmd = req->end_io_data;
        struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);

        if (nvme_req(req)->flags & NVME_REQ_CANCELLED) {
                pdu->status = -EINTR;
        } else {
                pdu->status = nvme_req(req)->status;
                if (!pdu->status)
                        pdu->status = blk_status_to_errno(err);
        }
        pdu->result = le64_to_cpu(nvme_req(req)->result.u64);

        /*
         * For iopoll, complete it directly. Note that using the uring_cmd
         * helper for this is safe only because we check blk_rq_is_poll().
         * As that returns false if we're NOT on a polled queue, then it's
         * safe to use the polled completion helper.
         *
         * Otherwise, move the completion to task work.
         */
        if (blk_rq_is_poll(req)) {
                if (pdu->bio)
                        blk_rq_unmap_user(pdu->bio);
                io_uring_cmd_iopoll_done(ioucmd, pdu->result, pdu->status);
        } else {
                io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_cb);
        }

        return RQ_END_IO_FREE;
}
static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
                struct io_uring_cmd *ioucmd, unsigned int issue_flags, bool vec)
{
        struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
        const struct nvme_uring_cmd *cmd = io_uring_sqe_cmd(ioucmd->sqe);
        struct request_queue *q = ns ? ns->queue : ctrl->admin_q;
        struct nvme_uring_data d;
        struct nvme_command c;
        struct request *req;
        blk_opf_t rq_flags = REQ_ALLOC_CACHE;
        blk_mq_req_flags_t blk_flags = 0;
        int ret;

        c.common.opcode = READ_ONCE(cmd->opcode);
        c.common.flags = READ_ONCE(cmd->flags);
        if (c.common.flags)
                return -EINVAL;

        c.common.command_id = 0;
        c.common.nsid = cpu_to_le32(cmd->nsid);
        if (!nvme_validate_passthru_nsid(ctrl, ns, le32_to_cpu(c.common.nsid)))
                return -EINVAL;

        c.common.cdw2[0] = cpu_to_le32(READ_ONCE(cmd->cdw2));
        c.common.cdw2[1] = cpu_to_le32(READ_ONCE(cmd->cdw3));
        c.common.metadata = 0;
        c.common.dptr.prp1 = c.common.dptr.prp2 = 0;
        c.common.cdw10 = cpu_to_le32(READ_ONCE(cmd->cdw10));
        c.common.cdw11 = cpu_to_le32(READ_ONCE(cmd->cdw11));
        c.common.cdw12 = cpu_to_le32(READ_ONCE(cmd->cdw12));
        c.common.cdw13 = cpu_to_le32(READ_ONCE(cmd->cdw13));
        c.common.cdw14 = cpu_to_le32(READ_ONCE(cmd->cdw14));
        c.common.cdw15 = cpu_to_le32(READ_ONCE(cmd->cdw15));

        if (!nvme_cmd_allowed(ns, &c, 0, ioucmd->file->f_mode & FMODE_WRITE))
                return -EACCES;

        d.metadata = READ_ONCE(cmd->metadata);
        d.addr = READ_ONCE(cmd->addr);
        d.data_len = READ_ONCE(cmd->data_len);
        d.metadata_len = READ_ONCE(cmd->metadata_len);
        d.timeout_ms = READ_ONCE(cmd->timeout_ms);

        if (issue_flags & IO_URING_F_NONBLOCK) {
                rq_flags |= REQ_NOWAIT;
                blk_flags = BLK_MQ_REQ_NOWAIT;
        }
        if (issue_flags & IO_URING_F_IOPOLL)
                rq_flags |= REQ_POLLED;

        req = nvme_alloc_user_request(q, &c, rq_flags, blk_flags);
        if (IS_ERR(req))
                return PTR_ERR(req);
        req->timeout = d.timeout_ms ? msecs_to_jiffies(d.timeout_ms) : 0;

        if (d.addr && d.data_len) {
                ret = nvme_map_user_request(req, d.addr,
                        d.data_len, nvme_to_user_ptr(d.metadata),
                        d.metadata_len, ioucmd, vec);
                if (ret)
                        return ret;
        }

        /* to free bio on completion, as req->bio will be null at that time */
        pdu->bio = req->bio;
        pdu->req = req;
        req->end_io_data = ioucmd;
        req->end_io = nvme_uring_cmd_end_io;
        blk_execute_rq_nowait(req, false);
        return -EIOCBQUEUED;
}
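/*
 * Illustrative io_uring passthrough sketch for the path above (a userspace
 * program using liburing with a ring created with IORING_SETUP_SQE128 |
 * IORING_SETUP_CQE32; ring, nsfd, nsid and buf are hypothetical names and
 * error handling is omitted):
 *
 *	struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
 *	struct nvme_uring_cmd *cmd = (struct nvme_uring_cmd *)sqe->cmd;
 *
 *	memset(sqe, 0, 2 * sizeof(*sqe));	// big SQE is 128 bytes
 *	sqe->opcode   = IORING_OP_URING_CMD;
 *	sqe->fd       = nsfd;			// /dev/ngXnY char device
 *	sqe->cmd_op   = NVME_URING_CMD_IO;
 *	cmd->opcode   = 0x02;			// nvme_cmd_read
 *	cmd->nsid     = nsid;
 *	cmd->addr     = (__u64)(uintptr_t)buf;
 *	cmd->data_len = 8 * 512;
 *	cmd->cdw10    = 0;			// starting LBA, lower 32 bits
 *	cmd->cdw11    = 0;			// starting LBA, upper 32 bits
 *	cmd->cdw12    = 7;			// number of LBAs, 0's based
 *	io_uring_submit(&ring);
 *
 * The 64-bit result lands in the big CQE, filled in by
 * nvme_uring_cmd_end_io()/nvme_uring_task_cb() above.
 */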
static bool is_ctrl_ioctl(unsigned int cmd)
{
        if (cmd == NVME_IOCTL_ADMIN_CMD || cmd == NVME_IOCTL_ADMIN64_CMD)
                return true;
        if (is_sed_ioctl(cmd))
                return true;
        return false;
}
static int nvme_ctrl_ioctl(struct nvme_ctrl *ctrl, unsigned int cmd,
                void __user *argp, bool open_for_write)
{
        switch (cmd) {
        case NVME_IOCTL_ADMIN_CMD:
                return nvme_user_cmd(ctrl, NULL, argp, 0, open_for_write);
        case NVME_IOCTL_ADMIN64_CMD:
                return nvme_user_cmd64(ctrl, NULL, argp, 0, open_for_write);
        default:
                return sed_ioctl(ctrl->opal_dev, cmd, argp);
        }
}
#ifdef COMPAT_FOR_U64_ALIGNMENT
struct nvme_user_io32 {
        __u8    opcode;
        __u8    flags;
        __u16   control;
        __u16   nblocks;
        __u16   rsvd;
        __u64   metadata;
        __u64   addr;
        __u64   slba;
        __u32   dsmgmt;
        __u32   reftag;
        __u16   apptag;
        __u16   appmask;
} __attribute__((__packed__));
#define NVME_IOCTL_SUBMIT_IO32	_IOW('N', 0x42, struct nvme_user_io32)
#endif /* COMPAT_FOR_U64_ALIGNMENT */
static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd,
                void __user *argp, unsigned int flags, bool open_for_write)
{
        switch (cmd) {
        case NVME_IOCTL_ID:
                force_successful_syscall_return();
                return ns->head->ns_id;
        case NVME_IOCTL_IO_CMD:
                return nvme_user_cmd(ns->ctrl, ns, argp, flags, open_for_write);
        /*
         * struct nvme_user_io can have different padding on some 32-bit ABIs.
         * Just accept the compat version as all fields that are used are the
         * same size and at the same offset.
         */
#ifdef COMPAT_FOR_U64_ALIGNMENT
        case NVME_IOCTL_SUBMIT_IO32:
#endif
        case NVME_IOCTL_SUBMIT_IO:
                return nvme_submit_io(ns, argp);
        case NVME_IOCTL_IO64_CMD_VEC:
                flags |= NVME_IOCTL_VEC;
                fallthrough;
        case NVME_IOCTL_IO64_CMD:
                return nvme_user_cmd64(ns->ctrl, ns, argp, flags,
                                       open_for_write);
        default:
                return -ENOTTY;
        }
}
int nvme_ioctl(struct block_device *bdev, blk_mode_t mode,
                unsigned int cmd, unsigned long arg)
{
        struct nvme_ns *ns = bdev->bd_disk->private_data;
        bool open_for_write = mode & BLK_OPEN_WRITE;
        void __user *argp = (void __user *)arg;
        unsigned int flags = 0;

        if (bdev_is_partition(bdev))
                flags |= NVME_IOCTL_PARTITION;

        if (is_ctrl_ioctl(cmd))
                return nvme_ctrl_ioctl(ns->ctrl, cmd, argp, open_for_write);
        return nvme_ns_ioctl(ns, cmd, argp, flags, open_for_write);
}
long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
        struct nvme_ns *ns =
                container_of(file_inode(file)->i_cdev, struct nvme_ns, cdev);
        bool open_for_write = file->f_mode & FMODE_WRITE;
        void __user *argp = (void __user *)arg;

        if (is_ctrl_ioctl(cmd))
                return nvme_ctrl_ioctl(ns->ctrl, cmd, argp, open_for_write);
        return nvme_ns_ioctl(ns, cmd, argp, 0, open_for_write);
}
static int nvme_uring_cmd_checks(unsigned int issue_flags)
{
        /* NVMe passthrough requires big SQE/CQE support */
        if ((issue_flags & (IO_URING_F_SQE128|IO_URING_F_CQE32)) !=
            (IO_URING_F_SQE128|IO_URING_F_CQE32))
                return -EOPNOTSUPP;
        return 0;
}
static int nvme_ns_uring_cmd(struct nvme_ns *ns, struct io_uring_cmd *ioucmd,
                unsigned int issue_flags)
{
        struct nvme_ctrl *ctrl = ns->ctrl;
        int ret;

        ret = nvme_uring_cmd_checks(issue_flags);
        if (ret)
                return ret;

        switch (ioucmd->cmd_op) {
        case NVME_URING_CMD_IO:
                ret = nvme_uring_cmd_io(ctrl, ns, ioucmd, issue_flags, false);
                break;
        case NVME_URING_CMD_IO_VEC:
                ret = nvme_uring_cmd_io(ctrl, ns, ioucmd, issue_flags, true);
                break;
        default:
                ret = -ENOTTY;
        }

        return ret;
}
int nvme_ns_chr_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags)
{
        struct nvme_ns *ns = container_of(file_inode(ioucmd->file)->i_cdev,
                        struct nvme_ns, cdev);

        return nvme_ns_uring_cmd(ns, ioucmd, issue_flags);
}
int nvme_ns_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd,
                                 struct io_comp_batch *iob,
                                 unsigned int poll_flags)
{
        struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
        struct request *req = pdu->req;

        if (req && blk_rq_is_poll(req))
                return blk_rq_poll(req, iob, poll_flags);
        return 0;
}
#ifdef CONFIG_NVME_MULTIPATH
static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd,
                void __user *argp, struct nvme_ns_head *head, int srcu_idx,
                bool open_for_write)
        __releases(&head->srcu)
{
        struct nvme_ctrl *ctrl = ns->ctrl;
        int ret;

        nvme_get_ctrl(ns->ctrl);
        srcu_read_unlock(&head->srcu, srcu_idx);
        ret = nvme_ctrl_ioctl(ns->ctrl, cmd, argp, open_for_write);

        nvme_put_ctrl(ctrl);
        return ret;
}
int nvme_ns_head_ioctl(struct block_device *bdev, blk_mode_t mode,
                unsigned int cmd, unsigned long arg)
{
        struct nvme_ns_head *head = bdev->bd_disk->private_data;
        bool open_for_write = mode & BLK_OPEN_WRITE;
        void __user *argp = (void __user *)arg;
        struct nvme_ns *ns;
        int srcu_idx, ret = -EWOULDBLOCK;
        unsigned int flags = 0;

        if (bdev_is_partition(bdev))
                flags |= NVME_IOCTL_PARTITION;

        srcu_idx = srcu_read_lock(&head->srcu);
        ns = nvme_find_path(head);
        if (!ns)
                goto out_unlock;

        /*
         * Handle ioctls that apply to the controller instead of the namespace
         * separately and drop the ns SRCU reference early.  This avoids a
         * deadlock when deleting namespaces using the passthrough interface.
         */
        if (is_ctrl_ioctl(cmd))
                return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx,
                                               open_for_write);

        ret = nvme_ns_ioctl(ns, cmd, argp, flags, open_for_write);
out_unlock:
        srcu_read_unlock(&head->srcu, srcu_idx);
        return ret;
}
long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd,
                unsigned long arg)
{
        bool open_for_write = file->f_mode & FMODE_WRITE;
        struct cdev *cdev = file_inode(file)->i_cdev;
        struct nvme_ns_head *head =
                container_of(cdev, struct nvme_ns_head, cdev);
        void __user *argp = (void __user *)arg;
        struct nvme_ns *ns;
        int srcu_idx, ret = -EWOULDBLOCK;

        srcu_idx = srcu_read_lock(&head->srcu);
        ns = nvme_find_path(head);
        if (!ns)
                goto out_unlock;

        if (is_ctrl_ioctl(cmd))
                return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx,
                                               open_for_write);

        ret = nvme_ns_ioctl(ns, cmd, argp, 0, open_for_write);
out_unlock:
        srcu_read_unlock(&head->srcu, srcu_idx);
        return ret;
}
int nvme_ns_head_chr_uring_cmd(struct io_uring_cmd *ioucmd,
                unsigned int issue_flags)
{
        struct cdev *cdev = file_inode(ioucmd->file)->i_cdev;
        struct nvme_ns_head *head = container_of(cdev, struct nvme_ns_head, cdev);
        int srcu_idx = srcu_read_lock(&head->srcu);
        struct nvme_ns *ns = nvme_find_path(head);
        int ret = -EINVAL;

        if (ns)
                ret = nvme_ns_uring_cmd(ns, ioucmd, issue_flags);
        srcu_read_unlock(&head->srcu, srcu_idx);
        return ret;
}
#endif /* CONFIG_NVME_MULTIPATH */
int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags)
{
        struct nvme_ctrl *ctrl = ioucmd->file->private_data;
        int ret;

        /* IOPOLL not supported yet */
        if (issue_flags & IO_URING_F_IOPOLL)
                return -EOPNOTSUPP;

        ret = nvme_uring_cmd_checks(issue_flags);
        if (ret)
                return ret;

        switch (ioucmd->cmd_op) {
        case NVME_URING_CMD_ADMIN:
                ret = nvme_uring_cmd_io(ctrl, NULL, ioucmd, issue_flags, false);
                break;
        case NVME_URING_CMD_ADMIN_VEC:
                ret = nvme_uring_cmd_io(ctrl, NULL, ioucmd, issue_flags, true);
                break;
        default:
                ret = -ENOTTY;
        }

        return ret;
}
static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp,
                bool open_for_write)
{
        struct nvme_ns *ns;
        int ret, srcu_idx;

        srcu_idx = srcu_read_lock(&ctrl->srcu);
        if (list_empty(&ctrl->namespaces)) {
                ret = -ENOTTY;
                goto out_unlock;
        }

        ns = list_first_or_null_rcu(&ctrl->namespaces, struct nvme_ns, list);
        if (ns != list_last_entry(&ctrl->namespaces, struct nvme_ns, list)) {
                dev_warn(ctrl->device,
                        "NVME_IOCTL_IO_CMD not supported when multiple namespaces present!\n");
                ret = -EINVAL;
                goto out_unlock;
        }

        dev_warn(ctrl->device,
                "using deprecated NVME_IOCTL_IO_CMD ioctl on the char device!\n");
        if (!nvme_get_ns(ns)) {
                ret = -ENXIO;
                goto out_unlock;
        }
        srcu_read_unlock(&ctrl->srcu, srcu_idx);

        ret = nvme_user_cmd(ctrl, ns, argp, 0, open_for_write);
        nvme_put_ns(ns);
        return ret;

out_unlock:
        srcu_read_unlock(&ctrl->srcu, srcu_idx);
        return ret;
}
long nvme_dev_ioctl(struct file *file, unsigned int cmd,
                unsigned long arg)
{
        bool open_for_write = file->f_mode & FMODE_WRITE;
        struct nvme_ctrl *ctrl = file->private_data;
        void __user *argp = (void __user *)arg;

        switch (cmd) {
        case NVME_IOCTL_ADMIN_CMD:
                return nvme_user_cmd(ctrl, NULL, argp, 0, open_for_write);
        case NVME_IOCTL_ADMIN64_CMD:
                return nvme_user_cmd64(ctrl, NULL, argp, 0, open_for_write);
        case NVME_IOCTL_IO_CMD:
                return nvme_dev_user_cmd(ctrl, argp, open_for_write);
        case NVME_IOCTL_RESET:
                if (!capable(CAP_SYS_ADMIN))
                        return -EACCES;
                dev_warn(ctrl->device, "resetting controller\n");
                return nvme_reset_ctrl_sync(ctrl);
        case NVME_IOCTL_SUBSYS_RESET:
                if (!capable(CAP_SYS_ADMIN))
                        return -EACCES;
                return nvme_reset_subsystem(ctrl);
        case NVME_IOCTL_RESCAN:
                if (!capable(CAP_SYS_ADMIN))
                        return -EACCES;
                nvme_queue_scan(ctrl);