// SPDX-License-Identifier: GPL-2.0
/*
 * Code related to the io_uring_register() syscall
 *
 * Copyright (C) 2023 Jens Axboe
 */
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/syscalls.h>
#include <linux/refcount.h>
#include <linux/bits.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/nospec.h>
#include <linux/compat.h>
#include <linux/io_uring.h>
#include <linux/io_uring_types.h>

/*
 * Local io_uring headers providing the helpers used below (the exact list
 * depends on the kernel version this file is taken from).
 */
#include "io_uring.h"
#include "opdef.h"
#include "tctx.h"
#include "rsrc.h"
#include "sqpoll.h"
#include "register.h"
#include "cancel.h"
#include "kbuf.h"
#include "napi.h"
#include "eventfd.h"
#include "msg_ring.h"
#include "memmap.h"

#define IORING_MAX_RESTRICTIONS	(IORING_RESTRICTION_LAST + \
				 IORING_REGISTER_LAST + IORING_OP_LAST)

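/*
 * Fill in an io_uring_probe structure for the application: the highest
 * opcode the kernel knows about, plus a per-opcode flag indicating whether
 * that opcode is supported by this kernel build.
 */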
static __cold int io_probe(struct io_ring_ctx *ctx, void __user *arg,
			   unsigned nr_args)
{
	struct io_uring_probe *p;
	size_t size;
	int i, ret;

	if (nr_args > IORING_OP_LAST)
		nr_args = IORING_OP_LAST;

	size = struct_size(p, ops, nr_args);
	p = kzalloc(size, GFP_KERNEL);
	if (!p)
		return -ENOMEM;

	ret = -EFAULT;
	if (copy_from_user(p, arg, size))
		goto out;
	ret = -EINVAL;
	if (memchr_inv(p, 0, size))
		goto out;

	p->last_op = IORING_OP_LAST - 1;

	for (i = 0; i < nr_args; i++) {
		p->ops[i].op = i;
		if (io_uring_op_supported(i))
			p->ops[i].flags = IO_URING_OP_SUPPORTED;
	}
	p->ops_len = i;

	ret = 0;
	if (copy_to_user(arg, p, size))
		ret = -EFAULT;
out:
	kfree(p);
	return ret;
}

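/*
 * Drop the credentials that were registered under the given personality id,
 * if any.
 */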
int io_unregister_personality(struct io_ring_ctx *ctx, unsigned id)
{
	const struct cred *creds;

	creds = xa_erase(&ctx->personalities, id);
	if (creds) {
		put_cred(creds);
		return 0;
	}

	return -EINVAL;
}

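/*
 * Register the current task's credentials with the ring and return the
 * allocated personality id, which SQEs can later reference via
 * sqe->personality.
 */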
static int io_register_personality(struct io_ring_ctx *ctx)
{
	const struct cred *creds;
	u32 id;
	int ret;

	creds = get_current_cred();

	ret = xa_alloc_cyclic(&ctx->personalities, &id, (void *)creds,
			XA_LIMIT(0, USHRT_MAX), &ctx->pers_next, GFP_KERNEL);
	if (ret < 0) {
		put_cred(creds);
		return ret;
	}
	return id;
}

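/*
 * Install the user supplied restriction set: allowed register opcodes,
 * allowed SQE opcodes, and allowed/required SQE flags. Only permitted while
 * the ring is still disabled, and only once per ring.
 */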
static __cold int io_register_restrictions(struct io_ring_ctx *ctx,
					   void __user *arg, unsigned int nr_args)
{
	struct io_uring_restriction *res;
	size_t size;
	int i, ret;

	/* Restrictions allowed only if rings started disabled */
	if (!(ctx->flags & IORING_SETUP_R_DISABLED))
		return -EBADFD;

	/* We allow only a single restrictions registration */
	if (ctx->restrictions.registered)
		return -EBUSY;

	if (!arg || nr_args > IORING_MAX_RESTRICTIONS)
		return -EINVAL;

	size = array_size(nr_args, sizeof(*res));
	if (size == SIZE_MAX)
		return -EOVERFLOW;

	res = memdup_user(arg, size);
	if (IS_ERR(res))
		return PTR_ERR(res);

	ret = 0;

	for (i = 0; i < nr_args; i++) {
		switch (res[i].opcode) {
		case IORING_RESTRICTION_REGISTER_OP:
			if (res[i].register_op >= IORING_REGISTER_LAST) {
				ret = -EINVAL;
				goto out;
			}
			__set_bit(res[i].register_op,
				  ctx->restrictions.register_op);
			break;
		case IORING_RESTRICTION_SQE_OP:
			if (res[i].sqe_op >= IORING_OP_LAST) {
				ret = -EINVAL;
				goto out;
			}
			__set_bit(res[i].sqe_op, ctx->restrictions.sqe_op);
			break;
		case IORING_RESTRICTION_SQE_FLAGS_ALLOWED:
			ctx->restrictions.sqe_flags_allowed = res[i].sqe_flags;
			break;
		case IORING_RESTRICTION_SQE_FLAGS_REQUIRED:
			ctx->restrictions.sqe_flags_required = res[i].sqe_flags;
			break;
		default:
			ret = -EINVAL;
			goto out;
		}
	}

out:
	/* Reset all restrictions if an error happened */
	if (ret != 0)
		memset(&ctx->restrictions, 0, sizeof(ctx->restrictions));
	else
		ctx->restrictions.registered = true;

	kfree(res);
	return ret;
}

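/*
 * Enable a ring that was created with IORING_SETUP_R_DISABLED, making any
 * previously registered restrictions take effect and waking the SQPOLL
 * thread if one is waiting.
 */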
static int io_register_enable_rings(struct io_ring_ctx *ctx)
{
	if (!(ctx->flags & IORING_SETUP_R_DISABLED))
		return -EBADFD;

	if (ctx->flags & IORING_SETUP_SINGLE_ISSUER && !ctx->submitter_task) {
		WRITE_ONCE(ctx->submitter_task, get_task_struct(current));
		/*
		 * Lazy activation attempts would fail if it was polled before
		 * submitter_task is set.
		 */
		if (wq_has_sleeper(&ctx->poll_wq))
			io_activate_pollwq(ctx);
	}

	if (ctx->restrictions.registered)
		ctx->restricted = 1;

	ctx->flags &= ~IORING_SETUP_R_DISABLED;
	if (ctx->sq_data && wq_has_sleeper(&ctx->sq_data->wait))
		wake_up(&ctx->sq_data->wait);
	return 0;
}

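/*
 * Apply (or clear, if new_mask is NULL) the io-wq CPU affinity. For SQPOLL
 * rings the request is routed to the SQPOLL workqueue, with uring_lock
 * dropped around the call.
 */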
static __cold int __io_register_iowq_aff(struct io_ring_ctx *ctx,
					 cpumask_var_t new_mask)
{
	int ret;

	if (!(ctx->flags & IORING_SETUP_SQPOLL)) {
		ret = io_wq_cpu_affinity(current->io_uring, new_mask);
	} else {
		mutex_unlock(&ctx->uring_lock);
		ret = io_sqpoll_wq_cpu_affinity(ctx, new_mask);
		mutex_lock(&ctx->uring_lock);
	}

	return ret;
}

static __cold int io_register_iowq_aff(struct io_ring_ctx *ctx,
				       void __user *arg, unsigned len)
{
	cpumask_var_t new_mask;
	int ret;

	if (!alloc_cpumask_var(&new_mask, GFP_KERNEL))
		return -ENOMEM;

	cpumask_clear(new_mask);
	if (len > cpumask_size())
		len = cpumask_size();

#ifdef CONFIG_COMPAT
	if (in_compat_syscall())
		ret = compat_get_bitmap(cpumask_bits(new_mask),
					(const compat_ulong_t __user *)arg,
					len * 8 /* CHAR_BIT */);
	else
#endif
		ret = copy_from_user(new_mask, arg, len);

	if (ret) {
		free_cpumask_var(new_mask);
		return -EFAULT;
	}

	ret = __io_register_iowq_aff(ctx, new_mask);
	free_cpumask_var(new_mask);
	return ret;
}

static __cold int io_unregister_iowq_aff(struct io_ring_ctx *ctx)
{
	return __io_register_iowq_aff(ctx, NULL);
}

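/*
 * Update the per-ring limits on bounded and unbounded io-wq workers; the
 * previous limits are copied back to userspace. Illustrative raw usage via
 * the io_uring_register(2) syscall (counts[0] = bounded, counts[1] =
 * unbounded workers):
 *
 *	unsigned int counts[2] = { 16, 4 };
 *	io_uring_register(ring_fd, IORING_REGISTER_IOWQ_MAX_WORKERS, counts, 2);
 */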
static __cold int io_register_iowq_max_workers(struct io_ring_ctx *ctx,
					       void __user *arg)
	__must_hold(&ctx->uring_lock)
{
	struct io_tctx_node *node;
	struct io_uring_task *tctx = NULL;
	struct io_sq_data *sqd = NULL;
	__u32 new_count[2];
	int i, ret;

	if (copy_from_user(new_count, arg, sizeof(new_count)))
		return -EFAULT;
	for (i = 0; i < ARRAY_SIZE(new_count); i++)
		if (new_count[i] > INT_MAX)
			return -EINVAL;

	if (ctx->flags & IORING_SETUP_SQPOLL) {
		sqd = ctx->sq_data;
		if (sqd) {
			/*
			 * Observe the correct sqd->lock -> ctx->uring_lock
			 * ordering. Fine to drop uring_lock here, we hold
			 * a ref to the ctx.
			 */
			refcount_inc(&sqd->refs);
			mutex_unlock(&ctx->uring_lock);
			mutex_lock(&sqd->lock);
			mutex_lock(&ctx->uring_lock);
			if (sqd->thread)
				tctx = sqd->thread->io_uring;
		}
	} else {
		tctx = current->io_uring;
	}

	BUILD_BUG_ON(sizeof(new_count) != sizeof(ctx->iowq_limits));

	for (i = 0; i < ARRAY_SIZE(new_count); i++)
		if (new_count[i])
			ctx->iowq_limits[i] = new_count[i];
	ctx->iowq_limits_set = true;

	if (tctx && tctx->io_wq) {
		ret = io_wq_max_workers(tctx->io_wq, new_count);
		if (ret)
			goto err;
	} else {
		memset(new_count, 0, sizeof(new_count));
	}

	if (sqd) {
		mutex_unlock(&ctx->uring_lock);
		mutex_unlock(&sqd->lock);
		io_put_sq_data(sqd);
		mutex_lock(&ctx->uring_lock);
	}

	if (copy_to_user(arg, new_count, sizeof(new_count)))
		return -EFAULT;

	/* that's it for SQPOLL, only the SQPOLL task creates requests */
	if (sqd)
		return 0;

	/* now propagate the restriction to all registered users */
	list_for_each_entry(node, &ctx->tctx_list, ctx_node) {
		tctx = node->task->io_uring;
		if (WARN_ON_ONCE(!tctx->io_wq))
			continue;

		for (i = 0; i < ARRAY_SIZE(new_count); i++)
			new_count[i] = ctx->iowq_limits[i];
		/* ignore errors, it always returns zero anyway */
		(void)io_wq_max_workers(tctx->io_wq, new_count);
	}
	return 0;
err:
	if (sqd) {
		mutex_unlock(&ctx->uring_lock);
		mutex_unlock(&sqd->lock);
		io_put_sq_data(sqd);
		mutex_lock(&ctx->uring_lock);
	}
	return ret;
}

static int io_register_clock(struct io_ring_ctx *ctx,
			     struct io_uring_clock_register __user *arg)
{
	struct io_uring_clock_register reg;

	if (copy_from_user(&reg, arg, sizeof(reg)))
		return -EFAULT;
	if (memchr_inv(&reg.__resv, 0, sizeof(reg.__resv)))
		return -EINVAL;

	switch (reg.clockid) {
	case CLOCK_MONOTONIC:
		ctx->clock_offset = 0;
		break;
	case CLOCK_BOOTTIME:
		ctx->clock_offset = TK_OFFS_BOOT;
		break;
	default:
		return -EINVAL;
	}

	ctx->clockid = reg.clockid;
	return 0;
}

/*
 * State to maintain until we can swap. Both new and old state, used for
 * either mapping or freeing.
 */
struct io_ring_ctx_rings {
	unsigned short n_ring_pages;
	unsigned short n_sqe_pages;
	struct page **ring_pages;
	struct page **sqe_pages;
	struct io_uring_sqe *sq_sqes;
	struct io_rings *rings;
};

static void io_register_free_rings(struct io_uring_params *p,
				   struct io_ring_ctx_rings *r)
{
	if (!(p->flags & IORING_SETUP_NO_MMAP)) {
		io_pages_unmap(r->rings, &r->ring_pages, &r->n_ring_pages,
				true);
		io_pages_unmap(r->sq_sqes, &r->sqe_pages, &r->n_sqe_pages,
				true);
	} else {
		io_pages_free(&r->ring_pages, r->n_ring_pages);
		io_pages_free(&r->sqe_pages, r->n_sqe_pages);
		vunmap(r->rings);
		vunmap(r->sq_sqes);
	}
}

#define swap_old(ctx, o, n, field)		\
	do {					\
		(o).field = (ctx)->field;	\
		(ctx)->field = (n).field;	\
	} while (0)

#define RESIZE_FLAGS	(IORING_SETUP_CQSIZE | IORING_SETUP_CLAMP)
#define COPY_FLAGS	(IORING_SETUP_NO_SQARRAY | IORING_SETUP_SQE128 | \
			 IORING_SETUP_CQE32 | IORING_SETUP_NO_MMAP)

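/*
 * Resize the SQ/CQ rings of an existing ring. New rings are allocated and
 * populated from the old ones, then swapped in under ctx->resize_lock and
 * the completion lock; the old rings are freed afterwards. Fails with
 * -EOVERFLOW if the pending entries don't fit in the new sizes.
 */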
static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg)
{
	struct io_ring_ctx_rings o = { }, n = { }, *to_free = NULL;
	size_t size, sq_array_offset;
	struct io_uring_params p;
	unsigned i, tail;
	void *ptr;
	int ret;

	/* for single issuer, must be owner resizing */
	if (ctx->flags & IORING_SETUP_SINGLE_ISSUER &&
	    current != ctx->submitter_task)
		return -EEXIST;
	if (copy_from_user(&p, arg, sizeof(p)))
		return -EFAULT;
	if (p.flags & ~RESIZE_FLAGS)
		return -EINVAL;

	/* properties that are always inherited */
	p.flags |= (ctx->flags & COPY_FLAGS);

	ret = io_uring_fill_params(p.sq_entries, &p);
	if (unlikely(ret))
		return ret;

	/* nothing to do, but copy params back */
	if (p.sq_entries == ctx->sq_entries && p.cq_entries == ctx->cq_entries) {
		if (copy_to_user(arg, &p, sizeof(p)))
			return -EFAULT;
		return 0;
	}

	size = rings_size(p.flags, p.sq_entries, p.cq_entries,
				&sq_array_offset);
	if (size == SIZE_MAX)
		return -EOVERFLOW;

	if (!(p.flags & IORING_SETUP_NO_MMAP))
		n.rings = io_pages_map(&n.ring_pages, &n.n_ring_pages, size);
	else
		n.rings = __io_uaddr_map(&n.ring_pages, &n.n_ring_pages,
						p.cq_off.user_addr, size);
	if (IS_ERR(n.rings))
		return PTR_ERR(n.rings);

	n.rings->sq_ring_mask = p.sq_entries - 1;
	n.rings->cq_ring_mask = p.cq_entries - 1;
	n.rings->sq_ring_entries = p.sq_entries;
	n.rings->cq_ring_entries = p.cq_entries;

	if (copy_to_user(arg, &p, sizeof(p))) {
		io_register_free_rings(&p, &n);
		return -EFAULT;
	}

	if (p.flags & IORING_SETUP_SQE128)
		size = array_size(2 * sizeof(struct io_uring_sqe), p.sq_entries);
	else
		size = array_size(sizeof(struct io_uring_sqe), p.sq_entries);
	if (size == SIZE_MAX) {
		io_register_free_rings(&p, &n);
		return -EOVERFLOW;
	}

	if (!(p.flags & IORING_SETUP_NO_MMAP))
		ptr = io_pages_map(&n.sqe_pages, &n.n_sqe_pages, size);
	else
		ptr = __io_uaddr_map(&n.sqe_pages, &n.n_sqe_pages,
					p.sq_off.user_addr, size);
	if (IS_ERR(ptr)) {
		io_register_free_rings(&p, &n);
		return PTR_ERR(ptr);
	}

	/*
	 * If using SQPOLL, park the thread
	 */
	if (ctx->sq_data) {
		mutex_unlock(&ctx->uring_lock);
		io_sq_thread_park(ctx->sq_data);
		mutex_lock(&ctx->uring_lock);
	}

	/*
	 * We'll do the swap. Grab the ctx->resize_lock, which will exclude
	 * any new mmap's on the ring fd. Clear out existing mappings to prevent
	 * mmap from seeing them, as we'll unmap them. Any attempt to mmap
	 * existing rings beyond this point will fail. Not that it could proceed
	 * at this point anyway, as the io_uring mmap side needs to grab the
	 * ctx->resize_lock as well. Likewise, hold the completion lock over the
	 * duration of the actual swap.
	 */
	mutex_lock(&ctx->resize_lock);
	spin_lock(&ctx->completion_lock);
	o.rings = ctx->rings;
	ctx->rings = NULL;
	o.sq_sqes = ctx->sq_sqes;
	ctx->sq_sqes = NULL;

	/*
	 * Now copy SQ and CQ entries, if any. If either of the destination
	 * rings can't hold what is already there, then fail the operation.
	 */
	n.sq_sqes = ptr;
	tail = o.rings->sq.tail;
	if (tail - o.rings->sq.head > p.sq_entries)
		goto overflow;
	for (i = o.rings->sq.head; i < tail; i++) {
		unsigned src_head = i & (ctx->sq_entries - 1);
		unsigned dst_head = i & n.rings->sq_ring_mask;

		n.sq_sqes[dst_head] = o.sq_sqes[src_head];
	}
	n.rings->sq.head = o.rings->sq.head;
	n.rings->sq.tail = o.rings->sq.tail;

	tail = o.rings->cq.tail;
	if (tail - o.rings->cq.head > p.cq_entries) {
overflow:
		/* restore old rings, and return -EOVERFLOW via cleanup path */
		ctx->rings = o.rings;
		ctx->sq_sqes = o.sq_sqes;
		to_free = &n;
		ret = -EOVERFLOW;
		goto out;
	}
	for (i = o.rings->cq.head; i < tail; i++) {
		unsigned src_head = i & (ctx->cq_entries - 1);
		unsigned dst_head = i & n.rings->cq_ring_mask;

		n.rings->cqes[dst_head] = o.rings->cqes[src_head];
	}
	n.rings->cq.head = o.rings->cq.head;
	n.rings->cq.tail = o.rings->cq.tail;
	/* invalidate cached cqe refill */
	ctx->cqe_cached = ctx->cqe_sentinel = NULL;

	n.rings->sq_dropped = o.rings->sq_dropped;
	n.rings->sq_flags = o.rings->sq_flags;
	n.rings->cq_flags = o.rings->cq_flags;
	n.rings->cq_overflow = o.rings->cq_overflow;

	/* all done, store old pointers and assign new ones */
	if (!(ctx->flags & IORING_SETUP_NO_SQARRAY))
		ctx->sq_array = (u32 *)((char *)n.rings + sq_array_offset);

	ctx->sq_entries = p.sq_entries;
	ctx->cq_entries = p.cq_entries;

	ctx->rings = n.rings;
	ctx->sq_sqes = n.sq_sqes;
	swap_old(ctx, o, n, n_ring_pages);
	swap_old(ctx, o, n, n_sqe_pages);
	swap_old(ctx, o, n, ring_pages);
	swap_old(ctx, o, n, sqe_pages);
	to_free = &o;
	ret = 0;
out:
	spin_unlock(&ctx->completion_lock);
	mutex_unlock(&ctx->resize_lock);
	io_register_free_rings(&p, to_free);

	if (ctx->sq_data)
		io_sq_thread_unpark(ctx->sq_data);

	return ret;
}

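/*
 * Register a user provided memory region with the ring, used here to carry
 * the extended CQ wait argument (IORING_MEM_REGION_REG_WAIT_ARG). Only one
 * such region can be registered per ring.
 */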
static int io_register_mem_region(struct io_ring_ctx *ctx, void __user *uarg)
{
	struct io_uring_mem_region_reg __user *reg_uptr = uarg;
	struct io_uring_mem_region_reg reg;
	struct io_uring_region_desc __user *rd_uptr;
	struct io_uring_region_desc rd;
	int ret;

	if (io_region_is_set(&ctx->param_region))
		return -EBUSY;
	if (copy_from_user(&reg, reg_uptr, sizeof(reg)))
		return -EFAULT;
	rd_uptr = u64_to_user_ptr(reg.region_uptr);
	if (copy_from_user(&rd, rd_uptr, sizeof(rd)))
		return -EFAULT;

	if (memchr_inv(&reg.__resv, 0, sizeof(reg.__resv)))
		return -EINVAL;
	if (reg.flags & ~IORING_MEM_REGION_REG_WAIT_ARG)
		return -EINVAL;

	/*
	 * This ensures there are no waiters. Waiters are unlocked and it's
	 * hard to synchronise with them, especially if we need to initialise
	 * the region.
	 */
	if ((reg.flags & IORING_MEM_REGION_REG_WAIT_ARG) &&
	    !(ctx->flags & IORING_SETUP_R_DISABLED))
		return -EINVAL;

	ret = io_create_region(ctx, &ctx->param_region, &rd);
	if (ret)
		return ret;
	if (copy_to_user(rd_uptr, &rd, sizeof(rd))) {
		io_free_region(ctx, &ctx->param_region);
		return -EFAULT;
	}

	if (reg.flags & IORING_MEM_REGION_REG_WAIT_ARG) {
		ctx->cq_wait_arg = io_region_get_ptr(&ctx->param_region);
		ctx->cq_wait_size = rd.size;
	}
	return 0;
}

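/*
 * Core dispatcher for io_uring_register(2): validates the opcode against any
 * registered restrictions and the submitter task, then routes to the
 * individual registration helpers. Called with ctx->uring_lock held.
 */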
static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
			       void __user *arg, unsigned nr_args)
	__releases(ctx->uring_lock)
	__acquires(ctx->uring_lock)
{
	int ret;

	/*
	 * We don't quiesce the refs for register anymore and so it can't be
	 * dying as we're holding a file ref here.
	 */
	if (WARN_ON_ONCE(percpu_ref_is_dying(&ctx->refs)))
		return -ENXIO;

	if (ctx->submitter_task && ctx->submitter_task != current)
		return -EEXIST;

	if (ctx->restricted) {
		opcode = array_index_nospec(opcode, IORING_REGISTER_LAST);
		if (!test_bit(opcode, ctx->restrictions.register_op))
			return -EACCES;
	}

	switch (opcode) {
	case IORING_REGISTER_BUFFERS:
		ret = -EFAULT;
		if (!arg)
			break;
		ret = io_sqe_buffers_register(ctx, arg, nr_args, NULL);
		break;
	case IORING_UNREGISTER_BUFFERS:
		ret = -EINVAL;
		if (arg || nr_args)
			break;
		ret = io_sqe_buffers_unregister(ctx);
		break;
	case IORING_REGISTER_FILES:
		ret = -EFAULT;
		if (!arg)
			break;
		ret = io_sqe_files_register(ctx, arg, nr_args, NULL);
		break;
	case IORING_UNREGISTER_FILES:
		ret = -EINVAL;
		if (arg || nr_args)
			break;
		ret = io_sqe_files_unregister(ctx);
		break;
	case IORING_REGISTER_FILES_UPDATE:
		ret = io_register_files_update(ctx, arg, nr_args);
		break;
	case IORING_REGISTER_EVENTFD:
		ret = -EINVAL;
		if (nr_args != 1)
			break;
		ret = io_eventfd_register(ctx, arg, 0);
		break;
	case IORING_REGISTER_EVENTFD_ASYNC:
		ret = -EINVAL;
		if (nr_args != 1)
			break;
		ret = io_eventfd_register(ctx, arg, 1);
		break;
	case IORING_UNREGISTER_EVENTFD:
		ret = -EINVAL;
		if (arg || nr_args)
			break;
		ret = io_eventfd_unregister(ctx);
		break;
	case IORING_REGISTER_PROBE:
		ret = -EINVAL;
		if (!arg || nr_args > 256)
			break;
		ret = io_probe(ctx, arg, nr_args);
		break;
	case IORING_REGISTER_PERSONALITY:
		ret = -EINVAL;
		if (arg || nr_args)
			break;
		ret = io_register_personality(ctx);
		break;
	case IORING_UNREGISTER_PERSONALITY:
		ret = -EINVAL;
		if (arg)
			break;
		ret = io_unregister_personality(ctx, nr_args);
		break;
	case IORING_REGISTER_ENABLE_RINGS:
		ret = -EINVAL;
		if (arg || nr_args)
			break;
		ret = io_register_enable_rings(ctx);
		break;
	case IORING_REGISTER_RESTRICTIONS:
		ret = io_register_restrictions(ctx, arg, nr_args);
		break;
	case IORING_REGISTER_FILES2:
		ret = io_register_rsrc(ctx, arg, nr_args, IORING_RSRC_FILE);
		break;
	case IORING_REGISTER_FILES_UPDATE2:
		ret = io_register_rsrc_update(ctx, arg, nr_args,
					      IORING_RSRC_FILE);
		break;
	case IORING_REGISTER_BUFFERS2:
		ret = io_register_rsrc(ctx, arg, nr_args, IORING_RSRC_BUFFER);
		break;
	case IORING_REGISTER_BUFFERS_UPDATE:
		ret = io_register_rsrc_update(ctx, arg, nr_args,
					      IORING_RSRC_BUFFER);
		break;
	case IORING_REGISTER_IOWQ_AFF:
		ret = -EINVAL;
		if (!arg || !nr_args)
			break;
		ret = io_register_iowq_aff(ctx, arg, nr_args);
		break;
	case IORING_UNREGISTER_IOWQ_AFF:
		ret = -EINVAL;
		if (arg || nr_args)
			break;
		ret = io_unregister_iowq_aff(ctx);
		break;
	case IORING_REGISTER_IOWQ_MAX_WORKERS:
		ret = -EINVAL;
		if (!arg || nr_args != 2)
			break;
		ret = io_register_iowq_max_workers(ctx, arg);
		break;
	case IORING_REGISTER_RING_FDS:
		ret = io_ringfd_register(ctx, arg, nr_args);
		break;
	case IORING_UNREGISTER_RING_FDS:
		ret = io_ringfd_unregister(ctx, arg, nr_args);
		break;
	case IORING_REGISTER_PBUF_RING:
		ret = -EINVAL;
		if (!arg || nr_args != 1)
			break;
		ret = io_register_pbuf_ring(ctx, arg);
		break;
	case IORING_UNREGISTER_PBUF_RING:
		ret = -EINVAL;
		if (!arg || nr_args != 1)
			break;
		ret = io_unregister_pbuf_ring(ctx, arg);
		break;
	case IORING_REGISTER_SYNC_CANCEL:
		ret = -EINVAL;
		if (!arg || nr_args != 1)
			break;
		ret = io_sync_cancel(ctx, arg);
		break;
	case IORING_REGISTER_FILE_ALLOC_RANGE:
		ret = -EINVAL;
		if (!arg || nr_args)
			break;
		ret = io_register_file_alloc_range(ctx, arg);
		break;
	case IORING_REGISTER_PBUF_STATUS:
		ret = -EINVAL;
		if (!arg || nr_args != 1)
			break;
		ret = io_register_pbuf_status(ctx, arg);
		break;
	case IORING_REGISTER_NAPI:
		ret = -EINVAL;
		if (!arg || nr_args != 1)
			break;
		ret = io_register_napi(ctx, arg);
		break;
	case IORING_UNREGISTER_NAPI:
		ret = -EINVAL;
		if (nr_args != 1)
			break;
		ret = io_unregister_napi(ctx, arg);
		break;
	case IORING_REGISTER_CLOCK:
		ret = -EINVAL;
		if (!arg || nr_args)
			break;
		ret = io_register_clock(ctx, arg);
		break;
	case IORING_REGISTER_CLONE_BUFFERS:
		ret = -EINVAL;
		if (!arg || nr_args != 1)
			break;
		ret = io_register_clone_buffers(ctx, arg);
		break;
	case IORING_REGISTER_RESIZE_RINGS:
		ret = -EINVAL;
		if (!arg || nr_args != 1)
			break;
		ret = io_register_resize_rings(ctx, arg);
		break;
	case IORING_REGISTER_MEM_REGION:
		ret = -EINVAL;
		if (!arg || nr_args != 1)
			break;
		ret = io_register_mem_region(ctx, arg);
		break;
	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

/*
 * Given an 'fd' value, return the ctx associated with it. If 'registered' is
 * true, then the registered index is used. Otherwise, the normal fd table.
 * Caller must call fput() on the returned file, unless it's an ERR_PTR.
 */
struct file *io_uring_register_get_file(unsigned int fd, bool registered)
{
	struct file *file;

	if (registered) {
		/*
		 * Ring fd has been registered via IORING_REGISTER_RING_FDS, we
		 * need only dereference our task private array to find it.
		 */
		struct io_uring_task *tctx = current->io_uring;

		if (unlikely(!tctx || fd >= IO_RINGFD_REG_MAX))
			return ERR_PTR(-EINVAL);
		fd = array_index_nospec(fd, IO_RINGFD_REG_MAX);
		file = tctx->registered_rings[fd];
	} else {
		file = fget(fd);
	}

	if (unlikely(!file))
		return ERR_PTR(-EBADF);
	if (io_is_uring_fops(file))
		return file;
	fput(file);
	return ERR_PTR(-EOPNOTSUPP);
}

/*
 * "blind" registration opcodes are ones where there's no ring given, and
 * hence the source fd must be -1.
 */
static int io_uring_register_blind(unsigned int opcode, void __user *arg,
				   unsigned int nr_args)
{
	switch (opcode) {
	case IORING_REGISTER_SEND_MSG_RING: {
		struct io_uring_sqe sqe;

		if (!arg || nr_args != 1)
			return -EINVAL;
		if (copy_from_user(&sqe, arg, sizeof(sqe)))
			return -EFAULT;
		/* no flags supported */
		if (sqe.flags)
			return -EINVAL;
		if (sqe.opcode == IORING_OP_MSG_RING)
			return io_uring_sync_msg_ring(&sqe);
		}
	}

	return -EINVAL;
}

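/*
 * io_uring_register(2) entry point: resolves the ring file (normal fd,
 * registered ring index, or no ring at all for "blind" opcodes) and
 * dispatches to __io_uring_register() under ctx->uring_lock.
 */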
SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode,
		void __user *, arg, unsigned int, nr_args)
{
	struct io_ring_ctx *ctx;
	long ret = -EBADF;
	struct file *file;
	bool use_registered_ring;

	use_registered_ring = !!(opcode & IORING_REGISTER_USE_REGISTERED_RING);
	opcode &= ~IORING_REGISTER_USE_REGISTERED_RING;

	if (opcode >= IORING_REGISTER_LAST)
		return -EINVAL;

	if (fd == -1)
		return io_uring_register_blind(opcode, arg, nr_args);

	file = io_uring_register_get_file(fd, use_registered_ring);
	if (IS_ERR(file))
		return PTR_ERR(file);
	ctx = file->private_data;

	mutex_lock(&ctx->uring_lock);
	ret = __io_uring_register(ctx, opcode, arg, nr_args);

	trace_io_uring_register(ctx, opcode, ctx->file_table.data.nr,
				ctx->buf_table.nr, ret);
	mutex_unlock(&ctx->uring_lock);
	if (!use_registered_ring)
		fput(file);
	return ret;
}