// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/net.h>
#include <linux/compat.h>
#include <net/compat.h>
#include <linux/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "io_uring.h"
#include "kbuf.h"
#include "alloc_cache.h"
#include "net.h"
#include "notif.h"
#include "rsrc.h"

#if defined(CONFIG_NET)
	struct sockaddr __user		*addr;

	struct sockaddr __user		*addr;
	bool				seen_econnaborted;

	struct compat_msghdr __user	*umsg_compat;
	struct user_msghdr __user	*umsg;
	unsigned			nr_multishot_loops;
	/* initialised and used only by !msg send variants */
	void __user			*msg_control;
	/* used only for send zerocopy */
	struct io_kiocb			*notif;
/*
 * Number of times we'll try and do receives if there's more data. If we
 * exceed this limit, then add us to the back of the queue and retry from
 * there. This helps fairness between flooding clients.
 */
#define MULTISHOT_MAX_RETRY	32
int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);

	if (unlikely(sqe->off || sqe->addr || sqe->rw_flags ||
		     sqe->buf_index || sqe->splice_fd_in))
		return -EINVAL;

	shutdown->how = READ_ONCE(sqe->len);
	req->flags |= REQ_F_FORCE_ASYNC;
	return 0;
}
int io_shutdown(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);
	struct socket *sock;
	int ret;

	WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK);

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	ret = __sys_shutdown_sock(sock, shutdown->how);
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}
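/*
 * A short transfer is only worth retrying from here for stream-like
 * sockets (SOCK_STREAM, SOCK_SEQPACKET); datagram sockets complete with
 * whatever a single call moved, regardless of MSG_WAITALL.
 */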
static bool io_net_retry(struct socket *sock, int flags)
{
	if (!(flags & MSG_WAITALL))
		return false;
	return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET;
}
static void io_netmsg_iovec_free(struct io_async_msghdr *kmsg)
{
	if (kmsg->free_iov) {
		kfree(kmsg->free_iov);
		kmsg->free_iov_nr = 0;
		kmsg->free_iov = NULL;
	}
}
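/*
 * On completion, try to stash the async msghdr in the per-ring
 * netmsg_cache so the next send/recv can reuse it instead of allocating.
 * A cached iovec is poisoned for KASAN while it sits in the cache and
 * unpoisoned again when handed back out by io_msg_alloc_async().
 */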
static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_async_msghdr *hdr = req->async_data;
	struct iovec *iov;

	/* can't recycle, ensure we free the iovec if we have one */
	if (unlikely(issue_flags & IO_URING_F_UNLOCKED)) {
		io_netmsg_iovec_free(hdr);
		return;
	}

	/* Let normal cleanup path reap it if we fail adding to the cache */
	iov = hdr->free_iov;
	if (io_alloc_cache_put(&req->ctx->netmsg_cache, hdr)) {
		if (iov)
			kasan_mempool_poison_object(iov);
		req->async_data = NULL;
		req->flags &= ~REQ_F_ASYNC_DATA;
	}
}
static struct io_async_msghdr *io_msg_alloc_async(struct io_kiocb *req)
{
	struct io_ring_ctx *ctx = req->ctx;
	struct io_async_msghdr *hdr;

	hdr = io_alloc_cache_get(&ctx->netmsg_cache);
	if (hdr) {
		if (hdr->free_iov) {
			kasan_mempool_unpoison_object(hdr->free_iov,
				hdr->free_iov_nr * sizeof(struct iovec));
			req->flags |= REQ_F_NEED_CLEANUP;
		}
		req->flags |= REQ_F_ASYNC_DATA;
		req->async_data = hdr;
		return hdr;
	}

	if (!io_alloc_async_data(req)) {
		hdr = req->async_data;
		hdr->free_iov_nr = 0;
		hdr->free_iov = NULL;
		return hdr;
	}
	return NULL;
}
/* assign new iovec to kmsg, if we need to */
static int io_net_vec_assign(struct io_kiocb *req, struct io_async_msghdr *kmsg,
			     struct iovec *iov)
{
	if (iov) {
		req->flags |= REQ_F_NEED_CLEANUP;
		kmsg->free_iov_nr = kmsg->msg.msg_iter.nr_segs;
		if (kmsg->free_iov)
			kfree(kmsg->free_iov);
		kmsg->free_iov = iov;
	}
	return 0;
}
static inline void io_mshot_prep_retry(struct io_kiocb *req,
				       struct io_async_msghdr *kmsg)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	req->flags &= ~REQ_F_BL_EMPTY;
	sr->done_io = 0;
	sr->len = 0;		/* get from the provided buffer */
	req->buf_index = sr->buf_group;
}
static int io_compat_msg_copy_hdr(struct io_kiocb *req,
				  struct io_async_msghdr *iomsg,
				  struct compat_msghdr *msg, int ddir)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct compat_iovec __user *uiov;
	struct iovec *iov;
	int ret, nr_segs;

	if (iomsg->free_iov) {
		nr_segs = iomsg->free_iov_nr;
		iov = iomsg->free_iov;
	} else {
		iov = &iomsg->fast_iov;
		nr_segs = 1;
	}

	if (copy_from_user(msg, sr->umsg_compat, sizeof(*msg)))
		return -EFAULT;

	uiov = compat_ptr(msg->msg_iov);
	if (req->flags & REQ_F_BUFFER_SELECT) {
		compat_ssize_t clen;

		if (msg->msg_iovlen == 0) {
			sr->len = iov->iov_len = 0;
			iov->iov_base = NULL;
		} else if (msg->msg_iovlen > 1) {
			return -EINVAL;
		} else {
			if (!access_ok(uiov, sizeof(*uiov)))
				return -EFAULT;
			if (__get_user(clen, &uiov->iov_len))
				return -EFAULT;
			if (clen < 0)
				return -EINVAL;
			sr->len = clen;
		}

		return 0;
	}

	ret = __import_iovec(ddir, (struct iovec __user *)uiov, msg->msg_iovlen,
			     nr_segs, &iov, &iomsg->msg.msg_iter, true);
	if (unlikely(ret < 0))
		return ret;

	return io_net_vec_assign(req, iomsg, iov);
}
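/*
 * Copy the user_msghdr field by field with unsafe_get_user() under a
 * single user_access_begin() window. For provided buffers only the first
 * iovec's length is needed here; the buffer itself is picked from the
 * buffer ring at issue time.
 */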
static int io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg,
			   struct user_msghdr *msg, int ddir)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct user_msghdr __user *umsg = sr->umsg;
	struct iovec *iov;
	int ret, nr_segs;

	if (iomsg->free_iov) {
		nr_segs = iomsg->free_iov_nr;
		iov = iomsg->free_iov;
	} else {
		iov = &iomsg->fast_iov;
		nr_segs = 1;
	}

	if (!user_access_begin(umsg, sizeof(*umsg)))
		return -EFAULT;

	ret = -EFAULT;
	unsafe_get_user(msg->msg_name, &umsg->msg_name, ua_end);
	unsafe_get_user(msg->msg_namelen, &umsg->msg_namelen, ua_end);
	unsafe_get_user(msg->msg_iov, &umsg->msg_iov, ua_end);
	unsafe_get_user(msg->msg_iovlen, &umsg->msg_iovlen, ua_end);
	unsafe_get_user(msg->msg_control, &umsg->msg_control, ua_end);
	unsafe_get_user(msg->msg_controllen, &umsg->msg_controllen, ua_end);
	msg->msg_flags = 0;

	if (req->flags & REQ_F_BUFFER_SELECT) {
		if (msg->msg_iovlen == 0) {
			sr->len = iov->iov_len = 0;
			iov->iov_base = NULL;
		} else if (msg->msg_iovlen > 1) {
			ret = -EINVAL;
			goto ua_end;
		} else {
			/* we only need the length for provided buffers */
			if (!access_ok(&msg->msg_iov[0].iov_len, sizeof(__kernel_size_t)))
				goto ua_end;
			unsafe_get_user(iov->iov_len, &msg->msg_iov[0].iov_len,
					ua_end);
			sr->len = iov->iov_len;
		}
		ret = 0;
ua_end:
		user_access_end();
		return ret;
	}

	user_access_end();
	ret = __import_iovec(ddir, msg->msg_iov, msg->msg_iovlen, nr_segs,
			     &iov, &iomsg->msg.msg_iter, false);
	if (unlikely(ret < 0))
		return ret;

	return io_net_vec_assign(req, iomsg, iov);
}
static int io_sendmsg_copy_hdr(struct io_kiocb *req,
			       struct io_async_msghdr *iomsg)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct user_msghdr msg;
	int ret;

	iomsg->msg.msg_name = &iomsg->addr;
	iomsg->msg.msg_iter.nr_segs = 0;

	if (unlikely(req->ctx->compat)) {
		struct compat_msghdr cmsg;

		ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ITER_SOURCE);
		if (unlikely(ret))
			return ret;

		return __get_compat_msghdr(&iomsg->msg, &cmsg, NULL);
	}

	ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_SOURCE);
	if (unlikely(ret))
		return ret;

	ret = __copy_msghdr(&iomsg->msg, &msg, NULL);

	/* save msg_control as sys_sendmsg() overwrites it */
	sr->msg_control = iomsg->msg.msg_control_user;
	return ret;
}
void io_sendmsg_recvmsg_cleanup(struct io_kiocb *req)
{
	struct io_async_msghdr *io = req->async_data;

	io_netmsg_iovec_free(io);
}
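/*
 * Setup for the non-msghdr send variants: sqe->addr/len describe the data,
 * and an optional destination address in sqe->addr2/addr_len is copied
 * into the async msghdr so a retry never has to touch user memory again.
 */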
static int io_send_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	void __user *addr;
	u16 addr_len;
	int ret;

	sr->buf = u64_to_user_ptr(READ_ONCE(sqe->addr));

	if (READ_ONCE(sqe->__pad3[0]))
		return -EINVAL;

	kmsg->msg.msg_name = NULL;
	kmsg->msg.msg_namelen = 0;
	kmsg->msg.msg_control = NULL;
	kmsg->msg.msg_controllen = 0;
	kmsg->msg.msg_ubuf = NULL;

	addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
	addr_len = READ_ONCE(sqe->addr_len);
	if (addr) {
		ret = move_addr_to_kernel(addr, addr_len, &kmsg->addr);
		if (unlikely(ret < 0))
			return ret;
		kmsg->msg.msg_name = &kmsg->addr;
		kmsg->msg.msg_namelen = addr_len;
	}
	if (!io_do_buffer_select(req)) {
		ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len,
				  &kmsg->msg.msg_iter);
		if (unlikely(ret < 0))
			return ret;
	}
	return 0;
}
static int io_sendmsg_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	int ret;

	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));

	ret = io_sendmsg_copy_hdr(req, kmsg);
	if (!ret)
		req->flags |= REQ_F_NEED_CLEANUP;
	return ret;
}
#define SENDMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_BUNDLE)

int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	sr->done_io = 0;

	if (req->opcode != IORING_OP_SEND) {
		if (sqe->addr2 || sqe->file_index)
			return -EINVAL;
	}

	sr->len = READ_ONCE(sqe->len);
	sr->flags = READ_ONCE(sqe->ioprio);
	if (sr->flags & ~SENDMSG_FLAGS)
		return -EINVAL;
	sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
	if (sr->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;
	if (sr->flags & IORING_RECVSEND_BUNDLE) {
		if (req->opcode == IORING_OP_SENDMSG)
			return -EINVAL;
		if (!(req->flags & REQ_F_BUFFER_SELECT))
			return -EINVAL;
		sr->msg_flags |= MSG_WAITALL;
		sr->buf_group = req->buf_index;
		req->buf_list = NULL;
	}

	if (req->ctx->compat)
		sr->msg_flags |= MSG_CMSG_COMPAT;

	if (unlikely(!io_msg_alloc_async(req)))
		return -ENOMEM;
	if (req->opcode != IORING_OP_SENDMSG)
		return io_send_setup(req, sqe);
	return io_sendmsg_setup(req, sqe);
}
static void io_req_msg_cleanup(struct io_kiocb *req,
			       unsigned int issue_flags)
{
	req->flags &= ~REQ_F_NEED_CLEANUP;
	io_netmsg_recycle(req, issue_flags);
}
/*
 * For bundle completions, we need to figure out how many segments we consumed.
 * A bundle could be using a single ITER_UBUF if that's all we mapped, or it
 * could be using an ITER_IOVEC. If the latter, then if we consumed all of
 * the segments, then it's a trivial question to answer. If we have residual
 * data in the iter, then loop the segments to figure out how much we
 * transferred.
 */
static int io_bundle_nbufs(struct io_async_msghdr *kmsg, int ret)
{
	struct iovec *iov;
	int nbufs;

	/* no data is always zero segments, and a ubuf is always 1 segment */
	if (ret <= 0)
		return 0;
	if (iter_is_ubuf(&kmsg->msg.msg_iter))
		return 1;

	iov = kmsg->free_iov;
	if (!iov)
		iov = &kmsg->fast_iov;

	/* if all data was transferred, it's basic pointer math */
	if (!iov_iter_count(&kmsg->msg.msg_iter))
		return iter_iov(&kmsg->msg.msg_iter) - iov;

	/* short transfer, count segments */
	nbufs = 0;
	do {
		int this_len = min_t(int, iov[nbufs].iov_len, ret);

		nbufs++;
		ret -= this_len;
	} while (ret);

	return nbufs;
}
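/*
 * Post-process a send. For bundle sends, a successful completion that
 * still has buffers left in the ring posts a CQE flagged IORING_CQE_F_MORE
 * and re-arms the request; otherwise the bundle (or the plain send)
 * terminates with the current result.
 */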
static inline bool io_send_finish(struct io_kiocb *req, int *ret,
				  struct io_async_msghdr *kmsg,
				  unsigned issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	bool bundle_finished = *ret <= 0;
	unsigned int cflags;

	if (!(sr->flags & IORING_RECVSEND_BUNDLE)) {
		cflags = io_put_kbuf(req, *ret, issue_flags);
		goto finish;
	}

	cflags = io_put_kbufs(req, *ret, io_bundle_nbufs(kmsg, *ret), issue_flags);

	if (bundle_finished || req->flags & REQ_F_BL_EMPTY)
		goto finish;

	/*
	 * Fill CQE for this receive and see if we should keep trying to
	 * receive from this socket.
	 */
	if (io_req_post_cqe(req, *ret, cflags | IORING_CQE_F_MORE)) {
		io_mshot_prep_retry(req, kmsg);
		return false;
	}

	/* Otherwise stop bundle and use the current result. */
finish:
	io_req_set_res(req, *ret, cflags);
	return true;
}
int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct socket *sock;
	unsigned flags;
	int min_ret = 0;
	int ret;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	flags = sr->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	kmsg->msg.msg_control_user = sr->msg_control;
	ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);

	if (ret < min_ret) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;
		if (ret > 0 && io_net_retry(sock, flags)) {
			kmsg->msg.msg_controllen = 0;
			kmsg->msg.msg_control = NULL;
			sr->done_io += ret;
			req->flags |= REQ_F_BL_NO_RECYCLE;
			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}
	io_req_msg_cleanup(req, issue_flags);
	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}
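/*
 * Plain send path. With provided buffers this can pick several buffers in
 * one go (IORING_RECVSEND_BUNDLE) and send them as a single iovec-backed
 * msghdr; any expanded iovec is kept in the async msghdr for reuse.
 */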
int io_send(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct socket *sock;
	unsigned flags;
	int min_ret = 0;
	int ret;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	flags = sr->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;

retry_bundle:
	if (io_do_buffer_select(req)) {
		struct buf_sel_arg arg = {
			.iovs = &kmsg->fast_iov,
			.max_len = min_not_zero(sr->len, INT_MAX),
			.nr_iovs = 1,
		};

		if (kmsg->free_iov) {
			arg.nr_iovs = kmsg->free_iov_nr;
			arg.iovs = kmsg->free_iov;
			arg.mode = KBUF_MODE_FREE;
		}

		if (!(sr->flags & IORING_RECVSEND_BUNDLE))
			arg.nr_iovs = 1;
		else
			arg.mode |= KBUF_MODE_EXPAND;

		ret = io_buffers_select(req, &arg, issue_flags);
		if (unlikely(ret < 0))
			return ret;

		if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->free_iov) {
			kmsg->free_iov_nr = ret;
			kmsg->free_iov = arg.iovs;
			req->flags |= REQ_F_NEED_CLEANUP;
		}
		sr->len = arg.out_len;

		if (ret == 1) {
			sr->buf = arg.iovs[0].iov_base;
			ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len,
					  &kmsg->msg.msg_iter);
			if (unlikely(ret))
				return ret;
		} else {
			iov_iter_init(&kmsg->msg.msg_iter, ITER_SOURCE,
				      arg.iovs, ret, arg.out_len);
		}
	}

	/*
	 * If MSG_WAITALL is set, or this is a bundle send, then we need
	 * the full amount. If just bundle is set, if we do a short send
	 * then we complete the bundle sequence rather than continue on.
	 */
	if (flags & MSG_WAITALL || sr->flags & IORING_RECVSEND_BUNDLE)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
	kmsg->msg.msg_flags = flags;
	ret = sock_sendmsg(sock, &kmsg->msg);
	if (ret < min_ret) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;

		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->len -= ret;
			sr->buf += ret;
			sr->done_io += ret;
			req->flags |= REQ_F_BL_NO_RECYCLE;
			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}
	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;

	if (!io_send_finish(req, &ret, kmsg, issue_flags))
		goto retry_bundle;

	io_req_msg_cleanup(req, issue_flags);
	return IOU_OK;
}
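/*
 * Multishot recvmsg reserves room at the front of each provided buffer for
 * a struct io_uring_recvmsg_out plus the requested name and control space;
 * reject setups where those lengths would overflow that header.
 */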
static int io_recvmsg_mshot_prep(struct io_kiocb *req,
				 struct io_async_msghdr *iomsg,
				 int namelen, size_t controllen)
{
	if ((req->flags & (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) ==
			  (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) {
		int hdr;

		if (unlikely(namelen < 0))
			return -EOVERFLOW;
		if (check_add_overflow(sizeof(struct io_uring_recvmsg_out),
				       namelen, &hdr))
			return -EOVERFLOW;
		if (check_add_overflow(hdr, controllen, &hdr))
			return -EOVERFLOW;

		iomsg->namelen = namelen;
		iomsg->controllen = controllen;
		return 0;
	}

	return 0;
}
static int io_recvmsg_copy_hdr(struct io_kiocb *req,
			       struct io_async_msghdr *iomsg)
{
	struct user_msghdr msg;
	int ret;

	iomsg->msg.msg_name = &iomsg->addr;
	iomsg->msg.msg_iter.nr_segs = 0;

	if (unlikely(req->ctx->compat)) {
		struct compat_msghdr cmsg;

		ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ITER_DEST);
		if (unlikely(ret))
			return ret;

		ret = __get_compat_msghdr(&iomsg->msg, &cmsg, &iomsg->uaddr);
		if (unlikely(ret))
			return ret;

		return io_recvmsg_mshot_prep(req, iomsg, cmsg.msg_namelen,
						cmsg.msg_controllen);
	}

	ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_DEST);
	if (unlikely(ret))
		return ret;

	ret = __copy_msghdr(&iomsg->msg, &msg, &iomsg->uaddr);
	if (unlikely(ret))
		return ret;

	return io_recvmsg_mshot_prep(req, iomsg, msg.msg_namelen,
					msg.msg_controllen);
}
static int io_recvmsg_prep_setup(struct io_kiocb *req)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg;
	int ret;

	kmsg = io_msg_alloc_async(req);
	if (unlikely(!kmsg))
		return -ENOMEM;

	if (req->opcode == IORING_OP_RECV) {
		kmsg->msg.msg_name = NULL;
		kmsg->msg.msg_namelen = 0;
		kmsg->msg.msg_control = NULL;
		kmsg->msg.msg_get_inq = 1;
		kmsg->msg.msg_controllen = 0;
		kmsg->msg.msg_iocb = NULL;
		kmsg->msg.msg_ubuf = NULL;

		if (!io_do_buffer_select(req)) {
			ret = import_ubuf(ITER_DEST, sr->buf, sr->len,
					  &kmsg->msg.msg_iter);
			if (unlikely(ret))
				return ret;
		}
		return 0;
	}

	ret = io_recvmsg_copy_hdr(req, kmsg);
	if (!ret)
		req->flags |= REQ_F_NEED_CLEANUP;
	return ret;
}
#define RECVMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECV_MULTISHOT | \
			IORING_RECVSEND_BUNDLE)

int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	sr->done_io = 0;

	if (unlikely(sqe->file_index || sqe->addr2))
		return -EINVAL;

	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
	sr->len = READ_ONCE(sqe->len);
	sr->flags = READ_ONCE(sqe->ioprio);
	if (sr->flags & ~RECVMSG_FLAGS)
		return -EINVAL;
	sr->msg_flags = READ_ONCE(sqe->msg_flags);
	if (sr->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;
	if (sr->msg_flags & MSG_ERRQUEUE)
		req->flags |= REQ_F_CLEAR_POLLIN;
	if (req->flags & REQ_F_BUFFER_SELECT) {
		/*
		 * Store the buffer group for this multishot receive separately,
		 * as if we end up doing an io-wq based issue that selects a
		 * buffer, it has to be committed immediately and that will
		 * clear ->buf_list. This means we lose the link to the buffer
		 * list, and the eventual buffer put on completion then cannot
		 * restore it.
		 */
		sr->buf_group = req->buf_index;
		req->buf_list = NULL;
	}
	if (sr->flags & IORING_RECV_MULTISHOT) {
		if (!(req->flags & REQ_F_BUFFER_SELECT))
			return -EINVAL;
		if (sr->msg_flags & MSG_WAITALL)
			return -EINVAL;
		if (req->opcode == IORING_OP_RECV && sr->len)
			return -EINVAL;
		req->flags |= REQ_F_APOLL_MULTISHOT;
	}
	if (sr->flags & IORING_RECVSEND_BUNDLE) {
		if (req->opcode == IORING_OP_RECVMSG)
			return -EINVAL;
	}

	if (req->ctx->compat)
		sr->msg_flags |= MSG_CMSG_COMPAT;

	sr->nr_multishot_loops = 0;
	return io_recvmsg_prep_setup(req);
}
/*
 * Finishes io_recv and io_recvmsg.
 *
 * Returns true if it is actually finished, or false if it should run
 * again (for multishot).
 */
static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
				  struct io_async_msghdr *kmsg,
				  bool mshot_finished, unsigned issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	unsigned int cflags = 0;

	if (kmsg->msg.msg_inq > 0)
		cflags |= IORING_CQE_F_SOCK_NONEMPTY;

	if (sr->flags & IORING_RECVSEND_BUNDLE) {
		cflags |= io_put_kbufs(req, *ret, io_bundle_nbufs(kmsg, *ret),
				       issue_flags);
		/* bundle with no more immediate buffers, we're done */
		if (req->flags & REQ_F_BL_EMPTY)
			goto finish;
	} else {
		cflags |= io_put_kbuf(req, *ret, issue_flags);
	}

	/*
	 * Fill CQE for this receive and see if we should keep trying to
	 * receive from this socket.
	 */
	if ((req->flags & REQ_F_APOLL_MULTISHOT) && !mshot_finished &&
	    io_req_post_cqe(req, *ret, cflags | IORING_CQE_F_MORE)) {
		int mshot_retry_ret = IOU_ISSUE_SKIP_COMPLETE;

		io_mshot_prep_retry(req, kmsg);
		/* Known not-empty or unknown state, retry */
		if (cflags & IORING_CQE_F_SOCK_NONEMPTY || kmsg->msg.msg_inq < 0) {
			if (sr->nr_multishot_loops++ < MULTISHOT_MAX_RETRY)
				return false;
			/* mshot retries exceeded, force a requeue */
			sr->nr_multishot_loops = 0;
			mshot_retry_ret = IOU_REQUEUE;
		}
		if (issue_flags & IO_URING_F_MULTISHOT)
			*ret = mshot_retry_ret;
		else
			*ret = -EAGAIN;
		return true;
	}

	/* Finish the request / stop multishot. */
finish:
	io_req_set_res(req, *ret, cflags);

	if (issue_flags & IO_URING_F_MULTISHOT)
		*ret = IOU_STOP_MULTISHOT;
	else
		*ret = IOU_OK;
	io_req_msg_cleanup(req, issue_flags);
	return true;
}
static int io_recvmsg_prep_multishot(struct io_async_msghdr *kmsg,
				     struct io_sr_msg *sr, void __user **buf,
				     size_t *len)
{
	unsigned long ubuf = (unsigned long) *buf;
	unsigned long hdr;

	hdr = sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
		kmsg->controllen;
	if (*len < hdr)
		return -EFAULT;

	if (kmsg->controllen) {
		unsigned long control = ubuf + hdr - kmsg->controllen;

		kmsg->msg.msg_control_user = (void __user *) control;
		kmsg->msg.msg_controllen = kmsg->controllen;
	}

	sr->buf = *buf; /* stash for later copy */
	*buf = (void __user *) (ubuf + hdr);
	kmsg->payloadlen = *len = *len - hdr;
	return 0;
}
struct io_recvmsg_multishot_hdr {
	struct io_uring_recvmsg_out msg;
	struct sockaddr_storage addr;
};
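/*
 * Multishot recvmsg writes a struct io_uring_recvmsg_out, the (possibly
 * truncated) source address and any control data into the front of the
 * selected buffer, with the payload following. The CQE result is the
 * total number of bytes placed in the buffer, header included.
 */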
static int io_recvmsg_multishot(struct socket *sock, struct io_sr_msg *io,
				struct io_async_msghdr *kmsg,
				unsigned int flags, bool *finished)
{
	int err;
	int copy_len;
	struct io_recvmsg_multishot_hdr hdr;

	if (kmsg->namelen)
		kmsg->msg.msg_name = &hdr.addr;
	kmsg->msg.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
	kmsg->msg.msg_namelen = 0;

	if (sock->file->f_flags & O_NONBLOCK)
		flags |= MSG_DONTWAIT;

	err = sock_recvmsg(sock, &kmsg->msg, flags);
	*finished = err <= 0;
	if (err < 0)
		return err;

	hdr.msg = (struct io_uring_recvmsg_out) {
		.controllen = kmsg->controllen - kmsg->msg.msg_controllen,
		.flags = kmsg->msg.msg_flags & ~MSG_CMSG_COMPAT
	};

	hdr.msg.payloadlen = err;
	if (err > kmsg->payloadlen)
		err = kmsg->payloadlen;

	copy_len = sizeof(struct io_uring_recvmsg_out);
	if (kmsg->msg.msg_namelen > kmsg->namelen)
		copy_len += kmsg->namelen;
	else
		copy_len += kmsg->msg.msg_namelen;

	/*
	 *	"fromlen shall refer to the value before truncation.."
	 *			1003.1g
	 */
	hdr.msg.namelen = kmsg->msg.msg_namelen;

	/* ensure that there is no gap between hdr and sockaddr_storage */
	BUILD_BUG_ON(offsetof(struct io_recvmsg_multishot_hdr, addr) !=
		     sizeof(struct io_uring_recvmsg_out));
	if (copy_to_user(io->buf, &hdr, copy_len)) {
		*finished = true;
		return -EFAULT;
	}

	return sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
			kmsg->controllen + err;
}
int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct socket *sock;
	unsigned flags;
	int ret, min_ret = 0;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	bool mshot_finished = true;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	flags = sr->msg_flags;
	if (force_nonblock)
		flags |= MSG_DONTWAIT;

retry_multishot:
	if (io_do_buffer_select(req)) {
		void __user *buf;
		size_t len = sr->len;

		buf = io_buffer_select(req, &len, issue_flags);
		if (!buf)
			return -ENOBUFS;

		if (req->flags & REQ_F_APOLL_MULTISHOT) {
			ret = io_recvmsg_prep_multishot(kmsg, sr, &buf, &len);
			if (ret) {
				io_kbuf_recycle(req, issue_flags);
				return ret;
			}
		}

		iov_iter_ubuf(&kmsg->msg.msg_iter, ITER_DEST, buf, len);
	}

	kmsg->msg.msg_get_inq = 1;
	kmsg->msg.msg_inq = -1;
	if (req->flags & REQ_F_APOLL_MULTISHOT) {
		ret = io_recvmsg_multishot(sock, sr, kmsg, flags,
					   &mshot_finished);
	} else {
		/* disable partial retry for recvmsg with cmsg attached */
		if (flags & MSG_WAITALL && !kmsg->msg.msg_controllen)
			min_ret = iov_iter_count(&kmsg->msg.msg_iter);

		ret = __sys_recvmsg_sock(sock, &kmsg->msg, sr->umsg,
					 kmsg->uaddr, flags);
	}

	if (ret < min_ret) {
		if (ret == -EAGAIN && force_nonblock) {
			if (issue_flags & IO_URING_F_MULTISHOT) {
				io_kbuf_recycle(req, issue_flags);
				return IOU_ISSUE_SKIP_COMPLETE;
			}
			return -EAGAIN;
		}
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->done_io += ret;
			req->flags |= REQ_F_BL_NO_RECYCLE;
			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
		req_set_fail(req);
	}

	if (ret > 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	else
		io_kbuf_recycle(req, issue_flags);

	if (!io_recv_finish(req, &ret, kmsg, mshot_finished, issue_flags))
		goto retry_multishot;

	return ret;
}
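/*
 * Pick the receive buffer(s) for the non-msghdr recv. A bundle recv peeks
 * a batch of provided buffers (capped by the socket's ->msg_inq estimate
 * when known) and maps them as an iovec; otherwise a single provided
 * buffer is selected and mapped as a plain ubuf iterator.
 */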
static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg,
			      size_t *len, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	int ret;

	/*
	 * If the ring isn't locked, then don't use the peek interface
	 * to grab multiple buffers as we will lock/unlock between
	 * this selection and posting the buffers.
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED) &&
	    sr->flags & IORING_RECVSEND_BUNDLE) {
		struct buf_sel_arg arg = {
			.iovs = &kmsg->fast_iov,
			.nr_iovs = 1,
			.mode = KBUF_MODE_EXPAND,
		};

		if (kmsg->free_iov) {
			arg.nr_iovs = kmsg->free_iov_nr;
			arg.iovs = kmsg->free_iov;
			arg.mode |= KBUF_MODE_FREE;
		}

		if (kmsg->msg.msg_inq > 0)
			arg.max_len = min_not_zero(sr->len, kmsg->msg.msg_inq);

		ret = io_buffers_peek(req, &arg);
		if (unlikely(ret < 0))
			return ret;

		/* special case 1 vec, can be a fast path */
		if (ret == 1) {
			sr->buf = arg.iovs[0].iov_base;
			sr->len = arg.iovs[0].iov_len;
			goto map_ubuf;
		}
		iov_iter_init(&kmsg->msg.msg_iter, ITER_DEST, arg.iovs, ret,
			      arg.out_len);
		if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->free_iov) {
			kmsg->free_iov_nr = ret;
			kmsg->free_iov = arg.iovs;
			req->flags |= REQ_F_NEED_CLEANUP;
		}
	} else {
		void __user *buf;

		*len = sr->len;
		buf = io_buffer_select(req, len, issue_flags);
		if (!buf)
			return -ENOBUFS;
		sr->buf = buf;
		sr->len = *len;
map_ubuf:
		ret = import_ubuf(ITER_DEST, sr->buf, sr->len,
				  &kmsg->msg.msg_iter);
		if (unlikely(ret))
			return ret;
	}

	return 0;
}
int io_recv(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct socket *sock;
	unsigned flags;
	int ret, min_ret = 0;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	size_t len = sr->len;
	bool mshot_finished;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	flags = sr->msg_flags;
	if (force_nonblock)
		flags |= MSG_DONTWAIT;

retry_multishot:
	if (io_do_buffer_select(req)) {
		ret = io_recv_buf_select(req, kmsg, &len, issue_flags);
		if (unlikely(ret)) {
			kmsg->msg.msg_inq = -1;
			goto out_free;
		}
		sr->buf = NULL;
	}

	kmsg->msg.msg_flags = 0;
	kmsg->msg.msg_inq = -1;

	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	ret = sock_recvmsg(sock, &kmsg->msg, flags);
	if (ret < min_ret) {
		if (ret == -EAGAIN && force_nonblock) {
			if (issue_flags & IO_URING_F_MULTISHOT) {
				io_kbuf_recycle(req, issue_flags);
				return IOU_ISSUE_SKIP_COMPLETE;
			}
			return -EAGAIN;
		}
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->len -= ret;
			sr->buf += ret;
			sr->done_io += ret;
			req->flags |= REQ_F_BL_NO_RECYCLE;
			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
out_free:
		req_set_fail(req);
	}

	mshot_finished = ret <= 0;
	if (ret > 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	else
		io_kbuf_recycle(req, issue_flags);

	if (!io_recv_finish(req, &ret, kmsg, mshot_finished, issue_flags))
		goto retry_multishot;

	return ret;
}
void io_send_zc_cleanup(struct io_kiocb *req)
{
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *io = req->async_data;

	if (req_has_async_data(req))
		io_netmsg_iovec_free(io);
	if (zc->notif) {
		io_notif_flush(zc->notif);
		zc->notif = NULL;
	}
}
#define IO_ZC_FLAGS_COMMON (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_FIXED_BUF)
#define IO_ZC_FLAGS_VALID  (IO_ZC_FLAGS_COMMON | IORING_SEND_ZC_REPORT_USAGE)
int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_ring_ctx *ctx = req->ctx;
	struct io_kiocb *notif;

	zc->done_io = 0;
	req->flags |= REQ_F_POLL_NO_LAZY;

	if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3)))
		return -EINVAL;
	/* we don't support IOSQE_CQE_SKIP_SUCCESS just yet */
	if (req->flags & REQ_F_CQE_SKIP)
		return -EINVAL;

	notif = zc->notif = io_alloc_notif(ctx);
	if (!notif)
		return -ENOMEM;
	notif->cqe.user_data = req->cqe.user_data;
	notif->cqe.res = 0;
	notif->cqe.flags = IORING_CQE_F_NOTIF;
	req->flags |= REQ_F_NEED_CLEANUP;

	zc->flags = READ_ONCE(sqe->ioprio);
	if (unlikely(zc->flags & ~IO_ZC_FLAGS_COMMON)) {
		if (zc->flags & ~IO_ZC_FLAGS_VALID)
			return -EINVAL;
		if (zc->flags & IORING_SEND_ZC_REPORT_USAGE) {
			struct io_notif_data *nd = io_notif_to_data(notif);

			nd->zc_report = true;
			nd->zc_used = false;
			nd->zc_copied = false;
		}
	}

	if (req->opcode != IORING_OP_SEND_ZC) {
		if (unlikely(sqe->addr2 || sqe->file_index))
			return -EINVAL;
		if (unlikely(zc->flags & IORING_RECVSEND_FIXED_BUF))
			return -EINVAL;
	}

	zc->len = READ_ONCE(sqe->len);
	zc->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL | MSG_ZEROCOPY;
	zc->buf_index = READ_ONCE(sqe->buf_index);
	if (zc->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		zc->msg_flags |= MSG_CMSG_COMPAT;
#endif
	if (unlikely(!io_msg_alloc_async(req)))
		return -ENOMEM;
	if (req->opcode != IORING_OP_SENDMSG_ZC)
		return io_send_setup(req, sqe);
	return io_sendmsg_setup(req, sqe);
}
static int io_sg_from_iter_iovec(struct sk_buff *skb,
				 struct iov_iter *from, size_t length)
{
	skb_zcopy_downgrade_managed(skb);
	return zerocopy_fill_skb_from_iter(skb, from, length);
}
static int io_sg_from_iter(struct sk_buff *skb,
			   struct iov_iter *from, size_t length)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	int frag = shinfo->nr_frags;
	int ret = 0;
	struct bvec_iter bi;
	ssize_t copied = 0;
	unsigned long truesize = 0;

	if (!frag)
		shinfo->flags |= SKBFL_MANAGED_FRAG_REFS;
	else if (unlikely(!skb_zcopy_managed(skb)))
		return zerocopy_fill_skb_from_iter(skb, from, length);

	bi.bi_size = min(from->count, length);
	bi.bi_bvec_done = from->iov_offset;
	bi.bi_idx = 0;

	while (bi.bi_size && frag < MAX_SKB_FRAGS) {
		struct bio_vec v = mp_bvec_iter_bvec(from->bvec, bi);

		copied += v.bv_len;
		truesize += PAGE_ALIGN(v.bv_len + v.bv_offset);
		__skb_fill_page_desc_noacc(shinfo, frag++, v.bv_page,
					   v.bv_offset, v.bv_len);
		bvec_iter_advance_single(from->bvec, &bi, v.bv_len);
	}
	if (bi.bi_size)
		ret = -EMSGSIZE;

	shinfo->nr_frags = frag;
	from->bvec += bi.bi_idx;
	from->nr_segs -= bi.bi_idx;
	from->count -= copied;
	from->iov_offset = bi.bi_bvec_done;

	skb->data_len += copied;
	skb->len += copied;
	skb->truesize += truesize;
	return ret;
}
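/*
 * Map the source data for a zerocopy send. With IORING_RECVSEND_FIXED_BUF
 * the payload comes from a registered (pre-pinned) buffer; otherwise the
 * user address is imported as a ubuf iterator and its memory is accounted
 * against the notification.
 */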
static int io_send_zc_import(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	int ret;

	if (sr->flags & IORING_RECVSEND_FIXED_BUF) {
		struct io_ring_ctx *ctx = req->ctx;
		struct io_rsrc_node *node;

		ret = -EFAULT;
		io_ring_submit_lock(ctx, issue_flags);
		node = io_rsrc_node_lookup(&ctx->buf_table, sr->buf_index);
		if (node) {
			io_req_assign_buf_node(sr->notif, node);
			ret = 0;
		}
		io_ring_submit_unlock(ctx, issue_flags);

		if (unlikely(ret))
			return ret;

		ret = io_import_fixed(ITER_SOURCE, &kmsg->msg.msg_iter,
				      node->buf, (u64)(uintptr_t)sr->buf,
				      sr->len);
		if (unlikely(ret))
			return ret;
		kmsg->msg.sg_from_iter = io_sg_from_iter;
	} else {
		ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len, &kmsg->msg.msg_iter);
		if (unlikely(ret))
			return ret;
		ret = io_notif_account_mem(sr->notif, sr->len);
		if (unlikely(ret))
			return ret;
		kmsg->msg.sg_from_iter = io_sg_from_iter_iovec;
	}

	return ret;
}
int io_send_zc(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct socket *sock;
	unsigned msg_flags;
	int ret, min_ret = 0;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;
	if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags))
		return -EOPNOTSUPP;

	if (!(req->flags & REQ_F_POLLED) &&
	    (zc->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	if (!zc->done_io) {
		ret = io_send_zc_import(req, issue_flags);
		if (unlikely(ret))
			return ret;
	}

	msg_flags = zc->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		msg_flags |= MSG_DONTWAIT;
	if (msg_flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);
	msg_flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;

	kmsg->msg.msg_flags = msg_flags;
	kmsg->msg.msg_ubuf = &io_notif_to_data(zc->notif)->uarg;
	ret = sock_sendmsg(sock, &kmsg->msg);

	if (unlikely(ret < min_ret)) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;

		if (ret > 0 && io_net_retry(sock, kmsg->msg.msg_flags)) {
			zc->len -= ret;
			zc->buf += ret;
			zc->done_io += ret;
			req->flags |= REQ_F_BL_NO_RECYCLE;
			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}

	if (ret >= 0)
		ret += zc->done_io;
	else if (zc->done_io)
		ret = zc->done_io;

	/*
	 * If we're in io-wq we can't rely on tw ordering guarantees, defer
	 * flushing notif to io_send_zc_cleanup()
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
		io_notif_flush(zc->notif);
		io_req_msg_cleanup(req, 0);
	}
	io_req_set_res(req, ret, IORING_CQE_F_MORE);
	return IOU_OK;
}
int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct socket *sock;
	unsigned flags;
	int ret, min_ret = 0;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;
	if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags))
		return -EOPNOTSUPP;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	flags = sr->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	kmsg->msg.msg_control_user = sr->msg_control;
	kmsg->msg.msg_ubuf = &io_notif_to_data(sr->notif)->uarg;
	kmsg->msg.sg_from_iter = io_sg_from_iter_iovec;
	ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);

	if (unlikely(ret < min_ret)) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;

		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->done_io += ret;
			req->flags |= REQ_F_BL_NO_RECYCLE;
			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}

	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;

	/*
	 * If we're in io-wq we can't rely on tw ordering guarantees, defer
	 * flushing notif to io_send_zc_cleanup()
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
		io_notif_flush(sr->notif);
		io_req_msg_cleanup(req, 0);
	}
	io_req_set_res(req, ret, IORING_CQE_F_MORE);
	return IOU_OK;
}
void io_sendrecv_fail(struct io_kiocb *req)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	if (sr->done_io)
		req->cqe.res = sr->done_io;

	if ((req->flags & REQ_F_NEED_CLEANUP) &&
	    (req->opcode == IORING_OP_SEND_ZC || req->opcode == IORING_OP_SENDMSG_ZC))
		req->cqe.flags |= IORING_CQE_F_MORE;
}
#define ACCEPT_FLAGS	(IORING_ACCEPT_MULTISHOT | IORING_ACCEPT_DONTWAIT | \
			 IORING_ACCEPT_POLL_FIRST)

int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);

	if (sqe->len || sqe->buf_index)
		return -EINVAL;

	accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2));
	accept->flags = READ_ONCE(sqe->accept_flags);
	accept->nofile = rlimit(RLIMIT_NOFILE);
	accept->iou_flags = READ_ONCE(sqe->ioprio);
	if (accept->iou_flags & ~ACCEPT_FLAGS)
		return -EINVAL;

	accept->file_slot = READ_ONCE(sqe->file_index);
	if (accept->file_slot) {
		if (accept->flags & SOCK_CLOEXEC)
			return -EINVAL;
		if (accept->iou_flags & IORING_ACCEPT_MULTISHOT &&
		    accept->file_slot != IORING_FILE_INDEX_ALLOC)
			return -EINVAL;
	}
	if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
		return -EINVAL;
	if (SOCK_NONBLOCK != O_NONBLOCK && (accept->flags & SOCK_NONBLOCK))
		accept->flags = (accept->flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
	if (accept->iou_flags & IORING_ACCEPT_MULTISHOT)
		req->flags |= REQ_F_APOLL_MULTISHOT;
	if (accept->iou_flags & IORING_ACCEPT_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;
	return 0;
}
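/*
 * Accept, optionally multishot: with IORING_ACCEPT_MULTISHOT a single SQE
 * keeps posting one CQE (flagged IORING_CQE_F_MORE) per accepted
 * connection until it fails or is cancelled. Userspace typically arms
 * this via liburing's io_uring_prep_multishot_accept() helper.
 */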
int io_accept(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	bool fixed = !!accept->file_slot;
	struct proto_accept_arg arg = {
		.flags = force_nonblock ? O_NONBLOCK : 0,
	};
	struct file *file;
	unsigned cflags;
	int ret, fd;

	if (!(req->flags & REQ_F_POLLED) &&
	    accept->iou_flags & IORING_ACCEPT_POLL_FIRST)
		return -EAGAIN;

retry:
	if (!fixed) {
		fd = __get_unused_fd_flags(accept->flags, accept->nofile);
		if (unlikely(fd < 0))
			return fd;
	}
	arg.err = 0;
	arg.is_empty = -1;
	file = do_accept(req->file, &arg, accept->addr, accept->addr_len,
			 accept->flags);
	if (IS_ERR(file)) {
		if (!fixed)
			put_unused_fd(fd);
		ret = PTR_ERR(file);
		if (ret == -EAGAIN && force_nonblock &&
		    !(accept->iou_flags & IORING_ACCEPT_DONTWAIT)) {
			/*
			 * if it's multishot and polled, we don't need to
			 * return EAGAIN to arm the poll infra since it
			 * has already been done
			 */
			if (issue_flags & IO_URING_F_MULTISHOT)
				return IOU_ISSUE_SKIP_COMPLETE;
			return ret;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if (!fixed) {
		fd_install(fd, file);
		ret = fd;
	} else {
		ret = io_fixed_fd_install(req, issue_flags, file,
					  accept->file_slot);
	}

	cflags = 0;
	if (!arg.is_empty)
		cflags |= IORING_CQE_F_SOCK_NONEMPTY;

	if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
		io_req_set_res(req, ret, cflags);
		return IOU_OK;
	}

	if (ret < 0)
		return ret;
	if (io_req_post_cqe(req, ret, cflags | IORING_CQE_F_MORE)) {
		if (cflags & IORING_CQE_F_SOCK_NONEMPTY || arg.is_empty == -1)
			goto retry;
		if (issue_flags & IO_URING_F_MULTISHOT)
			return IOU_ISSUE_SKIP_COMPLETE;
		return -EAGAIN;
	}

	io_req_set_res(req, ret, cflags);
	return IOU_STOP_MULTISHOT;
}
int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);

	if (sqe->addr || sqe->rw_flags || sqe->buf_index)
		return -EINVAL;

	sock->domain = READ_ONCE(sqe->fd);
	sock->type = READ_ONCE(sqe->off);
	sock->protocol = READ_ONCE(sqe->len);
	sock->file_slot = READ_ONCE(sqe->file_index);
	sock->nofile = rlimit(RLIMIT_NOFILE);

	sock->flags = sock->type & ~SOCK_TYPE_MASK;
	if (sock->file_slot && (sock->flags & SOCK_CLOEXEC))
		return -EINVAL;
	if (sock->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
		return -EINVAL;
	return 0;
}
int io_socket(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);
	bool fixed = !!sock->file_slot;
	struct file *file;
	int ret, fd;

	if (!fixed) {
		fd = __get_unused_fd_flags(sock->flags, sock->nofile);
		if (unlikely(fd < 0))
			return fd;
	}
	file = __sys_socket_file(sock->domain, sock->type, sock->protocol);
	if (IS_ERR(file)) {
		if (!fixed)
			put_unused_fd(fd);
		ret = PTR_ERR(file);
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if (!fixed) {
		fd_install(fd, file);
		ret = fd;
	} else {
		ret = io_fixed_fd_install(req, issue_flags, file,
					  sock->file_slot);
	}
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}
int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_connect *conn = io_kiocb_to_cmd(req, struct io_connect);
	struct io_async_msghdr *io;

	if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
		return -EINVAL;

	conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	conn->addr_len = READ_ONCE(sqe->addr2);
	conn->in_progress = conn->seen_econnaborted = false;

	io = io_msg_alloc_async(req);
	if (unlikely(!io))
		return -ENOMEM;

	return move_addr_to_kernel(conn->addr, conn->addr_len, &io->addr);
}
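/*
 * Connect runs nonblocking from the submission path; -EINPROGRESS and a
 * first -ECONNABORTED are turned into -EAGAIN so the request is retried
 * via poll, and a retried in-progress connect takes its final result from
 * sock_error().
 */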
int io_connect(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_connect *connect = io_kiocb_to_cmd(req, struct io_connect);
	struct io_async_msghdr *io = req->async_data;
	unsigned file_flags;
	int ret;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;

	file_flags = force_nonblock ? O_NONBLOCK : 0;

	ret = __sys_connect_file(req->file, &io->addr, connect->addr_len,
				 file_flags);
	if ((ret == -EAGAIN || ret == -EINPROGRESS || ret == -ECONNABORTED)
	    && force_nonblock) {
		if (ret == -EINPROGRESS) {
			connect->in_progress = true;
		} else if (ret == -ECONNABORTED) {
			if (connect->seen_econnaborted)
				goto out;
			connect->seen_econnaborted = true;
		}
		return -EAGAIN;
	}
	if (connect->in_progress) {
		/*
		 * At least bluetooth will return -EBADFD on a re-connect
		 * attempt, and it's (supposedly) also valid to get -EISCONN
		 * which means the previous result is good. For both of these,
		 * grab the sock_error() and use that for the completion.
		 */
		if (ret == -EBADFD || ret == -EISCONN)
			ret = sock_error(sock_from_file(req->file)->sk);
	}
	if (ret == -ERESTARTSYS)
		ret = -EINTR;
out:
	if (ret < 0)
		req_set_fail(req);
	io_req_msg_cleanup(req, issue_flags);
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}
int io_bind_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_bind *bind = io_kiocb_to_cmd(req, struct io_bind);
	struct sockaddr __user *uaddr;
	struct io_async_msghdr *io;

	if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
		return -EINVAL;

	uaddr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	bind->addr_len = READ_ONCE(sqe->addr2);

	io = io_msg_alloc_async(req);
	if (unlikely(!io))
		return -ENOMEM;
	return move_addr_to_kernel(uaddr, bind->addr_len, &io->addr);
}
int io_bind(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_bind *bind = io_kiocb_to_cmd(req, struct io_bind);
	struct io_async_msghdr *io = req->async_data;
	struct socket *sock;
	int ret;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	ret = __sys_bind_socket(sock, &io->addr, bind->addr_len);
	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	return 0;
}
int io_listen_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_listen *listen = io_kiocb_to_cmd(req, struct io_listen);

	if (sqe->addr || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in || sqe->addr2)
		return -EINVAL;

	listen->backlog = READ_ONCE(sqe->len);
	return 0;
}
int io_listen(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_listen *listen = io_kiocb_to_cmd(req, struct io_listen);
	struct socket *sock;
	int ret;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	ret = __sys_listen_socket(sock, listen->backlog);
	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	return 0;
}
void io_netmsg_cache_free(const void *entry)
{
	struct io_async_msghdr *kmsg = (struct io_async_msghdr *) entry;

	if (kmsg->free_iov) {
		kasan_mempool_unpoison_object(kmsg->free_iov,
				kmsg->free_iov_nr * sizeof(struct iovec));
		io_netmsg_iovec_free(kmsg);
	}
	kfree(kmsg);
}
#endif