// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/poll.h>
#include <linux/hashtable.h>
#include <linux/io_uring.h>

#include <trace/events/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "io_uring.h"
#include "alloc_cache.h"
#include "refs.h"
#include "opdef.h"
#include "kbuf.h"
#include "poll.h"
#include "cancel.h"
struct io_poll_update {
	struct file			*file;
	u64				old_user_data;
	u64				new_user_data;
	__poll_t			events;
	bool				update_events;
	bool				update_user_data;
};
struct io_poll_table {
	struct poll_table_struct pt;
	struct io_kiocb *req;
	int nr_entries;
	int error;
	bool owning;
	/* output value, set only if arm poll returns >0 */
	__poll_t result_mask;
};
#define IO_POLL_CANCEL_FLAG	BIT(31)
#define IO_POLL_RETRY_FLAG	BIT(30)
#define IO_POLL_REF_MASK	GENMASK(29, 0)

/*
 * We usually have 1-2 refs taken, 128 is more than enough and we want to
 * maximise the margin between this amount and the moment when it overflows.
 */
#define IO_POLL_REF_BIAS	128

#define IO_WQE_F_DOUBLE		1
static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
			void *key);
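
/*
 * A wait queue entry's ->private field carries the owning io_kiocb, with
 * IO_WQE_F_DOUBLE or'ed into the low pointer bit when the entry is the
 * second (double) poll entry; the helpers below decode that encoding.
 */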
static inline struct io_kiocb *wqe_to_req(struct wait_queue_entry *wqe)
{
	unsigned long priv = (unsigned long)wqe->private;

	return (struct io_kiocb *)(priv & ~IO_WQE_F_DOUBLE);
}
static inline bool wqe_is_double(struct wait_queue_entry *wqe)
{
	unsigned long priv = (unsigned long)wqe->private;

	return priv & IO_WQE_F_DOUBLE;
}
static bool io_poll_get_ownership_slowpath(struct io_kiocb *req)
{
	int v;

	/*
	 * poll_refs are already elevated and we don't have much hope for
	 * grabbing the ownership. Instead of incrementing, set a retry flag
	 * to notify the loop that there might have been some change.
	 */
	v = atomic_fetch_or(IO_POLL_RETRY_FLAG, &req->poll_refs);
	if (v & IO_POLL_REF_MASK)
		return false;
	return !(atomic_fetch_inc(&req->poll_refs) & IO_POLL_REF_MASK);
}
/*
 * If the refs part of ->poll_refs (see IO_POLL_REF_MASK) is 0, it's free. We
 * can bump it and acquire ownership. It's disallowed to modify requests while
 * not owning them; that prevents races when enqueueing task_work and between
 * arming poll and wakeups.
 */
static inline bool io_poll_get_ownership(struct io_kiocb *req)
{
	if (unlikely(atomic_read(&req->poll_refs) >= IO_POLL_REF_BIAS))
		return io_poll_get_ownership_slowpath(req);
	return !(atomic_fetch_inc(&req->poll_refs) & IO_POLL_REF_MASK);
}
static void io_poll_mark_cancelled(struct io_kiocb *req)
{
	atomic_or(IO_POLL_CANCEL_FLAG, &req->poll_refs);
}
static struct io_poll *io_poll_get_double(struct io_kiocb *req)
{
	/* pure poll stashes this in ->async_data, poll driven retry elsewhere */
	if (req->opcode == IORING_OP_POLL_ADD)
		return req->async_data;
	return req->apoll->double_poll;
}
static struct io_poll *io_poll_get_single(struct io_kiocb *req)
{
	if (req->opcode == IORING_OP_POLL_ADD)
		return io_kiocb_to_cmd(req, struct io_poll);
	return &req->apoll->poll;
}
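
/*
 * Armed poll requests are hashed by cqe.user_data so that cancellation can
 * find them: ->cancel_table uses per-bucket spinlocks, while
 * ->cancel_table_locked is protected by ->uring_lock and is used when
 * REQ_F_HASH_LOCKED is set.
 */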
static void io_poll_req_insert(struct io_kiocb *req)
{
	struct io_hash_table *table = &req->ctx->cancel_table;
	u32 index = hash_long(req->cqe.user_data, table->hash_bits);
	struct io_hash_bucket *hb = &table->hbs[index];

	spin_lock(&hb->lock);
	hlist_add_head(&req->hash_node, &hb->list);
	spin_unlock(&hb->lock);
}
static void io_poll_req_delete(struct io_kiocb *req, struct io_ring_ctx *ctx)
{
	struct io_hash_table *table = &req->ctx->cancel_table;
	u32 index = hash_long(req->cqe.user_data, table->hash_bits);
	spinlock_t *lock = &table->hbs[index].lock;

	spin_lock(lock);
	hash_del(&req->hash_node);
	spin_unlock(lock);
}
static void io_poll_req_insert_locked(struct io_kiocb *req)
{
	struct io_hash_table *table = &req->ctx->cancel_table_locked;
	u32 index = hash_long(req->cqe.user_data, table->hash_bits);

	lockdep_assert_held(&req->ctx->uring_lock);

	hlist_add_head(&req->hash_node, &table->hbs[index].list);
}
static void io_poll_tw_hash_eject(struct io_kiocb *req, struct io_tw_state *ts)
{
	struct io_ring_ctx *ctx = req->ctx;

	if (req->flags & REQ_F_HASH_LOCKED) {
		/*
		 * ->cancel_table_locked is protected by ->uring_lock in
		 * contrast to per bucket spinlocks. Likely, tctx_task_work()
		 * already grabbed the mutex for us, but there is a chance it
		 * didn't.
		 */
		io_tw_lock(ctx, ts);
		hash_del(&req->hash_node);
		req->flags &= ~REQ_F_HASH_LOCKED;
	} else {
		io_poll_req_delete(req, ctx);
	}
}
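
/*
 * Initialise an io_poll entry: always listen for error/hangup conditions
 * (IO_POLL_UNMASK) on top of the requested event mask and hook up
 * io_poll_wake() as the waitqueue callback.
 */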
static void io_init_poll_iocb(struct io_poll *poll, __poll_t events)
{
	poll->head = NULL;
#define IO_POLL_UNMASK	(EPOLLERR|EPOLLHUP|EPOLLNVAL|EPOLLRDHUP)
	/* mask in events that we always want/need */
	poll->events = events | IO_POLL_UNMASK;
	INIT_LIST_HEAD(&poll->wait.entry);
	init_waitqueue_func_entry(&poll->wait, io_poll_wake);
}
static inline void io_poll_remove_entry(struct io_poll *poll)
{
	struct wait_queue_head *head = smp_load_acquire(&poll->head);

	if (head) {
		spin_lock_irq(&head->lock);
		list_del_init(&poll->wait.entry);
		poll->head = NULL;
		spin_unlock_irq(&head->lock);
	}
}
static void io_poll_remove_entries(struct io_kiocb *req)
{
	/*
	 * Nothing to do if neither of those flags are set. Avoid dipping
	 * into the poll/apoll/double cachelines if we can.
	 */
	if (!(req->flags & (REQ_F_SINGLE_POLL | REQ_F_DOUBLE_POLL)))
		return;

	/*
	 * While we hold the waitqueue lock and the waitqueue is nonempty,
	 * wake_up_pollfree() will wait for us. However, taking the waitqueue
	 * lock in the first place can race with the waitqueue being freed.
	 *
	 * We solve this as eventpoll does: by taking advantage of the fact that
	 * all users of wake_up_pollfree() will RCU-delay the actual free. If
	 * we enter rcu_read_lock() and see that the pointer to the queue is
	 * non-NULL, we can then lock it without the memory being freed out from
	 * under us.
	 *
	 * Keep holding rcu_read_lock() as long as we hold the queue lock, in
	 * case the caller deletes the entry from the queue, leaving it empty.
	 * In that case, only RCU prevents the queue memory from being freed.
	 */
	rcu_read_lock();
	if (req->flags & REQ_F_SINGLE_POLL)
		io_poll_remove_entry(io_poll_get_single(req));
	if (req->flags & REQ_F_DOUBLE_POLL)
		io_poll_remove_entry(io_poll_get_double(req));
	rcu_read_unlock();
}
enum {
	IOU_POLL_DONE = 0,
	IOU_POLL_NO_ACTION = 1,
	IOU_POLL_REMOVE_POLL_USE_RES = 2,
	IOU_POLL_REISSUE = 3,
	IOU_POLL_REQUEUE = 4,
};
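
/*
 * Stash the wake-up mask in cqe.res and punt the request to task_work for
 * completion; the queueing is lazy unless REQ_F_POLL_NO_LAZY is set.
 */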
static void __io_poll_execute(struct io_kiocb *req, int mask)
{
	unsigned flags = 0;

	io_req_set_res(req, mask, 0);
	req->io_task_work.func = io_poll_task_func;

	trace_io_uring_task_add(req, mask);

	if (!(req->flags & REQ_F_POLL_NO_LAZY))
		flags = IOU_F_TWQ_LAZY_WAKE;
	__io_req_task_work_add(req, flags);
}
static inline void io_poll_execute(struct io_kiocb *req, int res)
{
	if (io_poll_get_ownership(req))
		__io_poll_execute(req, res);
}
/*
 * All poll tw should go through this. Checks for poll events, manages
 * references, does rewait, etc.
 *
 * Returns a negative error on failure. IOU_POLL_NO_ACTION when no action is
 * required, which means either a spurious wakeup or a multishot CQE was
 * served. IOU_POLL_DONE when it's done with the request, then the mask is
 * stored in req->cqe.res. IOU_POLL_REMOVE_POLL_USE_RES indicates to remove
 * multishot poll and that the result is stored in req->cqe.
 */
static int io_poll_check_events(struct io_kiocb *req, struct io_tw_state *ts)
{
	int v;

	/* req->task == current here, checking PF_EXITING is safe */
	if (unlikely(req->task->flags & PF_EXITING))
		return -ECANCELED;

	do {
		v = atomic_read(&req->poll_refs);

		if (unlikely(v != 1)) {
			/* tw should be the owner and so have some refs */
			if (WARN_ON_ONCE(!(v & IO_POLL_REF_MASK)))
				return IOU_POLL_NO_ACTION;
			if (v & IO_POLL_CANCEL_FLAG)
				return -ECANCELED;
			/*
			 * cqe.res contains only events of the first wake up
			 * and all others are to be lost. Redo vfs_poll() to get
			 * up to date state.
			 */
			if ((v & IO_POLL_REF_MASK) != 1)
				req->cqe.res = 0;

			if (v & IO_POLL_RETRY_FLAG) {
				req->cqe.res = 0;
				/*
				 * We won't find new events that came in between
				 * vfs_poll and the ref put unless we clear the
				 * flag in advance.
				 */
				atomic_andnot(IO_POLL_RETRY_FLAG, &req->poll_refs);
				v &= ~IO_POLL_RETRY_FLAG;
			}
		}

		/* the mask was stashed in __io_poll_execute */
		if (!req->cqe.res) {
			struct poll_table_struct pt = { ._key = req->apoll_events };
			req->cqe.res = vfs_poll(req->file, &pt) & req->apoll_events;
			/*
			 * We got woken with a mask, but someone else got to
			 * it first. The above vfs_poll() doesn't add us back
			 * to the waitqueue, so if we get nothing back, we
			 * should be safe and attempt a reissue.
			 */
			if (unlikely(!req->cqe.res)) {
				/* Multishot armed need not reissue */
				if (!(req->apoll_events & EPOLLONESHOT))
					continue;
				return IOU_POLL_REISSUE;
			}
		}
		if (req->apoll_events & EPOLLONESHOT)
			return IOU_POLL_DONE;

		/* multishot, just fill a CQE and proceed */
		if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
			__poll_t mask = mangle_poll(req->cqe.res &
						    req->apoll_events);

			if (!io_req_post_cqe(req, mask, IORING_CQE_F_MORE)) {
				io_req_set_res(req, mask, 0);
				return IOU_POLL_REMOVE_POLL_USE_RES;
			}
		} else {
			int ret = io_poll_issue(req, ts);

			if (ret == IOU_STOP_MULTISHOT)
				return IOU_POLL_REMOVE_POLL_USE_RES;
			else if (ret == IOU_REQUEUE)
				return IOU_POLL_REQUEUE;
			if (ret < 0)
				return ret;
		}

		/* force the next iteration to vfs_poll() */
		req->cqe.res = 0;

		/*
		 * Release all references, retry if someone tried to restart
		 * task_work while we were executing it.
		 */
		v &= IO_POLL_REF_MASK;
	} while (atomic_sub_return(v, &req->poll_refs) & IO_POLL_REF_MASK);

	return IOU_POLL_NO_ACTION;
}
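
/*
 * Task-work completion handler shared by IORING_OP_POLL_ADD and async-armed
 * (apoll) requests: it consumes io_poll_check_events()'s verdict, removes the
 * waitqueue entries and hash entry once the poll is over, and then either
 * completes the request or resubmits it for issue.
 */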
void io_poll_task_func(struct io_kiocb *req, struct io_tw_state *ts)
{
	int ret;

	ret = io_poll_check_events(req, ts);
	if (ret == IOU_POLL_NO_ACTION) {
		return;
	} else if (ret == IOU_POLL_REQUEUE) {
		__io_poll_execute(req, 0);
		return;
	}
	io_poll_remove_entries(req);
	io_poll_tw_hash_eject(req, ts);

	if (req->opcode == IORING_OP_POLL_ADD) {
		if (ret == IOU_POLL_DONE) {
			struct io_poll *poll;

			poll = io_kiocb_to_cmd(req, struct io_poll);
			req->cqe.res = mangle_poll(req->cqe.res & poll->events);
		} else if (ret == IOU_POLL_REISSUE) {
			io_req_task_submit(req, ts);
			return;
		} else if (ret != IOU_POLL_REMOVE_POLL_USE_RES) {
			req->cqe.res = ret;
			req_set_fail(req);
		}

		io_req_set_res(req, req->cqe.res, 0);
		io_req_task_complete(req, ts);
	} else {
		io_tw_lock(req->ctx, ts);

		if (ret == IOU_POLL_REMOVE_POLL_USE_RES)
			io_req_task_complete(req, ts);
		else if (ret == IOU_POLL_DONE || ret == IOU_POLL_REISSUE)
			io_req_task_submit(req, ts);
		else
			io_req_defer_failed(req, ret);
	}
}
static void io_poll_cancel_req(struct io_kiocb *req)
{
	io_poll_mark_cancelled(req);
	/* kick tw, which should complete the request */
	io_poll_execute(req, 0);
}
#define IO_ASYNC_POLL_COMMON	(EPOLLONESHOT | EPOLLPRI)

static __cold int io_pollfree_wake(struct io_kiocb *req, struct io_poll *poll)
{
	io_poll_mark_cancelled(req);
	/* we have to kick tw in case it's not already */
	io_poll_execute(req, 0);

	/*
	 * If the waitqueue is being freed early but someone already holds
	 * ownership over it, we have to tear down the request as best we can.
	 * That means immediately removing the request from its waitqueue and
	 * preventing all further accesses to the waitqueue via the request.
	 */
	list_del_init(&poll->wait.entry);

	/*
	 * Careful: this *must* be the last step, since as soon
	 * as req->head is NULL'ed out, the request can be
	 * completed and freed, since aio_poll_complete_work()
	 * will no longer need to take the waitqueue lock.
	 */
	smp_store_release(&poll->head, NULL);
	return 1;
}
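
/*
 * Waitqueue callback installed by io_init_poll_iocb(). It filters out
 * non-matching events, grabs ownership of the request if possible, and punts
 * the result to task_work via __io_poll_execute(); POLLFREE teardown is
 * handled separately by io_pollfree_wake().
 */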
static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
			void *key)
{
	struct io_kiocb *req = wqe_to_req(wait);
	struct io_poll *poll = container_of(wait, struct io_poll, wait);
	__poll_t mask = key_to_poll(key);

	if (unlikely(mask & POLLFREE))
		return io_pollfree_wake(req, poll);

	/* for instances that support it check for an event match first */
	if (mask && !(mask & (poll->events & ~IO_ASYNC_POLL_COMMON)))
		return 0;

	if (io_poll_get_ownership(req)) {
		/*
		 * If we trigger a multishot poll off our own wakeup path,
		 * disable multishot as there is a circular dependency between
		 * CQ posting and triggering the event.
		 */
		if (mask & EPOLL_URING_WAKE)
			poll->events |= EPOLLONESHOT;

		/* optional, saves extra locking for removal in tw handler */
		if (mask && poll->events & EPOLLONESHOT) {
			list_del_init(&poll->wait.entry);
			poll->head = NULL;
			if (wqe_is_double(wait))
				req->flags &= ~REQ_F_DOUBLE_POLL;
			else
				req->flags &= ~REQ_F_SINGLE_POLL;
		}
		__io_poll_execute(req, mask);
	}
	return 1;
}
/* fails only when polling is already completing by the first entry */
static bool io_poll_double_prepare(struct io_kiocb *req)
{
	struct wait_queue_head *head;
	struct io_poll *poll = io_poll_get_single(req);

	/* head is RCU protected, see io_poll_remove_entries() comments */
	rcu_read_lock();
	head = smp_load_acquire(&poll->head);
	/*
	 * poll arm might not hold ownership and so race for req->flags with
	 * io_poll_wake(). There is only one poll entry queued, serialise with
	 * it by taking its head lock. As we're still arming, the tw handler
	 * is not going to be run, so there are no races with it.
	 */
	if (head) {
		spin_lock_irq(&head->lock);
		req->flags |= REQ_F_DOUBLE_POLL;
		if (req->opcode == IORING_OP_POLL_ADD)
			req->flags |= REQ_F_ASYNC_DATA;
		spin_unlock_irq(&head->lock);
	}
	rcu_read_unlock();
	return !!head;
}
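
/*
 * poll_table callback invoked from vfs_poll() while arming: registers the
 * request on the file's waitqueue. If the file uses a second waitqueue, a
 * separate io_poll entry is allocated and stashed through *poll_ptr.
 */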
static void __io_queue_proc(struct io_poll *poll, struct io_poll_table *pt,
			    struct wait_queue_head *head,
			    struct io_poll **poll_ptr)
{
	struct io_kiocb *req = pt->req;
	unsigned long wqe_private = (unsigned long) req;

	/*
	 * The file being polled uses multiple waitqueues for poll handling
	 * (e.g. one for read, one for write). Setup a separate io_poll
	 * if this happens.
	 */
	if (unlikely(pt->nr_entries)) {
		struct io_poll *first = poll;

		/* double add on the same waitqueue head, ignore */
		if (first->head == head)
			return;
		/* already have a 2nd entry, fail a third attempt */
		if (*poll_ptr) {
			if ((*poll_ptr)->head == head)
				return;
			pt->error = -EINVAL;
			return;
		}

		poll = kmalloc(sizeof(*poll), GFP_ATOMIC);
		if (!poll) {
			pt->error = -ENOMEM;
			return;
		}

		/* mark as double wq entry */
		wqe_private |= IO_WQE_F_DOUBLE;
		io_init_poll_iocb(poll, first->events);
		if (!io_poll_double_prepare(req)) {
			/* the request is completing, just back off */
			kfree(poll);
			return;
		}
		*poll_ptr = poll;
	} else {
		/* fine to modify, there is no poll queued to race with us */
		req->flags |= REQ_F_SINGLE_POLL;
	}

	pt->nr_entries++;
	poll->head = head;
	poll->wait.private = (void *) wqe_private;

	if (poll->events & EPOLLEXCLUSIVE) {
		add_wait_queue_exclusive(head, &poll->wait);
	} else {
		add_wait_queue(head, &poll->wait);
	}
}
static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head,
			       struct poll_table_struct *p)
{
	struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
	struct io_poll *poll = io_kiocb_to_cmd(pt->req, struct io_poll);

	__io_queue_proc(poll, pt, head,
			(struct io_poll **) &pt->req->async_data);
}
static bool io_poll_can_finish_inline(struct io_kiocb *req,
				      struct io_poll_table *pt)
{
	return pt->owning || io_poll_get_ownership(req);
}
static void io_poll_add_hash(struct io_kiocb *req)
{
	if (req->flags & REQ_F_HASH_LOCKED)
		io_poll_req_insert_locked(req);
	else
		io_poll_req_insert(req);
}
/*
 * Returns 0 when it's handed over for polling. The caller owns the request if
 * it returns non-zero, but otherwise should not touch it. Negative values
 * contain an error code. When the result is >0, the polling has completed
 * inline and ipt.result_mask is set to the mask.
 */
static int __io_arm_poll_handler(struct io_kiocb *req,
				 struct io_poll *poll,
				 struct io_poll_table *ipt, __poll_t mask,
				 unsigned issue_flags)
{
	INIT_HLIST_NODE(&req->hash_node);
	io_init_poll_iocb(poll, mask);
	poll->file = req->file;
	req->apoll_events = poll->events;

	ipt->pt._key = mask;
	ipt->req = req;
	ipt->error = 0;
	ipt->nr_entries = 0;
	/*
	 * Polling is either completed here or via task_work, so if we're in the
	 * task context we're naturally serialised with tw by merit of running
	 * the same task. When it's io-wq, take the ownership to prevent tw
	 * from running. However, when we're in the task context, skip taking
	 * it as an optimisation.
	 *
	 * Note: even though the request won't be completed/freed, without
	 * ownership we still can race with io_poll_wake().
	 * io_poll_can_finish_inline() tries to deal with that.
	 */
	ipt->owning = issue_flags & IO_URING_F_UNLOCKED;
	atomic_set(&req->poll_refs, (int)ipt->owning);

	/* io-wq doesn't hold uring_lock */
	if (issue_flags & IO_URING_F_UNLOCKED)
		req->flags &= ~REQ_F_HASH_LOCKED;

	/*
	 * Exclusive waits may only wake a limited number of entries
	 * rather than all of them; this may interfere with lazy
	 * wake if someone does wait(events > 1). Ensure we don't do
	 * lazy wake for those, as we need to process each one as they
	 * come in.
	 */
	if (poll->events & EPOLLEXCLUSIVE)
		req->flags |= REQ_F_POLL_NO_LAZY;

	mask = vfs_poll(req->file, &ipt->pt) & poll->events;

	if (unlikely(ipt->error || !ipt->nr_entries)) {
		io_poll_remove_entries(req);

		if (!io_poll_can_finish_inline(req, ipt)) {
			io_poll_mark_cancelled(req);
			return 0;
		} else if (mask && (poll->events & EPOLLET)) {
			ipt->result_mask = mask;
			return 1;
		}
		return ipt->error ?: -EINVAL;
	}

	if (mask &&
	   ((poll->events & (EPOLLET|EPOLLONESHOT)) == (EPOLLET|EPOLLONESHOT))) {
		if (!io_poll_can_finish_inline(req, ipt)) {
			io_poll_add_hash(req);
			return 0;
		}
		io_poll_remove_entries(req);
		ipt->result_mask = mask;
		/* no one else has access to the req, forget about the ref */
		return 1;
	}

	io_poll_add_hash(req);

	if (mask && (poll->events & EPOLLET) &&
	    io_poll_can_finish_inline(req, ipt)) {
		__io_poll_execute(req, mask);
		return 0;
	}

	if (ipt->owning) {
		/*
		 * Try to release ownership. If we see a change of state, e.g.
		 * poll was woken up, queue up a tw, it'll deal with it.
		 */
		if (atomic_cmpxchg(&req->poll_refs, 1, 0) != 1)
			__io_poll_execute(req, 0);
	}
	return 0;
}
static void io_async_queue_proc(struct file *file, struct wait_queue_head *head,
				struct poll_table_struct *p)
{
	struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
	struct async_poll *apoll = pt->req->apoll;

	__io_queue_proc(&apoll->poll, pt, head, &apoll->double_poll);
}
/*
 * We can't reliably detect loops where the poll repeatedly triggers and the
 * subsequent issue keeps failing. But rather than failing these immediately,
 * allow a certain amount of retries before we give up. Given that this
 * condition should _rarely_ trigger even once, we should be fine with a
 * larger value.
 */
#define APOLL_MAX_RETRY		128
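
/*
 * Get an async_poll container for the request: reuse req->apoll if it was
 * polled before, otherwise take one from the ring's apoll cache when the
 * uring_lock is held, falling back to a GFP_ATOMIC allocation. Returns NULL
 * once the per-apoll retry budget (APOLL_MAX_RETRY) is exhausted.
 */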
static struct async_poll *io_req_alloc_apoll(struct io_kiocb *req,
					     unsigned issue_flags)
{
	struct io_ring_ctx *ctx = req->ctx;
	struct async_poll *apoll;

	if (req->flags & REQ_F_POLLED) {
		apoll = req->apoll;
		kfree(apoll->double_poll);
	} else if (!(issue_flags & IO_URING_F_UNLOCKED)) {
		apoll = io_alloc_cache_get(&ctx->apoll_cache);
		if (!apoll)
			goto alloc_apoll;
		apoll->poll.retries = APOLL_MAX_RETRY;
	} else {
alloc_apoll:
		apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC);
		if (unlikely(!apoll))
			return NULL;
		apoll->poll.retries = APOLL_MAX_RETRY;
	}
	apoll->double_poll = NULL;
	req->apoll = apoll;
	if (unlikely(!--apoll->poll.retries))
		return NULL;
	return apoll;
}
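
/*
 * Arm poll-driven retry for a request whose file wasn't ready at issue time.
 * Returns IO_APOLL_OK when the request has been queued for a wakeup,
 * IO_APOLL_READY when it's already ready and should be issued again, or
 * IO_APOLL_ABORTED when poll-based retry isn't possible.
 */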
int io_arm_poll_handler(struct io_kiocb *req, unsigned issue_flags)
{
	const struct io_issue_def *def = &io_issue_defs[req->opcode];
	struct async_poll *apoll;
	struct io_poll_table ipt;
	__poll_t mask = POLLPRI | POLLERR | EPOLLET;
	int ret;

	/*
	 * apoll requests already grab the mutex to complete in the tw handler,
	 * so removal from the mutex-backed hash is free, use it by default.
	 */
	req->flags |= REQ_F_HASH_LOCKED;

	if (!def->pollin && !def->pollout)
		return IO_APOLL_ABORTED;
	if (!io_file_can_poll(req))
		return IO_APOLL_ABORTED;
	if (!(req->flags & REQ_F_APOLL_MULTISHOT))
		mask |= EPOLLONESHOT;

	if (def->pollin) {
		mask |= EPOLLIN | EPOLLRDNORM;

		/* If reading from MSG_ERRQUEUE using recvmsg, ignore POLLIN */
		if (req->flags & REQ_F_CLEAR_POLLIN)
			mask &= ~EPOLLIN;
	} else {
		mask |= EPOLLOUT | EPOLLWRNORM;
	}
	if (def->poll_exclusive)
		mask |= EPOLLEXCLUSIVE;

	apoll = io_req_alloc_apoll(req, issue_flags);
	if (!apoll)
		return IO_APOLL_ABORTED;
	req->flags &= ~(REQ_F_SINGLE_POLL | REQ_F_DOUBLE_POLL);
	req->flags |= REQ_F_POLLED;
	ipt.pt._qproc = io_async_queue_proc;

	io_kbuf_recycle(req, issue_flags);

	ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask, issue_flags);
	if (ret)
		return ret > 0 ? IO_APOLL_READY : IO_APOLL_ABORTED;
	trace_io_uring_poll_arm(req, mask, apoll->poll.events);
	return IO_APOLL_OK;
}
static __cold bool io_poll_remove_all_table(struct task_struct *tsk,
					    struct io_hash_table *table,
					    bool cancel_all)
{
	unsigned nr_buckets = 1U << table->hash_bits;
	struct hlist_node *tmp;
	struct io_kiocb *req;
	bool found = false;
	int i;

	for (i = 0; i < nr_buckets; i++) {
		struct io_hash_bucket *hb = &table->hbs[i];

		spin_lock(&hb->lock);
		hlist_for_each_entry_safe(req, tmp, &hb->list, hash_node) {
			if (io_match_task_safe(req, tsk, cancel_all)) {
				hlist_del_init(&req->hash_node);
				io_poll_cancel_req(req);
				found = true;
			}
		}
		spin_unlock(&hb->lock);
	}
	return found;
}
/*
 * Returns true if we found and killed one or more poll requests
 */
__cold bool io_poll_remove_all(struct io_ring_ctx *ctx, struct task_struct *tsk,
			       bool cancel_all)
	__must_hold(&ctx->uring_lock)
{
	bool ret;

	ret = io_poll_remove_all_table(tsk, &ctx->cancel_table, cancel_all);
	ret |= io_poll_remove_all_table(tsk, &ctx->cancel_table_locked, cancel_all);
	return ret;
}
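
/*
 * Look up a poll request by cqe.user_data in the given hash table. On a match
 * the bucket lock is left held and *out_bucket is set so the caller can drop
 * it after acting on the request; otherwise the bucket is unlocked and NULL
 * is returned.
 */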
static struct io_kiocb *io_poll_find(struct io_ring_ctx *ctx, bool poll_only,
				     struct io_cancel_data *cd,
				     struct io_hash_table *table,
				     struct io_hash_bucket **out_bucket)
{
	struct io_kiocb *req;
	u32 index = hash_long(cd->data, table->hash_bits);
	struct io_hash_bucket *hb = &table->hbs[index];

	*out_bucket = NULL;

	spin_lock(&hb->lock);
	hlist_for_each_entry(req, &hb->list, hash_node) {
		if (cd->data != req->cqe.user_data)
			continue;
		if (poll_only && req->opcode != IORING_OP_POLL_ADD)
			continue;
		if (cd->flags & IORING_ASYNC_CANCEL_ALL) {
			if (io_cancel_match_sequence(req, cd->seq))
				continue;
		}
		*out_bucket = hb;
		return req;
	}
	spin_unlock(&hb->lock);
	return NULL;
}
static struct io_kiocb *io_poll_file_find(struct io_ring_ctx *ctx,
					  struct io_cancel_data *cd,
					  struct io_hash_table *table,
					  struct io_hash_bucket **out_bucket)
{
	unsigned nr_buckets = 1U << table->hash_bits;
	struct io_kiocb *req;
	int i;

	*out_bucket = NULL;

	for (i = 0; i < nr_buckets; i++) {
		struct io_hash_bucket *hb = &table->hbs[i];

		spin_lock(&hb->lock);
		hlist_for_each_entry(req, &hb->list, hash_node) {
			if (io_cancel_req_match(req, cd)) {
				*out_bucket = hb;
				return req;
			}
		}
		spin_unlock(&hb->lock);
	}
	return NULL;
}
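
/*
 * Take ownership of a found poll request and pull it out of its waitqueues
 * and the cancellation hash. Returns 0 on success, -ENOENT if there is no
 * request, or -EALREADY if ownership could not be grabbed because completion
 * is already in progress.
 */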
static int io_poll_disarm(struct io_kiocb *req)
{
	if (!req)
		return -ENOENT;
	if (!io_poll_get_ownership(req))
		return -EALREADY;
	io_poll_remove_entries(req);
	hash_del(&req->hash_node);
	return 0;
}
static int __io_poll_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
			    struct io_hash_table *table)
{
	struct io_hash_bucket *bucket;
	struct io_kiocb *req;

	if (cd->flags & (IORING_ASYNC_CANCEL_FD | IORING_ASYNC_CANCEL_OP |
			 IORING_ASYNC_CANCEL_ANY))
		req = io_poll_file_find(ctx, cd, table, &bucket);
	else
		req = io_poll_find(ctx, false, cd, table, &bucket);

	if (req)
		io_poll_cancel_req(req);
	if (bucket)
		spin_unlock(&bucket->lock);
	return req ? 0 : -ENOENT;
}
int io_poll_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
		   unsigned issue_flags)
{
	int ret;

	ret = __io_poll_cancel(ctx, cd, &ctx->cancel_table);
	if (ret != -ENOENT)
		return ret;

	io_ring_submit_lock(ctx, issue_flags);
	ret = __io_poll_cancel(ctx, cd, &ctx->cancel_table_locked);
	io_ring_submit_unlock(ctx, issue_flags);
	return ret;
}
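
/*
 * Translate the userspace poll mask from the SQE into internal epoll events:
 * swap halfwords on big-endian, force EPOLLONESHOT unless multishot was
 * requested, default to edge-triggered unless level-triggered was asked for,
 * and preserve the behaviour flags.
 */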
static __poll_t io_poll_parse_events(const struct io_uring_sqe *sqe,
				     unsigned int flags)
{
	u32 events;

	events = READ_ONCE(sqe->poll32_events);
#ifdef __BIG_ENDIAN
	events = swahw32(events);
#endif
	if (!(flags & IORING_POLL_ADD_MULTI))
		events |= EPOLLONESHOT;
	if (!(flags & IORING_POLL_ADD_LEVEL))
		events |= EPOLLET;
	return demangle_poll(events) |
		(events & (EPOLLEXCLUSIVE|EPOLLONESHOT|EPOLLET));
}
int io_poll_remove_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_poll_update *upd = io_kiocb_to_cmd(req, struct io_poll_update);
	u32 flags;

	if (sqe->buf_index || sqe->splice_fd_in)
		return -EINVAL;
	flags = READ_ONCE(sqe->len);
	if (flags & ~(IORING_POLL_UPDATE_EVENTS | IORING_POLL_UPDATE_USER_DATA |
		      IORING_POLL_ADD_MULTI))
		return -EINVAL;
	/* meaningless without update */
	if (flags == IORING_POLL_ADD_MULTI)
		return -EINVAL;

	upd->old_user_data = READ_ONCE(sqe->addr);
	upd->update_events = flags & IORING_POLL_UPDATE_EVENTS;
	upd->update_user_data = flags & IORING_POLL_UPDATE_USER_DATA;

	upd->new_user_data = READ_ONCE(sqe->off);
	if (!upd->update_user_data && upd->new_user_data)
		return -EINVAL;
	if (upd->update_events)
		upd->events = io_poll_parse_events(sqe, flags);
	else if (sqe->poll32_events)
		return -EINVAL;

	return 0;
}
int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_poll *poll = io_kiocb_to_cmd(req, struct io_poll);
	u32 flags;

	if (sqe->buf_index || sqe->off || sqe->addr)
		return -EINVAL;
	flags = READ_ONCE(sqe->len);
	if (flags & ~IORING_POLL_ADD_MULTI)
		return -EINVAL;
	if ((flags & IORING_POLL_ADD_MULTI) && (req->flags & REQ_F_CQE_SKIP))
		return -EINVAL;

	poll->events = io_poll_parse_events(sqe, flags);
	return 0;
}
int io_poll_add(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_poll *poll = io_kiocb_to_cmd(req, struct io_poll);
	struct io_poll_table ipt;
	int ret;

	ipt.pt._qproc = io_poll_queue_proc;

	/*
	 * If sqpoll or single issuer, there is no contention for ->uring_lock
	 * and we'll end up holding it in tw handlers anyway.
	 */
	if (req->ctx->flags & (IORING_SETUP_SQPOLL|IORING_SETUP_SINGLE_ISSUER))
		req->flags |= REQ_F_HASH_LOCKED;

	ret = __io_arm_poll_handler(req, poll, &ipt, poll->events, issue_flags);
	if (ret > 0) {
		io_req_set_res(req, ipt.result_mask, 0);
		return IOU_OK;
	}
	return ret ?: IOU_ISSUE_SKIP_COMPLETE;
}
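
/*
 * IORING_OP_POLL_REMOVE: find the target poll request by its old user_data in
 * both cancellation tables, disarm it, and either update its events/user_data
 * and re-arm it via io_poll_add(), or cancel it with -ECANCELED when no update
 * was requested or re-arming fails.
 */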
int io_poll_remove(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_poll_update *poll_update = io_kiocb_to_cmd(req, struct io_poll_update);
	struct io_ring_ctx *ctx = req->ctx;
	struct io_cancel_data cd = { .ctx = ctx, .data = poll_update->old_user_data, };
	struct io_hash_bucket *bucket;
	struct io_kiocb *preq;
	int ret2, ret = 0;

	io_ring_submit_lock(ctx, issue_flags);
	preq = io_poll_find(ctx, true, &cd, &ctx->cancel_table, &bucket);
	ret2 = io_poll_disarm(preq);
	if (bucket)
		spin_unlock(&bucket->lock);
	if (!ret2)
		goto found;
	if (ret2 != -ENOENT) {
		ret = ret2;
		goto out;
	}

	preq = io_poll_find(ctx, true, &cd, &ctx->cancel_table_locked, &bucket);
	ret2 = io_poll_disarm(preq);
	if (bucket)
		spin_unlock(&bucket->lock);
	if (ret2) {
		ret = ret2;
		goto out;
	}

found:
	if (WARN_ON_ONCE(preq->opcode != IORING_OP_POLL_ADD)) {
		ret = -EFAULT;
		goto out;
	}

	if (poll_update->update_events || poll_update->update_user_data) {
		/* only replace the event bits, keep the behaviour flags */
		if (poll_update->update_events) {
			struct io_poll *poll = io_kiocb_to_cmd(preq, struct io_poll);

			poll->events &= ~0xffff;
			poll->events |= poll_update->events & 0xffff;
			poll->events |= IO_POLL_UNMASK;
		}
		if (poll_update->update_user_data)
			preq->cqe.user_data = poll_update->new_user_data;

		ret2 = io_poll_add(preq, issue_flags & ~IO_URING_F_UNLOCKED);
		/* successfully updated, don't complete poll request */
		if (!ret2 || ret2 == -EIOCBQUEUED)
			goto out;
	}

	req_set_fail(preq);
	io_req_set_res(preq, -ECANCELED, 0);
	preq->io_task_work.func = io_req_task_complete;
	io_req_task_work_add(preq);
out:
	io_ring_submit_unlock(ctx, issue_flags);
	if (ret < 0) {
		req_set_fail(req);
		return ret;
	}
	/* complete update request, we're done with it */
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}