// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/poll.h>
#include <linux/hashtable.h>
#include <linux/io_uring.h>

#include <trace/events/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "io_uring.h"
#include "alloc_cache.h"
#include "refs.h"
#include "napi.h"
#include "opdef.h"
#include "kbuf.h"
#include "poll.h"
#include "cancel.h"
struct io_poll_update {
	struct file			*file;
	u64				old_user_data;
	u64				new_user_data;
	__poll_t			events;
	bool				update_events;
	bool				update_user_data;
};
struct io_poll_table {
	struct poll_table_struct pt;
	struct io_kiocb *req;
	int nr_entries;
	int error;
	bool owning;
	/* output value, set only if arm poll returns >0 */
	__poll_t result_mask;
};
#define IO_POLL_CANCEL_FLAG	BIT(31)
#define IO_POLL_RETRY_FLAG	BIT(30)
#define IO_POLL_REF_MASK	GENMASK(29, 0)

/*
 * We usually have 1-2 refs taken, 128 is more than enough and we want to
 * maximise the margin between this amount and the moment when it overflows.
 */
#define IO_POLL_REF_BIAS	128

#define IO_WQE_F_DOUBLE		1
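/*
 * Illustrative layout of req->poll_refs, derived from the defines above:
 * bits 0-29 hold the ownership refcount (IO_POLL_REF_MASK), bit 30 flags a
 * retry request from a contended wakeup, and bit 31 marks the request as
 * cancelled. IO_WQE_F_DOUBLE lives elsewhere: it's stashed in the low bit
 * of the wait queue entry's ->private pointer, see wqe_to_req() below.
 */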
static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
			void *key);
static inline struct io_kiocb *wqe_to_req(struct wait_queue_entry *wqe)
{
	unsigned long priv = (unsigned long)wqe->private;

	return (struct io_kiocb *)(priv & ~IO_WQE_F_DOUBLE);
}
static inline bool wqe_is_double(struct wait_queue_entry *wqe)
{
	unsigned long priv = (unsigned long)wqe->private;

	return priv & IO_WQE_F_DOUBLE;
}
static bool io_poll_get_ownership_slowpath(struct io_kiocb *req)
{
	int v;

	/*
	 * poll_refs are already elevated and we don't have much hope for
	 * grabbing the ownership. Instead of incrementing, set a retry flag
	 * to notify the loop that there might have been some change.
	 */
	v = atomic_fetch_or(IO_POLL_RETRY_FLAG, &req->poll_refs);
	if (v & IO_POLL_REF_MASK)
		return false;
	return !(atomic_fetch_inc(&req->poll_refs) & IO_POLL_REF_MASK);
}
/*
 * If the refs part of ->poll_refs (see IO_POLL_REF_MASK) is 0, it's free. We
 * can bump it and acquire ownership. It's disallowed to modify requests while
 * not owning it, which prevents races when enqueueing task_work and between
 * arming poll and wakeups.
 */
static inline bool io_poll_get_ownership(struct io_kiocb *req)
{
	if (unlikely(atomic_read(&req->poll_refs) >= IO_POLL_REF_BIAS))
		return io_poll_get_ownership_slowpath(req);
	return !(atomic_fetch_inc(&req->poll_refs) & IO_POLL_REF_MASK);
}
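/*
 * Example of how the ownership handoff above plays out: when two wakeups
 * race on one request, the first atomic_fetch_inc() sees zero refs and wins
 * ownership, so it queues task_work; the second only leaves its extra
 * reference behind. io_poll_check_events() later drops all refs with
 * atomic_sub_return() and, if any were added meanwhile, loops to re-check
 * for events instead of missing that second wakeup.
 */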
static void io_poll_mark_cancelled(struct io_kiocb *req)
{
	atomic_or(IO_POLL_CANCEL_FLAG, &req->poll_refs);
}
static struct io_poll *io_poll_get_double(struct io_kiocb *req)
{
	/* pure poll stashes this in ->async_data, poll driven retry elsewhere */
	if (req->opcode == IORING_OP_POLL_ADD)
		return req->async_data;
	return req->apoll->double_poll;
}
static struct io_poll *io_poll_get_single(struct io_kiocb *req)
{
	if (req->opcode == IORING_OP_POLL_ADD)
		return io_kiocb_to_cmd(req, struct io_poll);
	return &req->apoll->poll;
}
static void io_poll_req_insert(struct io_kiocb *req)
{
	struct io_hash_table *table = &req->ctx->cancel_table;
	u32 index = hash_long(req->cqe.user_data, table->hash_bits);

	lockdep_assert_held(&req->ctx->uring_lock);

	hlist_add_head(&req->hash_node, &table->hbs[index].list);
}
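/*
 * Note on io_poll_req_insert() above: armed poll requests are hashed by
 * cqe.user_data so that cancellation and poll update (io_poll_find() and
 * friends below) can locate them later, and the table is only ever touched
 * with ->uring_lock held.
 */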
static void io_init_poll_iocb(struct io_poll *poll, __poll_t events)
{
	poll->head = NULL;
#define IO_POLL_UNMASK	(EPOLLERR|EPOLLHUP|EPOLLNVAL|EPOLLRDHUP)
	/* mask in events that we always want/need */
	poll->events = events | IO_POLL_UNMASK;
	INIT_LIST_HEAD(&poll->wait.entry);
	init_waitqueue_func_entry(&poll->wait, io_poll_wake);
}
static inline void io_poll_remove_entry(struct io_poll *poll)
{
	struct wait_queue_head *head = smp_load_acquire(&poll->head);

	if (head) {
		spin_lock_irq(&head->lock);
		list_del_init(&poll->wait.entry);
		poll->head = NULL;
		spin_unlock_irq(&head->lock);
	}
}
static void io_poll_remove_entries(struct io_kiocb *req)
{
	/*
	 * Nothing to do if neither of those flags are set. Avoid dipping
	 * into the poll/apoll/double cachelines if we can.
	 */
	if (!(req->flags & (REQ_F_SINGLE_POLL | REQ_F_DOUBLE_POLL)))
		return;

	/*
	 * While we hold the waitqueue lock and the waitqueue is nonempty,
	 * wake_up_pollfree() will wait for us. However, taking the waitqueue
	 * lock in the first place can race with the waitqueue being freed.
	 *
	 * We solve this as eventpoll does: by taking advantage of the fact that
	 * all users of wake_up_pollfree() will RCU-delay the actual free. If
	 * we enter rcu_read_lock() and see that the pointer to the queue is
	 * non-NULL, we can then lock it without the memory being freed out from
	 * under us.
	 *
	 * Keep holding rcu_read_lock() as long as we hold the queue lock, in
	 * case the caller deletes the entry from the queue, leaving it empty.
	 * In that case, only RCU prevents the queue memory from being freed.
	 */
	rcu_read_lock();
	if (req->flags & REQ_F_SINGLE_POLL)
		io_poll_remove_entry(io_poll_get_single(req));
	if (req->flags & REQ_F_DOUBLE_POLL)
		io_poll_remove_entry(io_poll_get_double(req));
	rcu_read_unlock();
}
enum {
	IOU_POLL_DONE = 0,
	IOU_POLL_NO_ACTION = 1,
	IOU_POLL_REMOVE_POLL_USE_RES = 2,
	IOU_POLL_REISSUE = 3,
	IOU_POLL_REQUEUE = 4,
};
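/*
 * __io_poll_execute() below hands the request over to task_work: it stashes
 * the wake-up mask in req->cqe.res, points io_task_work.func at
 * io_poll_task_func() and queues it. Lazy task_work batching is used unless
 * the request is marked REQ_F_POLL_NO_LAZY (see the exclusive-wait handling
 * in __io_arm_poll_handler()).
 */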
static void __io_poll_execute(struct io_kiocb *req, int mask)
{
	unsigned flags = 0;

	io_req_set_res(req, mask, 0);
	req->io_task_work.func = io_poll_task_func;

	trace_io_uring_task_add(req, mask);

	if (!(req->flags & REQ_F_POLL_NO_LAZY))
		flags = IOU_F_TWQ_LAZY_WAKE;
	__io_req_task_work_add(req, flags);
}
static inline void io_poll_execute(struct io_kiocb *req, int res)
{
	if (io_poll_get_ownership(req))
		__io_poll_execute(req, res);
}
/*
 * All poll tw should go through this. Checks for poll events, manages
 * references, does rewait, etc.
 *
 * Returns a negative error on failure. IOU_POLL_NO_ACTION when no action
 * is required, which is either a spurious wakeup or a multishot CQE having
 * been served. IOU_POLL_DONE when it's done with the request, then the mask
 * is stored in req->cqe.res. IOU_POLL_REMOVE_POLL_USE_RES indicates to remove
 * multishot poll and that the result is stored in req->cqe.
 */
static int io_poll_check_events(struct io_kiocb *req, struct io_tw_state *ts)
{
	int v;

	if (unlikely(io_should_terminate_tw()))
		return -ECANCELED;

	do {
		v = atomic_read(&req->poll_refs);

		if (unlikely(v != 1)) {
			/* tw should be the owner and so have some refs */
			if (WARN_ON_ONCE(!(v & IO_POLL_REF_MASK)))
				return IOU_POLL_NO_ACTION;
			if (v & IO_POLL_CANCEL_FLAG)
				return -ECANCELED;
			/*
			 * cqe.res contains only events of the first wake up
			 * and all others are to be lost. Redo vfs_poll() to get
			 * up to date state.
			 */
			if ((v & IO_POLL_REF_MASK) != 1)
				req->cqe.res = 0;

			if (v & IO_POLL_RETRY_FLAG) {
				req->cqe.res = 0;
				/*
				 * We won't find new events that came in between
				 * vfs_poll and the ref put unless we clear the
				 * flag in advance.
				 */
				atomic_andnot(IO_POLL_RETRY_FLAG, &req->poll_refs);
				v &= ~IO_POLL_RETRY_FLAG;
			}
		}

		/* the mask was stashed in __io_poll_execute */
		if (!req->cqe.res) {
			struct poll_table_struct pt = { ._key = req->apoll_events };
			req->cqe.res = vfs_poll(req->file, &pt) & req->apoll_events;
			/*
			 * We got woken with a mask, but someone else got to
			 * it first. The above vfs_poll() doesn't add us back
			 * to the waitqueue, so if we get nothing back, we
			 * should be safe and attempt a reissue.
			 */
			if (unlikely(!req->cqe.res)) {
				/* Multishot armed need not reissue */
				if (!(req->apoll_events & EPOLLONESHOT))
					continue;
				return IOU_POLL_REISSUE;
			}
		}
		if (req->apoll_events & EPOLLONESHOT)
			return IOU_POLL_DONE;

		/* multishot, just fill a CQE and proceed */
		if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
			__poll_t mask = mangle_poll(req->cqe.res &
						    req->apoll_events);

			if (!io_req_post_cqe(req, mask, IORING_CQE_F_MORE)) {
				io_req_set_res(req, mask, 0);
				return IOU_POLL_REMOVE_POLL_USE_RES;
			}
		} else {
			int ret = io_poll_issue(req, ts);

			if (ret == IOU_STOP_MULTISHOT)
				return IOU_POLL_REMOVE_POLL_USE_RES;
			else if (ret == IOU_REQUEUE)
				return IOU_POLL_REQUEUE;
			if (ret < 0)
				return ret;
		}

		/* force the next iteration to vfs_poll() */
		req->cqe.res = 0;

		/*
		 * Release all references, retry if someone tried to restart
		 * task_work while we were executing it.
		 */
		v &= IO_POLL_REF_MASK;
	} while (atomic_sub_return(v, &req->poll_refs) & IO_POLL_REF_MASK);

	io_napi_add(req);
	return IOU_POLL_NO_ACTION;
}
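/*
 * io_poll_task_func() below is the common task_work callback for both
 * IORING_OP_POLL_ADD and internally armed async polls (apoll). It acts on
 * the IOU_POLL_* verdict from io_poll_check_events(): tearing down the
 * waitqueue entries and hash linkage when the poll is finished, completing
 * the request, or resubmitting it for a blocking retry.
 */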
void io_poll_task_func(struct io_kiocb *req, struct io_tw_state *ts)
{
	int ret;

	ret = io_poll_check_events(req, ts);
	if (ret == IOU_POLL_NO_ACTION) {
		return;
	} else if (ret == IOU_POLL_REQUEUE) {
		__io_poll_execute(req, 0);
		return;
	}
	io_poll_remove_entries(req);
	/* task_work always has ->uring_lock held */
	hash_del(&req->hash_node);

	if (req->opcode == IORING_OP_POLL_ADD) {
		if (ret == IOU_POLL_DONE) {
			struct io_poll *poll;

			poll = io_kiocb_to_cmd(req, struct io_poll);
			req->cqe.res = mangle_poll(req->cqe.res & poll->events);
		} else if (ret == IOU_POLL_REISSUE) {
			io_req_task_submit(req, ts);
			return;
		} else if (ret != IOU_POLL_REMOVE_POLL_USE_RES) {
			req->cqe.res = ret;
			req_set_fail(req);
		}

		io_req_set_res(req, req->cqe.res, 0);
		io_req_task_complete(req, ts);
	} else {
		io_tw_lock(req->ctx, ts);

		if (ret == IOU_POLL_REMOVE_POLL_USE_RES)
			io_req_task_complete(req, ts);
		else if (ret == IOU_POLL_DONE || ret == IOU_POLL_REISSUE)
			io_req_task_submit(req, ts);
		else
			io_req_defer_failed(req, ret);
	}
}
static void io_poll_cancel_req(struct io_kiocb *req)
{
	io_poll_mark_cancelled(req);
	/* kick tw, which should complete the request */
	io_poll_execute(req, 0);
}
#define IO_ASYNC_POLL_COMMON	(EPOLLONESHOT | EPOLLPRI)
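/*
 * IO_ASYNC_POLL_COMMON covers bits that async poll arming adds on its own
 * (EPOLLPRI always, EPOLLONESHOT for single-shot requests) rather than bits
 * the opcode asked for. io_poll_wake() masks them out of poll->events before
 * checking whether a wakeup actually matches the events of interest.
 */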
static __cold int io_pollfree_wake(struct io_kiocb *req, struct io_poll *poll)
{
	io_poll_mark_cancelled(req);
	/* we have to kick tw in case it's not already */
	io_poll_execute(req, 0);

	/*
	 * If the waitqueue is being freed early but someone else already
	 * holds ownership over it, we have to tear down the request as best
	 * we can. That means immediately removing the request from its
	 * waitqueue and preventing all further accesses to the waitqueue
	 * via the request.
	 */
	list_del_init(&poll->wait.entry);

	/*
	 * Careful: this *must* be the last step, since as soon
	 * as req->head is NULL'ed out, the request can be
	 * completed and freed, since aio_poll_complete_work()
	 * will no longer need to take the waitqueue lock.
	 */
	smp_store_release(&poll->head, NULL);
	return 1;
}
static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
			void *key)
{
	struct io_kiocb *req = wqe_to_req(wait);
	struct io_poll *poll = container_of(wait, struct io_poll, wait);
	__poll_t mask = key_to_poll(key);

	if (unlikely(mask & POLLFREE))
		return io_pollfree_wake(req, poll);

	/* for instances that support it check for an event match first */
	if (mask && !(mask & (poll->events & ~IO_ASYNC_POLL_COMMON)))
		return 0;

	if (io_poll_get_ownership(req)) {
		/*
		 * If we trigger a multishot poll off our own wakeup path,
		 * disable multishot as there is a circular dependency between
		 * CQ posting and triggering the event.
		 */
		if (mask & EPOLL_URING_WAKE)
			poll->events |= EPOLLONESHOT;

		/* optional, saves extra locking for removal in tw handler */
		if (mask && poll->events & EPOLLONESHOT) {
			list_del_init(&poll->wait.entry);
			poll->head = NULL;
			if (wqe_is_double(wait))
				req->flags &= ~REQ_F_DOUBLE_POLL;
			else
				req->flags &= ~REQ_F_SINGLE_POLL;
		}
		__io_poll_execute(req, mask);
	}
	return 1;
}
/* fails only when polling is already completing by the first entry */
static bool io_poll_double_prepare(struct io_kiocb *req)
{
	struct wait_queue_head *head;
	struct io_poll *poll = io_poll_get_single(req);

	/* head is RCU protected, see io_poll_remove_entries() comments */
	rcu_read_lock();
	head = smp_load_acquire(&poll->head);
	/*
	 * poll arm might not hold ownership and so race for req->flags with
	 * io_poll_wake(). There is only one poll entry queued, serialise with
	 * it by taking its head lock. As we're still arming, the tw handler
	 * is not going to be run, so there are no races with it.
	 */
	if (head) {
		spin_lock_irq(&head->lock);
		req->flags |= REQ_F_DOUBLE_POLL;
		if (req->opcode == IORING_OP_POLL_ADD)
			req->flags |= REQ_F_ASYNC_DATA;
		spin_unlock_irq(&head->lock);
	}
	rcu_read_unlock();
	return !!head;
}
static void __io_queue_proc(struct io_poll *poll, struct io_poll_table *pt,
			    struct wait_queue_head *head,
			    struct io_poll **poll_ptr)
{
	struct io_kiocb *req = pt->req;
	unsigned long wqe_private = (unsigned long) req;

	/*
	 * The file being polled uses multiple waitqueues for poll handling
	 * (e.g. one for read, one for write). Setup a separate io_poll
	 * if this happens.
	 */
	if (unlikely(pt->nr_entries)) {
		struct io_poll *first = poll;

		/* double add on the same waitqueue head, ignore */
		if (first->head == head)
			return;
		/* already have a 2nd entry, fail a third attempt */
		if (*poll_ptr) {
			if ((*poll_ptr)->head == head)
				return;
			pt->error = -EINVAL;
			return;
		}

		poll = kmalloc(sizeof(*poll), GFP_ATOMIC);
		if (!poll) {
			pt->error = -ENOMEM;
			return;
		}

		/* mark as double wq entry */
		wqe_private |= IO_WQE_F_DOUBLE;
		io_init_poll_iocb(poll, first->events);
		if (!io_poll_double_prepare(req)) {
			/* the request is completing, just back off */
			kfree(poll);
			return;
		}
		*poll_ptr = poll;
	} else {
		/* fine to modify, there is no poll queued to race with us */
		req->flags |= REQ_F_SINGLE_POLL;
	}

	pt->nr_entries++;
	poll->head = head;
	poll->wait.private = (void *) wqe_private;

	if (poll->events & EPOLLEXCLUSIVE) {
		add_wait_queue_exclusive(head, &poll->wait);
	} else {
		add_wait_queue(head, &poll->wait);
	}
}
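/*
 * __io_queue_proc() is not called directly: it runs as the poll_table
 * ->_qproc callback when vfs_poll() invokes the file's ->poll() handler,
 * which in turn calls poll_wait() on each waitqueue it uses. The two
 * wrappers below only differ in where the optional second entry is stored:
 * ->async_data for pure poll requests, apoll->double_poll for async poll.
 */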
static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head,
			       struct poll_table_struct *p)
{
	struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
	struct io_poll *poll = io_kiocb_to_cmd(pt->req, struct io_poll);

	__io_queue_proc(poll, pt, head,
			(struct io_poll **) &pt->req->async_data);
}
static bool io_poll_can_finish_inline(struct io_kiocb *req,
				      struct io_poll_table *pt)
{
	return pt->owning || io_poll_get_ownership(req);
}
static void io_poll_add_hash(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_ring_ctx *ctx = req->ctx;

	io_ring_submit_lock(ctx, issue_flags);
	io_poll_req_insert(req);
	io_ring_submit_unlock(ctx, issue_flags);
}
/*
 * Returns 0 when it's handed over for polling. The caller owns the request if
 * it returns non-zero, but otherwise should not touch it. Negative values
 * contain an error code. When the result is >0, the polling has completed
 * inline and ipt.result_mask is set to the mask.
 */
static int __io_arm_poll_handler(struct io_kiocb *req,
				 struct io_poll *poll,
				 struct io_poll_table *ipt, __poll_t mask,
				 unsigned issue_flags)
{
	INIT_HLIST_NODE(&req->hash_node);
	io_init_poll_iocb(poll, mask);
	poll->file = req->file;
	req->apoll_events = poll->events;

	ipt->pt._key = mask;
	ipt->req = req;
	ipt->error = 0;
	ipt->nr_entries = 0;
	/*
	 * Polling is either completed here or via task_work, so if we're in the
	 * task context we're naturally serialised with tw by merit of running
	 * the same task. When it's io-wq, take the ownership to prevent tw
	 * from running. However, when we're in the task context, skip taking
	 * it as an optimisation.
	 *
	 * Note: even though the request won't be completed/freed, without
	 * ownership we still can race with io_poll_wake().
	 * io_poll_can_finish_inline() tries to deal with that.
	 */
	ipt->owning = issue_flags & IO_URING_F_UNLOCKED;
	atomic_set(&req->poll_refs, (int)ipt->owning);

	/*
	 * Exclusive waits may only wake a limited amount of entries
	 * rather than all of them, this may interfere with lazy
	 * wake if someone does wait(events > 1). Ensure we don't do
	 * lazy wake for those, as we need to process each one as they
	 * come in.
	 */
	if (poll->events & EPOLLEXCLUSIVE)
		req->flags |= REQ_F_POLL_NO_LAZY;

	mask = vfs_poll(req->file, &ipt->pt) & poll->events;

	if (unlikely(ipt->error || !ipt->nr_entries)) {
		io_poll_remove_entries(req);

		if (!io_poll_can_finish_inline(req, ipt)) {
			io_poll_mark_cancelled(req);
			return 0;
		} else if (mask && (poll->events & EPOLLET)) {
			ipt->result_mask = mask;
			return 1;
		}
		return ipt->error ?: -EINVAL;
	}

	if (mask &&
	   ((poll->events & (EPOLLET|EPOLLONESHOT)) == (EPOLLET|EPOLLONESHOT))) {
		if (!io_poll_can_finish_inline(req, ipt)) {
			io_poll_add_hash(req, issue_flags);
			return 0;
		}
		io_poll_remove_entries(req);
		ipt->result_mask = mask;
		/* no one else has access to the req, forget about the ref */
		return 1;
	}

	io_poll_add_hash(req, issue_flags);

	if (mask && (poll->events & EPOLLET) &&
	    io_poll_can_finish_inline(req, ipt)) {
		__io_poll_execute(req, mask);
		return 0;
	}
	io_napi_add(req);

	if (ipt->owning) {
		/*
		 * Try to release ownership. If we see a change of state, e.g.
		 * poll was woken up, queue up a tw, it'll deal with it.
		 */
		if (atomic_cmpxchg(&req->poll_refs, 1, 0) != 1)
			__io_poll_execute(req, 0);
	}
	return 0;
}
static void io_async_queue_proc(struct file *file, struct wait_queue_head *head,
				struct poll_table_struct *p)
{
	struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
	struct async_poll *apoll = pt->req->apoll;

	__io_queue_proc(&apoll->poll, pt, head, &apoll->double_poll);
}
/*
 * We can't reliably detect loops where a poll trigger fires and the
 * subsequent issue attempt keeps failing. Rather than failing these
 * immediately, allow a certain amount of retries before we give up.
 * Given that this condition should _rarely_ trigger even once, we should
 * be fine with a larger value.
 */
#define APOLL_MAX_RETRY	128
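/*
 * io_req_alloc_apoll() below picks the cheapest source for the async_poll
 * container: reuse req->apoll if the request was polled before, pull from
 * the per-ring apoll cache when ->uring_lock is held, and otherwise fall
 * back to a GFP_ATOMIC allocation. The retry budget above is decremented on
 * every re-arm of an already-polled request to bound trigger/fail loops.
 */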
static struct async_poll *io_req_alloc_apoll(struct io_kiocb *req,
					     unsigned issue_flags)
{
	struct io_ring_ctx *ctx = req->ctx;
	struct async_poll *apoll;

	if (req->flags & REQ_F_POLLED) {
		apoll = req->apoll;
		kfree(apoll->double_poll);
	} else if (!(issue_flags & IO_URING_F_UNLOCKED)) {
		apoll = io_alloc_cache_get(&ctx->apoll_cache);
		if (!apoll)
			goto alloc_apoll;
		apoll->poll.retries = APOLL_MAX_RETRY;
	} else {
alloc_apoll:
		apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC);
		if (unlikely(!apoll))
			return NULL;
		apoll->poll.retries = APOLL_MAX_RETRY;
	}
	apoll->double_poll = NULL;
	req->apoll = apoll;
	if (unlikely(!--apoll->poll.retries))
		return NULL;
	return apoll;
}
int io_arm_poll_handler(struct io_kiocb *req, unsigned issue_flags)
{
	const struct io_issue_def *def = &io_issue_defs[req->opcode];
	struct async_poll *apoll;
	struct io_poll_table ipt;
	__poll_t mask = POLLPRI | POLLERR | EPOLLET;
	int ret;

	if (!def->pollin && !def->pollout)
		return IO_APOLL_ABORTED;
	if (!io_file_can_poll(req))
		return IO_APOLL_ABORTED;
	if (!(req->flags & REQ_F_APOLL_MULTISHOT))
		mask |= EPOLLONESHOT;

	if (def->pollin) {
		mask |= EPOLLIN | EPOLLRDNORM;

		/* If reading from MSG_ERRQUEUE using recvmsg, ignore POLLIN */
		if (req->flags & REQ_F_CLEAR_POLLIN)
			mask &= ~EPOLLIN;
	} else {
		mask |= EPOLLOUT | EPOLLWRNORM;
	}
	if (def->poll_exclusive)
		mask |= EPOLLEXCLUSIVE;

	apoll = io_req_alloc_apoll(req, issue_flags);
	if (!apoll)
		return IO_APOLL_ABORTED;
	req->flags &= ~(REQ_F_SINGLE_POLL | REQ_F_DOUBLE_POLL);
	req->flags |= REQ_F_POLLED;
	ipt.pt._qproc = io_async_queue_proc;

	io_kbuf_recycle(req, issue_flags);

	ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask, issue_flags);
	if (ret)
		return ret > 0 ? IO_APOLL_READY : IO_APOLL_ABORTED;
	trace_io_uring_poll_arm(req, mask, apoll->poll.events);
	return IO_APOLL_OK;
}
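/*
 * Callers of io_arm_poll_handler() interpret the return value as follows:
 * IO_APOLL_OK means the poll was armed and the request will be retried from
 * task_work once the file signals readiness, IO_APOLL_READY means the event
 * is already there and the request should be re-issued right away, and
 * IO_APOLL_ABORTED means arming wasn't possible and the caller is expected
 * to fall back to another async path (typically io-wq).
 */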
/*
 * Returns true if we found and killed one or more poll requests
 */
__cold bool io_poll_remove_all(struct io_ring_ctx *ctx, struct io_uring_task *tctx,
			       bool cancel_all)
{
	unsigned nr_buckets = 1U << ctx->cancel_table.hash_bits;
	struct hlist_node *tmp;
	struct io_kiocb *req;
	bool found = false;
	int i;

	lockdep_assert_held(&ctx->uring_lock);

	for (i = 0; i < nr_buckets; i++) {
		struct io_hash_bucket *hb = &ctx->cancel_table.hbs[i];

		hlist_for_each_entry_safe(req, tmp, &hb->list, hash_node) {
			if (io_match_task_safe(req, tctx, cancel_all)) {
				hlist_del_init(&req->hash_node);
				io_poll_cancel_req(req);
				found = true;
			}
		}
	}
	return found;
}
static struct io_kiocb *io_poll_find(struct io_ring_ctx *ctx, bool poll_only,
				     struct io_cancel_data *cd)
{
	struct io_kiocb *req;
	u32 index = hash_long(cd->data, ctx->cancel_table.hash_bits);
	struct io_hash_bucket *hb = &ctx->cancel_table.hbs[index];

	hlist_for_each_entry(req, &hb->list, hash_node) {
		if (cd->data != req->cqe.user_data)
			continue;
		if (poll_only && req->opcode != IORING_OP_POLL_ADD)
			continue;
		if (cd->flags & IORING_ASYNC_CANCEL_ALL) {
			if (io_cancel_match_sequence(req, cd->seq))
				continue;
		}
		return req;
	}
	return NULL;
}
static struct io_kiocb *io_poll_file_find(struct io_ring_ctx *ctx,
					  struct io_cancel_data *cd)
{
	unsigned nr_buckets = 1U << ctx->cancel_table.hash_bits;
	struct io_kiocb *req;
	int i;

	for (i = 0; i < nr_buckets; i++) {
		struct io_hash_bucket *hb = &ctx->cancel_table.hbs[i];

		hlist_for_each_entry(req, &hb->list, hash_node) {
			if (io_cancel_req_match(req, cd))
				return req;
		}
	}
	return NULL;
}
static int io_poll_disarm(struct io_kiocb *req)
{
	if (!req)
		return -ENOENT;
	if (!io_poll_get_ownership(req))
		return -EALREADY;
	io_poll_remove_entries(req);
	hash_del(&req->hash_node);
	return 0;
}
static int __io_poll_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd)
{
	struct io_kiocb *req;

	if (cd->flags & (IORING_ASYNC_CANCEL_FD | IORING_ASYNC_CANCEL_OP |
			 IORING_ASYNC_CANCEL_ANY))
		req = io_poll_file_find(ctx, cd);
	else
		req = io_poll_find(ctx, false, cd);

	if (req) {
		io_poll_cancel_req(req);
		return 0;
	}
	return -ENOENT;
}
int io_poll_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
		   unsigned issue_flags)
{
	int ret;

	io_ring_submit_lock(ctx, issue_flags);
	ret = __io_poll_cancel(ctx, cd);
	io_ring_submit_unlock(ctx, issue_flags);
	return ret;
}
static __poll_t io_poll_parse_events(const struct io_uring_sqe *sqe,
				     unsigned int flags)
{
	u32 events;

	events = READ_ONCE(sqe->poll32_events);
#ifdef __BIG_ENDIAN
	events = swahw32(events);
#endif
	if (!(flags & IORING_POLL_ADD_MULTI))
		events |= EPOLLONESHOT;
	if (!(flags & IORING_POLL_ADD_LEVEL))
		events |= EPOLLET;
	return demangle_poll(events) |
		(events & (EPOLLEXCLUSIVE|EPOLLONESHOT|EPOLLET));
}
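/*
 * Userspace passes classic poll bits (POLLIN, POLLOUT, ...) in
 * sqe->poll32_events; demangle_poll() above converts them to the kernel's
 * EPOLL* representation, while mangle_poll() converts back before the result
 * is reported in cqe->res. The EPOLLEXCLUSIVE/EPOLLONESHOT/EPOLLET behaviour
 * flags are carried through unchanged.
 */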
int io_poll_remove_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_poll_update *upd = io_kiocb_to_cmd(req, struct io_poll_update);
	unsigned int flags;

	if (sqe->buf_index || sqe->splice_fd_in)
		return -EINVAL;
	flags = READ_ONCE(sqe->len);
	if (flags & ~(IORING_POLL_UPDATE_EVENTS | IORING_POLL_UPDATE_USER_DATA |
		      IORING_POLL_ADD_MULTI))
		return -EINVAL;
	/* meaningless without update */
	if (flags == IORING_POLL_ADD_MULTI)
		return -EINVAL;

	upd->old_user_data = READ_ONCE(sqe->addr);
	upd->update_events = flags & IORING_POLL_UPDATE_EVENTS;
	upd->update_user_data = flags & IORING_POLL_UPDATE_USER_DATA;

	upd->new_user_data = READ_ONCE(sqe->off);
	if (!upd->update_user_data && upd->new_user_data)
		return -EINVAL;
	if (upd->update_events)
		upd->events = io_poll_parse_events(sqe, flags);
	else if (sqe->poll32_events)
		return -EINVAL;

	return 0;
}
int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_poll *poll = io_kiocb_to_cmd(req, struct io_poll);
	u32 flags;

	if (sqe->buf_index || sqe->off || sqe->addr)
		return -EINVAL;
	flags = READ_ONCE(sqe->len);
	if (flags & ~IORING_POLL_ADD_MULTI)
		return -EINVAL;
	if ((flags & IORING_POLL_ADD_MULTI) && (req->flags & REQ_F_CQE_SKIP))
		return -EINVAL;

	poll->events = io_poll_parse_events(sqe, flags);
	return 0;
}
int io_poll_add(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_poll *poll = io_kiocb_to_cmd(req, struct io_poll);
	struct io_poll_table ipt;
	int ret;

	ipt.pt._qproc = io_poll_queue_proc;

	ret = __io_arm_poll_handler(req, poll, &ipt, poll->events, issue_flags);
	if (ret > 0) {
		io_req_set_res(req, ipt.result_mask, 0);
		ret = IOU_OK;
	}
	return ret ?: IOU_ISSUE_SKIP_COMPLETE;
}
int io_poll_remove(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_poll_update *poll_update = io_kiocb_to_cmd(req, struct io_poll_update);
	struct io_ring_ctx *ctx = req->ctx;
	struct io_cancel_data cd = { .ctx = ctx, .data = poll_update->old_user_data, };
	struct io_kiocb *preq;
	int ret2, ret = 0;

	io_ring_submit_lock(ctx, issue_flags);
	preq = io_poll_find(ctx, true, &cd);
	ret2 = io_poll_disarm(preq);
	if (ret2) {
		ret = ret2;
		goto out;
	}
	if (WARN_ON_ONCE(preq->opcode != IORING_OP_POLL_ADD)) {
		ret = -EFAULT;
		goto out;
	}

	if (poll_update->update_events || poll_update->update_user_data) {
		/* only replace the low event mask bits, keep behavior flags */
		if (poll_update->update_events) {
			struct io_poll *poll = io_kiocb_to_cmd(preq, struct io_poll);

			poll->events &= ~0xffff;
			poll->events |= poll_update->events & 0xffff;
			poll->events |= IO_POLL_UNMASK;
		}
		if (poll_update->update_user_data)
			preq->cqe.user_data = poll_update->new_user_data;

		ret2 = io_poll_add(preq, issue_flags & ~IO_URING_F_UNLOCKED);
		/* successfully updated, don't complete poll request */
		if (!ret2 || ret2 == -EIOCBQUEUED)
			goto out;
	}

	req_set_fail(preq);
	io_req_set_res(preq, -ECANCELED, 0);
	preq->io_task_work.func = io_req_task_complete;
	io_req_task_work_add(preq);
out:
	io_ring_submit_unlock(ctx, issue_flags);
	if (ret < 0) {
		req_set_fail(req);
		return ret;
	}
	/* complete update request, we're done with it */
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}