// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/io_uring.h>

#include <trace/events/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "io_uring.h"
#include "refs.h"
#include "cancel.h"
#include "timeout.h"

struct io_timeout {
	struct file			*file;
	u32				off;
	u32				target_seq;
	u32				repeats;
	struct list_head		list;
	/* head of the link, used by linked timeouts only */
	struct io_kiocb			*head;
	/* for linked completions */
	struct io_kiocb			*prev;
};

struct io_timeout_rem {
	struct file			*file;
	u64				addr;

	/* timeout update */
	struct timespec64		ts;
	u32				flags;
	bool				ltimeout;
};

static inline bool io_is_timeout_noseq(struct io_kiocb *req)
{
	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
	struct io_timeout_data *data = req->async_data;

	return !timeout->off || data->flags & IORING_TIMEOUT_MULTISHOT;
}

static inline void io_put_req(struct io_kiocb *req)
{
	if (req_ref_put_and_test(req)) {
		io_queue_next(req);
		io_free_req(req);
	}
}

static inline bool io_timeout_finish(struct io_timeout *timeout,
				     struct io_timeout_data *data)
{
	if (!(data->flags & IORING_TIMEOUT_MULTISHOT))
		return true;

	if (!timeout->off || (timeout->repeats && --timeout->repeats))
		return false;

	return true;
}

static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer);
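
/*
 * Task-work completion for a fired timeout. For a multishot timeout that
 * still has shots left, post an -ETIME CQE with IORING_CQE_F_MORE set and
 * re-arm the hrtimer instead of completing the request.
 */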
static void io_timeout_complete(struct io_kiocb *req, struct io_tw_state *ts)
{
	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
	struct io_timeout_data *data = req->async_data;
	struct io_ring_ctx *ctx = req->ctx;

	if (!io_timeout_finish(timeout, data)) {
		if (io_req_post_cqe(req, -ETIME, IORING_CQE_F_MORE)) {
			/* re-arm timer */
			spin_lock_irq(&ctx->timeout_lock);
			list_add(&timeout->list, ctx->timeout_list.prev);
			data->timer.function = io_timeout_fn;
			hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), data->mode);
			spin_unlock_irq(&ctx->timeout_lock);
			return;
		}
	}

	io_req_task_complete(req, ts);
}
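
/*
 * Cancel a pending timeout under ->timeout_lock. This only succeeds if the
 * hrtimer callback isn't currently running; the request is then taken off
 * the timeout list and completed with @status via task work.
 */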
static bool io_kill_timeout(struct io_kiocb *req, int status)
	__must_hold(&req->ctx->timeout_lock)
{
	struct io_timeout_data *io = req->async_data;

	if (hrtimer_try_to_cancel(&io->timer) != -1) {
		struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);

		if (status)
			req_set_fail(req);
		atomic_set(&req->ctx->cq_timeouts,
			atomic_read(&req->ctx->cq_timeouts) + 1);
		list_del_init(&timeout->list);
		io_req_queue_tw_complete(req, status);
		return true;
	}
	return false;
}
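
/*
 * Walk the sequence-ordered timeout list and complete every timeout whose
 * target CQ sequence has been reached. The comparison is done with
 * wrap-safe unsigned arithmetic relative to the last flush point. A sketch
 * with hypothetical values: if cq_last_tm_flush == 0xfffffff0,
 * target_seq == 0x5 and seq == 0x2, then events_needed == 0x15 and
 * events_got == 0x12, so the timeout correctly stays queued despite the
 * u32 wrap.
 */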
__cold void io_flush_timeouts(struct io_ring_ctx *ctx)
	__must_hold(&ctx->completion_lock)
{
	struct io_timeout *timeout, *tmp;
	u32 seq;

	spin_lock_irq(&ctx->timeout_lock);
	seq = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts);

	list_for_each_entry_safe(timeout, tmp, &ctx->timeout_list, list) {
		struct io_kiocb *req = cmd_to_io_kiocb(timeout);
		u32 events_needed, events_got;

		if (io_is_timeout_noseq(req))
			break;

		/*
		 * Since seq can easily wrap around over time, subtract
		 * the last seq at which timeouts were flushed before comparing.
		 * Assuming not more than 2^31-1 events have happened since,
		 * these subtractions won't have wrapped, so we can check if
		 * target is in [last_seq, current_seq] by comparing the two.
		 */
		events_needed = timeout->target_seq - ctx->cq_last_tm_flush;
		events_got = seq - ctx->cq_last_tm_flush;
		if (events_got < events_needed)
			break;

		io_kill_timeout(req, 0);
	}
	ctx->cq_last_tm_flush = seq;
	spin_unlock_irq(&ctx->timeout_lock);
}
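
/*
 * Failing a link chain: every request linked behind a failed or cancelled
 * one is completed with -ECANCELED (or its own result, if it was already
 * marked REQ_F_FAIL). io_fail_links() runs under ->completion_lock and
 * punts the actual completions to task work via io_req_tw_fail_links().
 */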
static void io_req_tw_fail_links(struct io_kiocb *link, struct io_tw_state *ts)
{
	io_tw_lock(link->ctx, ts);
	while (link) {
		struct io_kiocb *nxt = link->link;
		long res = -ECANCELED;

		if (link->flags & REQ_F_FAIL)
			res = link->cqe.res;
		link->link = NULL;
		io_req_set_res(link, res, 0);
		io_req_task_complete(link, ts);
		link = nxt;
	}
}

static void io_fail_links(struct io_kiocb *req)
	__must_hold(&req->ctx->completion_lock)
{
	struct io_kiocb *link = req->link;
	bool ignore_cqes = req->flags & REQ_F_SKIP_LINK_CQES;

	if (!link)
		return;

	while (link) {
		if (ignore_cqes)
			link->flags |= REQ_F_CQE_SKIP;
		else
			link->flags &= ~REQ_F_CQE_SKIP;
		trace_io_uring_fail_link(req, link);
		link = link->link;
	}

	link = req->link;
	link->io_task_work.func = io_req_tw_fail_links;
	io_req_task_work_add(link);
	req->link = NULL;
}

static inline void io_remove_next_linked(struct io_kiocb *req)
{
	struct io_kiocb *nxt = req->link;

	req->link = nxt->link;
	nxt->link = NULL;
}
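
/*
 * Disarm any timeout linked to @req before its links are failed: either the
 * not-yet-armed REQ_F_ARM_LTIMEOUT case, where the pending linked timeout is
 * simply cancelled, or the armed REQ_F_LINK_TIMEOUT case, where the hrtimer
 * has to be stopped under ->timeout_lock first.
 */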
void io_disarm_next(struct io_kiocb *req)
	__must_hold(&req->ctx->completion_lock)
{
	struct io_kiocb *link = NULL;

	if (req->flags & REQ_F_ARM_LTIMEOUT) {
		link = req->link;
		req->flags &= ~REQ_F_ARM_LTIMEOUT;
		if (link && link->opcode == IORING_OP_LINK_TIMEOUT) {
			io_remove_next_linked(req);
			io_req_queue_tw_complete(link, -ECANCELED);
		}
	} else if (req->flags & REQ_F_LINK_TIMEOUT) {
		struct io_ring_ctx *ctx = req->ctx;

		spin_lock_irq(&ctx->timeout_lock);
		link = io_disarm_linked_timeout(req);
		spin_unlock_irq(&ctx->timeout_lock);
		if (link)
			io_req_queue_tw_complete(link, -ECANCELED);
	}
	if (unlikely((req->flags & REQ_F_FAIL) &&
		     !(req->flags & REQ_F_HARDLINK)))
		io_fail_links(req);
}

struct io_kiocb *__io_disarm_linked_timeout(struct io_kiocb *req,
					    struct io_kiocb *link)
	__must_hold(&req->ctx->completion_lock)
	__must_hold(&req->ctx->timeout_lock)
{
	struct io_timeout_data *io = link->async_data;
	struct io_timeout *timeout = io_kiocb_to_cmd(link, struct io_timeout);

	io_remove_next_linked(req);
	timeout->head = NULL;
	if (hrtimer_try_to_cancel(&io->timer) != -1) {
		list_del(&timeout->list);
		return link;
	}

	return NULL;
}
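
/*
 * hrtimer callback for a plain timeout. It runs in hard-irq context, so it
 * only unlinks the timeout, bumps ->cq_timeouts and sets the -ETIME result;
 * the actual completion (and any multishot re-arm) is deferred to task work
 * via io_timeout_complete().
 */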
static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer)
{
	struct io_timeout_data *data = container_of(timer,
						struct io_timeout_data, timer);
	struct io_kiocb *req = data->req;
	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
	struct io_ring_ctx *ctx = req->ctx;
	unsigned long flags;

	spin_lock_irqsave(&ctx->timeout_lock, flags);
	list_del_init(&timeout->list);
	atomic_set(&req->ctx->cq_timeouts,
		atomic_read(&req->ctx->cq_timeouts) + 1);
	spin_unlock_irqrestore(&ctx->timeout_lock, flags);

	if (!(data->flags & IORING_TIMEOUT_ETIME_SUCCESS))
		req_set_fail(req);

	io_req_set_res(req, -ETIME, 0);
	req->io_task_work.func = io_timeout_complete;
	io_req_task_work_add(req);
	return HRTIMER_NORESTART;
}

static struct io_kiocb *io_timeout_extract(struct io_ring_ctx *ctx,
					   struct io_cancel_data *cd)
	__must_hold(&ctx->timeout_lock)
{
	struct io_timeout *timeout;
	struct io_timeout_data *io;
	struct io_kiocb *req = NULL;

	list_for_each_entry(timeout, &ctx->timeout_list, list) {
		struct io_kiocb *tmp = cmd_to_io_kiocb(timeout);

		if (io_cancel_req_match(tmp, cd)) {
			req = tmp;
			break;
		}
	}
	if (!req)
		return ERR_PTR(-ENOENT);

	io = req->async_data;
	if (hrtimer_try_to_cancel(&io->timer) == -1)
		return ERR_PTR(-EALREADY);
	timeout = io_kiocb_to_cmd(req, struct io_timeout);
	list_del_init(&timeout->list);
	return req;
}

int io_timeout_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd)
	__must_hold(&ctx->completion_lock)
{
	struct io_kiocb *req;

	spin_lock_irq(&ctx->timeout_lock);
	req = io_timeout_extract(ctx, cd);
	spin_unlock_irq(&ctx->timeout_lock);

	if (IS_ERR(req))
		return PTR_ERR(req);
	io_req_task_queue_fail(req, -ECANCELED);
	return 0;
}

static void io_req_task_link_timeout(struct io_kiocb *req, struct io_tw_state *ts)
{
	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
	struct io_kiocb *prev = timeout->prev;
	int ret = -ENOENT;

	if (prev) {
		if (!(req->task->flags & PF_EXITING)) {
			struct io_cancel_data cd = {
				.ctx		= req->ctx,
				.data		= prev->cqe.user_data,
			};

			ret = io_try_cancel(req->task->io_uring, &cd, 0);
		}
		io_req_set_res(req, ret ?: -ETIME, 0);
		io_req_task_complete(req, ts);
		io_put_req(prev);
	} else {
		io_req_set_res(req, -ETIME, 0);
		io_req_task_complete(req, ts);
	}
}
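
/*
 * hrtimer callback for a linked timeout. If the linked request is still
 * pending, detach it and grab a reference so it can be cancelled from task
 * work in io_req_task_link_timeout().
 */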
static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
{
	struct io_timeout_data *data = container_of(timer,
						struct io_timeout_data, timer);
	struct io_kiocb *prev, *req = data->req;
	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
	struct io_ring_ctx *ctx = req->ctx;
	unsigned long flags;

	spin_lock_irqsave(&ctx->timeout_lock, flags);
	prev = timeout->head;
	timeout->head = NULL;

	/*
	 * We don't expect the list to be empty, that will only happen if we
	 * race with the completion of the linked work.
	 */
	if (prev) {
		io_remove_next_linked(prev);
		if (!req_ref_inc_not_zero(prev))
			prev = NULL;
	}
	list_del(&timeout->list);
	timeout->prev = prev;
	spin_unlock_irqrestore(&ctx->timeout_lock, flags);

	req->io_task_work.func = io_req_task_link_timeout;
	io_req_task_work_add(req);
	return HRTIMER_NORESTART;
}

static clockid_t io_timeout_get_clock(struct io_timeout_data *data)
{
	switch (data->flags & IORING_TIMEOUT_CLOCK_MASK) {
	case IORING_TIMEOUT_BOOTTIME:
		return CLOCK_BOOTTIME;
	case IORING_TIMEOUT_REALTIME:
		return CLOCK_REALTIME;
	default:
		/* can't happen, vetted at prep time */
		WARN_ON_ONCE(1);
		fallthrough;
	case 0:
		return CLOCK_MONOTONIC;
	}
}
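
/*
 * IORING_TIMEOUT_UPDATE handling: find the existing (linked) timeout by
 * user_data, cancel its hrtimer and re-arm it with the new timespec and
 * mode. A timeout whose timer can no longer be cancelled reports -EALREADY.
 */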
static int io_linked_timeout_update(struct io_ring_ctx *ctx, __u64 user_data,
				    struct timespec64 *ts, enum hrtimer_mode mode)
	__must_hold(&ctx->timeout_lock)
{
	struct io_timeout_data *io;
	struct io_timeout *timeout;
	struct io_kiocb *req = NULL;

	list_for_each_entry(timeout, &ctx->ltimeout_list, list) {
		struct io_kiocb *tmp = cmd_to_io_kiocb(timeout);

		if (user_data == tmp->cqe.user_data) {
			req = tmp;
			break;
		}
	}
	if (!req)
		return -ENOENT;

	io = req->async_data;
	if (hrtimer_try_to_cancel(&io->timer) == -1)
		return -EALREADY;
	hrtimer_init(&io->timer, io_timeout_get_clock(io), mode);
	io->timer.function = io_link_timeout_fn;
	hrtimer_start(&io->timer, timespec64_to_ktime(*ts), mode);
	return 0;
}

static int io_timeout_update(struct io_ring_ctx *ctx, __u64 user_data,
			     struct timespec64 *ts, enum hrtimer_mode mode)
	__must_hold(&ctx->timeout_lock)
{
	struct io_cancel_data cd = { .ctx = ctx, .data = user_data, };
	struct io_kiocb *req = io_timeout_extract(ctx, &cd);
	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
	struct io_timeout_data *data;

	if (IS_ERR(req))
		return PTR_ERR(req);

	timeout->off = 0; /* noseq */
	data = req->async_data;
	list_add_tail(&timeout->list, &ctx->timeout_list);
	hrtimer_init(&data->timer, io_timeout_get_clock(data), mode);
	data->timer.function = io_timeout_fn;
	hrtimer_start(&data->timer, timespec64_to_ktime(*ts), mode);
	return 0;
}

int io_timeout_remove_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_timeout_rem *tr = io_kiocb_to_cmd(req, struct io_timeout_rem);

	if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
		return -EINVAL;
	if (sqe->buf_index || sqe->len || sqe->splice_fd_in)
		return -EINVAL;

	tr->ltimeout = false;
	tr->addr = READ_ONCE(sqe->addr);
	tr->flags = READ_ONCE(sqe->timeout_flags);
	if (tr->flags & IORING_TIMEOUT_UPDATE_MASK) {
		if (hweight32(tr->flags & IORING_TIMEOUT_CLOCK_MASK) > 1)
			return -EINVAL;
		if (tr->flags & IORING_LINK_TIMEOUT_UPDATE)
			tr->ltimeout = true;
		if (tr->flags & ~(IORING_TIMEOUT_UPDATE_MASK|IORING_TIMEOUT_ABS))
			return -EINVAL;
		if (get_timespec64(&tr->ts, u64_to_user_ptr(sqe->addr2)))
			return -EFAULT;
		if (tr->ts.tv_sec < 0 || tr->ts.tv_nsec < 0)
			return -EINVAL;
	} else if (tr->flags) {
		/* timeout removal doesn't support flags */
		return -EINVAL;
	}

	return 0;
}

static inline enum hrtimer_mode io_translate_timeout_mode(unsigned int flags)
{
	return (flags & IORING_TIMEOUT_ABS) ? HRTIMER_MODE_ABS
					    : HRTIMER_MODE_REL;
}

/*
 * Remove or update an existing timeout command
 */
int io_timeout_remove(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_timeout_rem *tr = io_kiocb_to_cmd(req, struct io_timeout_rem);
	struct io_ring_ctx *ctx = req->ctx;
	int ret;

	if (!(tr->flags & IORING_TIMEOUT_UPDATE)) {
		struct io_cancel_data cd = { .ctx = ctx, .data = tr->addr, };

		spin_lock(&ctx->completion_lock);
		ret = io_timeout_cancel(ctx, &cd);
		spin_unlock(&ctx->completion_lock);
	} else {
		enum hrtimer_mode mode = io_translate_timeout_mode(tr->flags);

		spin_lock_irq(&ctx->timeout_lock);
		if (tr->ltimeout)
			ret = io_linked_timeout_update(ctx, tr->addr, &tr->ts, mode);
		else
			ret = io_timeout_update(ctx, tr->addr, &tr->ts, mode);
		spin_unlock_irq(&ctx->timeout_lock);
	}

	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}
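
/*
 * Shared prep for IORING_OP_TIMEOUT and IORING_OP_LINK_TIMEOUT: validate the
 * SQE fields and flags, copy the user timespec into the async data and set
 * up (but don't start) the hrtimer. For the linked variant, also mark the
 * previous request in the submission link chain with REQ_F_ARM_LTIMEOUT.
 */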
static int __io_timeout_prep(struct io_kiocb *req,
			     const struct io_uring_sqe *sqe,
			     bool is_timeout_link)
{
	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
	struct io_timeout_data *data;
	unsigned flags;
	u32 off = READ_ONCE(sqe->off);

	if (sqe->buf_index || sqe->len != 1 || sqe->splice_fd_in)
		return -EINVAL;
	if (off && is_timeout_link)
		return -EINVAL;
	flags = READ_ONCE(sqe->timeout_flags);
	if (flags & ~(IORING_TIMEOUT_ABS | IORING_TIMEOUT_CLOCK_MASK |
		      IORING_TIMEOUT_ETIME_SUCCESS |
		      IORING_TIMEOUT_MULTISHOT))
		return -EINVAL;
	/* more than one clock specified is invalid, obviously */
	if (hweight32(flags & IORING_TIMEOUT_CLOCK_MASK) > 1)
		return -EINVAL;
	/* multishot requests only make sense with rel values */
	if (!(~flags & (IORING_TIMEOUT_MULTISHOT | IORING_TIMEOUT_ABS)))
		return -EINVAL;

	INIT_LIST_HEAD(&timeout->list);
	timeout->off = off;
	if (unlikely(off && !req->ctx->off_timeout_used))
		req->ctx->off_timeout_used = true;
	/*
	 * for multishot reqs w/ fixed nr of repeats, repeats tracks the
	 * remaining nr
	 */
	timeout->repeats = 0;
	if ((flags & IORING_TIMEOUT_MULTISHOT) && off > 0)
		timeout->repeats = off;

	if (WARN_ON_ONCE(req_has_async_data(req)))
		return -EFAULT;
	if (io_alloc_async_data(req))
		return -ENOMEM;

	data = req->async_data;
	data->req = req;
	data->flags = flags;

	if (get_timespec64(&data->ts, u64_to_user_ptr(sqe->addr)))
		return -EFAULT;

	if (data->ts.tv_sec < 0 || data->ts.tv_nsec < 0)
		return -EINVAL;

	data->mode = io_translate_timeout_mode(flags);
	hrtimer_init(&data->timer, io_timeout_get_clock(data), data->mode);

	if (is_timeout_link) {
		struct io_submit_link *link = &req->ctx->submit_state.link;

		if (!link->head)
			return -EINVAL;
		if (link->last->opcode == IORING_OP_LINK_TIMEOUT)
			return -EINVAL;
		timeout->head = link->last;
		link->last->flags |= REQ_F_ARM_LTIMEOUT;
	}
	return 0;
}

int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	return __io_timeout_prep(req, sqe, false);
}

int io_link_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	return __io_timeout_prep(req, sqe, true);
}
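
/*
 * Issue a timeout request. From userspace this is usually driven through
 * liburing, roughly along these lines (illustrative sketch, error handling
 * omitted):
 *
 *	struct __kernel_timespec ts = { .tv_sec = 1 };
 *	struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
 *
 *	io_uring_prep_timeout(sqe, &ts, 8, 0);
 *	io_uring_submit(&ring);
 *
 * where the count of 8 ends up in sqe->off, i.e. the timeout also completes
 * early once roughly 8 other completions have been posted.
 */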
int io_timeout(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
	struct io_ring_ctx *ctx = req->ctx;
	struct io_timeout_data *data = req->async_data;
	struct list_head *entry;
	u32 tail, off = timeout->off;

	spin_lock_irq(&ctx->timeout_lock);

	/*
	 * sqe->off holds how many events that need to occur for this
	 * timeout event to be satisfied. If it isn't set, then this is
	 * a pure timeout request, sequence isn't used.
	 */
	if (io_is_timeout_noseq(req)) {
		entry = ctx->timeout_list.prev;
		goto add;
	}

	tail = data_race(ctx->cached_cq_tail) - atomic_read(&ctx->cq_timeouts);
	timeout->target_seq = tail + off;

	/* Update the last seq here in case io_flush_timeouts() hasn't.
	 * This is safe because ->completion_lock is held, and submissions
	 * and completions are never mixed in the same ->completion_lock section.
	 */
	ctx->cq_last_tm_flush = tail;

	/*
	 * Insertion sort, ensuring the first entry in the list is always
	 * the one we need first.
	 */
	list_for_each_prev(entry, &ctx->timeout_list) {
		struct io_timeout *nextt = list_entry(entry, struct io_timeout, list);
		struct io_kiocb *nxt = cmd_to_io_kiocb(nextt);

		if (io_is_timeout_noseq(nxt))
			continue;
		/* nxt.seq is behind @tail, otherwise would've been completed */
		if (off >= nextt->target_seq - tail)
			break;
	}
add:
	list_add(&timeout->list, entry);
	data->timer.function = io_timeout_fn;
	hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), data->mode);
	spin_unlock_irq(&ctx->timeout_lock);
	return IOU_ISSUE_SKIP_COMPLETE;
}
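
/*
 * Arm a previously prepared linked timeout. If timeout->head has already
 * been cleared, the linked request finished first and there is nothing to
 * arm; either way the submission reference on the timeout request is
 * dropped.
 */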
void io_queue_linked_timeout(struct io_kiocb *req)
{
	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
	struct io_ring_ctx *ctx = req->ctx;

	spin_lock_irq(&ctx->timeout_lock);
	/*
	 * If the back reference is NULL, then our linked request finished
	 * before we got a chance to setup the timer
	 */
	if (timeout->head) {
		struct io_timeout_data *data = req->async_data;

		data->timer.function = io_link_timeout_fn;
		hrtimer_start(&data->timer, timespec64_to_ktime(data->ts),
				data->mode);
		list_add_tail(&timeout->list, &ctx->ltimeout_list);
	}
	spin_unlock_irq(&ctx->timeout_lock);
	/* drop submission reference */
	io_put_req(req);
}

static bool io_match_task(struct io_kiocb *head, struct task_struct *task,
			  bool cancel_all)
	__must_hold(&head->ctx->timeout_lock)
{
	struct io_kiocb *req;

	if (task && head->task != task)
		return false;
	if (cancel_all)
		return true;

	io_for_each_link(req, head) {
		if (req->flags & REQ_F_INFLIGHT)
			return true;
	}
	return false;
}

/* Returns true if we found and killed one or more timeouts */
__cold bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk,
			     bool cancel_all)
{
	struct io_timeout *timeout, *tmp;
	int canceled = 0;

	/*
	 * completion_lock is needed for io_match_task(). Take it before
	 * timeout_lock first to keep locking ordering.
	 */
	spin_lock(&ctx->completion_lock);
	spin_lock_irq(&ctx->timeout_lock);
	list_for_each_entry_safe(timeout, tmp, &ctx->timeout_list, list) {
		struct io_kiocb *req = cmd_to_io_kiocb(timeout);

		if (io_match_task(req, tsk, cancel_all) &&
		    io_kill_timeout(req, -ECANCELED))
			canceled++;
	}
	spin_unlock_irq(&ctx->timeout_lock);
	spin_unlock(&ctx->completion_lock);
	return canceled != 0;
}