// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/io_uring.h>

#include <trace/events/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "io_uring.h"
#include "refs.h"
#include "cancel.h"
#include "timeout.h"
struct io_timeout {
	struct file			*file;
	u32				off;
	u32				target_seq;
	u32				repeats;
	struct list_head		list;
	/* head of the link, used by linked timeouts only */
	struct io_kiocb			*head;
	/* for linked completions */
	struct io_kiocb			*prev;
};
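/*
 * Command data for IORING_OP_TIMEOUT_REMOVE: @addr holds the user_data of the
 * timeout to cancel, or, with IORING_TIMEOUT_UPDATE, the timeout (optionally a
 * linked one) whose expiration should be replaced with @ts.
 */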
struct io_timeout_rem {
	struct file			*file;
	u64				addr;

	/* timeout update */
	struct timespec64		ts;
	u32				flags;
	bool				ltimeout;
};
static inline bool io_is_timeout_noseq(struct io_kiocb *req)
{
	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
	struct io_timeout_data *data = req->async_data;

	return !timeout->off || data->flags & IORING_TIMEOUT_MULTISHOT;
}
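/*
 * Drop a completion reference on @req; once the last reference goes away the
 * next linked request (if any) is queued and the request itself is freed.
 */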
static inline void io_put_req(struct io_kiocb *req)
{
	if (req_ref_put_and_test(req)) {
		io_queue_next(req);
		io_free_req(req);
	}
}
static inline bool io_timeout_finish(struct io_timeout *timeout,
				     struct io_timeout_data *data)
{
	if (!(data->flags & IORING_TIMEOUT_MULTISHOT))
		return true;

	if (!timeout->off || (timeout->repeats && --timeout->repeats))
		return false;

	return true;
}
static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer);
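/*
 * Task-work completion for an expired timeout. Multishot timeouts that still
 * have shots left post a CQE with IORING_CQE_F_MORE and re-arm the hrtimer
 * instead of completing the request.
 */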
static void io_timeout_complete(struct io_kiocb *req, struct io_tw_state *ts)
{
	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
	struct io_timeout_data *data = req->async_data;
	struct io_ring_ctx *ctx = req->ctx;

	if (!io_timeout_finish(timeout, data)) {
		if (io_req_post_cqe(req, -ETIME, IORING_CQE_F_MORE)) {
			/* re-arm timer */
			spin_lock_irq(&ctx->timeout_lock);
			list_add(&timeout->list, ctx->timeout_list.prev);
			hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), data->mode);
			spin_unlock_irq(&ctx->timeout_lock);
			return;
		}
	}

	io_req_task_complete(req, ts);
}
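/*
 * Try to cancel a pending timeout and complete it with @status. Fails (returns
 * false) only if the hrtimer callback is already running, in which case the
 * timer owns the completion.
 */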
static bool io_kill_timeout(struct io_kiocb *req, int status)
	__must_hold(&req->ctx->timeout_lock)
{
	struct io_timeout_data *io = req->async_data;

	if (hrtimer_try_to_cancel(&io->timer) != -1) {
		struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);

		if (status)
			req_set_fail(req);
		atomic_set(&req->ctx->cq_timeouts,
			atomic_read(&req->ctx->cq_timeouts) + 1);
		list_del_init(&timeout->list);
		io_req_queue_tw_complete(req, status);
		return true;
	}
	return false;
}
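/*
 * Complete all sequenced timeouts whose target CQE count has been reached.
 * The comparison below is done in "events since the last flush" space so that
 * u32 wraparound is harmless: e.g. with cq_last_tm_flush == 0xfffffffe,
 * target_seq == 1 and seq == 2, the unsigned subtractions yield
 * events_needed == 3 and events_got == 4, so the timeout still fires.
 */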
__cold void io_flush_timeouts(struct io_ring_ctx *ctx)
{
	u32 seq;
	struct io_timeout *timeout, *tmp;

	spin_lock_irq(&ctx->timeout_lock);
	seq = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts);

	list_for_each_entry_safe(timeout, tmp, &ctx->timeout_list, list) {
		struct io_kiocb *req = cmd_to_io_kiocb(timeout);
		u32 events_needed, events_got;

		if (io_is_timeout_noseq(req))
			break;

		/*
		 * Since seq can easily wrap around over time, subtract
		 * the last seq at which timeouts were flushed before comparing.
		 * Assuming not more than 2^31-1 events have happened since,
		 * these subtractions won't have wrapped, so we can check if
		 * target is in [last_seq, current_seq] by comparing the two.
		 */
		events_needed = timeout->target_seq - ctx->cq_last_tm_flush;
		events_got = seq - ctx->cq_last_tm_flush;
		if (events_got < events_needed)
			break;

		io_kill_timeout(req, 0);
	}
	ctx->cq_last_tm_flush = seq;
	spin_unlock_irq(&ctx->timeout_lock);
}
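/*
 * Task-work callback that completes a detached link chain: each request is
 * finished with -ECANCELED, or with its stored result if it had already been
 * marked as failed.
 */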
static void io_req_tw_fail_links(struct io_kiocb *link, struct io_tw_state *ts)
{
	io_tw_lock(link->ctx, ts);
	while (link) {
		struct io_kiocb *nxt = link->link;
		long res = -ECANCELED;

		if (link->flags & REQ_F_FAIL)
			res = link->cqe.res;
		link->link = NULL;
		io_req_set_res(link, res, 0);
		io_req_task_complete(link, ts);
		link = nxt;
	}
}
static void io_fail_links(struct io_kiocb *req)
	__must_hold(&req->ctx->completion_lock)
{
	struct io_kiocb *link = req->link;
	bool ignore_cqes = req->flags & REQ_F_SKIP_LINK_CQES;

	if (!link)
		return;

	while (link) {
		if (ignore_cqes)
			link->flags |= REQ_F_CQE_SKIP;
		else
			link->flags &= ~REQ_F_CQE_SKIP;
		trace_io_uring_fail_link(req, link);
		link = link->link;
	}

	link = req->link;
	link->io_task_work.func = io_req_tw_fail_links;
	io_req_task_work_add(link);
	req->link = NULL;
}
static inline void io_remove_next_linked(struct io_kiocb *req)
{
	struct io_kiocb *nxt = req->link;

	req->link = nxt->link;
	nxt->link = NULL;
}
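/*
 * Disarm any timeout linked to @req before its link chain is walked: a not yet
 * armed IORING_OP_LINK_TIMEOUT is cancelled directly, an armed one has its
 * hrtimer cancelled under ->timeout_lock. If the request failed and isn't a
 * hard link, the rest of the chain is failed as well.
 */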
void io_disarm_next(struct io_kiocb *req)
	__must_hold(&req->ctx->completion_lock)
{
	struct io_kiocb *link = NULL;

	if (req->flags & REQ_F_ARM_LTIMEOUT) {
		link = req->link;
		req->flags &= ~REQ_F_ARM_LTIMEOUT;
		if (link && link->opcode == IORING_OP_LINK_TIMEOUT) {
			io_remove_next_linked(req);
			io_req_queue_tw_complete(link, -ECANCELED);
		}
	} else if (req->flags & REQ_F_LINK_TIMEOUT) {
		struct io_ring_ctx *ctx = req->ctx;

		spin_lock_irq(&ctx->timeout_lock);
		link = io_disarm_linked_timeout(req);
		spin_unlock_irq(&ctx->timeout_lock);
		if (link)
			io_req_queue_tw_complete(link, -ECANCELED);
	}
	if (unlikely((req->flags & REQ_F_FAIL) &&
		     !(req->flags & REQ_F_HARDLINK)))
		io_fail_links(req);
}
struct io_kiocb *__io_disarm_linked_timeout(struct io_kiocb *req,
					    struct io_kiocb *link)
	__must_hold(&req->ctx->completion_lock)
	__must_hold(&req->ctx->timeout_lock)
{
	struct io_timeout_data *io = link->async_data;
	struct io_timeout *timeout = io_kiocb_to_cmd(link, struct io_timeout);

	io_remove_next_linked(req);
	timeout->head = NULL;
	if (hrtimer_try_to_cancel(&io->timer) != -1) {
		list_del(&timeout->list);
		return link;
	}

	return NULL;
}
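/*
 * hrtimer callback for a regular timeout: drop it from ->timeout_list, account
 * it in ->cq_timeouts and punt the -ETIME completion to task work.
 */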
static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer)
{
	struct io_timeout_data *data = container_of(timer,
						struct io_timeout_data, timer);
	struct io_kiocb *req = data->req;
	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
	struct io_ring_ctx *ctx = req->ctx;
	unsigned long flags;

	spin_lock_irqsave(&ctx->timeout_lock, flags);
	list_del_init(&timeout->list);
	atomic_set(&req->ctx->cq_timeouts,
		atomic_read(&req->ctx->cq_timeouts) + 1);
	spin_unlock_irqrestore(&ctx->timeout_lock, flags);

	if (!(data->flags & IORING_TIMEOUT_ETIME_SUCCESS))
		req_set_fail(req);

	io_req_set_res(req, -ETIME, 0);
	req->io_task_work.func = io_timeout_complete;
	io_req_task_work_add(req);
	return HRTIMER_NORESTART;
}
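/*
 * Find the pending timeout matching @cd, cancel its hrtimer and unlink it.
 * Returns the request, or an ERR_PTR: -ENOENT if nothing matched, -EALREADY
 * if the timer is already firing.
 */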
static struct io_kiocb *io_timeout_extract(struct io_ring_ctx *ctx,
					   struct io_cancel_data *cd)
	__must_hold(&ctx->timeout_lock)
{
	struct io_timeout *timeout;
	struct io_timeout_data *io;
	struct io_kiocb *req = NULL;

	list_for_each_entry(timeout, &ctx->timeout_list, list) {
		struct io_kiocb *tmp = cmd_to_io_kiocb(timeout);

		if (io_cancel_req_match(tmp, cd)) {
			req = tmp;
			break;
		}
	}
	if (!req)
		return ERR_PTR(-ENOENT);

	io = req->async_data;
	if (hrtimer_try_to_cancel(&io->timer) == -1)
		return ERR_PTR(-EALREADY);
	timeout = io_kiocb_to_cmd(req, struct io_timeout);
	list_del_init(&timeout->list);
	return req;
}
int io_timeout_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd)
	__must_hold(&ctx->completion_lock)
{
	struct io_kiocb *req;

	spin_lock_irq(&ctx->timeout_lock);
	req = io_timeout_extract(ctx, cd);
	spin_unlock_irq(&ctx->timeout_lock);

	if (IS_ERR(req))
		return PTR_ERR(req);
	io_req_task_queue_fail(req, -ECANCELED);
	return 0;
}
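/*
 * Task-work for an expired linked timeout: attempt to cancel the request it
 * was armed against (unless task-work is being torn down), then complete the
 * timeout with -ETIME if the cancellation went through, or with the
 * cancellation error otherwise.
 */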
static void io_req_task_link_timeout(struct io_kiocb *req, struct io_tw_state *ts)
{
	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
	struct io_kiocb *prev = timeout->prev;
	int ret;

	if (prev) {
		if (!io_should_terminate_tw()) {
			struct io_cancel_data cd = {
				.ctx		= req->ctx,
				.data		= prev->cqe.user_data,
			};

			ret = io_try_cancel(req->tctx, &cd, 0);
		} else {
			ret = -ECANCELED;
		}
		io_req_set_res(req, ret ?: -ETIME, 0);
		io_req_task_complete(req, ts);
		io_put_req(prev);
	} else {
		io_req_set_res(req, -ETIME, 0);
		io_req_task_complete(req, ts);
	}
}
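/*
 * hrtimer callback for a linked timeout: detach the target request from the
 * link chain while holding ->timeout_lock, grab a reference to it if it is
 * still alive, and punt the actual cancellation to task work.
 */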
static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
{
	struct io_timeout_data *data = container_of(timer,
						struct io_timeout_data, timer);
	struct io_kiocb *prev, *req = data->req;
	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
	struct io_ring_ctx *ctx = req->ctx;
	unsigned long flags;

	spin_lock_irqsave(&ctx->timeout_lock, flags);
	prev = timeout->head;
	timeout->head = NULL;

	/*
	 * We don't expect the list to be empty, that will only happen if we
	 * race with the completion of the linked work.
	 */
	if (prev) {
		io_remove_next_linked(prev);
		if (!req_ref_inc_not_zero(prev))
			prev = NULL;
	}
	list_del(&timeout->list);
	timeout->prev = prev;
	spin_unlock_irqrestore(&ctx->timeout_lock, flags);

	req->io_task_work.func = io_req_task_link_timeout;
	io_req_task_work_add(req);
	return HRTIMER_NORESTART;
}
static clockid_t io_timeout_get_clock(struct io_timeout_data *data)
{
	switch (data->flags & IORING_TIMEOUT_CLOCK_MASK) {
	case IORING_TIMEOUT_BOOTTIME:
		return CLOCK_BOOTTIME;
	case IORING_TIMEOUT_REALTIME:
		return CLOCK_REALTIME;
	default:
		/* can't happen, vetted at prep time */
		WARN_ON_ONCE(1);
		fallthrough;
	case 0:
		return CLOCK_MONOTONIC;
	}
}
static int io_linked_timeout_update(struct io_ring_ctx *ctx, __u64 user_data,
				    struct timespec64 *ts, enum hrtimer_mode mode)
	__must_hold(&ctx->timeout_lock)
{
	struct io_timeout_data *io;
	struct io_timeout *timeout;
	struct io_kiocb *req = NULL;

	list_for_each_entry(timeout, &ctx->ltimeout_list, list) {
		struct io_kiocb *tmp = cmd_to_io_kiocb(timeout);

		if (user_data == tmp->cqe.user_data) {
			req = tmp;
			break;
		}
	}
	if (!req)
		return -ENOENT;

	io = req->async_data;
	if (hrtimer_try_to_cancel(&io->timer) == -1)
		return -EALREADY;
	hrtimer_init(&io->timer, io_timeout_get_clock(io), mode);
	io->timer.function = io_link_timeout_fn;
	hrtimer_start(&io->timer, timespec64_to_ktime(*ts), mode);
	return 0;
}
static int io_timeout_update(struct io_ring_ctx *ctx, __u64 user_data,
			     struct timespec64 *ts, enum hrtimer_mode mode)
	__must_hold(&ctx->timeout_lock)
{
	struct io_cancel_data cd = { .ctx = ctx, .data = user_data, };
	struct io_kiocb *req = io_timeout_extract(ctx, &cd);
	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
	struct io_timeout_data *data;

	if (IS_ERR(req))
		return PTR_ERR(req);

	timeout->off = 0; /* noseq */
	data = req->async_data;
	list_add_tail(&timeout->list, &ctx->timeout_list);
	hrtimer_init(&data->timer, io_timeout_get_clock(data), mode);
	data->timer.function = io_timeout_fn;
	hrtimer_start(&data->timer, timespec64_to_ktime(*ts), mode);
	return 0;
}
int io_timeout_remove_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_timeout_rem *tr = io_kiocb_to_cmd(req, struct io_timeout_rem);

	if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
		return -EINVAL;
	if (sqe->buf_index || sqe->len || sqe->splice_fd_in)
		return -EINVAL;

	tr->ltimeout = false;
	tr->addr = READ_ONCE(sqe->addr);
	tr->flags = READ_ONCE(sqe->timeout_flags);
	if (tr->flags & IORING_TIMEOUT_UPDATE_MASK) {
		if (hweight32(tr->flags & IORING_TIMEOUT_CLOCK_MASK) > 1)
			return -EINVAL;
		if (tr->flags & IORING_LINK_TIMEOUT_UPDATE)
			tr->ltimeout = true;
		if (tr->flags & ~(IORING_TIMEOUT_UPDATE_MASK|IORING_TIMEOUT_ABS))
			return -EINVAL;
		if (get_timespec64(&tr->ts, u64_to_user_ptr(sqe->addr2)))
			return -EFAULT;
		if (tr->ts.tv_sec < 0 || tr->ts.tv_nsec < 0)
			return -EINVAL;
	} else if (tr->flags) {
		/* timeout removal doesn't support flags */
		return -EINVAL;
	}

	return 0;
}
static inline enum hrtimer_mode io_translate_timeout_mode(unsigned int flags)
{
	return (flags & IORING_TIMEOUT_ABS) ? HRTIMER_MODE_ABS
					    : HRTIMER_MODE_REL;
}
/*
 * Remove or update an existing timeout command
 */
int io_timeout_remove(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_timeout_rem *tr = io_kiocb_to_cmd(req, struct io_timeout_rem);
	struct io_ring_ctx *ctx = req->ctx;
	int ret;

	if (!(tr->flags & IORING_TIMEOUT_UPDATE)) {
		struct io_cancel_data cd = { .ctx = ctx, .data = tr->addr, };

		spin_lock(&ctx->completion_lock);
		ret = io_timeout_cancel(ctx, &cd);
		spin_unlock(&ctx->completion_lock);
	} else {
		enum hrtimer_mode mode = io_translate_timeout_mode(tr->flags);

		spin_lock_irq(&ctx->timeout_lock);
		if (tr->ltimeout)
			ret = io_linked_timeout_update(ctx, tr->addr, &tr->ts, mode);
		else
			ret = io_timeout_update(ctx, tr->addr, &tr->ts, mode);
		spin_unlock_irq(&ctx->timeout_lock);
	}

	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}
static int __io_timeout_prep(struct io_kiocb *req,
			     const struct io_uring_sqe *sqe,
			     bool is_timeout_link)
{
	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
	struct io_timeout_data *data;
	unsigned flags;
	u32 off = READ_ONCE(sqe->off);

	if (sqe->buf_index || sqe->len != 1 || sqe->splice_fd_in)
		return -EINVAL;
	if (off && is_timeout_link)
		return -EINVAL;
	flags = READ_ONCE(sqe->timeout_flags);
	if (flags & ~(IORING_TIMEOUT_ABS | IORING_TIMEOUT_CLOCK_MASK |
		      IORING_TIMEOUT_ETIME_SUCCESS |
		      IORING_TIMEOUT_MULTISHOT))
		return -EINVAL;
	/* more than one clock specified is invalid, obviously */
	if (hweight32(flags & IORING_TIMEOUT_CLOCK_MASK) > 1)
		return -EINVAL;
	/* multishot requests only make sense with rel values */
	if (!(~flags & (IORING_TIMEOUT_MULTISHOT | IORING_TIMEOUT_ABS)))
		return -EINVAL;

	INIT_LIST_HEAD(&timeout->list);
	timeout->off = off;
	if (unlikely(off && !req->ctx->off_timeout_used))
		req->ctx->off_timeout_used = true;
	/*
	 * for multishot reqs w/ fixed nr of repeats, repeats tracks the
	 * remaining nr
	 */
	timeout->repeats = 0;
	if ((flags & IORING_TIMEOUT_MULTISHOT) && off > 0)
		timeout->repeats = off;

	if (WARN_ON_ONCE(req_has_async_data(req)))
		return -EFAULT;
	if (io_alloc_async_data(req))
		return -ENOMEM;

	data = req->async_data;
	data->req = req;
	data->flags = flags;

	if (get_timespec64(&data->ts, u64_to_user_ptr(sqe->addr)))
		return -EFAULT;

	if (data->ts.tv_sec < 0 || data->ts.tv_nsec < 0)
		return -EINVAL;

	data->mode = io_translate_timeout_mode(flags);
	hrtimer_init(&data->timer, io_timeout_get_clock(data), data->mode);

	if (is_timeout_link) {
		struct io_submit_link *link = &req->ctx->submit_state.link;

		if (!link->head)
			return -EINVAL;
		if (link->last->opcode == IORING_OP_LINK_TIMEOUT)
			return -EINVAL;
		timeout->head = link->last;
		link->last->flags |= REQ_F_ARM_LTIMEOUT;
	}
	return 0;
}
int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	return __io_timeout_prep(req, sqe, false);
}
int io_link_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	return __io_timeout_prep(req, sqe, true);
}
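/*
 * Arm a timeout request. Pure (offset-less) and multishot timeouts go to the
 * back of ->timeout_list; sequenced ones are insertion-sorted by how many
 * completions they still need, so the list head is always the next timeout
 * to flush.
 */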
int io_timeout(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
	struct io_ring_ctx *ctx = req->ctx;
	struct io_timeout_data *data = req->async_data;
	struct list_head *entry;
	u32 tail, off = timeout->off;

	spin_lock_irq(&ctx->timeout_lock);

	/*
	 * sqe->off holds how many events that need to occur for this
	 * timeout event to be satisfied. If it isn't set, then this is
	 * a pure timeout request, sequence isn't used.
	 */
	if (io_is_timeout_noseq(req)) {
		entry = ctx->timeout_list.prev;
		goto add;
	}

	tail = data_race(ctx->cached_cq_tail) - atomic_read(&ctx->cq_timeouts);
	timeout->target_seq = tail + off;

	/* Update the last seq here in case io_flush_timeouts() hasn't.
	 * This is safe because ->completion_lock is held, and submissions
	 * and completions are never mixed in the same ->completion_lock section.
	 */
	ctx->cq_last_tm_flush = tail;

	/*
	 * Insertion sort, ensuring the first entry in the list is always
	 * the one we need first.
	 */
	list_for_each_prev(entry, &ctx->timeout_list) {
		struct io_timeout *nextt = list_entry(entry, struct io_timeout, list);
		struct io_kiocb *nxt = cmd_to_io_kiocb(nextt);

		if (io_is_timeout_noseq(nxt))
			continue;
		/* nxt.seq is behind @tail, otherwise would've been completed */
		if (off >= nextt->target_seq - tail)
			break;
	}
add:
	list_add(&timeout->list, entry);
	data->timer.function = io_timeout_fn;
	hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), data->mode);
	spin_unlock_irq(&ctx->timeout_lock);
	return IOU_ISSUE_SKIP_COMPLETE;
}
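/*
 * Arm a previously prepared linked timeout. If the request it was linked to
 * has already completed (timeout->head cleared), the timer is never started
 * and the submission reference is simply dropped.
 */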
void io_queue_linked_timeout(struct io_kiocb *req)
{
	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
	struct io_ring_ctx *ctx = req->ctx;

	spin_lock_irq(&ctx->timeout_lock);
	/*
	 * If the back reference is NULL, then our linked request finished
	 * before we got a chance to setup the timer
	 */
	if (timeout->head) {
		struct io_timeout_data *data = req->async_data;

		data->timer.function = io_link_timeout_fn;
		hrtimer_start(&data->timer, timespec64_to_ktime(data->ts),
				data->mode);
		list_add_tail(&timeout->list, &ctx->ltimeout_list);
	}
	spin_unlock_irq(&ctx->timeout_lock);
	/* drop submission reference */
	io_put_req(req);
}
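/*
 * Check whether @head (and its link chain) belongs to @tctx and should be
 * cancelled: with cancel_all everything owned by the task matches, otherwise
 * only chains with an inflight request do.
 */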
static bool io_match_task(struct io_kiocb *head, struct io_uring_task *tctx,
			  bool cancel_all)
	__must_hold(&head->ctx->timeout_lock)
{
	struct io_kiocb *req;

	if (tctx && head->tctx != tctx)
		return false;
	if (cancel_all)
		return true;

	io_for_each_link(req, head) {
		if (req->flags & REQ_F_INFLIGHT)
			return true;
	}
	return false;
}
660 __cold
bool io_kill_timeouts(struct io_ring_ctx
*ctx
, struct io_uring_task
*tctx
,
663 struct io_timeout
*timeout
, *tmp
;
667 * completion_lock is needed for io_match_task(). Take it before
668 * timeout_lockfirst to keep locking ordering.
670 spin_lock(&ctx
->completion_lock
);
671 spin_lock_irq(&ctx
->timeout_lock
);
672 list_for_each_entry_safe(timeout
, tmp
, &ctx
->timeout_list
, list
) {
673 struct io_kiocb
*req
= cmd_to_io_kiocb(timeout
);
675 if (io_match_task(req
, tctx
, cancel_all
) &&
676 io_kill_timeout(req
, -ECANCELED
))
679 spin_unlock_irq(&ctx
->timeout_lock
);
680 spin_unlock(&ctx
->completion_lock
);
681 return canceled
!= 0;