Merge tag 'trace-printf-v6.13' of git://git.kernel.org/pub/scm/linux/kernel/git/trace...
[drm/drm-misc.git] / io_uring / napi.c
blobb1ade3fda30f3e97110503cfe7b19d8fee5d22c0
1 // SPDX-License-Identifier: GPL-2.0
3 #include "io_uring.h"
4 #include "napi.h"
6 #ifdef CONFIG_NET_RX_BUSY_POLL
/*
 * Lifetime of a tracked NAPI id: this is added to jiffies whenever an entry
 * is created or refreshed, and an entry whose deadline has passed is treated
 * as stale and may be cleaned out.
 */
#define NAPI_TIMEOUT		(60 * SEC_CONVERSION)
11 struct io_napi_entry {
12 unsigned int napi_id;
13 struct list_head list;
15 unsigned long timeout;
16 struct hlist_node node;
18 struct rcu_head rcu;
21 static struct io_napi_entry *io_napi_hash_find(struct hlist_head *hash_list,
22 unsigned int napi_id)
24 struct io_napi_entry *e;
26 hlist_for_each_entry_rcu(e, hash_list, node) {
27 if (e->napi_id != napi_id)
28 continue;
29 return e;
32 return NULL;
35 static inline ktime_t net_to_ktime(unsigned long t)
37 /* napi approximating usecs, reverse busy_loop_current_time */
38 return ns_to_ktime(t << 10);
41 int __io_napi_add_id(struct io_ring_ctx *ctx, unsigned int napi_id)
43 struct hlist_head *hash_list;
44 struct io_napi_entry *e;
46 /* Non-NAPI IDs can be rejected. */
47 if (napi_id < MIN_NAPI_ID)
48 return -EINVAL;
50 hash_list = &ctx->napi_ht[hash_min(napi_id, HASH_BITS(ctx->napi_ht))];
52 scoped_guard(rcu) {
53 e = io_napi_hash_find(hash_list, napi_id);
54 if (e) {
55 WRITE_ONCE(e->timeout, jiffies + NAPI_TIMEOUT);
56 return -EEXIST;
60 e = kmalloc(sizeof(*e), GFP_NOWAIT);
61 if (!e)
62 return -ENOMEM;
64 e->napi_id = napi_id;
65 e->timeout = jiffies + NAPI_TIMEOUT;
68 * guard(spinlock) is not used to manually unlock it before calling
69 * kfree()
71 spin_lock(&ctx->napi_lock);
72 if (unlikely(io_napi_hash_find(hash_list, napi_id))) {
73 spin_unlock(&ctx->napi_lock);
74 kfree(e);
75 return -EEXIST;
78 hlist_add_tail_rcu(&e->node, hash_list);
79 list_add_tail_rcu(&e->list, &ctx->napi_list);
80 spin_unlock(&ctx->napi_lock);
81 return 0;
84 static int __io_napi_del_id(struct io_ring_ctx *ctx, unsigned int napi_id)
86 struct hlist_head *hash_list;
87 struct io_napi_entry *e;
89 /* Non-NAPI IDs can be rejected. */
90 if (napi_id < MIN_NAPI_ID)
91 return -EINVAL;
93 hash_list = &ctx->napi_ht[hash_min(napi_id, HASH_BITS(ctx->napi_ht))];
94 guard(spinlock)(&ctx->napi_lock);
95 e = io_napi_hash_find(hash_list, napi_id);
96 if (!e)
97 return -ENOENT;
99 list_del_rcu(&e->list);
100 hash_del_rcu(&e->node);
101 kfree_rcu(e, rcu);
102 return 0;
105 static void __io_napi_remove_stale(struct io_ring_ctx *ctx)
107 struct io_napi_entry *e;
109 guard(spinlock)(&ctx->napi_lock);
111 * list_for_each_entry_safe() is not required as long as:
112 * 1. list_del_rcu() does not reset the deleted node next pointer
113 * 2. kfree_rcu() delays the memory freeing until the next quiescent
114 * state
116 list_for_each_entry(e, &ctx->napi_list, list) {
117 if (time_after(jiffies, READ_ONCE(e->timeout))) {
118 list_del_rcu(&e->list);
119 hash_del_rcu(&e->node);
120 kfree_rcu(e, rcu);
125 static inline void io_napi_remove_stale(struct io_ring_ctx *ctx, bool is_stale)
127 if (is_stale)
128 __io_napi_remove_stale(ctx);
131 static inline bool io_napi_busy_loop_timeout(ktime_t start_time,
132 ktime_t bp)
134 if (bp) {
135 ktime_t end_time = ktime_add(start_time, bp);
136 ktime_t now = net_to_ktime(busy_loop_current_time());
138 return ktime_after(now, end_time);
141 return true;
144 static bool io_napi_busy_loop_should_end(void *data,
145 unsigned long start_time)
147 struct io_wait_queue *iowq = data;
149 if (signal_pending(current))
150 return true;
151 if (io_should_wake(iowq) || io_has_work(iowq->ctx))
152 return true;
153 if (io_napi_busy_loop_timeout(net_to_ktime(start_time),
154 iowq->napi_busy_poll_dt))
155 return true;
157 return false;
161 * never report stale entries
163 static bool static_tracking_do_busy_loop(struct io_ring_ctx *ctx,
164 bool (*loop_end)(void *, unsigned long),
165 void *loop_end_arg)
167 struct io_napi_entry *e;
169 list_for_each_entry_rcu(e, &ctx->napi_list, list)
170 napi_busy_loop_rcu(e->napi_id, loop_end, loop_end_arg,
171 ctx->napi_prefer_busy_poll, BUSY_POLL_BUDGET);
172 return false;
175 static bool
176 dynamic_tracking_do_busy_loop(struct io_ring_ctx *ctx,
177 bool (*loop_end)(void *, unsigned long),
178 void *loop_end_arg)
180 struct io_napi_entry *e;
181 bool is_stale = false;
183 list_for_each_entry_rcu(e, &ctx->napi_list, list) {
184 napi_busy_loop_rcu(e->napi_id, loop_end, loop_end_arg,
185 ctx->napi_prefer_busy_poll, BUSY_POLL_BUDGET);
187 if (time_after(jiffies, READ_ONCE(e->timeout)))
188 is_stale = true;
191 return is_stale;
194 static inline bool
195 __io_napi_do_busy_loop(struct io_ring_ctx *ctx,
196 bool (*loop_end)(void *, unsigned long),
197 void *loop_end_arg)
199 if (READ_ONCE(ctx->napi_track_mode) == IO_URING_NAPI_TRACKING_STATIC)
200 return static_tracking_do_busy_loop(ctx, loop_end, loop_end_arg);
201 return dynamic_tracking_do_busy_loop(ctx, loop_end, loop_end_arg);
204 static void io_napi_blocking_busy_loop(struct io_ring_ctx *ctx,
205 struct io_wait_queue *iowq)
207 unsigned long start_time = busy_loop_current_time();
208 bool (*loop_end)(void *, unsigned long) = NULL;
209 void *loop_end_arg = NULL;
210 bool is_stale = false;
212 /* Singular lists use a different napi loop end check function and are
213 * only executed once.
215 if (list_is_singular(&ctx->napi_list)) {
216 loop_end = io_napi_busy_loop_should_end;
217 loop_end_arg = iowq;
220 scoped_guard(rcu) {
221 do {
222 is_stale = __io_napi_do_busy_loop(ctx, loop_end,
223 loop_end_arg);
224 } while (!io_napi_busy_loop_should_end(iowq, start_time) &&
225 !loop_end_arg);
228 io_napi_remove_stale(ctx, is_stale);
232 * io_napi_init() - Init napi settings
233 * @ctx: pointer to io-uring context structure
235 * Init napi settings in the io-uring context.
237 void io_napi_init(struct io_ring_ctx *ctx)
239 u64 sys_dt = READ_ONCE(sysctl_net_busy_poll) * NSEC_PER_USEC;
241 INIT_LIST_HEAD(&ctx->napi_list);
242 spin_lock_init(&ctx->napi_lock);
243 ctx->napi_prefer_busy_poll = false;
244 ctx->napi_busy_poll_dt = ns_to_ktime(sys_dt);
245 ctx->napi_track_mode = IO_URING_NAPI_TRACKING_INACTIVE;
249 * io_napi_free() - Deallocate napi
250 * @ctx: pointer to io-uring context structure
252 * Free the napi list and the hash table in the io-uring context.
254 void io_napi_free(struct io_ring_ctx *ctx)
256 struct io_napi_entry *e;
258 guard(spinlock)(&ctx->napi_lock);
259 list_for_each_entry(e, &ctx->napi_list, list) {
260 hash_del_rcu(&e->node);
261 kfree_rcu(e, rcu);
263 INIT_LIST_HEAD_RCU(&ctx->napi_list);
266 static int io_napi_register_napi(struct io_ring_ctx *ctx,
267 struct io_uring_napi *napi)
269 switch (napi->op_param) {
270 case IO_URING_NAPI_TRACKING_DYNAMIC:
271 case IO_URING_NAPI_TRACKING_STATIC:
272 break;
273 default:
274 return -EINVAL;
276 /* clean the napi list for new settings */
277 io_napi_free(ctx);
278 WRITE_ONCE(ctx->napi_track_mode, napi->op_param);
279 WRITE_ONCE(ctx->napi_busy_poll_dt, napi->busy_poll_to * NSEC_PER_USEC);
280 WRITE_ONCE(ctx->napi_prefer_busy_poll, !!napi->prefer_busy_poll);
281 return 0;
285 * io_napi_register() - Register napi with io-uring
286 * @ctx: pointer to io-uring context structure
287 * @arg: pointer to io_uring_napi structure
289 * Register napi in the io-uring context.
291 int io_register_napi(struct io_ring_ctx *ctx, void __user *arg)
293 const struct io_uring_napi curr = {
294 .busy_poll_to = ktime_to_us(ctx->napi_busy_poll_dt),
295 .prefer_busy_poll = ctx->napi_prefer_busy_poll,
296 .op_param = ctx->napi_track_mode
298 struct io_uring_napi napi;
300 if (ctx->flags & IORING_SETUP_IOPOLL)
301 return -EINVAL;
302 if (copy_from_user(&napi, arg, sizeof(napi)))
303 return -EFAULT;
304 if (napi.pad[0] || napi.pad[1] || napi.resv)
305 return -EINVAL;
307 if (copy_to_user(arg, &curr, sizeof(curr)))
308 return -EFAULT;
310 switch (napi.opcode) {
311 case IO_URING_NAPI_REGISTER_OP:
312 return io_napi_register_napi(ctx, &napi);
313 case IO_URING_NAPI_STATIC_ADD_ID:
314 if (curr.op_param != IO_URING_NAPI_TRACKING_STATIC)
315 return -EINVAL;
316 return __io_napi_add_id(ctx, napi.op_param);
317 case IO_URING_NAPI_STATIC_DEL_ID:
318 if (curr.op_param != IO_URING_NAPI_TRACKING_STATIC)
319 return -EINVAL;
320 return __io_napi_del_id(ctx, napi.op_param);
321 default:
322 return -EINVAL;
327 * io_napi_unregister() - Unregister napi with io-uring
328 * @ctx: pointer to io-uring context structure
329 * @arg: pointer to io_uring_napi structure
331 * Unregister napi. If arg has been specified copy the busy poll timeout and
332 * prefer busy poll setting to the passed in structure.
334 int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg)
336 const struct io_uring_napi curr = {
337 .busy_poll_to = ktime_to_us(ctx->napi_busy_poll_dt),
338 .prefer_busy_poll = ctx->napi_prefer_busy_poll
341 if (arg && copy_to_user(arg, &curr, sizeof(curr)))
342 return -EFAULT;
344 WRITE_ONCE(ctx->napi_busy_poll_dt, 0);
345 WRITE_ONCE(ctx->napi_prefer_busy_poll, false);
346 WRITE_ONCE(ctx->napi_track_mode, IO_URING_NAPI_TRACKING_INACTIVE);
347 return 0;
351 * __io_napi_busy_loop() - execute busy poll loop
352 * @ctx: pointer to io-uring context structure
353 * @iowq: pointer to io wait queue
355 * Execute the busy poll loop and merge the spliced off list.
357 void __io_napi_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq)
359 if (ctx->flags & IORING_SETUP_SQPOLL)
360 return;
362 iowq->napi_busy_poll_dt = READ_ONCE(ctx->napi_busy_poll_dt);
363 if (iowq->timeout != KTIME_MAX) {
364 ktime_t dt = ktime_sub(iowq->timeout, io_get_time(ctx));
366 iowq->napi_busy_poll_dt = min_t(u64, iowq->napi_busy_poll_dt, dt);
369 iowq->napi_prefer_busy_poll = READ_ONCE(ctx->napi_prefer_busy_poll);
370 io_napi_blocking_busy_loop(ctx, iowq);
374 * io_napi_sqpoll_busy_poll() - busy poll loop for sqpoll
375 * @ctx: pointer to io-uring context structure
377 * Splice of the napi list and execute the napi busy poll loop.
379 int io_napi_sqpoll_busy_poll(struct io_ring_ctx *ctx)
381 bool is_stale = false;
383 if (!READ_ONCE(ctx->napi_busy_poll_dt))
384 return 0;
385 if (list_empty_careful(&ctx->napi_list))
386 return 0;
388 scoped_guard(rcu) {
389 is_stale = __io_napi_do_busy_loop(ctx, NULL, NULL);
392 io_napi_remove_stale(ctx, is_stale);
393 return 1;
396 #endif