// SPDX-License-Identifier: GPL-2.0

#include "io_uring.h"
#include "napi.h"
#ifdef CONFIG_NET_RX_BUSY_POLL
/* Timeout for cleanout of stale entries. */
#define NAPI_TIMEOUT		(60 * SEC_CONVERSION)
struct io_napi_entry {
	unsigned int		napi_id;
	struct list_head	list;

	unsigned long		timeout;
	struct hlist_node	node;

	struct rcu_head		rcu;
};
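/*
 * Each entry is linked into both ctx->napi_ht (hashed by napi_id for fast
 * lookup) and ctx->napi_list (walked when busy polling). Both structures are
 * RCU protected for readers; updates are serialized by ctx->napi_lock.
 */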
static struct io_napi_entry *io_napi_hash_find(struct hlist_head *hash_list,
					       unsigned int napi_id)
{
	struct io_napi_entry *e;

	hlist_for_each_entry_rcu(e, hash_list, node) {
		if (e->napi_id != napi_id)
			continue;
		return e;
	}

	return NULL;
}
static inline ktime_t net_to_ktime(unsigned long t)
{
	/* napi approximating usecs, reverse busy_loop_current_time */
	return ns_to_ktime(t << 10);
}
int __io_napi_add_id(struct io_ring_ctx *ctx, unsigned int napi_id)
{
	struct hlist_head *hash_list;
	struct io_napi_entry *e;

	/* Non-NAPI IDs can be rejected. */
	if (napi_id < MIN_NAPI_ID)
		return -EINVAL;

	hash_list = &ctx->napi_ht[hash_min(napi_id, HASH_BITS(ctx->napi_ht))];

	scoped_guard(rcu) {
		e = io_napi_hash_find(hash_list, napi_id);
		if (e) {
			WRITE_ONCE(e->timeout, jiffies + NAPI_TIMEOUT);
			return -EEXIST;
		}
	}

	e = kmalloc(sizeof(*e), GFP_NOWAIT);
	if (!e)
		return -ENOMEM;

	e->napi_id = napi_id;
	e->timeout = jiffies + NAPI_TIMEOUT;

	/*
	 * guard(spinlock) is not used here so that the lock can be
	 * unlocked manually before calling kfree().
	 */
	spin_lock(&ctx->napi_lock);
	if (unlikely(io_napi_hash_find(hash_list, napi_id))) {
		spin_unlock(&ctx->napi_lock);
		kfree(e);
		return -EEXIST;
	}

	hlist_add_tail_rcu(&e->node, hash_list);
	list_add_tail_rcu(&e->list, &ctx->napi_list);
	spin_unlock(&ctx->napi_lock);
	return 0;
}
static int __io_napi_del_id(struct io_ring_ctx *ctx, unsigned int napi_id)
{
	struct hlist_head *hash_list;
	struct io_napi_entry *e;

	/* Non-NAPI IDs can be rejected. */
	if (napi_id < MIN_NAPI_ID)
		return -EINVAL;

	hash_list = &ctx->napi_ht[hash_min(napi_id, HASH_BITS(ctx->napi_ht))];
	guard(spinlock)(&ctx->napi_lock);
	e = io_napi_hash_find(hash_list, napi_id);
	if (!e)
		return -ENOENT;

	list_del_rcu(&e->list);
	hash_del_rcu(&e->node);
	kfree_rcu(e, rcu);
	return 0;
}
static void __io_napi_remove_stale(struct io_ring_ctx *ctx)
{
	struct io_napi_entry *e;

	guard(spinlock)(&ctx->napi_lock);
	/*
	 * list_for_each_entry_safe() is not required as long as:
	 * 1. list_del_rcu() does not reset the deleted node next pointer
	 * 2. kfree_rcu() delays the memory freeing until the next quiescent
	 *    state
	 */
	list_for_each_entry(e, &ctx->napi_list, list) {
		if (time_after(jiffies, READ_ONCE(e->timeout))) {
			list_del_rcu(&e->list);
			hash_del_rcu(&e->node);
			kfree_rcu(e, rcu);
		}
	}
}
static inline void io_napi_remove_stale(struct io_ring_ctx *ctx, bool is_stale)
{
	if (is_stale)
		__io_napi_remove_stale(ctx);
}
static inline bool io_napi_busy_loop_timeout(ktime_t start_time,
					     ktime_t bp)
{
	if (bp) {
		ktime_t end_time = ktime_add(start_time, bp);
		ktime_t now = net_to_ktime(busy_loop_current_time());

		return ktime_after(now, end_time);
	}

	return true;
}
static bool io_napi_busy_loop_should_end(void *data,
					 unsigned long start_time)
{
	struct io_wait_queue *iowq = data;

	if (signal_pending(current))
		return true;
	if (io_should_wake(iowq) || io_has_work(iowq->ctx))
		return true;
	if (io_napi_busy_loop_timeout(net_to_ktime(start_time),
				      iowq->napi_busy_poll_dt))
		return true;

	return false;
}
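/*
 * Two tracking modes exist: with IO_URING_NAPI_TRACKING_STATIC the
 * application adds and removes napi ids itself via
 * IO_URING_NAPI_STATIC_ADD_ID/IO_URING_NAPI_STATIC_DEL_ID, so entries never
 * go stale; with IO_URING_NAPI_TRACKING_DYNAMIC napi ids are picked up from
 * the sockets being processed and expire once NAPI_TIMEOUT passes without a
 * refresh.
 */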
/*
 * never report stale entries
 */
static bool static_tracking_do_busy_loop(struct io_ring_ctx *ctx,
					 bool (*loop_end)(void *, unsigned long),
					 void *loop_end_arg)
{
	struct io_napi_entry *e;

	list_for_each_entry_rcu(e, &ctx->napi_list, list)
		napi_busy_loop_rcu(e->napi_id, loop_end, loop_end_arg,
				   ctx->napi_prefer_busy_poll, BUSY_POLL_BUDGET);
	return false;
}
static bool
dynamic_tracking_do_busy_loop(struct io_ring_ctx *ctx,
			      bool (*loop_end)(void *, unsigned long),
			      void *loop_end_arg)
{
	struct io_napi_entry *e;
	bool is_stale = false;

	list_for_each_entry_rcu(e, &ctx->napi_list, list) {
		napi_busy_loop_rcu(e->napi_id, loop_end, loop_end_arg,
				   ctx->napi_prefer_busy_poll, BUSY_POLL_BUDGET);

		if (time_after(jiffies, READ_ONCE(e->timeout)))
			is_stale = true;
	}

	return is_stale;
}
static inline bool
__io_napi_do_busy_loop(struct io_ring_ctx *ctx,
		       bool (*loop_end)(void *, unsigned long),
		       void *loop_end_arg)
{
	if (READ_ONCE(ctx->napi_track_mode) == IO_URING_NAPI_TRACKING_STATIC)
		return static_tracking_do_busy_loop(ctx, loop_end, loop_end_arg);
	return dynamic_tracking_do_busy_loop(ctx, loop_end, loop_end_arg);
}
static void io_napi_blocking_busy_loop(struct io_ring_ctx *ctx,
				       struct io_wait_queue *iowq)
{
	unsigned long start_time = busy_loop_current_time();
	bool (*loop_end)(void *, unsigned long) = NULL;
	void *loop_end_arg = NULL;
	bool is_stale = false;

	/* Singular lists use a different napi loop end check function and are
	 * only executed once.
	 */
	if (list_is_singular(&ctx->napi_list)) {
		loop_end = io_napi_busy_loop_should_end;
		loop_end_arg = iowq;
	}

	scoped_guard(rcu) {
		do {
			is_stale = __io_napi_do_busy_loop(ctx, loop_end,
							  loop_end_arg);
		} while (!io_napi_busy_loop_should_end(iowq, start_time) &&
			 !loop_end_arg);
	}

	io_napi_remove_stale(ctx, is_stale);
}
/*
 * io_napi_init() - Init napi settings
 * @ctx: pointer to io-uring context structure
 *
 * Init napi settings in the io-uring context.
 */
void io_napi_init(struct io_ring_ctx *ctx)
{
	u64 sys_dt = READ_ONCE(sysctl_net_busy_poll) * NSEC_PER_USEC;

	INIT_LIST_HEAD(&ctx->napi_list);
	spin_lock_init(&ctx->napi_lock);
	ctx->napi_prefer_busy_poll = false;
	ctx->napi_busy_poll_dt = ns_to_ktime(sys_dt);
	ctx->napi_track_mode = IO_URING_NAPI_TRACKING_INACTIVE;
}
/*
 * io_napi_free() - Deallocate napi
 * @ctx: pointer to io-uring context structure
 *
 * Free the napi list and the hash table in the io-uring context.
 */
void io_napi_free(struct io_ring_ctx *ctx)
{
	struct io_napi_entry *e;

	guard(spinlock)(&ctx->napi_lock);
	list_for_each_entry(e, &ctx->napi_list, list) {
		hash_del_rcu(&e->node);
		kfree_rcu(e, rcu);
	}
	INIT_LIST_HEAD_RCU(&ctx->napi_list);
}
static int io_napi_register_napi(struct io_ring_ctx *ctx,
				 struct io_uring_napi *napi)
{
	switch (napi->op_param) {
	case IO_URING_NAPI_TRACKING_DYNAMIC:
	case IO_URING_NAPI_TRACKING_STATIC:
		break;
	default:
		return -EINVAL;
	}
	/* clean the napi list for new settings */
	io_napi_free(ctx);
	WRITE_ONCE(ctx->napi_track_mode, napi->op_param);
	WRITE_ONCE(ctx->napi_busy_poll_dt, napi->busy_poll_to * NSEC_PER_USEC);
	WRITE_ONCE(ctx->napi_prefer_busy_poll, !!napi->prefer_busy_poll);
	return 0;
}
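/*
 * The entry points below are reached through io_uring_register(2) with the
 * IORING_REGISTER_NAPI and IORING_UNREGISTER_NAPI opcodes (dispatched from
 * register.c).
 */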
/*
 * io_register_napi() - Register napi with io-uring
 * @ctx: pointer to io-uring context structure
 * @arg: pointer to io_uring_napi structure
 *
 * Register napi in the io-uring context.
 */
int io_register_napi(struct io_ring_ctx *ctx, void __user *arg)
{
	const struct io_uring_napi curr = {
		.busy_poll_to	  = ktime_to_us(ctx->napi_busy_poll_dt),
		.prefer_busy_poll = ctx->napi_prefer_busy_poll,
		.op_param	  = ctx->napi_track_mode
	};
	struct io_uring_napi napi;

	if (ctx->flags & IORING_SETUP_IOPOLL)
		return -EINVAL;
	if (copy_from_user(&napi, arg, sizeof(napi)))
		return -EFAULT;
	if (napi.pad[0] || napi.pad[1] || napi.resv)
		return -EINVAL;

	if (copy_to_user(arg, &curr, sizeof(curr)))
		return -EFAULT;

	switch (napi.opcode) {
	case IO_URING_NAPI_REGISTER_OP:
		return io_napi_register_napi(ctx, &napi);
	case IO_URING_NAPI_STATIC_ADD_ID:
		if (curr.op_param != IO_URING_NAPI_TRACKING_STATIC)
			return -EINVAL;
		return __io_napi_add_id(ctx, napi.op_param);
	case IO_URING_NAPI_STATIC_DEL_ID:
		if (curr.op_param != IO_URING_NAPI_TRACKING_STATIC)
			return -EINVAL;
		return __io_napi_del_id(ctx, napi.op_param);
	default:
		return -EINVAL;
	}
}
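/*
 * Illustrative userspace sketch (not part of this file): dynamic NAPI
 * tracking with a 100 usec busy poll budget can be enabled through
 * IORING_REGISTER_NAPI, e.g. via liburing's io_uring_register_napi():
 *
 *	struct io_uring_napi napi = {
 *		.busy_poll_to     = 100,
 *		.prefer_busy_poll = 1,
 *		.opcode           = IO_URING_NAPI_REGISTER_OP,
 *		.op_param         = IO_URING_NAPI_TRACKING_DYNAMIC,
 *	};
 *
 *	io_uring_register_napi(&ring, &napi);
 */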
/*
 * io_unregister_napi() - Unregister napi with io-uring
 * @ctx: pointer to io-uring context structure
 * @arg: pointer to io_uring_napi structure
 *
 * Unregister napi. If arg has been specified, copy the busy poll timeout and
 * the prefer busy poll setting to the passed-in structure.
 */
int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg)
{
	const struct io_uring_napi curr = {
		.busy_poll_to	  = ktime_to_us(ctx->napi_busy_poll_dt),
		.prefer_busy_poll = ctx->napi_prefer_busy_poll
	};

	if (arg && copy_to_user(arg, &curr, sizeof(curr)))
		return -EFAULT;

	WRITE_ONCE(ctx->napi_busy_poll_dt, 0);
	WRITE_ONCE(ctx->napi_prefer_busy_poll, false);
	WRITE_ONCE(ctx->napi_track_mode, IO_URING_NAPI_TRACKING_INACTIVE);
	return 0;
}
/*
 * __io_napi_busy_loop() - execute busy poll loop
 * @ctx: pointer to io-uring context structure
 * @iowq: pointer to io wait queue
 *
 * Execute the busy poll loop over the tracked napi ids before blocking for
 * completions.
 */
void __io_napi_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq)
{
	if (ctx->flags & IORING_SETUP_SQPOLL)
		return;

	iowq->napi_busy_poll_dt = READ_ONCE(ctx->napi_busy_poll_dt);
	if (iowq->timeout != KTIME_MAX) {
		/* never busy poll past the remaining wait timeout */
		ktime_t dt = ktime_sub(iowq->timeout, io_get_time(ctx));

		iowq->napi_busy_poll_dt = min_t(u64, iowq->napi_busy_poll_dt, dt);
	}

	iowq->napi_prefer_busy_poll = READ_ONCE(ctx->napi_prefer_busy_poll);
	io_napi_blocking_busy_loop(ctx, iowq);
}
/*
 * io_napi_sqpoll_busy_poll() - busy poll loop for sqpoll
 * @ctx: pointer to io-uring context structure
 *
 * Execute the napi busy poll loop from the sqpoll thread.
 */
int io_napi_sqpoll_busy_poll(struct io_ring_ctx *ctx)
{
	bool is_stale = false;

	if (!READ_ONCE(ctx->napi_busy_poll_dt))
		return 0;
	if (list_empty_careful(&ctx->napi_list))
		return 0;

	scoped_guard(rcu) {
		is_stale = __io_napi_do_busy_loop(ctx, NULL, NULL);
	}

	io_napi_remove_stale(ctx, is_stale);
	return 1;
}

#endif