// SPDX-License-Identifier: GPL-2.0-only
/* net/core/xdp.c
 *
 * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc.
 */
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/netdevice.h>
#include <linux/slab.h>
#include <linux/idr.h>
#include <linux/rhashtable.h>
#include <linux/bug.h>
#include <net/page_pool.h>

#include <net/xdp.h>
#include <net/xdp_priv.h> /* struct xdp_mem_allocator */
#include <trace/events/xdp.h>
#include <net/xdp_sock_drv.h>
#define REG_STATE_NEW		0x0
#define REG_STATE_REGISTERED	0x1
#define REG_STATE_UNREGISTERED	0x2
#define REG_STATE_UNUSED	0x3

static DEFINE_IDA(mem_id_pool);
static DEFINE_MUTEX(mem_id_lock);
#define MEM_ID_MAX 0xFFFE
#define MEM_ID_MIN 1
static int mem_id_next = MEM_ID_MIN;

static bool mem_id_init; /* false */
static struct rhashtable *mem_id_ht;
static u32 xdp_mem_id_hashfn(const void *data, u32 len, u32 seed)
{
	const u32 *k = data;
	const u32 key = *k;

	BUILD_BUG_ON(sizeof_field(struct xdp_mem_allocator, mem.id)
		     != sizeof(u32));

	/* Use cyclic increasing ID as direct hash key */
	return key << RHT_HASH_RESERVED_SPACE;
}
static int xdp_mem_id_cmp(struct rhashtable_compare_arg *arg,
			  const void *ptr)
{
	const struct xdp_mem_allocator *xa = ptr;
	u32 mem_id = *(u32 *)arg->key;

	return xa->mem.id != mem_id;
}
static const struct rhashtable_params mem_id_rht_params = {
	.head_offset = offsetof(struct xdp_mem_allocator, node),
	.key_offset  = offsetof(struct xdp_mem_allocator, mem.id),
	.key_len = sizeof_field(struct xdp_mem_allocator, mem.id),
	.max_size = MEM_ID_MAX,
	.automatic_shrinking = true,
	.hashfn    = xdp_mem_id_hashfn,
	.obj_cmpfn = xdp_mem_id_cmp,
};
static void __xdp_mem_allocator_rcu_free(struct rcu_head *rcu)
{
	struct xdp_mem_allocator *xa;

	xa = container_of(rcu, struct xdp_mem_allocator, rcu);

	/* Allow this ID to be reused */
	ida_simple_remove(&mem_id_pool, xa->mem.id);

	kfree(xa);
}
static void mem_xa_remove(struct xdp_mem_allocator *xa)
{
	trace_mem_disconnect(xa);

	if (!rhashtable_remove_fast(mem_id_ht, &xa->node, mem_id_rht_params))
		call_rcu(&xa->rcu, __xdp_mem_allocator_rcu_free);
}
static void mem_allocator_disconnect(void *allocator)
{
	struct xdp_mem_allocator *xa;
	struct rhashtable_iter iter;

	mutex_lock(&mem_id_lock);

	rhashtable_walk_enter(mem_id_ht, &iter);
	do {
		rhashtable_walk_start(&iter);

		while ((xa = rhashtable_walk_next(&iter)) && !IS_ERR(xa)) {
			if (xa->allocator == allocator)
				mem_xa_remove(xa);
		}

		rhashtable_walk_stop(&iter);

	} while (xa == ERR_PTR(-EAGAIN));
	rhashtable_walk_exit(&iter);

	mutex_unlock(&mem_id_lock);
}
void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
{
	struct xdp_mem_allocator *xa;
	int id = xdp_rxq->mem.id;

	if (xdp_rxq->reg_state != REG_STATE_REGISTERED) {
		WARN(1, "Missing register, driver bug");
		return;
	}

	if (id == 0)
		return;

	if (xdp_rxq->mem.type == MEM_TYPE_PAGE_POOL) {
		rcu_read_lock();
		xa = rhashtable_lookup(mem_id_ht, &id, mem_id_rht_params);
		page_pool_destroy(xa->page_pool);
		rcu_read_unlock();
	}
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_unreg_mem_model);
void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq)
{
	/* Simplify driver cleanup code paths, allow unreg "unused" */
	if (xdp_rxq->reg_state == REG_STATE_UNUSED)
		return;

	WARN(!(xdp_rxq->reg_state == REG_STATE_REGISTERED), "Driver BUG");

	xdp_rxq_info_unreg_mem_model(xdp_rxq);

	xdp_rxq->reg_state = REG_STATE_UNREGISTERED;
	xdp_rxq->dev = NULL;

	/* Reset mem info to defaults */
	xdp_rxq->mem.id = 0;
	xdp_rxq->mem.type = 0;
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_unreg);
static void xdp_rxq_info_init(struct xdp_rxq_info *xdp_rxq)
{
	memset(xdp_rxq, 0, sizeof(*xdp_rxq));
}
/* Returns 0 on success, negative on failure */
int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
		     struct net_device *dev, u32 queue_index, unsigned int napi_id)
{
	if (xdp_rxq->reg_state == REG_STATE_UNUSED) {
		WARN(1, "Driver promised not to register this");
		return -EINVAL;
	}

	if (xdp_rxq->reg_state == REG_STATE_REGISTERED) {
		WARN(1, "Missing unregister, handled but fix driver");
		xdp_rxq_info_unreg(xdp_rxq);
	}

	if (!dev) {
		WARN(1, "Missing net_device from driver");
		return -ENODEV;
	}

	/* State either UNREGISTERED or NEW */
	xdp_rxq_info_init(xdp_rxq);
	xdp_rxq->dev = dev;
	xdp_rxq->queue_index = queue_index;
	xdp_rxq->napi_id = napi_id;

	xdp_rxq->reg_state = REG_STATE_REGISTERED;
	return 0;
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_reg);
void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq)
{
	xdp_rxq->reg_state = REG_STATE_UNUSED;
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_unused);

bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq)
{
	return (xdp_rxq->reg_state == REG_STATE_REGISTERED);
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_is_reg);
static int __mem_id_init_hash_table(void)
{
	struct rhashtable *rht;
	int ret;

	if (unlikely(mem_id_init))
		return 0;

	rht = kzalloc(sizeof(*rht), GFP_KERNEL);
	if (!rht)
		return -ENOMEM;

	ret = rhashtable_init(rht, &mem_id_rht_params);
	if (ret < 0) {
		kfree(rht);
		return ret;
	}
	mem_id_ht = rht;
	smp_mb(); /* mutex lock should provide enough pairing */
	mem_id_init = true;

	return 0;
}
/* Allocate a cyclic ID that maps to allocator pointer.
 * See: https://www.kernel.org/doc/html/latest/core-api/idr.html
 *
 * Caller must lock mem_id_lock.
 */
static int __mem_id_cyclic_get(gfp_t gfp)
{
	int retries = 1;
	int id;

again:
	id = ida_simple_get(&mem_id_pool, mem_id_next, MEM_ID_MAX, gfp);
	if (id < 0) {
		if (id == -ENOSPC) {
			/* Cyclic allocator, reset next id */
			if (retries--) {
				mem_id_next = MEM_ID_MIN;
				goto again;
			}
		}
		return id; /* errno */
	}
	mem_id_next = id + 1;

	return id;
}
static bool __is_supported_mem_type(enum xdp_mem_type type)
{
	if (type == MEM_TYPE_PAGE_POOL)
		return is_page_pool_compiled_in();

	if (type >= MEM_TYPE_MAX)
		return false;

	return true;
}
int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
			       enum xdp_mem_type type, void *allocator)
{
	struct xdp_mem_allocator *xdp_alloc;
	gfp_t gfp = GFP_KERNEL;
	int id, errno, ret;
	void *ptr;

	if (xdp_rxq->reg_state != REG_STATE_REGISTERED) {
		WARN(1, "Missing register, driver bug");
		return -EFAULT;
	}

	if (!__is_supported_mem_type(type))
		return -EOPNOTSUPP;

	xdp_rxq->mem.type = type;

	if (!allocator) {
		if (type == MEM_TYPE_PAGE_POOL)
			return -EINVAL; /* Setup time check page_pool req */
		return 0;
	}

	/* Delay init of rhashtable to save memory if feature isn't used */
	if (!mem_id_init) {
		mutex_lock(&mem_id_lock);
		ret = __mem_id_init_hash_table();
		mutex_unlock(&mem_id_lock);
		if (ret < 0) {
			WARN_ON(1);
			return ret;
		}
	}

	xdp_alloc = kzalloc(sizeof(*xdp_alloc), gfp);
	if (!xdp_alloc)
		return -ENOMEM;

	mutex_lock(&mem_id_lock);
	id = __mem_id_cyclic_get(gfp);
	if (id < 0) {
		errno = id;
		goto err;
	}
	xdp_rxq->mem.id = id;
	xdp_alloc->mem = xdp_rxq->mem;
	xdp_alloc->allocator = allocator;

	/* Insert allocator into ID lookup table */
	ptr = rhashtable_insert_slow(mem_id_ht, &id, &xdp_alloc->node);
	if (IS_ERR(ptr)) {
		ida_simple_remove(&mem_id_pool, xdp_rxq->mem.id);
		xdp_rxq->mem.id = 0;
		errno = PTR_ERR(ptr);
		goto err;
	}

	if (type == MEM_TYPE_PAGE_POOL)
		page_pool_use_xdp_mem(allocator, mem_allocator_disconnect);

	mutex_unlock(&mem_id_lock);

	trace_mem_connect(xdp_alloc, xdp_rxq);
	return 0;
err:
	mutex_unlock(&mem_id_lock);
	kfree(xdp_alloc);
	return errno;
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model);
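
/* Illustrative only (not part of this file): a minimal sketch of how a
 * driver is expected to pair the registration APIs above. The "my_ring"
 * structure, its fields and the helper name are hypothetical; it assumes
 * the ring embeds an xdp_rxq_info and already owns a page_pool. Error
 * handling is abbreviated.
 *
 *	static int my_ring_setup_xdp(struct my_ring *ring)
 *	{
 *		int err;
 *
 *		err = xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
 *				       ring->queue_index, ring->napi.napi_id);
 *		if (err)
 *			return err;
 *
 *		err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
 *						 MEM_TYPE_PAGE_POOL,
 *						 ring->page_pool);
 *		if (err)
 *			xdp_rxq_info_unreg(&ring->xdp_rxq);
 *		return err;
 *	}
 *
 * On teardown the driver calls xdp_rxq_info_unreg(&ring->xdp_rxq), which
 * also unregisters the memory model.
 */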
/* XDP RX runs under NAPI protection, and in different delivery error
 * scenarios (e.g. queue full), it is possible to return the xdp_frame
 * while still leveraging this protection. The @napi_direct boolean
 * is used for those call sites, allowing faster recycling of
 * xdp_frames/pages in those cases.
 */
static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
			 struct xdp_buff *xdp)
{
	struct xdp_mem_allocator *xa;
	struct page *page;

	switch (mem->type) {
	case MEM_TYPE_PAGE_POOL:
		rcu_read_lock();
		/* mem->id is valid, checked in xdp_rxq_info_reg_mem_model() */
		xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
		page = virt_to_head_page(data);
		napi_direct &= !xdp_return_frame_no_direct();
		page_pool_put_full_page(xa->page_pool, page, napi_direct);
		rcu_read_unlock();
		break;
	case MEM_TYPE_PAGE_SHARED:
		page_frag_free(data);
		break;
	case MEM_TYPE_PAGE_ORDER0:
		page = virt_to_page(data); /* Assumes order0 page */
		put_page(page);
		break;
	case MEM_TYPE_XSK_BUFF_POOL:
		/* NB! Only valid from an xdp_buff! */
		xsk_buff_free(xdp);
		break;
	default:
		/* Not possible, checked in xdp_rxq_info_reg_mem_model() */
		WARN(1, "Incorrect XDP memory type (%d) usage", mem->type);
		break;
	}
}
void xdp_return_frame(struct xdp_frame *xdpf)
{
	__xdp_return(xdpf->data, &xdpf->mem, false, NULL);
}
EXPORT_SYMBOL_GPL(xdp_return_frame);

void xdp_return_frame_rx_napi(struct xdp_frame *xdpf)
{
	__xdp_return(xdpf->data, &xdpf->mem, true, NULL);
}
EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi);
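
/* Illustrative only: choosing between the two return variants above,
 * following the napi_direct rules described before __xdp_return(). The
 * helper name and the in_napi_poll flag are hypothetical. Code running
 * inside the driver's own NAPI poll loop may use the _rx_napi variant
 * for direct recycling; completion paths outside that context should
 * use xdp_return_frame().
 *
 *	static void my_drop_frame(struct xdp_frame *xdpf, bool in_napi_poll)
 *	{
 *		if (in_napi_poll)
 *			xdp_return_frame_rx_napi(xdpf);
 *		else
 *			xdp_return_frame(xdpf);
 *	}
 */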
/* XDP bulk APIs introduce a defer/flush mechanism to return
 * pages belonging to the same xdp_mem_allocator object
 * (identified via the mem.id field) in bulk to optimize
 * I-cache and D-cache.
 * The bulk queue size is set to 16 to be aligned to how
 * XDP_REDIRECT bulking works. The bulk is flushed when
 * it is full or when mem.id changes.
 * xdp_frame_bulk is usually stored/allocated on the function
 * call-stack to avoid locking penalties.
 */
void xdp_flush_frame_bulk(struct xdp_frame_bulk *bq)
{
	struct xdp_mem_allocator *xa = bq->xa;

	if (unlikely(!xa || !bq->count))
		return;

	page_pool_put_page_bulk(xa->page_pool, bq->q, bq->count);
	/* bq->xa is not cleared to save lookup, if mem.id same in next bulk */
	bq->count = 0;
}
EXPORT_SYMBOL_GPL(xdp_flush_frame_bulk);
/* Must be called with rcu_read_lock held */
void xdp_return_frame_bulk(struct xdp_frame *xdpf,
			   struct xdp_frame_bulk *bq)
{
	struct xdp_mem_info *mem = &xdpf->mem;
	struct xdp_mem_allocator *xa;

	if (mem->type != MEM_TYPE_PAGE_POOL) {
		__xdp_return(xdpf->data, &xdpf->mem, false, NULL);
		return;
	}

	xa = bq->xa;
	if (unlikely(!xa)) {
		xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
		bq->count = 0;
		bq->xa = xa;
	}

	if (bq->count == XDP_BULK_QUEUE_SIZE)
		xdp_flush_frame_bulk(bq);

	if (unlikely(mem->id != xa->mem.id)) {
		xdp_flush_frame_bulk(bq);
		bq->xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
	}

	bq->q[bq->count++] = xdpf->data;
}
EXPORT_SYMBOL_GPL(xdp_return_frame_bulk);
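
/* Illustrative only: a sketch of the intended defer/flush usage pattern
 * described above, for a hypothetical TX-completion loop. It assumes the
 * xdp_frame_bulk_init() initializer declared alongside these APIs in
 * net/xdp.h; the helper name and frame array are made up. The bulk queue
 * lives on the stack and rcu_read_lock() is held across the loop, as
 * required by xdp_return_frame_bulk().
 *
 *	static void my_tx_complete(struct xdp_frame **frames, int n)
 *	{
 *		struct xdp_frame_bulk bq;
 *		int i;
 *
 *		xdp_frame_bulk_init(&bq);
 *
 *		rcu_read_lock();
 *		for (i = 0; i < n; i++)
 *			xdp_return_frame_bulk(frames[i], &bq);
 *		xdp_flush_frame_bulk(&bq);
 *		rcu_read_unlock();
 *	}
 */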
void xdp_return_buff(struct xdp_buff *xdp)
{
	__xdp_return(xdp->data, &xdp->rxq->mem, true, xdp);
}
/* Only called for MEM_TYPE_PAGE_POOL see xdp.h */
void __xdp_release_frame(void *data, struct xdp_mem_info *mem)
{
	struct xdp_mem_allocator *xa;
	struct page *page;

	rcu_read_lock();
	xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
	page = virt_to_head_page(data);
	if (xa)
		page_pool_release_page(xa->page_pool, page);
	rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(__xdp_release_frame);
void xdp_attachment_setup(struct xdp_attachment_info *info,
			  struct netdev_bpf *bpf)
{
	if (info->prog)
		bpf_prog_put(info->prog);
	info->prog = bpf->prog;
	info->flags = bpf->flags;
}
EXPORT_SYMBOL_GPL(xdp_attachment_setup);
struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp)
{
	unsigned int metasize, totsize;
	void *addr, *data_to_copy;
	struct xdp_frame *xdpf;
	struct page *page;

	/* Clone into a MEM_TYPE_PAGE_ORDER0 xdp_frame. */
	metasize = xdp_data_meta_unsupported(xdp) ? 0 :
		   xdp->data - xdp->data_meta;
	totsize = xdp->data_end - xdp->data + metasize;

	if (sizeof(*xdpf) + totsize > PAGE_SIZE)
		return NULL;

	page = dev_alloc_page();
	if (!page)
		return NULL;

	addr = page_to_virt(page);
	xdpf = addr;
	memset(xdpf, 0, sizeof(*xdpf));

	addr += sizeof(*xdpf);
	data_to_copy = metasize ? xdp->data_meta : xdp->data;
	memcpy(addr, data_to_copy, totsize);

	xdpf->data = addr + metasize;
	xdpf->len = totsize - metasize;
	xdpf->headroom = 0;
	xdpf->metasize = metasize;
	xdpf->frame_sz = PAGE_SIZE;
	xdpf->mem.type = MEM_TYPE_PAGE_ORDER0;

	xsk_buff_free(xdp);
	return xdpf;
}
EXPORT_SYMBOL_GPL(xdp_convert_zc_to_xdp_frame);
/* Used by XDP_WARN macro, to avoid inlining WARN() in fast-path */
void xdp_warn(const char *msg, const char *func, const int line)
{
	WARN(1, "XDP_WARN: %s(line:%d): %s\n", func, line, msg);
}
EXPORT_SYMBOL_GPL(xdp_warn);
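
/* Illustrative only: the XDP_WARN() wrapper in net/xdp.h expands to a
 * call of xdp_warn() above, so fast-path helpers can warn without
 * inlining WARN(). A hypothetical sanity check might look like:
 *
 *	if (unlikely(xdp->data_end < xdp->data))
 *		XDP_WARN("Inverted data pointers");
 */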