// SPDX-License-Identifier: GPL-2.0

#include <net/xsk_buff_pool.h>
#include <net/xdp_sock.h>
#include <net/xdp_sock_drv.h>

#include "xsk.h"
#include "xdp_umem.h"
#include "xsk_queue.h"

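/* xp_add_xsk()/xp_del_xsk() register and unregister a TX socket on the
 * pool's xsk_tx_list. The list uses the RCU list primitives so readers can
 * traverse it under RCU; writers serialize with the irqsave spinlock below.
 */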
void xp_add_xsk(struct xsk_buff_pool *pool, struct xdp_sock *xs)
{
        unsigned long flags;

        if (!xs->tx)
                return;

        spin_lock_irqsave(&pool->xsk_tx_list_lock, flags);
        list_add_rcu(&xs->tx_list, &pool->xsk_tx_list);
        spin_unlock_irqrestore(&pool->xsk_tx_list_lock, flags);
}

void xp_del_xsk(struct xsk_buff_pool *pool, struct xdp_sock *xs)
{
        unsigned long flags;

        if (!xs->tx)
                return;

        spin_lock_irqsave(&pool->xsk_tx_list_lock, flags);
        list_del_rcu(&xs->tx_list);
        spin_unlock_irqrestore(&pool->xsk_tx_list_lock, flags);
}

void xp_destroy(struct xsk_buff_pool *pool)
{
        if (!pool)
                return;

        kvfree(pool->tx_descs);
        kvfree(pool->heads);
        kvfree(pool);
}

int xp_alloc_tx_descs(struct xsk_buff_pool *pool, struct xdp_sock *xs)
{
        pool->tx_descs = kvcalloc(xs->tx->nentries, sizeof(*pool->tx_descs),
                                  GFP_KERNEL);
        if (!pool->tx_descs)
                return -ENOMEM;

        return 0;
}

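/* Allocate a buffer pool for @xs and derive its geometry (chunk size,
 * headroom, alignment mode, frame length) from @umem. One xdp_buff_xsk head
 * is allocated per umem chunk; in unaligned mode the heads are also kept on
 * a free_heads stack, since chunk addresses are only known once fill-ring
 * descriptors arrive.
 */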
struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
                                                struct xdp_umem *umem)
{
        bool unaligned = umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG;
        struct xsk_buff_pool *pool;
        struct xdp_buff_xsk *xskb;
        u32 entries, i;

        entries = unaligned ? umem->chunks : 0;
        pool = kvzalloc(struct_size(pool, free_heads, entries), GFP_KERNEL);
        if (!pool)
                goto out;

        pool->heads = kvcalloc(umem->chunks, sizeof(*pool->heads), GFP_KERNEL);
        if (!pool->heads)
                goto out;

        if (xs->tx)
                if (xp_alloc_tx_descs(pool, xs))
                        goto out;

        pool->chunk_mask = ~((u64)umem->chunk_size - 1);
        pool->addrs_cnt = umem->size;
        pool->heads_cnt = umem->chunks;
        pool->free_heads_cnt = umem->chunks;
        pool->headroom = umem->headroom;
        pool->chunk_size = umem->chunk_size;
        pool->chunk_shift = ffs(umem->chunk_size) - 1;
        pool->unaligned = unaligned;
        pool->frame_len = umem->chunk_size - umem->headroom -
                          XDP_PACKET_HEADROOM;
        pool->umem = umem;
        pool->addrs = umem->addrs;
        pool->tx_metadata_len = umem->tx_metadata_len;
        pool->tx_sw_csum = umem->flags & XDP_UMEM_TX_SW_CSUM;
        INIT_LIST_HEAD(&pool->free_list);
        INIT_LIST_HEAD(&pool->xskb_list);
        INIT_LIST_HEAD(&pool->xsk_tx_list);
        spin_lock_init(&pool->xsk_tx_list_lock);
        spin_lock_init(&pool->cq_lock);
        refcount_set(&pool->users, 1);

        pool->fq = xs->fq_tmp;
        pool->cq = xs->cq_tmp;

        for (i = 0; i < pool->free_heads_cnt; i++) {
                xskb = &pool->heads[i];
                xskb->pool = pool;
                xskb->xdp.frame_sz = umem->chunk_size - umem->headroom;
                INIT_LIST_HEAD(&xskb->free_list_node);
                INIT_LIST_HEAD(&xskb->xskb_list_node);
                if (pool->unaligned)
                        pool->free_heads[i] = xskb;
                else
                        xp_init_xskb_addr(xskb, pool, i * pool->chunk_size);
        }

        return pool;

out:
        xp_destroy(pool);
        return NULL;
}

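/* Propagate the driver's rxq info to every pre-allocated xdp_buff so the
 * buffers handed out by xp_alloc() already carry the right rxq pointer.
 */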
void xp_set_rxq_info(struct xsk_buff_pool *pool, struct xdp_rxq_info *rxq)
{
        u32 i;

        for (i = 0; i < pool->heads_cnt; i++)
                pool->heads[i].xdp.rxq = rxq;
}
EXPORT_SYMBOL(xp_set_rxq_info);

void xp_fill_cb(struct xsk_buff_pool *pool, struct xsk_cb_desc *desc)
{
        u32 i;

        for (i = 0; i < pool->heads_cnt; i++) {
                struct xdp_buff_xsk *xskb = &pool->heads[i];

                memcpy(xskb->cb + desc->off, desc->src, desc->bytes);
        }
}
EXPORT_SYMBOL(xp_fill_cb);

static void xp_disable_drv_zc(struct xsk_buff_pool *pool)
{
        struct netdev_bpf bpf;
        int err;

        ASSERT_RTNL();

        if (pool->umem->zc) {
                bpf.command = XDP_SETUP_XSK_POOL;
                bpf.xsk.pool = NULL;
                bpf.xsk.queue_id = pool->queue_id;

                err = pool->netdev->netdev_ops->ndo_bpf(pool->netdev, &bpf);

                if (err)
                        WARN(1, "Failed to disable zero-copy!\n");
        }
}

#define NETDEV_XDP_ACT_ZC       (NETDEV_XDP_ACT_BASIC |         \
                                 NETDEV_XDP_ACT_REDIRECT |      \
                                 NETDEV_XDP_ACT_XSK_ZEROCOPY)

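/* Bind the pool to @netdev/@queue_id. Copy mode only needs the qid
 * registration; zero-copy additionally requires the full NETDEV_XDP_ACT_ZC
 * feature set and a successful XDP_SETUP_XSK_POOL ndo_bpf() call. Unless
 * XDP_ZEROCOPY was forced, zero-copy setup failures fall back to copy mode.
 */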
int xp_assign_dev(struct xsk_buff_pool *pool,
                  struct net_device *netdev, u16 queue_id, u16 flags)
{
        bool force_zc, force_copy;
        struct netdev_bpf bpf;
        int err;

        ASSERT_RTNL();

        force_zc = flags & XDP_ZEROCOPY;
        force_copy = flags & XDP_COPY;

        if (force_zc && force_copy)
                return -EINVAL;

        if (xsk_get_pool_from_qid(netdev, queue_id))
                return -EBUSY;

        pool->netdev = netdev;
        pool->queue_id = queue_id;
        err = xsk_reg_pool_at_qid(netdev, pool, queue_id);
        if (err)
                return err;

        if (flags & XDP_USE_SG)
                pool->umem->flags |= XDP_UMEM_SG_FLAG;

        if (flags & XDP_USE_NEED_WAKEUP)
                pool->uses_need_wakeup = true;
        /* Tx needs to be explicitly woken up the first time. Also
         * for supporting drivers that do not implement this
         * feature. They will always have to call sendto() or poll().
         */
        pool->cached_need_wakeup = XDP_WAKEUP_TX;

        dev_hold(netdev);

        if (force_copy)
                /* For copy-mode, we are done. */
                return 0;

        if ((netdev->xdp_features & NETDEV_XDP_ACT_ZC) != NETDEV_XDP_ACT_ZC) {
                err = -EOPNOTSUPP;
                goto err_unreg_pool;
        }

        if (netdev->xdp_zc_max_segs == 1 && (flags & XDP_USE_SG)) {
                err = -EOPNOTSUPP;
                goto err_unreg_pool;
        }

        if (dev_get_min_mp_channel_count(netdev)) {
                err = -EBUSY;
                goto err_unreg_pool;
        }

        bpf.command = XDP_SETUP_XSK_POOL;
        bpf.xsk.pool = pool;
        bpf.xsk.queue_id = queue_id;

        err = netdev->netdev_ops->ndo_bpf(netdev, &bpf);
        if (err)
                goto err_unreg_pool;

        if (!pool->dma_pages) {
                WARN(1, "Driver did not DMA map zero-copy buffers");
                err = -EINVAL;
                goto err_unreg_xsk;
        }

        pool->umem->zc = true;
        return 0;

err_unreg_xsk:
        xp_disable_drv_zc(pool);
err_unreg_pool:
        if (!force_zc)
                err = 0; /* fallback to copy mode */
        if (err) {
                xsk_clear_pool_at_qid(netdev, queue_id);
                dev_put(netdev);
        }
        return err;
}

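/* Shared-umem variant of xp_assign_dev(): the new pool inherits the mode
 * (zero-copy vs. copy, need_wakeup) already negotiated by @umem_xs.
 */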
int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_sock *umem_xs,
                         struct net_device *dev, u16 queue_id)
{
        struct xdp_umem *umem = umem_xs->umem;
        u16 flags;

        /* One fill and completion ring required for each queue id. */
        if (!pool->fq || !pool->cq)
                return -EINVAL;

        flags = umem->zc ? XDP_ZEROCOPY : XDP_COPY;
        if (umem_xs->pool->uses_need_wakeup)
                flags |= XDP_USE_NEED_WAKEUP;

        return xp_assign_dev(pool, dev, queue_id, flags);
}

void xp_clear_dev(struct xsk_buff_pool *pool)
{
        if (!pool->netdev)
                return;

        xp_disable_drv_zc(pool);
        xsk_clear_pool_at_qid(pool->netdev, pool->queue_id);
        dev_put(pool->netdev);
        pool->netdev = NULL;
}

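/* Final pool teardown. It is deferred to a workqueue from xp_put_pool() so
 * that unbinding from the netdev (which takes the rtnl lock) and freeing the
 * rings happen in process context.
 */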
static void xp_release_deferred(struct work_struct *work)
{
        struct xsk_buff_pool *pool = container_of(work, struct xsk_buff_pool,
                                                  work);

        rtnl_lock();
        xp_clear_dev(pool);
        rtnl_unlock();

        if (pool->fq) {
                xskq_destroy(pool->fq);
                pool->fq = NULL;
        }

        if (pool->cq) {
                xskq_destroy(pool->cq);
                pool->cq = NULL;
        }

        xdp_put_umem(pool->umem, false);
        xp_destroy(pool);
}

void xp_get_pool(struct xsk_buff_pool *pool)
{
        refcount_inc(&pool->users);
}

bool xp_put_pool(struct xsk_buff_pool *pool)
{
        if (!pool)
                return false;

        if (refcount_dec_and_test(&pool->users)) {
                INIT_WORK(&pool->work, xp_release_deferred);
                schedule_work(&pool->work);
                return true;
        }

        return false;
}

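/* DMA mappings are kept per (umem, netdev) pair on umem->xsk_dma_list and
 * refcounted, so pools that share a umem on the same device reuse one
 * mapping instead of mapping the pages again.
 */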
static struct xsk_dma_map *xp_find_dma_map(struct xsk_buff_pool *pool)
{
        struct xsk_dma_map *dma_map;

        list_for_each_entry(dma_map, &pool->umem->xsk_dma_list, list) {
                if (dma_map->netdev == pool->netdev)
                        return dma_map;
        }

        return NULL;
}

static struct xsk_dma_map *xp_create_dma_map(struct device *dev, struct net_device *netdev,
                                             u32 nr_pages, struct xdp_umem *umem)
{
        struct xsk_dma_map *dma_map;

        dma_map = kzalloc(sizeof(*dma_map), GFP_KERNEL);
        if (!dma_map)
                return NULL;

        dma_map->dma_pages = kvcalloc(nr_pages, sizeof(*dma_map->dma_pages), GFP_KERNEL);
        if (!dma_map->dma_pages) {
                kfree(dma_map);
                return NULL;
        }

        dma_map->netdev = netdev;
        dma_map->dev = dev;
        dma_map->dma_pages_cnt = nr_pages;
        refcount_set(&dma_map->users, 1);
        list_add(&dma_map->list, &umem->xsk_dma_list);

        return dma_map;
}

static void xp_destroy_dma_map(struct xsk_dma_map *dma_map)
{
        list_del(&dma_map->list);
        kvfree(dma_map->dma_pages);
        kfree(dma_map);
}

static void __xp_dma_unmap(struct xsk_dma_map *dma_map, unsigned long attrs)
{
        dma_addr_t *dma;
        u32 i;

        for (i = 0; i < dma_map->dma_pages_cnt; i++) {
                dma = &dma_map->dma_pages[i];
                if (*dma) {
                        *dma &= ~XSK_NEXT_PG_CONTIG_MASK;
                        dma_unmap_page_attrs(dma_map->dev, *dma, PAGE_SIZE,
                                             DMA_BIDIRECTIONAL, attrs);
                        *dma = 0;
                }
        }

        xp_destroy_dma_map(dma_map);
}

void xp_dma_unmap(struct xsk_buff_pool *pool, unsigned long attrs)
{
        struct xsk_dma_map *dma_map;

        if (!pool->dma_pages)
                return;

        dma_map = xp_find_dma_map(pool);
        if (!dma_map) {
                WARN(1, "Could not find dma_map for device");
                return;
        }

        if (!refcount_dec_and_test(&dma_map->users))
                return;

        __xp_dma_unmap(dma_map, attrs);
        kvfree(pool->dma_pages);
        pool->dma_pages = NULL;
        pool->dma_pages_cnt = 0;
        pool->dev = NULL;
}
EXPORT_SYMBOL(xp_dma_unmap);

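/* Mark each DMA page whose successor is physically contiguous with
 * XSK_NEXT_PG_CONTIG_MASK. xp_raw_get_dma() masks the bit back out, and the
 * unaligned-mode validity checks use it to let chunks straddle such page
 * boundaries.
 */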
static void xp_check_dma_contiguity(struct xsk_dma_map *dma_map)
{
        u32 i;

        for (i = 0; i < dma_map->dma_pages_cnt - 1; i++) {
                if (dma_map->dma_pages[i] + PAGE_SIZE == dma_map->dma_pages[i + 1])
                        dma_map->dma_pages[i] |= XSK_NEXT_PG_CONTIG_MASK;
                else
                        dma_map->dma_pages[i] &= ~XSK_NEXT_PG_CONTIG_MASK;
        }
}

static int xp_init_dma_info(struct xsk_buff_pool *pool, struct xsk_dma_map *dma_map)
{
        if (!pool->unaligned) {
                u32 i;

                for (i = 0; i < pool->heads_cnt; i++) {
                        struct xdp_buff_xsk *xskb = &pool->heads[i];

                        xp_init_xskb_dma(xskb, pool, dma_map->dma_pages, xskb->orig_addr);
                }
        }

        pool->dma_pages = kvcalloc(dma_map->dma_pages_cnt, sizeof(*pool->dma_pages), GFP_KERNEL);
        if (!pool->dma_pages)
                return -ENOMEM;

        pool->dev = dma_map->dev;
        pool->dma_pages_cnt = dma_map->dma_pages_cnt;
        memcpy(pool->dma_pages, dma_map->dma_pages,
               pool->dma_pages_cnt * sizeof(*pool->dma_pages));

        return 0;
}

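/* Map the umem pages for DMA on behalf of a zero-copy capable driver. If a
 * mapping for this netdev already exists it is reused and only its refcount
 * is bumped; otherwise every page is mapped individually and the result is
 * copied into the pool for fast lookup on the data path.
 */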
int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev,
               unsigned long attrs, struct page **pages, u32 nr_pages)
{
        struct xsk_dma_map *dma_map;
        dma_addr_t dma;
        int err;
        u32 i;

        dma_map = xp_find_dma_map(pool);
        if (dma_map) {
                err = xp_init_dma_info(pool, dma_map);
                if (err)
                        return err;

                refcount_inc(&dma_map->users);
                return 0;
        }

        dma_map = xp_create_dma_map(dev, pool->netdev, nr_pages, pool->umem);
        if (!dma_map)
                return -ENOMEM;

        for (i = 0; i < dma_map->dma_pages_cnt; i++) {
                dma = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE,
                                         DMA_BIDIRECTIONAL, attrs);
                if (dma_mapping_error(dev, dma)) {
                        __xp_dma_unmap(dma_map, attrs);
                        return -ENOMEM;
                }
                dma_map->dma_pages[i] = dma;
        }

        if (pool->unaligned)
                xp_check_dma_contiguity(dma_map);

        err = xp_init_dma_info(pool, dma_map);
        if (err) {
                __xp_dma_unmap(dma_map, attrs);
                return err;
        }

        return 0;
}
EXPORT_SYMBOL(xp_dma_map);

static bool xp_addr_crosses_non_contig_pg(struct xsk_buff_pool *pool,
                                          u64 addr)
{
        return xp_desc_crosses_non_contig_pg(pool, addr, pool->chunk_size);
}

static bool xp_check_unaligned(struct xsk_buff_pool *pool, u64 *addr)
{
        *addr = xp_unaligned_extract_addr(*addr);
        if (*addr >= pool->addrs_cnt ||
            *addr + pool->chunk_size > pool->addrs_cnt ||
            xp_addr_crosses_non_contig_pg(pool, *addr))
                return false;
        return true;
}

*pool
, u64
*addr
)
500 *addr
= xp_aligned_extract_addr(pool
, *addr
);
501 return *addr
< pool
->addrs_cnt
;
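/* Pull one address from the fill ring, validate it for the pool's alignment
 * mode and turn it into an xdp_buff_xsk. Invalid descriptors are counted and
 * skipped; an empty fill ring bumps queue_empty_descs and fails the
 * allocation.
 */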
static struct xdp_buff_xsk *__xp_alloc(struct xsk_buff_pool *pool)
{
        struct xdp_buff_xsk *xskb;
        u64 addr;
        bool ok;

        if (pool->free_heads_cnt == 0)
                return NULL;

        for (;;) {
                if (!xskq_cons_peek_addr_unchecked(pool->fq, &addr)) {
                        pool->fq->queue_empty_descs++;
                        return NULL;
                }

                ok = pool->unaligned ? xp_check_unaligned(pool, &addr) :
                     xp_check_aligned(pool, &addr);
                if (!ok) {
                        pool->fq->invalid_descs++;
                        xskq_cons_release(pool->fq);
                        continue;
                }
                break;
        }

        if (pool->unaligned) {
                xskb = pool->free_heads[--pool->free_heads_cnt];
                xp_init_xskb_addr(xskb, pool, addr);
                if (pool->dma_pages)
                        xp_init_xskb_dma(xskb, pool, pool->dma_pages, addr);
        } else {
                xskb = &pool->heads[xp_aligned_extract_idx(pool, addr)];
        }

        xskq_cons_release(pool->fq);
        return xskb;
}

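/* Allocate a single buffer: prefer the recycled free_list, fall back to the
 * fill ring, then reset the data pointers and sync the frame for device DMA
 * before handing it out.
 */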
struct xdp_buff *xp_alloc(struct xsk_buff_pool *pool)
{
        struct xdp_buff_xsk *xskb;

        if (!pool->free_list_cnt) {
                xskb = __xp_alloc(pool);
                if (!xskb)
                        return NULL;
        } else {
                pool->free_list_cnt--;
                xskb = list_first_entry(&pool->free_list, struct xdp_buff_xsk,
                                        free_list_node);
                list_del_init(&xskb->free_list_node);
        }

        xskb->xdp.data = xskb->xdp.data_hard_start + XDP_PACKET_HEADROOM;
        xskb->xdp.data_meta = xskb->xdp.data;

        xp_dma_sync_for_device(pool, xskb->dma, pool->frame_len);
        return &xskb->xdp;
}
EXPORT_SYMBOL(xp_alloc);

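/* Batched allocation: xp_alloc_new_from_fq() converts up to @max fill-ring
 * addresses into buffers in one pass, xp_alloc_reused() drains the free_list
 * first, and xp_alloc_slow() falls back to xp_alloc() when the device still
 * needs explicit DMA syncs.
 */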
static u32 xp_alloc_new_from_fq(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max)
{
        u32 i, cached_cons, nb_entries;

        if (max > pool->free_heads_cnt)
                max = pool->free_heads_cnt;
        max = xskq_cons_nb_entries(pool->fq, max);

        cached_cons = pool->fq->cached_cons;
        nb_entries = max;
        i = max;
        while (i--) {
                struct xdp_buff_xsk *xskb;
                u64 addr;
                bool ok;

                __xskq_cons_read_addr_unchecked(pool->fq, cached_cons++, &addr);

                ok = pool->unaligned ? xp_check_unaligned(pool, &addr) :
                        xp_check_aligned(pool, &addr);
                if (unlikely(!ok)) {
                        pool->fq->invalid_descs++;
                        nb_entries--;
                        continue;
                }

                if (pool->unaligned) {
                        xskb = pool->free_heads[--pool->free_heads_cnt];
                        xp_init_xskb_addr(xskb, pool, addr);
                        if (pool->dma_pages)
                                xp_init_xskb_dma(xskb, pool, pool->dma_pages, addr);
                } else {
                        xskb = &pool->heads[xp_aligned_extract_idx(pool, addr)];
                }

                *xdp = &xskb->xdp;
                xdp++;
        }

        xskq_cons_release_n(pool->fq, max);
        return nb_entries;
}

static u32 xp_alloc_reused(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 nb_entries)
{
        struct xdp_buff_xsk *xskb;
        u32 i;

        nb_entries = min_t(u32, nb_entries, pool->free_list_cnt);

        i = nb_entries;
        while (i--) {
                xskb = list_first_entry(&pool->free_list, struct xdp_buff_xsk, free_list_node);
                list_del_init(&xskb->free_list_node);

                *xdp = &xskb->xdp;
                xdp++;
        }
        pool->free_list_cnt -= nb_entries;

        return nb_entries;
}

static u32 xp_alloc_slow(struct xsk_buff_pool *pool, struct xdp_buff **xdp,
                         u32 max)
{
        u32 i;

        for (i = 0; i < max; i++) {
                struct xdp_buff *buff;

                buff = xp_alloc(pool);
                if (unlikely(!buff))
                        return i;
                *xdp = buff;
                xdp++;
        }

        return max;
}

u32 xp_alloc_batch(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max)
{
        u32 nb_entries1 = 0, nb_entries2;

        if (unlikely(pool->dev && dma_dev_need_sync(pool->dev)))
                return xp_alloc_slow(pool, xdp, max);

        if (unlikely(pool->free_list_cnt)) {
                nb_entries1 = xp_alloc_reused(pool, xdp, max);
                if (nb_entries1 == max)
                        return nb_entries1;

                max -= nb_entries1;
                xdp += nb_entries1;
        }

        nb_entries2 = xp_alloc_new_from_fq(pool, xdp, max);
        if (!nb_entries2)
                pool->fq->queue_empty_descs++;

        return nb_entries1 + nb_entries2;
}
EXPORT_SYMBOL(xp_alloc_batch);

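/* Report whether @count buffers could currently be allocated from the
 * free_list plus the fill ring, without actually consuming anything.
 */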
bool xp_can_alloc(struct xsk_buff_pool *pool, u32 count)
{
        u32 req_count, avail_count;

        if (pool->free_list_cnt >= count)
                return true;

        req_count = count - pool->free_list_cnt;
        avail_count = xskq_cons_nb_entries(pool->fq, req_count);
        if (!avail_count)
                pool->fq->queue_empty_descs++;

        return avail_count >= req_count;
}
EXPORT_SYMBOL(xp_can_alloc);

void xp_free(struct xdp_buff_xsk *xskb)
{
        if (!list_empty(&xskb->free_list_node))
                return;

        xskb->pool->free_list_cnt++;
        list_add(&xskb->free_list_node, &xskb->pool->free_list);
}
EXPORT_SYMBOL(xp_free);

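/* Raw address helpers: translate a descriptor address straight into a kernel
 * virtual address or a device DMA address, honouring the offset encoding
 * used in unaligned mode.
 */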
void *xp_raw_get_data(struct xsk_buff_pool *pool, u64 addr)
{
        addr = pool->unaligned ? xp_unaligned_add_offset_to_addr(addr) : addr;
        return pool->addrs + addr;
}
EXPORT_SYMBOL(xp_raw_get_data);

dma_addr_t xp_raw_get_dma(struct xsk_buff_pool *pool, u64 addr)
{
        addr = pool->unaligned ? xp_unaligned_add_offset_to_addr(addr) : addr;
        return (pool->dma_pages[addr >> PAGE_SHIFT] &
                ~XSK_NEXT_PG_CONTIG_MASK) +
                (addr & ~PAGE_MASK);
}
EXPORT_SYMBOL(xp_raw_get_dma);