// SPDX-License-Identifier: GPL-2.0

#include <net/xsk_buff_pool.h>
#include <net/xdp_sock.h>
#include <net/xdp_sock_drv.h>

#include "xsk_queue.h"
#include "xdp_umem.h"
#include "xsk.h"
void xp_add_xsk(struct xsk_buff_pool *pool, struct xdp_sock *xs)
{
	unsigned long flags;

	if (!xs->tx)
		return;

	spin_lock_irqsave(&pool->xsk_tx_list_lock, flags);
	list_add_rcu(&xs->tx_list, &pool->xsk_tx_list);
	spin_unlock_irqrestore(&pool->xsk_tx_list_lock, flags);
}
void xp_del_xsk(struct xsk_buff_pool *pool, struct xdp_sock *xs)
{
	unsigned long flags;

	if (!xs->tx)
		return;

	spin_lock_irqsave(&pool->xsk_tx_list_lock, flags);
	list_del_rcu(&xs->tx_list);
	spin_unlock_irqrestore(&pool->xsk_tx_list_lock, flags);
}
void xp_destroy(struct xsk_buff_pool *pool)
{
	if (!pool)
		return;

	kvfree(pool->tx_descs);
	kvfree(pool->heads);
	kvfree(pool);
}
int xp_alloc_tx_descs(struct xsk_buff_pool *pool, struct xdp_sock *xs)
{
	pool->tx_descs = kvcalloc(xs->tx->nentries, sizeof(*pool->tx_descs),
				  GFP_KERNEL);
	if (!pool->tx_descs)
		return -ENOMEM;

	return 0;
}
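/* Create a buffer pool for a socket and derive its geometry (chunk size,
 * headroom, aligned vs unaligned mode) from the backing umem. The fill and
 * completion rings are taken over from the socket's temporary ring pointers.
 */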
struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
						struct xdp_umem *umem)
{
	bool unaligned = umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG;
	struct xsk_buff_pool *pool;
	struct xdp_buff_xsk *xskb;
	u32 i, entries;

	/* free_heads is only used in unaligned mode. */
	entries = unaligned ? umem->chunks : 0;
	pool = kvzalloc(struct_size(pool, free_heads, entries), GFP_KERNEL);
	if (!pool)
		goto out;

	pool->heads = kvcalloc(umem->chunks, sizeof(*pool->heads), GFP_KERNEL);
	if (!pool->heads)
		goto out;

	if (xs->tx)
		if (xp_alloc_tx_descs(pool, xs))
			goto out;

	pool->chunk_mask = ~((u64)umem->chunk_size - 1);
	pool->addrs_cnt = umem->size;
	pool->heads_cnt = umem->chunks;
	pool->free_heads_cnt = umem->chunks;
	pool->headroom = umem->headroom;
	pool->chunk_size = umem->chunk_size;
	pool->chunk_shift = ffs(umem->chunk_size) - 1;
	pool->unaligned = unaligned;
	pool->frame_len = umem->chunk_size - umem->headroom -
			  XDP_PACKET_HEADROOM;
	pool->umem = umem;
	pool->addrs = umem->addrs;
	pool->tx_metadata_len = umem->tx_metadata_len;
	pool->tx_sw_csum = umem->flags & XDP_UMEM_TX_SW_CSUM;
	INIT_LIST_HEAD(&pool->free_list);
	INIT_LIST_HEAD(&pool->xskb_list);
	INIT_LIST_HEAD(&pool->xsk_tx_list);
	spin_lock_init(&pool->xsk_tx_list_lock);
	spin_lock_init(&pool->cq_lock);
	refcount_set(&pool->users, 1);

	pool->fq = xs->fq_tmp;
	pool->cq = xs->cq_tmp;

	for (i = 0; i < pool->free_heads_cnt; i++) {
		xskb = &pool->heads[i];
		xskb->pool = pool;
		xskb->xdp.frame_sz = umem->chunk_size - umem->headroom;
		INIT_LIST_HEAD(&xskb->list_node);
		if (pool->unaligned)
			pool->free_heads[i] = xskb;
		else
			xp_init_xskb_addr(xskb, pool, i * pool->chunk_size);
	}

	return pool;

out:
	xp_destroy(pool);
	return NULL;
}
void xp_set_rxq_info(struct xsk_buff_pool *pool, struct xdp_rxq_info *rxq)
{
	u32 i;

	for (i = 0; i < pool->heads_cnt; i++)
		pool->heads[i].xdp.rxq = rxq;
}
EXPORT_SYMBOL(xp_set_rxq_info);
void xp_fill_cb(struct xsk_buff_pool *pool, struct xsk_cb_desc *desc)
{
	u32 i;

	for (i = 0; i < pool->heads_cnt; i++) {
		struct xdp_buff_xsk *xskb = &pool->heads[i];

		memcpy(xskb->cb + desc->off, desc->src, desc->bytes);
	}
}
EXPORT_SYMBOL(xp_fill_cb);
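/* Ask the driver to tear down its zero-copy state for this queue by
 * installing a NULL pool via ndo_bpf. Only needed if zero-copy was
 * actually enabled on the umem.
 */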
static void xp_disable_drv_zc(struct xsk_buff_pool *pool)
{
	struct netdev_bpf bpf;
	int err;

	ASSERT_RTNL();

	if (pool->umem->zc) {
		bpf.command = XDP_SETUP_XSK_POOL;
		bpf.xsk.pool = NULL;
		bpf.xsk.queue_id = pool->queue_id;

		err = pool->netdev->netdev_ops->ndo_bpf(pool->netdev, &bpf);
		if (err)
			WARN(1, "Failed to disable zero-copy!\n");
	}
}
#define NETDEV_XDP_ACT_ZC	(NETDEV_XDP_ACT_BASIC |		\
				 NETDEV_XDP_ACT_REDIRECT |	\
				 NETDEV_XDP_ACT_XSK_ZEROCOPY)
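/* Bind the pool to a netdev queue. Copy mode always succeeds; zero-copy is
 * attempted only if the device advertises the required XDP features, and a
 * failure falls back to copy mode unless XDP_ZEROCOPY was explicitly
 * requested.
 */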
int xp_assign_dev(struct xsk_buff_pool *pool,
		  struct net_device *netdev, u16 queue_id, u16 flags)
{
	bool force_zc, force_copy;
	struct netdev_bpf bpf;
	int err = 0;

	ASSERT_RTNL();

	force_zc = flags & XDP_ZEROCOPY;
	force_copy = flags & XDP_COPY;

	if (force_zc && force_copy)
		return -EINVAL;

	if (xsk_get_pool_from_qid(netdev, queue_id))
		return -EBUSY;

	pool->netdev = netdev;
	pool->queue_id = queue_id;
	err = xsk_reg_pool_at_qid(netdev, pool, queue_id);
	if (err)
		return err;

	if (flags & XDP_USE_SG)
		pool->umem->flags |= XDP_UMEM_SG_FLAG;

	if (flags & XDP_USE_NEED_WAKEUP)
		pool->uses_need_wakeup = true;
	/* Tx needs to be explicitly woken up the first time. Also
	 * for supporting drivers that do not implement this
	 * feature. They will always have to call sendto() or poll().
	 */
	pool->cached_need_wakeup = XDP_WAKEUP_TX;

	dev_hold(netdev);

	if (force_copy)
		/* For copy-mode, we are done. */
		return 0;

	if ((netdev->xdp_features & NETDEV_XDP_ACT_ZC) != NETDEV_XDP_ACT_ZC) {
		err = -EOPNOTSUPP;
		goto err_unreg_pool;
	}

	if (netdev->xdp_zc_max_segs == 1 && (flags & XDP_USE_SG)) {
		err = -EOPNOTSUPP;
		goto err_unreg_pool;
	}

	if (dev_get_min_mp_channel_count(netdev)) {
		err = -EBUSY;
		goto err_unreg_pool;
	}

	bpf.command = XDP_SETUP_XSK_POOL;
	bpf.xsk.pool = pool;
	bpf.xsk.queue_id = queue_id;

	err = netdev->netdev_ops->ndo_bpf(netdev, &bpf);
	if (err)
		goto err_unreg_pool;

	if (!pool->dma_pages) {
		WARN(1, "Driver did not DMA map zero-copy buffers");
		err = -EINVAL;
		goto err_unreg_xsk;
	}
	pool->umem->zc = true;
	pool->xdp_zc_max_segs = netdev->xdp_zc_max_segs;
	return 0;

err_unreg_xsk:
	xp_disable_drv_zc(pool);
err_unreg_pool:
	if (!force_zc)
		err = 0; /* fallback to copy mode */
	if (err) {
		xsk_clear_pool_at_qid(netdev, queue_id);
		dev_put(netdev);
	}
	return err;
}
int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_sock *umem_xs,
			 struct net_device *dev, u16 queue_id)
{
	u16 flags;
	struct xdp_umem *umem = umem_xs->umem;

	/* One fill and completion ring required for each queue id. */
	if (!pool->fq || !pool->cq)
		return -EINVAL;

	flags = umem->zc ? XDP_ZEROCOPY : XDP_COPY;
	if (umem_xs->pool->uses_need_wakeup)
		flags |= XDP_USE_NEED_WAKEUP;

	return xp_assign_dev(pool, dev, queue_id, flags);
}
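/* Undo xp_assign_dev(): disable driver zero-copy state, unregister the pool
 * from the queue id and drop the netdev reference.
 */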
void xp_clear_dev(struct xsk_buff_pool *pool)
{
	if (!pool->netdev)
		return;

	xp_disable_drv_zc(pool);
	xsk_clear_pool_at_qid(pool->netdev, pool->queue_id);
	dev_put(pool->netdev);
	pool->netdev = NULL;
}
static void xp_release_deferred(struct work_struct *work)
{
	struct xsk_buff_pool *pool = container_of(work, struct xsk_buff_pool,
						  work);

	rtnl_lock();
	xp_clear_dev(pool);
	rtnl_unlock();

	if (pool->fq) {
		xskq_destroy(pool->fq);
		pool->fq = NULL;
	}

	if (pool->cq) {
		xskq_destroy(pool->cq);
		pool->cq = NULL;
	}

	xdp_put_umem(pool->umem, false);
	xp_destroy(pool);
}
void xp_get_pool(struct xsk_buff_pool *pool)
{
	refcount_inc(&pool->users);
}
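/* Drop a reference to the pool. The final put defers the actual teardown to
 * a workqueue so that the rtnl lock can be taken from process context.
 */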
bool xp_put_pool(struct xsk_buff_pool *pool)
{
	if (!pool)
		return false;

	if (refcount_dec_and_test(&pool->users)) {
		INIT_WORK(&pool->work, xp_release_deferred);
		schedule_work(&pool->work);
		return false;
	}

	return true;
}
static struct xsk_dma_map *xp_find_dma_map(struct xsk_buff_pool *pool)
{
	struct xsk_dma_map *dma_map;

	list_for_each_entry(dma_map, &pool->umem->xsk_dma_list, list) {
		if (dma_map->netdev == pool->netdev)
			return dma_map;
	}

	return NULL;
}
static struct xsk_dma_map *xp_create_dma_map(struct device *dev, struct net_device *netdev,
					     u32 nr_pages, struct xdp_umem *umem)
{
	struct xsk_dma_map *dma_map;

	dma_map = kzalloc(sizeof(*dma_map), GFP_KERNEL);
	if (!dma_map)
		return NULL;

	dma_map->dma_pages = kvcalloc(nr_pages, sizeof(*dma_map->dma_pages), GFP_KERNEL);
	if (!dma_map->dma_pages) {
		kfree(dma_map);
		return NULL;
	}

	dma_map->netdev = netdev;
	dma_map->dev = dev;
	dma_map->dma_pages_cnt = nr_pages;
	refcount_set(&dma_map->users, 1);
	list_add(&dma_map->list, &umem->xsk_dma_list);
	return dma_map;
}
static void xp_destroy_dma_map(struct xsk_dma_map *dma_map)
{
	list_del(&dma_map->list);
	kvfree(dma_map->dma_pages);
	kfree(dma_map);
}
static void __xp_dma_unmap(struct xsk_dma_map *dma_map, unsigned long attrs)
{
	dma_addr_t *dma;
	u32 i;

	for (i = 0; i < dma_map->dma_pages_cnt; i++) {
		dma = &dma_map->dma_pages[i];
		if (*dma) {
			*dma &= ~XSK_NEXT_PG_CONTIG_MASK;
			dma_unmap_page_attrs(dma_map->dev, *dma, PAGE_SIZE,
					     DMA_BIDIRECTIONAL, attrs);
			*dma = 0;
		}
	}

	xp_destroy_dma_map(dma_map);
}
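/* Release this pool's reference to the shared DMA mapping. The pages are
 * only unmapped once the last pool using the (umem, netdev) pair is gone.
 */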
void xp_dma_unmap(struct xsk_buff_pool *pool, unsigned long attrs)
{
	struct xsk_dma_map *dma_map;

	if (!pool->dma_pages)
		return;

	dma_map = xp_find_dma_map(pool);
	if (!dma_map) {
		WARN(1, "Could not find dma_map for device");
		return;
	}

	if (refcount_dec_and_test(&dma_map->users))
		__xp_dma_unmap(dma_map, attrs);

	kvfree(pool->dma_pages);
	pool->dma_pages = NULL;
	pool->dma_pages_cnt = 0;
}
EXPORT_SYMBOL(xp_dma_unmap);
static void xp_check_dma_contiguity(struct xsk_dma_map *dma_map)
{
	u32 i;

	for (i = 0; i < dma_map->dma_pages_cnt - 1; i++) {
		if (dma_map->dma_pages[i] + PAGE_SIZE == dma_map->dma_pages[i + 1])
			dma_map->dma_pages[i] |= XSK_NEXT_PG_CONTIG_MASK;
		else
			dma_map->dma_pages[i] &= ~XSK_NEXT_PG_CONTIG_MASK;
	}
}
static int xp_init_dma_info(struct xsk_buff_pool *pool, struct xsk_dma_map *dma_map)
{
	if (!pool->unaligned) {
		u32 i;

		for (i = 0; i < pool->heads_cnt; i++) {
			struct xdp_buff_xsk *xskb = &pool->heads[i];
			u64 orig_addr;

			orig_addr = xskb->xdp.data_hard_start - pool->addrs - pool->headroom;
			xp_init_xskb_dma(xskb, pool, dma_map->dma_pages, orig_addr);
		}
	}

	pool->dma_pages = kvcalloc(dma_map->dma_pages_cnt, sizeof(*pool->dma_pages), GFP_KERNEL);
	if (!pool->dma_pages)
		return -ENOMEM;

	pool->dev = dma_map->dev;
	pool->dma_pages_cnt = dma_map->dma_pages_cnt;
	memcpy(pool->dma_pages, dma_map->dma_pages,
	       pool->dma_pages_cnt * sizeof(*pool->dma_pages));

	return 0;
}
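/* DMA map the umem pages for this pool. Mappings are shared per
 * (umem, netdev) pair through a refcounted xsk_dma_map, so a second pool on
 * the same device reuses the existing mapping.
 */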
int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev,
	       unsigned long attrs, struct page **pages, u32 nr_pages)
{
	struct xsk_dma_map *dma_map;
	dma_addr_t dma;
	int err;
	u32 i;

	dma_map = xp_find_dma_map(pool);
	if (dma_map) {
		err = xp_init_dma_info(pool, dma_map);
		if (err)
			return err;

		refcount_inc(&dma_map->users);
		return 0;
	}

	dma_map = xp_create_dma_map(dev, pool->netdev, nr_pages, pool->umem);
	if (!dma_map)
		return -ENOMEM;

	for (i = 0; i < dma_map->dma_pages_cnt; i++) {
		dma = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE,
					 DMA_BIDIRECTIONAL, attrs);
		if (dma_mapping_error(dev, dma)) {
			__xp_dma_unmap(dma_map, attrs);
			return -ENOMEM;
		}
		dma_map->dma_pages[i] = dma;
	}

	if (pool->unaligned)
		xp_check_dma_contiguity(dma_map);

	err = xp_init_dma_info(pool, dma_map);
	if (err) {
		__xp_dma_unmap(dma_map, attrs);
		return err;
	}

	return 0;
}
EXPORT_SYMBOL(xp_dma_map);
static bool xp_addr_crosses_non_contig_pg(struct xsk_buff_pool *pool,
					  u64 addr)
{
	return xp_desc_crosses_non_contig_pg(pool, addr, pool->chunk_size);
}
static bool xp_check_unaligned(struct xsk_buff_pool *pool, u64 *addr)
{
	*addr = xp_unaligned_extract_addr(*addr);
	if (*addr >= pool->addrs_cnt ||
	    *addr + pool->chunk_size > pool->addrs_cnt ||
	    xp_addr_crosses_non_contig_pg(pool, *addr))
		return false;
	return true;
}
static bool xp_check_aligned(struct xsk_buff_pool *pool, u64 *addr)
{
	*addr = xp_aligned_extract_addr(pool, *addr);
	return *addr < pool->addrs_cnt;
}
static struct xdp_buff_xsk *xp_get_xskb(struct xsk_buff_pool *pool, u64 addr)
{
	struct xdp_buff_xsk *xskb;

	if (pool->unaligned) {
		xskb = pool->free_heads[--pool->free_heads_cnt];
		xp_init_xskb_addr(xskb, pool, addr);
		if (pool->dma_pages)
			xp_init_xskb_dma(xskb, pool, pool->dma_pages, addr);
	} else {
		xskb = &pool->heads[xp_aligned_extract_idx(pool, addr)];
	}

	return xskb;
}
static struct xdp_buff_xsk *__xp_alloc(struct xsk_buff_pool *pool)
{
	struct xdp_buff_xsk *xskb;
	u64 addr;
	bool ok;

	if (pool->free_heads_cnt == 0)
		return NULL;

	for (;;) {
		if (!xskq_cons_peek_addr_unchecked(pool->fq, &addr)) {
			pool->fq->queue_empty_descs++;
			return NULL;
		}

		ok = pool->unaligned ? xp_check_unaligned(pool, &addr) :
		     xp_check_aligned(pool, &addr);
		if (!ok) {
			pool->fq->invalid_descs++;
			xskq_cons_release(pool->fq);
			continue;
		}
		break;
	}

	xskb = xp_get_xskb(pool, addr);

	xskq_cons_release(pool->fq);
	return xskb;
}
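/* Slow path, single buffer allocation: take a buffer from the free list if
 * possible, otherwise consume one address from the fill ring.
 */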
struct xdp_buff *xp_alloc(struct xsk_buff_pool *pool)
{
	struct xdp_buff_xsk *xskb;

	if (!pool->free_list_cnt) {
		xskb = __xp_alloc(pool);
		if (!xskb)
			return NULL;
	} else {
		pool->free_list_cnt--;
		xskb = list_first_entry(&pool->free_list, struct xdp_buff_xsk,
					list_node);
		list_del_init(&xskb->list_node);
	}

	xskb->xdp.data = xskb->xdp.data_hard_start + XDP_PACKET_HEADROOM;
	xskb->xdp.data_meta = xskb->xdp.data;

	xp_dma_sync_for_device(pool, xskb->dma, pool->frame_len);
	return &xskb->xdp;
}
EXPORT_SYMBOL(xp_alloc);
static u32 xp_alloc_new_from_fq(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max)
{
	u32 i, cached_cons, nb_entries;

	if (max > pool->free_heads_cnt)
		max = pool->free_heads_cnt;
	max = xskq_cons_nb_entries(pool->fq, max);

	cached_cons = pool->fq->cached_cons;
	nb_entries = max;
	i = max;
	while (i--) {
		struct xdp_buff_xsk *xskb;
		u64 addr;
		bool ok;

		__xskq_cons_read_addr_unchecked(pool->fq, cached_cons++, &addr);

		ok = pool->unaligned ? xp_check_unaligned(pool, &addr) :
			xp_check_aligned(pool, &addr);
		if (unlikely(!ok)) {
			pool->fq->invalid_descs++;
			nb_entries--;
			continue;
		}

		xskb = xp_get_xskb(pool, addr);

		*xdp = &xskb->xdp;
		xdp++;
	}

	xskq_cons_release_n(pool->fq, max);
	return nb_entries;
}
static u32 xp_alloc_reused(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 nb_entries)
{
	struct xdp_buff_xsk *xskb;
	u32 i;

	nb_entries = min_t(u32, nb_entries, pool->free_list_cnt);

	i = nb_entries;
	while (i--) {
		xskb = list_first_entry(&pool->free_list, struct xdp_buff_xsk, list_node);
		list_del_init(&xskb->list_node);

		*xdp = &xskb->xdp;
		xdp++;
	}
	pool->free_list_cnt -= nb_entries;

	return nb_entries;
}
static u32 xp_alloc_slow(struct xsk_buff_pool *pool, struct xdp_buff **xdp,
			 u32 max)
{
	u32 i;

	for (i = 0; i < max; i++) {
		struct xdp_buff *buff;

		buff = xp_alloc(pool);
		if (unlikely(!buff))
			return i;
		*xdp = buff;
		xdp++;
	}

	return max;
}
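/* Batched allocation: reuse buffers from the free list first, then refill
 * from the fill ring. Falls back to the one-by-one slow path when the DMA
 * device requires syncing.
 */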
u32 xp_alloc_batch(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max)
{
	u32 nb_entries1 = 0, nb_entries2;

	if (unlikely(pool->dev && dma_dev_need_sync(pool->dev)))
		return xp_alloc_slow(pool, xdp, max);

	if (unlikely(pool->free_list_cnt)) {
		nb_entries1 = xp_alloc_reused(pool, xdp, max);
		if (nb_entries1 == max)
			return nb_entries1;

		max -= nb_entries1;
		xdp += nb_entries1;
	}

	nb_entries2 = xp_alloc_new_from_fq(pool, xdp, max);
	if (!nb_entries2)
		pool->fq->queue_empty_descs++;

	return nb_entries1 + nb_entries2;
}
EXPORT_SYMBOL(xp_alloc_batch);
bool xp_can_alloc(struct xsk_buff_pool *pool, u32 count)
{
	u32 req_count, avail_count;

	if (pool->free_list_cnt >= count)
		return true;

	req_count = count - pool->free_list_cnt;
	avail_count = xskq_cons_nb_entries(pool->fq, req_count);
	if (!avail_count)
		pool->fq->queue_empty_descs++;

	return avail_count >= req_count;
}
EXPORT_SYMBOL(xp_can_alloc);
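/* Return a buffer to the pool's free list unless it is already there. */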
void xp_free(struct xdp_buff_xsk *xskb)
{
	if (!list_empty(&xskb->list_node))
		return;

	xskb->pool->free_list_cnt++;
	list_add(&xskb->list_node, &xskb->pool->free_list);
}
EXPORT_SYMBOL(xp_free);
void *xp_raw_get_data(struct xsk_buff_pool *pool, u64 addr)
{
	addr = pool->unaligned ? xp_unaligned_add_offset_to_addr(addr) : addr;
	return pool->addrs + addr;
}
EXPORT_SYMBOL(xp_raw_get_data);
dma_addr_t xp_raw_get_dma(struct xsk_buff_pool *pool, u64 addr)
{
	addr = pool->unaligned ? xp_unaligned_add_offset_to_addr(addr) : addr;
	return (pool->dma_pages[addr >> PAGE_SHIFT] &
		~XSK_NEXT_PG_CONTIG_MASK) +
		(addr & ~PAGE_MASK);
}
EXPORT_SYMBOL(xp_raw_get_dma);