// SPDX-License-Identifier: GPL-2.0

#include <net/xsk_buff_pool.h>
#include <net/xdp_sock.h>
#include <net/xdp_sock_drv.h>

#include "xsk_queue.h"
#include "xdp_umem.h"
#include "xsk.h"
void xp_add_xsk(struct xsk_buff_pool *pool, struct xdp_sock *xs)
{
	unsigned long flags;

	if (!xs->tx)
		return;

	spin_lock_irqsave(&pool->xsk_tx_list_lock, flags);
	list_add_rcu(&xs->tx_list, &pool->xsk_tx_list);
	spin_unlock_irqrestore(&pool->xsk_tx_list_lock, flags);
}
void xp_del_xsk(struct xsk_buff_pool *pool, struct xdp_sock *xs)
{
	unsigned long flags;

	if (!xs->tx)
		return;

	spin_lock_irqsave(&pool->xsk_tx_list_lock, flags);
	list_del_rcu(&xs->tx_list);
	spin_unlock_irqrestore(&pool->xsk_tx_list_lock, flags);
}
void xp_destroy(struct xsk_buff_pool *pool)
{
	if (!pool)
		return;

	kvfree(pool->tx_descs);
	kvfree(pool->heads);
	kvfree(pool);
}
int xp_alloc_tx_descs(struct xsk_buff_pool *pool, struct xdp_sock *xs)
{
	pool->tx_descs = kvcalloc(xs->tx->nentries, sizeof(*pool->tx_descs),
				  GFP_KERNEL);
	if (!pool->tx_descs)
		return -ENOMEM;

	return 0;
}
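
/* Allocate a buffer pool for one socket and copy the relevant properties of
 * the backing umem (chunk size, headroom, address range) into it, so that
 * the data path only has to touch the pool.
 */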
struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
						struct xdp_umem *umem)
{
	bool unaligned = umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG;
	struct xsk_buff_pool *pool;
	struct xdp_buff_xsk *xskb;
	u32 entries, i;

	entries = unaligned ? umem->chunks : 0;
	pool = kvzalloc(struct_size(pool, free_heads, entries), GFP_KERNEL);
	if (!pool)
		goto out;

	pool->heads = kvcalloc(umem->chunks, sizeof(*pool->heads), GFP_KERNEL);
	if (!pool->heads)
		goto out;

	if (xs->tx)
		if (xp_alloc_tx_descs(pool, xs))
			goto out;

	pool->chunk_mask = ~((u64)umem->chunk_size - 1);
	pool->addrs_cnt = umem->size;
	pool->heads_cnt = umem->chunks;
	pool->free_heads_cnt = umem->chunks;
	pool->headroom = umem->headroom;
	pool->chunk_size = umem->chunk_size;
	pool->chunk_shift = ffs(umem->chunk_size) - 1;
	pool->unaligned = unaligned;
	pool->frame_len = umem->chunk_size - umem->headroom -
			  XDP_PACKET_HEADROOM;
	pool->umem = umem;
	pool->addrs = umem->addrs;
	pool->tx_metadata_len = umem->tx_metadata_len;
	pool->tx_sw_csum = umem->flags & XDP_UMEM_TX_SW_CSUM;
	INIT_LIST_HEAD(&pool->free_list);
	INIT_LIST_HEAD(&pool->xskb_list);
	INIT_LIST_HEAD(&pool->xsk_tx_list);
	spin_lock_init(&pool->xsk_tx_list_lock);
	spin_lock_init(&pool->cq_lock);
	refcount_set(&pool->users, 1);

	pool->fq = xs->fq_tmp;
	pool->cq = xs->cq_tmp;

	for (i = 0; i < pool->free_heads_cnt; i++) {
		xskb = &pool->heads[i];
		xskb->pool = pool;
		xskb->xdp.frame_sz = umem->chunk_size - umem->headroom;
		INIT_LIST_HEAD(&xskb->list_node);
		if (pool->unaligned)
			pool->free_heads[i] = xskb;
		else
			xp_init_xskb_addr(xskb, pool, i * pool->chunk_size);
	}

	return pool;

out:
	xp_destroy(pool);
	return NULL;
}
void xp_set_rxq_info(struct xsk_buff_pool *pool, struct xdp_rxq_info *rxq)
{
	u32 i;

	for (i = 0; i < pool->heads_cnt; i++)
		pool->heads[i].xdp.rxq = rxq;
}
EXPORT_SYMBOL(xp_set_rxq_info);
void xp_fill_cb(struct xsk_buff_pool *pool, struct xsk_cb_desc *desc)
{
	u32 i;

	for (i = 0; i < pool->heads_cnt; i++) {
		struct xdp_buff_xsk *xskb = &pool->heads[i];

		memcpy(xskb->cb + desc->off, desc->src, desc->bytes);
	}
}
EXPORT_SYMBOL(xp_fill_cb);
static void xp_disable_drv_zc(struct xsk_buff_pool *pool)
{
	struct netdev_bpf bpf;
	int err;

	ASSERT_RTNL();

	if (pool->umem->zc) {
		bpf.command = XDP_SETUP_XSK_POOL;
		bpf.xsk.pool = NULL;
		bpf.xsk.queue_id = pool->queue_id;

		err = pool->netdev->netdev_ops->ndo_bpf(pool->netdev, &bpf);

		if (err)
			WARN(1, "Failed to disable zero-copy!\n");
	}
}
#define NETDEV_XDP_ACT_ZC	(NETDEV_XDP_ACT_BASIC |		\
				 NETDEV_XDP_ACT_REDIRECT |	\
				 NETDEV_XDP_ACT_XSK_ZEROCOPY)
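
/* Bind the pool to one queue of a netdev. Zero-copy is attempted only when
 * the device advertises the required XDP features; otherwise, unless
 * XDP_ZEROCOPY was explicitly requested, the error is cleared and the socket
 * falls back to copy mode.
 */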
int xp_assign_dev(struct xsk_buff_pool *pool,
		  struct net_device *netdev, u16 queue_id, u16 flags)
{
	bool force_zc, force_copy;
	struct netdev_bpf bpf;
	int err = 0;

	ASSERT_RTNL();

	force_zc = flags & XDP_ZEROCOPY;
	force_copy = flags & XDP_COPY;

	if (force_zc && force_copy)
		return -EINVAL;

	if (xsk_get_pool_from_qid(netdev, queue_id))
		return -EBUSY;

	pool->netdev = netdev;
	pool->queue_id = queue_id;
	err = xsk_reg_pool_at_qid(netdev, pool, queue_id);
	if (err)
		return err;

	if (flags & XDP_USE_SG)
		pool->umem->flags |= XDP_UMEM_SG_FLAG;

	if (flags & XDP_USE_NEED_WAKEUP)
		pool->uses_need_wakeup = true;
	/* Tx needs to be explicitly woken up the first time. Also
	 * for supporting drivers that do not implement this
	 * feature. They will always have to call sendto() or poll().
	 */
	pool->cached_need_wakeup = XDP_WAKEUP_TX;

	dev_hold(netdev);

	if (force_copy)
		/* For copy-mode, we are done. */
		return 0;

	if ((netdev->xdp_features & NETDEV_XDP_ACT_ZC) != NETDEV_XDP_ACT_ZC) {
		err = -EOPNOTSUPP;
		goto err_unreg_pool;
	}

	if (netdev->xdp_zc_max_segs == 1 && (flags & XDP_USE_SG)) {
		err = -EOPNOTSUPP;
		goto err_unreg_pool;
	}

	if (dev_get_min_mp_channel_count(netdev)) {
		err = -EBUSY;
		goto err_unreg_pool;
	}

	bpf.command = XDP_SETUP_XSK_POOL;
	bpf.xsk.pool = pool;
	bpf.xsk.queue_id = queue_id;

	err = netdev->netdev_ops->ndo_bpf(netdev, &bpf);
	if (err)
		goto err_unreg_pool;

	if (!pool->dma_pages) {
		WARN(1, "Driver did not DMA map zero-copy buffers");
		err = -EINVAL;
		goto err_unreg_xsk;
	}
	pool->umem->zc = true;
	pool->xdp_zc_max_segs = netdev->xdp_zc_max_segs;
	return 0;

err_unreg_xsk:
	xp_disable_drv_zc(pool);
err_unreg_pool:
	if (!force_zc)
		err = 0; /* fallback to copy mode */
	if (err) {
		xsk_clear_pool_at_qid(netdev, queue_id);
		dev_put(netdev);
	}
	return err;
}
int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_sock *umem_xs,
			 struct net_device *dev, u16 queue_id)
{
	u16 flags;
	struct xdp_umem *umem = umem_xs->umem;

	/* One fill and completion ring required for each queue id. */
	if (!pool->fq || !pool->cq)
		return -EINVAL;

	flags = umem->zc ? XDP_ZEROCOPY : XDP_COPY;
	if (umem_xs->pool->uses_need_wakeup)
		flags |= XDP_USE_NEED_WAKEUP;

	return xp_assign_dev(pool, dev, queue_id, flags);
}
void xp_clear_dev(struct xsk_buff_pool *pool)
{
	if (!pool->netdev)
		return;

	xp_disable_drv_zc(pool);
	xsk_clear_pool_at_qid(pool->netdev, pool->queue_id);
	dev_put(pool->netdev);
	pool->netdev = NULL;
}
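
/* Pool teardown needs the rtnl lock and may sleep, so the final put defers
 * it to a workqueue (see xp_put_pool() below).
 */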
static void xp_release_deferred(struct work_struct *work)
{
	struct xsk_buff_pool *pool = container_of(work, struct xsk_buff_pool,
						  work);

	rtnl_lock();
	xp_clear_dev(pool);
	rtnl_unlock();

	if (pool->fq) {
		xskq_destroy(pool->fq);
		pool->fq = NULL;
	}

	if (pool->cq) {
		xskq_destroy(pool->cq);
		pool->cq = NULL;
	}

	xdp_put_umem(pool->umem, false);
	xp_destroy(pool);
}
void xp_get_pool(struct xsk_buff_pool *pool)
{
	refcount_inc(&pool->users);
}
bool xp_put_pool(struct xsk_buff_pool *pool)
{
	if (!pool)
		return false;

	if (refcount_dec_and_test(&pool->users)) {
		INIT_WORK(&pool->work, xp_release_deferred);
		schedule_work(&pool->work);
		return true;
	}

	return false;
}
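
/* DMA mappings are created once per umem and netdev and refcounted in an
 * xsk_dma_map on the umem's xsk_dma_list, so pools sharing the same umem
 * and device can reuse them.
 */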
static struct xsk_dma_map *xp_find_dma_map(struct xsk_buff_pool *pool)
{
	struct xsk_dma_map *dma_map;

	list_for_each_entry(dma_map, &pool->umem->xsk_dma_list, list) {
		if (dma_map->netdev == pool->netdev)
			return dma_map;
	}

	return NULL;
}
static struct xsk_dma_map *xp_create_dma_map(struct device *dev, struct net_device *netdev,
					     u32 nr_pages, struct xdp_umem *umem)
{
	struct xsk_dma_map *dma_map;

	dma_map = kzalloc(sizeof(*dma_map), GFP_KERNEL);
	if (!dma_map)
		return NULL;

	dma_map->dma_pages = kvcalloc(nr_pages, sizeof(*dma_map->dma_pages), GFP_KERNEL);
	if (!dma_map->dma_pages) {
		kfree(dma_map);
		return NULL;
	}

	dma_map->netdev = netdev;
	dma_map->dev = dev;
	dma_map->dma_pages_cnt = nr_pages;
	refcount_set(&dma_map->users, 1);
	list_add(&dma_map->list, &umem->xsk_dma_list);
	return dma_map;
}
static void xp_destroy_dma_map(struct xsk_dma_map *dma_map)
{
	list_del(&dma_map->list);
	kvfree(dma_map->dma_pages);
	kfree(dma_map);
}
static void __xp_dma_unmap(struct xsk_dma_map *dma_map, unsigned long attrs)
{
	dma_addr_t *dma;
	u32 i;

	for (i = 0; i < dma_map->dma_pages_cnt; i++) {
		dma = &dma_map->dma_pages[i];
		if (*dma) {
			*dma &= ~XSK_NEXT_PG_CONTIG_MASK;
			dma_unmap_page_attrs(dma_map->dev, *dma, PAGE_SIZE,
					     DMA_BIDIRECTIONAL, attrs);
			*dma = 0;
		}
	}

	xp_destroy_dma_map(dma_map);
}
void xp_dma_unmap(struct xsk_buff_pool *pool, unsigned long attrs)
{
	struct xsk_dma_map *dma_map;

	if (!pool->dma_pages)
		return;

	dma_map = xp_find_dma_map(pool);
	if (!dma_map) {
		WARN(1, "Could not find dma_map for device");
		return;
	}

	if (!refcount_dec_and_test(&dma_map->users))
		return;

	__xp_dma_unmap(dma_map, attrs);
	kvfree(pool->dma_pages);
	pool->dma_pages = NULL;
	pool->dma_pages_cnt = 0;
}
EXPORT_SYMBOL(xp_dma_unmap);
static void xp_check_dma_contiguity(struct xsk_dma_map *dma_map)
{
	u32 i;

	for (i = 0; i < dma_map->dma_pages_cnt - 1; i++) {
		if (dma_map->dma_pages[i] + PAGE_SIZE == dma_map->dma_pages[i + 1])
			dma_map->dma_pages[i] |= XSK_NEXT_PG_CONTIG_MASK;
		else
			dma_map->dma_pages[i] &= ~XSK_NEXT_PG_CONTIG_MASK;
	}
}
static int xp_init_dma_info(struct xsk_buff_pool *pool, struct xsk_dma_map *dma_map)
{
	if (!pool->unaligned) {
		u32 i;

		for (i = 0; i < pool->heads_cnt; i++) {
			struct xdp_buff_xsk *xskb = &pool->heads[i];
			u64 orig_addr;

			orig_addr = xskb->xdp.data_hard_start - pool->addrs - pool->headroom;
			xp_init_xskb_dma(xskb, pool, dma_map->dma_pages, orig_addr);
		}
	}

	pool->dma_pages = kvcalloc(dma_map->dma_pages_cnt, sizeof(*pool->dma_pages), GFP_KERNEL);
	if (!pool->dma_pages)
		return -ENOMEM;

	pool->dev = dma_map->dev;
	pool->dma_pages_cnt = dma_map->dma_pages_cnt;
	memcpy(pool->dma_pages, dma_map->dma_pages,
	       pool->dma_pages_cnt * sizeof(*pool->dma_pages));

	return 0;
}
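
/* Map the umem pages for DMA, reusing an existing mapping for this netdev
 * if one is already present on the umem.
 */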
int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev,
	       unsigned long attrs, struct page **pages, u32 nr_pages)
{
	struct xsk_dma_map *dma_map;
	dma_addr_t dma;
	int err;
	u32 i;

	dma_map = xp_find_dma_map(pool);
	if (dma_map) {
		err = xp_init_dma_info(pool, dma_map);
		if (err)
			return err;

		refcount_inc(&dma_map->users);
		return 0;
	}

	dma_map = xp_create_dma_map(dev, pool->netdev, nr_pages, pool->umem);
	if (!dma_map)
		return -ENOMEM;

	for (i = 0; i < dma_map->dma_pages_cnt; i++) {
		dma = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE,
					 DMA_BIDIRECTIONAL, attrs);
		if (dma_mapping_error(dev, dma)) {
			__xp_dma_unmap(dma_map, attrs);
			return -ENOMEM;
		}
		dma_map->dma_pages[i] = dma;
	}

	if (pool->unaligned)
		xp_check_dma_contiguity(dma_map);

	err = xp_init_dma_info(pool, dma_map);
	if (err) {
		__xp_dma_unmap(dma_map, attrs);
		return err;
	}

	return 0;
}
EXPORT_SYMBOL(xp_dma_map);
static bool xp_addr_crosses_non_contig_pg(struct xsk_buff_pool *pool,
					  u64 addr)
{
	return xp_desc_crosses_non_contig_pg(pool, addr, pool->chunk_size);
}
static bool xp_check_unaligned(struct xsk_buff_pool *pool, u64 *addr)
{
	*addr = xp_unaligned_extract_addr(*addr);
	if (*addr >= pool->addrs_cnt ||
	    *addr + pool->chunk_size > pool->addrs_cnt ||
	    xp_addr_crosses_non_contig_pg(pool, *addr))
		return false;
	return true;
}
static bool xp_check_aligned(struct xsk_buff_pool *pool, u64 *addr)
{
	*addr = xp_aligned_extract_addr(pool, *addr);
	return *addr < pool->addrs_cnt;
}
static struct xdp_buff_xsk *xp_get_xskb(struct xsk_buff_pool *pool, u64 addr)
{
	struct xdp_buff_xsk *xskb;

	if (pool->unaligned) {
		xskb = pool->free_heads[--pool->free_heads_cnt];
		xp_init_xskb_addr(xskb, pool, addr);
		if (pool->dma_pages)
			xp_init_xskb_dma(xskb, pool, pool->dma_pages, addr);
	} else {
		xskb = &pool->heads[xp_aligned_extract_idx(pool, addr)];
	}

	return xskb;
}
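
/* Pull one address from the fill ring, validate it against the pool
 * (aligned or unaligned mode) and turn it into an xdp_buff_xsk.
 */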
static struct xdp_buff_xsk *__xp_alloc(struct xsk_buff_pool *pool)
{
	struct xdp_buff_xsk *xskb;
	u64 addr;
	bool ok;

	if (pool->free_heads_cnt == 0)
		return NULL;

	for (;;) {
		if (!xskq_cons_peek_addr_unchecked(pool->fq, &addr)) {
			pool->fq->queue_empty_descs++;
			return NULL;
		}

		ok = pool->unaligned ? xp_check_unaligned(pool, &addr) :
		     xp_check_aligned(pool, &addr);
		if (!ok) {
			pool->fq->invalid_descs++;
			xskq_cons_release(pool->fq);
			continue;
		}
		break;
	}

	xskb = xp_get_xskb(pool, addr);

	xskq_cons_release(pool->fq);
	return xskb;
}
struct xdp_buff *xp_alloc(struct xsk_buff_pool *pool)
{
	struct xdp_buff_xsk *xskb;

	if (!pool->free_list_cnt) {
		xskb = __xp_alloc(pool);
		if (!xskb)
			return NULL;
	} else {
		pool->free_list_cnt--;
		xskb = list_first_entry(&pool->free_list, struct xdp_buff_xsk,
					list_node);
		list_del_init(&xskb->list_node);
	}

	xskb->xdp.data = xskb->xdp.data_hard_start + XDP_PACKET_HEADROOM;
	xskb->xdp.data_meta = xskb->xdp.data;

	xp_dma_sync_for_device(pool, xskb->dma, pool->frame_len);

	return &xskb->xdp;
}
EXPORT_SYMBOL(xp_alloc);
static u32 xp_alloc_new_from_fq(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max)
{
	u32 i, cached_cons, nb_entries;

	if (max > pool->free_heads_cnt)
		max = pool->free_heads_cnt;
	max = xskq_cons_nb_entries(pool->fq, max);

	cached_cons = pool->fq->cached_cons;
	nb_entries = max;
	i = max;
	while (i--) {
		struct xdp_buff_xsk *xskb;
		u64 addr;
		bool ok;

		__xskq_cons_read_addr_unchecked(pool->fq, cached_cons++, &addr);

		ok = pool->unaligned ? xp_check_unaligned(pool, &addr) :
		     xp_check_aligned(pool, &addr);
		if (unlikely(!ok)) {
			pool->fq->invalid_descs++;
			nb_entries--;
			continue;
		}

		xskb = xp_get_xskb(pool, addr);

		*xdp = &xskb->xdp;
		xdp++;
	}

	xskq_cons_release_n(pool->fq, max);
	return nb_entries;
}
static u32 xp_alloc_reused(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 nb_entries)
{
	struct xdp_buff_xsk *xskb;
	u32 i;

	nb_entries = min_t(u32, nb_entries, pool->free_list_cnt);

	i = nb_entries;
	while (i--) {
		xskb = list_first_entry(&pool->free_list, struct xdp_buff_xsk, list_node);
		list_del_init(&xskb->list_node);

		*xdp = &xskb->xdp;
		xdp++;
	}
	pool->free_list_cnt -= nb_entries;

	return nb_entries;
}
static u32 xp_alloc_slow(struct xsk_buff_pool *pool, struct xdp_buff **xdp,
			 u32 max)
{
	u32 i;

	for (i = 0; i < max; i++) {
		struct xdp_buff *buff;

		buff = xp_alloc(pool);
		if (unlikely(!buff))
			return i;
		*xdp = buff;
		xdp++;
	}

	return max;
}
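
/* Batched allocation: reuse buffers from the free list first, then refill
 * from the fill ring. Falls back to the one-at-a-time slow path when the
 * DMA device requires syncing.
 */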
u32 xp_alloc_batch(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max)
{
	u32 nb_entries1 = 0, nb_entries2;

	if (unlikely(pool->dev && dma_dev_need_sync(pool->dev)))
		return xp_alloc_slow(pool, xdp, max);

	if (unlikely(pool->free_list_cnt)) {
		nb_entries1 = xp_alloc_reused(pool, xdp, max);
		if (nb_entries1 == max)
			return nb_entries1;

		max -= nb_entries1;
		xdp += nb_entries1;
	}

	nb_entries2 = xp_alloc_new_from_fq(pool, xdp, max);
	if (!nb_entries2)
		pool->fq->queue_empty_descs++;

	return nb_entries1 + nb_entries2;
}
EXPORT_SYMBOL(xp_alloc_batch);
bool xp_can_alloc(struct xsk_buff_pool *pool, u32 count)
{
	u32 req_count, avail_count;

	if (pool->free_list_cnt >= count)
		return true;

	req_count = count - pool->free_list_cnt;
	avail_count = xskq_cons_nb_entries(pool->fq, req_count);
	if (!avail_count)
		pool->fq->queue_empty_descs++;

	return avail_count >= req_count;
}
EXPORT_SYMBOL(xp_can_alloc);
void xp_free(struct xdp_buff_xsk *xskb)
{
	if (!list_empty(&xskb->list_node))
		return;

	xskb->pool->free_list_cnt++;
	list_add(&xskb->list_node, &xskb->pool->free_list);
}
EXPORT_SYMBOL(xp_free);
void *xp_raw_get_data(struct xsk_buff_pool *pool, u64 addr)
{
	addr = pool->unaligned ? xp_unaligned_add_offset_to_addr(addr) : addr;
	return pool->addrs + addr;
}
EXPORT_SYMBOL(xp_raw_get_data);
dma_addr_t xp_raw_get_dma(struct xsk_buff_pool *pool, u64 addr)
{
	addr = pool->unaligned ? xp_unaligned_add_offset_to_addr(addr) : addr;
	return (pool->dma_pages[addr >> PAGE_SHIFT] &
		~XSK_NEXT_PG_CONTIG_MASK) +
		(addr & ~PAGE_MASK);
}
EXPORT_SYMBOL(xp_raw_get_dma);