// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2018 Intel Corporation. */

#include <linux/bpf_trace.h>
#include <net/xdp_sock.h>
#include <net/xdp.h>

#include "ixgbe.h"
#include "ixgbe_txrx_common.h"

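/* Return the UMEM bound to @ring's queue, or NULL when XDP or AF_XDP
 * zero-copy is not enabled for that queue.
 */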
struct xdp_umem *ixgbe_xsk_umem(struct ixgbe_adapter *adapter,
				struct ixgbe_ring *ring)
{
	bool xdp_on = READ_ONCE(adapter->xdp_prog);
	int qid = ring->ring_idx;

	if (!xdp_on || !test_bit(qid, adapter->af_xdp_zc_qps))
		return NULL;

	return xdp_get_umem_from_qid(adapter->netdev, qid);
}

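/* DMA-map every page of the UMEM so the device can DMA directly into
 * user-space buffers; unwind any mappings made so far on failure.
 */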
static int ixgbe_xsk_umem_dma_map(struct ixgbe_adapter *adapter,
				  struct xdp_umem *umem)
{
	struct device *dev = &adapter->pdev->dev;
	unsigned int i, j;
	dma_addr_t dma;

	for (i = 0; i < umem->npgs; i++) {
		dma = dma_map_page_attrs(dev, umem->pgs[i], 0, PAGE_SIZE,
					 DMA_BIDIRECTIONAL, IXGBE_RX_DMA_ATTR);
		if (dma_mapping_error(dev, dma))
			goto out_unmap;

		umem->pages[i].dma = dma;
	}

	return 0;

out_unmap:
	for (j = 0; j < i; j++) {
		dma_unmap_page_attrs(dev, umem->pages[j].dma, PAGE_SIZE,
				     DMA_BIDIRECTIONAL, IXGBE_RX_DMA_ATTR);
		umem->pages[j].dma = 0;
	}

	return -1;
}

static void ixgbe_xsk_umem_dma_unmap(struct ixgbe_adapter *adapter,
				     struct xdp_umem *umem)
{
	struct device *dev = &adapter->pdev->dev;
	unsigned int i;

	for (i = 0; i < umem->npgs; i++) {
		dma_unmap_page_attrs(dev, umem->pages[i].dma, PAGE_SIZE,
				     DMA_BIDIRECTIONAL, IXGBE_RX_DMA_ATTR);

		umem->pages[i].dma = 0;
	}
}

static int ixgbe_xsk_umem_enable(struct ixgbe_adapter *adapter,
				 struct xdp_umem *umem,
				 u16 qid)
{
	struct net_device *netdev = adapter->netdev;
	struct xdp_umem_fq_reuse *reuseq;
	bool if_running;
	int err;

	if (qid >= adapter->num_rx_queues)
		return -EINVAL;

	if (qid >= netdev->real_num_rx_queues ||
	    qid >= netdev->real_num_tx_queues)
		return -EINVAL;

	reuseq = xsk_reuseq_prepare(adapter->rx_ring[0]->count);
	if (!reuseq)
		return -ENOMEM;

	xsk_reuseq_free(xsk_reuseq_swap(umem, reuseq));

	err = ixgbe_xsk_umem_dma_map(adapter, umem);
	if (err)
		return err;

	if_running = netif_running(adapter->netdev) &&
		     ixgbe_enabled_xdp_adapter(adapter);

	if (if_running)
		ixgbe_txrx_ring_disable(adapter, qid);

	set_bit(qid, adapter->af_xdp_zc_qps);

	if (if_running) {
		ixgbe_txrx_ring_enable(adapter, qid);

		/* Kick start the NAPI context so that receiving will start */
		err = ixgbe_xsk_wakeup(adapter->netdev, qid, XDP_WAKEUP_RX);
		if (err)
			return err;
	}

	return 0;
}

static int ixgbe_xsk_umem_disable(struct ixgbe_adapter *adapter, u16 qid)
{
	struct xdp_umem *umem;
	bool if_running;

	umem = xdp_get_umem_from_qid(adapter->netdev, qid);
	if (!umem)
		return -EINVAL;

	if_running = netif_running(adapter->netdev) &&
		     ixgbe_enabled_xdp_adapter(adapter);

	if (if_running)
		ixgbe_txrx_ring_disable(adapter, qid);

	clear_bit(qid, adapter->af_xdp_zc_qps);
	ixgbe_xsk_umem_dma_unmap(adapter, umem);

	if (if_running)
		ixgbe_txrx_ring_enable(adapter, qid);

	return 0;
}

int ixgbe_xsk_umem_setup(struct ixgbe_adapter *adapter, struct xdp_umem *umem,
			 u16 qid)
{
	return umem ? ixgbe_xsk_umem_enable(adapter, umem, qid) :
		ixgbe_xsk_umem_disable(adapter, qid);
}

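/* Run the queue's XDP program on a received zero-copy buffer and map the
 * verdict to IXGBE_XDP_{PASS,CONSUMED,TX,REDIR}.
 */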
static int ixgbe_run_xdp_zc(struct ixgbe_adapter *adapter,
			    struct ixgbe_ring *rx_ring,
			    struct xdp_buff *xdp)
{
	struct xdp_umem *umem = rx_ring->xsk_umem;
	int err, result = IXGBE_XDP_PASS;
	struct bpf_prog *xdp_prog;
	struct xdp_frame *xdpf;
	u64 offset;
	u32 act;

	rcu_read_lock();
	xdp_prog = READ_ONCE(rx_ring->xdp_prog);
	act = bpf_prog_run_xdp(xdp_prog, xdp);
	offset = xdp->data - xdp->data_hard_start;

	xdp->handle = xsk_umem_adjust_offset(umem, xdp->handle, offset);

	switch (act) {
	case XDP_PASS:
		break;
	case XDP_TX:
		xdpf = convert_to_xdp_frame(xdp);
		if (unlikely(!xdpf)) {
			result = IXGBE_XDP_CONSUMED;
			break;
		}
		result = ixgbe_xmit_xdp_ring(adapter, xdpf);
		break;
	case XDP_REDIRECT:
		err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
		result = !err ? IXGBE_XDP_REDIR : IXGBE_XDP_CONSUMED;
		break;
	default:
		bpf_warn_invalid_xdp_action(act);
		/* fallthrough */
	case XDP_ABORTED:
		trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
		/* fallthrough -- handle aborts by dropping packet */
	case XDP_DROP:
		result = IXGBE_XDP_CONSUMED;
		break;
	}
	rcu_read_unlock();
	return result;
}

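/* Fetch the buffer at next_to_clean and sync it for CPU access before the
 * XDP program touches the packet data.
 */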
static struct ixgbe_rx_buffer *
ixgbe_get_rx_buffer_zc(struct ixgbe_ring *rx_ring, unsigned int size)
{
	struct ixgbe_rx_buffer *bi;

	bi = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];

	/* we are reusing so sync this buffer for CPU use */
	dma_sync_single_range_for_cpu(rx_ring->dev,
				      bi->dma, 0,
				      size,
				      DMA_BIDIRECTIONAL);

	return bi;
}

static void ixgbe_reuse_rx_buffer_zc(struct ixgbe_ring *rx_ring,
				     struct ixgbe_rx_buffer *obi)
{
	u16 nta = rx_ring->next_to_alloc;
	struct ixgbe_rx_buffer *nbi;

	nbi = &rx_ring->rx_buffer_info[rx_ring->next_to_alloc];
	/* update, and store next to alloc */
	nta++;
	rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;

	/* transfer page from old buffer to new buffer */
	nbi->dma = obi->dma;
	nbi->addr = obi->addr;
	nbi->handle = obi->handle;

	obi->addr = NULL;
	obi->skb = NULL;
}

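/* Recycle callback for the zero-copy allocator: place the chunk identified
 * by @handle back into the rx_buffer_info entry at next_to_alloc.
 */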
void ixgbe_zca_free(struct zero_copy_allocator *alloc, unsigned long handle)
{
	struct ixgbe_rx_buffer *bi;
	struct ixgbe_ring *rx_ring;
	u64 hr, mask;
	u16 nta;

	rx_ring = container_of(alloc, struct ixgbe_ring, zca);
	hr = rx_ring->xsk_umem->headroom + XDP_PACKET_HEADROOM;
	mask = rx_ring->xsk_umem->chunk_mask;

	nta = rx_ring->next_to_alloc;
	bi = rx_ring->rx_buffer_info;

	nta++;
	rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;

	handle &= mask;

	bi->dma = xdp_umem_get_dma(rx_ring->xsk_umem, handle);
	bi->dma += hr;

	bi->addr = xdp_umem_get_data(rx_ring->xsk_umem, handle);
	bi->addr += hr;

	bi->handle = xsk_umem_adjust_offset(rx_ring->xsk_umem, (u64)handle,
					    rx_ring->xsk_umem->headroom);
}

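/* Fast-path buffer allocation: take an address straight from the UMEM fill
 * queue (no reuse queue) and program it into @bi.
 */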
static bool ixgbe_alloc_buffer_zc(struct ixgbe_ring *rx_ring,
				  struct ixgbe_rx_buffer *bi)
{
	struct xdp_umem *umem = rx_ring->xsk_umem;
	void *addr = bi->addr;
	u64 handle, hr;

	if (addr)
		return true;

	if (!xsk_umem_peek_addr(umem, &handle)) {
		rx_ring->rx_stats.alloc_rx_page_failed++;
		return false;
	}

	hr = umem->headroom + XDP_PACKET_HEADROOM;

	bi->dma = xdp_umem_get_dma(umem, handle);
	bi->dma += hr;

	bi->addr = xdp_umem_get_data(umem, handle);
	bi->addr += hr;

	bi->handle = xsk_umem_adjust_offset(umem, handle, umem->headroom);

	xsk_umem_release_addr(umem);
	return true;
}

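/* Slow-path buffer allocation: also consult the fill-queue reuse queue via
 * the *_rq helpers, used when (re)populating the ring outside the hot path.
 */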
static bool ixgbe_alloc_buffer_slow_zc(struct ixgbe_ring *rx_ring,
				       struct ixgbe_rx_buffer *bi)
{
	struct xdp_umem *umem = rx_ring->xsk_umem;
	u64 handle, hr;

	if (!xsk_umem_peek_addr_rq(umem, &handle)) {
		rx_ring->rx_stats.alloc_rx_page_failed++;
		return false;
	}

	handle &= rx_ring->xsk_umem->chunk_mask;

	hr = umem->headroom + XDP_PACKET_HEADROOM;

	bi->dma = xdp_umem_get_dma(umem, handle);
	bi->dma += hr;

	bi->addr = xdp_umem_get_data(umem, handle);
	bi->addr += hr;

	bi->handle = xsk_umem_adjust_offset(umem, handle, umem->headroom);

	xsk_umem_release_addr_rq(umem);
	return true;
}

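/* Refill up to @cleaned_count descriptors using the supplied allocator and
 * publish the new tail to the hardware. Returns false if the ring could not
 * be fully refilled.
 */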
static __always_inline bool
__ixgbe_alloc_rx_buffers_zc(struct ixgbe_ring *rx_ring, u16 cleaned_count,
			    bool alloc(struct ixgbe_ring *rx_ring,
				       struct ixgbe_rx_buffer *bi))
{
	union ixgbe_adv_rx_desc *rx_desc;
	struct ixgbe_rx_buffer *bi;
	u16 i = rx_ring->next_to_use;
	bool ok = true;

	/* nothing to do */
	if (!cleaned_count)
		return true;

	rx_desc = IXGBE_RX_DESC(rx_ring, i);
	bi = &rx_ring->rx_buffer_info[i];
	i -= rx_ring->count;

	do {
		if (!alloc(rx_ring, bi)) {
			ok = false;
			break;
		}

		/* sync the buffer for use by the device */
		dma_sync_single_range_for_device(rx_ring->dev, bi->dma,
						 bi->page_offset,
						 rx_ring->rx_buf_len,
						 DMA_BIDIRECTIONAL);

		/* Refresh the desc even if buffer_addrs didn't change
		 * because each write-back erases this info.
		 */
		rx_desc->read.pkt_addr = cpu_to_le64(bi->dma);

		rx_desc++;
		bi++;
		i++;
		if (unlikely(!i)) {
			rx_desc = IXGBE_RX_DESC(rx_ring, 0);
			bi = rx_ring->rx_buffer_info;
			i -= rx_ring->count;
		}

		/* clear the length for the next_to_use descriptor */
		rx_desc->wb.upper.length = 0;

		cleaned_count--;
	} while (cleaned_count);

	i += rx_ring->count;

	if (rx_ring->next_to_use != i) {
		rx_ring->next_to_use = i;

		/* update next to alloc since we have filled the ring */
		rx_ring->next_to_alloc = i;

		/* Force memory writes to complete before letting h/w
		 * know there are new descriptors to fetch.  (Only
		 * applicable for weak-ordered memory model archs,
		 * such as IA-64).
		 */
		wmb();
		writel(i, rx_ring->tail);
	}

	return ok;
}

void ixgbe_alloc_rx_buffers_zc(struct ixgbe_ring *rx_ring, u16 count)
{
	__ixgbe_alloc_rx_buffers_zc(rx_ring, count,
				    ixgbe_alloc_buffer_slow_zc);
}

static bool ixgbe_alloc_rx_buffers_fast_zc(struct ixgbe_ring *rx_ring,
					   u16 count)
{
	return __ixgbe_alloc_rx_buffers_zc(rx_ring, count,
					   ixgbe_alloc_buffer_zc);
}

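/* XDP_PASS path: copy the frame out of the UMEM into a freshly allocated
 * skb so the zero-copy buffer can be recycled immediately.
 */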
static struct sk_buff *ixgbe_construct_skb_zc(struct ixgbe_ring *rx_ring,
					      struct ixgbe_rx_buffer *bi,
					      struct xdp_buff *xdp)
{
	unsigned int metasize = xdp->data - xdp->data_meta;
	unsigned int datasize = xdp->data_end - xdp->data;
	struct sk_buff *skb;

	/* allocate a skb to store the frags */
	skb = __napi_alloc_skb(&rx_ring->q_vector->napi,
			       xdp->data_end - xdp->data_hard_start,
			       GFP_ATOMIC | __GFP_NOWARN);
	if (unlikely(!skb))
		return NULL;

	skb_reserve(skb, xdp->data - xdp->data_hard_start);
	memcpy(__skb_put(skb, datasize), xdp->data, datasize);
	if (metasize)
		skb_metadata_set(skb, metasize);

	ixgbe_reuse_rx_buffer_zc(rx_ring, bi);
	return skb;
}

static void ixgbe_inc_ntc(struct ixgbe_ring *rx_ring)
{
	u32 ntc = rx_ring->next_to_clean + 1;

	ntc = (ntc < rx_ring->count) ? ntc : 0;
	rx_ring->next_to_clean = ntc;
	prefetch(IXGBE_RX_DESC(rx_ring, ntc));
}

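/* Main zero-copy Rx poll loop: refill descriptors, run XDP on each
 * completed frame and either recycle, transmit, redirect or build an skb
 * for the stack, then update ring and queue-vector statistics.
 */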
int ixgbe_clean_rx_irq_zc(struct ixgbe_q_vector *q_vector,
			  struct ixgbe_ring *rx_ring,
			  const int budget)
{
	unsigned int total_rx_bytes = 0, total_rx_packets = 0;
	struct ixgbe_adapter *adapter = q_vector->adapter;
	u16 cleaned_count = ixgbe_desc_unused(rx_ring);
	unsigned int xdp_res, xdp_xmit = 0;
	bool failure = false;
	struct sk_buff *skb;
	struct xdp_buff xdp;

	xdp.rxq = &rx_ring->xdp_rxq;

	while (likely(total_rx_packets < budget)) {
		union ixgbe_adv_rx_desc *rx_desc;
		struct ixgbe_rx_buffer *bi;
		unsigned int size;

		/* return some buffers to hardware, one at a time is too slow */
		if (cleaned_count >= IXGBE_RX_BUFFER_WRITE) {
			failure = failure ||
				  !ixgbe_alloc_rx_buffers_fast_zc(rx_ring,
								  cleaned_count);
			cleaned_count = 0;
		}

		rx_desc = IXGBE_RX_DESC(rx_ring, rx_ring->next_to_clean);
		size = le16_to_cpu(rx_desc->wb.upper.length);
		if (!size)
			break;

		/* This memory barrier is needed to keep us from reading
		 * any other fields out of the rx_desc until we know the
		 * descriptor has been written back
		 */
		dma_rmb();

		bi = ixgbe_get_rx_buffer_zc(rx_ring, size);

		if (unlikely(!ixgbe_test_staterr(rx_desc,
						 IXGBE_RXD_STAT_EOP))) {
			struct ixgbe_rx_buffer *next_bi;

			ixgbe_reuse_rx_buffer_zc(rx_ring, bi);
			ixgbe_inc_ntc(rx_ring);
			next_bi =
				&rx_ring->rx_buffer_info[rx_ring->next_to_clean];
			next_bi->skb = ERR_PTR(-EINVAL);
			continue;
		}

		if (unlikely(bi->skb)) {
			ixgbe_reuse_rx_buffer_zc(rx_ring, bi);
			ixgbe_inc_ntc(rx_ring);
			continue;
		}

		xdp.data = bi->addr;
		xdp.data_meta = xdp.data;
		xdp.data_hard_start = xdp.data - XDP_PACKET_HEADROOM;
		xdp.data_end = xdp.data + size;
		xdp.handle = bi->handle;

		xdp_res = ixgbe_run_xdp_zc(adapter, rx_ring, &xdp);

		if (xdp_res) {
			if (xdp_res & (IXGBE_XDP_TX | IXGBE_XDP_REDIR)) {
				xdp_xmit |= xdp_res;
				bi->addr = NULL;
				bi->skb = NULL;
			} else {
				ixgbe_reuse_rx_buffer_zc(rx_ring, bi);
			}
			total_rx_packets++;
			total_rx_bytes += size;

			cleaned_count++;
			ixgbe_inc_ntc(rx_ring);
			continue;
		}

		/* XDP_PASS path */
		skb = ixgbe_construct_skb_zc(rx_ring, bi, &xdp);
		if (!skb) {
			rx_ring->rx_stats.alloc_rx_buff_failed++;
			break;
		}

		cleaned_count++;
		ixgbe_inc_ntc(rx_ring);

		if (eth_skb_pad(skb))
			continue;

		total_rx_bytes += skb->len;
		total_rx_packets++;

		ixgbe_process_skb_fields(rx_ring, rx_desc, skb);
		ixgbe_rx_skb(q_vector, skb);
	}

	if (xdp_xmit & IXGBE_XDP_REDIR)
		xdp_do_flush_map();

	if (xdp_xmit & IXGBE_XDP_TX) {
		struct ixgbe_ring *ring = adapter->xdp_ring[smp_processor_id()];

		/* Force memory writes to complete before letting h/w
		 * know there are new descriptors to fetch.
		 */
		wmb();
		writel(ring->next_to_use, ring->tail);
	}

	u64_stats_update_begin(&rx_ring->syncp);
	rx_ring->stats.packets += total_rx_packets;
	rx_ring->stats.bytes += total_rx_bytes;
	u64_stats_update_end(&rx_ring->syncp);
	q_vector->rx.total_packets += total_rx_packets;
	q_vector->rx.total_bytes += total_rx_bytes;

	if (xsk_umem_uses_need_wakeup(rx_ring->xsk_umem)) {
		if (failure || rx_ring->next_to_clean == rx_ring->next_to_use)
			xsk_set_rx_need_wakeup(rx_ring->xsk_umem);
		else
			xsk_clear_rx_need_wakeup(rx_ring->xsk_umem);

		return (int)total_rx_packets;
	}

	return failure ? budget : (int)total_rx_packets;
}

void ixgbe_xsk_clean_rx_ring(struct ixgbe_ring *rx_ring)
{
	u16 i = rx_ring->next_to_clean;
	struct ixgbe_rx_buffer *bi = &rx_ring->rx_buffer_info[i];

	while (i != rx_ring->next_to_alloc) {
		xsk_umem_fq_reuse(rx_ring->xsk_umem, bi->handle);
		i++;
		bi++;
		if (i == rx_ring->count) {
			i = 0;
			bi = rx_ring->rx_buffer_info;
		}
	}
}

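/* Pull descriptors from the AF_XDP Tx ring and post them on the XDP Tx
 * ring, then bump the tail register once for the whole batch.
 */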
static bool ixgbe_xmit_zc(struct ixgbe_ring *xdp_ring, unsigned int budget)
{
	union ixgbe_adv_tx_desc *tx_desc = NULL;
	struct ixgbe_tx_buffer *tx_bi;
	bool work_done = true;
	struct xdp_desc desc;
	dma_addr_t dma;
	u32 cmd_type;

	while (budget-- > 0) {
		if (unlikely(!ixgbe_desc_unused(xdp_ring)) ||
		    !netif_carrier_ok(xdp_ring->netdev)) {
			work_done = false;
			break;
		}

		if (!xsk_umem_consume_tx(xdp_ring->xsk_umem, &desc))
			break;

		dma = xdp_umem_get_dma(xdp_ring->xsk_umem, desc.addr);

		dma_sync_single_for_device(xdp_ring->dev, dma, desc.len,
					   DMA_BIDIRECTIONAL);

		tx_bi = &xdp_ring->tx_buffer_info[xdp_ring->next_to_use];
		tx_bi->bytecount = desc.len;
		tx_bi->xdpf = NULL;
		tx_bi->gso_segs = 1;

		tx_desc = IXGBE_TX_DESC(xdp_ring, xdp_ring->next_to_use);
		tx_desc->read.buffer_addr = cpu_to_le64(dma);

		/* put descriptor type bits */
		cmd_type = IXGBE_ADVTXD_DTYP_DATA |
			   IXGBE_ADVTXD_DCMD_DEXT |
			   IXGBE_ADVTXD_DCMD_IFCS;
		cmd_type |= desc.len | IXGBE_TXD_CMD;
		tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);
		tx_desc->read.olinfo_status =
			cpu_to_le32(desc.len << IXGBE_ADVTXD_PAYLEN_SHIFT);

		xdp_ring->next_to_use++;
		if (xdp_ring->next_to_use == xdp_ring->count)
			xdp_ring->next_to_use = 0;
	}

	if (tx_desc) {
		ixgbe_xdp_ring_update_tail(xdp_ring);
		xsk_umem_consume_tx_done(xdp_ring->xsk_umem);
	}

	return !!budget && work_done;
}

static void ixgbe_clean_xdp_tx_buffer(struct ixgbe_ring *tx_ring,
				      struct ixgbe_tx_buffer *tx_bi)
{
	xdp_return_frame(tx_bi->xdpf);
	dma_unmap_single(tx_ring->dev,
			 dma_unmap_addr(tx_bi, dma),
			 dma_unmap_len(tx_bi, len), DMA_TO_DEVICE);
	dma_unmap_len_set(tx_bi, len, 0);
}

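/* Reclaim completed Tx descriptors: buffers carrying an xdp_frame (sent via
 * the driver's XDP path) are unmapped and returned, while AF_XDP zero-copy
 * descriptors are completed back to the UMEM; then send more pending frames.
 */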
bool ixgbe_clean_xdp_tx_irq(struct ixgbe_q_vector *q_vector,
			    struct ixgbe_ring *tx_ring, int napi_budget)
{
	u16 ntc = tx_ring->next_to_clean, ntu = tx_ring->next_to_use;
	unsigned int total_packets = 0, total_bytes = 0;
	struct xdp_umem *umem = tx_ring->xsk_umem;
	union ixgbe_adv_tx_desc *tx_desc;
	struct ixgbe_tx_buffer *tx_bi;
	u32 xsk_frames = 0;

	tx_bi = &tx_ring->tx_buffer_info[ntc];
	tx_desc = IXGBE_TX_DESC(tx_ring, ntc);

	while (ntc != ntu) {
		if (!(tx_desc->wb.status & cpu_to_le32(IXGBE_TXD_STAT_DD)))
			break;

		total_bytes += tx_bi->bytecount;
		total_packets += tx_bi->gso_segs;

		if (tx_bi->xdpf)
			ixgbe_clean_xdp_tx_buffer(tx_ring, tx_bi);
		else
			xsk_frames++;

		tx_bi->xdpf = NULL;

		tx_bi++;
		tx_desc++;
		ntc++;
		if (unlikely(ntc == tx_ring->count)) {
			ntc = 0;
			tx_bi = tx_ring->tx_buffer_info;
			tx_desc = IXGBE_TX_DESC(tx_ring, 0);
		}

		/* issue prefetch for next Tx descriptor */
		prefetch(tx_desc);
	}

	tx_ring->next_to_clean = ntc;

	u64_stats_update_begin(&tx_ring->syncp);
	tx_ring->stats.bytes += total_bytes;
	tx_ring->stats.packets += total_packets;
	u64_stats_update_end(&tx_ring->syncp);
	q_vector->tx.total_bytes += total_bytes;
	q_vector->tx.total_packets += total_packets;

	if (xsk_frames)
		xsk_umem_complete_tx(umem, xsk_frames);

	if (xsk_umem_uses_need_wakeup(tx_ring->xsk_umem))
		xsk_set_tx_need_wakeup(tx_ring->xsk_umem);

	return ixgbe_xmit_zc(tx_ring, q_vector->tx.work_limit);
}

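/* ndo_xsk_wakeup handler: make sure the queue's NAPI context runs (by
 * rearming its interrupt if it is not already scheduled) so a blocked
 * AF_XDP socket makes forward progress.
 */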
int ixgbe_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags)
{
	struct ixgbe_adapter *adapter = netdev_priv(dev);
	struct ixgbe_ring *ring;

	if (test_bit(__IXGBE_DOWN, &adapter->state))
		return -ENETDOWN;

	if (!READ_ONCE(adapter->xdp_prog))
		return -ENXIO;

	if (qid >= adapter->num_xdp_queues)
		return -ENXIO;

	ring = adapter->xdp_ring[qid];

	if (test_bit(__IXGBE_TX_DISABLED, &ring->state))
		return -ENETDOWN;

	if (!ring->xsk_umem)
		return -ENXIO;

	if (!napi_if_scheduled_mark_missed(&ring->q_vector->napi)) {
		u64 eics = BIT_ULL(ring->q_vector->v_idx);

		ixgbe_irq_rearm_queues(adapter, eics);
	}

	return 0;
}

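/* Release all outstanding Tx buffers when the ring is torn down, completing
 * any unreported AF_XDP frames back to the UMEM.
 */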
void ixgbe_xsk_clean_tx_ring(struct ixgbe_ring *tx_ring)
{
	u16 ntc = tx_ring->next_to_clean, ntu = tx_ring->next_to_use;
	struct xdp_umem *umem = tx_ring->xsk_umem;
	struct ixgbe_tx_buffer *tx_bi;
	u32 xsk_frames = 0;

	while (ntc != ntu) {
		tx_bi = &tx_ring->tx_buffer_info[ntc];

		if (tx_bi->xdpf)
			ixgbe_clean_xdp_tx_buffer(tx_ring, tx_bi);
		else
			xsk_frames++;

		tx_bi->xdpf = NULL;

		ntc++;
		if (ntc == tx_ring->count)
			ntc = 0;
	}

	if (xsk_frames)
		xsk_umem_complete_tx(umem, xsk_frames);
}