// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2018 Intel Corporation. */

#include <linux/bpf_trace.h>
#include <net/xdp_sock_drv.h>
#include <net/xdp.h>

#include "ixgbe.h"
#include "ixgbe_txrx_common.h"

struct xsk_buff_pool *ixgbe_xsk_pool(struct ixgbe_adapter *adapter,
				     struct ixgbe_ring *ring)
{
	bool xdp_on = READ_ONCE(adapter->xdp_prog);
	int qid = ring->ring_idx;

	if (!xdp_on || !test_bit(qid, adapter->af_xdp_zc_qps))
		return NULL;

	return xsk_get_pool_from_qid(adapter->netdev, qid);
}

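/* Bind an AF_XDP buffer pool to RX/TX queue pair @qid: DMA-map the pool,
 * mark the queue as zero-copy and, if the interface is running, bounce the
 * ring pair and kick NAPI so traffic starts flowing right away.
 */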
static int ixgbe_xsk_pool_enable(struct ixgbe_adapter *adapter,
				 struct xsk_buff_pool *pool,
				 u16 qid)
{
	struct net_device *netdev = adapter->netdev;
	bool if_running;
	int err;

	if (qid >= adapter->num_rx_queues)
		return -EINVAL;

	if (qid >= netdev->real_num_rx_queues ||
	    qid >= netdev->real_num_tx_queues)
		return -EINVAL;

	err = xsk_pool_dma_map(pool, &adapter->pdev->dev, IXGBE_RX_DMA_ATTR);
	if (err)
		return err;

	if_running = netif_running(adapter->netdev) &&
		     ixgbe_enabled_xdp_adapter(adapter);

	if (if_running)
		ixgbe_txrx_ring_disable(adapter, qid);

	set_bit(qid, adapter->af_xdp_zc_qps);

	if (if_running) {
		ixgbe_txrx_ring_enable(adapter, qid);

		/* Kick start the NAPI context so that receiving will start */
		err = ixgbe_xsk_wakeup(adapter->netdev, qid, XDP_WAKEUP_RX);
		if (err)
			return err;
	}

	return 0;
}

static int ixgbe_xsk_pool_disable(struct ixgbe_adapter *adapter, u16 qid)
{
	struct xsk_buff_pool *pool;
	bool if_running;

	pool = xsk_get_pool_from_qid(adapter->netdev, qid);
	if (!pool)
		return -EINVAL;

	if_running = netif_running(adapter->netdev) &&
		     ixgbe_enabled_xdp_adapter(adapter);

	if (if_running)
		ixgbe_txrx_ring_disable(adapter, qid);

	clear_bit(qid, adapter->af_xdp_zc_qps);
	xsk_pool_dma_unmap(pool, IXGBE_RX_DMA_ATTR);

	if (if_running)
		ixgbe_txrx_ring_enable(adapter, qid);

	return 0;
}

int ixgbe_xsk_pool_setup(struct ixgbe_adapter *adapter,
			 struct xsk_buff_pool *pool,
			 u16 qid)
{
	return pool ? ixgbe_xsk_pool_enable(adapter, pool, qid) :
		ixgbe_xsk_pool_disable(adapter, qid);
}

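/* Run the attached XDP program on a zero-copy buffer and fold its verdict
 * into the IXGBE_XDP_{PASS,TX,REDIR,CONSUMED} result flags used by the RX
 * cleanup loop.
 */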
static int ixgbe_run_xdp_zc(struct ixgbe_adapter *adapter,
			    struct ixgbe_ring *rx_ring,
			    struct xdp_buff *xdp)
{
	int err, result = IXGBE_XDP_PASS;
	struct bpf_prog *xdp_prog;
	struct xdp_frame *xdpf;
	u32 act;

	xdp_prog = READ_ONCE(rx_ring->xdp_prog);
	act = bpf_prog_run_xdp(xdp_prog, xdp);

	switch (act) {
	case XDP_PASS:
		break;
	case XDP_TX:
		xdpf = xdp_convert_buff_to_frame(xdp);
		if (unlikely(!xdpf)) {
			result = IXGBE_XDP_CONSUMED;
			break;
		}
		result = ixgbe_xmit_xdp_ring(adapter, xdpf);
		break;
	case XDP_REDIRECT:
		err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
		result = !err ? IXGBE_XDP_REDIR : IXGBE_XDP_CONSUMED;
		break;
	default:
		bpf_warn_invalid_xdp_action(act);
		fallthrough;
	case XDP_ABORTED:
		trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
		fallthrough; /* handle aborts by dropping packet */
	case XDP_DROP:
		result = IXGBE_XDP_CONSUMED;
		break;
	}

	return result;
}

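/* Allocate up to @count RX buffers from the XSK pool and write their DMA
 * addresses into the descriptor ring; returns false if the pool ran dry
 * before the whole batch could be posted.
 */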
bool ixgbe_alloc_rx_buffers_zc(struct ixgbe_ring *rx_ring, u16 count)
{
	union ixgbe_adv_rx_desc *rx_desc;
	struct ixgbe_rx_buffer *bi;
	u16 i = rx_ring->next_to_use;
	dma_addr_t dma;
	bool ok = true;

	/* nothing to do */
	if (!count)
		return true;

	rx_desc = IXGBE_RX_DESC(rx_ring, i);
	bi = &rx_ring->rx_buffer_info[i];
	i -= rx_ring->count;

	do {
		bi->xdp = xsk_buff_alloc(rx_ring->xsk_pool);
		if (!bi->xdp) {
			ok = false;
			break;
		}

		dma = xsk_buff_xdp_get_dma(bi->xdp);

		/* Refresh the desc even if buffer_addrs didn't change
		 * because each write-back erases this info.
		 */
		rx_desc->read.pkt_addr = cpu_to_le64(dma);

		rx_desc++;
		bi++;
		i++;
		if (unlikely(!i)) {
			rx_desc = IXGBE_RX_DESC(rx_ring, 0);
			bi = rx_ring->rx_buffer_info;
			i -= rx_ring->count;
		}

		/* clear the length for the next_to_use descriptor */
		rx_desc->wb.upper.length = 0;

		count--;
	} while (count);

	i += rx_ring->count;

	if (rx_ring->next_to_use != i) {
		rx_ring->next_to_use = i;

		/* Force memory writes to complete before letting h/w
		 * know there are new descriptors to fetch.  (Only
		 * applicable for weak-ordered memory model archs,
		 * such as IA-64).
		 */
		wmb();
		writel(i, rx_ring->tail);
	}

	return ok;
}

static struct sk_buff *ixgbe_construct_skb_zc(struct ixgbe_ring *rx_ring,
					      struct ixgbe_rx_buffer *bi)
{
	unsigned int metasize = bi->xdp->data - bi->xdp->data_meta;
	unsigned int datasize = bi->xdp->data_end - bi->xdp->data;
	struct sk_buff *skb;

	/* allocate a skb to store the frags */
	skb = __napi_alloc_skb(&rx_ring->q_vector->napi,
			       bi->xdp->data_end - bi->xdp->data_hard_start,
			       GFP_ATOMIC | __GFP_NOWARN);
	if (unlikely(!skb))
		return NULL;

	skb_reserve(skb, bi->xdp->data - bi->xdp->data_hard_start);
	memcpy(__skb_put(skb, datasize), bi->xdp->data, datasize);
	if (metasize)
		skb_metadata_set(skb, metasize);

	xsk_buff_free(bi->xdp);
	bi->xdp = NULL;
	return skb;
}

static void ixgbe_inc_ntc(struct ixgbe_ring *rx_ring)
{
	u32 ntc = rx_ring->next_to_clean + 1;

	ntc = (ntc < rx_ring->count) ? ntc : 0;
	rx_ring->next_to_clean = ntc;
	prefetch(IXGBE_RX_DESC(rx_ring, ntc));
}

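/* Zero-copy RX poll loop: refill the ring, run XDP on every completed
 * descriptor and pass XDP_PASS frames to the stack as copied skbs, then
 * flush any XDP_TX/XDP_REDIRECT work and update the need_wakeup state.
 */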
int ixgbe_clean_rx_irq_zc(struct ixgbe_q_vector *q_vector,
			  struct ixgbe_ring *rx_ring,
			  const int budget)
{
	unsigned int total_rx_bytes = 0, total_rx_packets = 0;
	struct ixgbe_adapter *adapter = q_vector->adapter;
	u16 cleaned_count = ixgbe_desc_unused(rx_ring);
	unsigned int xdp_res, xdp_xmit = 0;
	bool failure = false;

	while (likely(total_rx_packets < budget)) {
		union ixgbe_adv_rx_desc *rx_desc;
		struct ixgbe_rx_buffer *bi;
		struct sk_buff *skb;
		unsigned int size;

		/* return some buffers to hardware, one at a time is too slow */
		if (cleaned_count >= IXGBE_RX_BUFFER_WRITE) {
			failure = failure ||
				  !ixgbe_alloc_rx_buffers_zc(rx_ring,
							     cleaned_count);
			cleaned_count = 0;
		}

		rx_desc = IXGBE_RX_DESC(rx_ring, rx_ring->next_to_clean);
		size = le16_to_cpu(rx_desc->wb.upper.length);
		if (!size)
			break;

		/* This memory barrier is needed to keep us from reading
		 * any other fields out of the rx_desc until we know the
		 * descriptor has been written back
		 */
		dma_rmb();

		bi = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];

		if (unlikely(!ixgbe_test_staterr(rx_desc,
						 IXGBE_RXD_STAT_EOP))) {
			struct ixgbe_rx_buffer *next_bi;

			xsk_buff_free(bi->xdp);
			bi->xdp = NULL;
			ixgbe_inc_ntc(rx_ring);
			next_bi =
				&rx_ring->rx_buffer_info[rx_ring->next_to_clean];
			next_bi->discard = true;
			continue;
		}

		if (unlikely(bi->discard)) {
			xsk_buff_free(bi->xdp);
			bi->xdp = NULL;
			bi->discard = false;
			ixgbe_inc_ntc(rx_ring);
			continue;
		}

		bi->xdp->data_end = bi->xdp->data + size;
		xsk_buff_dma_sync_for_cpu(bi->xdp, rx_ring->xsk_pool);
		xdp_res = ixgbe_run_xdp_zc(adapter, rx_ring, bi->xdp);

		if (xdp_res) {
			if (xdp_res & (IXGBE_XDP_TX | IXGBE_XDP_REDIR))
				xdp_xmit |= xdp_res;
			else
				xsk_buff_free(bi->xdp);

			bi->xdp = NULL;
			total_rx_packets++;
			total_rx_bytes += size;

			cleaned_count++;
			ixgbe_inc_ntc(rx_ring);
			continue;
		}

		/* XDP_PASS path */
		skb = ixgbe_construct_skb_zc(rx_ring, bi);
		if (!skb) {
			rx_ring->rx_stats.alloc_rx_buff_failed++;
			break;
		}

		cleaned_count++;
		ixgbe_inc_ntc(rx_ring);

		if (eth_skb_pad(skb))
			continue;

		total_rx_bytes += skb->len;
		total_rx_packets++;

		ixgbe_process_skb_fields(rx_ring, rx_desc, skb);
		ixgbe_rx_skb(q_vector, skb);
	}

	if (xdp_xmit & IXGBE_XDP_REDIR)
		xdp_do_flush_map();

	if (xdp_xmit & IXGBE_XDP_TX) {
		struct ixgbe_ring *ring = adapter->xdp_ring[smp_processor_id()];

		/* Force memory writes to complete before letting h/w
		 * know there are new descriptors to fetch.
		 */
		wmb();
		writel(ring->next_to_use, ring->tail);
	}

	u64_stats_update_begin(&rx_ring->syncp);
	rx_ring->stats.packets += total_rx_packets;
	rx_ring->stats.bytes += total_rx_bytes;
	u64_stats_update_end(&rx_ring->syncp);
	q_vector->rx.total_packets += total_rx_packets;
	q_vector->rx.total_bytes += total_rx_bytes;

	if (xsk_uses_need_wakeup(rx_ring->xsk_pool)) {
		if (failure || rx_ring->next_to_clean == rx_ring->next_to_use)
			xsk_set_rx_need_wakeup(rx_ring->xsk_pool);
		else
			xsk_clear_rx_need_wakeup(rx_ring->xsk_pool);

		return (int)total_rx_packets;
	}

	return failure ? budget : (int)total_rx_packets;
}

void ixgbe_xsk_clean_rx_ring(struct ixgbe_ring *rx_ring)
{
	struct ixgbe_rx_buffer *bi;
	u16 i;

	for (i = 0; i < rx_ring->count; i++) {
		bi = &rx_ring->rx_buffer_info[i];

		if (!bi->xdp)
			continue;

		xsk_buff_free(bi->xdp);
		bi->xdp = NULL;
	}
}

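/* Pull up to @budget descriptors from the XSK TX queue and post them to
 * the XDP ring; the tail register is written once after the loop to keep
 * MMIO writes to a minimum.
 */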
static bool ixgbe_xmit_zc(struct ixgbe_ring *xdp_ring, unsigned int budget)
{
	struct xsk_buff_pool *pool = xdp_ring->xsk_pool;
	union ixgbe_adv_tx_desc *tx_desc = NULL;
	struct ixgbe_tx_buffer *tx_bi;
	bool work_done = true;
	struct xdp_desc desc;
	dma_addr_t dma;
	u32 cmd_type;

	while (budget-- > 0) {
		if (unlikely(!ixgbe_desc_unused(xdp_ring)) ||
		    !netif_carrier_ok(xdp_ring->netdev)) {
			work_done = false;
			break;
		}

		if (!xsk_tx_peek_desc(pool, &desc))
			break;

		dma = xsk_buff_raw_get_dma(pool, desc.addr);
		xsk_buff_raw_dma_sync_for_device(pool, dma, desc.len);

		tx_bi = &xdp_ring->tx_buffer_info[xdp_ring->next_to_use];
		tx_bi->bytecount = desc.len;
		tx_bi->xdpf = NULL;
		tx_bi->gso_segs = 1;

		tx_desc = IXGBE_TX_DESC(xdp_ring, xdp_ring->next_to_use);
		tx_desc->read.buffer_addr = cpu_to_le64(dma);

		/* put descriptor type bits */
		cmd_type = IXGBE_ADVTXD_DTYP_DATA |
			   IXGBE_ADVTXD_DCMD_DEXT |
			   IXGBE_ADVTXD_DCMD_IFCS;
		cmd_type |= desc.len | IXGBE_TXD_CMD;
		tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);
		tx_desc->read.olinfo_status =
			cpu_to_le32(desc.len << IXGBE_ADVTXD_PAYLEN_SHIFT);

		xdp_ring->next_to_use++;
		if (xdp_ring->next_to_use == xdp_ring->count)
			xdp_ring->next_to_use = 0;
	}

	if (tx_desc) {
		ixgbe_xdp_ring_update_tail(xdp_ring);
		xsk_tx_release(pool);
	}

	return !!budget && work_done;
}

static void ixgbe_clean_xdp_tx_buffer(struct ixgbe_ring *tx_ring,
				      struct ixgbe_tx_buffer *tx_bi)
{
	xdp_return_frame(tx_bi->xdpf);
	dma_unmap_single(tx_ring->dev,
			 dma_unmap_addr(tx_bi, dma),
			 dma_unmap_len(tx_bi, len), DMA_TO_DEVICE);
	dma_unmap_len_set(tx_bi, len, 0);
}

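/* Reclaim completed descriptors on the zero-copy XDP TX ring: buffers that
 * came from the XSK pool are completed back to user space, ndo_xdp_xmit
 * frames are unmapped and returned, then ixgbe_xmit_zc() is called to send
 * more pending descriptors.
 */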
bool ixgbe_clean_xdp_tx_irq(struct ixgbe_q_vector *q_vector,
			    struct ixgbe_ring *tx_ring, int napi_budget)
{
	u16 ntc = tx_ring->next_to_clean, ntu = tx_ring->next_to_use;
	unsigned int total_packets = 0, total_bytes = 0;
	struct xsk_buff_pool *pool = tx_ring->xsk_pool;
	union ixgbe_adv_tx_desc *tx_desc;
	struct ixgbe_tx_buffer *tx_bi;
	u32 xsk_frames = 0;

	tx_bi = &tx_ring->tx_buffer_info[ntc];
	tx_desc = IXGBE_TX_DESC(tx_ring, ntc);

	while (ntc != ntu) {
		if (!(tx_desc->wb.status & cpu_to_le32(IXGBE_TXD_STAT_DD)))
			break;

		total_bytes += tx_bi->bytecount;
		total_packets += tx_bi->gso_segs;

		if (tx_bi->xdpf)
			ixgbe_clean_xdp_tx_buffer(tx_ring, tx_bi);
		else
			xsk_frames++;

		tx_bi->xdpf = NULL;

		tx_bi++;
		tx_desc++;
		ntc++;
		if (unlikely(ntc == tx_ring->count)) {
			ntc = 0;
			tx_bi = tx_ring->tx_buffer_info;
			tx_desc = IXGBE_TX_DESC(tx_ring, 0);
		}

		/* issue prefetch for next Tx descriptor */
		prefetch(tx_desc);
	}

	tx_ring->next_to_clean = ntc;

	u64_stats_update_begin(&tx_ring->syncp);
	tx_ring->stats.bytes += total_bytes;
	tx_ring->stats.packets += total_packets;
	u64_stats_update_end(&tx_ring->syncp);
	q_vector->tx.total_bytes += total_bytes;
	q_vector->tx.total_packets += total_packets;

	if (xsk_frames)
		xsk_tx_completed(pool, xsk_frames);

	if (xsk_uses_need_wakeup(pool))
		xsk_set_tx_need_wakeup(pool);

	return ixgbe_xmit_zc(tx_ring, q_vector->tx.work_limit);
}

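/* .ndo_xsk_wakeup callback: make sure the queue's NAPI context gets
 * scheduled so pending AF_XDP TX descriptors are sent and RX buffers are
 * refilled.
 */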
int ixgbe_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags)
{
	struct ixgbe_adapter *adapter = netdev_priv(dev);
	struct ixgbe_ring *ring;

	if (test_bit(__IXGBE_DOWN, &adapter->state))
		return -ENETDOWN;

	if (!READ_ONCE(adapter->xdp_prog))
		return -ENXIO;

	if (qid >= adapter->num_xdp_queues)
		return -ENXIO;

	ring = adapter->xdp_ring[qid];

	if (test_bit(__IXGBE_TX_DISABLED, &ring->state))
		return -ENETDOWN;

	if (!ring->xsk_pool)
		return -ENXIO;

	if (!napi_if_scheduled_mark_missed(&ring->q_vector->napi)) {
		u64 eics = BIT_ULL(ring->q_vector->v_idx);

		ixgbe_irq_rearm_queues(adapter, eics);
	}

	return 0;
}

void ixgbe_xsk_clean_tx_ring(struct ixgbe_ring *tx_ring)
{
	u16 ntc = tx_ring->next_to_clean, ntu = tx_ring->next_to_use;
	struct xsk_buff_pool *pool = tx_ring->xsk_pool;
	struct ixgbe_tx_buffer *tx_bi;
	u32 xsk_frames = 0;

	while (ntc != ntu) {
		tx_bi = &tx_ring->tx_buffer_info[ntc];

		if (tx_bi->xdpf)
			ixgbe_clean_xdp_tx_buffer(tx_ring, tx_bi);
		else
			xsk_frames++;

		tx_bi->xdpf = NULL;

		ntc++;
		if (ntc == tx_ring->count)
			ntc = 0;
	}

	if (xsk_frames)
		xsk_tx_completed(pool, xsk_frames);
}