// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
/* Copyright (C) 2018 Netronome Systems, Inc */
/* Copyright (C) 2021 Corigine, Inc */

#include <linux/bpf_trace.h>
#include <linux/netdevice.h>

#include "../nfp_app.h"
#include "../nfp_net.h"
#include "../nfp_net_dp.h"
#include "../nfp_net_xsk.h"
#include "nfd3.h"

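/* XDP_TX: recycle the AF_XDP RX buffer directly onto the XDP TX ring without
 * copying; the buffer is freed back to the pool once TX completion is seen.
 */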
static bool
nfp_nfd3_xsk_tx_xdp(const struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec,
                    struct nfp_net_rx_ring *rx_ring,
                    struct nfp_net_tx_ring *tx_ring,
                    struct nfp_net_xsk_rx_buf *xrxbuf, unsigned int pkt_len,
                    int pkt_off)
{
        struct xsk_buff_pool *pool = r_vec->xsk_pool;
        struct nfp_nfd3_tx_buf *txbuf;
        struct nfp_nfd3_tx_desc *txd;
        u32 wr_idx;

        if (nfp_net_tx_space(tx_ring) < 1)
                return false;

        xsk_buff_raw_dma_sync_for_device(pool, xrxbuf->dma_addr + pkt_off,
                                         pkt_len);

        wr_idx = D_IDX(tx_ring, tx_ring->wr_p);

        txbuf = &tx_ring->txbufs[wr_idx];
        txbuf->xdp = xrxbuf->xdp;
        txbuf->real_len = pkt_len;
        txbuf->is_xsk_tx = true;

        /* Build TX descriptor */
        txd = &tx_ring->txds[wr_idx];
        txd->offset_eop = NFD3_DESC_TX_EOP;
        txd->dma_len = cpu_to_le16(pkt_len);
        nfp_desc_set_dma_addr_40b(txd, xrxbuf->dma_addr + pkt_off);
        txd->data_len = cpu_to_le16(pkt_len);

        txd->flags = 0;
        txd->mss = 0;
        txd->lso_hdrlen = 0;

        tx_ring->wr_ptr_add++;
        tx_ring->wr_p++;

        return true;
}

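/* XDP_PASS (and representor traffic): copy the frame out of the AF_XDP
 * buffer into a freshly allocated skb, hand it to the stack and return the
 * buffer to the XSK pool.
 */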
static void nfp_nfd3_xsk_rx_skb(struct nfp_net_rx_ring *rx_ring,
                                const struct nfp_net_rx_desc *rxd,
                                struct nfp_net_xsk_rx_buf *xrxbuf,
                                const struct nfp_meta_parsed *meta,
                                unsigned int pkt_len,
                                bool meta_xdp,
                                unsigned int *skbs_polled)
{
        struct nfp_net_r_vector *r_vec = rx_ring->r_vec;
        struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
        struct net_device *netdev;
        struct sk_buff *skb;

        if (likely(!meta->portid)) {
                netdev = dp->netdev;
        } else {
                struct nfp_net *nn = netdev_priv(dp->netdev);

                netdev = nfp_app_dev_get(nn->app, meta->portid, NULL);
                if (unlikely(!netdev)) {
                        nfp_net_xsk_rx_drop(r_vec, xrxbuf);
                        return;
                }
                nfp_repr_inc_rx_stats(netdev, pkt_len);
        }

        skb = napi_alloc_skb(&r_vec->napi, pkt_len);
        if (!skb) {
                nfp_net_xsk_rx_drop(r_vec, xrxbuf);
                return;
        }
        skb_put_data(skb, xrxbuf->xdp->data, pkt_len);

        skb->mark = meta->mark;
        skb_set_hash(skb, meta->hash, meta->hash_type);

        skb_record_rx_queue(skb, rx_ring->idx);
        skb->protocol = eth_type_trans(skb, netdev);

        nfp_nfd3_rx_csum(dp, r_vec, rxd, meta, skb);

        if (unlikely(!nfp_net_vlan_strip(skb, rxd, meta))) {
                dev_kfree_skb_any(skb);
                nfp_net_xsk_rx_drop(r_vec, xrxbuf);
                return;
        }

        if (meta_xdp)
                skb_metadata_set(skb,
                                 xrxbuf->xdp->data - xrxbuf->xdp->data_meta);

        napi_gro_receive(&rx_ring->r_vec->napi, skb);

        nfp_net_xsk_rx_free(xrxbuf);

        (*skbs_polled)++;
}

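/* AF_XDP RX poll loop: pull completed descriptors off the RX ring, run the
 * XDP program on each zero-copy buffer and dispatch on the verdict
 * (PASS/TX/REDIRECT/ABORTED/DROP).
 */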
static unsigned int
nfp_nfd3_xsk_rx(struct nfp_net_rx_ring *rx_ring, int budget,
                unsigned int *skbs_polled)
{
        struct nfp_net_r_vector *r_vec = rx_ring->r_vec;
        struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
        struct nfp_net_tx_ring *tx_ring;
        struct bpf_prog *xdp_prog;
        bool xdp_redir = false;
        unsigned int pkts_polled = 0;

        xdp_prog = READ_ONCE(dp->xdp_prog);
        tx_ring = r_vec->xdp_ring;

        while (pkts_polled < budget) {
                unsigned int meta_len, data_len, pkt_len, pkt_off;
                struct nfp_net_xsk_rx_buf *xrxbuf;
                struct nfp_net_rx_desc *rxd;
                struct nfp_meta_parsed meta;
                int idx, act;

                idx = D_IDX(rx_ring, rx_ring->rd_p);

                rxd = &rx_ring->rxds[idx];
                if (!(rxd->rxd.meta_len_dd & PCIE_DESC_RX_DD))
                        break;

                rx_ring->rd_p++;
                pkts_polled++;

                xrxbuf = &rx_ring->xsk_rxbufs[idx];

                /* If starved of buffers "drop" it and scream. */
                if (rx_ring->rd_p >= rx_ring->wr_p) {
                        nn_dp_warn(dp, "Starved of RX buffers\n");
                        nfp_net_xsk_rx_drop(r_vec, xrxbuf);
                        break;
                }

                /* Memory barrier to ensure that we won't do other reads
                 * before the DD bit.
                 */
                dma_rmb();

                memset(&meta, 0, sizeof(meta));

                /* Only supporting AF_XDP with dynamic metadata so buffer layout
                 * is always:
                 *
                 *  ---------------------------------------------------------
                 * |  off | metadata  |             packet           | XXXX  |
                 *  ---------------------------------------------------------
                 */
                meta_len = rxd->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK;
                data_len = le16_to_cpu(rxd->rxd.data_len);
                pkt_len = data_len - meta_len;

                if (unlikely(meta_len > NFP_NET_MAX_PREPEND)) {
                        nn_dp_warn(dp, "Oversized RX packet metadata %u\n",
                                   meta_len);
                        nfp_net_xsk_rx_drop(r_vec, xrxbuf);
                        continue;
                }

                u64_stats_update_begin(&r_vec->rx_sync);
                r_vec->rx_pkts++;
                r_vec->rx_bytes += pkt_len;
                u64_stats_update_end(&r_vec->rx_sync);

                xrxbuf->xdp->data += meta_len;
                xrxbuf->xdp->data_end = xrxbuf->xdp->data + pkt_len;
                xdp_set_data_meta_invalid(xrxbuf->xdp);
                xsk_buff_dma_sync_for_cpu(xrxbuf->xdp);
                net_prefetch(xrxbuf->xdp->data);

                if (meta_len) {
                        if (unlikely(nfp_nfd3_parse_meta(dp->netdev, &meta,
                                                         xrxbuf->xdp->data -
                                                         meta_len,
                                                         xrxbuf->xdp->data,
                                                         pkt_len, meta_len))) {
                                nn_dp_warn(dp, "Invalid RX packet metadata\n");
                                nfp_net_xsk_rx_drop(r_vec, xrxbuf);
                                continue;
                        }

                        if (unlikely(meta.portid)) {
                                struct nfp_net *nn = netdev_priv(dp->netdev);

                                if (meta.portid != NFP_META_PORT_ID_CTRL) {
                                        nfp_nfd3_xsk_rx_skb(rx_ring, rxd,
                                                            xrxbuf, &meta,
                                                            pkt_len, false,
                                                            skbs_polled);
                                        continue;
                                }

                                nfp_app_ctrl_rx_raw(nn->app, xrxbuf->xdp->data,
                                                    pkt_len);
                                nfp_net_xsk_rx_free(xrxbuf);
                                continue;
                        }
                }

                act = bpf_prog_run_xdp(xdp_prog, xrxbuf->xdp);

                pkt_len = xrxbuf->xdp->data_end - xrxbuf->xdp->data;
                pkt_off = xrxbuf->xdp->data - xrxbuf->xdp->data_hard_start;

                switch (act) {
                case XDP_PASS:
                        nfp_nfd3_xsk_rx_skb(rx_ring, rxd, xrxbuf, &meta, pkt_len,
                                            true, skbs_polled);
                        break;
                case XDP_TX:
                        if (!nfp_nfd3_xsk_tx_xdp(dp, r_vec, rx_ring, tx_ring,
                                                 xrxbuf, pkt_len, pkt_off))
                                nfp_net_xsk_rx_drop(r_vec, xrxbuf);
                        else
                                nfp_net_xsk_rx_unstash(xrxbuf);
                        break;
                case XDP_REDIRECT:
                        if (xdp_do_redirect(dp->netdev, xrxbuf->xdp, xdp_prog)) {
                                nfp_net_xsk_rx_drop(r_vec, xrxbuf);
                        } else {
                                nfp_net_xsk_rx_unstash(xrxbuf);
                                xdp_redir = true;
                        }
                        break;
                default:
                        bpf_warn_invalid_xdp_action(dp->netdev, xdp_prog, act);
                        fallthrough;
                case XDP_ABORTED:
                        trace_xdp_exception(dp->netdev, xdp_prog, act);
                        fallthrough;
                case XDP_DROP:
                        nfp_net_xsk_rx_drop(r_vec, xrxbuf);
                        break;
                }
        }

        nfp_net_xsk_rx_ring_fill_freelist(r_vec->rx_ring);

        if (xdp_redir)
                xdp_do_flush();

        if (tx_ring->wr_ptr_add)
                nfp_net_tx_xmit_more_flush(tx_ring);

        return pkts_polled;
}

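/* Release an XDP_TX buffer back to its XSK pool on TX completion. */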
void nfp_nfd3_xsk_tx_free(struct nfp_nfd3_tx_buf *txbuf)
{
        xsk_buff_free(txbuf->xdp);

        txbuf->dma_addr = 0;
        txbuf->real_len = 0;
}

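/* Reconcile the XDP TX ring with the device's QCP read pointer, accounting
 * completed packets/bytes and reporting AF_XDP TX completions to the pool.
 */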
static bool nfp_nfd3_xsk_complete(struct nfp_net_tx_ring *tx_ring)
{
        struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
        u32 done_pkts = 0, done_bytes = 0, reused = 0;
        bool done_all;
        int idx, todo;
        u32 qcp_rd_p;

        if (tx_ring->wr_p == tx_ring->rd_p)
                return true;

        /* Work out how many descriptors have been transmitted. */
        qcp_rd_p = nfp_qcp_rd_ptr_read(tx_ring->qcp_q);

        if (qcp_rd_p == tx_ring->qcp_rd_p)
                return true;

        todo = D_IDX(tx_ring, qcp_rd_p - tx_ring->qcp_rd_p);

        done_all = todo <= NFP_NET_XDP_MAX_COMPLETE;
        todo = min(todo, NFP_NET_XDP_MAX_COMPLETE);

        tx_ring->qcp_rd_p = D_IDX(tx_ring, tx_ring->qcp_rd_p + todo);

        done_pkts = todo;
        while (todo--) {
                struct nfp_nfd3_tx_buf *txbuf;

                idx = D_IDX(tx_ring, tx_ring->rd_p);
                tx_ring->rd_p++;

                txbuf = &tx_ring->txbufs[idx];
                if (unlikely(!txbuf->real_len))
                        continue;

                done_bytes += txbuf->real_len;
                txbuf->real_len = 0;

                if (txbuf->is_xsk_tx) {
                        nfp_nfd3_xsk_tx_free(txbuf);
                        reused++;
                }
        }

        u64_stats_update_begin(&r_vec->tx_sync);
        r_vec->tx_bytes += done_bytes;
        r_vec->tx_pkts += done_pkts;
        u64_stats_update_end(&r_vec->tx_sync);

        xsk_tx_completed(r_vec->xsk_pool, done_pkts - reused);

        WARN_ONCE(tx_ring->wr_p - tx_ring->rd_p > tx_ring->cnt,
                  "XDP TX ring corruption rd_p=%u wr_p=%u cnt=%u\n",
                  tx_ring->rd_p, tx_ring->wr_p, tx_ring->cnt);

        return done_all;
}

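/* Drain the AF_XDP TX queue: peek up to NFP_NET_XSK_TX_BATCH descriptors,
 * sync their buffers for device access, build the NFD3 TX descriptors and
 * kick the queue controller.
 */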
static void nfp_nfd3_xsk_tx(struct nfp_net_tx_ring *tx_ring)
{
        struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
        struct xdp_desc desc[NFP_NET_XSK_TX_BATCH];
        struct xsk_buff_pool *xsk_pool;
        struct nfp_nfd3_tx_desc *txd;
        u32 pkts = 0, wr_idx;
        u32 i, got;

        xsk_pool = r_vec->xsk_pool;

        while (nfp_net_tx_space(tx_ring) >= NFP_NET_XSK_TX_BATCH) {
                for (i = 0; i < NFP_NET_XSK_TX_BATCH; i++)
                        if (!xsk_tx_peek_desc(xsk_pool, &desc[i]))
                                break;
                got = i;
                if (!got)
                        break;

                wr_idx = D_IDX(tx_ring, tx_ring->wr_p + i);
                prefetchw(&tx_ring->txds[wr_idx]);

                for (i = 0; i < got; i++)
                        xsk_buff_raw_dma_sync_for_device(xsk_pool, desc[i].addr,
                                                         desc[i].len);

                for (i = 0; i < got; i++) {
                        wr_idx = D_IDX(tx_ring, tx_ring->wr_p + i);

                        tx_ring->txbufs[wr_idx].real_len = desc[i].len;
                        tx_ring->txbufs[wr_idx].is_xsk_tx = false;

                        /* Build TX descriptor. */
                        txd = &tx_ring->txds[wr_idx];
                        nfp_desc_set_dma_addr_40b(txd,
                                                  xsk_buff_raw_get_dma(xsk_pool, desc[i].addr));
                        txd->offset_eop = NFD3_DESC_TX_EOP;
                        txd->dma_len = cpu_to_le16(desc[i].len);
                        txd->data_len = cpu_to_le16(desc[i].len);
                }

                tx_ring->wr_p += got;
                pkts += got;
        }

        if (!pkts)
                return;

        xsk_tx_release(xsk_pool);
        /* Ensure all records are visible before incrementing write counter. */
        wmb();
        nfp_qcp_wr_ptr_add(tx_ring->qcp_q, pkts);
}

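/* NAPI poll callback for AF_XDP ring vectors: receive first, then service
 * TX completions and push any pending AF_XDP transmissions.
 */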
int nfp_nfd3_xsk_poll(struct napi_struct *napi, int budget)
{
        struct nfp_net_r_vector *r_vec =
                container_of(napi, struct nfp_net_r_vector, napi);
        unsigned int pkts_polled, skbs = 0;

        pkts_polled = nfp_nfd3_xsk_rx(r_vec->rx_ring, budget, &skbs);

        if (pkts_polled < budget) {
                if (r_vec->tx_ring)
                        nfp_nfd3_tx_complete(r_vec->tx_ring, budget);

                if (!nfp_nfd3_xsk_complete(r_vec->xdp_ring))
                        pkts_polled = budget;

                nfp_nfd3_xsk_tx(r_vec->xdp_ring);

                if (pkts_polled < budget && napi_complete_done(napi, skbs))
                        nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry);
        }

        return pkts_polled;
}