// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause)
/* QLogic qede NIC Driver
 * Copyright (c) 2015-2017 QLogic Corporation
 * Copyright (c) 2019-2020 Marvell International Ltd.
 */

#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <linux/bpf_trace.h>
#include <net/udp_tunnel.h>
#include <linux/ip.h>
#include <net/ipv6.h>
#include <net/tcp.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <net/ip6_checksum.h>
#include "qede_ptp.h"

#include <linux/qed/qed_if.h>
#include "qede.h"

/*********************************
 * Content also used by slowpath *
 *********************************/
int qede_alloc_rx_buffer(struct qede_rx_queue *rxq, bool allow_lazy)
{
	struct sw_rx_data *sw_rx_data;
	struct eth_rx_bd *rx_bd;
	dma_addr_t mapping;
	struct page *data;

	/* In case lazy-allocation is allowed, postpone allocation until the
	 * end of the NAPI run. We'd still need to make sure the Rx ring has
	 * sufficient buffers to guarantee an additional Rx interrupt.
	 */
	if (allow_lazy && likely(rxq->filled_buffers > 12)) {
		rxq->filled_buffers--;
		return 0;
	}

	data = alloc_pages(GFP_ATOMIC, 0);
	if (unlikely(!data))
		return -ENOMEM;

	/* Map the entire page as it would be used
	 * for multiple RX buffer segment size mapping.
	 */
	mapping = dma_map_page(rxq->dev, data, 0,
			       PAGE_SIZE, rxq->data_direction);
	if (unlikely(dma_mapping_error(rxq->dev, mapping))) {
		__free_page(data);
		return -ENOMEM;
	}

	sw_rx_data = &rxq->sw_rx_ring[rxq->sw_rx_prod & NUM_RX_BDS_MAX];
	sw_rx_data->page_offset = 0;
	sw_rx_data->data = data;
	sw_rx_data->mapping = mapping;

	/* Advance PROD and get BD pointer */
	rx_bd = (struct eth_rx_bd *)qed_chain_produce(&rxq->rx_bd_ring);
	WARN_ON(!rx_bd);
	rx_bd->addr.hi = cpu_to_le32(upper_32_bits(mapping));
	rx_bd->addr.lo = cpu_to_le32(lower_32_bits(mapping) +
				     rxq->rx_headroom);

	rxq->sw_rx_prod++;
	rxq->filled_buffers++;

	return 0;
}
/* Unmap the data and free skb */
int qede_free_tx_pkt(struct qede_dev *edev, struct qede_tx_queue *txq, int *len)
{
	u16 idx = txq->sw_tx_cons;
	struct sk_buff *skb = txq->sw_tx_ring.skbs[idx].skb;
	struct eth_tx_1st_bd *first_bd;
	struct eth_tx_bd *tx_data_bd;
	int bds_consumed = 0;
	int nbds;
	bool data_split = txq->sw_tx_ring.skbs[idx].flags & QEDE_TSO_SPLIT_BD;
	int i, split_bd_len = 0;

	if (unlikely(!skb)) {
		DP_NOTICE(edev,
			  "skb is null for txq idx=%d txq->sw_tx_cons=%d txq->sw_tx_prod=%d\n",
			  idx, txq->sw_tx_cons, txq->sw_tx_prod);
		return -1;
	}

	*len = skb->len;

	first_bd = (struct eth_tx_1st_bd *)qed_chain_consume(&txq->tx_pbl);

	bds_consumed++;

	nbds = first_bd->data.nbds;

	if (data_split) {
		struct eth_tx_bd *split = (struct eth_tx_bd *)
			qed_chain_consume(&txq->tx_pbl);
		split_bd_len = BD_UNMAP_LEN(split);
		bds_consumed++;
	}
	dma_unmap_single(&edev->pdev->dev, BD_UNMAP_ADDR(first_bd),
			 BD_UNMAP_LEN(first_bd) + split_bd_len, DMA_TO_DEVICE);

	/* Unmap the data of the skb frags */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++, bds_consumed++) {
		tx_data_bd = (struct eth_tx_bd *)
			qed_chain_consume(&txq->tx_pbl);
		dma_unmap_page(&edev->pdev->dev, BD_UNMAP_ADDR(tx_data_bd),
			       BD_UNMAP_LEN(tx_data_bd), DMA_TO_DEVICE);
	}

	while (bds_consumed++ < nbds)
		qed_chain_consume(&txq->tx_pbl);

	/* Free skb */
	dev_kfree_skb_any(skb);
	txq->sw_tx_ring.skbs[idx].skb = NULL;
	txq->sw_tx_ring.skbs[idx].flags = 0;

	return 0;
}
/* Unmap the data and free skb when mapping failed during start_xmit */
static void qede_free_failed_tx_pkt(struct qede_tx_queue *txq,
				    struct eth_tx_1st_bd *first_bd,
				    int nbd, bool data_split)
{
	u16 idx = txq->sw_tx_prod;
	struct sk_buff *skb = txq->sw_tx_ring.skbs[idx].skb;
	struct eth_tx_bd *tx_data_bd;
	int i, split_bd_len = 0;

	/* Return prod to its position before this skb was handled */
	qed_chain_set_prod(&txq->tx_pbl,
			   le16_to_cpu(txq->tx_db.data.bd_prod), first_bd);

	first_bd = (struct eth_tx_1st_bd *)qed_chain_produce(&txq->tx_pbl);

	if (data_split) {
		struct eth_tx_bd *split = (struct eth_tx_bd *)
			qed_chain_produce(&txq->tx_pbl);
		split_bd_len = BD_UNMAP_LEN(split);
		nbd--;
	}

	dma_unmap_single(txq->dev, BD_UNMAP_ADDR(first_bd),
			 BD_UNMAP_LEN(first_bd) + split_bd_len, DMA_TO_DEVICE);

	/* Unmap the data of the skb frags */
	for (i = 0; i < nbd; i++) {
		tx_data_bd = (struct eth_tx_bd *)
			qed_chain_produce(&txq->tx_pbl);
		if (tx_data_bd->nbytes)
			dma_unmap_page(txq->dev,
				       BD_UNMAP_ADDR(tx_data_bd),
				       BD_UNMAP_LEN(tx_data_bd), DMA_TO_DEVICE);
	}

	/* Return again prod to its position before this skb was handled */
	qed_chain_set_prod(&txq->tx_pbl,
			   le16_to_cpu(txq->tx_db.data.bd_prod), first_bd);

	/* Free skb */
	dev_kfree_skb_any(skb);
	txq->sw_tx_ring.skbs[idx].skb = NULL;
	txq->sw_tx_ring.skbs[idx].flags = 0;
}
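
/* Classify an skb for transmission: returns a mask of XMIT_* flags
 * (checksum/LSO/tunnel) and sets *ipv6_ext when the packet carries IPv6
 * extension headers.
 */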
static u32 qede_xmit_type(struct sk_buff *skb, int *ipv6_ext)
{
	u32 rc = XMIT_L4_CSUM;
	__be16 l3_proto;

	if (skb->ip_summed != CHECKSUM_PARTIAL)
		return XMIT_PLAIN;

	l3_proto = vlan_get_protocol(skb);
	if (l3_proto == htons(ETH_P_IPV6) &&
	    (ipv6_hdr(skb)->nexthdr == NEXTHDR_IPV6))
		*ipv6_ext = 1;

	if (skb->encapsulation) {
		rc |= XMIT_ENC;
		if (skb_is_gso(skb)) {
			unsigned short gso_type = skb_shinfo(skb)->gso_type;

			if ((gso_type & SKB_GSO_UDP_TUNNEL_CSUM) ||
			    (gso_type & SKB_GSO_GRE_CSUM))
				rc |= XMIT_ENC_GSO_L4_CSUM;

			rc |= XMIT_LSO;
			return rc;
		}
	}

	if (skb_is_gso(skb))
		rc |= XMIT_LSO;

	return rc;
}
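
/* Fill the 2nd/3rd Tx BDs with the L4 parsing hints the firmware needs for
 * IPv6 packets carrying extension headers, which the HW parser cannot
 * handle on its own.
 */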
static void qede_set_params_for_ipv6_ext(struct sk_buff *skb,
					 struct eth_tx_2nd_bd *second_bd,
					 struct eth_tx_3rd_bd *third_bd)
{
	u8 l4_proto;
	u16 bd2_bits1 = 0, bd2_bits2 = 0;

	bd2_bits1 |= (1 << ETH_TX_DATA_2ND_BD_IPV6_EXT_SHIFT);

	bd2_bits2 |= ((((u8 *)skb_transport_header(skb) - skb->data) >> 1) &
		     ETH_TX_DATA_2ND_BD_L4_HDR_START_OFFSET_W_MASK)
		    << ETH_TX_DATA_2ND_BD_L4_HDR_START_OFFSET_W_SHIFT;

	bd2_bits1 |= (ETH_L4_PSEUDO_CSUM_CORRECT_LENGTH <<
		      ETH_TX_DATA_2ND_BD_L4_PSEUDO_CSUM_MODE_SHIFT);

	if (vlan_get_protocol(skb) == htons(ETH_P_IPV6))
		l4_proto = ipv6_hdr(skb)->nexthdr;
	else
		l4_proto = ip_hdr(skb)->protocol;

	if (l4_proto == IPPROTO_UDP)
		bd2_bits1 |= 1 << ETH_TX_DATA_2ND_BD_L4_UDP_SHIFT;

	if (third_bd)
		third_bd->data.bitfields |=
			cpu_to_le16(((tcp_hdrlen(skb) / 4) &
				ETH_TX_DATA_3RD_BD_TCP_HDR_LEN_DW_MASK) <<
				ETH_TX_DATA_3RD_BD_TCP_HDR_LEN_DW_SHIFT);

	second_bd->data.bitfields1 = cpu_to_le16(bd2_bits1);
	second_bd->data.bitfields2 = cpu_to_le16(bd2_bits2);
}
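
/* DMA-map a single skb fragment and program its address/length into the
 * given Tx BD.
 */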
static int map_frag_to_bd(struct qede_tx_queue *txq,
			  skb_frag_t *frag, struct eth_tx_bd *bd)
{
	dma_addr_t mapping;

	/* Map skb non-linear frag data for DMA */
	mapping = skb_frag_dma_map(txq->dev, frag, 0,
				   skb_frag_size(frag), DMA_TO_DEVICE);
	if (unlikely(dma_mapping_error(txq->dev, mapping)))
		return -ENOMEM;

	/* Setup the data pointer of the frag data */
	BD_SET_UNMAP_ADDR_LEN(bd, mapping, skb_frag_size(frag));

	return 0;
}
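
/* Return the number of header bytes (up to and including the TCP header)
 * at the start of the linear data; for encapsulated packets the inner
 * headers are used.
 */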
static u16 qede_get_skb_hlen(struct sk_buff *skb, bool is_encap_pkt)
{
	if (is_encap_pkt)
		return (skb_inner_transport_header(skb) +
			inner_tcp_hdrlen(skb) - skb->data);
	else
		return (skb_transport_header(skb) +
			tcp_hdrlen(skb) - skb->data);
}
/* +2 for 1st BD for headers and 2nd BD for headlen (if required) */
#if ((MAX_SKB_FRAGS + 2) > ETH_TX_MAX_BDS_PER_NON_LSO_PACKET)
static bool qede_pkt_req_lin(struct sk_buff *skb, u8 xmit_type)
{
	int allowed_frags = ETH_TX_MAX_BDS_PER_NON_LSO_PACKET - 1;

	if (xmit_type & XMIT_LSO) {
		int hlen;

		hlen = qede_get_skb_hlen(skb, xmit_type & XMIT_ENC);

		/* linear payload would require its own BD */
		if (skb_headlen(skb) > hlen)
			allowed_frags--;
	}

	return (skb_shinfo(skb)->nr_frags > allowed_frags);
}
#endif
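
/* Ring the Tx doorbell with the producer value currently cached in
 * txq->tx_db.
 */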
static inline void qede_update_tx_producer(struct qede_tx_queue *txq)
{
	/* wmb makes sure that the BDs data is updated before updating the
	 * producer, otherwise FW may read old data from the BDs.
	 */
	wmb();
	barrier();
	writel(txq->tx_db.raw, txq->doorbell_addr);

	/* Fence required to flush the write combined buffer, since another
	 * CPU may write to the same doorbell address and data may be lost
	 * due to relaxed order nature of write combined bar.
	 */
	wmb();
}
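
/* Post a single XDP buffer/frame on an XDP Tx queue. The doorbell is not
 * rung here; the caller updates the producer separately.
 */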
static int qede_xdp_xmit(struct qede_tx_queue *txq, dma_addr_t dma, u16 pad,
			 u16 len, struct page *page, struct xdp_frame *xdpf)
{
	struct eth_tx_1st_bd *bd;
	struct sw_tx_xdp *xdp;
	u16 val;

	if (unlikely(qed_chain_get_elem_used(&txq->tx_pbl) >=
		     txq->num_tx_buffers)) {
		txq->stopped_cnt++;
		return -ENOMEM;
	}

	bd = qed_chain_produce(&txq->tx_pbl);
	bd->data.nbds = 1;
	bd->data.bd_flags.bitfields = BIT(ETH_TX_1ST_BD_FLAGS_START_BD_SHIFT);

	val = (len & ETH_TX_DATA_1ST_BD_PKT_LEN_MASK) <<
	       ETH_TX_DATA_1ST_BD_PKT_LEN_SHIFT;

	bd->data.bitfields = cpu_to_le16(val);

	/* We can safely ignore the offset, as it's 0 for XDP */
	BD_SET_UNMAP_ADDR_LEN(bd, dma + pad, len);

	xdp = txq->sw_tx_ring.xdp + txq->sw_tx_prod;
	xdp->mapping = dma;
	xdp->page = page;
	xdp->xdpf = xdpf;

	txq->sw_tx_prod = (txq->sw_tx_prod + 1) % txq->num_tx_buffers;

	return 0;
}
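
/* .ndo_xdp_xmit callback - map and queue a batch of XDP frames on one of
 * the XDP Tx queues, optionally flushing the doorbell.
 */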
int qede_xdp_transmit(struct net_device *dev, int n_frames,
		      struct xdp_frame **frames, u32 flags)
{
	struct qede_dev *edev = netdev_priv(dev);
	struct device *dmadev = &edev->pdev->dev;
	struct qede_tx_queue *xdp_tx;
	struct xdp_frame *xdpf;
	dma_addr_t mapping;
	int i, drops = 0;
	u16 xdp_prod;

	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
		return -EINVAL;

	if (unlikely(!netif_running(dev)))
		return -ENETDOWN;

	i = smp_processor_id() % edev->total_xdp_queues;
	xdp_tx = edev->fp_array[i].xdp_tx;

	spin_lock(&xdp_tx->xdp_tx_lock);

	for (i = 0; i < n_frames; i++) {
		xdpf = frames[i];

		mapping = dma_map_single(dmadev, xdpf->data, xdpf->len,
					 DMA_TO_DEVICE);
		if (unlikely(dma_mapping_error(dmadev, mapping))) {
			xdp_return_frame_rx_napi(xdpf);
			drops++;

			continue;
		}

		if (unlikely(qede_xdp_xmit(xdp_tx, mapping, 0, xdpf->len,
					   NULL, xdpf))) {
			xdp_return_frame_rx_napi(xdpf);
			drops++;
		}
	}

	if (flags & XDP_XMIT_FLUSH) {
		xdp_prod = qed_chain_get_prod_idx(&xdp_tx->tx_pbl);

		xdp_tx->tx_db.data.bd_prod = cpu_to_le16(xdp_prod);
		qede_update_tx_producer(xdp_tx);
	}

	spin_unlock(&xdp_tx->xdp_tx_lock);

	return n_frames - drops;
}
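
/* Check whether the given Tx queue has completions pending in HW. */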
int qede_txq_has_work(struct qede_tx_queue *txq)
{
	u16 hw_bd_cons;

	/* Tell compiler that consumer and producer can change */
	barrier();
	hw_bd_cons = le16_to_cpu(*txq->hw_cons_ptr);
	if (qed_chain_get_cons_idx(&txq->tx_pbl) == hw_bd_cons + 1)
		return 0;

	return hw_bd_cons != qed_chain_get_cons_idx(&txq->tx_pbl);
}
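
/* Reclaim completed XDP Tx descriptors - return transmitted frames and
 * free locally-forwarded Rx pages.
 */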
static void qede_xdp_tx_int(struct qede_dev *edev, struct qede_tx_queue *txq)
{
	struct sw_tx_xdp *xdp_info, *xdp_arr = txq->sw_tx_ring.xdp;
	struct device *dev = &edev->pdev->dev;
	struct xdp_frame *xdpf;
	u16 hw_bd_cons;

	hw_bd_cons = le16_to_cpu(*txq->hw_cons_ptr);
	barrier();

	while (hw_bd_cons != qed_chain_get_cons_idx(&txq->tx_pbl)) {
		xdp_info = xdp_arr + txq->sw_tx_cons;
		xdpf = xdp_info->xdpf;

		if (xdpf) {
			dma_unmap_single(dev, xdp_info->mapping, xdpf->len,
					 DMA_TO_DEVICE);
			xdp_return_frame(xdpf);

			xdp_info->xdpf = NULL;
		} else {
			dma_unmap_page(dev, xdp_info->mapping, PAGE_SIZE,
				       DMA_BIDIRECTIONAL);
			__free_page(xdp_info->page);
		}

		qed_chain_consume(&txq->tx_pbl);
		txq->sw_tx_cons = (txq->sw_tx_cons + 1) % txq->num_tx_buffers;
		txq->xmit_pkts++;
	}
}
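
/* Tx completion handler - free transmitted skbs and wake the queue if it
 * was stopped and there is room again.
 */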
static int qede_tx_int(struct qede_dev *edev, struct qede_tx_queue *txq)
{
	unsigned int pkts_compl = 0, bytes_compl = 0;
	struct netdev_queue *netdev_txq;
	u16 hw_bd_cons;
	int rc;

	netdev_txq = netdev_get_tx_queue(edev->ndev, txq->ndev_txq_id);

	hw_bd_cons = le16_to_cpu(*txq->hw_cons_ptr);
	barrier();

	while (hw_bd_cons != qed_chain_get_cons_idx(&txq->tx_pbl)) {
		int len = 0;

		rc = qede_free_tx_pkt(edev, txq, &len);
		if (rc) {
			DP_NOTICE(edev, "hw_bd_cons = %d, chain_cons=%d\n",
				  hw_bd_cons,
				  qed_chain_get_cons_idx(&txq->tx_pbl));
			break;
		}

		bytes_compl += len;
		pkts_compl++;
		txq->sw_tx_cons = (txq->sw_tx_cons + 1) % txq->num_tx_buffers;
		txq->xmit_pkts++;
	}

	netdev_tx_completed_queue(netdev_txq, pkts_compl, bytes_compl);

	/* Need to make the tx_bd_cons update visible to start_xmit()
	 * before checking for netif_tx_queue_stopped(). Without the
	 * memory barrier, there is a small possibility that
	 * start_xmit() will miss it and cause the queue to be stopped
	 * forever.
	 * On the other hand we need an rmb() here to ensure the proper
	 * ordering of bit testing in the following
	 * netif_tx_queue_stopped(txq) call.
	 */
	smp_mb();

	if (unlikely(netif_tx_queue_stopped(netdev_txq))) {
		/* Taking tx_lock is needed to prevent reenabling the queue
		 * while it's empty. This could have happen if rx_action() gets
		 * suspended in qede_tx_int() after the condition before
		 * netif_tx_wake_queue(), while tx_action (qede_start_xmit()):
		 *
		 * stops the queue->sees fresh tx_bd_cons->releases the queue->
		 * sends some packets consuming the whole queue again->
		 * stops the queue
		 */
		__netif_tx_lock(netdev_txq, smp_processor_id());

		if ((netif_tx_queue_stopped(netdev_txq)) &&
		    (edev->state == QEDE_STATE_OPEN) &&
		    (qed_chain_get_elem_left(&txq->tx_pbl)
		      >= (MAX_SKB_FRAGS + 1))) {
			netif_tx_wake_queue(netdev_txq);
			DP_VERBOSE(edev, NETIF_MSG_TX_DONE,
				   "Wake queue was called\n");
		}

		__netif_tx_unlock(netdev_txq);
	}

	return 0;
}
bool qede_has_rx_work(struct qede_rx_queue *rxq)
{
	u16 hw_comp_cons, sw_comp_cons;

	/* Tell compiler that status block fields can change */
	barrier();

	hw_comp_cons = le16_to_cpu(*rxq->hw_cons_ptr);
	sw_comp_cons = qed_chain_get_cons_idx(&rxq->rx_comp_ring);

	return hw_comp_cons != sw_comp_cons;
}
static inline void qede_rx_bd_ring_consume(struct qede_rx_queue *rxq)
{
	qed_chain_consume(&rxq->rx_bd_ring);
	rxq->sw_rx_cons++;
}
/* This function reuses the buffer(from an offset) from
 * consumer index to producer index in the bd ring
 */
static inline void qede_reuse_page(struct qede_rx_queue *rxq,
				   struct sw_rx_data *curr_cons)
{
	struct eth_rx_bd *rx_bd_prod = qed_chain_produce(&rxq->rx_bd_ring);
	struct sw_rx_data *curr_prod;
	dma_addr_t new_mapping;

	curr_prod = &rxq->sw_rx_ring[rxq->sw_rx_prod & NUM_RX_BDS_MAX];
	*curr_prod = *curr_cons;

	new_mapping = curr_prod->mapping + curr_prod->page_offset;

	rx_bd_prod->addr.hi = cpu_to_le32(upper_32_bits(new_mapping));
	rx_bd_prod->addr.lo = cpu_to_le32(lower_32_bits(new_mapping) +
					  rxq->rx_headroom);

	rxq->sw_rx_prod++;
	curr_cons->data = NULL;
}
/* In case of allocation failures reuse buffers
 * from consumer index to produce buffers for firmware
 */
void qede_recycle_rx_bd_ring(struct qede_rx_queue *rxq, u8 count)
{
	struct sw_rx_data *curr_cons;

	for (; count > 0; count--) {
		curr_cons = &rxq->sw_rx_ring[rxq->sw_rx_cons & NUM_RX_BDS_MAX];
		qede_reuse_page(rxq, curr_cons);
		qede_rx_bd_ring_consume(rxq);
	}
}
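
/* Advance the consumed buffer to the next segment of its page. Once the
 * page is fully used, allocate a replacement and unmap it; otherwise take
 * an extra page reference and re-post the same page to the ring.
 */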
static inline int qede_realloc_rx_buffer(struct qede_rx_queue *rxq,
					 struct sw_rx_data *curr_cons)
{
	/* Move to the next segment in the page */
	curr_cons->page_offset += rxq->rx_buf_seg_size;

	if (curr_cons->page_offset == PAGE_SIZE) {
		if (unlikely(qede_alloc_rx_buffer(rxq, true))) {
			/* Since we failed to allocate new buffer
			 * current buffer can be used again.
			 */
			curr_cons->page_offset -= rxq->rx_buf_seg_size;

			return -ENOMEM;
		}

		dma_unmap_page(rxq->dev, curr_cons->mapping,
			       PAGE_SIZE, rxq->data_direction);
	} else {
		/* Increment refcount of the page as we don't want
		 * network stack to take the ownership of the page
		 * which can be recycled multiple times by the driver.
		 */
		page_ref_inc(curr_cons->data);
		qede_reuse_page(rxq, curr_cons);
	}

	return 0;
}
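
/* Publish the current Rx BD and CQE producer values to the device. */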
void qede_update_rx_prod(struct qede_dev *edev, struct qede_rx_queue *rxq)
{
	u16 bd_prod = qed_chain_get_prod_idx(&rxq->rx_bd_ring);
	u16 cqe_prod = qed_chain_get_prod_idx(&rxq->rx_comp_ring);
	struct eth_rx_prod_data rx_prods = {0};

	/* Update producers */
	rx_prods.bd_prod = cpu_to_le16(bd_prod);
	rx_prods.cqe_prod = cpu_to_le16(cqe_prod);

	/* Make sure that the BD and SGE data is updated before updating the
	 * producers since FW might read the BD/SGE right after the producer
	 * is updated.
	 */
	wmb();

	internal_ram_wr(rxq->hw_rxq_prod_addr, sizeof(rx_prods),
			(u32 *)&rx_prods);
}
static void qede_get_rxhash(struct sk_buff *skb, u8 bitfields, __le32 rss_hash)
{
	enum pkt_hash_types hash_type = PKT_HASH_TYPE_NONE;
	enum rss_hash_type htype;
	u32 hash = 0;

	htype = GET_FIELD(bitfields, ETH_FAST_PATH_RX_REG_CQE_RSS_HASH_TYPE);
	if (htype) {
		hash_type = ((htype == RSS_HASH_TYPE_IPV4) ||
			     (htype == RSS_HASH_TYPE_IPV6)) ?
			    PKT_HASH_TYPE_L3 : PKT_HASH_TYPE_L4;
		hash = le32_to_cpu(rss_hash);
	}

	skb_set_hash(skb, hash, hash_type);
}
static void qede_set_skb_csum(struct sk_buff *skb, u8 csum_flag)
{
	skb_checksum_none_assert(skb);

	if (csum_flag & QEDE_CSUM_UNNECESSARY)
		skb->ip_summed = CHECKSUM_UNNECESSARY;

	if (csum_flag & QEDE_TUNN_CSUM_UNNECESSARY) {
		skb->csum_level = 1;
		skb->encapsulation = 1;
	}
}
static inline void qede_skb_receive(struct qede_dev *edev,
				    struct qede_fastpath *fp,
				    struct qede_rx_queue *rxq,
				    struct sk_buff *skb, u16 vlan_tag)
{
	if (vlan_tag)
		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);

	napi_gro_receive(&fp->napi, skb);
}
static void qede_set_gro_params(struct qede_dev *edev,
				struct sk_buff *skb,
				struct eth_fast_path_rx_tpa_start_cqe *cqe)
{
	u16 parsing_flags = le16_to_cpu(cqe->pars_flags.flags);

	if (((parsing_flags >> PARSING_AND_ERR_FLAGS_L3TYPE_SHIFT) &
	     PARSING_AND_ERR_FLAGS_L3TYPE_MASK) == 2)
		skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
	else
		skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;

	skb_shinfo(skb)->gso_size = __le16_to_cpu(cqe->len_on_first_bd) -
				    cqe->header_len;
}
static int qede_fill_frag_skb(struct qede_dev *edev,
			      struct qede_rx_queue *rxq,
			      u8 tpa_agg_index, u16 len_on_bd)
{
	struct sw_rx_data *current_bd = &rxq->sw_rx_ring[rxq->sw_rx_cons &
							 NUM_RX_BDS_MAX];
	struct qede_agg_info *tpa_info = &rxq->tpa_info[tpa_agg_index];
	struct sk_buff *skb = tpa_info->skb;

	if (unlikely(tpa_info->state != QEDE_AGG_STATE_START))
		goto out;

	/* Add one frag and update the appropriate fields in the skb */
	skb_fill_page_desc(skb, tpa_info->frag_id++,
			   current_bd->data,
			   current_bd->page_offset + rxq->rx_headroom,
			   len_on_bd);

	if (unlikely(qede_realloc_rx_buffer(rxq, current_bd))) {
		/* Incr page ref count to reuse on allocation failure
		 * so that it doesn't get freed while freeing SKB.
		 */
		page_ref_inc(current_bd->data);
		goto out;
	}

	qede_rx_bd_ring_consume(rxq);

	skb->data_len += len_on_bd;
	skb->truesize += rxq->rx_buf_seg_size;
	skb->len += len_on_bd;

	return 0;

out:
	tpa_info->state = QEDE_AGG_STATE_ERROR;
	qede_recycle_rx_bd_ring(rxq, 1);

	return -ENOMEM;
}
static bool qede_tunn_exist(u16 flag)
{
	return !!(flag & (PARSING_AND_ERR_FLAGS_TUNNELEXIST_MASK <<
			  PARSING_AND_ERR_FLAGS_TUNNELEXIST_SHIFT));
}
static u8 qede_check_tunn_csum(u16 flag)
{
	u16 csum_flag = 0;
	u8 tcsum = 0;

	if (flag & (PARSING_AND_ERR_FLAGS_TUNNELL4CHKSMWASCALCULATED_MASK <<
		    PARSING_AND_ERR_FLAGS_TUNNELL4CHKSMWASCALCULATED_SHIFT))
		csum_flag |= PARSING_AND_ERR_FLAGS_TUNNELL4CHKSMERROR_MASK <<
			     PARSING_AND_ERR_FLAGS_TUNNELL4CHKSMERROR_SHIFT;

	if (flag & (PARSING_AND_ERR_FLAGS_L4CHKSMWASCALCULATED_MASK <<
		    PARSING_AND_ERR_FLAGS_L4CHKSMWASCALCULATED_SHIFT)) {
		csum_flag |= PARSING_AND_ERR_FLAGS_L4CHKSMERROR_MASK <<
			     PARSING_AND_ERR_FLAGS_L4CHKSMERROR_SHIFT;
		tcsum = QEDE_TUNN_CSUM_UNNECESSARY;
	}

	csum_flag |= PARSING_AND_ERR_FLAGS_TUNNELIPHDRERROR_MASK <<
		     PARSING_AND_ERR_FLAGS_TUNNELIPHDRERROR_SHIFT |
		     PARSING_AND_ERR_FLAGS_IPHDRERROR_MASK <<
		     PARSING_AND_ERR_FLAGS_IPHDRERROR_SHIFT;

	if (csum_flag & flag)
		return QEDE_CSUM_ERROR;

	return QEDE_CSUM_UNNECESSARY | tcsum;
}
static inline struct sk_buff *
qede_build_skb(struct qede_rx_queue *rxq,
	       struct sw_rx_data *bd, u16 len, u16 pad)
{
	struct sk_buff *skb;
	void *buf;

	buf = page_address(bd->data) + bd->page_offset;
	skb = build_skb(buf, rxq->rx_buf_seg_size);

	skb_reserve(skb, pad);
	skb_put(skb, len);

	return skb;
}
static struct sk_buff *
qede_tpa_rx_build_skb(struct qede_dev *edev,
		      struct qede_rx_queue *rxq,
		      struct sw_rx_data *bd, u16 len, u16 pad,
		      bool alloc_skb)
{
	struct sk_buff *skb;

	skb = qede_build_skb(rxq, bd, len, pad);
	bd->page_offset += rxq->rx_buf_seg_size;

	if (bd->page_offset == PAGE_SIZE) {
		if (unlikely(qede_alloc_rx_buffer(rxq, true))) {
			DP_NOTICE(edev,
				  "Failed to allocate RX buffer for tpa start\n");
			bd->page_offset -= rxq->rx_buf_seg_size;
			page_ref_inc(bd->data);
			dev_kfree_skb_any(skb);
			return NULL;
		}
	} else {
		page_ref_inc(bd->data);
		qede_reuse_page(rxq, bd);
	}

	/* We've consumed the first BD and prepared an SKB */
	qede_rx_bd_ring_consume(rxq);

	return skb;
}
static struct sk_buff *
qede_rx_build_skb(struct qede_dev *edev,
		  struct qede_rx_queue *rxq,
		  struct sw_rx_data *bd, u16 len, u16 pad)
{
	struct sk_buff *skb = NULL;

	/* For smaller frames still need to allocate skb, memcpy
	 * data and benefit in reusing the page segment instead of
	 * un-mapping it.
	 */
	if ((len + pad <= edev->rx_copybreak)) {
		unsigned int offset = bd->page_offset + pad;

		skb = netdev_alloc_skb(edev->ndev, QEDE_RX_HDR_SIZE);
		if (unlikely(!skb))
			return NULL;

		skb_reserve(skb, pad);
		skb_put_data(skb, page_address(bd->data) + offset, len);
		qede_reuse_page(rxq, bd);
		goto out;
	}

	skb = qede_build_skb(rxq, bd, len, pad);

	if (unlikely(qede_realloc_rx_buffer(rxq, bd))) {
		/* Incr page ref count to reuse on allocation failure so
		 * that it doesn't get freed while freeing SKB [as its
		 * already mapped there].
		 */
		page_ref_inc(bd->data);
		dev_kfree_skb_any(skb);
		return NULL;
	}

out:
	/* We've consumed the first BD and prepared an SKB */
	qede_rx_bd_ring_consume(rxq);

	return skb;
}
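
/* Begin a new TPA (HW GRO) aggregation described by a TPA_START CQE:
 * build the head SKB and record the aggregation state.
 */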
static void qede_tpa_start(struct qede_dev *edev,
			   struct qede_rx_queue *rxq,
			   struct eth_fast_path_rx_tpa_start_cqe *cqe)
{
	struct qede_agg_info *tpa_info = &rxq->tpa_info[cqe->tpa_agg_index];
	struct sw_rx_data *sw_rx_data_cons;
	u16 pad;

	sw_rx_data_cons = &rxq->sw_rx_ring[rxq->sw_rx_cons & NUM_RX_BDS_MAX];
	pad = cqe->placement_offset + rxq->rx_headroom;

	tpa_info->skb = qede_tpa_rx_build_skb(edev, rxq, sw_rx_data_cons,
					      le16_to_cpu(cqe->len_on_first_bd),
					      pad, false);
	tpa_info->buffer.page_offset = sw_rx_data_cons->page_offset;
	tpa_info->buffer.mapping = sw_rx_data_cons->mapping;

	if (unlikely(!tpa_info->skb)) {
		DP_NOTICE(edev, "Failed to allocate SKB for gro\n");

		/* Consume from ring but do not produce since
		 * this might be used by FW still, it will be re-used
		 * at TPA end.
		 */
		tpa_info->tpa_start_fail = true;
		qede_rx_bd_ring_consume(rxq);
		tpa_info->state = QEDE_AGG_STATE_ERROR;
		goto cons_buf;
	}

	tpa_info->frag_id = 0;
	tpa_info->state = QEDE_AGG_STATE_START;

	if ((le16_to_cpu(cqe->pars_flags.flags) >>
	     PARSING_AND_ERR_FLAGS_TAG8021QEXIST_SHIFT) &
	    PARSING_AND_ERR_FLAGS_TAG8021QEXIST_MASK)
		tpa_info->vlan_tag = le16_to_cpu(cqe->vlan_tag);
	else
		tpa_info->vlan_tag = 0;

	qede_get_rxhash(tpa_info->skb, cqe->bitfields, cqe->rss_hash);

	/* This is needed in order to enable forwarding support */
	qede_set_gro_params(edev, tpa_info->skb, cqe);

cons_buf: /* We still need to handle bd_len_list to consume buffers */
	if (likely(cqe->bw_ext_bd_len_list[0]))
		qede_fill_frag_skb(edev, rxq, cqe->tpa_agg_index,
				   le16_to_cpu(cqe->bw_ext_bd_len_list[0]));

	if (unlikely(cqe->bw_ext_bd_len_list[1])) {
		DP_ERR(edev,
		       "Unlikely - got a TPA aggregation with more than one bw_ext_bd_len_list entry in the TPA start\n");
		tpa_info->state = QEDE_AGG_STATE_ERROR;
	}
}
static void qede_gro_ip_csum(struct sk_buff *skb)
{
	const struct iphdr *iph = ip_hdr(skb);
	struct tcphdr *th;

	skb_set_transport_header(skb, sizeof(struct iphdr));
	th = tcp_hdr(skb);

	th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb),
				  iph->saddr, iph->daddr, 0);

	tcp_gro_complete(skb);
}
static void qede_gro_ipv6_csum(struct sk_buff *skb)
{
	struct ipv6hdr *iph = ipv6_hdr(skb);
	struct tcphdr *th;

	skb_set_transport_header(skb, sizeof(struct ipv6hdr));
	th = tcp_hdr(skb);

	th->check = ~tcp_v6_check(skb->len - skb_transport_offset(skb),
				  &iph->saddr, &iph->daddr, 0);
	tcp_gro_complete(skb);
}
static void qede_gro_receive(struct qede_dev *edev,
			     struct qede_fastpath *fp,
			     struct sk_buff *skb,
			     u16 vlan_tag)
{
	/* FW can send a single MTU sized packet from gro flow
	 * due to aggregation timeout/last segment etc. which
	 * is not expected to be a gro packet. If a skb has zero
	 * frags then simply push it in the stack as non gso skb.
	 */
	if (unlikely(!skb->data_len)) {
		skb_shinfo(skb)->gso_type = 0;
		skb_shinfo(skb)->gso_size = 0;
		goto send_skb;
	}

#ifdef CONFIG_INET
	if (skb_shinfo(skb)->gso_size) {
		skb_reset_network_header(skb);

		switch (skb->protocol) {
		case htons(ETH_P_IP):
			qede_gro_ip_csum(skb);
			break;
		case htons(ETH_P_IPV6):
			qede_gro_ipv6_csum(skb);
			break;
		default:
			DP_ERR(edev,
			       "Error: FW GRO supports only IPv4/IPv6, not 0x%04x\n",
			       ntohs(skb->protocol));
		}
	}
#endif

send_skb:
	skb_record_rx_queue(skb, fp->rxq->rxq_id);
	qede_skb_receive(edev, fp, fp->rxq, skb, vlan_tag);
}
static inline void qede_tpa_cont(struct qede_dev *edev,
				 struct qede_rx_queue *rxq,
				 struct eth_fast_path_rx_tpa_cont_cqe *cqe)
{
	int i;

	for (i = 0; cqe->len_list[i]; i++)
		qede_fill_frag_skb(edev, rxq, cqe->tpa_agg_index,
				   le16_to_cpu(cqe->len_list[i]));

	if (unlikely(i > 1))
		DP_ERR(edev,
		       "Strange - TPA cont with more than a single len_list entry\n");
}
static int qede_tpa_end(struct qede_dev *edev,
			struct qede_fastpath *fp,
			struct eth_fast_path_rx_tpa_end_cqe *cqe)
{
	struct qede_rx_queue *rxq = fp->rxq;
	struct qede_agg_info *tpa_info;
	struct sk_buff *skb;
	int i;

	tpa_info = &rxq->tpa_info[cqe->tpa_agg_index];
	skb = tpa_info->skb;

	if (tpa_info->buffer.page_offset == PAGE_SIZE)
		dma_unmap_page(rxq->dev, tpa_info->buffer.mapping,
			       PAGE_SIZE, rxq->data_direction);

	for (i = 0; cqe->len_list[i]; i++)
		qede_fill_frag_skb(edev, rxq, cqe->tpa_agg_index,
				   le16_to_cpu(cqe->len_list[i]));
	if (unlikely(i > 1))
		DP_ERR(edev,
		       "Strange - TPA end with more than a single len_list entry\n");

	if (unlikely(tpa_info->state != QEDE_AGG_STATE_START))
		goto err;

	if (unlikely(cqe->num_of_bds != tpa_info->frag_id + 1))
		DP_ERR(edev,
		       "Strange - TPA had %02x BDs, but SKB has only %d frags\n",
		       cqe->num_of_bds, tpa_info->frag_id);
	if (unlikely(skb->len != le16_to_cpu(cqe->total_packet_len)))
		DP_ERR(edev,
		       "Strange - total packet len [cqe] is %4x but SKB has len %04x\n",
		       le16_to_cpu(cqe->total_packet_len), skb->len);

	/* Finalize the SKB */
	skb->protocol = eth_type_trans(skb, edev->ndev);
	skb->ip_summed = CHECKSUM_UNNECESSARY;

	/* tcp_gro_complete() will copy NAPI_GRO_CB(skb)->count
	 * to skb_shinfo(skb)->gso_segs
	 */
	NAPI_GRO_CB(skb)->count = le16_to_cpu(cqe->num_of_coalesced_segs);

	qede_gro_receive(edev, fp, skb, tpa_info->vlan_tag);

	tpa_info->state = QEDE_AGG_STATE_NONE;

	return 1;
err:
	tpa_info->state = QEDE_AGG_STATE_NONE;

	if (tpa_info->tpa_start_fail) {
		qede_reuse_page(rxq, &tpa_info->buffer);
		tpa_info->tpa_start_fail = false;
	}

	dev_kfree_skb_any(tpa_info->skb);
	tpa_info->skb = NULL;
	return 0;
}
static u8 qede_check_notunn_csum(u16 flag)
{
	u16 csum_flag = 0;
	u8 csum = 0;

	if (flag & (PARSING_AND_ERR_FLAGS_L4CHKSMWASCALCULATED_MASK <<
		    PARSING_AND_ERR_FLAGS_L4CHKSMWASCALCULATED_SHIFT)) {
		csum_flag |= PARSING_AND_ERR_FLAGS_L4CHKSMERROR_MASK <<
			     PARSING_AND_ERR_FLAGS_L4CHKSMERROR_SHIFT;
		csum = QEDE_CSUM_UNNECESSARY;
	}

	csum_flag |= PARSING_AND_ERR_FLAGS_IPHDRERROR_MASK <<
		     PARSING_AND_ERR_FLAGS_IPHDRERROR_SHIFT;

	if (csum_flag & flag)
		return QEDE_CSUM_ERROR;

	return csum;
}
static u8 qede_check_csum(u16 flag)
{
	if (!qede_tunn_exist(flag))
		return qede_check_notunn_csum(flag);
	else
		return qede_check_tunn_csum(flag);
}
static bool qede_pkt_is_ip_fragmented(struct eth_fast_path_rx_reg_cqe *cqe,
				      u16 flag)
{
	u8 tun_pars_flg = cqe->tunnel_pars_flags.flags;

	if ((tun_pars_flg & (ETH_TUNNEL_PARSING_FLAGS_IPV4_FRAGMENT_MASK <<
			     ETH_TUNNEL_PARSING_FLAGS_IPV4_FRAGMENT_SHIFT)) ||
	    (flag & (PARSING_AND_ERR_FLAGS_IPV4FRAG_MASK <<
		     PARSING_AND_ERR_FLAGS_IPV4FRAG_SHIFT)))
		return true;

	return false;
}
/* Return true iff packet is to be passed to stack */
static bool qede_rx_xdp(struct qede_dev *edev,
			struct qede_fastpath *fp,
			struct qede_rx_queue *rxq,
			struct bpf_prog *prog,
			struct sw_rx_data *bd,
			struct eth_fast_path_rx_reg_cqe *cqe,
			u16 *data_offset, u16 *len)
{
	struct xdp_buff xdp;
	enum xdp_action act;

	xdp.data_hard_start = page_address(bd->data);
	xdp.data = xdp.data_hard_start + *data_offset;
	xdp_set_data_meta_invalid(&xdp);
	xdp.data_end = xdp.data + *len;
	xdp.rxq = &rxq->xdp_rxq;
	xdp.frame_sz = rxq->rx_buf_seg_size; /* PAGE_SIZE when XDP enabled */

	/* Queues always have a full reset currently, so for the time
	 * being until there's atomic program replace just mark read
	 * side for map helpers.
	 */
	rcu_read_lock();
	act = bpf_prog_run_xdp(prog, &xdp);
	rcu_read_unlock();

	/* Recalculate, as XDP might have changed the headers */
	*data_offset = xdp.data - xdp.data_hard_start;
	*len = xdp.data_end - xdp.data;

	if (act == XDP_PASS)
		return true;

	/* Count number of packets not to be passed to stack */
	rxq->xdp_no_pass++;

	switch (act) {
	case XDP_TX:
		/* We need the replacement buffer before transmit. */
		if (unlikely(qede_alloc_rx_buffer(rxq, true))) {
			qede_recycle_rx_bd_ring(rxq, 1);

			trace_xdp_exception(edev->ndev, prog, act);
			break;
		}

		/* Now if there's a transmission problem, we'd still have to
		 * throw current buffer, as replacement was already allocated.
		 */
		if (unlikely(qede_xdp_xmit(fp->xdp_tx, bd->mapping,
					   *data_offset, *len, bd->data,
					   NULL))) {
			dma_unmap_page(rxq->dev, bd->mapping, PAGE_SIZE,
				       rxq->data_direction);
			__free_page(bd->data);

			trace_xdp_exception(edev->ndev, prog, act);
		} else {
			dma_sync_single_for_device(rxq->dev,
						   bd->mapping + *data_offset,
						   *len, rxq->data_direction);
			fp->xdp_xmit |= QEDE_XDP_TX;
		}

		/* Regardless, we've consumed an Rx BD */
		qede_rx_bd_ring_consume(rxq);
		break;
	case XDP_REDIRECT:
		/* We need the replacement buffer before transmit. */
		if (unlikely(qede_alloc_rx_buffer(rxq, true))) {
			qede_recycle_rx_bd_ring(rxq, 1);

			trace_xdp_exception(edev->ndev, prog, act);
			break;
		}

		dma_unmap_page(rxq->dev, bd->mapping, PAGE_SIZE,
			       rxq->data_direction);

		if (unlikely(xdp_do_redirect(edev->ndev, &xdp, prog)))
			DP_NOTICE(edev, "Failed to redirect the packet\n");
		else
			fp->xdp_xmit |= QEDE_XDP_REDIRECT;

		qede_rx_bd_ring_consume(rxq);
		break;
	default:
		bpf_warn_invalid_xdp_action(act);
		fallthrough;
	case XDP_ABORTED:
		trace_xdp_exception(edev->ndev, prog, act);
		fallthrough;
	case XDP_DROP:
		qede_recycle_rx_bd_ring(rxq, cqe->bd_num);
	}

	return false;
}
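
/* Attach the remaining BDs of a multi-BD (jumbo) packet to the SKB as page
 * fragments. Returns the number of BDs that were not consumed.
 */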
static int qede_rx_build_jumbo(struct qede_dev *edev,
			       struct qede_rx_queue *rxq,
			       struct sk_buff *skb,
			       struct eth_fast_path_rx_reg_cqe *cqe,
			       u16 first_bd_len)
{
	u16 pkt_len = le16_to_cpu(cqe->pkt_len);
	struct sw_rx_data *bd;
	u16 bd_cons_idx;
	u8 num_frags;

	pkt_len -= first_bd_len;

	/* We've already used one BD for the SKB. Now take care of the rest */
	for (num_frags = cqe->bd_num - 1; num_frags > 0; num_frags--) {
		u16 cur_size = pkt_len > rxq->rx_buf_size ? rxq->rx_buf_size :
		    pkt_len;

		if (unlikely(!cur_size)) {
			DP_ERR(edev,
			       "Still got %d BDs for mapping jumbo, but length became 0\n",
			       num_frags);
			goto out;
		}

		/* We need a replacement buffer for each BD */
		if (unlikely(qede_alloc_rx_buffer(rxq, true)))
			goto out;

		/* Now that we've allocated the replacement buffer,
		 * we can safely consume the next BD and map it to the SKB.
		 */
		bd_cons_idx = rxq->sw_rx_cons & NUM_RX_BDS_MAX;
		bd = &rxq->sw_rx_ring[bd_cons_idx];
		qede_rx_bd_ring_consume(rxq);

		dma_unmap_page(rxq->dev, bd->mapping,
			       PAGE_SIZE, DMA_FROM_DEVICE);

		skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags++,
				   bd->data, rxq->rx_headroom, cur_size);

		skb->truesize += PAGE_SIZE;
		skb->data_len += cur_size;
		skb->len += cur_size;
		pkt_len -= cur_size;
	}

	if (unlikely(pkt_len))
		DP_ERR(edev,
		       "Mapped all BDs of jumbo, but still have %d bytes\n",
		       pkt_len);

out:
	return num_frags;
}
static int qede_rx_process_tpa_cqe(struct qede_dev *edev,
				   struct qede_fastpath *fp,
				   struct qede_rx_queue *rxq,
				   union eth_rx_cqe *cqe,
				   enum eth_rx_cqe_type type)
{
	switch (type) {
	case ETH_RX_CQE_TYPE_TPA_START:
		qede_tpa_start(edev, rxq, &cqe->fast_path_tpa_start);
		return 0;
	case ETH_RX_CQE_TYPE_TPA_CONT:
		qede_tpa_cont(edev, rxq, &cqe->fast_path_tpa_cont);
		return 0;
	case ETH_RX_CQE_TYPE_TPA_END:
		return qede_tpa_end(edev, fp, &cqe->fast_path_tpa_end);
	default:
		return 0;
	}
}
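
/* Handle a single Rx completion: slowpath events, TPA CQEs and regular
 * packets. Returns the number of packets passed to the stack (0 or 1).
 */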
static int qede_rx_process_cqe(struct qede_dev *edev,
			       struct qede_fastpath *fp,
			       struct qede_rx_queue *rxq)
{
	struct bpf_prog *xdp_prog = READ_ONCE(rxq->xdp_prog);
	struct eth_fast_path_rx_reg_cqe *fp_cqe;
	u16 len, pad, bd_cons_idx, parse_flag;
	enum eth_rx_cqe_type cqe_type;
	union eth_rx_cqe *cqe;
	struct sw_rx_data *bd;
	struct sk_buff *skb;
	__le16 flags;
	u8 csum_flag;

	/* Get the CQE from the completion ring */
	cqe = (union eth_rx_cqe *)qed_chain_consume(&rxq->rx_comp_ring);
	cqe_type = cqe->fast_path_regular.type;

	/* Process an unlikely slowpath event */
	if (unlikely(cqe_type == ETH_RX_CQE_TYPE_SLOW_PATH)) {
		struct eth_slow_path_rx_cqe *sp_cqe;

		sp_cqe = (struct eth_slow_path_rx_cqe *)cqe;
		edev->ops->eth_cqe_completion(edev->cdev, fp->id, sp_cqe);
		return 0;
	}

	/* Handle TPA cqes */
	if (cqe_type != ETH_RX_CQE_TYPE_REGULAR)
		return qede_rx_process_tpa_cqe(edev, fp, rxq, cqe, cqe_type);

	/* Get the data from the SW ring; Consume it only after it's evident
	 * we wouldn't recycle it.
	 */
	bd_cons_idx = rxq->sw_rx_cons & NUM_RX_BDS_MAX;
	bd = &rxq->sw_rx_ring[bd_cons_idx];

	fp_cqe = &cqe->fast_path_regular;
	len = le16_to_cpu(fp_cqe->len_on_first_bd);
	pad = fp_cqe->placement_offset + rxq->rx_headroom;

	/* Run eBPF program if one is attached */
	if (xdp_prog)
		if (!qede_rx_xdp(edev, fp, rxq, xdp_prog, bd, fp_cqe,
				 &pad, &len))
			return 0;

	/* If this is an error packet then drop it */
	flags = cqe->fast_path_regular.pars_flags.flags;
	parse_flag = le16_to_cpu(flags);

	csum_flag = qede_check_csum(parse_flag);
	if (unlikely(csum_flag == QEDE_CSUM_ERROR)) {
		if (qede_pkt_is_ip_fragmented(fp_cqe, parse_flag))
			rxq->rx_ip_frags++;
		else
			rxq->rx_hw_errors++;
	}

	/* Basic validation passed; Need to prepare an SKB. This would also
	 * guarantee to finally consume the first BD upon success.
	 */
	skb = qede_rx_build_skb(edev, rxq, bd, len, pad);
	if (!skb) {
		rxq->rx_alloc_errors++;
		qede_recycle_rx_bd_ring(rxq, fp_cqe->bd_num);
		return 0;
	}

	/* In case of Jumbo packet, several PAGE_SIZEd buffers will be pointed
	 * by a single cqe.
	 */
	if (fp_cqe->bd_num > 1) {
		u16 unmapped_frags = qede_rx_build_jumbo(edev, rxq, skb,
							 fp_cqe, len);

		if (unlikely(unmapped_frags > 0)) {
			qede_recycle_rx_bd_ring(rxq, unmapped_frags);
			dev_kfree_skb_any(skb);
			return 0;
		}
	}

	/* The SKB contains all the data. Now prepare meta-magic */
	skb->protocol = eth_type_trans(skb, edev->ndev);
	qede_get_rxhash(skb, fp_cqe->bitfields, fp_cqe->rss_hash);
	qede_set_skb_csum(skb, csum_flag);
	skb_record_rx_queue(skb, rxq->rxq_id);
	qede_ptp_record_rx_ts(edev, cqe, skb);

	/* SKB is prepared - pass it to stack */
	qede_skb_receive(edev, fp, rxq, skb, le16_to_cpu(fp_cqe->vlan_tag));

	return 1;
}
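
/* NAPI Rx handler - process up to @budget completions, refill the Rx ring
 * and update the producers.
 */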
static int qede_rx_int(struct qede_fastpath *fp, int budget)
{
	struct qede_rx_queue *rxq = fp->rxq;
	struct qede_dev *edev = fp->edev;
	int work_done = 0, rcv_pkts = 0;
	u16 hw_comp_cons, sw_comp_cons;

	hw_comp_cons = le16_to_cpu(*rxq->hw_cons_ptr);
	sw_comp_cons = qed_chain_get_cons_idx(&rxq->rx_comp_ring);

	/* Memory barrier to prevent the CPU from doing speculative reads of CQE
	 * / BD in the while-loop before reading hw_comp_cons. If the CQE is
	 * read before it is written by FW, then FW writes CQE and SB, and then
	 * the CPU reads the hw_comp_cons, it will use an old CQE.
	 */
	rmb();

	/* Loop to complete all indicated BDs */
	while ((sw_comp_cons != hw_comp_cons) && (work_done < budget)) {
		rcv_pkts += qede_rx_process_cqe(edev, fp, rxq);
		qed_chain_recycle_consumed(&rxq->rx_comp_ring);
		sw_comp_cons = qed_chain_get_cons_idx(&rxq->rx_comp_ring);
		work_done++;
	}

	rxq->rcv_pkts += rcv_pkts;

	/* Allocate replacement buffers */
	while (rxq->num_rx_buffers - rxq->filled_buffers)
		if (qede_alloc_rx_buffer(rxq, false))
			break;

	/* Update producers */
	qede_update_rx_prod(edev, rxq);

	return work_done;
}
static bool qede_poll_is_more_work(struct qede_fastpath *fp)
{
	qed_sb_update_sb_idx(fp->sb_info);

	/* *_has_*_work() reads the status block, thus we need to ensure that
	 * status block indices have been actually read (qed_sb_update_sb_idx)
	 * prior to this check (*_has_*_work) so that we won't write the
	 * "newer" value of the status block to HW (if there was a DMA right
	 * after qede_has_rx_work and if there is no rmb, the memory reading
	 * (qed_sb_update_sb_idx) may be postponed to right before *_ack_sb).
	 * In this case there will never be another interrupt until there is
	 * another update of the status block, while there is still unhandled
	 * work.
	 */
	rmb();

	if (likely(fp->type & QEDE_FASTPATH_RX))
		if (qede_has_rx_work(fp->rxq))
			return true;

	if (fp->type & QEDE_FASTPATH_XDP)
		if (qede_txq_has_work(fp->xdp_tx))
			return true;

	if (likely(fp->type & QEDE_FASTPATH_TX)) {
		int cos;

		for_each_cos_in_txq(fp->edev, cos) {
			if (qede_txq_has_work(&fp->txq[cos]))
				return true;
		}
	}

	return false;
}
/*********************
 * NDO & API related *
 *********************/
int qede_poll(struct napi_struct *napi, int budget)
{
	struct qede_fastpath *fp = container_of(napi, struct qede_fastpath,
						napi);
	struct qede_dev *edev = fp->edev;
	int rx_work_done = 0;
	u16 xdp_prod;

	fp->xdp_xmit = 0;

	if (likely(fp->type & QEDE_FASTPATH_TX)) {
		int cos;

		for_each_cos_in_txq(fp->edev, cos) {
			if (qede_txq_has_work(&fp->txq[cos]))
				qede_tx_int(edev, &fp->txq[cos]);
		}
	}

	if ((fp->type & QEDE_FASTPATH_XDP) && qede_txq_has_work(fp->xdp_tx))
		qede_xdp_tx_int(edev, fp->xdp_tx);

	rx_work_done = (likely(fp->type & QEDE_FASTPATH_RX) &&
			qede_has_rx_work(fp->rxq)) ?
			qede_rx_int(fp, budget) : 0;
	if (rx_work_done < budget) {
		if (!qede_poll_is_more_work(fp)) {
			napi_complete_done(napi, rx_work_done);

			/* Update and reenable interrupts */
			qed_sb_ack(fp->sb_info, IGU_INT_ENABLE, 1);
		} else {
			rx_work_done = budget;
		}
	}

	if (fp->xdp_xmit & QEDE_XDP_TX) {
		xdp_prod = qed_chain_get_prod_idx(&fp->xdp_tx->tx_pbl);

		fp->xdp_tx->tx_db.data.bd_prod = cpu_to_le16(xdp_prod);
		qede_update_tx_producer(fp->xdp_tx);
	}

	if (fp->xdp_xmit & QEDE_XDP_REDIRECT)
		xdp_do_flush_map();

	return rx_work_done;
}
irqreturn_t qede_msix_fp_int(int irq, void *fp_cookie)
{
	struct qede_fastpath *fp = fp_cookie;

	qed_sb_ack(fp->sb_info, IGU_INT_DISABLE, 0 /*do not update*/);

	napi_schedule_irqoff(&fp->napi);
	return IRQ_HANDLED;
}
/* Main transmit function */
netdev_tx_t qede_start_xmit(struct sk_buff *skb, struct net_device *ndev)
{
	struct qede_dev *edev = netdev_priv(ndev);
	struct netdev_queue *netdev_txq;
	struct qede_tx_queue *txq;
	struct eth_tx_1st_bd *first_bd;
	struct eth_tx_2nd_bd *second_bd = NULL;
	struct eth_tx_3rd_bd *third_bd = NULL;
	struct eth_tx_bd *tx_data_bd = NULL;
	u16 txq_index, val = 0;
	u8 nbd = 0;
	dma_addr_t mapping;
	int rc, frag_idx = 0, ipv6_ext = 0;
	u8 xmit_type;
	u16 idx;
	u16 hlen;
	bool data_split = false;

	/* Get tx-queue context and netdev index */
	txq_index = skb_get_queue_mapping(skb);
	WARN_ON(txq_index >= QEDE_TSS_COUNT(edev) * edev->dev_info.num_tc);
	txq = QEDE_NDEV_TXQ_ID_TO_TXQ(edev, txq_index);
	netdev_txq = netdev_get_tx_queue(ndev, txq_index);

	WARN_ON(qed_chain_get_elem_left(&txq->tx_pbl) < (MAX_SKB_FRAGS + 1));

	xmit_type = qede_xmit_type(skb, &ipv6_ext);

#if ((MAX_SKB_FRAGS + 2) > ETH_TX_MAX_BDS_PER_NON_LSO_PACKET)
	if (qede_pkt_req_lin(skb, xmit_type)) {
		if (skb_linearize(skb)) {
			txq->tx_mem_alloc_err++;

			dev_kfree_skb_any(skb);
			return NETDEV_TX_OK;
		}
	}
#endif

	/* Fill the entry in the SW ring and the BDs in the FW ring */
	idx = txq->sw_tx_prod;
	txq->sw_tx_ring.skbs[idx].skb = skb;
	first_bd = (struct eth_tx_1st_bd *)
		   qed_chain_produce(&txq->tx_pbl);
	memset(first_bd, 0, sizeof(*first_bd));
	first_bd->data.bd_flags.bitfields =
		1 << ETH_TX_1ST_BD_FLAGS_START_BD_SHIFT;

	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
		qede_ptp_tx_ts(edev, skb);

	/* Map skb linear data for DMA and set in the first BD */
	mapping = dma_map_single(txq->dev, skb->data,
				 skb_headlen(skb), DMA_TO_DEVICE);
	if (unlikely(dma_mapping_error(txq->dev, mapping))) {
		DP_NOTICE(edev, "SKB mapping failed\n");
		qede_free_failed_tx_pkt(txq, first_bd, 0, false);
		qede_update_tx_producer(txq);
		return NETDEV_TX_OK;
	}
	nbd++;
	BD_SET_UNMAP_ADDR_LEN(first_bd, mapping, skb_headlen(skb));

	/* In case there is IPv6 with extension headers or LSO we need 2nd and
	 * 3rd BDs.
	 */
	if (unlikely((xmit_type & XMIT_LSO) | ipv6_ext)) {
		second_bd = (struct eth_tx_2nd_bd *)
			qed_chain_produce(&txq->tx_pbl);
		memset(second_bd, 0, sizeof(*second_bd));

		nbd++;
		third_bd = (struct eth_tx_3rd_bd *)
			qed_chain_produce(&txq->tx_pbl);
		memset(third_bd, 0, sizeof(*third_bd));

		nbd++;
		/* We need to fill in additional data in second_bd... */
		tx_data_bd = (struct eth_tx_bd *)second_bd;
	}

	if (skb_vlan_tag_present(skb)) {
		first_bd->data.vlan = cpu_to_le16(skb_vlan_tag_get(skb));
		first_bd->data.bd_flags.bitfields |=
			1 << ETH_TX_1ST_BD_FLAGS_VLAN_INSERTION_SHIFT;
	}

	/* Fill the parsing flags & params according to the requested offload */
	if (xmit_type & XMIT_L4_CSUM) {
		/* We don't re-calculate IP checksum as it is already done by
		 * the upper stack
		 */
		first_bd->data.bd_flags.bitfields |=
			1 << ETH_TX_1ST_BD_FLAGS_L4_CSUM_SHIFT;

		if (xmit_type & XMIT_ENC) {
			first_bd->data.bd_flags.bitfields |=
				1 << ETH_TX_1ST_BD_FLAGS_IP_CSUM_SHIFT;

			val |= (1 << ETH_TX_DATA_1ST_BD_TUNN_FLAG_SHIFT);
		}

		/* Legacy FW had flipped behavior in regard to this bit -
		 * I.e., needed to set to prevent FW from touching encapsulated
		 * packets when it didn't need to.
		 */
		if (unlikely(txq->is_legacy))
			val ^= (1 << ETH_TX_DATA_1ST_BD_TUNN_FLAG_SHIFT);

		/* If the packet is IPv6 with extension header, indicate that
		 * to FW and pass few params, since the device cracker doesn't
		 * support parsing IPv6 with extension header/s.
		 */
		if (unlikely(ipv6_ext))
			qede_set_params_for_ipv6_ext(skb, second_bd, third_bd);
	}

	if (xmit_type & XMIT_LSO) {
		first_bd->data.bd_flags.bitfields |=
			(1 << ETH_TX_1ST_BD_FLAGS_LSO_SHIFT);
		third_bd->data.lso_mss =
			cpu_to_le16(skb_shinfo(skb)->gso_size);

		if (unlikely(xmit_type & XMIT_ENC)) {
			first_bd->data.bd_flags.bitfields |=
				1 << ETH_TX_1ST_BD_FLAGS_TUNN_IP_CSUM_SHIFT;

			if (xmit_type & XMIT_ENC_GSO_L4_CSUM) {
				u8 tmp = ETH_TX_1ST_BD_FLAGS_TUNN_L4_CSUM_SHIFT;

				first_bd->data.bd_flags.bitfields |= 1 << tmp;
			}
			hlen = qede_get_skb_hlen(skb, true);
		} else {
			first_bd->data.bd_flags.bitfields |=
				1 << ETH_TX_1ST_BD_FLAGS_IP_CSUM_SHIFT;
			hlen = qede_get_skb_hlen(skb, false);
		}

		/* @@@TBD - if will not be removed need to check */
		third_bd->data.bitfields |=
			cpu_to_le16(1 << ETH_TX_DATA_3RD_BD_HDR_NBD_SHIFT);

		/* Make life easier for FW guys who can't deal with header and
		 * data on same BD. If we need to split, use the second bd...
		 */
		if (unlikely(skb_headlen(skb) > hlen)) {
			DP_VERBOSE(edev, NETIF_MSG_TX_QUEUED,
				   "TSO split header size is %d (%x:%x)\n",
				   first_bd->nbytes, first_bd->addr.hi,
				   first_bd->addr.lo);

			mapping = HILO_U64(le32_to_cpu(first_bd->addr.hi),
					   le32_to_cpu(first_bd->addr.lo)) +
					   hlen;

			BD_SET_UNMAP_ADDR_LEN(tx_data_bd, mapping,
					      le16_to_cpu(first_bd->nbytes) -
					      hlen);

			/* this marks the BD as one that has no
			 * individual mapping
			 */
			txq->sw_tx_ring.skbs[idx].flags |= QEDE_TSO_SPLIT_BD;

			first_bd->nbytes = cpu_to_le16(hlen);

			tx_data_bd = (struct eth_tx_bd *)third_bd;
			data_split = true;
		}
	} else {
		val |= ((skb->len & ETH_TX_DATA_1ST_BD_PKT_LEN_MASK) <<
			 ETH_TX_DATA_1ST_BD_PKT_LEN_SHIFT);
	}

	first_bd->data.bitfields = cpu_to_le16(val);

	/* Handle fragmented skb */
	/* special handle for frags inside 2nd and 3rd bds.. */
	while (tx_data_bd && frag_idx < skb_shinfo(skb)->nr_frags) {
		rc = map_frag_to_bd(txq,
				    &skb_shinfo(skb)->frags[frag_idx],
				    tx_data_bd);
		if (rc) {
			qede_free_failed_tx_pkt(txq, first_bd, nbd, data_split);
			qede_update_tx_producer(txq);
			return NETDEV_TX_OK;
		}

		if (tx_data_bd == (struct eth_tx_bd *)second_bd)
			tx_data_bd = (struct eth_tx_bd *)third_bd;
		else
			tx_data_bd = NULL;

		frag_idx++;
	}

	/* map last frags into 4th, 5th .... */
	for (; frag_idx < skb_shinfo(skb)->nr_frags; frag_idx++, nbd++) {
		tx_data_bd = (struct eth_tx_bd *)
			qed_chain_produce(&txq->tx_pbl);

		memset(tx_data_bd, 0, sizeof(*tx_data_bd));

		rc = map_frag_to_bd(txq,
				    &skb_shinfo(skb)->frags[frag_idx],
				    tx_data_bd);
		if (rc) {
			qede_free_failed_tx_pkt(txq, first_bd, nbd, data_split);
			qede_update_tx_producer(txq);
			return NETDEV_TX_OK;
		}
	}

	/* update the first BD with the actual num BDs */
	first_bd->data.nbds = nbd;

	netdev_tx_sent_queue(netdev_txq, skb->len);

	skb_tx_timestamp(skb);

	/* Advance packet producer only before sending the packet since mapping
	 * of pages may fail.
	 */
	txq->sw_tx_prod = (txq->sw_tx_prod + 1) % txq->num_tx_buffers;

	/* 'next page' entries are counted in the producer value */
	txq->tx_db.data.bd_prod =
		cpu_to_le16(qed_chain_get_prod_idx(&txq->tx_pbl));

	if (!netdev_xmit_more() || netif_xmit_stopped(netdev_txq))
		qede_update_tx_producer(txq);

	if (unlikely(qed_chain_get_elem_left(&txq->tx_pbl)
		      < (MAX_SKB_FRAGS + 1))) {
		if (netdev_xmit_more())
			qede_update_tx_producer(txq);

		netif_tx_stop_queue(netdev_txq);
		txq->stopped_cnt++;
		DP_VERBOSE(edev, NETIF_MSG_TX_QUEUED,
			   "Stop queue was called\n");
		/* paired memory barrier is in qede_tx_int(), we have to keep
		 * ordering of set_bit() in netif_tx_stop_queue() and read of
		 * fp->bd_tx_cons
		 */
		smp_mb();

		if ((qed_chain_get_elem_left(&txq->tx_pbl) >=
		     (MAX_SKB_FRAGS + 1)) &&
		    (edev->state == QEDE_STATE_OPEN)) {
			netif_tx_wake_queue(netdev_txq);
			DP_VERBOSE(edev, NETIF_MSG_TX_QUEUED,
				   "Wake queue was called\n");
		}
	}

	return NETDEV_TX_OK;
}
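
/* .ndo_select_queue - spread traffic across all TSS queues and traffic
 * classes.
 */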
u16 qede_select_queue(struct net_device *dev, struct sk_buff *skb,
		      struct net_device *sb_dev)
{
	struct qede_dev *edev = netdev_priv(dev);
	int total_txq;

	total_txq = QEDE_TSS_COUNT(edev) * edev->dev_info.num_tc;

	return QEDE_TSS_COUNT(edev) ?
		netdev_pick_tx(dev, skb, NULL) % total_txq : 0;
}
/* 8B udp header + 8B base tunnel header + 32B option length */
#define QEDE_MAX_TUN_HDR_LEN 48

netdev_features_t qede_features_check(struct sk_buff *skb,
				      struct net_device *dev,
				      netdev_features_t features)
{
	if (skb->encapsulation) {
		u8 l4_proto = 0;

		switch (vlan_get_protocol(skb)) {
		case htons(ETH_P_IP):
			l4_proto = ip_hdr(skb)->protocol;
			break;
		case htons(ETH_P_IPV6):
			l4_proto = ipv6_hdr(skb)->nexthdr;
			break;
		default:
			return features;
		}

		/* Disable offloads for geneve tunnels, as HW can't parse
		 * the geneve header which has option length greater than 32b
		 * and disable offloads for the ports which are not offloaded.
		 */
		if (l4_proto == IPPROTO_UDP) {
			struct qede_dev *edev = netdev_priv(dev);
			u16 hdrlen, vxln_port, gnv_port;

			hdrlen = QEDE_MAX_TUN_HDR_LEN;
			vxln_port = edev->vxlan_dst_port;
			gnv_port = edev->geneve_dst_port;

			if ((skb_inner_mac_header(skb) -
			     skb_transport_header(skb)) > hdrlen ||
			     (ntohs(udp_hdr(skb)->dest) != vxln_port &&
			      ntohs(udp_hdr(skb)->dest) != gnv_port))
				return features & ~(NETIF_F_CSUM_MASK |
						    NETIF_F_GSO_MASK);
		} else if (l4_proto == IPPROTO_IPIP) {
			/* IPIP tunnels are unknown to the device or at least unsupported natively,
			 * offloads for them can't be done trivially, so disable them for such skb.
			 */
			return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
		}
	}

	return features;
}