1 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
2 /* Copyright (c) 2021, Microsoft Corporation. */
4 #include <uapi/linux/bpf.h>
6 #include <linux/debugfs.h>
7 #include <linux/inetdevice.h>
8 #include <linux/etherdevice.h>
9 #include <linux/ethtool.h>
10 #include <linux/filter.h>
12 #include <linux/pci.h>
14 #include <net/checksum.h>
15 #include <net/ip6_checksum.h>
16 #include <net/page_pool/helpers.h>
19 #include <net/mana/mana.h>
20 #include <net/mana/mana_auxiliary.h>
22 static DEFINE_IDA(mana_adev_ida
);
24 static int mana_adev_idx_alloc(void)
26 return ida_alloc(&mana_adev_ida
, GFP_KERNEL
);
29 static void mana_adev_idx_free(int idx
)
31 ida_free(&mana_adev_ida
, idx
);
34 static ssize_t
mana_dbg_q_read(struct file
*filp
, char __user
*buf
, size_t count
,
37 struct gdma_queue
*gdma_q
= filp
->private_data
;
39 return simple_read_from_buffer(buf
, count
, pos
, gdma_q
->queue_mem_ptr
,
43 static const struct file_operations mana_dbg_q_fops
= {
46 .read
= mana_dbg_q_read
,
49 /* Microsoft Azure Network Adapter (MANA) functions */
51 static int mana_open(struct net_device
*ndev
)
53 struct mana_port_context
*apc
= netdev_priv(ndev
);
56 err
= mana_alloc_queues(ndev
);
60 apc
->port_is_up
= true;
62 /* Ensure port state updated before txq state */
65 netif_carrier_on(ndev
);
66 netif_tx_wake_all_queues(ndev
);
71 static int mana_close(struct net_device
*ndev
)
73 struct mana_port_context
*apc
= netdev_priv(ndev
);
78 return mana_detach(ndev
, true);
81 static bool mana_can_tx(struct gdma_queue
*wq
)
83 return mana_gd_wq_avail_space(wq
) >= MAX_TX_WQE_SIZE
;
86 static unsigned int mana_checksum_info(struct sk_buff
*skb
)
88 if (skb
->protocol
== htons(ETH_P_IP
)) {
89 struct iphdr
*ip
= ip_hdr(skb
);
91 if (ip
->protocol
== IPPROTO_TCP
)
94 if (ip
->protocol
== IPPROTO_UDP
)
96 } else if (skb
->protocol
== htons(ETH_P_IPV6
)) {
97 struct ipv6hdr
*ip6
= ipv6_hdr(skb
);
99 if (ip6
->nexthdr
== IPPROTO_TCP
)
102 if (ip6
->nexthdr
== IPPROTO_UDP
)
106 /* No csum offloading */
110 static void mana_add_sge(struct mana_tx_package
*tp
, struct mana_skb_head
*ash
,
111 int sg_i
, dma_addr_t da
, int sge_len
, u32 gpa_mkey
)
113 ash
->dma_handle
[sg_i
] = da
;
114 ash
->size
[sg_i
] = sge_len
;
116 tp
->wqe_req
.sgl
[sg_i
].address
= da
;
117 tp
->wqe_req
.sgl
[sg_i
].mem_key
= gpa_mkey
;
118 tp
->wqe_req
.sgl
[sg_i
].size
= sge_len
;
121 static int mana_map_skb(struct sk_buff
*skb
, struct mana_port_context
*apc
,
122 struct mana_tx_package
*tp
, int gso_hs
)
124 struct mana_skb_head
*ash
= (struct mana_skb_head
*)skb
->head
;
125 int hsg
= 1; /* num of SGEs of linear part */
126 struct gdma_dev
*gd
= apc
->ac
->gdma_dev
;
127 int skb_hlen
= skb_headlen(skb
);
128 int sge0_len
, sge1_len
= 0;
129 struct gdma_context
*gc
;
136 gc
= gd
->gdma_context
;
139 if (gso_hs
&& gso_hs
< skb_hlen
) {
141 sge1_len
= skb_hlen
- gso_hs
;
146 da
= dma_map_single(dev
, skb
->data
, sge0_len
, DMA_TO_DEVICE
);
147 if (dma_mapping_error(dev
, da
))
150 mana_add_sge(tp
, ash
, 0, da
, sge0_len
, gd
->gpa_mkey
);
154 da
= dma_map_single(dev
, skb
->data
+ sge0_len
, sge1_len
,
156 if (dma_mapping_error(dev
, da
))
159 mana_add_sge(tp
, ash
, sg_i
, da
, sge1_len
, gd
->gpa_mkey
);
163 for (i
= 0; i
< skb_shinfo(skb
)->nr_frags
; i
++) {
166 frag
= &skb_shinfo(skb
)->frags
[i
];
167 da
= skb_frag_dma_map(dev
, frag
, 0, skb_frag_size(frag
),
169 if (dma_mapping_error(dev
, da
))
172 mana_add_sge(tp
, ash
, sg_i
, da
, skb_frag_size(frag
),
179 for (i
= sg_i
- 1; i
>= hsg
; i
--)
180 dma_unmap_page(dev
, ash
->dma_handle
[i
], ash
->size
[i
],
183 for (i
= hsg
- 1; i
>= 0; i
--)
184 dma_unmap_single(dev
, ash
->dma_handle
[i
], ash
->size
[i
],
190 /* Handle the case when GSO SKB linear length is too large.
191 * MANA NIC requires GSO packets to put only the packet header to SGE0.
192 * So, we need 2 SGEs for the skb linear part which contains more than the
194 * Return a positive value for the number of SGEs, or a negative value
197 static int mana_fix_skb_head(struct net_device
*ndev
, struct sk_buff
*skb
,
200 int num_sge
= 1 + skb_shinfo(skb
)->nr_frags
;
201 int skb_hlen
= skb_headlen(skb
);
203 if (gso_hs
< skb_hlen
) {
205 } else if (gso_hs
> skb_hlen
) {
208 "TX nonlinear head: hs:%d, skb_hlen:%d\n",
217 /* Get the GSO packet's header size */
218 static int mana_get_gso_hs(struct sk_buff
*skb
)
222 if (skb
->encapsulation
) {
223 gso_hs
= skb_inner_tcp_all_headers(skb
);
225 if (skb_shinfo(skb
)->gso_type
& SKB_GSO_UDP_L4
) {
226 gso_hs
= skb_transport_offset(skb
) +
227 sizeof(struct udphdr
);
229 gso_hs
= skb_tcp_all_headers(skb
);
236 netdev_tx_t
mana_start_xmit(struct sk_buff
*skb
, struct net_device
*ndev
)
238 enum mana_tx_pkt_format pkt_fmt
= MANA_SHORT_PKT_FMT
;
239 struct mana_port_context
*apc
= netdev_priv(ndev
);
240 int gso_hs
= 0; /* zero for non-GSO pkts */
241 u16 txq_idx
= skb_get_queue_mapping(skb
);
242 struct gdma_dev
*gd
= apc
->ac
->gdma_dev
;
243 bool ipv4
= false, ipv6
= false;
244 struct mana_tx_package pkg
= {};
245 struct netdev_queue
*net_txq
;
246 struct mana_stats_tx
*tx_stats
;
247 struct gdma_queue
*gdma_sq
;
248 unsigned int csum_type
;
249 struct mana_txq
*txq
;
253 if (unlikely(!apc
->port_is_up
))
256 if (skb_cow_head(skb
, MANA_HEADROOM
))
259 txq
= &apc
->tx_qp
[txq_idx
].txq
;
260 gdma_sq
= txq
->gdma_sq
;
261 cq
= &apc
->tx_qp
[txq_idx
].tx_cq
;
262 tx_stats
= &txq
->stats
;
264 pkg
.tx_oob
.s_oob
.vcq_num
= cq
->gdma_id
;
265 pkg
.tx_oob
.s_oob
.vsq_frame
= txq
->vsq_frame
;
267 if (txq
->vp_offset
> MANA_SHORT_VPORT_OFFSET_MAX
) {
268 pkg
.tx_oob
.l_oob
.long_vp_offset
= txq
->vp_offset
;
269 pkt_fmt
= MANA_LONG_PKT_FMT
;
271 pkg
.tx_oob
.s_oob
.short_vp_offset
= txq
->vp_offset
;
274 if (skb_vlan_tag_present(skb
)) {
275 pkt_fmt
= MANA_LONG_PKT_FMT
;
276 pkg
.tx_oob
.l_oob
.inject_vlan_pri_tag
= 1;
277 pkg
.tx_oob
.l_oob
.pcp
= skb_vlan_tag_get_prio(skb
);
278 pkg
.tx_oob
.l_oob
.dei
= skb_vlan_tag_get_cfi(skb
);
279 pkg
.tx_oob
.l_oob
.vlan_id
= skb_vlan_tag_get_id(skb
);
282 pkg
.tx_oob
.s_oob
.pkt_fmt
= pkt_fmt
;
284 if (pkt_fmt
== MANA_SHORT_PKT_FMT
) {
285 pkg
.wqe_req
.inline_oob_size
= sizeof(struct mana_tx_short_oob
);
286 u64_stats_update_begin(&tx_stats
->syncp
);
287 tx_stats
->short_pkt_fmt
++;
288 u64_stats_update_end(&tx_stats
->syncp
);
290 pkg
.wqe_req
.inline_oob_size
= sizeof(struct mana_tx_oob
);
291 u64_stats_update_begin(&tx_stats
->syncp
);
292 tx_stats
->long_pkt_fmt
++;
293 u64_stats_update_end(&tx_stats
->syncp
);
296 pkg
.wqe_req
.inline_oob_data
= &pkg
.tx_oob
;
297 pkg
.wqe_req
.flags
= 0;
298 pkg
.wqe_req
.client_data_unit
= 0;
300 pkg
.wqe_req
.num_sge
= 1 + skb_shinfo(skb
)->nr_frags
;
302 if (skb
->protocol
== htons(ETH_P_IP
))
304 else if (skb
->protocol
== htons(ETH_P_IPV6
))
307 if (skb_is_gso(skb
)) {
310 gso_hs
= mana_get_gso_hs(skb
);
312 num_sge
= mana_fix_skb_head(ndev
, skb
, gso_hs
);
314 pkg
.wqe_req
.num_sge
= num_sge
;
318 u64_stats_update_begin(&tx_stats
->syncp
);
319 if (skb
->encapsulation
) {
320 tx_stats
->tso_inner_packets
++;
321 tx_stats
->tso_inner_bytes
+= skb
->len
- gso_hs
;
323 tx_stats
->tso_packets
++;
324 tx_stats
->tso_bytes
+= skb
->len
- gso_hs
;
326 u64_stats_update_end(&tx_stats
->syncp
);
328 pkg
.tx_oob
.s_oob
.is_outer_ipv4
= ipv4
;
329 pkg
.tx_oob
.s_oob
.is_outer_ipv6
= ipv6
;
331 pkg
.tx_oob
.s_oob
.comp_iphdr_csum
= 1;
332 pkg
.tx_oob
.s_oob
.comp_tcp_csum
= 1;
333 pkg
.tx_oob
.s_oob
.trans_off
= skb_transport_offset(skb
);
335 pkg
.wqe_req
.client_data_unit
= skb_shinfo(skb
)->gso_size
;
336 pkg
.wqe_req
.flags
= GDMA_WR_OOB_IN_SGL
| GDMA_WR_PAD_BY_SGE0
;
338 ip_hdr(skb
)->tot_len
= 0;
339 ip_hdr(skb
)->check
= 0;
340 tcp_hdr(skb
)->check
=
341 ~csum_tcpudp_magic(ip_hdr(skb
)->saddr
,
342 ip_hdr(skb
)->daddr
, 0,
345 ipv6_hdr(skb
)->payload_len
= 0;
346 tcp_hdr(skb
)->check
=
347 ~csum_ipv6_magic(&ipv6_hdr(skb
)->saddr
,
348 &ipv6_hdr(skb
)->daddr
, 0,
351 } else if (skb
->ip_summed
== CHECKSUM_PARTIAL
) {
352 csum_type
= mana_checksum_info(skb
);
354 u64_stats_update_begin(&tx_stats
->syncp
);
355 tx_stats
->csum_partial
++;
356 u64_stats_update_end(&tx_stats
->syncp
);
358 if (csum_type
== IPPROTO_TCP
) {
359 pkg
.tx_oob
.s_oob
.is_outer_ipv4
= ipv4
;
360 pkg
.tx_oob
.s_oob
.is_outer_ipv6
= ipv6
;
362 pkg
.tx_oob
.s_oob
.comp_tcp_csum
= 1;
363 pkg
.tx_oob
.s_oob
.trans_off
= skb_transport_offset(skb
);
365 } else if (csum_type
== IPPROTO_UDP
) {
366 pkg
.tx_oob
.s_oob
.is_outer_ipv4
= ipv4
;
367 pkg
.tx_oob
.s_oob
.is_outer_ipv6
= ipv6
;
369 pkg
.tx_oob
.s_oob
.comp_udp_csum
= 1;
371 /* Can't do offload of this type of checksum */
372 if (skb_checksum_help(skb
))
377 WARN_ON_ONCE(pkg
.wqe_req
.num_sge
> MAX_TX_WQE_SGL_ENTRIES
);
379 if (pkg
.wqe_req
.num_sge
<= ARRAY_SIZE(pkg
.sgl_array
)) {
380 pkg
.wqe_req
.sgl
= pkg
.sgl_array
;
382 pkg
.sgl_ptr
= kmalloc_array(pkg
.wqe_req
.num_sge
,
383 sizeof(struct gdma_sge
),
388 pkg
.wqe_req
.sgl
= pkg
.sgl_ptr
;
391 if (mana_map_skb(skb
, apc
, &pkg
, gso_hs
)) {
392 u64_stats_update_begin(&tx_stats
->syncp
);
393 tx_stats
->mana_map_err
++;
394 u64_stats_update_end(&tx_stats
->syncp
);
398 skb_queue_tail(&txq
->pending_skbs
, skb
);
401 net_txq
= netdev_get_tx_queue(ndev
, txq_idx
);
403 err
= mana_gd_post_work_request(gdma_sq
, &pkg
.wqe_req
,
404 (struct gdma_posted_wqe_info
*)skb
->cb
);
405 if (!mana_can_tx(gdma_sq
)) {
406 netif_tx_stop_queue(net_txq
);
407 apc
->eth_stats
.stop_queue
++;
411 (void)skb_dequeue_tail(&txq
->pending_skbs
);
412 netdev_warn(ndev
, "Failed to post TX OOB: %d\n", err
);
413 err
= NETDEV_TX_BUSY
;
418 atomic_inc(&txq
->pending_sends
);
420 mana_gd_wq_ring_doorbell(gd
->gdma_context
, gdma_sq
);
422 /* skb may be freed after mana_gd_post_work_request. Do not use it. */
425 tx_stats
= &txq
->stats
;
426 u64_stats_update_begin(&tx_stats
->syncp
);
428 tx_stats
->bytes
+= len
;
429 u64_stats_update_end(&tx_stats
->syncp
);
432 if (netif_tx_queue_stopped(net_txq
) && mana_can_tx(gdma_sq
)) {
433 netif_tx_wake_queue(net_txq
);
434 apc
->eth_stats
.wake_queue
++;
443 ndev
->stats
.tx_dropped
++;
445 dev_kfree_skb_any(skb
);
449 static void mana_get_stats64(struct net_device
*ndev
,
450 struct rtnl_link_stats64
*st
)
452 struct mana_port_context
*apc
= netdev_priv(ndev
);
453 unsigned int num_queues
= apc
->num_queues
;
454 struct mana_stats_rx
*rx_stats
;
455 struct mana_stats_tx
*tx_stats
;
460 if (!apc
->port_is_up
)
463 netdev_stats_to_stats64(st
, &ndev
->stats
);
465 for (q
= 0; q
< num_queues
; q
++) {
466 rx_stats
= &apc
->rxqs
[q
]->stats
;
469 start
= u64_stats_fetch_begin(&rx_stats
->syncp
);
470 packets
= rx_stats
->packets
;
471 bytes
= rx_stats
->bytes
;
472 } while (u64_stats_fetch_retry(&rx_stats
->syncp
, start
));
474 st
->rx_packets
+= packets
;
475 st
->rx_bytes
+= bytes
;
478 for (q
= 0; q
< num_queues
; q
++) {
479 tx_stats
= &apc
->tx_qp
[q
].txq
.stats
;
482 start
= u64_stats_fetch_begin(&tx_stats
->syncp
);
483 packets
= tx_stats
->packets
;
484 bytes
= tx_stats
->bytes
;
485 } while (u64_stats_fetch_retry(&tx_stats
->syncp
, start
));
487 st
->tx_packets
+= packets
;
488 st
->tx_bytes
+= bytes
;
492 static int mana_get_tx_queue(struct net_device
*ndev
, struct sk_buff
*skb
,
495 struct mana_port_context
*apc
= netdev_priv(ndev
);
496 u32 hash
= skb_get_hash(skb
);
497 struct sock
*sk
= skb
->sk
;
500 txq
= apc
->indir_table
[hash
& (apc
->indir_table_sz
- 1)];
502 if (txq
!= old_q
&& sk
&& sk_fullsock(sk
) &&
503 rcu_access_pointer(sk
->sk_dst_cache
))
504 sk_tx_queue_set(sk
, txq
);
509 static u16
mana_select_queue(struct net_device
*ndev
, struct sk_buff
*skb
,
510 struct net_device
*sb_dev
)
514 if (ndev
->real_num_tx_queues
== 1)
517 txq
= sk_tx_queue_get(skb
->sk
);
519 if (txq
< 0 || skb
->ooo_okay
|| txq
>= ndev
->real_num_tx_queues
) {
520 if (skb_rx_queue_recorded(skb
))
521 txq
= skb_get_rx_queue(skb
);
523 txq
= mana_get_tx_queue(ndev
, skb
, txq
);
529 /* Release pre-allocated RX buffers */
530 void mana_pre_dealloc_rxbufs(struct mana_port_context
*mpc
)
535 dev
= mpc
->ac
->gdma_dev
->gdma_context
->dev
;
537 if (!mpc
->rxbufs_pre
)
543 while (mpc
->rxbpre_total
) {
544 i
= --mpc
->rxbpre_total
;
545 dma_unmap_single(dev
, mpc
->das_pre
[i
], mpc
->rxbpre_datasize
,
547 put_page(virt_to_head_page(mpc
->rxbufs_pre
[i
]));
554 kfree(mpc
->rxbufs_pre
);
555 mpc
->rxbufs_pre
= NULL
;
558 mpc
->rxbpre_datasize
= 0;
559 mpc
->rxbpre_alloc_size
= 0;
560 mpc
->rxbpre_headroom
= 0;
563 /* Get a buffer from the pre-allocated RX buffers */
564 static void *mana_get_rxbuf_pre(struct mana_rxq
*rxq
, dma_addr_t
*da
)
566 struct net_device
*ndev
= rxq
->ndev
;
567 struct mana_port_context
*mpc
;
570 mpc
= netdev_priv(ndev
);
572 if (!mpc
->rxbufs_pre
|| !mpc
->das_pre
|| !mpc
->rxbpre_total
) {
573 netdev_err(ndev
, "No RX pre-allocated bufs\n");
577 /* Check sizes to catch unexpected coding error */
578 if (mpc
->rxbpre_datasize
!= rxq
->datasize
) {
579 netdev_err(ndev
, "rxbpre_datasize mismatch: %u: %u\n",
580 mpc
->rxbpre_datasize
, rxq
->datasize
);
584 if (mpc
->rxbpre_alloc_size
!= rxq
->alloc_size
) {
585 netdev_err(ndev
, "rxbpre_alloc_size mismatch: %u: %u\n",
586 mpc
->rxbpre_alloc_size
, rxq
->alloc_size
);
590 if (mpc
->rxbpre_headroom
!= rxq
->headroom
) {
591 netdev_err(ndev
, "rxbpre_headroom mismatch: %u: %u\n",
592 mpc
->rxbpre_headroom
, rxq
->headroom
);
598 *da
= mpc
->das_pre
[mpc
->rxbpre_total
];
599 va
= mpc
->rxbufs_pre
[mpc
->rxbpre_total
];
600 mpc
->rxbufs_pre
[mpc
->rxbpre_total
] = NULL
;
602 /* Deallocate the array after all buffers are gone */
603 if (!mpc
->rxbpre_total
)
604 mana_pre_dealloc_rxbufs(mpc
);
609 /* Get RX buffer's data size, alloc size, XDP headroom based on MTU */
610 static void mana_get_rxbuf_cfg(int mtu
, u32
*datasize
, u32
*alloc_size
,
613 if (mtu
> MANA_XDP_MTU_MAX
)
614 *headroom
= 0; /* no support for XDP */
616 *headroom
= XDP_PACKET_HEADROOM
;
618 *alloc_size
= SKB_DATA_ALIGN(mtu
+ MANA_RXBUF_PAD
+ *headroom
);
620 /* Using page pool in this case, so alloc_size is PAGE_SIZE */
621 if (*alloc_size
< PAGE_SIZE
)
622 *alloc_size
= PAGE_SIZE
;
624 *datasize
= mtu
+ ETH_HLEN
;
627 int mana_pre_alloc_rxbufs(struct mana_port_context
*mpc
, int new_mtu
, int num_queues
)
636 mana_get_rxbuf_cfg(new_mtu
, &mpc
->rxbpre_datasize
,
637 &mpc
->rxbpre_alloc_size
, &mpc
->rxbpre_headroom
);
639 dev
= mpc
->ac
->gdma_dev
->gdma_context
->dev
;
641 num_rxb
= num_queues
* mpc
->rx_queue_size
;
643 WARN(mpc
->rxbufs_pre
, "mana rxbufs_pre exists\n");
644 mpc
->rxbufs_pre
= kmalloc_array(num_rxb
, sizeof(void *), GFP_KERNEL
);
645 if (!mpc
->rxbufs_pre
)
648 mpc
->das_pre
= kmalloc_array(num_rxb
, sizeof(dma_addr_t
), GFP_KERNEL
);
652 mpc
->rxbpre_total
= 0;
654 for (i
= 0; i
< num_rxb
; i
++) {
655 if (mpc
->rxbpre_alloc_size
> PAGE_SIZE
) {
656 va
= netdev_alloc_frag(mpc
->rxbpre_alloc_size
);
660 page
= virt_to_head_page(va
);
661 /* Check if the frag falls back to single page */
662 if (compound_order(page
) <
663 get_order(mpc
->rxbpre_alloc_size
)) {
668 page
= dev_alloc_page();
672 va
= page_to_virt(page
);
675 da
= dma_map_single(dev
, va
+ mpc
->rxbpre_headroom
,
676 mpc
->rxbpre_datasize
, DMA_FROM_DEVICE
);
677 if (dma_mapping_error(dev
, da
)) {
678 put_page(virt_to_head_page(va
));
682 mpc
->rxbufs_pre
[i
] = va
;
683 mpc
->das_pre
[i
] = da
;
684 mpc
->rxbpre_total
= i
+ 1;
690 mana_pre_dealloc_rxbufs(mpc
);
694 static int mana_change_mtu(struct net_device
*ndev
, int new_mtu
)
696 struct mana_port_context
*mpc
= netdev_priv(ndev
);
697 unsigned int old_mtu
= ndev
->mtu
;
700 /* Pre-allocate buffers to prevent failure in mana_attach later */
701 err
= mana_pre_alloc_rxbufs(mpc
, new_mtu
, mpc
->num_queues
);
703 netdev_err(ndev
, "Insufficient memory for new MTU\n");
707 err
= mana_detach(ndev
, false);
709 netdev_err(ndev
, "mana_detach failed: %d\n", err
);
713 WRITE_ONCE(ndev
->mtu
, new_mtu
);
715 err
= mana_attach(ndev
);
717 netdev_err(ndev
, "mana_attach failed: %d\n", err
);
718 WRITE_ONCE(ndev
->mtu
, old_mtu
);
722 mana_pre_dealloc_rxbufs(mpc
);
726 static const struct net_device_ops mana_devops
= {
727 .ndo_open
= mana_open
,
728 .ndo_stop
= mana_close
,
729 .ndo_select_queue
= mana_select_queue
,
730 .ndo_start_xmit
= mana_start_xmit
,
731 .ndo_validate_addr
= eth_validate_addr
,
732 .ndo_get_stats64
= mana_get_stats64
,
734 .ndo_xdp_xmit
= mana_xdp_xmit
,
735 .ndo_change_mtu
= mana_change_mtu
,
738 static void mana_cleanup_port_context(struct mana_port_context
*apc
)
741 * at this point all dir/files under the vport directory
742 * are already cleaned up.
743 * We are sure the apc->mana_port_debugfs remove will not
744 * cause any freed memory access issues
746 debugfs_remove(apc
->mana_port_debugfs
);
751 static void mana_cleanup_indir_table(struct mana_port_context
*apc
)
753 apc
->indir_table_sz
= 0;
754 kfree(apc
->indir_table
);
755 kfree(apc
->rxobj_table
);
758 static int mana_init_port_context(struct mana_port_context
*apc
)
760 apc
->rxqs
= kcalloc(apc
->num_queues
, sizeof(struct mana_rxq
*),
763 return !apc
->rxqs
? -ENOMEM
: 0;
766 static int mana_send_request(struct mana_context
*ac
, void *in_buf
,
767 u32 in_len
, void *out_buf
, u32 out_len
)
769 struct gdma_context
*gc
= ac
->gdma_dev
->gdma_context
;
770 struct gdma_resp_hdr
*resp
= out_buf
;
771 struct gdma_req_hdr
*req
= in_buf
;
772 struct device
*dev
= gc
->dev
;
773 static atomic_t activity_id
;
776 req
->dev_id
= gc
->mana
.dev_id
;
777 req
->activity_id
= atomic_inc_return(&activity_id
);
779 err
= mana_gd_send_request(gc
, in_len
, in_buf
, out_len
,
781 if (err
|| resp
->status
) {
782 dev_err(dev
, "Failed to send mana message: %d, 0x%x\n",
784 return err
? err
: -EPROTO
;
787 if (req
->dev_id
.as_uint32
!= resp
->dev_id
.as_uint32
||
788 req
->activity_id
!= resp
->activity_id
) {
789 dev_err(dev
, "Unexpected mana message response: %x,%x,%x,%x\n",
790 req
->dev_id
.as_uint32
, resp
->dev_id
.as_uint32
,
791 req
->activity_id
, resp
->activity_id
);
798 static int mana_verify_resp_hdr(const struct gdma_resp_hdr
*resp_hdr
,
799 const enum mana_command_code expected_code
,
802 if (resp_hdr
->response
.msg_type
!= expected_code
)
805 if (resp_hdr
->response
.msg_version
< GDMA_MESSAGE_V1
)
808 if (resp_hdr
->response
.msg_size
< min_size
)
814 static int mana_pf_register_hw_vport(struct mana_port_context
*apc
)
816 struct mana_register_hw_vport_resp resp
= {};
817 struct mana_register_hw_vport_req req
= {};
820 mana_gd_init_req_hdr(&req
.hdr
, MANA_REGISTER_HW_PORT
,
821 sizeof(req
), sizeof(resp
));
822 req
.attached_gfid
= 1;
823 req
.is_pf_default_vport
= 1;
824 req
.allow_all_ether_types
= 1;
826 err
= mana_send_request(apc
->ac
, &req
, sizeof(req
), &resp
,
829 netdev_err(apc
->ndev
, "Failed to register hw vPort: %d\n", err
);
833 err
= mana_verify_resp_hdr(&resp
.hdr
, MANA_REGISTER_HW_PORT
,
835 if (err
|| resp
.hdr
.status
) {
836 netdev_err(apc
->ndev
, "Failed to register hw vPort: %d, 0x%x\n",
837 err
, resp
.hdr
.status
);
838 return err
? err
: -EPROTO
;
841 apc
->port_handle
= resp
.hw_vport_handle
;
845 static void mana_pf_deregister_hw_vport(struct mana_port_context
*apc
)
847 struct mana_deregister_hw_vport_resp resp
= {};
848 struct mana_deregister_hw_vport_req req
= {};
851 mana_gd_init_req_hdr(&req
.hdr
, MANA_DEREGISTER_HW_PORT
,
852 sizeof(req
), sizeof(resp
));
853 req
.hw_vport_handle
= apc
->port_handle
;
855 err
= mana_send_request(apc
->ac
, &req
, sizeof(req
), &resp
,
858 netdev_err(apc
->ndev
, "Failed to unregister hw vPort: %d\n",
863 err
= mana_verify_resp_hdr(&resp
.hdr
, MANA_DEREGISTER_HW_PORT
,
865 if (err
|| resp
.hdr
.status
)
866 netdev_err(apc
->ndev
,
867 "Failed to deregister hw vPort: %d, 0x%x\n",
868 err
, resp
.hdr
.status
);
871 static int mana_pf_register_filter(struct mana_port_context
*apc
)
873 struct mana_register_filter_resp resp
= {};
874 struct mana_register_filter_req req
= {};
877 mana_gd_init_req_hdr(&req
.hdr
, MANA_REGISTER_FILTER
,
878 sizeof(req
), sizeof(resp
));
879 req
.vport
= apc
->port_handle
;
880 memcpy(req
.mac_addr
, apc
->mac_addr
, ETH_ALEN
);
882 err
= mana_send_request(apc
->ac
, &req
, sizeof(req
), &resp
,
885 netdev_err(apc
->ndev
, "Failed to register filter: %d\n", err
);
889 err
= mana_verify_resp_hdr(&resp
.hdr
, MANA_REGISTER_FILTER
,
891 if (err
|| resp
.hdr
.status
) {
892 netdev_err(apc
->ndev
, "Failed to register filter: %d, 0x%x\n",
893 err
, resp
.hdr
.status
);
894 return err
? err
: -EPROTO
;
897 apc
->pf_filter_handle
= resp
.filter_handle
;
901 static void mana_pf_deregister_filter(struct mana_port_context
*apc
)
903 struct mana_deregister_filter_resp resp
= {};
904 struct mana_deregister_filter_req req
= {};
907 mana_gd_init_req_hdr(&req
.hdr
, MANA_DEREGISTER_FILTER
,
908 sizeof(req
), sizeof(resp
));
909 req
.filter_handle
= apc
->pf_filter_handle
;
911 err
= mana_send_request(apc
->ac
, &req
, sizeof(req
), &resp
,
914 netdev_err(apc
->ndev
, "Failed to unregister filter: %d\n",
919 err
= mana_verify_resp_hdr(&resp
.hdr
, MANA_DEREGISTER_FILTER
,
921 if (err
|| resp
.hdr
.status
)
922 netdev_err(apc
->ndev
,
923 "Failed to deregister filter: %d, 0x%x\n",
924 err
, resp
.hdr
.status
);
927 static int mana_query_device_cfg(struct mana_context
*ac
, u32 proto_major_ver
,
928 u32 proto_minor_ver
, u32 proto_micro_ver
,
931 struct gdma_context
*gc
= ac
->gdma_dev
->gdma_context
;
932 struct mana_query_device_cfg_resp resp
= {};
933 struct mana_query_device_cfg_req req
= {};
934 struct device
*dev
= gc
->dev
;
937 mana_gd_init_req_hdr(&req
.hdr
, MANA_QUERY_DEV_CONFIG
,
938 sizeof(req
), sizeof(resp
));
940 req
.hdr
.resp
.msg_version
= GDMA_MESSAGE_V2
;
942 req
.proto_major_ver
= proto_major_ver
;
943 req
.proto_minor_ver
= proto_minor_ver
;
944 req
.proto_micro_ver
= proto_micro_ver
;
946 err
= mana_send_request(ac
, &req
, sizeof(req
), &resp
, sizeof(resp
));
948 dev_err(dev
, "Failed to query config: %d", err
);
952 err
= mana_verify_resp_hdr(&resp
.hdr
, MANA_QUERY_DEV_CONFIG
,
954 if (err
|| resp
.hdr
.status
) {
955 dev_err(dev
, "Invalid query result: %d, 0x%x\n", err
,
962 *max_num_vports
= resp
.max_num_vports
;
964 if (resp
.hdr
.response
.msg_version
== GDMA_MESSAGE_V2
)
965 gc
->adapter_mtu
= resp
.adapter_mtu
;
967 gc
->adapter_mtu
= ETH_FRAME_LEN
;
969 debugfs_create_u16("adapter-MTU", 0400, gc
->mana_pci_debugfs
, &gc
->adapter_mtu
);
974 static int mana_query_vport_cfg(struct mana_port_context
*apc
, u32 vport_index
,
975 u32
*max_sq
, u32
*max_rq
, u32
*num_indir_entry
)
977 struct mana_query_vport_cfg_resp resp
= {};
978 struct mana_query_vport_cfg_req req
= {};
981 mana_gd_init_req_hdr(&req
.hdr
, MANA_QUERY_VPORT_CONFIG
,
982 sizeof(req
), sizeof(resp
));
984 req
.vport_index
= vport_index
;
986 err
= mana_send_request(apc
->ac
, &req
, sizeof(req
), &resp
,
991 err
= mana_verify_resp_hdr(&resp
.hdr
, MANA_QUERY_VPORT_CONFIG
,
999 *max_sq
= resp
.max_num_sq
;
1000 *max_rq
= resp
.max_num_rq
;
1001 if (resp
.num_indirection_ent
> 0 &&
1002 resp
.num_indirection_ent
<= MANA_INDIRECT_TABLE_MAX_SIZE
&&
1003 is_power_of_2(resp
.num_indirection_ent
)) {
1004 *num_indir_entry
= resp
.num_indirection_ent
;
1006 netdev_warn(apc
->ndev
,
1007 "Setting indirection table size to default %d for vPort %d\n",
1008 MANA_INDIRECT_TABLE_DEF_SIZE
, apc
->port_idx
);
1009 *num_indir_entry
= MANA_INDIRECT_TABLE_DEF_SIZE
;
1012 apc
->port_handle
= resp
.vport
;
1013 ether_addr_copy(apc
->mac_addr
, resp
.mac_addr
);
1018 void mana_uncfg_vport(struct mana_port_context
*apc
)
1020 mutex_lock(&apc
->vport_mutex
);
1021 apc
->vport_use_count
--;
1022 WARN_ON(apc
->vport_use_count
< 0);
1023 mutex_unlock(&apc
->vport_mutex
);
1025 EXPORT_SYMBOL_NS(mana_uncfg_vport
, "NET_MANA");
1027 int mana_cfg_vport(struct mana_port_context
*apc
, u32 protection_dom_id
,
1030 struct mana_config_vport_resp resp
= {};
1031 struct mana_config_vport_req req
= {};
1034 /* This function is used to program the Ethernet port in the hardware
1035 * table. It can be called from the Ethernet driver or the RDMA driver.
1037 * For Ethernet usage, the hardware supports only one active user on a
1038 * physical port. The driver checks on the port usage before programming
1039 * the hardware when creating the RAW QP (RDMA driver) or exposing the
1040 * device to kernel NET layer (Ethernet driver).
1042 * Because the RDMA driver doesn't know in advance which QP type the
1043 * user will create, it exposes the device with all its ports. The user
1044 * may not be able to create RAW QP on a port if this port is already
1045 * in used by the Ethernet driver from the kernel.
1047 * This physical port limitation only applies to the RAW QP. For RC QP,
1048 * the hardware doesn't have this limitation. The user can create RC
1049 * QPs on a physical port up to the hardware limits independent of the
1050 * Ethernet usage on the same port.
1052 mutex_lock(&apc
->vport_mutex
);
1053 if (apc
->vport_use_count
> 0) {
1054 mutex_unlock(&apc
->vport_mutex
);
1057 apc
->vport_use_count
++;
1058 mutex_unlock(&apc
->vport_mutex
);
1060 mana_gd_init_req_hdr(&req
.hdr
, MANA_CONFIG_VPORT_TX
,
1061 sizeof(req
), sizeof(resp
));
1062 req
.vport
= apc
->port_handle
;
1063 req
.pdid
= protection_dom_id
;
1064 req
.doorbell_pageid
= doorbell_pg_id
;
1066 err
= mana_send_request(apc
->ac
, &req
, sizeof(req
), &resp
,
1069 netdev_err(apc
->ndev
, "Failed to configure vPort: %d\n", err
);
1073 err
= mana_verify_resp_hdr(&resp
.hdr
, MANA_CONFIG_VPORT_TX
,
1075 if (err
|| resp
.hdr
.status
) {
1076 netdev_err(apc
->ndev
, "Failed to configure vPort: %d, 0x%x\n",
1077 err
, resp
.hdr
.status
);
1084 apc
->tx_shortform_allowed
= resp
.short_form_allowed
;
1085 apc
->tx_vp_offset
= resp
.tx_vport_offset
;
1087 netdev_info(apc
->ndev
, "Configured vPort %llu PD %u DB %u\n",
1088 apc
->port_handle
, protection_dom_id
, doorbell_pg_id
);
1091 mana_uncfg_vport(apc
);
1095 EXPORT_SYMBOL_NS(mana_cfg_vport
, "NET_MANA");
1097 static int mana_cfg_vport_steering(struct mana_port_context
*apc
,
1099 bool update_default_rxobj
, bool update_key
,
1102 struct mana_cfg_rx_steer_req_v2
*req
;
1103 struct mana_cfg_rx_steer_resp resp
= {};
1104 struct net_device
*ndev
= apc
->ndev
;
1108 req_buf_size
= struct_size(req
, indir_tab
, apc
->indir_table_sz
);
1109 req
= kzalloc(req_buf_size
, GFP_KERNEL
);
1113 mana_gd_init_req_hdr(&req
->hdr
, MANA_CONFIG_VPORT_RX
, req_buf_size
,
1116 req
->hdr
.req
.msg_version
= GDMA_MESSAGE_V2
;
1118 req
->vport
= apc
->port_handle
;
1119 req
->num_indir_entries
= apc
->indir_table_sz
;
1120 req
->indir_tab_offset
= offsetof(struct mana_cfg_rx_steer_req_v2
,
1122 req
->rx_enable
= rx
;
1123 req
->rss_enable
= apc
->rss_state
;
1124 req
->update_default_rxobj
= update_default_rxobj
;
1125 req
->update_hashkey
= update_key
;
1126 req
->update_indir_tab
= update_tab
;
1127 req
->default_rxobj
= apc
->default_rxobj
;
1128 req
->cqe_coalescing_enable
= 0;
1131 memcpy(&req
->hashkey
, apc
->hashkey
, MANA_HASH_KEY_SIZE
);
1134 memcpy(req
->indir_tab
, apc
->rxobj_table
,
1135 flex_array_size(req
, indir_tab
, req
->num_indir_entries
));
1137 err
= mana_send_request(apc
->ac
, req
, req_buf_size
, &resp
,
1140 netdev_err(ndev
, "Failed to configure vPort RX: %d\n", err
);
1144 err
= mana_verify_resp_hdr(&resp
.hdr
, MANA_CONFIG_VPORT_RX
,
1147 netdev_err(ndev
, "vPort RX configuration failed: %d\n", err
);
1151 if (resp
.hdr
.status
) {
1152 netdev_err(ndev
, "vPort RX configuration failed: 0x%x\n",
1157 netdev_info(ndev
, "Configured steering vPort %llu entries %u\n",
1158 apc
->port_handle
, apc
->indir_table_sz
);
1164 int mana_create_wq_obj(struct mana_port_context
*apc
,
1165 mana_handle_t vport
,
1166 u32 wq_type
, struct mana_obj_spec
*wq_spec
,
1167 struct mana_obj_spec
*cq_spec
,
1168 mana_handle_t
*wq_obj
)
1170 struct mana_create_wqobj_resp resp
= {};
1171 struct mana_create_wqobj_req req
= {};
1172 struct net_device
*ndev
= apc
->ndev
;
1175 mana_gd_init_req_hdr(&req
.hdr
, MANA_CREATE_WQ_OBJ
,
1176 sizeof(req
), sizeof(resp
));
1178 req
.wq_type
= wq_type
;
1179 req
.wq_gdma_region
= wq_spec
->gdma_region
;
1180 req
.cq_gdma_region
= cq_spec
->gdma_region
;
1181 req
.wq_size
= wq_spec
->queue_size
;
1182 req
.cq_size
= cq_spec
->queue_size
;
1183 req
.cq_moderation_ctx_id
= cq_spec
->modr_ctx_id
;
1184 req
.cq_parent_qid
= cq_spec
->attached_eq
;
1186 err
= mana_send_request(apc
->ac
, &req
, sizeof(req
), &resp
,
1189 netdev_err(ndev
, "Failed to create WQ object: %d\n", err
);
1193 err
= mana_verify_resp_hdr(&resp
.hdr
, MANA_CREATE_WQ_OBJ
,
1195 if (err
|| resp
.hdr
.status
) {
1196 netdev_err(ndev
, "Failed to create WQ object: %d, 0x%x\n", err
,
1203 if (resp
.wq_obj
== INVALID_MANA_HANDLE
) {
1204 netdev_err(ndev
, "Got an invalid WQ object handle\n");
1209 *wq_obj
= resp
.wq_obj
;
1210 wq_spec
->queue_index
= resp
.wq_id
;
1211 cq_spec
->queue_index
= resp
.cq_id
;
1217 EXPORT_SYMBOL_NS(mana_create_wq_obj
, "NET_MANA");
1219 void mana_destroy_wq_obj(struct mana_port_context
*apc
, u32 wq_type
,
1220 mana_handle_t wq_obj
)
1222 struct mana_destroy_wqobj_resp resp
= {};
1223 struct mana_destroy_wqobj_req req
= {};
1224 struct net_device
*ndev
= apc
->ndev
;
1227 mana_gd_init_req_hdr(&req
.hdr
, MANA_DESTROY_WQ_OBJ
,
1228 sizeof(req
), sizeof(resp
));
1229 req
.wq_type
= wq_type
;
1230 req
.wq_obj_handle
= wq_obj
;
1232 err
= mana_send_request(apc
->ac
, &req
, sizeof(req
), &resp
,
1235 netdev_err(ndev
, "Failed to destroy WQ object: %d\n", err
);
1239 err
= mana_verify_resp_hdr(&resp
.hdr
, MANA_DESTROY_WQ_OBJ
,
1241 if (err
|| resp
.hdr
.status
)
1242 netdev_err(ndev
, "Failed to destroy WQ object: %d, 0x%x\n", err
,
1245 EXPORT_SYMBOL_NS(mana_destroy_wq_obj
, "NET_MANA");
1247 static void mana_destroy_eq(struct mana_context
*ac
)
1249 struct gdma_context
*gc
= ac
->gdma_dev
->gdma_context
;
1250 struct gdma_queue
*eq
;
1256 debugfs_remove_recursive(ac
->mana_eqs_debugfs
);
1258 for (i
= 0; i
< gc
->max_num_queues
; i
++) {
1263 mana_gd_destroy_queue(gc
, eq
);
1270 static void mana_create_eq_debugfs(struct mana_context
*ac
, int i
)
1272 struct mana_eq eq
= ac
->eqs
[i
];
1275 sprintf(eqnum
, "eq%d", i
);
1276 eq
.mana_eq_debugfs
= debugfs_create_dir(eqnum
, ac
->mana_eqs_debugfs
);
1277 debugfs_create_u32("head", 0400, eq
.mana_eq_debugfs
, &eq
.eq
->head
);
1278 debugfs_create_u32("tail", 0400, eq
.mana_eq_debugfs
, &eq
.eq
->tail
);
1279 debugfs_create_file("eq_dump", 0400, eq
.mana_eq_debugfs
, eq
.eq
, &mana_dbg_q_fops
);
1282 static int mana_create_eq(struct mana_context
*ac
)
1284 struct gdma_dev
*gd
= ac
->gdma_dev
;
1285 struct gdma_context
*gc
= gd
->gdma_context
;
1286 struct gdma_queue_spec spec
= {};
1290 ac
->eqs
= kcalloc(gc
->max_num_queues
, sizeof(struct mana_eq
),
1295 spec
.type
= GDMA_EQ
;
1296 spec
.monitor_avl_buf
= false;
1297 spec
.queue_size
= EQ_SIZE
;
1298 spec
.eq
.callback
= NULL
;
1299 spec
.eq
.context
= ac
->eqs
;
1300 spec
.eq
.log2_throttle_limit
= LOG2_EQ_THROTTLE
;
1302 ac
->mana_eqs_debugfs
= debugfs_create_dir("EQs", gc
->mana_pci_debugfs
);
1304 for (i
= 0; i
< gc
->max_num_queues
; i
++) {
1305 spec
.eq
.msix_index
= (i
+ 1) % gc
->num_msix_usable
;
1306 err
= mana_gd_create_mana_eq(gd
, &spec
, &ac
->eqs
[i
].eq
);
1309 mana_create_eq_debugfs(ac
, i
);
1314 mana_destroy_eq(ac
);
1318 static int mana_fence_rq(struct mana_port_context
*apc
, struct mana_rxq
*rxq
)
1320 struct mana_fence_rq_resp resp
= {};
1321 struct mana_fence_rq_req req
= {};
1324 init_completion(&rxq
->fence_event
);
1326 mana_gd_init_req_hdr(&req
.hdr
, MANA_FENCE_RQ
,
1327 sizeof(req
), sizeof(resp
));
1328 req
.wq_obj_handle
= rxq
->rxobj
;
1330 err
= mana_send_request(apc
->ac
, &req
, sizeof(req
), &resp
,
1333 netdev_err(apc
->ndev
, "Failed to fence RQ %u: %d\n",
1338 err
= mana_verify_resp_hdr(&resp
.hdr
, MANA_FENCE_RQ
, sizeof(resp
));
1339 if (err
|| resp
.hdr
.status
) {
1340 netdev_err(apc
->ndev
, "Failed to fence RQ %u: %d, 0x%x\n",
1341 rxq
->rxq_idx
, err
, resp
.hdr
.status
);
1348 if (wait_for_completion_timeout(&rxq
->fence_event
, 10 * HZ
) == 0) {
1349 netdev_err(apc
->ndev
, "Failed to fence RQ %u: timed out\n",
1357 static void mana_fence_rqs(struct mana_port_context
*apc
)
1359 unsigned int rxq_idx
;
1360 struct mana_rxq
*rxq
;
1363 for (rxq_idx
= 0; rxq_idx
< apc
->num_queues
; rxq_idx
++) {
1364 rxq
= apc
->rxqs
[rxq_idx
];
1365 err
= mana_fence_rq(apc
, rxq
);
1367 /* In case of any error, use sleep instead. */
1373 static int mana_move_wq_tail(struct gdma_queue
*wq
, u32 num_units
)
1378 used_space_old
= wq
->head
- wq
->tail
;
1379 used_space_new
= wq
->head
- (wq
->tail
+ num_units
);
1381 if (WARN_ON_ONCE(used_space_new
> used_space_old
))
1384 wq
->tail
+= num_units
;
1388 static void mana_unmap_skb(struct sk_buff
*skb
, struct mana_port_context
*apc
)
1390 struct mana_skb_head
*ash
= (struct mana_skb_head
*)skb
->head
;
1391 struct gdma_context
*gc
= apc
->ac
->gdma_dev
->gdma_context
;
1392 struct device
*dev
= gc
->dev
;
1395 /* Number of SGEs of linear part */
1396 hsg
= (skb_is_gso(skb
) && skb_headlen(skb
) > ash
->size
[0]) ? 2 : 1;
1398 for (i
= 0; i
< hsg
; i
++)
1399 dma_unmap_single(dev
, ash
->dma_handle
[i
], ash
->size
[i
],
1402 for (i
= hsg
; i
< skb_shinfo(skb
)->nr_frags
+ hsg
; i
++)
1403 dma_unmap_page(dev
, ash
->dma_handle
[i
], ash
->size
[i
],
1407 static void mana_poll_tx_cq(struct mana_cq
*cq
)
1409 struct gdma_comp
*completions
= cq
->gdma_comp_buf
;
1410 struct gdma_posted_wqe_info
*wqe_info
;
1411 unsigned int pkt_transmitted
= 0;
1412 unsigned int wqe_unit_cnt
= 0;
1413 struct mana_txq
*txq
= cq
->txq
;
1414 struct mana_port_context
*apc
;
1415 struct netdev_queue
*net_txq
;
1416 struct gdma_queue
*gdma_wq
;
1417 unsigned int avail_space
;
1418 struct net_device
*ndev
;
1419 struct sk_buff
*skb
;
1425 apc
= netdev_priv(ndev
);
1427 comp_read
= mana_gd_poll_cq(cq
->gdma_cq
, completions
,
1428 CQE_POLLING_BUFFER
);
1433 for (i
= 0; i
< comp_read
; i
++) {
1434 struct mana_tx_comp_oob
*cqe_oob
;
1436 if (WARN_ON_ONCE(!completions
[i
].is_sq
))
1439 cqe_oob
= (struct mana_tx_comp_oob
*)completions
[i
].cqe_data
;
1440 if (WARN_ON_ONCE(cqe_oob
->cqe_hdr
.client_type
!=
1441 MANA_CQE_COMPLETION
))
1444 switch (cqe_oob
->cqe_hdr
.cqe_type
) {
1448 case CQE_TX_SA_DROP
:
1449 case CQE_TX_MTU_DROP
:
1450 case CQE_TX_INVALID_OOB
:
1451 case CQE_TX_INVALID_ETH_TYPE
:
1452 case CQE_TX_HDR_PROCESSING_ERROR
:
1453 case CQE_TX_VF_DISABLED
:
1454 case CQE_TX_VPORT_IDX_OUT_OF_RANGE
:
1455 case CQE_TX_VPORT_DISABLED
:
1456 case CQE_TX_VLAN_TAGGING_VIOLATION
:
1457 if (net_ratelimit())
1458 netdev_err(ndev
, "TX: CQE error %d\n",
1459 cqe_oob
->cqe_hdr
.cqe_type
);
1461 apc
->eth_stats
.tx_cqe_err
++;
1465 /* If the CQE type is unknown, log an error,
1466 * and still free the SKB, update tail, etc.
1468 if (net_ratelimit())
1469 netdev_err(ndev
, "TX: unknown CQE type %d\n",
1470 cqe_oob
->cqe_hdr
.cqe_type
);
1472 apc
->eth_stats
.tx_cqe_unknown_type
++;
1476 if (WARN_ON_ONCE(txq
->gdma_txq_id
!= completions
[i
].wq_num
))
1479 skb
= skb_dequeue(&txq
->pending_skbs
);
1480 if (WARN_ON_ONCE(!skb
))
1483 wqe_info
= (struct gdma_posted_wqe_info
*)skb
->cb
;
1484 wqe_unit_cnt
+= wqe_info
->wqe_size_in_bu
;
1486 mana_unmap_skb(skb
, apc
);
1488 napi_consume_skb(skb
, cq
->budget
);
1493 if (WARN_ON_ONCE(wqe_unit_cnt
== 0))
1496 mana_move_wq_tail(txq
->gdma_sq
, wqe_unit_cnt
);
1498 gdma_wq
= txq
->gdma_sq
;
1499 avail_space
= mana_gd_wq_avail_space(gdma_wq
);
1501 /* Ensure tail updated before checking q stop */
1504 net_txq
= txq
->net_txq
;
1505 txq_stopped
= netif_tx_queue_stopped(net_txq
);
1507 /* Ensure checking txq_stopped before apc->port_is_up. */
1510 if (txq_stopped
&& apc
->port_is_up
&& avail_space
>= MAX_TX_WQE_SIZE
) {
1511 netif_tx_wake_queue(net_txq
);
1512 apc
->eth_stats
.wake_queue
++;
1515 if (atomic_sub_return(pkt_transmitted
, &txq
->pending_sends
) < 0)
1518 cq
->work_done
= pkt_transmitted
;
1521 static void mana_post_pkt_rxq(struct mana_rxq
*rxq
)
1523 struct mana_recv_buf_oob
*recv_buf_oob
;
1527 curr_index
= rxq
->buf_index
++;
1528 if (rxq
->buf_index
== rxq
->num_rx_buf
)
1531 recv_buf_oob
= &rxq
->rx_oobs
[curr_index
];
1533 err
= mana_gd_post_work_request(rxq
->gdma_rq
, &recv_buf_oob
->wqe_req
,
1534 &recv_buf_oob
->wqe_inf
);
1535 if (WARN_ON_ONCE(err
))
1538 WARN_ON_ONCE(recv_buf_oob
->wqe_inf
.wqe_size_in_bu
!= 1);
1541 static struct sk_buff
*mana_build_skb(struct mana_rxq
*rxq
, void *buf_va
,
1542 uint pkt_len
, struct xdp_buff
*xdp
)
1544 struct sk_buff
*skb
= napi_build_skb(buf_va
, rxq
->alloc_size
);
1549 if (xdp
->data_hard_start
) {
1550 skb_reserve(skb
, xdp
->data
- xdp
->data_hard_start
);
1551 skb_put(skb
, xdp
->data_end
- xdp
->data
);
1555 skb_reserve(skb
, rxq
->headroom
);
1556 skb_put(skb
, pkt_len
);
1561 static void mana_rx_skb(void *buf_va
, bool from_pool
,
1562 struct mana_rxcomp_oob
*cqe
, struct mana_rxq
*rxq
)
1564 struct mana_stats_rx
*rx_stats
= &rxq
->stats
;
1565 struct net_device
*ndev
= rxq
->ndev
;
1566 uint pkt_len
= cqe
->ppi
[0].pkt_len
;
1567 u16 rxq_idx
= rxq
->rxq_idx
;
1568 struct napi_struct
*napi
;
1569 struct xdp_buff xdp
= {};
1570 struct sk_buff
*skb
;
1574 rxq
->rx_cq
.work_done
++;
1575 napi
= &rxq
->rx_cq
.napi
;
1578 ++ndev
->stats
.rx_dropped
;
1582 act
= mana_run_xdp(ndev
, rxq
, &xdp
, buf_va
, pkt_len
);
1584 if (act
== XDP_REDIRECT
&& !rxq
->xdp_rc
)
1587 if (act
!= XDP_PASS
&& act
!= XDP_TX
)
1590 skb
= mana_build_skb(rxq
, buf_va
, pkt_len
, &xdp
);
1596 skb_mark_for_recycle(skb
);
1598 skb
->dev
= napi
->dev
;
1600 skb
->protocol
= eth_type_trans(skb
, ndev
);
1601 skb_checksum_none_assert(skb
);
1602 skb_record_rx_queue(skb
, rxq_idx
);
1604 if ((ndev
->features
& NETIF_F_RXCSUM
) && cqe
->rx_iphdr_csum_succeed
) {
1605 if (cqe
->rx_tcp_csum_succeed
|| cqe
->rx_udp_csum_succeed
)
1606 skb
->ip_summed
= CHECKSUM_UNNECESSARY
;
1609 if (cqe
->rx_hashtype
!= 0 && (ndev
->features
& NETIF_F_RXHASH
)) {
1610 hash_value
= cqe
->ppi
[0].pkt_hash
;
1612 if (cqe
->rx_hashtype
& MANA_HASH_L4
)
1613 skb_set_hash(skb
, hash_value
, PKT_HASH_TYPE_L4
);
1615 skb_set_hash(skb
, hash_value
, PKT_HASH_TYPE_L3
);
1618 if (cqe
->rx_vlantag_present
) {
1619 u16 vlan_tci
= cqe
->rx_vlan_id
;
1621 __vlan_hwaccel_put_tag(skb
, htons(ETH_P_8021Q
), vlan_tci
);
1624 u64_stats_update_begin(&rx_stats
->syncp
);
1625 rx_stats
->packets
++;
1626 rx_stats
->bytes
+= pkt_len
;
1630 u64_stats_update_end(&rx_stats
->syncp
);
1632 if (act
== XDP_TX
) {
1633 skb_set_queue_mapping(skb
, rxq_idx
);
1634 mana_xdp_tx(skb
, ndev
);
1638 napi_gro_receive(napi
, skb
);
1643 u64_stats_update_begin(&rx_stats
->syncp
);
1644 rx_stats
->xdp_drop
++;
1645 u64_stats_update_end(&rx_stats
->syncp
);
1649 page_pool_recycle_direct(rxq
->page_pool
,
1650 virt_to_head_page(buf_va
));
1652 WARN_ON_ONCE(rxq
->xdp_save_va
);
1653 /* Save for reuse */
1654 rxq
->xdp_save_va
= buf_va
;
1657 ++ndev
->stats
.rx_dropped
;
1662 static void *mana_get_rxfrag(struct mana_rxq
*rxq
, struct device
*dev
,
1663 dma_addr_t
*da
, bool *from_pool
, bool is_napi
)
1670 /* Reuse XDP dropped page if available */
1671 if (rxq
->xdp_save_va
) {
1672 va
= rxq
->xdp_save_va
;
1673 rxq
->xdp_save_va
= NULL
;
1674 } else if (rxq
->alloc_size
> PAGE_SIZE
) {
1676 va
= napi_alloc_frag(rxq
->alloc_size
);
1678 va
= netdev_alloc_frag(rxq
->alloc_size
);
1683 page
= virt_to_head_page(va
);
1684 /* Check if the frag falls back to single page */
1685 if (compound_order(page
) < get_order(rxq
->alloc_size
)) {
1690 page
= page_pool_dev_alloc_pages(rxq
->page_pool
);
1695 va
= page_to_virt(page
);
1698 *da
= dma_map_single(dev
, va
+ rxq
->headroom
, rxq
->datasize
,
1700 if (dma_mapping_error(dev
, *da
)) {
1702 page_pool_put_full_page(rxq
->page_pool
, page
, false);
1704 put_page(virt_to_head_page(va
));
1712 /* Allocate frag for rx buffer, and save the old buf */
1713 static void mana_refill_rx_oob(struct device
*dev
, struct mana_rxq
*rxq
,
1714 struct mana_recv_buf_oob
*rxoob
, void **old_buf
,
1721 va
= mana_get_rxfrag(rxq
, dev
, &da
, &from_pool
, true);
1725 dma_unmap_single(dev
, rxoob
->sgl
[0].address
, rxq
->datasize
,
1727 *old_buf
= rxoob
->buf_va
;
1728 *old_fp
= rxoob
->from_pool
;
1731 rxoob
->sgl
[0].address
= da
;
1732 rxoob
->from_pool
= from_pool
;
1735 static void mana_process_rx_cqe(struct mana_rxq
*rxq
, struct mana_cq
*cq
,
1736 struct gdma_comp
*cqe
)
1738 struct mana_rxcomp_oob
*oob
= (struct mana_rxcomp_oob
*)cqe
->cqe_data
;
1739 struct gdma_context
*gc
= rxq
->gdma_rq
->gdma_dev
->gdma_context
;
1740 struct net_device
*ndev
= rxq
->ndev
;
1741 struct mana_recv_buf_oob
*rxbuf_oob
;
1742 struct mana_port_context
*apc
;
1743 struct device
*dev
= gc
->dev
;
1744 void *old_buf
= NULL
;
1748 apc
= netdev_priv(ndev
);
1750 switch (oob
->cqe_hdr
.cqe_type
) {
1754 case CQE_RX_TRUNCATED
:
1755 ++ndev
->stats
.rx_dropped
;
1756 rxbuf_oob
= &rxq
->rx_oobs
[rxq
->buf_index
];
1757 netdev_warn_once(ndev
, "Dropped a truncated packet\n");
1760 case CQE_RX_COALESCED_4
:
1761 netdev_err(ndev
, "RX coalescing is unsupported\n");
1762 apc
->eth_stats
.rx_coalesced_err
++;
1765 case CQE_RX_OBJECT_FENCE
:
1766 complete(&rxq
->fence_event
);
1770 netdev_err(ndev
, "Unknown RX CQE type = %d\n",
1771 oob
->cqe_hdr
.cqe_type
);
1772 apc
->eth_stats
.rx_cqe_unknown_type
++;
1776 pktlen
= oob
->ppi
[0].pkt_len
;
1779 /* data packets should never have packetlength of zero */
1780 netdev_err(ndev
, "RX pkt len=0, rq=%u, cq=%u, rxobj=0x%llx\n",
1781 rxq
->gdma_id
, cq
->gdma_id
, rxq
->rxobj
);
1785 curr
= rxq
->buf_index
;
1786 rxbuf_oob
= &rxq
->rx_oobs
[curr
];
1787 WARN_ON_ONCE(rxbuf_oob
->wqe_inf
.wqe_size_in_bu
!= 1);
1789 mana_refill_rx_oob(dev
, rxq
, rxbuf_oob
, &old_buf
, &old_fp
);
1791 /* Unsuccessful refill will have old_buf == NULL.
1792 * In this case, mana_rx_skb() will drop the packet.
1794 mana_rx_skb(old_buf
, old_fp
, oob
, rxq
);
1797 mana_move_wq_tail(rxq
->gdma_rq
, rxbuf_oob
->wqe_inf
.wqe_size_in_bu
);
1799 mana_post_pkt_rxq(rxq
);
1802 static void mana_poll_rx_cq(struct mana_cq
*cq
)
1804 struct gdma_comp
*comp
= cq
->gdma_comp_buf
;
1805 struct mana_rxq
*rxq
= cq
->rxq
;
1808 comp_read
= mana_gd_poll_cq(cq
->gdma_cq
, comp
, CQE_POLLING_BUFFER
);
1809 WARN_ON_ONCE(comp_read
> CQE_POLLING_BUFFER
);
1811 rxq
->xdp_flush
= false;
1813 for (i
= 0; i
< comp_read
; i
++) {
1814 if (WARN_ON_ONCE(comp
[i
].is_sq
))
1817 /* verify recv cqe references the right rxq */
1818 if (WARN_ON_ONCE(comp
[i
].wq_num
!= cq
->rxq
->gdma_id
))
1821 mana_process_rx_cqe(rxq
, cq
, &comp
[i
]);
1824 if (comp_read
> 0) {
1825 struct gdma_context
*gc
= rxq
->gdma_rq
->gdma_dev
->gdma_context
;
1827 mana_gd_wq_ring_doorbell(gc
, rxq
->gdma_rq
);
1834 static int mana_cq_handler(void *context
, struct gdma_queue
*gdma_queue
)
1836 struct mana_cq
*cq
= context
;
1839 WARN_ON_ONCE(cq
->gdma_cq
!= gdma_queue
);
1841 if (cq
->type
== MANA_CQ_TYPE_RX
)
1842 mana_poll_rx_cq(cq
);
1844 mana_poll_tx_cq(cq
);
1847 cq
->work_done_since_doorbell
+= w
;
1849 if (w
< cq
->budget
) {
1850 mana_gd_ring_cq(gdma_queue
, SET_ARM_BIT
);
1851 cq
->work_done_since_doorbell
= 0;
1852 napi_complete_done(&cq
->napi
, w
);
1853 } else if (cq
->work_done_since_doorbell
>
1854 cq
->gdma_cq
->queue_size
/ COMP_ENTRY_SIZE
* 4) {
1855 /* MANA hardware requires at least one doorbell ring every 8
1856 * wraparounds of CQ even if there is no need to arm the CQ.
1857 * This driver rings the doorbell as soon as we have exceeded
1860 mana_gd_ring_cq(gdma_queue
, 0);
1861 cq
->work_done_since_doorbell
= 0;
1867 static int mana_poll(struct napi_struct
*napi
, int budget
)
1869 struct mana_cq
*cq
= container_of(napi
, struct mana_cq
, napi
);
1873 cq
->budget
= budget
;
1875 w
= mana_cq_handler(cq
, cq
->gdma_cq
);
1877 return min(w
, budget
);
1880 static void mana_schedule_napi(void *context
, struct gdma_queue
*gdma_queue
)
1882 struct mana_cq
*cq
= context
;
1884 napi_schedule_irqoff(&cq
->napi
);
1887 static void mana_deinit_cq(struct mana_port_context
*apc
, struct mana_cq
*cq
)
1889 struct gdma_dev
*gd
= apc
->ac
->gdma_dev
;
1894 mana_gd_destroy_queue(gd
->gdma_context
, cq
->gdma_cq
);
1897 static void mana_deinit_txq(struct mana_port_context
*apc
, struct mana_txq
*txq
)
1899 struct gdma_dev
*gd
= apc
->ac
->gdma_dev
;
1904 mana_gd_destroy_queue(gd
->gdma_context
, txq
->gdma_sq
);
1907 static void mana_destroy_txq(struct mana_port_context
*apc
)
1909 struct napi_struct
*napi
;
1915 for (i
= 0; i
< apc
->num_queues
; i
++) {
1916 debugfs_remove_recursive(apc
->tx_qp
[i
].mana_tx_debugfs
);
1918 napi
= &apc
->tx_qp
[i
].tx_cq
.napi
;
1919 if (apc
->tx_qp
[i
].txq
.napi_initialized
) {
1920 napi_synchronize(napi
);
1922 netif_napi_del(napi
);
1923 apc
->tx_qp
[i
].txq
.napi_initialized
= false;
1925 mana_destroy_wq_obj(apc
, GDMA_SQ
, apc
->tx_qp
[i
].tx_object
);
1927 mana_deinit_cq(apc
, &apc
->tx_qp
[i
].tx_cq
);
1929 mana_deinit_txq(apc
, &apc
->tx_qp
[i
].txq
);
1936 static void mana_create_txq_debugfs(struct mana_port_context
*apc
, int idx
)
1938 struct mana_tx_qp
*tx_qp
= &apc
->tx_qp
[idx
];
1941 sprintf(qnum
, "TX-%d", idx
);
1942 tx_qp
->mana_tx_debugfs
= debugfs_create_dir(qnum
, apc
->mana_port_debugfs
);
1943 debugfs_create_u32("sq_head", 0400, tx_qp
->mana_tx_debugfs
,
1944 &tx_qp
->txq
.gdma_sq
->head
);
1945 debugfs_create_u32("sq_tail", 0400, tx_qp
->mana_tx_debugfs
,
1946 &tx_qp
->txq
.gdma_sq
->tail
);
1947 debugfs_create_u32("sq_pend_skb_qlen", 0400, tx_qp
->mana_tx_debugfs
,
1948 &tx_qp
->txq
.pending_skbs
.qlen
);
1949 debugfs_create_u32("cq_head", 0400, tx_qp
->mana_tx_debugfs
,
1950 &tx_qp
->tx_cq
.gdma_cq
->head
);
1951 debugfs_create_u32("cq_tail", 0400, tx_qp
->mana_tx_debugfs
,
1952 &tx_qp
->tx_cq
.gdma_cq
->tail
);
1953 debugfs_create_u32("cq_budget", 0400, tx_qp
->mana_tx_debugfs
,
1954 &tx_qp
->tx_cq
.budget
);
1955 debugfs_create_file("txq_dump", 0400, tx_qp
->mana_tx_debugfs
,
1956 tx_qp
->txq
.gdma_sq
, &mana_dbg_q_fops
);
1957 debugfs_create_file("cq_dump", 0400, tx_qp
->mana_tx_debugfs
,
1958 tx_qp
->tx_cq
.gdma_cq
, &mana_dbg_q_fops
);
1961 static int mana_create_txq(struct mana_port_context
*apc
,
1962 struct net_device
*net
)
1964 struct mana_context
*ac
= apc
->ac
;
1965 struct gdma_dev
*gd
= ac
->gdma_dev
;
1966 struct mana_obj_spec wq_spec
;
1967 struct mana_obj_spec cq_spec
;
1968 struct gdma_queue_spec spec
;
1969 struct gdma_context
*gc
;
1970 struct mana_txq
*txq
;
1977 apc
->tx_qp
= kcalloc(apc
->num_queues
, sizeof(struct mana_tx_qp
),
1982 /* The minimum size of the WQE is 32 bytes, hence
1983 * apc->tx_queue_size represents the maximum number of WQEs
1984 * the SQ can store. This value is then used to size other queues
1985 * to prevent overflow.
1986 * Also note that the txq_size is always going to be MANA_PAGE_ALIGNED,
1987 * as min val of apc->tx_queue_size is 128 and that would make
1988 * txq_size 128*32 = 4096 and the other higher values of apc->tx_queue_size
1989 * are always power of two
1991 txq_size
= apc
->tx_queue_size
* 32;
1993 cq_size
= apc
->tx_queue_size
* COMP_ENTRY_SIZE
;
1995 gc
= gd
->gdma_context
;
1997 for (i
= 0; i
< apc
->num_queues
; i
++) {
1998 apc
->tx_qp
[i
].tx_object
= INVALID_MANA_HANDLE
;
2001 txq
= &apc
->tx_qp
[i
].txq
;
2003 u64_stats_init(&txq
->stats
.syncp
);
2005 txq
->net_txq
= netdev_get_tx_queue(net
, i
);
2006 txq
->vp_offset
= apc
->tx_vp_offset
;
2007 txq
->napi_initialized
= false;
2008 skb_queue_head_init(&txq
->pending_skbs
);
2010 memset(&spec
, 0, sizeof(spec
));
2011 spec
.type
= GDMA_SQ
;
2012 spec
.monitor_avl_buf
= true;
2013 spec
.queue_size
= txq_size
;
2014 err
= mana_gd_create_mana_wq_cq(gd
, &spec
, &txq
->gdma_sq
);
2018 /* Create SQ's CQ */
2019 cq
= &apc
->tx_qp
[i
].tx_cq
;
2020 cq
->type
= MANA_CQ_TYPE_TX
;
2024 memset(&spec
, 0, sizeof(spec
));
2025 spec
.type
= GDMA_CQ
;
2026 spec
.monitor_avl_buf
= false;
2027 spec
.queue_size
= cq_size
;
2028 spec
.cq
.callback
= mana_schedule_napi
;
2029 spec
.cq
.parent_eq
= ac
->eqs
[i
].eq
;
2030 spec
.cq
.context
= cq
;
2031 err
= mana_gd_create_mana_wq_cq(gd
, &spec
, &cq
->gdma_cq
);
2035 memset(&wq_spec
, 0, sizeof(wq_spec
));
2036 memset(&cq_spec
, 0, sizeof(cq_spec
));
2038 wq_spec
.gdma_region
= txq
->gdma_sq
->mem_info
.dma_region_handle
;
2039 wq_spec
.queue_size
= txq
->gdma_sq
->queue_size
;
2041 cq_spec
.gdma_region
= cq
->gdma_cq
->mem_info
.dma_region_handle
;
2042 cq_spec
.queue_size
= cq
->gdma_cq
->queue_size
;
2043 cq_spec
.modr_ctx_id
= 0;
2044 cq_spec
.attached_eq
= cq
->gdma_cq
->cq
.parent
->id
;
2046 err
= mana_create_wq_obj(apc
, apc
->port_handle
, GDMA_SQ
,
2048 &apc
->tx_qp
[i
].tx_object
);
2053 txq
->gdma_sq
->id
= wq_spec
.queue_index
;
2054 cq
->gdma_cq
->id
= cq_spec
.queue_index
;
2056 txq
->gdma_sq
->mem_info
.dma_region_handle
=
2057 GDMA_INVALID_DMA_REGION
;
2058 cq
->gdma_cq
->mem_info
.dma_region_handle
=
2059 GDMA_INVALID_DMA_REGION
;
2061 txq
->gdma_txq_id
= txq
->gdma_sq
->id
;
2063 cq
->gdma_id
= cq
->gdma_cq
->id
;
2065 if (WARN_ON(cq
->gdma_id
>= gc
->max_num_cqs
)) {
2070 gc
->cq_table
[cq
->gdma_id
] = cq
->gdma_cq
;
2072 mana_create_txq_debugfs(apc
, i
);
2074 netif_napi_add_tx(net
, &cq
->napi
, mana_poll
);
2075 napi_enable(&cq
->napi
);
2076 txq
->napi_initialized
= true;
2078 mana_gd_ring_cq(cq
->gdma_cq
, SET_ARM_BIT
);
2083 mana_destroy_txq(apc
);
2087 static void mana_destroy_rxq(struct mana_port_context
*apc
,
2088 struct mana_rxq
*rxq
, bool napi_initialized
)
2091 struct gdma_context
*gc
= apc
->ac
->gdma_dev
->gdma_context
;
2092 struct mana_recv_buf_oob
*rx_oob
;
2093 struct device
*dev
= gc
->dev
;
2094 struct napi_struct
*napi
;
2101 debugfs_remove_recursive(rxq
->mana_rx_debugfs
);
2103 napi
= &rxq
->rx_cq
.napi
;
2105 if (napi_initialized
) {
2106 napi_synchronize(napi
);
2110 netif_napi_del(napi
);
2112 xdp_rxq_info_unreg(&rxq
->xdp_rxq
);
2114 mana_destroy_wq_obj(apc
, GDMA_RQ
, rxq
->rxobj
);
2116 mana_deinit_cq(apc
, &rxq
->rx_cq
);
2118 if (rxq
->xdp_save_va
)
2119 put_page(virt_to_head_page(rxq
->xdp_save_va
));
2121 for (i
= 0; i
< rxq
->num_rx_buf
; i
++) {
2122 rx_oob
= &rxq
->rx_oobs
[i
];
2124 if (!rx_oob
->buf_va
)
2127 dma_unmap_single(dev
, rx_oob
->sgl
[0].address
,
2128 rx_oob
->sgl
[0].size
, DMA_FROM_DEVICE
);
2130 page
= virt_to_head_page(rx_oob
->buf_va
);
2132 if (rx_oob
->from_pool
)
2133 page_pool_put_full_page(rxq
->page_pool
, page
, false);
2137 rx_oob
->buf_va
= NULL
;
2140 page_pool_destroy(rxq
->page_pool
);
2143 mana_gd_destroy_queue(gc
, rxq
->gdma_rq
);
2148 static int mana_fill_rx_oob(struct mana_recv_buf_oob
*rx_oob
, u32 mem_key
,
2149 struct mana_rxq
*rxq
, struct device
*dev
)
2151 struct mana_port_context
*mpc
= netdev_priv(rxq
->ndev
);
2152 bool from_pool
= false;
2156 if (mpc
->rxbufs_pre
)
2157 va
= mana_get_rxbuf_pre(rxq
, &da
);
2159 va
= mana_get_rxfrag(rxq
, dev
, &da
, &from_pool
, false);
2164 rx_oob
->buf_va
= va
;
2165 rx_oob
->from_pool
= from_pool
;
2167 rx_oob
->sgl
[0].address
= da
;
2168 rx_oob
->sgl
[0].size
= rxq
->datasize
;
2169 rx_oob
->sgl
[0].mem_key
= mem_key
;
2174 #define MANA_WQE_HEADER_SIZE 16
2175 #define MANA_WQE_SGE_SIZE 16
2177 static int mana_alloc_rx_wqe(struct mana_port_context
*apc
,
2178 struct mana_rxq
*rxq
, u32
*rxq_size
, u32
*cq_size
)
2180 struct gdma_context
*gc
= apc
->ac
->gdma_dev
->gdma_context
;
2181 struct mana_recv_buf_oob
*rx_oob
;
2182 struct device
*dev
= gc
->dev
;
2186 WARN_ON(rxq
->datasize
== 0);
2191 for (buf_idx
= 0; buf_idx
< rxq
->num_rx_buf
; buf_idx
++) {
2192 rx_oob
= &rxq
->rx_oobs
[buf_idx
];
2193 memset(rx_oob
, 0, sizeof(*rx_oob
));
2195 rx_oob
->num_sge
= 1;
2197 ret
= mana_fill_rx_oob(rx_oob
, apc
->ac
->gdma_dev
->gpa_mkey
, rxq
,
2202 rx_oob
->wqe_req
.sgl
= rx_oob
->sgl
;
2203 rx_oob
->wqe_req
.num_sge
= rx_oob
->num_sge
;
2204 rx_oob
->wqe_req
.inline_oob_size
= 0;
2205 rx_oob
->wqe_req
.inline_oob_data
= NULL
;
2206 rx_oob
->wqe_req
.flags
= 0;
2207 rx_oob
->wqe_req
.client_data_unit
= 0;
2209 *rxq_size
+= ALIGN(MANA_WQE_HEADER_SIZE
+
2210 MANA_WQE_SGE_SIZE
* rx_oob
->num_sge
, 32);
2211 *cq_size
+= COMP_ENTRY_SIZE
;
2217 static int mana_push_wqe(struct mana_rxq
*rxq
)
2219 struct mana_recv_buf_oob
*rx_oob
;
2223 for (buf_idx
= 0; buf_idx
< rxq
->num_rx_buf
; buf_idx
++) {
2224 rx_oob
= &rxq
->rx_oobs
[buf_idx
];
2226 err
= mana_gd_post_and_ring(rxq
->gdma_rq
, &rx_oob
->wqe_req
,
2235 static int mana_create_page_pool(struct mana_rxq
*rxq
, struct gdma_context
*gc
)
2237 struct mana_port_context
*mpc
= netdev_priv(rxq
->ndev
);
2238 struct page_pool_params pprm
= {};
2241 pprm
.pool_size
= mpc
->rx_queue_size
;
2242 pprm
.nid
= gc
->numa_node
;
2243 pprm
.napi
= &rxq
->rx_cq
.napi
;
2244 pprm
.netdev
= rxq
->ndev
;
2246 rxq
->page_pool
= page_pool_create(&pprm
);
2248 if (IS_ERR(rxq
->page_pool
)) {
2249 ret
= PTR_ERR(rxq
->page_pool
);
2250 rxq
->page_pool
= NULL
;
2257 static struct mana_rxq
*mana_create_rxq(struct mana_port_context
*apc
,
2258 u32 rxq_idx
, struct mana_eq
*eq
,
2259 struct net_device
*ndev
)
2261 struct gdma_dev
*gd
= apc
->ac
->gdma_dev
;
2262 struct mana_obj_spec wq_spec
;
2263 struct mana_obj_spec cq_spec
;
2264 struct gdma_queue_spec spec
;
2265 struct mana_cq
*cq
= NULL
;
2266 struct gdma_context
*gc
;
2267 u32 cq_size
, rq_size
;
2268 struct mana_rxq
*rxq
;
2271 gc
= gd
->gdma_context
;
2273 rxq
= kzalloc(struct_size(rxq
, rx_oobs
, apc
->rx_queue_size
),
2279 rxq
->num_rx_buf
= apc
->rx_queue_size
;
2280 rxq
->rxq_idx
= rxq_idx
;
2281 rxq
->rxobj
= INVALID_MANA_HANDLE
;
2283 mana_get_rxbuf_cfg(ndev
->mtu
, &rxq
->datasize
, &rxq
->alloc_size
,
2286 /* Create page pool for RX queue */
2287 err
= mana_create_page_pool(rxq
, gc
);
2289 netdev_err(ndev
, "Create page pool err:%d\n", err
);
2293 err
= mana_alloc_rx_wqe(apc
, rxq
, &rq_size
, &cq_size
);
2297 rq_size
= MANA_PAGE_ALIGN(rq_size
);
2298 cq_size
= MANA_PAGE_ALIGN(cq_size
);
2301 memset(&spec
, 0, sizeof(spec
));
2302 spec
.type
= GDMA_RQ
;
2303 spec
.monitor_avl_buf
= true;
2304 spec
.queue_size
= rq_size
;
2305 err
= mana_gd_create_mana_wq_cq(gd
, &spec
, &rxq
->gdma_rq
);
2309 /* Create RQ's CQ */
2311 cq
->type
= MANA_CQ_TYPE_RX
;
2314 memset(&spec
, 0, sizeof(spec
));
2315 spec
.type
= GDMA_CQ
;
2316 spec
.monitor_avl_buf
= false;
2317 spec
.queue_size
= cq_size
;
2318 spec
.cq
.callback
= mana_schedule_napi
;
2319 spec
.cq
.parent_eq
= eq
->eq
;
2320 spec
.cq
.context
= cq
;
2321 err
= mana_gd_create_mana_wq_cq(gd
, &spec
, &cq
->gdma_cq
);
2325 memset(&wq_spec
, 0, sizeof(wq_spec
));
2326 memset(&cq_spec
, 0, sizeof(cq_spec
));
2327 wq_spec
.gdma_region
= rxq
->gdma_rq
->mem_info
.dma_region_handle
;
2328 wq_spec
.queue_size
= rxq
->gdma_rq
->queue_size
;
2330 cq_spec
.gdma_region
= cq
->gdma_cq
->mem_info
.dma_region_handle
;
2331 cq_spec
.queue_size
= cq
->gdma_cq
->queue_size
;
2332 cq_spec
.modr_ctx_id
= 0;
2333 cq_spec
.attached_eq
= cq
->gdma_cq
->cq
.parent
->id
;
2335 err
= mana_create_wq_obj(apc
, apc
->port_handle
, GDMA_RQ
,
2336 &wq_spec
, &cq_spec
, &rxq
->rxobj
);
2340 rxq
->gdma_rq
->id
= wq_spec
.queue_index
;
2341 cq
->gdma_cq
->id
= cq_spec
.queue_index
;
2343 rxq
->gdma_rq
->mem_info
.dma_region_handle
= GDMA_INVALID_DMA_REGION
;
2344 cq
->gdma_cq
->mem_info
.dma_region_handle
= GDMA_INVALID_DMA_REGION
;
2346 rxq
->gdma_id
= rxq
->gdma_rq
->id
;
2347 cq
->gdma_id
= cq
->gdma_cq
->id
;
2349 err
= mana_push_wqe(rxq
);
2353 if (WARN_ON(cq
->gdma_id
>= gc
->max_num_cqs
)) {
2358 gc
->cq_table
[cq
->gdma_id
] = cq
->gdma_cq
;
2360 netif_napi_add_weight(ndev
, &cq
->napi
, mana_poll
, 1);
2362 WARN_ON(xdp_rxq_info_reg(&rxq
->xdp_rxq
, ndev
, rxq_idx
,
2364 WARN_ON(xdp_rxq_info_reg_mem_model(&rxq
->xdp_rxq
, MEM_TYPE_PAGE_POOL
,
2367 napi_enable(&cq
->napi
);
2369 mana_gd_ring_cq(cq
->gdma_cq
, SET_ARM_BIT
);
2374 netdev_err(ndev
, "Failed to create RXQ: err = %d\n", err
);
2376 mana_destroy_rxq(apc
, rxq
, false);
2379 mana_deinit_cq(apc
, cq
);
2384 static void mana_create_rxq_debugfs(struct mana_port_context
*apc
, int idx
)
2386 struct mana_rxq
*rxq
;
2389 rxq
= apc
->rxqs
[idx
];
2391 sprintf(qnum
, "RX-%d", idx
);
2392 rxq
->mana_rx_debugfs
= debugfs_create_dir(qnum
, apc
->mana_port_debugfs
);
2393 debugfs_create_u32("rq_head", 0400, rxq
->mana_rx_debugfs
, &rxq
->gdma_rq
->head
);
2394 debugfs_create_u32("rq_tail", 0400, rxq
->mana_rx_debugfs
, &rxq
->gdma_rq
->tail
);
2395 debugfs_create_u32("rq_nbuf", 0400, rxq
->mana_rx_debugfs
, &rxq
->num_rx_buf
);
2396 debugfs_create_u32("cq_head", 0400, rxq
->mana_rx_debugfs
,
2397 &rxq
->rx_cq
.gdma_cq
->head
);
2398 debugfs_create_u32("cq_tail", 0400, rxq
->mana_rx_debugfs
,
2399 &rxq
->rx_cq
.gdma_cq
->tail
);
2400 debugfs_create_u32("cq_budget", 0400, rxq
->mana_rx_debugfs
, &rxq
->rx_cq
.budget
);
2401 debugfs_create_file("rxq_dump", 0400, rxq
->mana_rx_debugfs
, rxq
->gdma_rq
, &mana_dbg_q_fops
);
2402 debugfs_create_file("cq_dump", 0400, rxq
->mana_rx_debugfs
, rxq
->rx_cq
.gdma_cq
,
static int mana_add_rx_queues(struct mana_port_context *apc,
			      struct net_device *ndev)
{
	struct mana_context *ac = apc->ac;
	struct mana_rxq *rxq;
	int err = 0;
	int i;

	for (i = 0; i < apc->num_queues; i++) {
		rxq = mana_create_rxq(apc, i, &ac->eqs[i], ndev);
		if (!rxq) {
			err = -ENOMEM;
			goto out;
		}

		u64_stats_init(&rxq->stats.syncp);

		apc->rxqs[i] = rxq;

		mana_create_rxq_debugfs(apc, i);
	}

	apc->default_rxobj = apc->rxqs[0]->rxobj;
out:
	return err;
}
static void mana_destroy_vport(struct mana_port_context *apc)
{
	struct gdma_dev *gd = apc->ac->gdma_dev;
	struct mana_rxq *rxq;
	u32 rxq_idx;

	for (rxq_idx = 0; rxq_idx < apc->num_queues; rxq_idx++) {
		rxq = apc->rxqs[rxq_idx];
		if (!rxq)
			continue;

		mana_destroy_rxq(apc, rxq, true);
		apc->rxqs[rxq_idx] = NULL;
	}

	mana_destroy_txq(apc);
	mana_uncfg_vport(apc);

	if (gd->gdma_context->is_pf)
		mana_pf_deregister_hw_vport(apc);
}
static int mana_create_vport(struct mana_port_context *apc,
			     struct net_device *net)
{
	struct gdma_dev *gd = apc->ac->gdma_dev;
	int err;

	apc->default_rxobj = INVALID_MANA_HANDLE;

	if (gd->gdma_context->is_pf) {
		err = mana_pf_register_hw_vport(apc);
		if (err)
			return err;
	}

	err = mana_cfg_vport(apc, gd->pdid, gd->doorbell);
	if (err)
		return err;

	return mana_create_txq(apc, net);
}
static int mana_rss_table_alloc(struct mana_port_context *apc)
{
	if (!apc->indir_table_sz) {
		netdev_err(apc->ndev,
			   "Indirection table size not set for vPort %d\n",
			   apc->port_idx);
		return -EINVAL;
	}

	apc->indir_table = kcalloc(apc->indir_table_sz, sizeof(u32), GFP_KERNEL);
	if (!apc->indir_table)
		return -ENOMEM;

	apc->rxobj_table = kcalloc(apc->indir_table_sz, sizeof(mana_handle_t), GFP_KERNEL);
	if (!apc->rxobj_table) {
		kfree(apc->indir_table);
		return -ENOMEM;
	}

	return 0;
}
static void mana_rss_table_init(struct mana_port_context *apc)
{
	int i;

	for (i = 0; i < apc->indir_table_sz; i++)
		apc->indir_table[i] =
			ethtool_rxfh_indir_default(i, apc->num_queues);
}
int mana_config_rss(struct mana_port_context *apc, enum TRI_STATE rx,
		    bool update_hash, bool update_tab)
{
	u32 queue_idx;
	int err;
	int i;

	if (update_tab) {
		for (i = 0; i < apc->indir_table_sz; i++) {
			queue_idx = apc->indir_table[i];
			apc->rxobj_table[i] = apc->rxqs[queue_idx]->rxobj;
		}
	}

	err = mana_cfg_vport_steering(apc, rx, true, update_hash, update_tab);
	if (err)
		return err;

	mana_fence_rqs(apc);

	return 0;
}
void mana_query_gf_stats(struct mana_port_context *apc)
{
	struct mana_query_gf_stat_resp resp = {};
	struct mana_query_gf_stat_req req = {};
	struct net_device *ndev = apc->ndev;
	int err;

	mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_GF_STAT,
			     sizeof(req), sizeof(resp));
	req.hdr.resp.msg_version = GDMA_MESSAGE_V2;
	req.req_stats = STATISTICS_FLAGS_RX_DISCARDS_NO_WQE |
			STATISTICS_FLAGS_RX_ERRORS_VPORT_DISABLED |
			STATISTICS_FLAGS_HC_RX_BYTES |
			STATISTICS_FLAGS_HC_RX_UCAST_PACKETS |
			STATISTICS_FLAGS_HC_RX_UCAST_BYTES |
			STATISTICS_FLAGS_HC_RX_MCAST_PACKETS |
			STATISTICS_FLAGS_HC_RX_MCAST_BYTES |
			STATISTICS_FLAGS_HC_RX_BCAST_PACKETS |
			STATISTICS_FLAGS_HC_RX_BCAST_BYTES |
			STATISTICS_FLAGS_TX_ERRORS_GF_DISABLED |
			STATISTICS_FLAGS_TX_ERRORS_VPORT_DISABLED |
			STATISTICS_FLAGS_TX_ERRORS_INVAL_VPORT_OFFSET_PACKETS |
			STATISTICS_FLAGS_TX_ERRORS_VLAN_ENFORCEMENT |
			STATISTICS_FLAGS_TX_ERRORS_ETH_TYPE_ENFORCEMENT |
			STATISTICS_FLAGS_TX_ERRORS_SA_ENFORCEMENT |
			STATISTICS_FLAGS_TX_ERRORS_SQPDID_ENFORCEMENT |
			STATISTICS_FLAGS_TX_ERRORS_CQPDID_ENFORCEMENT |
			STATISTICS_FLAGS_TX_ERRORS_MTU_VIOLATION |
			STATISTICS_FLAGS_TX_ERRORS_INVALID_OOB |
			STATISTICS_FLAGS_HC_TX_BYTES |
			STATISTICS_FLAGS_HC_TX_UCAST_PACKETS |
			STATISTICS_FLAGS_HC_TX_UCAST_BYTES |
			STATISTICS_FLAGS_HC_TX_MCAST_PACKETS |
			STATISTICS_FLAGS_HC_TX_MCAST_BYTES |
			STATISTICS_FLAGS_HC_TX_BCAST_PACKETS |
			STATISTICS_FLAGS_HC_TX_BCAST_BYTES |
			STATISTICS_FLAGS_TX_ERRORS_GDMA_ERROR;
	err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
				sizeof(resp));
	if (err) {
		netdev_err(ndev, "Failed to query GF stats: %d\n", err);
		return;
	}
	err = mana_verify_resp_hdr(&resp.hdr, MANA_QUERY_GF_STAT,
				   sizeof(resp));
	if (err || resp.hdr.status) {
		netdev_err(ndev, "Failed to query GF stats: %d, 0x%x\n", err,
			   resp.hdr.status);
		return;
	}
	apc->eth_stats.hc_rx_discards_no_wqe = resp.rx_discards_nowqe;
	apc->eth_stats.hc_rx_err_vport_disabled = resp.rx_err_vport_disabled;
	apc->eth_stats.hc_rx_bytes = resp.hc_rx_bytes;
	apc->eth_stats.hc_rx_ucast_pkts = resp.hc_rx_ucast_pkts;
	apc->eth_stats.hc_rx_ucast_bytes = resp.hc_rx_ucast_bytes;
	apc->eth_stats.hc_rx_bcast_pkts = resp.hc_rx_bcast_pkts;
	apc->eth_stats.hc_rx_bcast_bytes = resp.hc_rx_bcast_bytes;
	apc->eth_stats.hc_rx_mcast_pkts = resp.hc_rx_mcast_pkts;
	apc->eth_stats.hc_rx_mcast_bytes = resp.hc_rx_mcast_bytes;
	apc->eth_stats.hc_tx_err_gf_disabled = resp.tx_err_gf_disabled;
	apc->eth_stats.hc_tx_err_vport_disabled = resp.tx_err_vport_disabled;
	apc->eth_stats.hc_tx_err_inval_vportoffset_pkt =
					     resp.tx_err_inval_vport_offset_pkt;
	apc->eth_stats.hc_tx_err_vlan_enforcement =
					     resp.tx_err_vlan_enforcement;
	apc->eth_stats.hc_tx_err_eth_type_enforcement =
					     resp.tx_err_ethtype_enforcement;
	apc->eth_stats.hc_tx_err_sa_enforcement = resp.tx_err_SA_enforcement;
	apc->eth_stats.hc_tx_err_sqpdid_enforcement =
					     resp.tx_err_SQPDID_enforcement;
	apc->eth_stats.hc_tx_err_cqpdid_enforcement =
					     resp.tx_err_CQPDID_enforcement;
	apc->eth_stats.hc_tx_err_mtu_violation = resp.tx_err_mtu_violation;
	apc->eth_stats.hc_tx_err_inval_oob = resp.tx_err_inval_oob;
	apc->eth_stats.hc_tx_bytes = resp.hc_tx_bytes;
	apc->eth_stats.hc_tx_ucast_pkts = resp.hc_tx_ucast_pkts;
	apc->eth_stats.hc_tx_ucast_bytes = resp.hc_tx_ucast_bytes;
	apc->eth_stats.hc_tx_bcast_pkts = resp.hc_tx_bcast_pkts;
	apc->eth_stats.hc_tx_bcast_bytes = resp.hc_tx_bcast_bytes;
	apc->eth_stats.hc_tx_mcast_pkts = resp.hc_tx_mcast_pkts;
	apc->eth_stats.hc_tx_mcast_bytes = resp.hc_tx_mcast_bytes;
	apc->eth_stats.hc_tx_err_gdma = resp.tx_err_gdma;
}
static int mana_init_port(struct net_device *ndev)
{
	struct mana_port_context *apc = netdev_priv(ndev);
	struct gdma_dev *gd = apc->ac->gdma_dev;
	u32 max_txq, max_rxq, max_queues;
	int port_idx = apc->port_idx;
	struct gdma_context *gc;
	char vport[32];
	int err;

	err = mana_init_port_context(apc);
	if (err)
		return err;

	gc = gd->gdma_context;

	err = mana_query_vport_cfg(apc, port_idx, &max_txq, &max_rxq,
				   &apc->indir_table_sz);
	if (err) {
		netdev_err(ndev, "Failed to query info for vPort %d\n",
			   port_idx);
		goto reset_apc;
	}

	max_queues = min_t(u32, max_txq, max_rxq);
	if (apc->max_queues > max_queues)
		apc->max_queues = max_queues;

	if (apc->num_queues > apc->max_queues)
		apc->num_queues = apc->max_queues;

	eth_hw_addr_set(ndev, apc->mac_addr);
	sprintf(vport, "vport%d", port_idx);
	apc->mana_port_debugfs = debugfs_create_dir(vport, gc->mana_pci_debugfs);
	return 0;

reset_apc:
	mana_cleanup_port_context(apc);
	return err;
}
int mana_alloc_queues(struct net_device *ndev)
{
	struct mana_port_context *apc = netdev_priv(ndev);
	struct gdma_dev *gd = apc->ac->gdma_dev;
	int err;

	err = mana_create_vport(apc, ndev);
	if (err)
		return err;

	err = netif_set_real_num_tx_queues(ndev, apc->num_queues);
	if (err)
		goto destroy_vport;

	err = mana_add_rx_queues(apc, ndev);
	if (err)
		goto destroy_vport;

	apc->rss_state = apc->num_queues > 1 ? TRI_STATE_TRUE : TRI_STATE_FALSE;

	err = netif_set_real_num_rx_queues(ndev, apc->num_queues);
	if (err)
		goto destroy_vport;

	mana_rss_table_init(apc);

	err = mana_config_rss(apc, TRI_STATE_TRUE, true, true);
	if (err)
		goto destroy_vport;

	if (gd->gdma_context->is_pf) {
		err = mana_pf_register_filter(apc);
		if (err)
			goto destroy_vport;
	}

	mana_chn_setxdp(apc, mana_xdp_get(apc));

	return 0;

destroy_vport:
	mana_destroy_vport(apc);
	return err;
}
int mana_attach(struct net_device *ndev)
{
	struct mana_port_context *apc = netdev_priv(ndev);
	int err;

	ASSERT_RTNL();

	err = mana_init_port(ndev);
	if (err)
		return err;

	if (apc->port_st_save) {
		err = mana_alloc_queues(ndev);
		if (err) {
			mana_cleanup_port_context(apc);
			return err;
		}
	}

	apc->port_is_up = apc->port_st_save;

	/* Ensure port state updated before txq state */
	smp_wmb();

	if (apc->port_is_up)
		netif_carrier_on(ndev);

	netif_device_attach(ndev);

	return 0;
}
static int mana_dealloc_queues(struct net_device *ndev)
{
	struct mana_port_context *apc = netdev_priv(ndev);
	unsigned long timeout = jiffies + 120 * HZ;
	struct gdma_dev *gd = apc->ac->gdma_dev;
	struct mana_txq *txq;
	struct sk_buff *skb;
	int i, err;
	u32 tsleep;

	if (apc->port_is_up)
		return -EINVAL;

	mana_chn_setxdp(apc, NULL);

	if (gd->gdma_context->is_pf)
		mana_pf_deregister_filter(apc);

	/* No packet can be transmitted now since apc->port_is_up is false.
	 * There is still a tiny chance that mana_poll_tx_cq() can re-enable
	 * a txq because it may not timely see apc->port_is_up being cleared
	 * to false, but it doesn't matter since mana_start_xmit() drops any
	 * new packets due to apc->port_is_up being false.
	 *
	 * Drain all the in-flight TX packets.
	 * A timeout of 120 seconds for all the queues is used.
	 * This will break the while loop when h/w is not responding.
	 * This value of 120 has been decided here considering the max
	 * number of queues.
	 */
	for (i = 0; i < apc->num_queues; i++) {
		txq = &apc->tx_qp[i].txq;
		tsleep = 1000;
		while (atomic_read(&txq->pending_sends) > 0 &&
		       time_before(jiffies, timeout)) {
			usleep_range(tsleep, tsleep + 1000);
			tsleep <<= 1;
		}
		if (atomic_read(&txq->pending_sends)) {
			err = pcie_flr(to_pci_dev(gd->gdma_context->dev));
			if (err) {
				netdev_err(ndev, "flr failed %d with %d pkts pending in txq %u\n",
					   err, atomic_read(&txq->pending_sends),
					   txq->gdma_txq_id);
			}
			break;
		}
	}

	for (i = 0; i < apc->num_queues; i++) {
		txq = &apc->tx_qp[i].txq;
		while ((skb = skb_dequeue(&txq->pending_skbs))) {
			mana_unmap_skb(skb, apc);
			dev_kfree_skb_any(skb);
		}
		atomic_set(&txq->pending_sends, 0);
	}
	/* We're 100% sure the queues can no longer be woken up, because
	 * we're sure now mana_poll_tx_cq() can't be running.
	 */

	apc->rss_state = TRI_STATE_FALSE;
	err = mana_config_rss(apc, TRI_STATE_FALSE, false, false);
	if (err) {
		netdev_err(ndev, "Failed to disable vPort: %d\n", err);
		return err;
	}

	mana_destroy_vport(apc);

	return 0;
}
int mana_detach(struct net_device *ndev, bool from_close)
{
	struct mana_port_context *apc = netdev_priv(ndev);
	int err;

	ASSERT_RTNL();

	apc->port_st_save = apc->port_is_up;
	apc->port_is_up = false;

	/* Ensure port state updated before txq state */
	smp_wmb();

	netif_tx_disable(ndev);
	netif_carrier_off(ndev);

	if (apc->port_st_save) {
		err = mana_dealloc_queues(ndev);
		if (err)
			return err;
	}

	if (!from_close) {
		netif_device_detach(ndev);
		mana_cleanup_port_context(apc);
	}

	return 0;
}
static int mana_probe_port(struct mana_context *ac, int port_idx,
			   struct net_device **ndev_storage)
{
	struct gdma_context *gc = ac->gdma_dev->gdma_context;
	struct mana_port_context *apc;
	struct net_device *ndev;
	int err;

	ndev = alloc_etherdev_mq(sizeof(struct mana_port_context),
				 gc->max_num_queues);
	if (!ndev)
		return -ENOMEM;

	*ndev_storage = ndev;

	apc = netdev_priv(ndev);
	apc->ac = ac;
	apc->ndev = ndev;
	apc->max_queues = gc->max_num_queues;
	apc->num_queues = gc->max_num_queues;
	apc->tx_queue_size = DEF_TX_BUFFERS_PER_QUEUE;
	apc->rx_queue_size = DEF_RX_BUFFERS_PER_QUEUE;
	apc->port_handle = INVALID_MANA_HANDLE;
	apc->pf_filter_handle = INVALID_MANA_HANDLE;
	apc->port_idx = port_idx;

	mutex_init(&apc->vport_mutex);
	apc->vport_use_count = 0;

	ndev->netdev_ops = &mana_devops;
	ndev->ethtool_ops = &mana_ethtool_ops;
	ndev->mtu = ETH_DATA_LEN;
	ndev->max_mtu = gc->adapter_mtu - ETH_HLEN;
	ndev->min_mtu = ETH_MIN_MTU;
	ndev->needed_headroom = MANA_HEADROOM;
	ndev->dev_port = port_idx;
	SET_NETDEV_DEV(ndev, gc->dev);

	netif_carrier_off(ndev);

	netdev_rss_key_fill(apc->hashkey, MANA_HASH_KEY_SIZE);

	err = mana_init_port(ndev);
	if (err)
		goto free_net;

	err = mana_rss_table_alloc(apc);
	if (err)
		goto reset_apc;

	netdev_lockdep_set_classes(ndev);

	ndev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
	ndev->hw_features |= NETIF_F_RXCSUM;
	ndev->hw_features |= NETIF_F_TSO | NETIF_F_TSO6;
	ndev->hw_features |= NETIF_F_RXHASH;
	ndev->features = ndev->hw_features | NETIF_F_HW_VLAN_CTAG_TX |
			 NETIF_F_HW_VLAN_CTAG_RX;
	ndev->vlan_features = ndev->features;
	xdp_set_features_flag(ndev, NETDEV_XDP_ACT_BASIC |
			      NETDEV_XDP_ACT_REDIRECT |
			      NETDEV_XDP_ACT_NDO_XMIT);

	err = register_netdev(ndev);
	if (err) {
		netdev_err(ndev, "Unable to register netdev.\n");
		goto free_indir;
	}

	return 0;

free_indir:
	mana_cleanup_indir_table(apc);
reset_apc:
	mana_cleanup_port_context(apc);
free_net:
	*ndev_storage = NULL;
	netdev_err(ndev, "Failed to probe vPort %d: %d\n", port_idx, err);
	free_netdev(ndev);
	return err;
}
static void adev_release(struct device *dev)
{
	struct mana_adev *madev = container_of(dev, struct mana_adev, adev.dev);

	kfree(madev);
}

static void remove_adev(struct gdma_dev *gd)
{
	struct auxiliary_device *adev = gd->adev;
	int id = adev->id;

	auxiliary_device_delete(adev);
	auxiliary_device_uninit(adev);

	mana_adev_idx_free(id);
	gd->adev = NULL;
}
static int add_adev(struct gdma_dev *gd)
{
	struct auxiliary_device *adev;
	struct mana_adev *madev;
	int ret;

	madev = kzalloc(sizeof(*madev), GFP_KERNEL);
	if (!madev)
		return -ENOMEM;

	adev = &madev->adev;
	ret = mana_adev_idx_alloc();
	if (ret < 0)
		goto idx_fail;
	adev->id = ret;

	adev->name = "rdma";
	adev->dev.parent = gd->gdma_context->dev;
	adev->dev.release = adev_release;
	madev->mdev = gd;

	ret = auxiliary_device_init(adev);
	if (ret)
		goto init_fail;

	/* madev is owned by the auxiliary device */
	madev = NULL;
	ret = auxiliary_device_add(adev);
	if (ret)
		goto add_fail;

	gd->adev = adev;
	return 0;

add_fail:
	auxiliary_device_uninit(adev);

init_fail:
	mana_adev_idx_free(adev->id);

idx_fail:
	kfree(madev);

	return ret;
}
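
/* Illustrative sketch (not part of this file): the auxiliary device created by
 * add_adev() is matched by "<module>.<name>", so a hypothetical consumer (such
 * as an RDMA driver) would advertise an ID table like the one below.
 */
static const struct auxiliary_device_id example_mana_adev_id_table[] __maybe_unused = {
	{ .name = "mana.rdma", },
	{ },
};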
int mana_probe(struct gdma_dev *gd, bool resuming)
{
	struct gdma_context *gc = gd->gdma_context;
	struct mana_context *ac = gd->driver_data;
	struct device *dev = gc->dev;
	u16 num_ports = 0;
	int err;
	int i;

	dev_info(dev,
		 "Microsoft Azure Network Adapter protocol version: %d.%d.%d\n",
		 MANA_MAJOR_VERSION, MANA_MINOR_VERSION, MANA_MICRO_VERSION);

	err = mana_gd_register_device(gd);
	if (err)
		return err;

	if (!resuming) {
		ac = kzalloc(sizeof(*ac), GFP_KERNEL);
		if (!ac)
			return -ENOMEM;

		ac->gdma_dev = gd;
		gd->driver_data = ac;
	}

	err = mana_create_eq(ac);
	if (err)
		goto out;

	err = mana_query_device_cfg(ac, MANA_MAJOR_VERSION, MANA_MINOR_VERSION,
				    MANA_MICRO_VERSION, &num_ports);
	if (err)
		goto out;

	if (!resuming) {
		ac->num_ports = num_ports;
	} else {
		if (ac->num_ports != num_ports) {
			dev_err(dev, "The number of vPorts changed: %d->%d\n",
				ac->num_ports, num_ports);
			err = -EPROTO;
			goto out;
		}
	}

	if (ac->num_ports == 0)
		dev_err(dev, "Failed to detect any vPort\n");

	if (ac->num_ports > MAX_PORTS_IN_MANA_DEV)
		ac->num_ports = MAX_PORTS_IN_MANA_DEV;

	if (!resuming) {
		for (i = 0; i < ac->num_ports; i++) {
			err = mana_probe_port(ac, i, &ac->ports[i]);
			/* We log the port for which the probe failed and stop
			 * probes for subsequent ports.
			 * Note that ports for which the probe succeeded keep
			 * running, unless add_adev() fails too.
			 */
			if (err) {
				dev_err(dev, "Probe Failed for port %d\n", i);
				break;
			}
		}
	} else {
		for (i = 0; i < ac->num_ports; i++) {
			rtnl_lock();
			err = mana_attach(ac->ports[i]);
			rtnl_unlock();
			/* We log the port for which the attach failed and stop
			 * attach for subsequent ports.
			 * Note that ports for which the attach succeeded keep
			 * running, unless add_adev() fails too.
			 */
			if (err) {
				dev_err(dev, "Attach Failed for port %d\n", i);
				break;
			}
		}
	}

	err = add_adev(gd);
out:
	if (err)
		mana_remove(gd, false);

	return err;
}
void mana_remove(struct gdma_dev *gd, bool suspending)
{
	struct gdma_context *gc = gd->gdma_context;
	struct mana_context *ac = gd->driver_data;
	struct mana_port_context *apc;
	struct device *dev = gc->dev;
	struct net_device *ndev;
	int err;
	int i;

	/* adev currently doesn't support suspending, always remove it */
	if (gd->adev)
		remove_adev(gd);

	for (i = 0; i < ac->num_ports; i++) {
		ndev = ac->ports[i];
		apc = netdev_priv(ndev);
		if (!ndev) {
			if (i == 0)
				dev_err(dev, "No net device to remove\n");
			goto out;
		}

		/* All cleanup actions should stay after rtnl_lock(), otherwise
		 * other functions may access partially cleaned up data.
		 */
		rtnl_lock();

		err = mana_detach(ndev, false);
		if (err)
			netdev_err(ndev, "Failed to detach vPort %d: %d\n",
				   i, err);

		if (suspending) {
			/* No need to unregister the ndev. */
			rtnl_unlock();
			continue;
		}

		unregister_netdevice(ndev);
		mana_cleanup_indir_table(apc);

		rtnl_unlock();

		free_netdev(ndev);
	}

	mana_destroy_eq(ac);
out:
	mana_gd_deregister_device(gd);

	if (suspending)
		return;

	gd->driver_data = NULL;
	gd->gdma_context = NULL;
	kfree(ac);
}
struct net_device *mana_get_primary_netdev_rcu(struct mana_context *ac, u32 port_index)
{
	struct net_device *ndev;

	RCU_LOCKDEP_WARN(!rcu_read_lock_held(),
			 "Taking primary netdev without holding the RCU read lock");
	if (port_index >= ac->num_ports)
		return NULL;

	/* When mana is used in netvsc, the upper netdevice should be returned. */
	if (ac->ports[port_index]->flags & IFF_SLAVE)
		ndev = netdev_master_upper_dev_get_rcu(ac->ports[port_index]);
	else
		ndev = ac->ports[port_index];

	return ndev;
}
EXPORT_SYMBOL_NS(mana_get_primary_netdev_rcu, "NET_MANA");