// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
 * Copyright(c) 2017 - 2020 Intel Corporation.
 */

/*
 * This file contains HFI1 support for VNIC functionality
 */

#include <linux/io.h>
#include <linux/if_vlan.h>

#include "vnic.h"
#include "netdev.h"

#define HFI_TX_TIMEOUT_MS 1000

#define HFI1_VNIC_RCV_Q_SIZE 1024

#define HFI1_VNIC_UP 0

static DEFINE_SPINLOCK(vport_cntr_lock);

#define SUM_GRP_COUNTERS(stats, qstats, x_grp) do {            \
                u64 *src64, *dst64;                            \
                for (src64 = &qstats->x_grp.unicast,           \
                        dst64 = &stats->x_grp.unicast;         \
                        dst64 <= &stats->x_grp.s_1519_max;) {  \
                        *dst64++ += *src64++;                  \
                }                                              \
        } while (0)

#define VNIC_MASK (0xFF)
#define VNIC_ID(val) ((1ull << 24) | ((val) & VNIC_MASK))

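/*
 * Note: VNIC_ID() maps a virtual ethernet switch id into the id space of the
 * hfi1 netdev lookup table; bit 24 marks the entry as a VNIC id and the low
 * eight bits (VNIC_MASK) carry the vesw id.  For example, VNIC_ID(0x12) is
 * 0x1000012, and VNIC_ID(VNIC_MASK) is the highest id a vport can occupy,
 * which is what get_first_vnic_port() below uses as its upper bound.
 */
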
/* hfi1_vnic_update_stats - update statistics */
static void hfi1_vnic_update_stats(struct hfi1_vnic_vport_info *vinfo,
                                   struct opa_vnic_stats *stats)
{
        struct net_device *netdev = vinfo->netdev;
        u8 i;

        /* add tx counters on different queues */
        for (i = 0; i < vinfo->num_tx_q; i++) {
                struct opa_vnic_stats *qstats = &vinfo->stats[i];
                struct rtnl_link_stats64 *qnstats = &vinfo->stats[i].netstats;

                stats->netstats.tx_fifo_errors += qnstats->tx_fifo_errors;
                stats->netstats.tx_carrier_errors += qnstats->tx_carrier_errors;
                stats->tx_drop_state += qstats->tx_drop_state;
                stats->tx_dlid_zero += qstats->tx_dlid_zero;

                SUM_GRP_COUNTERS(stats, qstats, tx_grp);
                stats->netstats.tx_packets += qnstats->tx_packets;
                stats->netstats.tx_bytes += qnstats->tx_bytes;
        }

        /* add rx counters on different queues */
        for (i = 0; i < vinfo->num_rx_q; i++) {
                struct opa_vnic_stats *qstats = &vinfo->stats[i];
                struct rtnl_link_stats64 *qnstats = &vinfo->stats[i].netstats;

                stats->netstats.rx_fifo_errors += qnstats->rx_fifo_errors;
                stats->netstats.rx_nohandler += qnstats->rx_nohandler;
                stats->rx_drop_state += qstats->rx_drop_state;
                stats->rx_oversize += qstats->rx_oversize;
                stats->rx_runt += qstats->rx_runt;

                SUM_GRP_COUNTERS(stats, qstats, rx_grp);
                stats->netstats.rx_packets += qnstats->rx_packets;
                stats->netstats.rx_bytes += qnstats->rx_bytes;
        }

        stats->netstats.tx_errors = stats->netstats.tx_fifo_errors +
                                    stats->netstats.tx_carrier_errors +
                                    stats->tx_drop_state + stats->tx_dlid_zero;
        stats->netstats.tx_dropped = stats->netstats.tx_errors;

        stats->netstats.rx_errors = stats->netstats.rx_fifo_errors +
                                    stats->netstats.rx_nohandler +
                                    stats->rx_drop_state + stats->rx_oversize +
                                    stats->rx_runt;
        stats->netstats.rx_dropped = stats->netstats.rx_errors;

        netdev->stats.tx_packets = stats->netstats.tx_packets;
        netdev->stats.tx_bytes = stats->netstats.tx_bytes;
        netdev->stats.tx_fifo_errors = stats->netstats.tx_fifo_errors;
        netdev->stats.tx_carrier_errors = stats->netstats.tx_carrier_errors;
        netdev->stats.tx_errors = stats->netstats.tx_errors;
        netdev->stats.tx_dropped = stats->netstats.tx_dropped;

        netdev->stats.rx_packets = stats->netstats.rx_packets;
        netdev->stats.rx_bytes = stats->netstats.rx_bytes;
        netdev->stats.rx_fifo_errors = stats->netstats.rx_fifo_errors;
        netdev->stats.multicast = stats->rx_grp.mcastbcast;
        netdev->stats.rx_length_errors = stats->rx_oversize + stats->rx_runt;
        netdev->stats.rx_errors = stats->netstats.rx_errors;
        netdev->stats.rx_dropped = stats->netstats.rx_dropped;
}

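/*
 * The aggregate tx_errors/rx_errors computed above are derived totals: the
 * individual drop reasons (fifo, carrier, drop-state, dlid-zero, nohandler,
 * oversize, runt) are folded into one error count and then mirrored into the
 * standard netdev counters so that generic tooling sees consistent numbers.
 */
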
/* update_len_counters - update pkt's len histogram counters */
static inline void update_len_counters(struct opa_vnic_grp_stats *grp,
                                       int len)
{
        /* account for 4 byte FCS */
        if (len >= 1515)
                grp->s_1519_max++;
        else if (len >= 1020)
                grp->s_1024_1518++;
        else if (len >= 508)
                grp->s_512_1023++;
        else if (len >= 252)
                grp->s_256_511++;
        else if (len >= 124)
                grp->s_128_255++;
        else if (len >= 61)
                grp->s_65_127++;
        else
                grp->s_64++;
}

/* hfi1_vnic_update_tx_counters - update transmit counters */
static void hfi1_vnic_update_tx_counters(struct hfi1_vnic_vport_info *vinfo,
                                         u8 q_idx, struct sk_buff *skb, int err)
{
        struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb);
        struct opa_vnic_stats *stats = &vinfo->stats[q_idx];
        struct opa_vnic_grp_stats *tx_grp = &stats->tx_grp;
        u16 vlan_tci;

        stats->netstats.tx_packets++;
        stats->netstats.tx_bytes += skb->len + ETH_FCS_LEN;

        update_len_counters(tx_grp, skb->len);

        /* rest of the counts are for good packets only */
        if (unlikely(err))
                return;

        if (is_multicast_ether_addr(mac_hdr->h_dest))
                tx_grp->mcastbcast++;
        else
                tx_grp->unicast++;

        if (!__vlan_get_tag(skb, &vlan_tci))
                tx_grp->vlan++;
        else
                tx_grp->untagged++;
}

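/*
 * Byte counters here (and in the receive counterpart below) include
 * ETH_FCS_LEN even though the skb carries no FCS, presumably to keep the byte
 * totals and the length histogram comparable with what a conventional
 * ethernet NIC would report for the same frames.
 */
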
/* hfi1_vnic_update_rx_counters - update receive counters */
static void hfi1_vnic_update_rx_counters(struct hfi1_vnic_vport_info *vinfo,
                                         u8 q_idx, struct sk_buff *skb, int err)
{
        struct ethhdr *mac_hdr = (struct ethhdr *)skb->data;
        struct opa_vnic_stats *stats = &vinfo->stats[q_idx];
        struct opa_vnic_grp_stats *rx_grp = &stats->rx_grp;
        u16 vlan_tci;

        stats->netstats.rx_packets++;
        stats->netstats.rx_bytes += skb->len + ETH_FCS_LEN;

        update_len_counters(rx_grp, skb->len);

        /* rest of the counts are for good packets only */
        if (unlikely(err))
                return;

        if (is_multicast_ether_addr(mac_hdr->h_dest))
                rx_grp->mcastbcast++;
        else
                rx_grp->unicast++;

        if (!__vlan_get_tag(skb, &vlan_tci))
                rx_grp->vlan++;
        else
                rx_grp->untagged++;
}

/* This function is overloaded for opa_vnic specific implementation */
static void hfi1_vnic_get_stats64(struct net_device *netdev,
                                  struct rtnl_link_stats64 *stats)
{
        struct opa_vnic_stats *vstats = (struct opa_vnic_stats *)stats;
        struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);

        hfi1_vnic_update_stats(vinfo, vstats);
}

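/*
 * Note: the cast above relies on struct opa_vnic_stats (defined by the
 * opa_vnic layer) embedding the standard rtnl_link_stats64 (netstats) as its
 * first member, so the core sees the generic counters while the driver also
 * fills in the OPA-specific ones.
 */
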
static u64 create_bypass_pbc(u32 vl, u32 dw_len)
{
        u64 pbc;

        pbc = ((u64)PBC_IHCRC_NONE << PBC_INSERT_HCRC_SHIFT)
                | PBC_INSERT_BYPASS_ICRC | PBC_CREDIT_RETURN
                | PBC_PACKET_BYPASS
                | ((vl & PBC_VL_MASK) << PBC_VL_SHIFT)
                | (dw_len & PBC_LENGTH_DWS_MASK) << PBC_LENGTH_DWS_SHIFT;

        return pbc;
}

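/*
 * Rough meaning of the PBC fields selected above, going by the flag names:
 * the 64-bit PBC word prepended to the packet asks the send engine not to
 * insert an HCRC, to use the bypass ICRC, to return a credit, and encodes
 * the VL plus the packet length in dwords (which includes the PBC itself).
 */
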
/* hfi1_vnic_maybe_stop_tx - stop tx queue if required */
static void hfi1_vnic_maybe_stop_tx(struct hfi1_vnic_vport_info *vinfo,
                                    u8 q_idx)
{
        netif_stop_subqueue(vinfo->netdev, q_idx);
        if (!hfi1_vnic_sdma_write_avail(vinfo, q_idx))
                return;

        netif_start_subqueue(vinfo->netdev, q_idx);
}

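/*
 * The stop-then-recheck order above avoids a lost wakeup: the queue is
 * stopped first, then descriptor space is rechecked; if the SDMA side freed
 * descriptors in between (its completion path presumably restarts the queue),
 * the queue is restarted here instead of staying stopped.
 */
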
static netdev_tx_t hfi1_netdev_start_xmit(struct sk_buff *skb,
                                          struct net_device *netdev)
{
        struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
        u8 pad_len, q_idx = skb->queue_mapping;
        struct hfi1_devdata *dd = vinfo->dd;
        struct opa_vnic_skb_mdata *mdata;
        u32 pkt_len, total_len;
        int err = -EINVAL;
        u64 pbc;

        v_dbg("xmit: queue %d skb len %d\n", q_idx, skb->len);
        if (unlikely(!netif_oper_up(netdev))) {
                vinfo->stats[q_idx].tx_drop_state++;
                goto tx_finish;
        }

        /* take out meta data */
        mdata = (struct opa_vnic_skb_mdata *)skb->data;
        skb_pull(skb, sizeof(*mdata));
        if (unlikely(mdata->flags & OPA_VNIC_SKB_MDATA_ENCAP_ERR)) {
                vinfo->stats[q_idx].tx_dlid_zero++;
                goto tx_finish;
        }

        /* add tail padding (for 8 bytes size alignment) and icrc */
        pad_len = -(skb->len + OPA_VNIC_ICRC_TAIL_LEN) & 0x7;
        pad_len += OPA_VNIC_ICRC_TAIL_LEN;

        /*
         * pkt_len is how much data we have to write, including header and data.
         * total_len is the length of the packet in Dwords plus the PBC; it
         * should not include the CRC.
         */
        pkt_len = (skb->len + pad_len) >> 2;
        total_len = pkt_len + 2; /* PBC + packet */
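
        /*
         * Worked example of the padding math above, assuming
         * OPA_VNIC_ICRC_TAIL_LEN is 5 (a 4-byte ICRC plus a 1-byte tail):
         * with skb->len = 100, -(100 + 5) & 0x7 = 7 pad bytes brings the
         * wire size to 112 bytes (a multiple of 8), so pad_len = 7 + 5 = 12,
         * pkt_len = (100 + 12) >> 2 = 28 dwords, and total_len = 30 dwords
         * including the two-dword PBC.
         */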
        pbc = create_bypass_pbc(mdata->vl, total_len);

        skb_get(skb);
        v_dbg("pbc 0x%016llX len %d pad_len %d\n", pbc, skb->len, pad_len);
        err = dd->process_vnic_dma_send(dd, q_idx, vinfo, skb, pbc, pad_len);
        if (unlikely(err)) {
                if (err == -ENOMEM)
                        vinfo->stats[q_idx].netstats.tx_fifo_errors++;
                else if (err != -EBUSY)
                        vinfo->stats[q_idx].netstats.tx_carrier_errors++;
        }

        /* remove the header before updating tx counters */
        skb_pull(skb, OPA_VNIC_HDR_LEN);

        if (unlikely(err == -EBUSY)) {
                hfi1_vnic_maybe_stop_tx(vinfo, q_idx);
                dev_kfree_skb_any(skb);
                return NETDEV_TX_BUSY;
        }

tx_finish:
        /* update tx counters */
        hfi1_vnic_update_tx_counters(vinfo, q_idx, skb, err);
        dev_kfree_skb_any(skb);
        return NETDEV_TX_OK;
}

static u16 hfi1_vnic_select_queue(struct net_device *netdev,
                                  struct sk_buff *skb,
                                  struct net_device *sb_dev)
{
        struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
        struct opa_vnic_skb_mdata *mdata;
        struct sdma_engine *sde;

        mdata = (struct opa_vnic_skb_mdata *)skb->data;
        sde = sdma_select_engine_vl(vinfo->dd, mdata->entropy, mdata->vl);
        return sde->this_idx;
}

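/*
 * The value returned above is an SDMA engine index; it is usable directly as
 * a tx queue index because the netdev is allocated with one tx queue per SDMA
 * engine (see hfi1_vnic_alloc_rn(), where num_tx_q = chip_sdma_engines(dd)).
 */
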
/* hfi1_vnic_decap_skb - strip OPA header from the skb (ethernet) packet */
static inline int hfi1_vnic_decap_skb(struct hfi1_vnic_rx_queue *rxq,
                                      struct sk_buff *skb)
{
        struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
        int max_len = vinfo->netdev->mtu + VLAN_ETH_HLEN;
        int rc = -EFAULT;

        skb_pull(skb, OPA_VNIC_HDR_LEN);

        /* Validate Packet length */
        if (unlikely(skb->len > max_len))
                vinfo->stats[rxq->idx].rx_oversize++;
        else if (unlikely(skb->len < ETH_ZLEN))
                vinfo->stats[rxq->idx].rx_runt++;
        else
                rc = 0;
        return rc;
}

static struct hfi1_vnic_vport_info *get_vnic_port(struct hfi1_devdata *dd,
                                                  int vesw_id)
{
        int vnic_id = VNIC_ID(vesw_id);

        return hfi1_netdev_get_data(dd, vnic_id);
}

static struct hfi1_vnic_vport_info *get_first_vnic_port(struct hfi1_devdata *dd)
{
        struct hfi1_vnic_vport_info *vinfo;
        int next_id = VNIC_ID(0);

        vinfo = hfi1_netdev_get_first_data(dd, &next_id);

        if (next_id > VNIC_ID(VNIC_MASK))
                return NULL;

        return vinfo;
}

void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet)
{
        struct hfi1_devdata *dd = packet->rcd->dd;
        struct hfi1_vnic_vport_info *vinfo = NULL;
        struct hfi1_vnic_rx_queue *rxq;
        struct sk_buff *skb;
        int l4_type, vesw_id = -1, rc;
        u8 q_idx;
        unsigned char *pad_info;

        l4_type = hfi1_16B_get_l4(packet->ebuf);
        if (likely(l4_type == OPA_16B_L4_ETHR)) {
                vesw_id = HFI1_VNIC_GET_VESWID(packet->ebuf);
                vinfo = get_vnic_port(dd, vesw_id);

                /*
                 * In case of invalid vesw id, count the error on
                 * the first available vport.
                 */
                if (unlikely(!vinfo)) {
                        struct hfi1_vnic_vport_info *vinfo_tmp;

                        vinfo_tmp = get_first_vnic_port(dd);
                        if (vinfo_tmp) {
                                spin_lock(&vport_cntr_lock);
                                vinfo_tmp->stats[0].netstats.rx_nohandler++;
                                spin_unlock(&vport_cntr_lock);
                        }
                }
        }

        if (unlikely(!vinfo)) {
                dd_dev_warn(dd, "vnic rcv err: l4 %d vesw id %d ctx %d\n",
                            l4_type, vesw_id, packet->rcd->ctxt);
                return;
        }

        q_idx = packet->rcd->vnic_q_idx;
        rxq = &vinfo->rxq[q_idx];
        if (unlikely(!netif_oper_up(vinfo->netdev))) {
                vinfo->stats[q_idx].rx_drop_state++;
                return;
        }

        skb = netdev_alloc_skb(vinfo->netdev, packet->tlen);
        if (unlikely(!skb)) {
                vinfo->stats[q_idx].netstats.rx_fifo_errors++;
                return;
        }

        memcpy(skb->data, packet->ebuf, packet->tlen);
        skb_put(skb, packet->tlen);

        pad_info = skb->data + skb->len - 1;
        skb_trim(skb, (skb->len - OPA_VNIC_ICRC_TAIL_LEN -
                       ((*pad_info) & 0x7)));
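
        /*
         * The low 3 bits of the last byte (pad_info) carry the number of tail
         * pad bytes the sender added for 8-byte alignment; trimming
         * OPA_VNIC_ICRC_TAIL_LEN plus that pad restores the original frame
         * length before hfi1_vnic_decap_skb() strips the OPA header.
         */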
        rc = hfi1_vnic_decap_skb(rxq, skb);

        /* update rx counters */
        hfi1_vnic_update_rx_counters(vinfo, rxq->idx, skb, rc);
        if (unlikely(rc)) {
                dev_kfree_skb_any(skb);
                return;
        }

        skb_checksum_none_assert(skb);
        skb->protocol = eth_type_trans(skb, rxq->netdev);

        napi_gro_receive(&rxq->napi, skb);
}

static int hfi1_vnic_up(struct hfi1_vnic_vport_info *vinfo)
{
        struct hfi1_devdata *dd = vinfo->dd;
        struct net_device *netdev = vinfo->netdev;
        int rc;

        /* ensure virtual eth switch id is valid */
        if (!vinfo->vesw_id)
                return -EINVAL;

        rc = hfi1_netdev_add_data(dd, VNIC_ID(vinfo->vesw_id), vinfo);
        if (rc < 0)
                return rc;

        rc = hfi1_netdev_rx_init(dd);
        if (rc)
                goto err_remove;

        netif_carrier_on(netdev);
        netif_tx_start_all_queues(netdev);
        set_bit(HFI1_VNIC_UP, &vinfo->flags);

        return 0;

err_remove:
        hfi1_netdev_remove_data(dd, VNIC_ID(vinfo->vesw_id));
        return rc;
}

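/*
 * Bring-up order above: register the vport in the netdev table first (so the
 * receive path can look it up), then initialize the receive contexts, and
 * only then enable the carrier and tx queues; hfi1_vnic_down() undoes the
 * same steps in reverse.
 */
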
static void hfi1_vnic_down(struct hfi1_vnic_vport_info *vinfo)
{
        struct hfi1_devdata *dd = vinfo->dd;

        clear_bit(HFI1_VNIC_UP, &vinfo->flags);
        netif_carrier_off(vinfo->netdev);
        netif_tx_disable(vinfo->netdev);
        hfi1_netdev_remove_data(dd, VNIC_ID(vinfo->vesw_id));

        hfi1_netdev_rx_destroy(dd);
}

static int hfi1_netdev_open(struct net_device *netdev)
{
        struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
        int rc;

        mutex_lock(&vinfo->lock);
        rc = hfi1_vnic_up(vinfo);
        mutex_unlock(&vinfo->lock);
        return rc;
}

static int hfi1_netdev_close(struct net_device *netdev)
{
        struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);

        mutex_lock(&vinfo->lock);
        if (test_bit(HFI1_VNIC_UP, &vinfo->flags))
                hfi1_vnic_down(vinfo);
        mutex_unlock(&vinfo->lock);
        return 0;
}

static int hfi1_vnic_init(struct hfi1_vnic_vport_info *vinfo)
{
        struct hfi1_devdata *dd = vinfo->dd;
        int rc = 0;

        mutex_lock(&hfi1_mutex);
        if (!dd->vnic_num_vports) {
                rc = hfi1_vnic_txreq_init(dd);
                if (rc)
                        goto txreq_fail;
        }

        rc = hfi1_netdev_rx_init(dd);
        if (rc) {
                dd_dev_err(dd, "Unable to initialize netdev contexts\n");
                goto alloc_fail;
        }

        hfi1_init_vnic_rsm(dd);

        dd->vnic_num_vports++;
        hfi1_vnic_sdma_init(vinfo);

alloc_fail:
        if (!dd->vnic_num_vports)
                hfi1_vnic_txreq_deinit(dd);
txreq_fail:
        mutex_unlock(&hfi1_mutex);
        return rc;
}

static void hfi1_vnic_deinit(struct hfi1_vnic_vport_info *vinfo)
{
        struct hfi1_devdata *dd = vinfo->dd;

        mutex_lock(&hfi1_mutex);
        if (--dd->vnic_num_vports == 0) {
                hfi1_deinit_vnic_rsm(dd);
                hfi1_vnic_txreq_deinit(dd);
        }
        mutex_unlock(&hfi1_mutex);
        hfi1_netdev_rx_destroy(dd);
}

static void hfi1_vnic_set_vesw_id(struct net_device *netdev, int id)
{
        struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
        bool reopen = false;

        /*
         * If vesw_id is being changed, and if the vnic port is up,
         * reset the vnic port to ensure the new vesw_id gets picked up.
         */
        if (id != vinfo->vesw_id) {
                mutex_lock(&vinfo->lock);
                if (test_bit(HFI1_VNIC_UP, &vinfo->flags)) {
                        hfi1_vnic_down(vinfo);
                        reopen = true;
                }

                vinfo->vesw_id = id;
                if (reopen)
                        hfi1_vnic_up(vinfo);

                mutex_unlock(&vinfo->lock);
        }
}

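/*
 * Since a vport is keyed in the netdev table by VNIC_ID(vesw_id), changing
 * the id on a live port requires the down/up cycle above so the entry is
 * re-registered under the new id.
 */
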
static const struct net_device_ops hfi1_netdev_ops = {
        .ndo_open = hfi1_netdev_open,
        .ndo_stop = hfi1_netdev_close,
        .ndo_start_xmit = hfi1_netdev_start_xmit,
        .ndo_select_queue = hfi1_vnic_select_queue,
        .ndo_get_stats64 = hfi1_vnic_get_stats64,
};

static void hfi1_vnic_free_rn(struct net_device *netdev)
{
        struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);

        hfi1_vnic_deinit(vinfo);
        mutex_destroy(&vinfo->lock);
        free_netdev(netdev);
}

struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device,
                                      u8 port_num,
                                      enum rdma_netdev_t type,
                                      const char *name,
                                      unsigned char name_assign_type,
                                      void (*setup)(struct net_device *))
{
        struct hfi1_devdata *dd = dd_from_ibdev(device);
        struct hfi1_vnic_vport_info *vinfo;
        struct net_device *netdev;
        struct rdma_netdev *rn;
        int i, size, rc;

        if (!dd->num_netdev_contexts)
                return ERR_PTR(-ENOMEM);

        if (!port_num || (port_num > dd->num_pports))
                return ERR_PTR(-EINVAL);

        if (type != RDMA_NETDEV_OPA_VNIC)
                return ERR_PTR(-EOPNOTSUPP);

        size = sizeof(struct opa_vnic_rdma_netdev) + sizeof(*vinfo);
        netdev = alloc_netdev_mqs(size, name, name_assign_type, setup,
                                  chip_sdma_engines(dd),
                                  dd->num_netdev_contexts);
        if (!netdev)
                return ERR_PTR(-ENOMEM);

        rn = netdev_priv(netdev);
        vinfo = opa_vnic_dev_priv(netdev);
        vinfo->dd = dd;
        vinfo->num_tx_q = chip_sdma_engines(dd);
        vinfo->num_rx_q = dd->num_netdev_contexts;
        vinfo->netdev = netdev;
        rn->free_rdma_netdev = hfi1_vnic_free_rn;
        rn->set_id = hfi1_vnic_set_vesw_id;

        netdev->features = NETIF_F_HIGHDMA | NETIF_F_SG;
        netdev->hw_features = netdev->features;
        netdev->vlan_features = netdev->features;
        netdev->watchdog_timeo = msecs_to_jiffies(HFI_TX_TIMEOUT_MS);
        netdev->netdev_ops = &hfi1_netdev_ops;
        mutex_init(&vinfo->lock);

        for (i = 0; i < vinfo->num_rx_q; i++) {
                struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i];

                rxq->idx = i;
                rxq->vinfo = vinfo;
                rxq->netdev = netdev;
        }

        rc = hfi1_vnic_init(vinfo);
        if (rc)
                goto init_fail;

        return netdev;
init_fail:
        mutex_destroy(&vinfo->lock);
        free_netdev(netdev);
        return ERR_PTR(rc);
}