drivers/net/ethernet/emulex/benet/be_main.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Copyright (C) 2005 - 2016 Broadcom
4 * All rights reserved.
6 * Contact Information:
7 * linux-drivers@emulex.com
9 * Emulex
10 * 3333 Susan Street
11 * Costa Mesa, CA 92626
14 #include <linux/prefetch.h>
15 #include <linux/module.h>
16 #include "be.h"
17 #include "be_cmds.h"
18 #include <asm/div64.h>
19 #include <linux/aer.h>
20 #include <linux/if_bridge.h>
21 #include <net/busy_poll.h>
22 #include <net/vxlan.h>
24 MODULE_VERSION(DRV_VER);
25 MODULE_DESCRIPTION(DRV_DESC " " DRV_VER);
26 MODULE_AUTHOR("Emulex Corporation");
27 MODULE_LICENSE("GPL");
29 /* num_vfs module param is obsolete.
30 * Use sysfs method to enable/disable VFs.
32 static unsigned int num_vfs;
33 module_param(num_vfs, uint, 0444);
34 MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize");
36 static ushort rx_frag_size = 2048;
37 module_param(rx_frag_size, ushort, 0444);
38 MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data.");
40 /* Per-module error detection/recovery workq shared across all functions.
41 * Each function schedules its own work request on this shared workq.
43 static struct workqueue_struct *be_err_recovery_workq;
45 static const struct pci_device_id be_dev_ids[] = {
46 #ifdef CONFIG_BE2NET_BE2
47 { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) },
48 { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID1) },
49 #endif /* CONFIG_BE2NET_BE2 */
50 #ifdef CONFIG_BE2NET_BE3
51 { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) },
52 { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID2) },
53 #endif /* CONFIG_BE2NET_BE3 */
54 #ifdef CONFIG_BE2NET_LANCER
55 { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID3)},
56 { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID4)},
57 #endif /* CONFIG_BE2NET_LANCER */
58 #ifdef CONFIG_BE2NET_SKYHAWK
59 { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID5)},
60 { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID6)},
61 #endif /* CONFIG_BE2NET_SKYHAWK */
62 { 0 }
64 MODULE_DEVICE_TABLE(pci, be_dev_ids);
66 /* Workqueue used by all functions for deferring cmd calls to the adapter */
67 static struct workqueue_struct *be_wq;
69 /* UE Status Low CSR */
70 static const char * const ue_status_low_desc[] = {
71 "CEV",
72 "CTX",
73 "DBUF",
74 "ERX",
75 "Host",
76 "MPU",
77 "NDMA",
78 "PTC ",
79 "RDMA ",
80 "RXF ",
81 "RXIPS ",
82 "RXULP0 ",
83 "RXULP1 ",
84 "RXULP2 ",
85 "TIM ",
86 "TPOST ",
87 "TPRE ",
88 "TXIPS ",
89 "TXULP0 ",
90 "TXULP1 ",
91 "UC ",
92 "WDMA ",
93 "TXULP2 ",
94 "HOST1 ",
95 "P0_OB_LINK ",
96 "P1_OB_LINK ",
97 "HOST_GPIO ",
98 "MBOX ",
99 "ERX2 ",
100 "SPARE ",
101 "JTAG ",
102 "MPU_INTPEND "
105 /* UE Status High CSR */
106 static const char * const ue_status_hi_desc[] = {
107 "LPCMEMHOST",
108 "MGMT_MAC",
109 "PCS0ONLINE",
110 "MPU_IRAM",
111 "PCS1ONLINE",
112 "PCTL0",
113 "PCTL1",
114 "PMEM",
115 "RR",
116 "TXPB",
117 "RXPP",
118 "XAUI",
119 "TXP",
120 "ARM",
121 "IPC",
122 "HOST2",
123 "HOST3",
124 "HOST4",
125 "HOST5",
126 "HOST6",
127 "HOST7",
128 "ECRC",
129 "Poison TLP",
130 "NETC",
131 "PERIPH",
132 "LLTXULP",
133 "D2P",
134 "RCON",
135 "LDMA",
136 "LLTXP",
137 "LLTXPB",
138 "Unknown"
141 #define BE_VF_IF_EN_FLAGS (BE_IF_FLAGS_UNTAGGED | \
142 BE_IF_FLAGS_BROADCAST | \
143 BE_IF_FLAGS_MULTICAST | \
144 BE_IF_FLAGS_PASS_L3L4_ERRORS)
146 static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
148 struct be_dma_mem *mem = &q->dma_mem;
150 if (mem->va) {
151 dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va,
152 mem->dma);
153 mem->va = NULL;
157 static int be_queue_alloc(struct be_adapter *adapter, struct be_queue_info *q,
158 u16 len, u16 entry_size)
160 struct be_dma_mem *mem = &q->dma_mem;
162 memset(q, 0, sizeof(*q));
163 q->len = len;
164 q->entry_size = entry_size;
165 mem->size = len * entry_size;
166 mem->va = dma_alloc_coherent(&adapter->pdev->dev, mem->size,
167 &mem->dma, GFP_KERNEL);
168 if (!mem->va)
169 return -ENOMEM;
170 return 0;
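/* Enable/disable host interrupt generation by toggling the HOSTINTR bit
 * in the PCI config-space MEMBAR interrupt control register.
 */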
173 static void be_reg_intr_set(struct be_adapter *adapter, bool enable)
175 u32 reg, enabled;
177 pci_read_config_dword(adapter->pdev, PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET,
178 &reg);
179 enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
181 if (!enabled && enable)
182 reg |= MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
183 else if (enabled && !enable)
184 reg &= ~MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
185 else
186 return;
188 pci_write_config_dword(adapter->pdev,
189 PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, reg);
192 static void be_intr_set(struct be_adapter *adapter, bool enable)
194 int status = 0;
196 /* On Lancer, interrupts can't be controlled via this register */
197 if (lancer_chip(adapter))
198 return;
200 if (be_check_error(adapter, BE_ERROR_EEH))
201 return;
203 status = be_cmd_intr_set(adapter, enable);
204 if (status)
205 be_reg_intr_set(adapter, enable);
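/* Ring the RX queue doorbell to tell HW how many new RX buffers were posted */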
208 static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted)
210 u32 val = 0;
212 if (be_check_error(adapter, BE_ERROR_HW))
213 return;
215 val |= qid & DB_RQ_RING_ID_MASK;
216 val |= posted << DB_RQ_NUM_POSTED_SHIFT;
218 wmb();
219 iowrite32(val, adapter->db + DB_RQ_OFFSET);
222 static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo,
223 u16 posted)
225 u32 val = 0;
227 if (be_check_error(adapter, BE_ERROR_HW))
228 return;
230 val |= txo->q.id & DB_TXULP_RING_ID_MASK;
231 val |= (posted & DB_TXULP_NUM_POSTED_MASK) << DB_TXULP_NUM_POSTED_SHIFT;
233 wmb();
234 iowrite32(val, adapter->db + txo->db_offset);
237 static void be_eq_notify(struct be_adapter *adapter, u16 qid,
238 bool arm, bool clear_int, u16 num_popped,
239 u32 eq_delay_mult_enc)
241 u32 val = 0;
243 val |= qid & DB_EQ_RING_ID_MASK;
244 val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT);
246 if (be_check_error(adapter, BE_ERROR_HW))
247 return;
249 if (arm)
250 val |= 1 << DB_EQ_REARM_SHIFT;
251 if (clear_int)
252 val |= 1 << DB_EQ_CLR_SHIFT;
253 val |= 1 << DB_EQ_EVNT_SHIFT;
254 val |= num_popped << DB_EQ_NUM_POPPED_SHIFT;
255 val |= eq_delay_mult_enc << DB_EQ_R2I_DLY_SHIFT;
256 iowrite32(val, adapter->db + DB_EQ_OFFSET);
259 void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
261 u32 val = 0;
263 val |= qid & DB_CQ_RING_ID_MASK;
264 val |= ((qid & DB_CQ_RING_ID_EXT_MASK) <<
265 DB_CQ_RING_ID_EXT_MASK_SHIFT);
267 if (be_check_error(adapter, BE_ERROR_HW))
268 return;
270 if (arm)
271 val |= 1 << DB_CQ_REARM_SHIFT;
272 val |= num_popped << DB_CQ_NUM_POPPED_SHIFT;
273 iowrite32(val, adapter->db + DB_CQ_OFFSET);
276 static int be_dev_mac_add(struct be_adapter *adapter, u8 *mac)
278 int i;
280 /* Check if mac has already been added as part of uc-list */
281 for (i = 0; i < adapter->uc_macs; i++) {
282 if (ether_addr_equal(adapter->uc_list[i].mac, mac)) {
283 /* mac already added, skip addition */
284 adapter->pmac_id[0] = adapter->pmac_id[i + 1];
285 return 0;
289 return be_cmd_pmac_add(adapter, mac, adapter->if_handle,
290 &adapter->pmac_id[0], 0);
293 static void be_dev_mac_del(struct be_adapter *adapter, int pmac_id)
295 int i;
297 /* Skip deletion if the programmed mac is
298 * being used in uc-list
300 for (i = 0; i < adapter->uc_macs; i++) {
301 if (adapter->pmac_id[i + 1] == pmac_id)
302 return;
304 be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
307 static int be_mac_addr_set(struct net_device *netdev, void *p)
309 struct be_adapter *adapter = netdev_priv(netdev);
310 struct device *dev = &adapter->pdev->dev;
311 struct sockaddr *addr = p;
312 int status;
313 u8 mac[ETH_ALEN];
314 u32 old_pmac_id = adapter->pmac_id[0];
316 if (!is_valid_ether_addr(addr->sa_data))
317 return -EADDRNOTAVAIL;
319 /* Proceed further only if the user-provided MAC is different
320 * from the active MAC
322 if (ether_addr_equal(addr->sa_data, adapter->dev_mac))
323 return 0;
325 /* BE3 VFs without FILTMGMT privilege are not allowed to set their MAC
326 * address
328 if (BEx_chip(adapter) && be_virtfn(adapter) &&
329 !check_privilege(adapter, BE_PRIV_FILTMGMT))
330 return -EPERM;
332 /* if device is not running, copy MAC to netdev->dev_addr */
333 if (!netif_running(netdev))
334 goto done;
336 /* The PMAC_ADD cmd may fail if the VF doesn't have FILTMGMT
337 * privilege or if PF did not provision the new MAC address.
338 * On BE3, this cmd will always fail if the VF doesn't have the
339 * FILTMGMT privilege. This failure is OK only if the PF programmed
340 * the MAC for the VF.
342 mutex_lock(&adapter->rx_filter_lock);
343 status = be_dev_mac_add(adapter, (u8 *)addr->sa_data);
344 if (!status) {
346 /* Delete the old programmed MAC. This call may fail if the
347 * old MAC was already deleted by the PF driver.
349 if (adapter->pmac_id[0] != old_pmac_id)
350 be_dev_mac_del(adapter, old_pmac_id);
353 mutex_unlock(&adapter->rx_filter_lock);
354 /* Decide if the new MAC is successfully activated only after
355 * querying the FW
357 status = be_cmd_get_active_mac(adapter, adapter->pmac_id[0], mac,
358 adapter->if_handle, true, 0);
359 if (status)
360 goto err;
362 /* The MAC change did not happen, either due to lack of privilege
363 * or PF didn't pre-provision.
365 if (!ether_addr_equal(addr->sa_data, mac)) {
366 status = -EPERM;
367 goto err;
370 /* Remember currently programmed MAC */
371 ether_addr_copy(adapter->dev_mac, addr->sa_data);
372 done:
373 ether_addr_copy(netdev->dev_addr, addr->sa_data);
374 dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
375 return 0;
376 err:
377 dev_warn(dev, "MAC address change to %pM failed\n", addr->sa_data);
378 return status;
381 /* BE2 supports only v0 cmd */
382 static void *hw_stats_from_cmd(struct be_adapter *adapter)
384 if (BE2_chip(adapter)) {
385 struct be_cmd_resp_get_stats_v0 *cmd = adapter->stats_cmd.va;
387 return &cmd->hw_stats;
388 } else if (BE3_chip(adapter)) {
389 struct be_cmd_resp_get_stats_v1 *cmd = adapter->stats_cmd.va;
391 return &cmd->hw_stats;
392 } else {
393 struct be_cmd_resp_get_stats_v2 *cmd = adapter->stats_cmd.va;
395 return &cmd->hw_stats;
399 /* BE2 supports only v0 cmd */
400 static void *be_erx_stats_from_cmd(struct be_adapter *adapter)
402 if (BE2_chip(adapter)) {
403 struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
405 return &hw_stats->erx;
406 } else if (BE3_chip(adapter)) {
407 struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
409 return &hw_stats->erx;
410 } else {
411 struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
413 return &hw_stats->erx;
417 static void populate_be_v0_stats(struct be_adapter *adapter)
419 struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
420 struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
421 struct be_rxf_stats_v0 *rxf_stats = &hw_stats->rxf;
422 struct be_port_rxf_stats_v0 *port_stats =
423 &rxf_stats->port[adapter->port_num];
424 struct be_drv_stats *drvs = &adapter->drv_stats;
426 be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
427 drvs->rx_pause_frames = port_stats->rx_pause_frames;
428 drvs->rx_crc_errors = port_stats->rx_crc_errors;
429 drvs->rx_control_frames = port_stats->rx_control_frames;
430 drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
431 drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
432 drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
433 drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
434 drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
435 drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
436 drvs->rxpp_fifo_overflow_drop = port_stats->rx_fifo_overflow;
437 drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
438 drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
439 drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
440 drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
441 drvs->rx_input_fifo_overflow_drop = port_stats->rx_input_fifo_overflow;
442 drvs->rx_dropped_header_too_small =
443 port_stats->rx_dropped_header_too_small;
444 drvs->rx_address_filtered =
445 port_stats->rx_address_filtered +
446 port_stats->rx_vlan_filtered;
447 drvs->rx_alignment_symbol_errors =
448 port_stats->rx_alignment_symbol_errors;
450 drvs->tx_pauseframes = port_stats->tx_pauseframes;
451 drvs->tx_controlframes = port_stats->tx_controlframes;
453 if (adapter->port_num)
454 drvs->jabber_events = rxf_stats->port1_jabber_events;
455 else
456 drvs->jabber_events = rxf_stats->port0_jabber_events;
457 drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
458 drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
459 drvs->forwarded_packets = rxf_stats->forwarded_packets;
460 drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
461 drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
462 drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
463 adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
466 static void populate_be_v1_stats(struct be_adapter *adapter)
468 struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
469 struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
470 struct be_rxf_stats_v1 *rxf_stats = &hw_stats->rxf;
471 struct be_port_rxf_stats_v1 *port_stats =
472 &rxf_stats->port[adapter->port_num];
473 struct be_drv_stats *drvs = &adapter->drv_stats;
475 be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
476 drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
477 drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
478 drvs->rx_pause_frames = port_stats->rx_pause_frames;
479 drvs->rx_crc_errors = port_stats->rx_crc_errors;
480 drvs->rx_control_frames = port_stats->rx_control_frames;
481 drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
482 drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
483 drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
484 drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
485 drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
486 drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
487 drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
488 drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
489 drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
490 drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
491 drvs->rx_dropped_header_too_small =
492 port_stats->rx_dropped_header_too_small;
493 drvs->rx_input_fifo_overflow_drop =
494 port_stats->rx_input_fifo_overflow_drop;
495 drvs->rx_address_filtered = port_stats->rx_address_filtered;
496 drvs->rx_alignment_symbol_errors =
497 port_stats->rx_alignment_symbol_errors;
498 drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
499 drvs->tx_pauseframes = port_stats->tx_pauseframes;
500 drvs->tx_controlframes = port_stats->tx_controlframes;
501 drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
502 drvs->jabber_events = port_stats->jabber_events;
503 drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
504 drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
505 drvs->forwarded_packets = rxf_stats->forwarded_packets;
506 drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
507 drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
508 drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
509 adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
512 static void populate_be_v2_stats(struct be_adapter *adapter)
514 struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
515 struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
516 struct be_rxf_stats_v2 *rxf_stats = &hw_stats->rxf;
517 struct be_port_rxf_stats_v2 *port_stats =
518 &rxf_stats->port[adapter->port_num];
519 struct be_drv_stats *drvs = &adapter->drv_stats;
521 be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
522 drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
523 drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
524 drvs->rx_pause_frames = port_stats->rx_pause_frames;
525 drvs->rx_crc_errors = port_stats->rx_crc_errors;
526 drvs->rx_control_frames = port_stats->rx_control_frames;
527 drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
528 drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
529 drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
530 drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
531 drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
532 drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
533 drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
534 drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
535 drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
536 drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
537 drvs->rx_dropped_header_too_small =
538 port_stats->rx_dropped_header_too_small;
539 drvs->rx_input_fifo_overflow_drop =
540 port_stats->rx_input_fifo_overflow_drop;
541 drvs->rx_address_filtered = port_stats->rx_address_filtered;
542 drvs->rx_alignment_symbol_errors =
543 port_stats->rx_alignment_symbol_errors;
544 drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
545 drvs->tx_pauseframes = port_stats->tx_pauseframes;
546 drvs->tx_controlframes = port_stats->tx_controlframes;
547 drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
548 drvs->jabber_events = port_stats->jabber_events;
549 drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
550 drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
551 drvs->forwarded_packets = rxf_stats->forwarded_packets;
552 drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
553 drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
554 drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
555 adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
556 if (be_roce_supported(adapter)) {
557 drvs->rx_roce_bytes_lsd = port_stats->roce_bytes_received_lsd;
558 drvs->rx_roce_bytes_msd = port_stats->roce_bytes_received_msd;
559 drvs->rx_roce_frames = port_stats->roce_frames_received;
560 drvs->roce_drops_crc = port_stats->roce_drops_crc;
561 drvs->roce_drops_payload_len =
562 port_stats->roce_drops_payload_len;
566 static void populate_lancer_stats(struct be_adapter *adapter)
568 struct be_drv_stats *drvs = &adapter->drv_stats;
569 struct lancer_pport_stats *pport_stats = pport_stats_from_cmd(adapter);
571 be_dws_le_to_cpu(pport_stats, sizeof(*pport_stats));
572 drvs->rx_pause_frames = pport_stats->rx_pause_frames_lo;
573 drvs->rx_crc_errors = pport_stats->rx_crc_errors_lo;
574 drvs->rx_control_frames = pport_stats->rx_control_frames_lo;
575 drvs->rx_in_range_errors = pport_stats->rx_in_range_errors;
576 drvs->rx_frame_too_long = pport_stats->rx_frames_too_long_lo;
577 drvs->rx_dropped_runt = pport_stats->rx_dropped_runt;
578 drvs->rx_ip_checksum_errs = pport_stats->rx_ip_checksum_errors;
579 drvs->rx_tcp_checksum_errs = pport_stats->rx_tcp_checksum_errors;
580 drvs->rx_udp_checksum_errs = pport_stats->rx_udp_checksum_errors;
581 drvs->rx_dropped_tcp_length =
582 pport_stats->rx_dropped_invalid_tcp_length;
583 drvs->rx_dropped_too_small = pport_stats->rx_dropped_too_small;
584 drvs->rx_dropped_too_short = pport_stats->rx_dropped_too_short;
585 drvs->rx_out_range_errors = pport_stats->rx_out_of_range_errors;
586 drvs->rx_dropped_header_too_small =
587 pport_stats->rx_dropped_header_too_small;
588 drvs->rx_input_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
589 drvs->rx_address_filtered =
590 pport_stats->rx_address_filtered +
591 pport_stats->rx_vlan_filtered;
592 drvs->rx_alignment_symbol_errors = pport_stats->rx_symbol_errors_lo;
593 drvs->rxpp_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
594 drvs->tx_pauseframes = pport_stats->tx_pause_frames_lo;
595 drvs->tx_controlframes = pport_stats->tx_control_frames_lo;
596 drvs->jabber_events = pport_stats->rx_jabbers;
597 drvs->forwarded_packets = pport_stats->num_forwards_lo;
598 drvs->rx_drops_mtu = pport_stats->rx_drops_mtu_lo;
599 drvs->rx_drops_too_many_frags =
600 pport_stats->rx_drops_too_many_frags_lo;
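/* Fold a 16-bit HW counter reading into a 32-bit accumulator, compensating
 * for the HW counter wrapping around at 65535.
 */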
603 static void accumulate_16bit_val(u32 *acc, u16 val)
605 #define lo(x) (x & 0xFFFF)
606 #define hi(x) (x & 0xFFFF0000)
607 bool wrapped = val < lo(*acc);
608 u32 newacc = hi(*acc) + val;
610 if (wrapped)
611 newacc += 65536;
612 WRITE_ONCE(*acc, newacc);
615 static void populate_erx_stats(struct be_adapter *adapter,
616 struct be_rx_obj *rxo, u32 erx_stat)
618 if (!BEx_chip(adapter))
619 rx_stats(rxo)->rx_drops_no_frags = erx_stat;
620 else
621 /* below erx HW counter can actually wrap around after
622 * 65535. Driver accumulates a 32-bit value
624 accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
625 (u16)erx_stat);
628 void be_parse_stats(struct be_adapter *adapter)
630 struct be_erx_stats_v2 *erx = be_erx_stats_from_cmd(adapter);
631 struct be_rx_obj *rxo;
632 int i;
633 u32 erx_stat;
635 if (lancer_chip(adapter)) {
636 populate_lancer_stats(adapter);
637 } else {
638 if (BE2_chip(adapter))
639 populate_be_v0_stats(adapter);
640 else if (BE3_chip(adapter))
641 /* for BE3 */
642 populate_be_v1_stats(adapter);
643 else
644 populate_be_v2_stats(adapter);
646 /* erx_v2 is longer than v0, v1. use v2 for v0, v1 access */
647 for_all_rx_queues(adapter, rxo, i) {
648 erx_stat = erx->rx_drops_no_fragments[rxo->q.id];
649 populate_erx_stats(adapter, rxo, erx_stat);
654 static void be_get_stats64(struct net_device *netdev,
655 struct rtnl_link_stats64 *stats)
657 struct be_adapter *adapter = netdev_priv(netdev);
658 struct be_drv_stats *drvs = &adapter->drv_stats;
659 struct be_rx_obj *rxo;
660 struct be_tx_obj *txo;
661 u64 pkts, bytes;
662 unsigned int start;
663 int i;
665 for_all_rx_queues(adapter, rxo, i) {
666 const struct be_rx_stats *rx_stats = rx_stats(rxo);
668 do {
669 start = u64_stats_fetch_begin_irq(&rx_stats->sync);
670 pkts = rx_stats(rxo)->rx_pkts;
671 bytes = rx_stats(rxo)->rx_bytes;
672 } while (u64_stats_fetch_retry_irq(&rx_stats->sync, start));
673 stats->rx_packets += pkts;
674 stats->rx_bytes += bytes;
675 stats->multicast += rx_stats(rxo)->rx_mcast_pkts;
676 stats->rx_dropped += rx_stats(rxo)->rx_drops_no_skbs +
677 rx_stats(rxo)->rx_drops_no_frags;
680 for_all_tx_queues(adapter, txo, i) {
681 const struct be_tx_stats *tx_stats = tx_stats(txo);
683 do {
684 start = u64_stats_fetch_begin_irq(&tx_stats->sync);
685 pkts = tx_stats(txo)->tx_pkts;
686 bytes = tx_stats(txo)->tx_bytes;
687 } while (u64_stats_fetch_retry_irq(&tx_stats->sync, start));
688 stats->tx_packets += pkts;
689 stats->tx_bytes += bytes;
692 /* bad pkts received */
693 stats->rx_errors = drvs->rx_crc_errors +
694 drvs->rx_alignment_symbol_errors +
695 drvs->rx_in_range_errors +
696 drvs->rx_out_range_errors +
697 drvs->rx_frame_too_long +
698 drvs->rx_dropped_too_small +
699 drvs->rx_dropped_too_short +
700 drvs->rx_dropped_header_too_small +
701 drvs->rx_dropped_tcp_length +
702 drvs->rx_dropped_runt;
704 /* detailed rx errors */
705 stats->rx_length_errors = drvs->rx_in_range_errors +
706 drvs->rx_out_range_errors +
707 drvs->rx_frame_too_long;
709 stats->rx_crc_errors = drvs->rx_crc_errors;
711 /* frame alignment errors */
712 stats->rx_frame_errors = drvs->rx_alignment_symbol_errors;
714 /* receiver fifo overrun */
715 /* drops_no_pbuf is not per i/f, it's per BE card */
716 stats->rx_fifo_errors = drvs->rxpp_fifo_overflow_drop +
717 drvs->rx_input_fifo_overflow_drop +
718 drvs->rx_drops_no_pbuf;
721 void be_link_status_update(struct be_adapter *adapter, u8 link_status)
723 struct net_device *netdev = adapter->netdev;
725 if (!(adapter->flags & BE_FLAGS_LINK_STATUS_INIT)) {
726 netif_carrier_off(netdev);
727 adapter->flags |= BE_FLAGS_LINK_STATUS_INIT;
730 if (link_status)
731 netif_carrier_on(netdev);
732 else
733 netif_carrier_off(netdev);
735 netdev_info(netdev, "Link is %s\n", link_status ? "Up" : "Down");
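/* Length of the headers that get replicated in every segment of a GSO pkt */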
738 static int be_gso_hdr_len(struct sk_buff *skb)
740 if (skb->encapsulation)
741 return skb_inner_transport_offset(skb) +
742 inner_tcp_hdrlen(skb);
743 return skb_transport_offset(skb) + tcp_hdrlen(skb);
746 static void be_tx_stats_update(struct be_tx_obj *txo, struct sk_buff *skb)
748 struct be_tx_stats *stats = tx_stats(txo);
749 u32 tx_pkts = skb_shinfo(skb)->gso_segs ? : 1;
750 /* Account for headers which get duplicated in TSO pkt */
751 u32 dup_hdr_len = tx_pkts > 1 ? be_gso_hdr_len(skb) * (tx_pkts - 1) : 0;
753 u64_stats_update_begin(&stats->sync);
754 stats->tx_reqs++;
755 stats->tx_bytes += skb->len + dup_hdr_len;
756 stats->tx_pkts += tx_pkts;
757 if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
758 stats->tx_vxlan_offload_pkts += tx_pkts;
759 u64_stats_update_end(&stats->sync);
762 /* Returns number of WRBs needed for the skb */
763 static u32 skb_wrb_cnt(struct sk_buff *skb)
765 /* +1 for the header wrb */
766 return 1 + (skb_headlen(skb) ? 1 : 0) + skb_shinfo(skb)->nr_frags;
769 static inline void wrb_fill(struct be_eth_wrb *wrb, u64 addr, int len)
771 wrb->frag_pa_hi = cpu_to_le32(upper_32_bits(addr));
772 wrb->frag_pa_lo = cpu_to_le32(lower_32_bits(addr));
773 wrb->frag_len = cpu_to_le32(len & ETH_WRB_FRAG_LEN_MASK);
774 wrb->rsvd0 = 0;
777 /* A dummy wrb is just all zeros. Using a separate routine for dummy-wrb
778 * to avoid the swap and shift/mask operations in wrb_fill().
780 static inline void wrb_fill_dummy(struct be_eth_wrb *wrb)
782 wrb->frag_pa_hi = 0;
783 wrb->frag_pa_lo = 0;
784 wrb->frag_len = 0;
785 wrb->rsvd0 = 0;
788 static inline u16 be_get_tx_vlan_tag(struct be_adapter *adapter,
789 struct sk_buff *skb)
791 u8 vlan_prio;
792 u16 vlan_tag;
794 vlan_tag = skb_vlan_tag_get(skb);
795 vlan_prio = skb_vlan_tag_get_prio(skb);
796 /* If vlan priority provided by OS is NOT in available bmap */
797 if (!(adapter->vlan_prio_bmap & (1 << vlan_prio)))
798 vlan_tag = (vlan_tag & ~VLAN_PRIO_MASK) |
799 adapter->recommended_prio_bits;
801 return vlan_tag;
804 /* Used only for IP tunnel packets */
805 static u16 skb_inner_ip_proto(struct sk_buff *skb)
807 return (inner_ip_hdr(skb)->version == 4) ?
808 inner_ip_hdr(skb)->protocol : inner_ipv6_hdr(skb)->nexthdr;
811 static u16 skb_ip_proto(struct sk_buff *skb)
813 return (ip_hdr(skb)->version == 4) ?
814 ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
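/* TXQ is treated as full when there is no room left for a maximally
 * fragmented skb (BE_MAX_TX_FRAG_COUNT WRBs).
 */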
817 static inline bool be_is_txq_full(struct be_tx_obj *txo)
819 return atomic_read(&txo->q.used) + BE_MAX_TX_FRAG_COUNT >= txo->q.len;
822 static inline bool be_can_txq_wake(struct be_tx_obj *txo)
824 return atomic_read(&txo->q.used) < txo->q.len / 2;
827 static inline bool be_is_tx_compl_pending(struct be_tx_obj *txo)
829 return atomic_read(&txo->q.used) > txo->pend_wrb_cnt;
832 static void be_get_wrb_params_from_skb(struct be_adapter *adapter,
833 struct sk_buff *skb,
834 struct be_wrb_params *wrb_params)
836 u16 proto;
838 if (skb_is_gso(skb)) {
839 BE_WRB_F_SET(wrb_params->features, LSO, 1);
840 wrb_params->lso_mss = skb_shinfo(skb)->gso_size;
841 if (skb_is_gso_v6(skb) && !lancer_chip(adapter))
842 BE_WRB_F_SET(wrb_params->features, LSO6, 1);
843 } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
844 if (skb->encapsulation) {
845 BE_WRB_F_SET(wrb_params->features, IPCS, 1);
846 proto = skb_inner_ip_proto(skb);
847 } else {
848 proto = skb_ip_proto(skb);
850 if (proto == IPPROTO_TCP)
851 BE_WRB_F_SET(wrb_params->features, TCPCS, 1);
852 else if (proto == IPPROTO_UDP)
853 BE_WRB_F_SET(wrb_params->features, UDPCS, 1);
856 if (skb_vlan_tag_present(skb)) {
857 BE_WRB_F_SET(wrb_params->features, VLAN, 1);
858 wrb_params->vlan_tag = be_get_tx_vlan_tag(adapter, skb);
861 BE_WRB_F_SET(wrb_params->features, CRC, 1);
864 static void wrb_fill_hdr(struct be_adapter *adapter,
865 struct be_eth_hdr_wrb *hdr,
866 struct be_wrb_params *wrb_params,
867 struct sk_buff *skb)
869 memset(hdr, 0, sizeof(*hdr));
871 SET_TX_WRB_HDR_BITS(crc, hdr,
872 BE_WRB_F_GET(wrb_params->features, CRC));
873 SET_TX_WRB_HDR_BITS(ipcs, hdr,
874 BE_WRB_F_GET(wrb_params->features, IPCS));
875 SET_TX_WRB_HDR_BITS(tcpcs, hdr,
876 BE_WRB_F_GET(wrb_params->features, TCPCS));
877 SET_TX_WRB_HDR_BITS(udpcs, hdr,
878 BE_WRB_F_GET(wrb_params->features, UDPCS));
880 SET_TX_WRB_HDR_BITS(lso, hdr,
881 BE_WRB_F_GET(wrb_params->features, LSO));
882 SET_TX_WRB_HDR_BITS(lso6, hdr,
883 BE_WRB_F_GET(wrb_params->features, LSO6));
884 SET_TX_WRB_HDR_BITS(lso_mss, hdr, wrb_params->lso_mss);
886 /* Hack to skip HW VLAN tagging needs evt = 1, compl = 0. When this
887 * hack is not needed, the evt bit is set while ringing DB.
889 SET_TX_WRB_HDR_BITS(event, hdr,
890 BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW));
891 SET_TX_WRB_HDR_BITS(vlan, hdr,
892 BE_WRB_F_GET(wrb_params->features, VLAN));
893 SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag);
895 SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb));
896 SET_TX_WRB_HDR_BITS(len, hdr, skb->len);
897 SET_TX_WRB_HDR_BITS(mgmt, hdr,
898 BE_WRB_F_GET(wrb_params->features, OS2BMC));
901 static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb,
902 bool unmap_single)
904 dma_addr_t dma;
905 u32 frag_len = le32_to_cpu(wrb->frag_len);
908 dma = (u64)le32_to_cpu(wrb->frag_pa_hi) << 32 |
909 (u64)le32_to_cpu(wrb->frag_pa_lo);
910 if (frag_len) {
911 if (unmap_single)
912 dma_unmap_single(dev, dma, frag_len, DMA_TO_DEVICE);
913 else
914 dma_unmap_page(dev, dma, frag_len, DMA_TO_DEVICE);
918 /* Grab a WRB header for xmit */
919 static u32 be_tx_get_wrb_hdr(struct be_tx_obj *txo)
921 u32 head = txo->q.head;
923 queue_head_inc(&txo->q);
924 return head;
927 /* Set up the WRB header for xmit */
928 static void be_tx_setup_wrb_hdr(struct be_adapter *adapter,
929 struct be_tx_obj *txo,
930 struct be_wrb_params *wrb_params,
931 struct sk_buff *skb, u16 head)
933 u32 num_frags = skb_wrb_cnt(skb);
934 struct be_queue_info *txq = &txo->q;
935 struct be_eth_hdr_wrb *hdr = queue_index_node(txq, head);
937 wrb_fill_hdr(adapter, hdr, wrb_params, skb);
938 be_dws_cpu_to_le(hdr, sizeof(*hdr));
940 BUG_ON(txo->sent_skb_list[head]);
941 txo->sent_skb_list[head] = skb;
942 txo->last_req_hdr = head;
943 atomic_add(num_frags, &txq->used);
944 txo->last_req_wrb_cnt = num_frags;
945 txo->pend_wrb_cnt += num_frags;
948 /* Setup a WRB fragment (buffer descriptor) for xmit */
949 static void be_tx_setup_wrb_frag(struct be_tx_obj *txo, dma_addr_t busaddr,
950 int len)
952 struct be_eth_wrb *wrb;
953 struct be_queue_info *txq = &txo->q;
955 wrb = queue_head_node(txq);
956 wrb_fill(wrb, busaddr, len);
957 queue_head_inc(txq);
960 /* Bring the queue back to the state it was in before be_xmit_enqueue() routine
961 * was invoked. The producer index is restored to the previous packet and the
962 * WRBs of the current packet are unmapped. Invoked to handle tx setup errors.
964 static void be_xmit_restore(struct be_adapter *adapter,
965 struct be_tx_obj *txo, u32 head, bool map_single,
966 u32 copied)
968 struct device *dev;
969 struct be_eth_wrb *wrb;
970 struct be_queue_info *txq = &txo->q;
972 dev = &adapter->pdev->dev;
973 txq->head = head;
975 /* skip the first wrb (hdr); it's not mapped */
976 queue_head_inc(txq);
977 while (copied) {
978 wrb = queue_head_node(txq);
979 unmap_tx_frag(dev, wrb, map_single);
980 map_single = false;
981 copied -= le32_to_cpu(wrb->frag_len);
982 queue_head_inc(txq);
985 txq->head = head;
988 /* Enqueue the given packet for transmit. This routine allocates WRBs for the
989 * packet, dma maps the packet buffers and sets up the WRBs. Returns the number
990 * of WRBs used up by the packet.
992 static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo,
993 struct sk_buff *skb,
994 struct be_wrb_params *wrb_params)
996 u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb);
997 struct device *dev = &adapter->pdev->dev;
998 bool map_single = false;
999 u32 head;
1000 dma_addr_t busaddr;
1001 int len;
1003 head = be_tx_get_wrb_hdr(txo);
1005 if (skb->len > skb->data_len) {
1006 len = skb_headlen(skb);
1008 busaddr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
1009 if (dma_mapping_error(dev, busaddr))
1010 goto dma_err;
1011 map_single = true;
1012 be_tx_setup_wrb_frag(txo, busaddr, len);
1013 copied += len;
1016 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1017 const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
1018 len = skb_frag_size(frag);
1020 busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE);
1021 if (dma_mapping_error(dev, busaddr))
1022 goto dma_err;
1023 be_tx_setup_wrb_frag(txo, busaddr, len);
1024 copied += len;
1027 be_tx_setup_wrb_hdr(adapter, txo, wrb_params, skb, head);
1029 be_tx_stats_update(txo, skb);
1030 return wrb_cnt;
1032 dma_err:
1033 adapter->drv_stats.dma_map_errors++;
1034 be_xmit_restore(adapter, txo, head, map_single, copied);
1035 return 0;
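/* True once FW's QnQ async event (reporting the outer VLAN config) has
 * been received for this function.
 */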
1038 static inline int qnq_async_evt_rcvd(struct be_adapter *adapter)
1040 return adapter->flags & BE_FLAGS_QNQ_ASYNC_EVT_RCVD;
1043 static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter,
1044 struct sk_buff *skb,
1045 struct be_wrb_params
1046 *wrb_params)
1048 bool insert_vlan = false;
1049 u16 vlan_tag = 0;
1051 skb = skb_share_check(skb, GFP_ATOMIC);
1052 if (unlikely(!skb))
1053 return skb;
1055 if (skb_vlan_tag_present(skb)) {
1056 vlan_tag = be_get_tx_vlan_tag(adapter, skb);
1057 insert_vlan = true;
1060 if (qnq_async_evt_rcvd(adapter) && adapter->pvid) {
1061 if (!insert_vlan) {
1062 vlan_tag = adapter->pvid;
1063 insert_vlan = true;
1065 /* f/w workaround: setting skip_hw_vlan = 1 informs the F/W to
1066 * skip VLAN insertion
1068 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1071 if (insert_vlan) {
1072 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1073 vlan_tag);
1074 if (unlikely(!skb))
1075 return skb;
1076 __vlan_hwaccel_clear_tag(skb);
1079 /* Insert the outer VLAN, if any */
1080 if (adapter->qnq_vid) {
1081 vlan_tag = adapter->qnq_vid;
1082 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1083 vlan_tag);
1084 if (unlikely(!skb))
1085 return skb;
1086 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1089 return skb;
1092 static bool be_ipv6_exthdr_check(struct sk_buff *skb)
1094 struct ethhdr *eh = (struct ethhdr *)skb->data;
1095 u16 offset = ETH_HLEN;
1097 if (eh->h_proto == htons(ETH_P_IPV6)) {
1098 struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + offset);
1100 offset += sizeof(struct ipv6hdr);
1101 if (ip6h->nexthdr != NEXTHDR_TCP &&
1102 ip6h->nexthdr != NEXTHDR_UDP) {
1103 struct ipv6_opt_hdr *ehdr =
1104 (struct ipv6_opt_hdr *)(skb->data + offset);
1106 /* offending pkt: 2nd byte following IPv6 hdr is 0xff */
1107 if (ehdr->hdrlen == 0xff)
1108 return true;
1111 return false;
1114 static int be_vlan_tag_tx_chk(struct be_adapter *adapter, struct sk_buff *skb)
1116 return skb_vlan_tag_present(skb) || adapter->pvid || adapter->qnq_vid;
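/* BE3 HW can lock up when asked to insert a VLAN tag into certain IPv6
 * pkts with extension headers; such pkts need the SW workaround below.
 */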
1119 static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb)
1121 return BE3_chip(adapter) && be_ipv6_exthdr_check(skb);
1124 static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
1125 struct sk_buff *skb,
1126 struct be_wrb_params
1127 *wrb_params)
1129 struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1130 unsigned int eth_hdr_len;
1131 struct iphdr *ip;
1133 /* For padded packets, BE HW modifies tot_len field in IP header
1134 * incorrectly when VLAN tag is inserted by HW.
1135 * For padded packets, Lancer computes incorrect checksum.
1137 eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ?
1138 VLAN_ETH_HLEN : ETH_HLEN;
1139 if (skb->len <= 60 &&
1140 (lancer_chip(adapter) || skb_vlan_tag_present(skb)) &&
1141 is_ipv4_pkt(skb)) {
1142 ip = (struct iphdr *)ip_hdr(skb);
1143 pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len));
1146 /* If vlan tag is already inlined in the packet, skip HW VLAN
1147 * tagging in pvid-tagging mode
1149 if (be_pvid_tagging_enabled(adapter) &&
1150 veh->h_vlan_proto == htons(ETH_P_8021Q))
1151 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1153 /* HW has a bug wherein it will calculate CSUM for VLAN
1154 * pkts even though it is disabled.
1155 * Manually insert VLAN in pkt.
1157 if (skb->ip_summed != CHECKSUM_PARTIAL &&
1158 skb_vlan_tag_present(skb)) {
1159 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1160 if (unlikely(!skb))
1161 goto err;
1164 /* HW may lockup when VLAN HW tagging is requested on
1165 * certain ipv6 packets. Drop such pkts if the HW workaround to
1166 * skip HW tagging is not enabled by FW.
1168 if (unlikely(be_ipv6_tx_stall_chk(adapter, skb) &&
1169 (adapter->pvid || adapter->qnq_vid) &&
1170 !qnq_async_evt_rcvd(adapter)))
1171 goto tx_drop;
1173 /* Manual VLAN tag insertion to prevent:
1174 * ASIC lockup when the ASIC inserts VLAN tag into
1175 * certain ipv6 packets. Insert VLAN tags in driver,
1176 * and set event, completion, vlan bits accordingly
1177 * in the Tx WRB.
1179 if (be_ipv6_tx_stall_chk(adapter, skb) &&
1180 be_vlan_tag_tx_chk(adapter, skb)) {
1181 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1182 if (unlikely(!skb))
1183 goto err;
1186 return skb;
1187 tx_drop:
1188 dev_kfree_skb_any(skb);
1189 err:
1190 return NULL;
1193 static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter,
1194 struct sk_buff *skb,
1195 struct be_wrb_params *wrb_params)
1197 int err;
1199 /* Lancer, SH and BE3 in SRIOV mode have a bug wherein
1200 * packets that are 32b or less may cause a transmit stall
1201 * on that port. The workaround is to pad such packets
1202 * (len <= 32 bytes) to a minimum length of 36b.
1204 if (skb->len <= 32) {
1205 if (skb_put_padto(skb, 36))
1206 return NULL;
1209 if (BEx_chip(adapter) || lancer_chip(adapter)) {
1210 skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params);
1211 if (!skb)
1212 return NULL;
1215 /* The stack can send us skbs with length greater than
1216 * what the HW can handle. Trim the extra bytes.
1218 WARN_ON_ONCE(skb->len > BE_MAX_GSO_SIZE);
1219 err = pskb_trim(skb, BE_MAX_GSO_SIZE);
1220 WARN_ON(err);
1222 return skb;
1225 static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo)
1227 struct be_queue_info *txq = &txo->q;
1228 struct be_eth_hdr_wrb *hdr = queue_index_node(txq, txo->last_req_hdr);
1230 /* Mark the last request eventable if it hasn't been marked already */
1231 if (!(hdr->dw[2] & cpu_to_le32(TX_HDR_WRB_EVT)))
1232 hdr->dw[2] |= cpu_to_le32(TX_HDR_WRB_EVT | TX_HDR_WRB_COMPL);
1234 /* compose a dummy wrb if there is an odd number of wrbs to notify */
1235 if (!lancer_chip(adapter) && (txo->pend_wrb_cnt & 1)) {
1236 wrb_fill_dummy(queue_head_node(txq));
1237 queue_head_inc(txq);
1238 atomic_inc(&txq->used);
1239 txo->pend_wrb_cnt++;
1240 hdr->dw[2] &= ~cpu_to_le32(TX_HDR_WRB_NUM_MASK <<
1241 TX_HDR_WRB_NUM_SHIFT);
1242 hdr->dw[2] |= cpu_to_le32((txo->last_req_wrb_cnt + 1) <<
1243 TX_HDR_WRB_NUM_SHIFT);
1245 be_txq_notify(adapter, txo, txo->pend_wrb_cnt);
1246 txo->pend_wrb_cnt = 0;
1249 /* OS2BMC related */
1251 #define DHCP_CLIENT_PORT 68
1252 #define DHCP_SERVER_PORT 67
1253 #define NET_BIOS_PORT1 137
1254 #define NET_BIOS_PORT2 138
1255 #define DHCPV6_RAS_PORT 547
1257 #define is_mc_allowed_on_bmc(adapter, eh) \
1258 (!is_multicast_filt_enabled(adapter) && \
1259 is_multicast_ether_addr(eh->h_dest) && \
1260 !is_broadcast_ether_addr(eh->h_dest))
1262 #define is_bc_allowed_on_bmc(adapter, eh) \
1263 (!is_broadcast_filt_enabled(adapter) && \
1264 is_broadcast_ether_addr(eh->h_dest))
1266 #define is_arp_allowed_on_bmc(adapter, skb) \
1267 (is_arp(skb) && is_arp_filt_enabled(adapter))
1269 #define is_arp(skb) (skb->protocol == htons(ETH_P_ARP))
1271 #define is_arp_filt_enabled(adapter) \
1272 (adapter->bmc_filt_mask & (BMC_FILT_BROADCAST_ARP))
1274 #define is_dhcp_client_filt_enabled(adapter) \
1275 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_CLIENT)
1277 #define is_dhcp_srvr_filt_enabled(adapter) \
1278 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_SERVER)
1280 #define is_nbios_filt_enabled(adapter) \
1281 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_NET_BIOS)
1283 #define is_ipv6_na_filt_enabled(adapter) \
1284 (adapter->bmc_filt_mask & \
1285 BMC_FILT_MULTICAST_IPV6_NEIGH_ADVER)
1287 #define is_ipv6_ra_filt_enabled(adapter) \
1288 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RA)
1290 #define is_ipv6_ras_filt_enabled(adapter) \
1291 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RAS)
1293 #define is_broadcast_filt_enabled(adapter) \
1294 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST)
1296 #define is_multicast_filt_enabled(adapter) \
1297 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST)
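/* Returns true if a copy of this TX pkt should also be sent to the BMC,
 * based on the pkt type and the BMC filtering mask programmed by FW.
 */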
1299 static bool be_send_pkt_to_bmc(struct be_adapter *adapter,
1300 struct sk_buff **skb)
1302 struct ethhdr *eh = (struct ethhdr *)(*skb)->data;
1303 bool os2bmc = false;
1305 if (!be_is_os2bmc_enabled(adapter))
1306 goto done;
1308 if (!is_multicast_ether_addr(eh->h_dest))
1309 goto done;
1311 if (is_mc_allowed_on_bmc(adapter, eh) ||
1312 is_bc_allowed_on_bmc(adapter, eh) ||
1313 is_arp_allowed_on_bmc(adapter, (*skb))) {
1314 os2bmc = true;
1315 goto done;
1318 if ((*skb)->protocol == htons(ETH_P_IPV6)) {
1319 struct ipv6hdr *hdr = ipv6_hdr((*skb));
1320 u8 nexthdr = hdr->nexthdr;
1322 if (nexthdr == IPPROTO_ICMPV6) {
1323 struct icmp6hdr *icmp6 = icmp6_hdr((*skb));
1325 switch (icmp6->icmp6_type) {
1326 case NDISC_ROUTER_ADVERTISEMENT:
1327 os2bmc = is_ipv6_ra_filt_enabled(adapter);
1328 goto done;
1329 case NDISC_NEIGHBOUR_ADVERTISEMENT:
1330 os2bmc = is_ipv6_na_filt_enabled(adapter);
1331 goto done;
1332 default:
1333 break;
1338 if (is_udp_pkt((*skb))) {
1339 struct udphdr *udp = udp_hdr((*skb));
1341 switch (ntohs(udp->dest)) {
1342 case DHCP_CLIENT_PORT:
1343 os2bmc = is_dhcp_client_filt_enabled(adapter);
1344 goto done;
1345 case DHCP_SERVER_PORT:
1346 os2bmc = is_dhcp_srvr_filt_enabled(adapter);
1347 goto done;
1348 case NET_BIOS_PORT1:
1349 case NET_BIOS_PORT2:
1350 os2bmc = is_nbios_filt_enabled(adapter);
1351 goto done;
1352 case DHCPV6_RAS_PORT:
1353 os2bmc = is_ipv6_ras_filt_enabled(adapter);
1354 goto done;
1355 default:
1356 break;
1359 done:
1360 /* For VLAN packets destined to the BMC, the ASIC expects
1361 * the VLAN tag to be inline in the packet.
1363 if (os2bmc)
1364 *skb = be_insert_vlan_in_pkt(adapter, *skb, NULL);
1366 return os2bmc;
1369 static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
1371 struct be_adapter *adapter = netdev_priv(netdev);
1372 u16 q_idx = skb_get_queue_mapping(skb);
1373 struct be_tx_obj *txo = &adapter->tx_obj[q_idx];
1374 struct be_wrb_params wrb_params = { 0 };
1375 bool flush = !netdev_xmit_more();
1376 u16 wrb_cnt;
1378 skb = be_xmit_workarounds(adapter, skb, &wrb_params);
1379 if (unlikely(!skb))
1380 goto drop;
1382 be_get_wrb_params_from_skb(adapter, skb, &wrb_params);
1384 wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1385 if (unlikely(!wrb_cnt)) {
1386 dev_kfree_skb_any(skb);
1387 goto drop;
1390 /* if os2bmc is enabled and if the pkt is destined to bmc,
1391 * enqueue the pkt a 2nd time with mgmt bit set.
1393 if (be_send_pkt_to_bmc(adapter, &skb)) {
1394 BE_WRB_F_SET(wrb_params.features, OS2BMC, 1);
1395 wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1396 if (unlikely(!wrb_cnt))
1397 goto drop;
1398 else
1399 skb_get(skb);
1402 if (be_is_txq_full(txo)) {
1403 netif_stop_subqueue(netdev, q_idx);
1404 tx_stats(txo)->tx_stops++;
1407 if (flush || __netif_subqueue_stopped(netdev, q_idx))
1408 be_xmit_flush(adapter, txo);
1410 return NETDEV_TX_OK;
1411 drop:
1412 tx_stats(txo)->tx_drv_drops++;
1413 /* Flush the already enqueued tx requests */
1414 if (flush && txo->pend_wrb_cnt)
1415 be_xmit_flush(adapter, txo);
1417 return NETDEV_TX_OK;
1420 static void be_tx_timeout(struct net_device *netdev, unsigned int txqueue)
1422 struct be_adapter *adapter = netdev_priv(netdev);
1423 struct device *dev = &adapter->pdev->dev;
1424 struct be_tx_obj *txo;
1425 struct sk_buff *skb;
1426 struct tcphdr *tcphdr;
1427 struct udphdr *udphdr;
1428 u32 *entry;
1429 int status;
1430 int i, j;
1432 for_all_tx_queues(adapter, txo, i) {
1433 dev_info(dev, "TXQ Dump: %d H: %d T: %d used: %d, qid: 0x%x\n",
1434 i, txo->q.head, txo->q.tail,
1435 atomic_read(&txo->q.used), txo->q.id);
1437 entry = txo->q.dma_mem.va;
1438 for (j = 0; j < TX_Q_LEN * 4; j += 4) {
1439 if (entry[j] != 0 || entry[j + 1] != 0 ||
1440 entry[j + 2] != 0 || entry[j + 3] != 0) {
1441 dev_info(dev, "Entry %d 0x%x 0x%x 0x%x 0x%x\n",
1442 j, entry[j], entry[j + 1],
1443 entry[j + 2], entry[j + 3]);
1447 entry = txo->cq.dma_mem.va;
1448 dev_info(dev, "TXCQ Dump: %d H: %d T: %d used: %d\n",
1449 i, txo->cq.head, txo->cq.tail,
1450 atomic_read(&txo->cq.used));
1451 for (j = 0; j < TX_CQ_LEN * 4; j += 4) {
1452 if (entry[j] != 0 || entry[j + 1] != 0 ||
1453 entry[j + 2] != 0 || entry[j + 3] != 0) {
1454 dev_info(dev, "Entry %d 0x%x 0x%x 0x%x 0x%x\n",
1455 j, entry[j], entry[j + 1],
1456 entry[j + 2], entry[j + 3]);
1460 for (j = 0; j < TX_Q_LEN; j++) {
1461 if (txo->sent_skb_list[j]) {
1462 skb = txo->sent_skb_list[j];
1463 if (ip_hdr(skb)->protocol == IPPROTO_TCP) {
1464 tcphdr = tcp_hdr(skb);
1465 dev_info(dev, "TCP source port %d\n",
1466 ntohs(tcphdr->source));
1467 dev_info(dev, "TCP dest port %d\n",
1468 ntohs(tcphdr->dest));
1469 dev_info(dev, "TCP sequence num %u\n",
1470 ntohl(tcphdr->seq));
1471 dev_info(dev, "TCP ack_seq %u\n",
1472 ntohl(tcphdr->ack_seq));
1473 } else if (ip_hdr(skb)->protocol ==
1474 IPPROTO_UDP) {
1475 udphdr = udp_hdr(skb);
1476 dev_info(dev, "UDP source port %d\n",
1477 ntohs(udphdr->source));
1478 dev_info(dev, "UDP dest port %d\n",
1479 ntohs(udphdr->dest));
1481 dev_info(dev, "skb[%d] %p len %d proto 0x%x\n",
1482 j, skb, skb->len, skb->protocol);
1487 if (lancer_chip(adapter)) {
1488 dev_info(dev, "Initiating reset due to tx timeout\n");
1489 dev_info(dev, "Resetting adapter\n");
1490 status = lancer_physdev_ctrl(adapter,
1491 PHYSDEV_CONTROL_FW_RESET_MASK);
1492 if (status)
1493 dev_err(dev, "Reset failed .. Reboot server\n");
1497 static inline bool be_in_all_promisc(struct be_adapter *adapter)
1499 return (adapter->if_flags & BE_IF_FLAGS_ALL_PROMISCUOUS) ==
1500 BE_IF_FLAGS_ALL_PROMISCUOUS;
1503 static int be_set_vlan_promisc(struct be_adapter *adapter)
1505 struct device *dev = &adapter->pdev->dev;
1506 int status;
1508 if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS)
1509 return 0;
1511 status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, ON);
1512 if (!status) {
1513 dev_info(dev, "Enabled VLAN promiscuous mode\n");
1514 adapter->if_flags |= BE_IF_FLAGS_VLAN_PROMISCUOUS;
1515 } else {
1516 dev_err(dev, "Failed to enable VLAN promiscuous mode\n");
1518 return status;
1521 static int be_clear_vlan_promisc(struct be_adapter *adapter)
1523 struct device *dev = &adapter->pdev->dev;
1524 int status;
1526 status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, OFF);
1527 if (!status) {
1528 dev_info(dev, "Disabling VLAN promiscuous mode\n");
1529 adapter->if_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
1531 return status;
1535 * A max of 64 (BE_NUM_VLANS_SUPPORTED) vlans can be configured in BE.
1536 * If the user configures more, place BE in vlan promiscuous mode.
1538 static int be_vid_config(struct be_adapter *adapter)
1540 struct device *dev = &adapter->pdev->dev;
1541 u16 vids[BE_NUM_VLANS_SUPPORTED];
1542 u16 num = 0, i = 0;
1543 int status = 0;
1545 /* No need to change the VLAN state if the I/F is in promiscuous */
1546 if (adapter->netdev->flags & IFF_PROMISC)
1547 return 0;
1549 if (adapter->vlans_added > be_max_vlans(adapter))
1550 return be_set_vlan_promisc(adapter);
1552 if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
1553 status = be_clear_vlan_promisc(adapter);
1554 if (status)
1555 return status;
1557 /* Construct VLAN Table to give to HW */
1558 for_each_set_bit(i, adapter->vids, VLAN_N_VID)
1559 vids[num++] = cpu_to_le16(i);
1561 status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0);
1562 if (status) {
1563 dev_err(dev, "Setting HW VLAN filtering failed\n");
1564 /* Set to VLAN promisc mode as setting VLAN filter failed */
1565 if (addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_VLANS ||
1566 addl_status(status) ==
1567 MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
1568 return be_set_vlan_promisc(adapter);
1570 return status;
1573 static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1575 struct be_adapter *adapter = netdev_priv(netdev);
1576 int status = 0;
1578 mutex_lock(&adapter->rx_filter_lock);
1580 /* Packets with VID 0 are always received by Lancer by default */
1581 if (lancer_chip(adapter) && vid == 0)
1582 goto done;
1584 if (test_bit(vid, adapter->vids))
1585 goto done;
1587 set_bit(vid, adapter->vids);
1588 adapter->vlans_added++;
1590 status = be_vid_config(adapter);
1591 done:
1592 mutex_unlock(&adapter->rx_filter_lock);
1593 return status;
1596 static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid)
1598 struct be_adapter *adapter = netdev_priv(netdev);
1599 int status = 0;
1601 mutex_lock(&adapter->rx_filter_lock);
1603 /* Packets with VID 0 are always received by Lancer by default */
1604 if (lancer_chip(adapter) && vid == 0)
1605 goto done;
1607 if (!test_bit(vid, adapter->vids))
1608 goto done;
1610 clear_bit(vid, adapter->vids);
1611 adapter->vlans_added--;
1613 status = be_vid_config(adapter);
1614 done:
1615 mutex_unlock(&adapter->rx_filter_lock);
1616 return status;
1619 static void be_set_all_promisc(struct be_adapter *adapter)
1621 be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, ON);
1622 adapter->if_flags |= BE_IF_FLAGS_ALL_PROMISCUOUS;
1625 static void be_set_mc_promisc(struct be_adapter *adapter)
1627 int status;
1629 if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS)
1630 return;
1632 status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MCAST_PROMISCUOUS, ON);
1633 if (!status)
1634 adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS;
1637 static void be_set_uc_promisc(struct be_adapter *adapter)
1639 int status;
1641 if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS)
1642 return;
1644 status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, ON);
1645 if (!status)
1646 adapter->if_flags |= BE_IF_FLAGS_PROMISCUOUS;
1649 static void be_clear_uc_promisc(struct be_adapter *adapter)
1651 int status;
1653 if (!(adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS))
1654 return;
1656 status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, OFF);
1657 if (!status)
1658 adapter->if_flags &= ~BE_IF_FLAGS_PROMISCUOUS;
1661 /* The below 2 functions are the callback args for __dev_mc_sync/dev_uc_sync().
1662 * We use a single callback function for both sync and unsync. We really don't
1663 * add/remove addresses through this callback. But, we use it to detect changes
1664 * to the uc/mc lists. The entire uc/mc list is programmed in be_set_rx_mode().
1666 static int be_uc_list_update(struct net_device *netdev,
1667 const unsigned char *addr)
1669 struct be_adapter *adapter = netdev_priv(netdev);
1671 adapter->update_uc_list = true;
1672 return 0;
1675 static int be_mc_list_update(struct net_device *netdev,
1676 const unsigned char *addr)
1678 struct be_adapter *adapter = netdev_priv(netdev);
1680 adapter->update_mc_list = true;
1681 return 0;
1684 static void be_set_mc_list(struct be_adapter *adapter)
1686 struct net_device *netdev = adapter->netdev;
1687 struct netdev_hw_addr *ha;
1688 bool mc_promisc = false;
1689 int status;
1691 netif_addr_lock_bh(netdev);
1692 __dev_mc_sync(netdev, be_mc_list_update, be_mc_list_update);
1694 if (netdev->flags & IFF_PROMISC) {
1695 adapter->update_mc_list = false;
1696 } else if (netdev->flags & IFF_ALLMULTI ||
1697 netdev_mc_count(netdev) > be_max_mc(adapter)) {
1698 /* Enable multicast promisc if num configured exceeds
1699 * what we support
1701 mc_promisc = true;
1702 adapter->update_mc_list = false;
1703 } else if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS) {
1704 /* Update mc-list unconditionally if the iface was previously
1705 * in mc-promisc mode and now is out of that mode.
1707 adapter->update_mc_list = true;
1710 if (adapter->update_mc_list) {
1711 int i = 0;
1713 /* cache the mc-list in adapter */
1714 netdev_for_each_mc_addr(ha, netdev) {
1715 ether_addr_copy(adapter->mc_list[i].mac, ha->addr);
1716 i++;
1718 adapter->mc_count = netdev_mc_count(netdev);
1720 netif_addr_unlock_bh(netdev);
1722 if (mc_promisc) {
1723 be_set_mc_promisc(adapter);
1724 } else if (adapter->update_mc_list) {
1725 status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
1726 if (!status)
1727 adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
1728 else
1729 be_set_mc_promisc(adapter);
1731 adapter->update_mc_list = false;
1735 static void be_clear_mc_list(struct be_adapter *adapter)
1737 struct net_device *netdev = adapter->netdev;
1739 __dev_mc_unsync(netdev, NULL);
1740 be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, OFF);
1741 adapter->mc_count = 0;
1744 static int be_uc_mac_add(struct be_adapter *adapter, int uc_idx)
1746 if (ether_addr_equal(adapter->uc_list[uc_idx].mac, adapter->dev_mac)) {
1747 adapter->pmac_id[uc_idx + 1] = adapter->pmac_id[0];
1748 return 0;
1751 return be_cmd_pmac_add(adapter, adapter->uc_list[uc_idx].mac,
1752 adapter->if_handle,
1753 &adapter->pmac_id[uc_idx + 1], 0);
1756 static void be_uc_mac_del(struct be_adapter *adapter, int pmac_id)
1758 if (pmac_id == adapter->pmac_id[0])
1759 return;
1761 be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
1764 static void be_set_uc_list(struct be_adapter *adapter)
1766 struct net_device *netdev = adapter->netdev;
1767 struct netdev_hw_addr *ha;
1768 bool uc_promisc = false;
1769 int curr_uc_macs = 0, i;
1771 netif_addr_lock_bh(netdev);
1772 __dev_uc_sync(netdev, be_uc_list_update, be_uc_list_update);
1774 if (netdev->flags & IFF_PROMISC) {
1775 adapter->update_uc_list = false;
1776 } else if (netdev_uc_count(netdev) > (be_max_uc(adapter) - 1)) {
1777 uc_promisc = true;
1778 adapter->update_uc_list = false;
1779 } else if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS) {
1780 /* Update uc-list unconditionally if the iface was previously
1781 * in uc-promisc mode and now is out of that mode.
1783 adapter->update_uc_list = true;
1786 if (adapter->update_uc_list) {
1787 /* cache the uc-list in adapter array */
1788 i = 0;
1789 netdev_for_each_uc_addr(ha, netdev) {
1790 ether_addr_copy(adapter->uc_list[i].mac, ha->addr);
1791 i++;
1793 curr_uc_macs = netdev_uc_count(netdev);
1795 netif_addr_unlock_bh(netdev);
1797 if (uc_promisc) {
1798 be_set_uc_promisc(adapter);
1799 } else if (adapter->update_uc_list) {
1800 be_clear_uc_promisc(adapter);
1802 for (i = 0; i < adapter->uc_macs; i++)
1803 be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1805 for (i = 0; i < curr_uc_macs; i++)
1806 be_uc_mac_add(adapter, i);
1807 adapter->uc_macs = curr_uc_macs;
1808 adapter->update_uc_list = false;
1812 static void be_clear_uc_list(struct be_adapter *adapter)
1814 struct net_device *netdev = adapter->netdev;
1815 int i;
1817 __dev_uc_unsync(netdev, NULL);
1818 for (i = 0; i < adapter->uc_macs; i++)
1819 be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1821 adapter->uc_macs = 0;
1824 static void __be_set_rx_mode(struct be_adapter *adapter)
1826 struct net_device *netdev = adapter->netdev;
1828 mutex_lock(&adapter->rx_filter_lock);
1830 if (netdev->flags & IFF_PROMISC) {
1831 if (!be_in_all_promisc(adapter))
1832 be_set_all_promisc(adapter);
1833 } else if (be_in_all_promisc(adapter)) {
1834 /* We need to re-program the vlan-list or clear
1835 * vlan-promisc mode (if needed) when the interface
1836 * comes out of promisc mode.
1838 be_vid_config(adapter);
1841 be_set_uc_list(adapter);
1842 be_set_mc_list(adapter);
1844 mutex_unlock(&adapter->rx_filter_lock);
1847 static void be_work_set_rx_mode(struct work_struct *work)
1849 struct be_cmd_work *cmd_work =
1850 container_of(work, struct be_cmd_work, work);
1852 __be_set_rx_mode(cmd_work->adapter);
1853 kfree(cmd_work);
1856 static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
1858 struct be_adapter *adapter = netdev_priv(netdev);
1859 struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1860 int status;
1862 if (!sriov_enabled(adapter))
1863 return -EPERM;
1865 if (!is_valid_ether_addr(mac) || vf >= adapter->num_vfs)
1866 return -EINVAL;
1868 /* Proceed further only if the user-provided MAC is different
1869 * from the active MAC
1871 if (ether_addr_equal(mac, vf_cfg->mac_addr))
1872 return 0;
1874 if (BEx_chip(adapter)) {
1875 be_cmd_pmac_del(adapter, vf_cfg->if_handle, vf_cfg->pmac_id,
1876 vf + 1);
1878 status = be_cmd_pmac_add(adapter, mac, vf_cfg->if_handle,
1879 &vf_cfg->pmac_id, vf + 1);
1880 } else {
1881 status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
1882 vf + 1);
1885 if (status) {
1886 dev_err(&adapter->pdev->dev, "MAC %pM set on VF %d Failed: %#x",
1887 mac, vf, status);
1888 return be_cmd_status(status);
1891 ether_addr_copy(vf_cfg->mac_addr, mac);
1893 return 0;
1896 static int be_get_vf_config(struct net_device *netdev, int vf,
1897 struct ifla_vf_info *vi)
1899 struct be_adapter *adapter = netdev_priv(netdev);
1900 struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1902 if (!sriov_enabled(adapter))
1903 return -EPERM;
1905 if (vf >= adapter->num_vfs)
1906 return -EINVAL;
1908 vi->vf = vf;
1909 vi->max_tx_rate = vf_cfg->tx_rate;
1910 vi->min_tx_rate = 0;
1911 vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK;
1912 vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT;
1913 memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN);
1914 vi->linkstate = adapter->vf_cfg[vf].plink_tracking;
1915 vi->spoofchk = adapter->vf_cfg[vf].spoofchk;
1917 return 0;
1920 static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan)
1922 struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1923 u16 vids[BE_NUM_VLANS_SUPPORTED];
1924 int vf_if_id = vf_cfg->if_handle;
1925 int status;
1927 /* Enable Transparent VLAN Tagging */
1928 status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0, 0);
1929 if (status)
1930 return status;
1932 /* Clear pre-programmed VLAN filters on VF if any, if TVT is enabled */
1933 vids[0] = 0;
1934 status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1);
1935 if (!status)
1936 dev_info(&adapter->pdev->dev,
1937 "Cleared guest VLANs on VF%d", vf);
1939 /* After TVT is enabled, disallow VFs to program VLAN filters */
1940 if (vf_cfg->privileges & BE_PRIV_FILTMGMT) {
1941 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges &
1942 ~BE_PRIV_FILTMGMT, vf + 1);
1943 if (!status)
1944 vf_cfg->privileges &= ~BE_PRIV_FILTMGMT;
1946 return 0;
1949 static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
1951 struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1952 struct device *dev = &adapter->pdev->dev;
1953 int status;
1955 /* Reset Transparent VLAN Tagging. */
1956 status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1,
1957 vf_cfg->if_handle, 0, 0);
1958 if (status)
1959 return status;
1961 /* Allow VFs to program VLAN filtering */
1962 if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
1963 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges |
1964 BE_PRIV_FILTMGMT, vf + 1);
1965 if (!status) {
1966 vf_cfg->privileges |= BE_PRIV_FILTMGMT;
1967 dev_info(dev, "VF%d: FILTMGMT priv enabled", vf);
1971 dev_info(dev,
1972 "Disable/re-enable i/f in VM to clear Transparent VLAN tag");
1973 return 0;
1976 static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
1977 __be16 vlan_proto)
1979 struct be_adapter *adapter = netdev_priv(netdev);
1980 struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1981 int status;
1983 if (!sriov_enabled(adapter))
1984 return -EPERM;
1986 if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7)
1987 return -EINVAL;
1989 if (vlan_proto != htons(ETH_P_8021Q))
1990 return -EPROTONOSUPPORT;
1992 if (vlan || qos) {
1993 vlan |= qos << VLAN_PRIO_SHIFT;
1994 status = be_set_vf_tvt(adapter, vf, vlan);
1995 } else {
1996 status = be_clear_vf_tvt(adapter, vf);
1999 if (status) {
2000 dev_err(&adapter->pdev->dev,
2001 "VLAN %d config on VF %d failed : %#x\n", vlan, vf,
2002 status);
2003 return be_cmd_status(status);
2006 vf_cfg->vlan_tag = vlan;
2007 return 0;
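/* Illustrative example (numbers not from the driver itself): with vlan=100 and
 * qos=5, the transparent tag programmed above is 100 | (5 << VLAN_PRIO_SHIFT),
 * i.e. 100 | (5 << 13) = 0xA064, so the priority bits ride in the top three
 * bits of the 16-bit tag while the VID occupies the low 12 bits.
 */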
2010 static int be_set_vf_tx_rate(struct net_device *netdev, int vf,
2011 int min_tx_rate, int max_tx_rate)
2013 struct be_adapter *adapter = netdev_priv(netdev);
2014 struct device *dev = &adapter->pdev->dev;
2015 int percent_rate, status = 0;
2016 u16 link_speed = 0;
2017 u8 link_status;
2019 if (!sriov_enabled(adapter))
2020 return -EPERM;
2022 if (vf >= adapter->num_vfs)
2023 return -EINVAL;
2025 if (min_tx_rate)
2026 return -EINVAL;
2028 if (!max_tx_rate)
2029 goto config_qos;
2031 status = be_cmd_link_status_query(adapter, &link_speed,
2032 &link_status, 0);
2033 if (status)
2034 goto err;
2036 if (!link_status) {
2037 dev_err(dev, "TX-rate setting not allowed when link is down\n");
2038 status = -ENETDOWN;
2039 goto err;
2042 if (max_tx_rate < 100 || max_tx_rate > link_speed) {
2043 dev_err(dev, "TX-rate must be between 100 and %d Mbps\n",
2044 link_speed);
2045 status = -EINVAL;
2046 goto err;
2049 /* On Skyhawk the QOS setting must be done only as a % value */
2050 percent_rate = link_speed / 100;
2051 if (skyhawk_chip(adapter) && (max_tx_rate % percent_rate)) {
2052 dev_err(dev, "TX-rate must be a multiple of %d Mbps\n",
2053 percent_rate);
2054 status = -EINVAL;
2055 goto err;
2058 config_qos:
2059 status = be_cmd_config_qos(adapter, max_tx_rate, link_speed, vf + 1);
2060 if (status)
2061 goto err;
2063 adapter->vf_cfg[vf].tx_rate = max_tx_rate;
2064 return 0;
2066 err:
2067 dev_err(dev, "TX-rate setting of %dMbps on VF%d failed\n",
2068 max_tx_rate, vf);
2069 return be_cmd_status(status);
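/* Worked example for the checks above (illustrative numbers): on a 10Gbps link
 * be_cmd_link_status_query() reports link_speed = 10000, so percent_rate = 100;
 * on Skyhawk a requested max_tx_rate must then be a multiple of 100 Mbps
 * (1% granularity) and lie between 100 and 10000 Mbps.
 */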
2072 static int be_set_vf_link_state(struct net_device *netdev, int vf,
2073 int link_state)
2075 struct be_adapter *adapter = netdev_priv(netdev);
2076 int status;
2078 if (!sriov_enabled(adapter))
2079 return -EPERM;
2081 if (vf >= adapter->num_vfs)
2082 return -EINVAL;
2084 status = be_cmd_set_logical_link_config(adapter, link_state, vf+1);
2085 if (status) {
2086 dev_err(&adapter->pdev->dev,
2087 "Link state change on VF %d failed: %#x\n", vf, status);
2088 return be_cmd_status(status);
2091 adapter->vf_cfg[vf].plink_tracking = link_state;
2093 return 0;
2096 static int be_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable)
2098 struct be_adapter *adapter = netdev_priv(netdev);
2099 struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
2100 u8 spoofchk;
2101 int status;
2103 if (!sriov_enabled(adapter))
2104 return -EPERM;
2106 if (vf >= adapter->num_vfs)
2107 return -EINVAL;
2109 if (BEx_chip(adapter))
2110 return -EOPNOTSUPP;
2112 if (enable == vf_cfg->spoofchk)
2113 return 0;
2115 spoofchk = enable ? ENABLE_MAC_SPOOFCHK : DISABLE_MAC_SPOOFCHK;
2117 status = be_cmd_set_hsw_config(adapter, 0, vf + 1, vf_cfg->if_handle,
2118 0, spoofchk);
2119 if (status) {
2120 dev_err(&adapter->pdev->dev,
2121 "Spoofchk change on VF %d failed: %#x\n", vf, status);
2122 return be_cmd_status(status);
2125 vf_cfg->spoofchk = enable;
2126 return 0;
2129 static void be_aic_update(struct be_aic_obj *aic, u64 rx_pkts, u64 tx_pkts,
2130 ulong now)
2132 aic->rx_pkts_prev = rx_pkts;
2133 aic->tx_reqs_prev = tx_pkts;
2134 aic->jiffies = now;
2137 static int be_get_new_eqd(struct be_eq_obj *eqo)
2139 struct be_adapter *adapter = eqo->adapter;
2140 int eqd, start;
2141 struct be_aic_obj *aic;
2142 struct be_rx_obj *rxo;
2143 struct be_tx_obj *txo;
2144 u64 rx_pkts = 0, tx_pkts = 0;
2145 ulong now;
2146 u32 pps, delta;
2147 int i;
2149 aic = &adapter->aic_obj[eqo->idx];
2150 if (!adapter->aic_enabled) {
2151 if (aic->jiffies)
2152 aic->jiffies = 0;
2153 eqd = aic->et_eqd;
2154 return eqd;
2157 for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
2158 do {
2159 start = u64_stats_fetch_begin_irq(&rxo->stats.sync);
2160 rx_pkts += rxo->stats.rx_pkts;
2161 } while (u64_stats_fetch_retry_irq(&rxo->stats.sync, start));
2164 for_all_tx_queues_on_eq(adapter, eqo, txo, i) {
2165 do {
2166 start = u64_stats_fetch_begin_irq(&txo->stats.sync);
2167 tx_pkts += txo->stats.tx_reqs;
2168 } while (u64_stats_fetch_retry_irq(&txo->stats.sync, start));
2171 /* Skip, if wrapped around or first calculation */
2172 now = jiffies;
2173 if (!aic->jiffies || time_before(now, aic->jiffies) ||
2174 rx_pkts < aic->rx_pkts_prev ||
2175 tx_pkts < aic->tx_reqs_prev) {
2176 be_aic_update(aic, rx_pkts, tx_pkts, now);
2177 return aic->prev_eqd;
2180 delta = jiffies_to_msecs(now - aic->jiffies);
2181 if (delta == 0)
2182 return aic->prev_eqd;
2184 pps = (((u32)(rx_pkts - aic->rx_pkts_prev) * 1000) / delta) +
2185 (((u32)(tx_pkts - aic->tx_reqs_prev) * 1000) / delta);
2186 eqd = (pps / 15000) << 2;
2188 if (eqd < 8)
2189 eqd = 0;
2190 eqd = min_t(u32, eqd, aic->max_eqd);
2191 eqd = max_t(u32, eqd, aic->min_eqd);
2193 be_aic_update(aic, rx_pkts, tx_pkts, now);
2195 return eqd;
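/* Rough worked example of the calculation above (illustrative numbers): if the
 * EQ's RX and TX queues moved 300,000 packets over a one-second sampling delta,
 * pps = 300000 and eqd = (300000 / 15000) << 2 = 80; values below 8 are forced
 * to 0 (no delay) and the result is clamped to the aic->min_eqd..aic->max_eqd
 * range.
 */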
2198 /* For Skyhawk-R only */
2199 static u32 be_get_eq_delay_mult_enc(struct be_eq_obj *eqo)
2201 struct be_adapter *adapter = eqo->adapter;
2202 struct be_aic_obj *aic = &adapter->aic_obj[eqo->idx];
2203 ulong now = jiffies;
2204 int eqd;
2205 u32 mult_enc;
2207 if (!adapter->aic_enabled)
2208 return 0;
2210 if (jiffies_to_msecs(now - aic->jiffies) < 1)
2211 eqd = aic->prev_eqd;
2212 else
2213 eqd = be_get_new_eqd(eqo);
2215 if (eqd > 100)
2216 mult_enc = R2I_DLY_ENC_1;
2217 else if (eqd > 60)
2218 mult_enc = R2I_DLY_ENC_2;
2219 else if (eqd > 20)
2220 mult_enc = R2I_DLY_ENC_3;
2221 else
2222 mult_enc = R2I_DLY_ENC_0;
2224 aic->prev_eqd = eqd;
2226 return mult_enc;
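/* Summary of the mapping above: a computed eqd above 100 selects R2I_DLY_ENC_1,
 * 61-100 selects R2I_DLY_ENC_2, 21-60 selects R2I_DLY_ENC_3, and 20 or below
 * selects R2I_DLY_ENC_0 (the lowest setting). The encoding is written into the
 * Skyhawk EQ doorbell when the EQ is re-armed from be_poll().
 */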
2229 void be_eqd_update(struct be_adapter *adapter, bool force_update)
2231 struct be_set_eqd set_eqd[MAX_EVT_QS];
2232 struct be_aic_obj *aic;
2233 struct be_eq_obj *eqo;
2234 int i, num = 0, eqd;
2236 for_all_evt_queues(adapter, eqo, i) {
2237 aic = &adapter->aic_obj[eqo->idx];
2238 eqd = be_get_new_eqd(eqo);
2239 if (force_update || eqd != aic->prev_eqd) {
2240 set_eqd[num].delay_multiplier = (eqd * 65)/100;
2241 set_eqd[num].eq_id = eqo->q.id;
2242 aic->prev_eqd = eqd;
2243 num++;
2247 if (num)
2248 be_cmd_modify_eqd(adapter, set_eqd, num);
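/* Note on the conversion above: the set_eqd entries passed to
 * be_cmd_modify_eqd() carry a delay_multiplier of 65% of the computed eqd;
 * e.g. an eqd of 80 is programmed as a multiplier of 52.
 */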
2251 static void be_rx_stats_update(struct be_rx_obj *rxo,
2252 struct be_rx_compl_info *rxcp)
2254 struct be_rx_stats *stats = rx_stats(rxo);
2256 u64_stats_update_begin(&stats->sync);
2257 stats->rx_compl++;
2258 stats->rx_bytes += rxcp->pkt_size;
2259 stats->rx_pkts++;
2260 if (rxcp->tunneled)
2261 stats->rx_vxlan_offload_pkts++;
2262 if (rxcp->pkt_type == BE_MULTICAST_PACKET)
2263 stats->rx_mcast_pkts++;
2264 if (rxcp->err)
2265 stats->rx_compl_err++;
2266 u64_stats_update_end(&stats->sync);
2269 static inline bool csum_passed(struct be_rx_compl_info *rxcp)
2271 /* L4 checksum is not reliable for non TCP/UDP packets.
2272 * Also ignore ipcksm for ipv6 pkts
2273 */
2274 return (rxcp->tcpf || rxcp->udpf) && rxcp->l4_csum &&
2275 (rxcp->ip_csum || rxcp->ipv6) && !rxcp->err;
2278 static struct be_rx_page_info *get_rx_page_info(struct be_rx_obj *rxo)
2280 struct be_adapter *adapter = rxo->adapter;
2281 struct be_rx_page_info *rx_page_info;
2282 struct be_queue_info *rxq = &rxo->q;
2283 u32 frag_idx = rxq->tail;
2285 rx_page_info = &rxo->page_info_tbl[frag_idx];
2286 BUG_ON(!rx_page_info->page);
2288 if (rx_page_info->last_frag) {
2289 dma_unmap_page(&adapter->pdev->dev,
2290 dma_unmap_addr(rx_page_info, bus),
2291 adapter->big_page_size, DMA_FROM_DEVICE);
2292 rx_page_info->last_frag = false;
2293 } else {
2294 dma_sync_single_for_cpu(&adapter->pdev->dev,
2295 dma_unmap_addr(rx_page_info, bus),
2296 rx_frag_size, DMA_FROM_DEVICE);
2299 queue_tail_inc(rxq);
2300 atomic_dec(&rxq->used);
2301 return rx_page_info;
2304 /* Throw away the data in the Rx completion */
2305 static void be_rx_compl_discard(struct be_rx_obj *rxo,
2306 struct be_rx_compl_info *rxcp)
2308 struct be_rx_page_info *page_info;
2309 u16 i, num_rcvd = rxcp->num_rcvd;
2311 for (i = 0; i < num_rcvd; i++) {
2312 page_info = get_rx_page_info(rxo);
2313 put_page(page_info->page);
2314 memset(page_info, 0, sizeof(*page_info));
2318 /*
2319 * skb_fill_rx_data forms a complete skb for an ether frame
2320 * indicated by rxcp.
2321 */
2322 static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
2323 struct be_rx_compl_info *rxcp)
2325 struct be_rx_page_info *page_info;
2326 u16 i, j;
2327 u16 hdr_len, curr_frag_len, remaining;
2328 u8 *start;
2330 page_info = get_rx_page_info(rxo);
2331 start = page_address(page_info->page) + page_info->page_offset;
2332 prefetch(start);
2334 /* Copy data in the first descriptor of this completion */
2335 curr_frag_len = min(rxcp->pkt_size, rx_frag_size);
2337 skb->len = curr_frag_len;
2338 if (curr_frag_len <= BE_HDR_LEN) { /* tiny packet */
2339 memcpy(skb->data, start, curr_frag_len);
2340 /* Complete packet has now been moved to data */
2341 put_page(page_info->page);
2342 skb->data_len = 0;
2343 skb->tail += curr_frag_len;
2344 } else {
2345 hdr_len = ETH_HLEN;
2346 memcpy(skb->data, start, hdr_len);
2347 skb_shinfo(skb)->nr_frags = 1;
2348 skb_frag_set_page(skb, 0, page_info->page);
2349 skb_frag_off_set(&skb_shinfo(skb)->frags[0],
2350 page_info->page_offset + hdr_len);
2351 skb_frag_size_set(&skb_shinfo(skb)->frags[0],
2352 curr_frag_len - hdr_len);
2353 skb->data_len = curr_frag_len - hdr_len;
2354 skb->truesize += rx_frag_size;
2355 skb->tail += hdr_len;
2357 page_info->page = NULL;
2359 if (rxcp->pkt_size <= rx_frag_size) {
2360 BUG_ON(rxcp->num_rcvd != 1);
2361 return;
2364 /* More frags present for this completion */
2365 remaining = rxcp->pkt_size - curr_frag_len;
2366 for (i = 1, j = 0; i < rxcp->num_rcvd; i++) {
2367 page_info = get_rx_page_info(rxo);
2368 curr_frag_len = min(remaining, rx_frag_size);
2370 /* Coalesce all frags from the same physical page in one slot */
2371 if (page_info->page_offset == 0) {
2372 /* Fresh page */
2373 j++;
2374 skb_frag_set_page(skb, j, page_info->page);
2375 skb_frag_off_set(&skb_shinfo(skb)->frags[j],
2376 page_info->page_offset);
2377 skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2378 skb_shinfo(skb)->nr_frags++;
2379 } else {
2380 put_page(page_info->page);
2383 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2384 skb->len += curr_frag_len;
2385 skb->data_len += curr_frag_len;
2386 skb->truesize += rx_frag_size;
2387 remaining -= curr_frag_len;
2388 page_info->page = NULL;
2390 BUG_ON(j > MAX_SKB_FRAGS);
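/* Behaviour note for the routine above (illustrative sizes): a frame that fits
 * within BE_HDR_LEN is copied wholesale into the linear skb area and its page
 * released; a typical 1500-byte frame instead has only the 14-byte Ethernet
 * header copied, with the remaining bytes left in place as page fragments
 * referenced by the skb.
 */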
2393 /* Process the RX completion indicated by rxcp when GRO is disabled */
2394 static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
2395 struct be_rx_compl_info *rxcp)
2397 struct be_adapter *adapter = rxo->adapter;
2398 struct net_device *netdev = adapter->netdev;
2399 struct sk_buff *skb;
2401 skb = netdev_alloc_skb_ip_align(netdev, BE_RX_SKB_ALLOC_SIZE);
2402 if (unlikely(!skb)) {
2403 rx_stats(rxo)->rx_drops_no_skbs++;
2404 be_rx_compl_discard(rxo, rxcp);
2405 return;
2408 skb_fill_rx_data(rxo, skb, rxcp);
2410 if (likely((netdev->features & NETIF_F_RXCSUM) && csum_passed(rxcp)))
2411 skb->ip_summed = CHECKSUM_UNNECESSARY;
2412 else
2413 skb_checksum_none_assert(skb);
2415 skb->protocol = eth_type_trans(skb, netdev);
2416 skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2417 if (netdev->features & NETIF_F_RXHASH)
2418 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2420 skb->csum_level = rxcp->tunneled;
2421 skb_mark_napi_id(skb, napi);
2423 if (rxcp->vlanf)
2424 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2426 netif_receive_skb(skb);
2429 /* Process the RX completion indicated by rxcp when GRO is enabled */
2430 static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
2431 struct napi_struct *napi,
2432 struct be_rx_compl_info *rxcp)
2434 struct be_adapter *adapter = rxo->adapter;
2435 struct be_rx_page_info *page_info;
2436 struct sk_buff *skb = NULL;
2437 u16 remaining, curr_frag_len;
2438 u16 i, j;
2440 skb = napi_get_frags(napi);
2441 if (!skb) {
2442 be_rx_compl_discard(rxo, rxcp);
2443 return;
2446 remaining = rxcp->pkt_size;
2447 for (i = 0, j = -1; i < rxcp->num_rcvd; i++) {
2448 page_info = get_rx_page_info(rxo);
2450 curr_frag_len = min(remaining, rx_frag_size);
2452 /* Coalesce all frags from the same physical page in one slot */
2453 if (i == 0 || page_info->page_offset == 0) {
2454 /* First frag or Fresh page */
2455 j++;
2456 skb_frag_set_page(skb, j, page_info->page);
2457 skb_frag_off_set(&skb_shinfo(skb)->frags[j],
2458 page_info->page_offset);
2459 skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2460 } else {
2461 put_page(page_info->page);
2463 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2464 skb->truesize += rx_frag_size;
2465 remaining -= curr_frag_len;
2466 memset(page_info, 0, sizeof(*page_info));
2468 BUG_ON(j > MAX_SKB_FRAGS);
2470 skb_shinfo(skb)->nr_frags = j + 1;
2471 skb->len = rxcp->pkt_size;
2472 skb->data_len = rxcp->pkt_size;
2473 skb->ip_summed = CHECKSUM_UNNECESSARY;
2474 skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2475 if (adapter->netdev->features & NETIF_F_RXHASH)
2476 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2478 skb->csum_level = rxcp->tunneled;
2480 if (rxcp->vlanf)
2481 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2483 napi_gro_frags(napi);
2486 static void be_parse_rx_compl_v1(struct be_eth_rx_compl *compl,
2487 struct be_rx_compl_info *rxcp)
2489 rxcp->pkt_size = GET_RX_COMPL_V1_BITS(pktsize, compl);
2490 rxcp->vlanf = GET_RX_COMPL_V1_BITS(vtp, compl);
2491 rxcp->err = GET_RX_COMPL_V1_BITS(err, compl);
2492 rxcp->tcpf = GET_RX_COMPL_V1_BITS(tcpf, compl);
2493 rxcp->udpf = GET_RX_COMPL_V1_BITS(udpf, compl);
2494 rxcp->ip_csum = GET_RX_COMPL_V1_BITS(ipcksm, compl);
2495 rxcp->l4_csum = GET_RX_COMPL_V1_BITS(l4_cksm, compl);
2496 rxcp->ipv6 = GET_RX_COMPL_V1_BITS(ip_version, compl);
2497 rxcp->num_rcvd = GET_RX_COMPL_V1_BITS(numfrags, compl);
2498 rxcp->pkt_type = GET_RX_COMPL_V1_BITS(cast_enc, compl);
2499 rxcp->rss_hash = GET_RX_COMPL_V1_BITS(rsshash, compl);
2500 if (rxcp->vlanf) {
2501 rxcp->qnq = GET_RX_COMPL_V1_BITS(qnq, compl);
2502 rxcp->vlan_tag = GET_RX_COMPL_V1_BITS(vlan_tag, compl);
2504 rxcp->port = GET_RX_COMPL_V1_BITS(port, compl);
2505 rxcp->tunneled =
2506 GET_RX_COMPL_V1_BITS(tunneled, compl);
2509 static void be_parse_rx_compl_v0(struct be_eth_rx_compl *compl,
2510 struct be_rx_compl_info *rxcp)
2512 rxcp->pkt_size = GET_RX_COMPL_V0_BITS(pktsize, compl);
2513 rxcp->vlanf = GET_RX_COMPL_V0_BITS(vtp, compl);
2514 rxcp->err = GET_RX_COMPL_V0_BITS(err, compl);
2515 rxcp->tcpf = GET_RX_COMPL_V0_BITS(tcpf, compl);
2516 rxcp->udpf = GET_RX_COMPL_V0_BITS(udpf, compl);
2517 rxcp->ip_csum = GET_RX_COMPL_V0_BITS(ipcksm, compl);
2518 rxcp->l4_csum = GET_RX_COMPL_V0_BITS(l4_cksm, compl);
2519 rxcp->ipv6 = GET_RX_COMPL_V0_BITS(ip_version, compl);
2520 rxcp->num_rcvd = GET_RX_COMPL_V0_BITS(numfrags, compl);
2521 rxcp->pkt_type = GET_RX_COMPL_V0_BITS(cast_enc, compl);
2522 rxcp->rss_hash = GET_RX_COMPL_V0_BITS(rsshash, compl);
2523 if (rxcp->vlanf) {
2524 rxcp->qnq = GET_RX_COMPL_V0_BITS(qnq, compl);
2525 rxcp->vlan_tag = GET_RX_COMPL_V0_BITS(vlan_tag, compl);
2527 rxcp->port = GET_RX_COMPL_V0_BITS(port, compl);
2528 rxcp->ip_frag = GET_RX_COMPL_V0_BITS(ip_frag, compl);
2531 static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo)
2533 struct be_eth_rx_compl *compl = queue_tail_node(&rxo->cq);
2534 struct be_rx_compl_info *rxcp = &rxo->rxcp;
2535 struct be_adapter *adapter = rxo->adapter;
2537 /* For checking the valid bit it is Ok to use either definition as the
2538 * valid bit is at the same position in both v0 and v1 Rx compl */
2539 if (compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] == 0)
2540 return NULL;
2542 rmb();
2543 be_dws_le_to_cpu(compl, sizeof(*compl));
2545 if (adapter->be3_native)
2546 be_parse_rx_compl_v1(compl, rxcp);
2547 else
2548 be_parse_rx_compl_v0(compl, rxcp);
2550 if (rxcp->ip_frag)
2551 rxcp->l4_csum = 0;
2553 if (rxcp->vlanf) {
2554 /* In QNQ modes, if qnq bit is not set, then the packet was
2555 * tagged only with the transparent outer vlan-tag and must
2556 * not be treated as a vlan packet by host
2557 */
2558 if (be_is_qnq_mode(adapter) && !rxcp->qnq)
2559 rxcp->vlanf = 0;
2561 if (!lancer_chip(adapter))
2562 rxcp->vlan_tag = swab16(rxcp->vlan_tag);
2564 if (adapter->pvid == (rxcp->vlan_tag & VLAN_VID_MASK) &&
2565 !test_bit(rxcp->vlan_tag, adapter->vids))
2566 rxcp->vlanf = 0;
2569 /* As the compl has been parsed, reset it; we won't touch it again */
2570 compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] = 0;
2572 queue_tail_inc(&rxo->cq);
2573 return rxcp;
2576 static inline struct page *be_alloc_pages(u32 size, gfp_t gfp)
2578 u32 order = get_order(size);
2580 if (order > 0)
2581 gfp |= __GFP_COMP;
2582 return alloc_pages(gfp, order);
2585 /*
2586 * Allocate a page, split it to fragments of size rx_frag_size and post as
2587 * receive buffers to BE
2588 */
2589 static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed)
2591 struct be_adapter *adapter = rxo->adapter;
2592 struct be_rx_page_info *page_info = NULL, *prev_page_info = NULL;
2593 struct be_queue_info *rxq = &rxo->q;
2594 struct page *pagep = NULL;
2595 struct device *dev = &adapter->pdev->dev;
2596 struct be_eth_rx_d *rxd;
2597 u64 page_dmaaddr = 0, frag_dmaaddr;
2598 u32 posted, page_offset = 0, notify = 0;
2600 page_info = &rxo->page_info_tbl[rxq->head];
2601 for (posted = 0; posted < frags_needed && !page_info->page; posted++) {
2602 if (!pagep) {
2603 pagep = be_alloc_pages(adapter->big_page_size, gfp);
2604 if (unlikely(!pagep)) {
2605 rx_stats(rxo)->rx_post_fail++;
2606 break;
2608 page_dmaaddr = dma_map_page(dev, pagep, 0,
2609 adapter->big_page_size,
2610 DMA_FROM_DEVICE);
2611 if (dma_mapping_error(dev, page_dmaaddr)) {
2612 put_page(pagep);
2613 pagep = NULL;
2614 adapter->drv_stats.dma_map_errors++;
2615 break;
2617 page_offset = 0;
2618 } else {
2619 get_page(pagep);
2620 page_offset += rx_frag_size;
2622 page_info->page_offset = page_offset;
2623 page_info->page = pagep;
2625 rxd = queue_head_node(rxq);
2626 frag_dmaaddr = page_dmaaddr + page_info->page_offset;
2627 rxd->fragpa_lo = cpu_to_le32(frag_dmaaddr & 0xFFFFFFFF);
2628 rxd->fragpa_hi = cpu_to_le32(upper_32_bits(frag_dmaaddr));
2630 /* Any space left in the current big page for another frag? */
2631 if ((page_offset + rx_frag_size + rx_frag_size) >
2632 adapter->big_page_size) {
2633 pagep = NULL;
2634 page_info->last_frag = true;
2635 dma_unmap_addr_set(page_info, bus, page_dmaaddr);
2636 } else {
2637 dma_unmap_addr_set(page_info, bus, frag_dmaaddr);
2640 prev_page_info = page_info;
2641 queue_head_inc(rxq);
2642 page_info = &rxo->page_info_tbl[rxq->head];
2645 /* Mark the last frag of a page when we break out of the above loop
2646 * with no more slots available in the RXQ
2647 */
2648 if (pagep) {
2649 prev_page_info->last_frag = true;
2650 dma_unmap_addr_set(prev_page_info, bus, page_dmaaddr);
2653 if (posted) {
2654 atomic_add(posted, &rxq->used);
2655 if (rxo->rx_post_starved)
2656 rxo->rx_post_starved = false;
2657 do {
2658 notify = min(MAX_NUM_POST_ERX_DB, posted);
2659 be_rxq_notify(adapter, rxq->id, notify);
2660 posted -= notify;
2661 } while (posted);
2662 } else if (atomic_read(&rxq->used) == 0) {
2663 /* Let be_worker replenish when memory is available */
2664 rxo->rx_post_starved = true;
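/* Sizing note for the routine above (assuming the default rx_frag_size of 2048
 * and 4K pages): big_page_size works out to one page, so each allocated page is
 * carved into two RX fragments; the frag that finishes a page is flagged
 * last_frag and carries the DMA mapping that get_rx_page_info() later unmaps.
 */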
2668 static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status)
2670 switch (status) {
2671 case BE_TX_COMP_HDR_PARSE_ERR:
2672 tx_stats(txo)->tx_hdr_parse_err++;
2673 break;
2674 case BE_TX_COMP_NDMA_ERR:
2675 tx_stats(txo)->tx_dma_err++;
2676 break;
2677 case BE_TX_COMP_ACL_ERR:
2678 tx_stats(txo)->tx_spoof_check_err++;
2679 break;
2683 static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status)
2685 switch (status) {
2686 case LANCER_TX_COMP_LSO_ERR:
2687 tx_stats(txo)->tx_tso_err++;
2688 break;
2689 case LANCER_TX_COMP_HSW_DROP_MAC_ERR:
2690 case LANCER_TX_COMP_HSW_DROP_VLAN_ERR:
2691 tx_stats(txo)->tx_spoof_check_err++;
2692 break;
2693 case LANCER_TX_COMP_QINQ_ERR:
2694 tx_stats(txo)->tx_qinq_err++;
2695 break;
2696 case LANCER_TX_COMP_PARITY_ERR:
2697 tx_stats(txo)->tx_internal_parity_err++;
2698 break;
2699 case LANCER_TX_COMP_DMA_ERR:
2700 tx_stats(txo)->tx_dma_err++;
2701 break;
2702 case LANCER_TX_COMP_SGE_ERR:
2703 tx_stats(txo)->tx_sge_err++;
2704 break;
2708 static struct be_tx_compl_info *be_tx_compl_get(struct be_adapter *adapter,
2709 struct be_tx_obj *txo)
2711 struct be_queue_info *tx_cq = &txo->cq;
2712 struct be_tx_compl_info *txcp = &txo->txcp;
2713 struct be_eth_tx_compl *compl = queue_tail_node(tx_cq);
2715 if (compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0)
2716 return NULL;
2718 /* Ensure load ordering of valid bit dword and other dwords below */
2719 rmb();
2720 be_dws_le_to_cpu(compl, sizeof(*compl));
2722 txcp->status = GET_TX_COMPL_BITS(status, compl);
2723 txcp->end_index = GET_TX_COMPL_BITS(wrb_index, compl);
2725 if (txcp->status) {
2726 if (lancer_chip(adapter)) {
2727 lancer_update_tx_err(txo, txcp->status);
2728 /* Reset the adapter in case of TSO,
2729 * SGE or Parity error
2730 */
2731 if (txcp->status == LANCER_TX_COMP_LSO_ERR ||
2732 txcp->status == LANCER_TX_COMP_PARITY_ERR ||
2733 txcp->status == LANCER_TX_COMP_SGE_ERR)
2734 be_set_error(adapter, BE_ERROR_TX);
2735 } else {
2736 be_update_tx_err(txo, txcp->status);
2740 if (be_check_error(adapter, BE_ERROR_TX))
2741 return NULL;
2743 compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0;
2744 queue_tail_inc(tx_cq);
2745 return txcp;
2748 static u16 be_tx_compl_process(struct be_adapter *adapter,
2749 struct be_tx_obj *txo, u16 last_index)
2751 struct sk_buff **sent_skbs = txo->sent_skb_list;
2752 struct be_queue_info *txq = &txo->q;
2753 struct sk_buff *skb = NULL;
2754 bool unmap_skb_hdr = false;
2755 struct be_eth_wrb *wrb;
2756 u16 num_wrbs = 0;
2757 u32 frag_index;
2759 do {
2760 if (sent_skbs[txq->tail]) {
2761 /* Free skb from prev req */
2762 if (skb)
2763 dev_consume_skb_any(skb);
2764 skb = sent_skbs[txq->tail];
2765 sent_skbs[txq->tail] = NULL;
2766 queue_tail_inc(txq); /* skip hdr wrb */
2767 num_wrbs++;
2768 unmap_skb_hdr = true;
2770 wrb = queue_tail_node(txq);
2771 frag_index = txq->tail;
2772 unmap_tx_frag(&adapter->pdev->dev, wrb,
2773 (unmap_skb_hdr && skb_headlen(skb)));
2774 unmap_skb_hdr = false;
2775 queue_tail_inc(txq);
2776 num_wrbs++;
2777 } while (frag_index != last_index);
2778 dev_consume_skb_any(skb);
2780 return num_wrbs;
2783 /* Return the number of events in the event queue */
2784 static inline int events_get(struct be_eq_obj *eqo)
2786 struct be_eq_entry *eqe;
2787 int num = 0;
2789 do {
2790 eqe = queue_tail_node(&eqo->q);
2791 if (eqe->evt == 0)
2792 break;
2794 rmb();
2795 eqe->evt = 0;
2796 num++;
2797 queue_tail_inc(&eqo->q);
2798 } while (true);
2800 return num;
2803 /* Leaves the EQ in a disarmed state */
2804 static void be_eq_clean(struct be_eq_obj *eqo)
2806 int num = events_get(eqo);
2808 be_eq_notify(eqo->adapter, eqo->q.id, false, true, num, 0);
2811 /* Free posted rx buffers that were not used */
2812 static void be_rxq_clean(struct be_rx_obj *rxo)
2814 struct be_queue_info *rxq = &rxo->q;
2815 struct be_rx_page_info *page_info;
2817 while (atomic_read(&rxq->used) > 0) {
2818 page_info = get_rx_page_info(rxo);
2819 put_page(page_info->page);
2820 memset(page_info, 0, sizeof(*page_info));
2822 BUG_ON(atomic_read(&rxq->used));
2823 rxq->tail = 0;
2824 rxq->head = 0;
2827 static void be_rx_cq_clean(struct be_rx_obj *rxo)
2829 struct be_queue_info *rx_cq = &rxo->cq;
2830 struct be_rx_compl_info *rxcp;
2831 struct be_adapter *adapter = rxo->adapter;
2832 int flush_wait = 0;
2834 /* Consume pending rx completions.
2835 * Wait for the flush completion (identified by zero num_rcvd)
2836 * to arrive. Notify CQ even when there are no more CQ entries
2837 * for HW to flush partially coalesced CQ entries.
2838 * In Lancer, there is no need to wait for flush compl.
2839 */
2840 for (;;) {
2841 rxcp = be_rx_compl_get(rxo);
2842 if (!rxcp) {
2843 if (lancer_chip(adapter))
2844 break;
2846 if (flush_wait++ > 50 ||
2847 be_check_error(adapter,
2848 BE_ERROR_HW)) {
2849 dev_warn(&adapter->pdev->dev,
2850 "did not receive flush compl\n");
2851 break;
2853 be_cq_notify(adapter, rx_cq->id, true, 0);
2854 mdelay(1);
2855 } else {
2856 be_rx_compl_discard(rxo, rxcp);
2857 be_cq_notify(adapter, rx_cq->id, false, 1);
2858 if (rxcp->num_rcvd == 0)
2859 break;
2863 /* After cleanup, leave the CQ in unarmed state */
2864 be_cq_notify(adapter, rx_cq->id, false, 0);
2867 static void be_tx_compl_clean(struct be_adapter *adapter)
2869 struct device *dev = &adapter->pdev->dev;
2870 u16 cmpl = 0, timeo = 0, num_wrbs = 0;
2871 struct be_tx_compl_info *txcp;
2872 struct be_queue_info *txq;
2873 u32 end_idx, notified_idx;
2874 struct be_tx_obj *txo;
2875 int i, pending_txqs;
2877 /* Stop polling for compls when HW has been silent for 10ms */
2878 do {
2879 pending_txqs = adapter->num_tx_qs;
2881 for_all_tx_queues(adapter, txo, i) {
2882 cmpl = 0;
2883 num_wrbs = 0;
2884 txq = &txo->q;
2885 while ((txcp = be_tx_compl_get(adapter, txo))) {
2886 num_wrbs +=
2887 be_tx_compl_process(adapter, txo,
2888 txcp->end_index);
2889 cmpl++;
2891 if (cmpl) {
2892 be_cq_notify(adapter, txo->cq.id, false, cmpl);
2893 atomic_sub(num_wrbs, &txq->used);
2894 timeo = 0;
2896 if (!be_is_tx_compl_pending(txo))
2897 pending_txqs--;
2900 if (pending_txqs == 0 || ++timeo > 10 ||
2901 be_check_error(adapter, BE_ERROR_HW))
2902 break;
2904 mdelay(1);
2905 } while (true);
2907 /* Free enqueued TX that was never notified to HW */
2908 for_all_tx_queues(adapter, txo, i) {
2909 txq = &txo->q;
2911 if (atomic_read(&txq->used)) {
2912 dev_info(dev, "txq%d: cleaning %d pending tx-wrbs\n",
2913 i, atomic_read(&txq->used));
2914 notified_idx = txq->tail;
2915 end_idx = txq->tail;
2916 index_adv(&end_idx, atomic_read(&txq->used) - 1,
2917 txq->len);
2918 /* Use the tx-compl process logic to handle requests
2919 * that were not sent to the HW.
2920 */
2921 num_wrbs = be_tx_compl_process(adapter, txo, end_idx);
2922 atomic_sub(num_wrbs, &txq->used);
2923 BUG_ON(atomic_read(&txq->used));
2924 txo->pend_wrb_cnt = 0;
2925 /* Since hw was never notified of these requests,
2926 * reset TXQ indices
2927 */
2928 txq->head = notified_idx;
2929 txq->tail = notified_idx;
2934 static void be_evt_queues_destroy(struct be_adapter *adapter)
2936 struct be_eq_obj *eqo;
2937 int i;
2939 for_all_evt_queues(adapter, eqo, i) {
2940 if (eqo->q.created) {
2941 be_eq_clean(eqo);
2942 be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ);
2943 netif_napi_del(&eqo->napi);
2944 free_cpumask_var(eqo->affinity_mask);
2946 be_queue_free(adapter, &eqo->q);
2950 static int be_evt_queues_create(struct be_adapter *adapter)
2952 struct be_queue_info *eq;
2953 struct be_eq_obj *eqo;
2954 struct be_aic_obj *aic;
2955 int i, rc;
2957 /* need enough EQs to service both RX and TX queues */
2958 adapter->num_evt_qs = min_t(u16, num_irqs(adapter),
2959 max(adapter->cfg_num_rx_irqs,
2960 adapter->cfg_num_tx_irqs));
2962 adapter->aic_enabled = true;
2964 for_all_evt_queues(adapter, eqo, i) {
2965 int numa_node = dev_to_node(&adapter->pdev->dev);
2967 aic = &adapter->aic_obj[i];
2968 eqo->adapter = adapter;
2969 eqo->idx = i;
2970 aic->max_eqd = BE_MAX_EQD;
2972 eq = &eqo->q;
2973 rc = be_queue_alloc(adapter, eq, EVNT_Q_LEN,
2974 sizeof(struct be_eq_entry));
2975 if (rc)
2976 return rc;
2978 rc = be_cmd_eq_create(adapter, eqo);
2979 if (rc)
2980 return rc;
2982 if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
2983 return -ENOMEM;
2984 cpumask_set_cpu(cpumask_local_spread(i, numa_node),
2985 eqo->affinity_mask);
2986 netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
2987 BE_NAPI_WEIGHT);
2989 return 0;
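/* Sizing note for the routine above (illustrative numbers): num_evt_qs is
 * min(available IRQs, max(cfg_num_rx_irqs, cfg_num_tx_irqs)), so with 8 MSI-X
 * vectors and 6 RX / 4 TX interrupts requested, 6 EQs are created, each with an
 * affinity mask chosen by cpumask_local_spread() over the device's NUMA node.
 */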
2992 static void be_mcc_queues_destroy(struct be_adapter *adapter)
2994 struct be_queue_info *q;
2996 q = &adapter->mcc_obj.q;
2997 if (q->created)
2998 be_cmd_q_destroy(adapter, q, QTYPE_MCCQ);
2999 be_queue_free(adapter, q);
3001 q = &adapter->mcc_obj.cq;
3002 if (q->created)
3003 be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3004 be_queue_free(adapter, q);
3007 /* Must be called only after TX qs are created as MCC shares TX EQ */
3008 static int be_mcc_queues_create(struct be_adapter *adapter)
3010 struct be_queue_info *q, *cq;
3012 cq = &adapter->mcc_obj.cq;
3013 if (be_queue_alloc(adapter, cq, MCC_CQ_LEN,
3014 sizeof(struct be_mcc_compl)))
3015 goto err;
3017 /* Use the default EQ for MCC completions */
3018 if (be_cmd_cq_create(adapter, cq, &mcc_eqo(adapter)->q, true, 0))
3019 goto mcc_cq_free;
3021 q = &adapter->mcc_obj.q;
3022 if (be_queue_alloc(adapter, q, MCC_Q_LEN, sizeof(struct be_mcc_wrb)))
3023 goto mcc_cq_destroy;
3025 if (be_cmd_mccq_create(adapter, q, cq))
3026 goto mcc_q_free;
3028 return 0;
3030 mcc_q_free:
3031 be_queue_free(adapter, q);
3032 mcc_cq_destroy:
3033 be_cmd_q_destroy(adapter, cq, QTYPE_CQ);
3034 mcc_cq_free:
3035 be_queue_free(adapter, cq);
3036 err:
3037 return -1;
3040 static void be_tx_queues_destroy(struct be_adapter *adapter)
3042 struct be_queue_info *q;
3043 struct be_tx_obj *txo;
3044 u8 i;
3046 for_all_tx_queues(adapter, txo, i) {
3047 q = &txo->q;
3048 if (q->created)
3049 be_cmd_q_destroy(adapter, q, QTYPE_TXQ);
3050 be_queue_free(adapter, q);
3052 q = &txo->cq;
3053 if (q->created)
3054 be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3055 be_queue_free(adapter, q);
3059 static int be_tx_qs_create(struct be_adapter *adapter)
3061 struct be_queue_info *cq;
3062 struct be_tx_obj *txo;
3063 struct be_eq_obj *eqo;
3064 int status, i;
3066 adapter->num_tx_qs = min(adapter->num_evt_qs, adapter->cfg_num_tx_irqs);
3068 for_all_tx_queues(adapter, txo, i) {
3069 cq = &txo->cq;
3070 status = be_queue_alloc(adapter, cq, TX_CQ_LEN,
3071 sizeof(struct be_eth_tx_compl));
3072 if (status)
3073 return status;
3075 u64_stats_init(&txo->stats.sync);
3076 u64_stats_init(&txo->stats.sync_compl);
3078 /* If num_evt_qs is less than num_tx_qs, then more than
3079 * one txq shares an eq
3080 */
3081 eqo = &adapter->eq_obj[i % adapter->num_evt_qs];
3082 status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3);
3083 if (status)
3084 return status;
3086 status = be_queue_alloc(adapter, &txo->q, TX_Q_LEN,
3087 sizeof(struct be_eth_wrb));
3088 if (status)
3089 return status;
3091 status = be_cmd_txq_create(adapter, txo);
3092 if (status)
3093 return status;
3095 netif_set_xps_queue(adapter->netdev, eqo->affinity_mask,
3096 eqo->idx);
3099 dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
3100 adapter->num_tx_qs);
3101 return 0;
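/* Mapping note for the loop above (illustrative): TX queue i is attached to
 * EQ (i % num_evt_qs), so with 8 TX queues and 4 EQs, TXQ0/TXQ4 share EQ0,
 * TXQ1/TXQ5 share EQ1, and so on; XPS is then steered to the CPUs named in
 * that EQ's affinity mask.
 */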
3104 static void be_rx_cqs_destroy(struct be_adapter *adapter)
3106 struct be_queue_info *q;
3107 struct be_rx_obj *rxo;
3108 int i;
3110 for_all_rx_queues(adapter, rxo, i) {
3111 q = &rxo->cq;
3112 if (q->created)
3113 be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3114 be_queue_free(adapter, q);
3118 static int be_rx_cqs_create(struct be_adapter *adapter)
3120 struct be_queue_info *eq, *cq;
3121 struct be_rx_obj *rxo;
3122 int rc, i;
3124 adapter->num_rss_qs =
3125 min(adapter->num_evt_qs, adapter->cfg_num_rx_irqs);
3127 /* We'll use RSS only if at least 2 RSS rings are supported. */
3128 if (adapter->num_rss_qs < 2)
3129 adapter->num_rss_qs = 0;
3131 adapter->num_rx_qs = adapter->num_rss_qs + adapter->need_def_rxq;
3133 /* When the interface is not capable of RSS rings (and there is no
3134 * need to create a default RXQ) we'll still need one RXQ
3135 */
3136 if (adapter->num_rx_qs == 0)
3137 adapter->num_rx_qs = 1;
3139 adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE;
3140 for_all_rx_queues(adapter, rxo, i) {
3141 rxo->adapter = adapter;
3142 cq = &rxo->cq;
3143 rc = be_queue_alloc(adapter, cq, RX_CQ_LEN,
3144 sizeof(struct be_eth_rx_compl));
3145 if (rc)
3146 return rc;
3148 u64_stats_init(&rxo->stats.sync);
3149 eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
3150 rc = be_cmd_cq_create(adapter, cq, eq, false, 3);
3151 if (rc)
3152 return rc;
3155 dev_info(&adapter->pdev->dev,
3156 "created %d RX queue(s)\n", adapter->num_rx_qs);
3157 return 0;
3160 static irqreturn_t be_intx(int irq, void *dev)
3162 struct be_eq_obj *eqo = dev;
3163 struct be_adapter *adapter = eqo->adapter;
3164 int num_evts = 0;
3166 /* IRQ is not expected when NAPI is scheduled as the EQ
3167 * will not be armed.
3168 * But, this can happen on Lancer INTx where it takes
3169 * a while to de-assert INTx or in BE2 where occasionally
3170 * an interrupt may be raised even when EQ is unarmed.
3171 * If NAPI is already scheduled, then counting & notifying
3172 * events will orphan them.
3173 */
3174 if (napi_schedule_prep(&eqo->napi)) {
3175 num_evts = events_get(eqo);
3176 __napi_schedule(&eqo->napi);
3177 if (num_evts)
3178 eqo->spurious_intr = 0;
3180 be_eq_notify(adapter, eqo->q.id, false, true, num_evts, 0);
3182 /* Return IRQ_HANDLED only for the first spurious intr
3183 * after a valid intr to stop the kernel from branding
3184 * this irq as a bad one!
3185 */
3186 if (num_evts || eqo->spurious_intr++ == 0)
3187 return IRQ_HANDLED;
3188 else
3189 return IRQ_NONE;
3192 static irqreturn_t be_msix(int irq, void *dev)
3194 struct be_eq_obj *eqo = dev;
3196 be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
3197 napi_schedule(&eqo->napi);
3198 return IRQ_HANDLED;
3201 static inline bool do_gro(struct be_rx_compl_info *rxcp)
3203 return (rxcp->tcpf && !rxcp->err && rxcp->l4_csum) ? true : false;
3206 static int be_process_rx(struct be_rx_obj *rxo, struct napi_struct *napi,
3207 int budget)
3209 struct be_adapter *adapter = rxo->adapter;
3210 struct be_queue_info *rx_cq = &rxo->cq;
3211 struct be_rx_compl_info *rxcp;
3212 u32 work_done;
3213 u32 frags_consumed = 0;
3215 for (work_done = 0; work_done < budget; work_done++) {
3216 rxcp = be_rx_compl_get(rxo);
3217 if (!rxcp)
3218 break;
3220 /* Is it a flush compl that has no data */
3221 if (unlikely(rxcp->num_rcvd == 0))
3222 goto loop_continue;
3224 /* Discard compl with partial DMA Lancer B0 */
3225 if (unlikely(!rxcp->pkt_size)) {
3226 be_rx_compl_discard(rxo, rxcp);
3227 goto loop_continue;
3230 /* On BE drop pkts that arrive due to imperfect filtering in
3231 * promiscuous mode on some SKUs
3232 */
3233 if (unlikely(rxcp->port != adapter->port_num &&
3234 !lancer_chip(adapter))) {
3235 be_rx_compl_discard(rxo, rxcp);
3236 goto loop_continue;
3239 if (do_gro(rxcp))
3240 be_rx_compl_process_gro(rxo, napi, rxcp);
3241 else
3242 be_rx_compl_process(rxo, napi, rxcp);
3244 loop_continue:
3245 frags_consumed += rxcp->num_rcvd;
3246 be_rx_stats_update(rxo, rxcp);
3249 if (work_done) {
3250 be_cq_notify(adapter, rx_cq->id, true, work_done);
3252 /* When an rx-obj gets into post_starved state, just
3253 * let be_worker do the posting.
3254 */
3255 if (atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM &&
3256 !rxo->rx_post_starved)
3257 be_post_rx_frags(rxo, GFP_ATOMIC,
3258 max_t(u32, MAX_RX_POST,
3259 frags_consumed));
3262 return work_done;
3266 static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo,
3267 int idx)
3269 int num_wrbs = 0, work_done = 0;
3270 struct be_tx_compl_info *txcp;
3272 while ((txcp = be_tx_compl_get(adapter, txo))) {
3273 num_wrbs += be_tx_compl_process(adapter, txo, txcp->end_index);
3274 work_done++;
3277 if (work_done) {
3278 be_cq_notify(adapter, txo->cq.id, true, work_done);
3279 atomic_sub(num_wrbs, &txo->q.used);
3281 /* As Tx wrbs have been freed up, wake up netdev queue
3282 * if it was stopped due to lack of tx wrbs. */
3283 if (__netif_subqueue_stopped(adapter->netdev, idx) &&
3284 be_can_txq_wake(txo)) {
3285 netif_wake_subqueue(adapter->netdev, idx);
3288 u64_stats_update_begin(&tx_stats(txo)->sync_compl);
3289 tx_stats(txo)->tx_compl += work_done;
3290 u64_stats_update_end(&tx_stats(txo)->sync_compl);
3294 int be_poll(struct napi_struct *napi, int budget)
3296 struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3297 struct be_adapter *adapter = eqo->adapter;
3298 int max_work = 0, work, i, num_evts;
3299 struct be_rx_obj *rxo;
3300 struct be_tx_obj *txo;
3301 u32 mult_enc = 0;
3303 num_evts = events_get(eqo);
3305 for_all_tx_queues_on_eq(adapter, eqo, txo, i)
3306 be_process_tx(adapter, txo, i);
3308 /* This loop will iterate twice for EQ0 in which
3309 * completions of the last RXQ (default one) are also processed
3310 * For other EQs the loop iterates only once
3311 */
3312 for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3313 work = be_process_rx(rxo, napi, budget);
3314 max_work = max(work, max_work);
3317 if (is_mcc_eqo(eqo))
3318 be_process_mcc(adapter);
3320 if (max_work < budget) {
3321 napi_complete_done(napi, max_work);
3323 /* Skyhawk EQ_DB has a provision to set the rearm to interrupt
3324 * delay via a delay multiplier encoding value
3325 */
3326 if (skyhawk_chip(adapter))
3327 mult_enc = be_get_eq_delay_mult_enc(eqo);
3329 be_eq_notify(adapter, eqo->q.id, true, false, num_evts,
3330 mult_enc);
3331 } else {
3332 /* As we'll continue in polling mode, count and clear events */
3333 be_eq_notify(adapter, eqo->q.id, false, false, num_evts, 0);
3335 return max_work;
3338 void be_detect_error(struct be_adapter *adapter)
3340 u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0;
3341 u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
3342 struct device *dev = &adapter->pdev->dev;
3343 u16 val;
3344 u32 i;
3346 if (be_check_error(adapter, BE_ERROR_HW))
3347 return;
3349 if (lancer_chip(adapter)) {
3350 sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
3351 if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
3352 be_set_error(adapter, BE_ERROR_UE);
3353 sliport_err1 = ioread32(adapter->db +
3354 SLIPORT_ERROR1_OFFSET);
3355 sliport_err2 = ioread32(adapter->db +
3356 SLIPORT_ERROR2_OFFSET);
3357 /* Do not log error messages if it's a FW reset */
3358 if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 &&
3359 sliport_err2 == SLIPORT_ERROR_FW_RESET2) {
3360 dev_info(dev, "Reset is in progress\n");
3361 } else {
3362 dev_err(dev, "Error detected in the card\n");
3363 dev_err(dev, "ERR: sliport status 0x%x\n",
3364 sliport_status);
3365 dev_err(dev, "ERR: sliport error1 0x%x\n",
3366 sliport_err1);
3367 dev_err(dev, "ERR: sliport error2 0x%x\n",
3368 sliport_err2);
3371 } else {
3372 ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW);
3373 ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH);
3374 ue_lo_mask = ioread32(adapter->pcicfg +
3375 PCICFG_UE_STATUS_LOW_MASK);
3376 ue_hi_mask = ioread32(adapter->pcicfg +
3377 PCICFG_UE_STATUS_HI_MASK);
3379 ue_lo = (ue_lo & ~ue_lo_mask);
3380 ue_hi = (ue_hi & ~ue_hi_mask);
3382 if (ue_lo || ue_hi) {
3383 /* On certain platforms BE3 hardware can indicate
3384 * spurious UEs. In case of a UE in the chip,
3385 * the POST register correctly reports either a
3386 * FAT_LOG_START state (FW is currently dumping
3387 * FAT log data) or an ARMFW_UE state. Check for the
3388 * above states to ascertain if the UE is valid or not.
3389 */
3390 if (BE3_chip(adapter)) {
3391 val = be_POST_stage_get(adapter);
3392 if ((val & POST_STAGE_FAT_LOG_START)
3393 != POST_STAGE_FAT_LOG_START &&
3394 (val & POST_STAGE_ARMFW_UE)
3395 != POST_STAGE_ARMFW_UE &&
3396 (val & POST_STAGE_RECOVERABLE_ERR)
3397 != POST_STAGE_RECOVERABLE_ERR)
3398 return;
3401 dev_err(dev, "Error detected in the adapter");
3402 be_set_error(adapter, BE_ERROR_UE);
3404 for (i = 0; ue_lo; ue_lo >>= 1, i++) {
3405 if (ue_lo & 1)
3406 dev_err(dev, "UE: %s bit set\n",
3407 ue_status_low_desc[i]);
3409 for (i = 0; ue_hi; ue_hi >>= 1, i++) {
3410 if (ue_hi & 1)
3411 dev_err(dev, "UE: %s bit set\n",
3412 ue_status_hi_desc[i]);
3418 static void be_msix_disable(struct be_adapter *adapter)
3420 if (msix_enabled(adapter)) {
3421 pci_disable_msix(adapter->pdev);
3422 adapter->num_msix_vec = 0;
3423 adapter->num_msix_roce_vec = 0;
3427 static int be_msix_enable(struct be_adapter *adapter)
3429 unsigned int i, max_roce_eqs;
3430 struct device *dev = &adapter->pdev->dev;
3431 int num_vec;
3433 /* If RoCE is supported, program the max number of vectors that
3434 * could be used for NIC and RoCE, else, just program the number
3435 * we'll use initially.
3436 */
3437 if (be_roce_supported(adapter)) {
3438 max_roce_eqs =
3439 be_max_func_eqs(adapter) - be_max_nic_eqs(adapter);
3440 max_roce_eqs = min(max_roce_eqs, num_online_cpus());
3441 num_vec = be_max_any_irqs(adapter) + max_roce_eqs;
3442 } else {
3443 num_vec = max(adapter->cfg_num_rx_irqs,
3444 adapter->cfg_num_tx_irqs);
3447 for (i = 0; i < num_vec; i++)
3448 adapter->msix_entries[i].entry = i;
3450 num_vec = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
3451 MIN_MSIX_VECTORS, num_vec);
3452 if (num_vec < 0)
3453 goto fail;
3455 if (be_roce_supported(adapter) && num_vec > MIN_MSIX_VECTORS) {
3456 adapter->num_msix_roce_vec = num_vec / 2;
3457 dev_info(dev, "enabled %d MSI-x vector(s) for RoCE\n",
3458 adapter->num_msix_roce_vec);
3461 adapter->num_msix_vec = num_vec - adapter->num_msix_roce_vec;
3463 dev_info(dev, "enabled %d MSI-x vector(s) for NIC\n",
3464 adapter->num_msix_vec);
3465 return 0;
3467 fail:
3468 dev_warn(dev, "MSIx enable failed\n");
3470 /* INTx is not supported in VFs, so fail probe if enable_msix fails */
3471 if (be_virtfn(adapter))
3472 return num_vec;
3473 return 0;
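/* Split note for the allocation above (illustrative numbers): when RoCE is
 * supported and more than MIN_MSIX_VECTORS are granted, half of the granted
 * vectors are reserved for RoCE and the NIC keeps the rest; e.g. 16 granted
 * vectors become 8 RoCE + 8 NIC vectors.
 */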
3476 static inline int be_msix_vec_get(struct be_adapter *adapter,
3477 struct be_eq_obj *eqo)
3479 return adapter->msix_entries[eqo->msix_idx].vector;
3482 static int be_msix_register(struct be_adapter *adapter)
3484 struct net_device *netdev = adapter->netdev;
3485 struct be_eq_obj *eqo;
3486 int status, i, vec;
3488 for_all_evt_queues(adapter, eqo, i) {
3489 sprintf(eqo->desc, "%s-q%d", netdev->name, i);
3490 vec = be_msix_vec_get(adapter, eqo);
3491 status = request_irq(vec, be_msix, 0, eqo->desc, eqo);
3492 if (status)
3493 goto err_msix;
3495 irq_set_affinity_hint(vec, eqo->affinity_mask);
3498 return 0;
3499 err_msix:
3500 for (i--; i >= 0; i--) {
3501 eqo = &adapter->eq_obj[i];
3502 free_irq(be_msix_vec_get(adapter, eqo), eqo);
3504 dev_warn(&adapter->pdev->dev, "MSIX Request IRQ failed - err %d\n",
3505 status);
3506 be_msix_disable(adapter);
3507 return status;
3510 static int be_irq_register(struct be_adapter *adapter)
3512 struct net_device *netdev = adapter->netdev;
3513 int status;
3515 if (msix_enabled(adapter)) {
3516 status = be_msix_register(adapter);
3517 if (status == 0)
3518 goto done;
3519 /* INTx is not supported for VF */
3520 if (be_virtfn(adapter))
3521 return status;
3524 /* INTx: only the first EQ is used */
3525 netdev->irq = adapter->pdev->irq;
3526 status = request_irq(netdev->irq, be_intx, IRQF_SHARED, netdev->name,
3527 &adapter->eq_obj[0]);
3528 if (status) {
3529 dev_err(&adapter->pdev->dev,
3530 "INTx request IRQ failed - err %d\n", status);
3531 return status;
3533 done:
3534 adapter->isr_registered = true;
3535 return 0;
3538 static void be_irq_unregister(struct be_adapter *adapter)
3540 struct net_device *netdev = adapter->netdev;
3541 struct be_eq_obj *eqo;
3542 int i, vec;
3544 if (!adapter->isr_registered)
3545 return;
3547 /* INTx */
3548 if (!msix_enabled(adapter)) {
3549 free_irq(netdev->irq, &adapter->eq_obj[0]);
3550 goto done;
3553 /* MSIx */
3554 for_all_evt_queues(adapter, eqo, i) {
3555 vec = be_msix_vec_get(adapter, eqo);
3556 irq_set_affinity_hint(vec, NULL);
3557 free_irq(vec, eqo);
3560 done:
3561 adapter->isr_registered = false;
3564 static void be_rx_qs_destroy(struct be_adapter *adapter)
3566 struct rss_info *rss = &adapter->rss_info;
3567 struct be_queue_info *q;
3568 struct be_rx_obj *rxo;
3569 int i;
3571 for_all_rx_queues(adapter, rxo, i) {
3572 q = &rxo->q;
3573 if (q->created) {
3574 /* If RXQs are destroyed while in an "out of buffer"
3575 * state, there is a possibility of an HW stall on
3576 * Lancer. So, post 64 buffers to each queue to relieve
3577 * the "out of buffer" condition.
3578 * Make sure there's space in the RXQ before posting.
3579 */
3580 if (lancer_chip(adapter)) {
3581 be_rx_cq_clean(rxo);
3582 if (atomic_read(&q->used) == 0)
3583 be_post_rx_frags(rxo, GFP_KERNEL,
3584 MAX_RX_POST);
3587 be_cmd_rxq_destroy(adapter, q);
3588 be_rx_cq_clean(rxo);
3589 be_rxq_clean(rxo);
3591 be_queue_free(adapter, q);
3594 if (rss->rss_flags) {
3595 rss->rss_flags = RSS_ENABLE_NONE;
3596 be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3597 128, rss->rss_hkey);
3601 static void be_disable_if_filters(struct be_adapter *adapter)
3603 /* Don't delete MAC on BE3 VFs without FILTMGMT privilege */
3604 if (!BEx_chip(adapter) || !be_virtfn(adapter) ||
3605 check_privilege(adapter, BE_PRIV_FILTMGMT)) {
3606 be_dev_mac_del(adapter, adapter->pmac_id[0]);
3607 eth_zero_addr(adapter->dev_mac);
3610 be_clear_uc_list(adapter);
3611 be_clear_mc_list(adapter);
3613 /* The IFACE flags are enabled in the open path and cleared
3614 * in the close path. When a VF gets detached from the host and
3615 * assigned to a VM the following happens:
3616 * - VF's IFACE flags get cleared in the detach path
3617 * - IFACE create is issued by the VF in the attach path
3618 * Due to a bug in the BE3/Skyhawk-R FW
3619 * (Lancer FW doesn't have the bug), the IFACE capability flags
3620 * specified along with the IFACE create cmd issued by a VF are not
3621 * honoured by FW. As a consequence, if a *new* driver
3622 * (that enables/disables IFACE flags in open/close)
3623 * is loaded in the host and an *old* driver is used by a VM/VF,
3624 * the IFACE gets created *without* the needed flags.
3625 * To avoid this, disable RX-filter flags only for Lancer.
3626 */
3627 if (lancer_chip(adapter)) {
3628 be_cmd_rx_filter(adapter, BE_IF_ALL_FILT_FLAGS, OFF);
3629 adapter->if_flags &= ~BE_IF_ALL_FILT_FLAGS;
3633 static int be_close(struct net_device *netdev)
3635 struct be_adapter *adapter = netdev_priv(netdev);
3636 struct be_eq_obj *eqo;
3637 int i;
3639 /* This protection is needed as be_close() may be called even when the
3640 * adapter is in cleared state (after eeh perm failure)
3641 */
3642 if (!(adapter->flags & BE_FLAGS_SETUP_DONE))
3643 return 0;
3645 /* Before attempting cleanup ensure all the pending cmds in the
3646 * config_wq have finished execution
3647 */
3648 flush_workqueue(be_wq);
3650 be_disable_if_filters(adapter);
3652 if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
3653 for_all_evt_queues(adapter, eqo, i) {
3654 napi_disable(&eqo->napi);
3656 adapter->flags &= ~BE_FLAGS_NAPI_ENABLED;
3659 be_async_mcc_disable(adapter);
3661 /* Wait for all pending tx completions to arrive so that
3662 * all tx skbs are freed.
3663 */
3664 netif_tx_disable(netdev);
3665 be_tx_compl_clean(adapter);
3667 be_rx_qs_destroy(adapter);
3669 for_all_evt_queues(adapter, eqo, i) {
3670 if (msix_enabled(adapter))
3671 synchronize_irq(be_msix_vec_get(adapter, eqo));
3672 else
3673 synchronize_irq(netdev->irq);
3674 be_eq_clean(eqo);
3677 be_irq_unregister(adapter);
3679 return 0;
3682 static int be_rx_qs_create(struct be_adapter *adapter)
3684 struct rss_info *rss = &adapter->rss_info;
3685 u8 rss_key[RSS_HASH_KEY_LEN];
3686 struct be_rx_obj *rxo;
3687 int rc, i, j;
3689 for_all_rx_queues(adapter, rxo, i) {
3690 rc = be_queue_alloc(adapter, &rxo->q, RX_Q_LEN,
3691 sizeof(struct be_eth_rx_d));
3692 if (rc)
3693 return rc;
3696 if (adapter->need_def_rxq || !adapter->num_rss_qs) {
3697 rxo = default_rxo(adapter);
3698 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3699 rx_frag_size, adapter->if_handle,
3700 false, &rxo->rss_id);
3701 if (rc)
3702 return rc;
3705 for_all_rss_queues(adapter, rxo, i) {
3706 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3707 rx_frag_size, adapter->if_handle,
3708 true, &rxo->rss_id);
3709 if (rc)
3710 return rc;
3713 if (be_multi_rxq(adapter)) {
3714 for (j = 0; j < RSS_INDIR_TABLE_LEN; j += adapter->num_rss_qs) {
3715 for_all_rss_queues(adapter, rxo, i) {
3716 if ((j + i) >= RSS_INDIR_TABLE_LEN)
3717 break;
3718 rss->rsstable[j + i] = rxo->rss_id;
3719 rss->rss_queue[j + i] = i;
3722 rss->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 |
3723 RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6;
3725 if (!BEx_chip(adapter))
3726 rss->rss_flags |= RSS_ENABLE_UDP_IPV4 |
3727 RSS_ENABLE_UDP_IPV6;
3729 netdev_rss_key_fill(rss_key, RSS_HASH_KEY_LEN);
3730 rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3731 RSS_INDIR_TABLE_LEN, rss_key);
3732 if (rc) {
3733 rss->rss_flags = RSS_ENABLE_NONE;
3734 return rc;
3737 memcpy(rss->rss_hkey, rss_key, RSS_HASH_KEY_LEN);
3738 } else {
3739 /* Disable RSS, if only default RX Q is created */
3740 rss->rss_flags = RSS_ENABLE_NONE;
3744 /* Post 1 less than RXQ-len to avoid head being equal to tail,
3745 * which is a queue empty condition
3746 */
3747 for_all_rx_queues(adapter, rxo, i)
3748 be_post_rx_frags(rxo, GFP_KERNEL, RX_Q_LEN - 1);
3750 return 0;
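/* Layout note for the RSS setup above (illustrative): with four RSS rings the
 * indirection table is filled as q0,q1,q2,q3 repeated across its
 * RSS_INDIR_TABLE_LEN entries, so flows hash evenly over the rings; when only
 * the default RXQ exists, RSS stays disabled (RSS_ENABLE_NONE).
 */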
3753 static int be_enable_if_filters(struct be_adapter *adapter)
3755 int status;
3757 status = be_cmd_rx_filter(adapter, BE_IF_FILT_FLAGS_BASIC, ON);
3758 if (status)
3759 return status;
3761 /* Normally this condition is true as the ->dev_mac is zeroed.
3762 * But on BE3 VFs the initial MAC is pre-programmed by PF and
3763 * subsequent be_dev_mac_add() can fail (after fresh boot)
3764 */
3765 if (!ether_addr_equal(adapter->dev_mac, adapter->netdev->dev_addr)) {
3766 int old_pmac_id = -1;
3768 /* Remember old programmed MAC if any - can happen on BE3 VF */
3769 if (!is_zero_ether_addr(adapter->dev_mac))
3770 old_pmac_id = adapter->pmac_id[0];
3772 status = be_dev_mac_add(adapter, adapter->netdev->dev_addr);
3773 if (status)
3774 return status;
3776 /* Delete the old programmed MAC as we successfully programmed
3777 * a new MAC
3778 */
3779 if (old_pmac_id >= 0 && old_pmac_id != adapter->pmac_id[0])
3780 be_dev_mac_del(adapter, old_pmac_id);
3782 ether_addr_copy(adapter->dev_mac, adapter->netdev->dev_addr);
3785 if (adapter->vlans_added)
3786 be_vid_config(adapter);
3788 __be_set_rx_mode(adapter);
3790 return 0;
3793 static int be_open(struct net_device *netdev)
3795 struct be_adapter *adapter = netdev_priv(netdev);
3796 struct be_eq_obj *eqo;
3797 struct be_rx_obj *rxo;
3798 struct be_tx_obj *txo;
3799 u8 link_status;
3800 int status, i;
3802 status = be_rx_qs_create(adapter);
3803 if (status)
3804 goto err;
3806 status = be_enable_if_filters(adapter);
3807 if (status)
3808 goto err;
3810 status = be_irq_register(adapter);
3811 if (status)
3812 goto err;
3814 for_all_rx_queues(adapter, rxo, i)
3815 be_cq_notify(adapter, rxo->cq.id, true, 0);
3817 for_all_tx_queues(adapter, txo, i)
3818 be_cq_notify(adapter, txo->cq.id, true, 0);
3820 be_async_mcc_enable(adapter);
3822 for_all_evt_queues(adapter, eqo, i) {
3823 napi_enable(&eqo->napi);
3824 be_eq_notify(adapter, eqo->q.id, true, true, 0, 0);
3826 adapter->flags |= BE_FLAGS_NAPI_ENABLED;
3828 status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
3829 if (!status)
3830 be_link_status_update(adapter, link_status);
3832 netif_tx_start_all_queues(netdev);
3833 if (skyhawk_chip(adapter))
3834 udp_tunnel_get_rx_info(netdev);
3836 return 0;
3837 err:
3838 be_close(adapter->netdev);
3839 return -EIO;
3842 static void be_vf_eth_addr_generate(struct be_adapter *adapter, u8 *mac)
3844 u32 addr;
3846 addr = jhash(adapter->netdev->dev_addr, ETH_ALEN, 0);
3848 mac[5] = (u8)(addr & 0xFF);
3849 mac[4] = (u8)((addr >> 8) & 0xFF);
3850 mac[3] = (u8)((addr >> 16) & 0xFF);
3851 /* Use the OUI from the current MAC address */
3852 memcpy(mac, adapter->netdev->dev_addr, 3);
3855 /*
3856 * Generate a seed MAC address from the PF MAC Address using jhash.
3857 * MAC addresses for VFs are assigned incrementally starting from the seed.
3858 * These addresses are programmed in the ASIC by the PF and the VF driver
3859 * queries for the MAC address during its probe.
3860 */
3861 static int be_vf_eth_addr_config(struct be_adapter *adapter)
3863 u32 vf;
3864 int status = 0;
3865 u8 mac[ETH_ALEN];
3866 struct be_vf_cfg *vf_cfg;
3868 be_vf_eth_addr_generate(adapter, mac);
3870 for_all_vfs(adapter, vf_cfg, vf) {
3871 if (BEx_chip(adapter))
3872 status = be_cmd_pmac_add(adapter, mac,
3873 vf_cfg->if_handle,
3874 &vf_cfg->pmac_id, vf + 1);
3875 else
3876 status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
3877 vf + 1);
3879 if (status)
3880 dev_err(&adapter->pdev->dev,
3881 "Mac address assignment failed for VF %d\n",
3882 vf);
3883 else
3884 memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3886 mac[5] += 1;
3888 return status;
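/* Derivation note for the two routines above: the seed MAC keeps the PF's OUI
 * in bytes 0-2 and fills bytes 3-5 from a jhash of the PF MAC; each VF then
 * receives the seed with the last octet bumped by its VF index, so at most 256
 * distinct addresses are available before the low octet wraps.
 */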
3891 static int be_vfs_mac_query(struct be_adapter *adapter)
3893 int status, vf;
3894 u8 mac[ETH_ALEN];
3895 struct be_vf_cfg *vf_cfg;
3897 for_all_vfs(adapter, vf_cfg, vf) {
3898 status = be_cmd_get_active_mac(adapter, vf_cfg->pmac_id,
3899 mac, vf_cfg->if_handle,
3900 false, vf+1);
3901 if (status)
3902 return status;
3903 memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3905 return 0;
3908 static void be_vf_clear(struct be_adapter *adapter)
3910 struct be_vf_cfg *vf_cfg;
3911 u32 vf;
3913 if (pci_vfs_assigned(adapter->pdev)) {
3914 dev_warn(&adapter->pdev->dev,
3915 "VFs are assigned to VMs: not disabling VFs\n");
3916 goto done;
3919 pci_disable_sriov(adapter->pdev);
3921 for_all_vfs(adapter, vf_cfg, vf) {
3922 if (BEx_chip(adapter))
3923 be_cmd_pmac_del(adapter, vf_cfg->if_handle,
3924 vf_cfg->pmac_id, vf + 1);
3925 else
3926 be_cmd_set_mac(adapter, NULL, vf_cfg->if_handle,
3927 vf + 1);
3929 be_cmd_if_destroy(adapter, vf_cfg->if_handle, vf + 1);
3932 if (BE3_chip(adapter))
3933 be_cmd_set_hsw_config(adapter, 0, 0,
3934 adapter->if_handle,
3935 PORT_FWD_TYPE_PASSTHRU, 0);
3936 done:
3937 kfree(adapter->vf_cfg);
3938 adapter->num_vfs = 0;
3939 adapter->flags &= ~BE_FLAGS_SRIOV_ENABLED;
3942 static void be_clear_queues(struct be_adapter *adapter)
3944 be_mcc_queues_destroy(adapter);
3945 be_rx_cqs_destroy(adapter);
3946 be_tx_queues_destroy(adapter);
3947 be_evt_queues_destroy(adapter);
3950 static void be_cancel_worker(struct be_adapter *adapter)
3952 if (adapter->flags & BE_FLAGS_WORKER_SCHEDULED) {
3953 cancel_delayed_work_sync(&adapter->work);
3954 adapter->flags &= ~BE_FLAGS_WORKER_SCHEDULED;
3958 static void be_cancel_err_detection(struct be_adapter *adapter)
3960 struct be_error_recovery *err_rec = &adapter->error_recovery;
3962 if (!be_err_recovery_workq)
3963 return;
3965 if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) {
3966 cancel_delayed_work_sync(&err_rec->err_detection_work);
3967 adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED;
3971 static int be_enable_vxlan_offloads(struct be_adapter *adapter)
3973 struct net_device *netdev = adapter->netdev;
3974 struct device *dev = &adapter->pdev->dev;
3975 struct be_vxlan_port *vxlan_port;
3976 __be16 port;
3977 int status;
3979 vxlan_port = list_first_entry(&adapter->vxlan_port_list,
3980 struct be_vxlan_port, list);
3981 port = vxlan_port->port;
3983 status = be_cmd_manage_iface(adapter, adapter->if_handle,
3984 OP_CONVERT_NORMAL_TO_TUNNEL);
3985 if (status) {
3986 dev_warn(dev, "Failed to convert normal interface to tunnel\n");
3987 return status;
3989 adapter->flags |= BE_FLAGS_VXLAN_OFFLOADS;
3991 status = be_cmd_set_vxlan_port(adapter, port);
3992 if (status) {
3993 dev_warn(dev, "Failed to add VxLAN port\n");
3994 return status;
3996 adapter->vxlan_port = port;
3998 netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
3999 NETIF_F_TSO | NETIF_F_TSO6 |
4000 NETIF_F_GSO_UDP_TUNNEL;
4002 dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
4003 be16_to_cpu(port));
4004 return 0;
4007 static void be_disable_vxlan_offloads(struct be_adapter *adapter)
4009 struct net_device *netdev = adapter->netdev;
4011 if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)
4012 be_cmd_manage_iface(adapter, adapter->if_handle,
4013 OP_CONVERT_TUNNEL_TO_NORMAL);
4015 if (adapter->vxlan_port)
4016 be_cmd_set_vxlan_port(adapter, 0);
4018 adapter->flags &= ~BE_FLAGS_VXLAN_OFFLOADS;
4019 adapter->vxlan_port = 0;
4021 netdev->hw_enc_features = 0;
4024 static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs,
4025 struct be_resources *vft_res)
4027 struct be_resources res = adapter->pool_res;
4028 u32 vf_if_cap_flags = res.vf_if_cap_flags;
4029 struct be_resources res_mod = {0};
4030 u16 num_vf_qs = 1;
4032 /* Distribute the queue resources among the PF and its VFs */
4033 if (num_vfs) {
4034 /* Divide the rx queues evenly among the VFs and the PF, capped
4035 * at VF-EQ-count. Any remainder queues belong to the PF.
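 * e.g. (illustrative) with max_rss_qs = 64 and num_vfs = 7, each VF (and
 * the PF) is offered min(SH_VF_MAX_NIC_EQS, 64 / 8) = min(SH_VF_MAX_NIC_EQS, 8)
 * RX queues.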
4037 num_vf_qs = min(SH_VF_MAX_NIC_EQS,
4038 res.max_rss_qs / (num_vfs + 1));
4040 /* Skyhawk-R chip supports only MAX_PORT_RSS_TABLES
4041 * RSS Tables per port. Provide RSS on VFs only if the number of
4042 * VFs requested is less than its PF Pool's RSS Tables limit.
4044 if (num_vfs >= be_max_pf_pool_rss_tables(adapter))
4045 num_vf_qs = 1;
4048 /* Resources with fields set to all 1's by the GET_PROFILE_CONFIG cmd
4049 * are the ones modifiable using the SET_PROFILE_CONFIG cmd.
4051 be_cmd_get_profile_config(adapter, &res_mod, NULL, ACTIVE_PROFILE_TYPE,
4052 RESOURCE_MODIFIABLE, 0);
4054 /* If RSS IFACE capability flags are modifiable for a VF, set the
4055 * capability flag as valid and set RSS and DEFQ_RSS IFACE flags if
4056 * more than 1 RSSQ is available for a VF.
4057 * Otherwise, provision only 1 queue pair for VF.
4059 if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_RSS) {
4060 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4061 if (num_vf_qs > 1) {
4062 vf_if_cap_flags |= BE_IF_FLAGS_RSS;
4063 if (res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS)
4064 vf_if_cap_flags |= BE_IF_FLAGS_DEFQ_RSS;
4065 } else {
4066 vf_if_cap_flags &= ~(BE_IF_FLAGS_RSS |
4067 BE_IF_FLAGS_DEFQ_RSS);
4069 } else {
4070 num_vf_qs = 1;
4073 if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
4074 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4075 vf_if_cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4078 vft_res->vf_if_cap_flags = vf_if_cap_flags;
4079 vft_res->max_rx_qs = num_vf_qs;
4080 vft_res->max_rss_qs = num_vf_qs;
4081 vft_res->max_tx_qs = res.max_tx_qs / (num_vfs + 1);
4082 vft_res->max_cq_count = res.max_cq_count / (num_vfs + 1);
4084 /* Distribute unicast MACs, VLANs, IFACE count and MCCQ count equally
4085 * among the PF and its VFs, if the fields are changeable
4087 if (res_mod.max_uc_mac == FIELD_MODIFIABLE)
4088 vft_res->max_uc_mac = res.max_uc_mac / (num_vfs + 1);
4090 if (res_mod.max_vlans == FIELD_MODIFIABLE)
4091 vft_res->max_vlans = res.max_vlans / (num_vfs + 1);
4093 if (res_mod.max_iface_count == FIELD_MODIFIABLE)
4094 vft_res->max_iface_count = res.max_iface_count / (num_vfs + 1);
4096 if (res_mod.max_mcc_count == FIELD_MODIFIABLE)
4097 vft_res->max_mcc_count = res.max_mcc_count / (num_vfs + 1);
4100 static void be_if_destroy(struct be_adapter *adapter)
4102 be_cmd_if_destroy(adapter, adapter->if_handle, 0);
4104 kfree(adapter->pmac_id);
4105 adapter->pmac_id = NULL;
4107 kfree(adapter->mc_list);
4108 adapter->mc_list = NULL;
4110 kfree(adapter->uc_list);
4111 adapter->uc_list = NULL;
4114 static int be_clear(struct be_adapter *adapter)
4116 struct pci_dev *pdev = adapter->pdev;
4117 struct be_resources vft_res = {0};
4119 be_cancel_worker(adapter);
4121 flush_workqueue(be_wq);
4123 if (sriov_enabled(adapter))
4124 be_vf_clear(adapter);
4126 /* Re-configure FW to distribute resources evenly across the max-supported
4127 * number of VFs, but only when VFs are not already enabled.
4129 if (skyhawk_chip(adapter) && be_physfn(adapter) &&
4130 !pci_vfs_assigned(pdev)) {
4131 be_calculate_vf_res(adapter,
4132 pci_sriov_get_totalvfs(pdev),
4133 &vft_res);
4134 be_cmd_set_sriov_config(adapter, adapter->pool_res,
4135 pci_sriov_get_totalvfs(pdev),
4136 &vft_res);
4139 be_disable_vxlan_offloads(adapter);
4141 be_if_destroy(adapter);
4143 be_clear_queues(adapter);
4145 be_msix_disable(adapter);
4146 adapter->flags &= ~BE_FLAGS_SETUP_DONE;
4147 return 0;
4150 static int be_vfs_if_create(struct be_adapter *adapter)
4152 struct be_resources res = {0};
4153 u32 cap_flags, en_flags, vf;
4154 struct be_vf_cfg *vf_cfg;
4155 int status;
4157 /* If a FW profile exists, then cap_flags are updated */
4158 cap_flags = BE_VF_IF_EN_FLAGS;
4160 for_all_vfs(adapter, vf_cfg, vf) {
4161 if (!BE3_chip(adapter)) {
4162 status = be_cmd_get_profile_config(adapter, &res, NULL,
4163 ACTIVE_PROFILE_TYPE,
4164 RESOURCE_LIMITS,
4165 vf + 1);
4166 if (!status) {
4167 cap_flags = res.if_cap_flags;
4168 /* Prevent VFs from enabling VLAN promiscuous
4169 * mode
4171 cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4175 /* PF should enable IF flags during proxy if_create call */
4176 en_flags = cap_flags & BE_VF_IF_EN_FLAGS;
4177 status = be_cmd_if_create(adapter, cap_flags, en_flags,
4178 &vf_cfg->if_handle, vf + 1);
4179 if (status)
4180 return status;
4183 return 0;
4186 static int be_vf_setup_init(struct be_adapter *adapter)
4188 struct be_vf_cfg *vf_cfg;
4189 int vf;
4191 adapter->vf_cfg = kcalloc(adapter->num_vfs, sizeof(*vf_cfg),
4192 GFP_KERNEL);
4193 if (!adapter->vf_cfg)
4194 return -ENOMEM;
4196 for_all_vfs(adapter, vf_cfg, vf) {
4197 vf_cfg->if_handle = -1;
4198 vf_cfg->pmac_id = -1;
4200 return 0;
4203 static int be_vf_setup(struct be_adapter *adapter)
4205 struct device *dev = &adapter->pdev->dev;
4206 struct be_vf_cfg *vf_cfg;
4207 int status, old_vfs, vf;
4208 bool spoofchk;
4210 old_vfs = pci_num_vf(adapter->pdev);
4212 status = be_vf_setup_init(adapter);
4213 if (status)
4214 goto err;
4216 if (old_vfs) {
4217 for_all_vfs(adapter, vf_cfg, vf) {
4218 status = be_cmd_get_if_id(adapter, vf_cfg, vf);
4219 if (status)
4220 goto err;
4223 status = be_vfs_mac_query(adapter);
4224 if (status)
4225 goto err;
4226 } else {
4227 status = be_vfs_if_create(adapter);
4228 if (status)
4229 goto err;
4231 status = be_vf_eth_addr_config(adapter);
4232 if (status)
4233 goto err;
4236 for_all_vfs(adapter, vf_cfg, vf) {
4237 /* Allow VFs to program MAC/VLAN filters */
4238 status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges,
4239 vf + 1);
4240 if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
4241 status = be_cmd_set_fn_privileges(adapter,
4242 vf_cfg->privileges |
4243 BE_PRIV_FILTMGMT,
4244 vf + 1);
4245 if (!status) {
4246 vf_cfg->privileges |= BE_PRIV_FILTMGMT;
4247 dev_info(dev, "VF%d has FILTMGMT privilege\n",
4248 vf);
4252 /* Allow full available bandwidth */
4253 if (!old_vfs)
4254 be_cmd_config_qos(adapter, 0, 0, vf + 1);
4256 status = be_cmd_get_hsw_config(adapter, NULL, vf + 1,
4257 vf_cfg->if_handle, NULL,
4258 &spoofchk);
4259 if (!status)
4260 vf_cfg->spoofchk = spoofchk;
4262 if (!old_vfs) {
4263 be_cmd_enable_vf(adapter, vf + 1);
4264 be_cmd_set_logical_link_config(adapter,
4265 IFLA_VF_LINK_STATE_AUTO,
4266 vf+1);
4270 if (!old_vfs) {
4271 status = pci_enable_sriov(adapter->pdev, adapter->num_vfs);
4272 if (status) {
4273 dev_err(dev, "SRIOV enable failed\n");
4274 adapter->num_vfs = 0;
4275 goto err;
4279 if (BE3_chip(adapter)) {
4280 /* On BE3, enable VEB only when SRIOV is enabled */
4281 status = be_cmd_set_hsw_config(adapter, 0, 0,
4282 adapter->if_handle,
4283 PORT_FWD_TYPE_VEB, 0);
4284 if (status)
4285 goto err;
4288 adapter->flags |= BE_FLAGS_SRIOV_ENABLED;
4289 return 0;
4290 err:
4291 dev_err(dev, "VF setup failed\n");
4292 be_vf_clear(adapter);
4293 return status;
4296 /* Converting function_mode bits on BE3 to SH mc_type enums */
4298 static u8 be_convert_mc_type(u32 function_mode)
4300 if (function_mode & VNIC_MODE && function_mode & QNQ_MODE)
4301 return vNIC1;
4302 else if (function_mode & QNQ_MODE)
4303 return FLEX10;
4304 else if (function_mode & VNIC_MODE)
4305 return vNIC2;
4306 else if (function_mode & UMC_ENABLED)
4307 return UMC;
4308 else
4309 return MC_NONE;
4312 /* On BE2/BE3 FW does not suggest the supported limits */
4313 static void BEx_get_resources(struct be_adapter *adapter,
4314 struct be_resources *res)
4316 bool use_sriov = adapter->num_vfs ? 1 : 0;
4318 if (be_physfn(adapter))
4319 res->max_uc_mac = BE_UC_PMAC_COUNT;
4320 else
4321 res->max_uc_mac = BE_VF_UC_PMAC_COUNT;
4323 adapter->mc_type = be_convert_mc_type(adapter->function_mode);
4325 if (be_is_mc(adapter)) {
4326 /* Assuming that there are 4 channels per port,
4327 * when multi-channel is enabled
4329 if (be_is_qnq_mode(adapter))
4330 res->max_vlans = BE_NUM_VLANS_SUPPORTED/8;
4331 else
4332 /* In a non-qnq multichannel mode, the pvid
4333 * takes up one vlan entry
4335 res->max_vlans = (BE_NUM_VLANS_SUPPORTED / 4) - 1;
4336 } else {
4337 res->max_vlans = BE_NUM_VLANS_SUPPORTED;
4340 res->max_mcast_mac = BE_MAX_MC;
4342 /* 1) For BE3 1Gb ports, FW does not support multiple TXQs
4343 * 2) Create multiple TX rings on a BE3-R multi-channel interface
4344 * *only* if it is RSS-capable.
4346 if (BE2_chip(adapter) || use_sriov || (adapter->port_num > 1) ||
4347 be_virtfn(adapter) ||
4348 (be_is_mc(adapter) &&
4349 !(adapter->function_caps & BE_FUNCTION_CAPS_RSS))) {
4350 res->max_tx_qs = 1;
4351 } else if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) {
4352 struct be_resources super_nic_res = {0};
4354 /* On a SuperNIC profile, the driver needs to use the
4355 * GET_PROFILE_CONFIG cmd to query the per-function TXQ limits
4357 be_cmd_get_profile_config(adapter, &super_nic_res, NULL,
4358 ACTIVE_PROFILE_TYPE, RESOURCE_LIMITS,
4360 /* Some old versions of BE3 FW don't report max_tx_qs value */
4361 res->max_tx_qs = super_nic_res.max_tx_qs ? : BE3_MAX_TX_QS;
4362 } else {
4363 res->max_tx_qs = BE3_MAX_TX_QS;
4366 if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
4367 !use_sriov && be_physfn(adapter))
4368 res->max_rss_qs = (adapter->be3_native) ?
4369 BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
4370 res->max_rx_qs = res->max_rss_qs + 1;
4372 if (be_physfn(adapter))
4373 res->max_evt_qs = (be_max_vfs(adapter) > 0) ?
4374 BE3_SRIOV_MAX_EVT_QS : BE3_MAX_EVT_QS;
4375 else
4376 res->max_evt_qs = 1;
4378 res->if_cap_flags = BE_IF_CAP_FLAGS_WANT;
4379 res->if_cap_flags &= ~BE_IF_FLAGS_DEFQ_RSS;
4380 if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS))
4381 res->if_cap_flags &= ~BE_IF_FLAGS_RSS;
4384 static void be_setup_init(struct be_adapter *adapter)
4386 adapter->vlan_prio_bmap = 0xff;
4387 adapter->phy.link_speed = -1;
4388 adapter->if_handle = -1;
4389 adapter->be3_native = false;
4390 adapter->if_flags = 0;
4391 adapter->phy_state = BE_UNKNOWN_PHY_STATE;
4392 if (be_physfn(adapter))
4393 adapter->cmd_privileges = MAX_PRIVILEGES;
4394 else
4395 adapter->cmd_privileges = MIN_PRIVILEGES;
4398 /* HW supports only MAX_PORT_RSS_TABLES RSS Policy Tables per port.
4399 * However, this HW limitation is not exposed to the host via any SLI cmd.
4400 * As a result, in the case of SRIOV, and in particular multi-partition configs,
4401 * the driver needs to calculate a proportional share of RSS Tables per PF-pool
4402 * for distribution between the VFs. This self-imposed limit will determine the
4403 * number of VFs for which RSS can be enabled.
4405 static void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter)
4407 struct be_port_resources port_res = {0};
4408 u8 rss_tables_on_port;
4409 u16 max_vfs = be_max_vfs(adapter);
4411 be_cmd_get_profile_config(adapter, NULL, &port_res, SAVED_PROFILE_TYPE,
4412 RESOURCE_LIMITS, 0);
4414 rss_tables_on_port = MAX_PORT_RSS_TABLES - port_res.nic_pfs;
4416 /* Each PF Pool's RSS Tables limit =
4417 * PF's Max VFs / Total_Max_VFs on Port * RSS Tables on Port
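 * e.g. (illustrative) with rss_tables_on_port = 14, this PF's max_vfs = 32
 * and 64 total VFs on the port, the pool limit is 32 * 14 / 64 = 7 tables.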
4419 adapter->pool_res.max_rss_tables =
4420 max_vfs * rss_tables_on_port / port_res.max_vfs;
4423 static int be_get_sriov_config(struct be_adapter *adapter)
4425 struct be_resources res = {0};
4426 int max_vfs, old_vfs;
4428 be_cmd_get_profile_config(adapter, &res, NULL, ACTIVE_PROFILE_TYPE,
4429 RESOURCE_LIMITS, 0);
4431 /* Some old versions of BE3 FW don't report max_vfs value */
4432 if (BE3_chip(adapter) && !res.max_vfs) {
4433 max_vfs = pci_sriov_get_totalvfs(adapter->pdev);
4434 res.max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0;
4437 adapter->pool_res = res;
4439 /* If, during the previous unload of the driver, the VFs were not disabled,
4440 * then we cannot rely on the PF POOL limits for the TotalVFs value.
4441 * Instead use the TotalVFs value stored in the pci-dev struct.
4443 old_vfs = pci_num_vf(adapter->pdev);
4444 if (old_vfs) {
4445 dev_info(&adapter->pdev->dev, "%d VFs are already enabled\n",
4446 old_vfs);
4448 adapter->pool_res.max_vfs =
4449 pci_sriov_get_totalvfs(adapter->pdev);
4450 adapter->num_vfs = old_vfs;
4453 if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4454 be_calculate_pf_pool_rss_tables(adapter);
4455 dev_info(&adapter->pdev->dev,
4456 "RSS can be enabled for all VFs if num_vfs <= %d\n",
4457 be_max_pf_pool_rss_tables(adapter));
4459 return 0;
4462 static void be_alloc_sriov_res(struct be_adapter *adapter)
4464 int old_vfs = pci_num_vf(adapter->pdev);
4465 struct be_resources vft_res = {0};
4466 int status;
4468 be_get_sriov_config(adapter);
4470 if (!old_vfs)
4471 pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter));
4473 /* When the HW is in an SRIOV-capable configuration, the PF-pool
4474 * resources are given to the PF during driver load, if there are no
4475 * old VFs. This facility is not available in BE3 FW.
4476 * Also, this is done by the FW on the Lancer chip.
4478 if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4479 be_calculate_vf_res(adapter, 0, &vft_res);
4480 status = be_cmd_set_sriov_config(adapter, adapter->pool_res, 0,
4481 &vft_res);
4482 if (status)
4483 dev_err(&adapter->pdev->dev,
4484 "Failed to optimize SRIOV resources\n");
4488 static int be_get_resources(struct be_adapter *adapter)
4490 struct device *dev = &adapter->pdev->dev;
4491 struct be_resources res = {0};
4492 int status;
4494 /* For Lancer, SH etc. read per-function resource limits from FW.
4495 * GET_FUNC_CONFIG returns per-function guaranteed limits.
4496 * GET_PROFILE_CONFIG returns PCI-E related limits and PF-pool limits.
4498 if (BEx_chip(adapter)) {
4499 BEx_get_resources(adapter, &res);
4500 } else {
4501 status = be_cmd_get_func_config(adapter, &res);
4502 if (status)
4503 return status;
4505 /* If a default RXQ must be created, we'll use up one RSSQ */
4506 if (res.max_rss_qs && res.max_rss_qs == res.max_rx_qs &&
4507 !(res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS))
4508 res.max_rss_qs -= 1;
4511 /* If RoCE is supported, stash away half the EQs for RoCE */
4512 res.max_nic_evt_qs = be_roce_supported(adapter) ?
4513 res.max_evt_qs / 2 : res.max_evt_qs;
4514 adapter->res = res;
4516 /* If FW supports RSS default queue, then skip creating non-RSS
4517 * queue for non-IP traffic.
4519 adapter->need_def_rxq = (be_if_cap_flags(adapter) &
4520 BE_IF_FLAGS_DEFQ_RSS) ? 0 : 1;
4522 dev_info(dev, "Max: txqs %d, rxqs %d, rss %d, eqs %d, vfs %d\n",
4523 be_max_txqs(adapter), be_max_rxqs(adapter),
4524 be_max_rss(adapter), be_max_nic_eqs(adapter),
4525 be_max_vfs(adapter));
4526 dev_info(dev, "Max: uc-macs %d, mc-macs %d, vlans %d\n",
4527 be_max_uc(adapter), be_max_mc(adapter),
4528 be_max_vlans(adapter));
4530 /* Ensure RX and TX queues are created in pairs at init time */
4531 adapter->cfg_num_rx_irqs =
4532 min_t(u16, netif_get_num_default_rss_queues(),
4533 be_max_qp_irqs(adapter));
4534 adapter->cfg_num_tx_irqs = adapter->cfg_num_rx_irqs;
4535 return 0;
4538 static int be_get_config(struct be_adapter *adapter)
4540 int status, level;
4541 u16 profile_id;
4543 status = be_cmd_get_cntl_attributes(adapter);
4544 if (status)
4545 return status;
4547 status = be_cmd_query_fw_cfg(adapter);
4548 if (status)
4549 return status;
4551 if (!lancer_chip(adapter) && be_physfn(adapter))
4552 be_cmd_get_fat_dump_len(adapter, &adapter->fat_dump_len);
4554 if (BEx_chip(adapter)) {
4555 level = be_cmd_get_fw_log_level(adapter);
4556 adapter->msg_enable =
4557 level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0;
4560 be_cmd_get_acpi_wol_cap(adapter);
4561 pci_enable_wake(adapter->pdev, PCI_D3hot, adapter->wol_en);
4562 pci_enable_wake(adapter->pdev, PCI_D3cold, adapter->wol_en);
4564 be_cmd_query_port_name(adapter);
4566 if (be_physfn(adapter)) {
4567 status = be_cmd_get_active_profile(adapter, &profile_id);
4568 if (!status)
4569 dev_info(&adapter->pdev->dev,
4570 "Using profile 0x%x\n", profile_id);
4573 return 0;
4576 static int be_mac_setup(struct be_adapter *adapter)
4578 u8 mac[ETH_ALEN];
4579 int status;
4581 if (is_zero_ether_addr(adapter->netdev->dev_addr)) {
4582 status = be_cmd_get_perm_mac(adapter, mac);
4583 if (status)
4584 return status;
4586 memcpy(adapter->netdev->dev_addr, mac, ETH_ALEN);
4587 memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN);
4589 /* Initial MAC for BE3 VFs is already programmed by PF */
4590 if (BEx_chip(adapter) && be_virtfn(adapter))
4591 memcpy(adapter->dev_mac, mac, ETH_ALEN);
4594 return 0;
4597 static void be_schedule_worker(struct be_adapter *adapter)
4599 queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
4600 adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
4603 static void be_destroy_err_recovery_workq(void)
4605 if (!be_err_recovery_workq)
4606 return;
4608 flush_workqueue(be_err_recovery_workq);
4609 destroy_workqueue(be_err_recovery_workq);
4610 be_err_recovery_workq = NULL;
4613 static void be_schedule_err_detection(struct be_adapter *adapter, u32 delay)
4615 struct be_error_recovery *err_rec = &adapter->error_recovery;
4617 if (!be_err_recovery_workq)
4618 return;
4620 queue_delayed_work(be_err_recovery_workq, &err_rec->err_detection_work,
4621 msecs_to_jiffies(delay));
4622 adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED;
4625 static int be_setup_queues(struct be_adapter *adapter)
4627 struct net_device *netdev = adapter->netdev;
4628 int status;
4630 status = be_evt_queues_create(adapter);
4631 if (status)
4632 goto err;
4634 status = be_tx_qs_create(adapter);
4635 if (status)
4636 goto err;
4638 status = be_rx_cqs_create(adapter);
4639 if (status)
4640 goto err;
4642 status = be_mcc_queues_create(adapter);
4643 if (status)
4644 goto err;
4646 status = netif_set_real_num_rx_queues(netdev, adapter->num_rx_qs);
4647 if (status)
4648 goto err;
4650 status = netif_set_real_num_tx_queues(netdev, adapter->num_tx_qs);
4651 if (status)
4652 goto err;
4654 return 0;
4655 err:
4656 dev_err(&adapter->pdev->dev, "queue_setup failed\n");
4657 return status;
4660 static int be_if_create(struct be_adapter *adapter)
4662 u32 en_flags = BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS;
4663 u32 cap_flags = be_if_cap_flags(adapter);
4664 int status;
4666 /* alloc required memory for other filtering fields */
4667 adapter->pmac_id = kcalloc(be_max_uc(adapter),
4668 sizeof(*adapter->pmac_id), GFP_KERNEL);
4669 if (!adapter->pmac_id)
4670 return -ENOMEM;
4672 adapter->mc_list = kcalloc(be_max_mc(adapter),
4673 sizeof(*adapter->mc_list), GFP_KERNEL);
4674 if (!adapter->mc_list)
4675 return -ENOMEM;
4677 adapter->uc_list = kcalloc(be_max_uc(adapter),
4678 sizeof(*adapter->uc_list), GFP_KERNEL);
4679 if (!adapter->uc_list)
4680 return -ENOMEM;
4682 if (adapter->cfg_num_rx_irqs == 1)
4683 cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS);
4685 en_flags &= cap_flags;
4686 /* will enable all the needed filter flags in be_open() */
4687 status = be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags,
4688 &adapter->if_handle, 0);
4690 if (status)
4691 return status;
4693 return 0;
4696 int be_update_queues(struct be_adapter *adapter)
4698 struct net_device *netdev = adapter->netdev;
4699 int status;
4701 if (netif_running(netdev)) {
4702 /* be_tx_timeout() must not run concurrently with this
4703 * function; synchronize with an already-running dev_watchdog
4705 netif_tx_lock_bh(netdev);
4706 /* device cannot transmit now, avoid dev_watchdog timeouts */
4707 netif_carrier_off(netdev);
4708 netif_tx_unlock_bh(netdev);
4710 be_close(netdev);
4713 be_cancel_worker(adapter);
4715 /* If any vectors have been shared with RoCE we cannot re-program
4716 * the MSIx table.
4718 if (!adapter->num_msix_roce_vec)
4719 be_msix_disable(adapter);
4721 be_clear_queues(adapter);
4722 status = be_cmd_if_destroy(adapter, adapter->if_handle, 0);
4723 if (status)
4724 return status;
4726 if (!msix_enabled(adapter)) {
4727 status = be_msix_enable(adapter);
4728 if (status)
4729 return status;
4732 status = be_if_create(adapter);
4733 if (status)
4734 return status;
4736 status = be_setup_queues(adapter);
4737 if (status)
4738 return status;
4740 be_schedule_worker(adapter);
4742 /* The IF was destroyed and re-created. We need to clear
4743 * all promiscuous flags valid for the destroyed IF.
4744 * Without this, promisc mode is not restored during
4745 * be_open() because the driver thinks that it is
4746 * already enabled in HW.
4748 adapter->if_flags &= ~BE_IF_FLAGS_ALL_PROMISCUOUS;
4750 if (netif_running(netdev))
4751 status = be_open(netdev);
4753 return status;
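/* Parse the leading major number out of a firmware version string,
 * e.g. (illustrative) "10.6.228.49" yields 10; a string that does not
 * begin with a number yields 0.
 */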
4756 static inline int fw_major_num(const char *fw_ver)
4758 int fw_major = 0, i;
4760 i = sscanf(fw_ver, "%d.", &fw_major);
4761 if (i != 1)
4762 return 0;
4764 return fw_major;
4767 /* If it is error recovery, FLR the PF.
4768 * Otherwise, if any VFs are already enabled, don't FLR the PF.
4770 static bool be_reset_required(struct be_adapter *adapter)
4772 if (be_error_recovering(adapter))
4773 return true;
4774 else
4775 return pci_num_vf(adapter->pdev) == 0;
4778 /* Wait for the FW to be ready and perform the required initialization */
4779 static int be_func_init(struct be_adapter *adapter)
4781 int status;
4783 status = be_fw_wait_ready(adapter);
4784 if (status)
4785 return status;
4787 /* FW is now ready; clear errors to allow cmds/doorbell */
4788 be_clear_error(adapter, BE_CLEAR_ALL);
4790 if (be_reset_required(adapter)) {
4791 status = be_cmd_reset_function(adapter);
4792 if (status)
4793 return status;
4795 /* Wait for interrupts to quiesce after an FLR */
4796 msleep(100);
4799 /* Tell FW we're ready to fire cmds */
4800 status = be_cmd_fw_init(adapter);
4801 if (status)
4802 return status;
4804 /* Allow interrupts for other ULPs running on NIC function */
4805 be_intr_set(adapter, true);
4807 return 0;
4810 static int be_setup(struct be_adapter *adapter)
4812 struct device *dev = &adapter->pdev->dev;
4813 int status;
4815 status = be_func_init(adapter);
4816 if (status)
4817 return status;
4819 be_setup_init(adapter);
4821 if (!lancer_chip(adapter))
4822 be_cmd_req_native_mode(adapter);
4824 /* invoke this cmd first to get pf_num and vf_num which are needed
4825 * for issuing profile related cmds
4827 if (!BEx_chip(adapter)) {
4828 status = be_cmd_get_func_config(adapter, NULL);
4829 if (status)
4830 return status;
4833 status = be_get_config(adapter);
4834 if (status)
4835 goto err;
4837 if (!BE2_chip(adapter) && be_physfn(adapter))
4838 be_alloc_sriov_res(adapter);
4840 status = be_get_resources(adapter);
4841 if (status)
4842 goto err;
4844 status = be_msix_enable(adapter);
4845 if (status)
4846 goto err;
4848 /* will enable all the needed filter flags in be_open() */
4849 status = be_if_create(adapter);
4850 if (status)
4851 goto err;
4853 /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
4854 rtnl_lock();
4855 status = be_setup_queues(adapter);
4856 rtnl_unlock();
4857 if (status)
4858 goto err;
4860 be_cmd_get_fn_privileges(adapter, &adapter->cmd_privileges, 0);
4862 status = be_mac_setup(adapter);
4863 if (status)
4864 goto err;
4866 be_cmd_get_fw_ver(adapter);
4867 dev_info(dev, "FW version is %s\n", adapter->fw_ver);
4869 if (BE2_chip(adapter) && fw_major_num(adapter->fw_ver) < 4) {
4870 dev_err(dev, "Firmware on card is old (%s), IRQs may not work",
4871 adapter->fw_ver);
4872 dev_err(dev, "Please upgrade firmware to version >= 4.0\n");
4875 status = be_cmd_set_flow_control(adapter, adapter->tx_fc,
4876 adapter->rx_fc);
4877 if (status)
4878 be_cmd_get_flow_control(adapter, &adapter->tx_fc,
4879 &adapter->rx_fc);
4881 dev_info(&adapter->pdev->dev, "HW Flow control - TX:%d RX:%d\n",
4882 adapter->tx_fc, adapter->rx_fc);
4884 if (be_physfn(adapter))
4885 be_cmd_set_logical_link_config(adapter,
4886 IFLA_VF_LINK_STATE_AUTO, 0);
4888 /* BE3 EVB echoes broadcast/multicast packets back to the PF's vport,
4889 * confusing a Linux bridge or OVS that it might be connected to.
4890 * Set the EVB to PASSTHRU mode, which effectively disables the EVB
4891 * when SRIOV is not enabled.
4893 if (BE3_chip(adapter))
4894 be_cmd_set_hsw_config(adapter, 0, 0, adapter->if_handle,
4895 PORT_FWD_TYPE_PASSTHRU, 0);
4897 if (adapter->num_vfs)
4898 be_vf_setup(adapter);
4900 status = be_cmd_get_phy_info(adapter);
4901 if (!status && be_pause_supported(adapter))
4902 adapter->phy.fc_autoneg = 1;
4904 if (be_physfn(adapter) && !lancer_chip(adapter))
4905 be_cmd_set_features(adapter);
4907 be_schedule_worker(adapter);
4908 adapter->flags |= BE_FLAGS_SETUP_DONE;
4909 return 0;
4910 err:
4911 be_clear(adapter);
4912 return status;
4915 #ifdef CONFIG_NET_POLL_CONTROLLER
4916 static void be_netpoll(struct net_device *netdev)
4918 struct be_adapter *adapter = netdev_priv(netdev);
4919 struct be_eq_obj *eqo;
4920 int i;
4922 for_all_evt_queues(adapter, eqo, i) {
4923 be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
4924 napi_schedule(&eqo->napi);
4927 #endif
4929 int be_load_fw(struct be_adapter *adapter, u8 *fw_file)
4931 const struct firmware *fw;
4932 int status;
4934 if (!netif_running(adapter->netdev)) {
4935 dev_err(&adapter->pdev->dev,
4936 "Firmware load not allowed (interface is down)\n");
4937 return -ENETDOWN;
4940 status = request_firmware(&fw, fw_file, &adapter->pdev->dev);
4941 if (status)
4942 goto fw_exit;
4944 dev_info(&adapter->pdev->dev, "Flashing firmware file %s\n", fw_file);
4946 if (lancer_chip(adapter))
4947 status = lancer_fw_download(adapter, fw);
4948 else
4949 status = be_fw_download(adapter, fw);
4951 if (!status)
4952 be_cmd_get_fw_ver(adapter);
4954 fw_exit:
4955 release_firmware(fw);
4956 return status;
4959 static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
4960 u16 flags, struct netlink_ext_ack *extack)
4962 struct be_adapter *adapter = netdev_priv(dev);
4963 struct nlattr *attr, *br_spec;
4964 int rem;
4965 int status = 0;
4966 u16 mode = 0;
4968 if (!sriov_enabled(adapter))
4969 return -EOPNOTSUPP;
4971 br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
4972 if (!br_spec)
4973 return -EINVAL;
4975 nla_for_each_nested(attr, br_spec, rem) {
4976 if (nla_type(attr) != IFLA_BRIDGE_MODE)
4977 continue;
4979 if (nla_len(attr) < sizeof(mode))
4980 return -EINVAL;
4982 mode = nla_get_u16(attr);
4983 if (BE3_chip(adapter) && mode == BRIDGE_MODE_VEPA)
4984 return -EOPNOTSUPP;
4986 if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
4987 return -EINVAL;
4989 status = be_cmd_set_hsw_config(adapter, 0, 0,
4990 adapter->if_handle,
4991 mode == BRIDGE_MODE_VEPA ?
4992 PORT_FWD_TYPE_VEPA :
4993 PORT_FWD_TYPE_VEB, 0);
4994 if (status)
4995 goto err;
4997 dev_info(&adapter->pdev->dev, "enabled switch mode: %s\n",
4998 mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
5000 return status;
5002 err:
5003 dev_err(&adapter->pdev->dev, "Failed to set switch mode %s\n",
5004 mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
5006 return status;
5009 static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
5010 struct net_device *dev, u32 filter_mask,
5011 int nlflags)
5013 struct be_adapter *adapter = netdev_priv(dev);
5014 int status = 0;
5015 u8 hsw_mode;
5017 /* BE and Lancer chips support VEB mode only */
5018 if (BEx_chip(adapter) || lancer_chip(adapter)) {
5019 /* VEB is disabled in non-SR-IOV profiles on BE3/Lancer */
5020 if (!pci_sriov_get_totalvfs(adapter->pdev))
5021 return 0;
5022 hsw_mode = PORT_FWD_TYPE_VEB;
5023 } else {
5024 status = be_cmd_get_hsw_config(adapter, NULL, 0,
5025 adapter->if_handle, &hsw_mode,
5026 NULL);
5027 if (status)
5028 return 0;
5030 if (hsw_mode == PORT_FWD_TYPE_PASSTHRU)
5031 return 0;
5034 return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
5035 hsw_mode == PORT_FWD_TYPE_VEPA ?
5036 BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB,
5037 0, 0, nlflags, filter_mask, NULL);
5040 static struct be_cmd_work *be_alloc_work(struct be_adapter *adapter,
5041 void (*func)(struct work_struct *))
5043 struct be_cmd_work *work;
5045 work = kzalloc(sizeof(*work), GFP_ATOMIC);
5046 if (!work) {
5047 dev_err(&adapter->pdev->dev,
5048 "be_work memory allocation failed\n");
5049 return NULL;
5052 INIT_WORK(&work->work, func);
5053 work->adapter = adapter;
5054 return work;
5057 /* VxLAN offload Notes:
5059 * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't
5060 * distinguish various types of transports (VxLAN, GRE, NVGRE ..). So, offload
5061 * is expected to work across all types of IP tunnels once exported. Skyhawk
5062 * supports offloads for either VxLAN or NVGRE, exclusively. So we export VxLAN
5063 * offloads in hw_enc_features only when a VxLAN port is added. If other (non
5064 * VxLAN) tunnels are configured while VxLAN offloads are enabled, offloads for
5065 * those other tunnels are unexported on the fly through ndo_features_check().
5067 * Skyhawk supports VxLAN offloads only for one UDP dport. So, if the stack
5068 * adds more than one port, disable offloads and re-enable them again when
5069 * there's only one port left. We maintain a list of ports for this purpose.
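 * Resulting behaviour, for example (illustrative ports): adding port 4789
 * enables offloads for 4789; adding a second port, say 8472, disables
 * offloads; deleting either of the two ports re-enables offloads for the
 * one that remains.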
5071 static void be_work_add_vxlan_port(struct work_struct *work)
5073 struct be_cmd_work *cmd_work =
5074 container_of(work, struct be_cmd_work, work);
5075 struct be_adapter *adapter = cmd_work->adapter;
5076 struct device *dev = &adapter->pdev->dev;
5077 __be16 port = cmd_work->info.vxlan_port;
5078 struct be_vxlan_port *vxlan_port;
5079 int status;
5081 /* Bump up the alias count if it is an existing port */
5082 list_for_each_entry(vxlan_port, &adapter->vxlan_port_list, list) {
5083 if (vxlan_port->port == port) {
5084 vxlan_port->port_aliases++;
5085 goto done;
5089 /* Add a new port to our list. We don't need a lock here since port
5090 * add/delete are done only in the context of a single-threaded work
5091 * queue (be_wq).
5093 vxlan_port = kzalloc(sizeof(*vxlan_port), GFP_KERNEL);
5094 if (!vxlan_port)
5095 goto done;
5097 vxlan_port->port = port;
5098 INIT_LIST_HEAD(&vxlan_port->list);
5099 list_add_tail(&vxlan_port->list, &adapter->vxlan_port_list);
5100 adapter->vxlan_port_count++;
5102 if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS) {
5103 dev_info(dev,
5104 "Only one UDP port supported for VxLAN offloads\n");
5105 dev_info(dev, "Disabling VxLAN offloads\n");
5106 goto err;
5109 if (adapter->vxlan_port_count > 1)
5110 goto done;
5112 status = be_enable_vxlan_offloads(adapter);
5113 if (!status)
5114 goto done;
5116 err:
5117 be_disable_vxlan_offloads(adapter);
5118 done:
5119 kfree(cmd_work);
5120 return;
5123 static void be_work_del_vxlan_port(struct work_struct *work)
5125 struct be_cmd_work *cmd_work =
5126 container_of(work, struct be_cmd_work, work);
5127 struct be_adapter *adapter = cmd_work->adapter;
5128 __be16 port = cmd_work->info.vxlan_port;
5129 struct be_vxlan_port *vxlan_port;
5131 /* Nothing to be done if a port alias is being deleted */
5132 list_for_each_entry(vxlan_port, &adapter->vxlan_port_list, list) {
5133 if (vxlan_port->port == port) {
5134 if (vxlan_port->port_aliases) {
5135 vxlan_port->port_aliases--;
5136 goto done;
5138 break;
5142 /* No port aliases left; delete the port from the list */
5143 list_del(&vxlan_port->list);
5144 adapter->vxlan_port_count--;
5146 /* Disable VxLAN offload if this is the offloaded port */
5147 if (adapter->vxlan_port == vxlan_port->port) {
5148 WARN_ON(adapter->vxlan_port_count);
5149 be_disable_vxlan_offloads(adapter);
5150 dev_info(&adapter->pdev->dev,
5151 "Disabled VxLAN offloads for UDP port %d\n",
5152 be16_to_cpu(port));
5153 goto out;
5156 /* If only 1 port is left, re-enable VxLAN offload */
5157 if (adapter->vxlan_port_count == 1)
5158 be_enable_vxlan_offloads(adapter);
5160 out:
5161 kfree(vxlan_port);
5162 done:
5163 kfree(cmd_work);
5166 static void be_cfg_vxlan_port(struct net_device *netdev,
5167 struct udp_tunnel_info *ti,
5168 void (*func)(struct work_struct *))
5170 struct be_adapter *adapter = netdev_priv(netdev);
5171 struct be_cmd_work *cmd_work;
5173 if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
5174 return;
5176 if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
5177 return;
5179 cmd_work = be_alloc_work(adapter, func);
5180 if (cmd_work) {
5181 cmd_work->info.vxlan_port = ti->port;
5182 queue_work(be_wq, &cmd_work->work);
5186 static void be_del_vxlan_port(struct net_device *netdev,
5187 struct udp_tunnel_info *ti)
5189 be_cfg_vxlan_port(netdev, ti, be_work_del_vxlan_port);
5192 static void be_add_vxlan_port(struct net_device *netdev,
5193 struct udp_tunnel_info *ti)
5195 be_cfg_vxlan_port(netdev, ti, be_work_add_vxlan_port);
5198 static netdev_features_t be_features_check(struct sk_buff *skb,
5199 struct net_device *dev,
5200 netdev_features_t features)
5202 struct be_adapter *adapter = netdev_priv(dev);
5203 u8 l4_hdr = 0;
5205 if (skb_is_gso(skb)) {
5206 /* IPv6 TSO requests with extension hdrs are a problem
5207 * for Lancer and BE3 HW. Disable the TSO6 feature.
5209 if (!skyhawk_chip(adapter) && is_ipv6_ext_hdr(skb))
5210 features &= ~NETIF_F_TSO6;
5212 /* Lancer cannot handle packets with an MSS less than 256.
5213 * It also can't handle a TSO packet with a single segment.
5214 * Disable GSO support in such cases.
5216 if (lancer_chip(adapter) &&
5217 (skb_shinfo(skb)->gso_size < 256 ||
5218 skb_shinfo(skb)->gso_segs == 1))
5219 features &= ~NETIF_F_GSO_MASK;
5222 /* The code below restricts offload features for some tunneled and
5223 * Q-in-Q packets.
5224 * Offload features for normal (non tunnel) packets are unchanged.
5226 features = vlan_features_check(skb, features);
5227 if (!skb->encapsulation ||
5228 !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS))
5229 return features;
5231 /* It's an encapsulated packet and VxLAN offloads are enabled. We
5232 * should disable tunnel offload features if it's not a VxLAN packet,
5233 * as tunnel offloads have been enabled only for VxLAN. This is done to
5234 * allow other tunneled traffic, such as GRE, to work correctly while VxLAN
5235 * offloads are configured in Skyhawk-R.
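 * The checks below require an outer UDP header, an inner Ethernet frame
 * (ETH_P_TEB), the exact VxLAN header length and a destination port that
 * matches the offloaded port before the offload features are left enabled.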
5237 switch (vlan_get_protocol(skb)) {
5238 case htons(ETH_P_IP):
5239 l4_hdr = ip_hdr(skb)->protocol;
5240 break;
5241 case htons(ETH_P_IPV6):
5242 l4_hdr = ipv6_hdr(skb)->nexthdr;
5243 break;
5244 default:
5245 return features;
5248 if (l4_hdr != IPPROTO_UDP ||
5249 skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
5250 skb->inner_protocol != htons(ETH_P_TEB) ||
5251 skb_inner_mac_header(skb) - skb_transport_header(skb) !=
5252 sizeof(struct udphdr) + sizeof(struct vxlanhdr) ||
5253 !adapter->vxlan_port ||
5254 udp_hdr(skb)->dest != adapter->vxlan_port)
5255 return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
5257 return features;
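/* Compose the physical port id: byte 0 carries hba_port_num + 1, followed by
 * the controller serial number words copied in reverse order.
 */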
5260 static int be_get_phys_port_id(struct net_device *dev,
5261 struct netdev_phys_item_id *ppid)
5263 int i, id_len = CNTL_SERIAL_NUM_WORDS * CNTL_SERIAL_NUM_WORD_SZ + 1;
5264 struct be_adapter *adapter = netdev_priv(dev);
5265 u8 *id;
5267 if (MAX_PHYS_ITEM_ID_LEN < id_len)
5268 return -ENOSPC;
5270 ppid->id[0] = adapter->hba_port_num + 1;
5271 id = &ppid->id[1];
5272 for (i = CNTL_SERIAL_NUM_WORDS - 1; i >= 0;
5273 i--, id += CNTL_SERIAL_NUM_WORD_SZ)
5274 memcpy(id, &adapter->serial_num[i], CNTL_SERIAL_NUM_WORD_SZ);
5276 ppid->id_len = id_len;
5278 return 0;
5281 static void be_set_rx_mode(struct net_device *dev)
5283 struct be_adapter *adapter = netdev_priv(dev);
5284 struct be_cmd_work *work;
5286 work = be_alloc_work(adapter, be_work_set_rx_mode);
5287 if (work)
5288 queue_work(be_wq, &work->work);
5291 static const struct net_device_ops be_netdev_ops = {
5292 .ndo_open = be_open,
5293 .ndo_stop = be_close,
5294 .ndo_start_xmit = be_xmit,
5295 .ndo_set_rx_mode = be_set_rx_mode,
5296 .ndo_set_mac_address = be_mac_addr_set,
5297 .ndo_get_stats64 = be_get_stats64,
5298 .ndo_validate_addr = eth_validate_addr,
5299 .ndo_vlan_rx_add_vid = be_vlan_add_vid,
5300 .ndo_vlan_rx_kill_vid = be_vlan_rem_vid,
5301 .ndo_set_vf_mac = be_set_vf_mac,
5302 .ndo_set_vf_vlan = be_set_vf_vlan,
5303 .ndo_set_vf_rate = be_set_vf_tx_rate,
5304 .ndo_get_vf_config = be_get_vf_config,
5305 .ndo_set_vf_link_state = be_set_vf_link_state,
5306 .ndo_set_vf_spoofchk = be_set_vf_spoofchk,
5307 .ndo_tx_timeout = be_tx_timeout,
5308 #ifdef CONFIG_NET_POLL_CONTROLLER
5309 .ndo_poll_controller = be_netpoll,
5310 #endif
5311 .ndo_bridge_setlink = be_ndo_bridge_setlink,
5312 .ndo_bridge_getlink = be_ndo_bridge_getlink,
5313 .ndo_udp_tunnel_add = be_add_vxlan_port,
5314 .ndo_udp_tunnel_del = be_del_vxlan_port,
5315 .ndo_features_check = be_features_check,
5316 .ndo_get_phys_port_id = be_get_phys_port_id,
5319 static void be_netdev_init(struct net_device *netdev)
5321 struct be_adapter *adapter = netdev_priv(netdev);
5323 netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5324 NETIF_F_GSO_UDP_TUNNEL |
5325 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
5326 NETIF_F_HW_VLAN_CTAG_TX;
5327 if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))
5328 netdev->hw_features |= NETIF_F_RXHASH;
5330 netdev->features |= netdev->hw_features |
5331 NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER;
5333 netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5334 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
5336 netdev->priv_flags |= IFF_UNICAST_FLT;
5338 netdev->flags |= IFF_MULTICAST;
5340 netif_set_gso_max_size(netdev, BE_MAX_GSO_SIZE - ETH_HLEN);
5342 netdev->netdev_ops = &be_netdev_ops;
5344 netdev->ethtool_ops = &be_ethtool_ops;
5346 /* MTU range: 256 - 9000 */
5347 netdev->min_mtu = BE_MIN_MTU;
5348 netdev->max_mtu = BE_MAX_MTU;
5351 static void be_cleanup(struct be_adapter *adapter)
5353 struct net_device *netdev = adapter->netdev;
5355 rtnl_lock();
5356 netif_device_detach(netdev);
5357 if (netif_running(netdev))
5358 be_close(netdev);
5359 rtnl_unlock();
5361 be_clear(adapter);
5364 static int be_resume(struct be_adapter *adapter)
5366 struct net_device *netdev = adapter->netdev;
5367 int status;
5369 status = be_setup(adapter);
5370 if (status)
5371 return status;
5373 rtnl_lock();
5374 if (netif_running(netdev))
5375 status = be_open(netdev);
5376 rtnl_unlock();
5378 if (status)
5379 return status;
5381 netif_device_attach(netdev);
5383 return 0;
5386 static void be_soft_reset(struct be_adapter *adapter)
5388 u32 val;
5390 dev_info(&adapter->pdev->dev, "Initiating chip soft reset\n");
5391 val = ioread32(adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5392 val |= SLIPORT_SOFTRESET_SR_MASK;
5393 iowrite32(val, adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5396 static bool be_err_is_recoverable(struct be_adapter *adapter)
5398 struct be_error_recovery *err_rec = &adapter->error_recovery;
5399 unsigned long initial_idle_time =
5400 msecs_to_jiffies(ERR_RECOVERY_IDLE_TIME);
5401 unsigned long recovery_interval =
5402 msecs_to_jiffies(ERR_RECOVERY_INTERVAL);
5403 u16 ue_err_code;
5404 u32 val;
5406 val = be_POST_stage_get(adapter);
5407 if ((val & POST_STAGE_RECOVERABLE_ERR) != POST_STAGE_RECOVERABLE_ERR)
5408 return false;
5409 ue_err_code = val & POST_ERR_RECOVERY_CODE_MASK;
5410 if (ue_err_code == 0)
5411 return false;
5413 dev_err(&adapter->pdev->dev, "Recoverable HW error code: 0x%x\n",
5414 ue_err_code);
5416 if (time_before_eq(jiffies - err_rec->probe_time, initial_idle_time)) {
5417 dev_err(&adapter->pdev->dev,
5418 "Cannot recover within %lu sec from driver load\n",
5419 jiffies_to_msecs(initial_idle_time) / MSEC_PER_SEC);
5420 return false;
5423 if (err_rec->last_recovery_time && time_before_eq(
5424 jiffies - err_rec->last_recovery_time, recovery_interval)) {
5425 dev_err(&adapter->pdev->dev,
5426 "Cannot recover within %lu sec from last recovery\n",
5427 jiffies_to_msecs(recovery_interval) / MSEC_PER_SEC);
5428 return false;
5431 if (ue_err_code == err_rec->last_err_code) {
5432 dev_err(&adapter->pdev->dev,
5433 "Cannot recover from a consecutive TPE error\n");
5434 return false;
5437 err_rec->last_recovery_time = jiffies;
5438 err_rec->last_err_code = ue_err_code;
5439 return true;
5442 static int be_tpe_recover(struct be_adapter *adapter)
5444 struct be_error_recovery *err_rec = &adapter->error_recovery;
5445 int status = -EAGAIN;
5446 u32 val;
5448 switch (err_rec->recovery_state) {
5449 case ERR_RECOVERY_ST_NONE:
5450 err_rec->recovery_state = ERR_RECOVERY_ST_DETECT;
5451 err_rec->resched_delay = ERR_RECOVERY_UE_DETECT_DURATION;
5452 break;
5454 case ERR_RECOVERY_ST_DETECT:
5455 val = be_POST_stage_get(adapter);
5456 if ((val & POST_STAGE_RECOVERABLE_ERR) !=
5457 POST_STAGE_RECOVERABLE_ERR) {
5458 dev_err(&adapter->pdev->dev,
5459 "Unrecoverable HW error detected: 0x%x\n", val);
5460 status = -EINVAL;
5461 err_rec->resched_delay = 0;
5462 break;
5465 dev_err(&adapter->pdev->dev, "Recoverable HW error detected\n");
5467 /* Only PF0 initiates Chip Soft Reset. But PF0 must wait UE2SR
5468 * milliseconds before it checks for final error status in
5469 * SLIPORT_SEMAPHORE to determine if the recovery criteria are met.
5470 * If it does, then PF0 initiates a Soft Reset.
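 * Illustrative timeline: a UE is detected at time T; PF0 re-checks the error
 * around T + ue_to_reset_time and, if the recovery criteria are met, issues
 * the soft reset; all functions then wait until about T + ue_to_poll_time
 * before moving on to poll FW readiness.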
5472 if (adapter->pf_num == 0) {
5473 err_rec->recovery_state = ERR_RECOVERY_ST_RESET;
5474 err_rec->resched_delay = err_rec->ue_to_reset_time -
5475 ERR_RECOVERY_UE_DETECT_DURATION;
5476 break;
5479 err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5480 err_rec->resched_delay = err_rec->ue_to_poll_time -
5481 ERR_RECOVERY_UE_DETECT_DURATION;
5482 break;
5484 case ERR_RECOVERY_ST_RESET:
5485 if (!be_err_is_recoverable(adapter)) {
5486 dev_err(&adapter->pdev->dev,
5487 "Failed to meet recovery criteria\n");
5488 status = -EIO;
5489 err_rec->resched_delay = 0;
5490 break;
5492 be_soft_reset(adapter);
5493 err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5494 err_rec->resched_delay = err_rec->ue_to_poll_time -
5495 err_rec->ue_to_reset_time;
5496 break;
5498 case ERR_RECOVERY_ST_PRE_POLL:
5499 err_rec->recovery_state = ERR_RECOVERY_ST_REINIT;
5500 err_rec->resched_delay = 0;
5501 status = 0; /* done */
5502 break;
5504 default:
5505 status = -EINVAL;
5506 err_rec->resched_delay = 0;
5507 break;
5510 return status;
5513 static int be_err_recover(struct be_adapter *adapter)
5515 int status;
5517 if (!lancer_chip(adapter)) {
5518 if (!adapter->error_recovery.recovery_supported ||
5519 adapter->priv_flags & BE_DISABLE_TPE_RECOVERY)
5520 return -EIO;
5521 status = be_tpe_recover(adapter);
5522 if (status)
5523 goto err;
5526 /* Wait for adapter to reach quiescent state before
5527 * destroying queues
5529 status = be_fw_wait_ready(adapter);
5530 if (status)
5531 goto err;
5533 adapter->flags |= BE_FLAGS_TRY_RECOVERY;
5535 be_cleanup(adapter);
5537 status = be_resume(adapter);
5538 if (status)
5539 goto err;
5541 adapter->flags &= ~BE_FLAGS_TRY_RECOVERY;
5543 err:
5544 return status;
5547 static void be_err_detection_task(struct work_struct *work)
5549 struct be_error_recovery *err_rec =
5550 container_of(work, struct be_error_recovery,
5551 err_detection_work.work);
5552 struct be_adapter *adapter =
5553 container_of(err_rec, struct be_adapter,
5554 error_recovery);
5555 u32 resched_delay = ERR_RECOVERY_DETECTION_DELAY;
5556 struct device *dev = &adapter->pdev->dev;
5557 int recovery_status;
5559 be_detect_error(adapter);
5560 if (!be_check_error(adapter, BE_ERROR_HW))
5561 goto reschedule_task;
5563 recovery_status = be_err_recover(adapter);
5564 if (!recovery_status) {
5565 err_rec->recovery_retries = 0;
5566 err_rec->recovery_state = ERR_RECOVERY_ST_NONE;
5567 dev_info(dev, "Adapter recovery successful\n");
5568 goto reschedule_task;
5569 } else if (!lancer_chip(adapter) && err_rec->resched_delay) {
5570 /* BEx/SH recovery state machine */
5571 if (adapter->pf_num == 0 &&
5572 err_rec->recovery_state > ERR_RECOVERY_ST_DETECT)
5573 dev_err(&adapter->pdev->dev,
5574 "Adapter recovery in progress\n");
5575 resched_delay = err_rec->resched_delay;
5576 goto reschedule_task;
5577 } else if (lancer_chip(adapter) && be_virtfn(adapter)) {
5578 /* For VFs, check if the PF has allocated resources
5579 * every second.
5581 dev_err(dev, "Re-trying adapter recovery\n");
5582 goto reschedule_task;
5583 } else if (lancer_chip(adapter) && err_rec->recovery_retries++ <
5584 ERR_RECOVERY_MAX_RETRY_COUNT) {
5585 /* In case of another error during recovery, it takes 30 sec
5586 * for the adapter to come out of error. Retry error recovery after
5587 * this time interval.
5589 dev_err(&adapter->pdev->dev, "Re-trying adapter recovery\n");
5590 resched_delay = ERR_RECOVERY_RETRY_DELAY;
5591 goto reschedule_task;
5592 } else {
5593 dev_err(dev, "Adapter recovery failed\n");
5594 dev_err(dev, "Please reboot server to recover\n");
5597 return;
5599 reschedule_task:
5600 be_schedule_err_detection(adapter, resched_delay);
5603 static void be_log_sfp_info(struct be_adapter *adapter)
5605 int status;
5607 status = be_cmd_query_sfp_info(adapter);
5608 if (!status) {
5609 dev_err(&adapter->pdev->dev,
5610 "Port %c: %s Vendor: %s part no: %s",
5611 adapter->port_name,
5612 be_misconfig_evt_port_state[adapter->phy_state],
5613 adapter->phy.vendor_name,
5614 adapter->phy.vendor_pn);
5616 adapter->flags &= ~BE_FLAGS_PHY_MISCONFIGURED;
5619 static void be_worker(struct work_struct *work)
5621 struct be_adapter *adapter =
5622 container_of(work, struct be_adapter, work.work);
5623 struct be_rx_obj *rxo;
5624 int i;
5626 if (be_physfn(adapter) &&
5627 MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0)
5628 be_cmd_get_die_temperature(adapter);
5630 /* when interrupts are not yet enabled, just reap any pending
5631 * mcc completions
5633 if (!netif_running(adapter->netdev)) {
5634 be_process_mcc(adapter);
5635 goto reschedule;
5638 if (!adapter->stats_cmd_sent) {
5639 if (lancer_chip(adapter))
5640 lancer_cmd_get_pport_stats(adapter,
5641 &adapter->stats_cmd);
5642 else
5643 be_cmd_get_stats(adapter, &adapter->stats_cmd);
5646 for_all_rx_queues(adapter, rxo, i) {
5647 /* Replenish RX-queues starved due to memory
5648 * allocation failures.
5650 if (rxo->rx_post_starved)
5651 be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST);
5654 /* EQ-delay update for Skyhawk is done while notifying EQ */
5655 if (!skyhawk_chip(adapter))
5656 be_eqd_update(adapter, false);
5658 if (adapter->flags & BE_FLAGS_PHY_MISCONFIGURED)
5659 be_log_sfp_info(adapter);
5661 reschedule:
5662 adapter->work_counter++;
5663 queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
5666 static void be_unmap_pci_bars(struct be_adapter *adapter)
5668 if (adapter->csr)
5669 pci_iounmap(adapter->pdev, adapter->csr);
5670 if (adapter->db)
5671 pci_iounmap(adapter->pdev, adapter->db);
5672 if (adapter->pcicfg && adapter->pcicfg_mapped)
5673 pci_iounmap(adapter->pdev, adapter->pcicfg);
5676 static int db_bar(struct be_adapter *adapter)
5678 if (lancer_chip(adapter) || be_virtfn(adapter))
5679 return 0;
5680 else
5681 return 4;
5684 static int be_roce_map_pci_bars(struct be_adapter *adapter)
5686 if (skyhawk_chip(adapter)) {
5687 adapter->roce_db.size = 4096;
5688 adapter->roce_db.io_addr = pci_resource_start(adapter->pdev,
5689 db_bar(adapter));
5690 adapter->roce_db.total_size = pci_resource_len(adapter->pdev,
5691 db_bar(adapter));
5693 return 0;
5696 static int be_map_pci_bars(struct be_adapter *adapter)
5698 struct pci_dev *pdev = adapter->pdev;
5699 u8 __iomem *addr;
5700 u32 sli_intf;
5702 pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf);
5703 adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >>
5704 SLI_INTF_FAMILY_SHIFT;
5705 adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0;
5707 if (BEx_chip(adapter) && be_physfn(adapter)) {
5708 adapter->csr = pci_iomap(pdev, 2, 0);
5709 if (!adapter->csr)
5710 return -ENOMEM;
5713 addr = pci_iomap(pdev, db_bar(adapter), 0);
5714 if (!addr)
5715 goto pci_map_err;
5716 adapter->db = addr;
5718 if (skyhawk_chip(adapter) || BEx_chip(adapter)) {
5719 if (be_physfn(adapter)) {
5720 /* PCICFG is the 2nd BAR in BE2 */
5721 addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0);
5722 if (!addr)
5723 goto pci_map_err;
5724 adapter->pcicfg = addr;
5725 adapter->pcicfg_mapped = true;
5726 } else {
5727 adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
5728 adapter->pcicfg_mapped = false;
5732 be_roce_map_pci_bars(adapter);
5733 return 0;
5735 pci_map_err:
5736 dev_err(&pdev->dev, "Error in mapping PCI BARs\n");
5737 be_unmap_pci_bars(adapter);
5738 return -ENOMEM;
5741 static void be_drv_cleanup(struct be_adapter *adapter)
5743 struct be_dma_mem *mem = &adapter->mbox_mem_alloced;
5744 struct device *dev = &adapter->pdev->dev;
5746 if (mem->va)
5747 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5749 mem = &adapter->rx_filter;
5750 if (mem->va)
5751 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5753 mem = &adapter->stats_cmd;
5754 if (mem->va)
5755 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5758 /* Allocate and initialize various fields in be_adapter struct */
5759 static int be_drv_init(struct be_adapter *adapter)
5761 struct be_dma_mem *mbox_mem_alloc = &adapter->mbox_mem_alloced;
5762 struct be_dma_mem *mbox_mem_align = &adapter->mbox_mem;
5763 struct be_dma_mem *rx_filter = &adapter->rx_filter;
5764 struct be_dma_mem *stats_cmd = &adapter->stats_cmd;
5765 struct device *dev = &adapter->pdev->dev;
5766 int status = 0;
5768 mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16;
5769 mbox_mem_alloc->va = dma_alloc_coherent(dev, mbox_mem_alloc->size,
5770 &mbox_mem_alloc->dma,
5771 GFP_KERNEL);
5772 if (!mbox_mem_alloc->va)
5773 return -ENOMEM;
5775 mbox_mem_align->size = sizeof(struct be_mcc_mailbox);
5776 mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16);
5777 mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16);
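/* The 16 extra bytes allocated for mbox_mem_alloc above give PTR_ALIGN()
 * room to round the mailbox VA/DMA addresses up to a 16-byte boundary.
 */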
5779 rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
5780 rx_filter->va = dma_alloc_coherent(dev, rx_filter->size,
5781 &rx_filter->dma, GFP_KERNEL);
5782 if (!rx_filter->va) {
5783 status = -ENOMEM;
5784 goto free_mbox;
5787 if (lancer_chip(adapter))
5788 stats_cmd->size = sizeof(struct lancer_cmd_req_pport_stats);
5789 else if (BE2_chip(adapter))
5790 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v0);
5791 else if (BE3_chip(adapter))
5792 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v1);
5793 else
5794 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v2);
5795 stats_cmd->va = dma_alloc_coherent(dev, stats_cmd->size,
5796 &stats_cmd->dma, GFP_KERNEL);
5797 if (!stats_cmd->va) {
5798 status = -ENOMEM;
5799 goto free_rx_filter;
5802 mutex_init(&adapter->mbox_lock);
5803 mutex_init(&adapter->mcc_lock);
5804 mutex_init(&adapter->rx_filter_lock);
5805 spin_lock_init(&adapter->mcc_cq_lock);
5806 init_completion(&adapter->et_cmd_compl);
5808 pci_save_state(adapter->pdev);
5810 INIT_DELAYED_WORK(&adapter->work, be_worker);
5812 adapter->error_recovery.recovery_state = ERR_RECOVERY_ST_NONE;
5813 adapter->error_recovery.resched_delay = 0;
5814 INIT_DELAYED_WORK(&adapter->error_recovery.err_detection_work,
5815 be_err_detection_task);
5817 adapter->rx_fc = true;
5818 adapter->tx_fc = true;
5820 /* Must be a power of 2 or else MODULO will BUG_ON */
5821 adapter->be_get_temp_freq = 64;
5823 INIT_LIST_HEAD(&adapter->vxlan_port_list);
5824 return 0;
5826 free_rx_filter:
5827 dma_free_coherent(dev, rx_filter->size, rx_filter->va, rx_filter->dma);
5828 free_mbox:
5829 dma_free_coherent(dev, mbox_mem_alloc->size, mbox_mem_alloc->va,
5830 mbox_mem_alloc->dma);
5831 return status;
5834 static void be_remove(struct pci_dev *pdev)
5836 struct be_adapter *adapter = pci_get_drvdata(pdev);
5838 if (!adapter)
5839 return;
5841 be_roce_dev_remove(adapter);
5842 be_intr_set(adapter, false);
5844 be_cancel_err_detection(adapter);
5846 unregister_netdev(adapter->netdev);
5848 be_clear(adapter);
5850 if (!pci_vfs_assigned(adapter->pdev))
5851 be_cmd_reset_function(adapter);
5853 /* tell fw we're done with firing cmds */
5854 be_cmd_fw_clean(adapter);
5856 be_unmap_pci_bars(adapter);
5857 be_drv_cleanup(adapter);
5859 pci_disable_pcie_error_reporting(pdev);
5861 pci_release_regions(pdev);
5862 pci_disable_device(pdev);
5864 free_netdev(adapter->netdev);
5867 static ssize_t be_hwmon_show_temp(struct device *dev,
5868 struct device_attribute *dev_attr,
5869 char *buf)
5871 struct be_adapter *adapter = dev_get_drvdata(dev);
5873 /* Unit: millidegree Celsius */
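/* e.g. (illustrative) an on-die temperature of 55 C is reported as 55000 */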
5874 if (adapter->hwmon_info.be_on_die_temp == BE_INVALID_DIE_TEMP)
5875 return -EIO;
5876 else
5877 return sprintf(buf, "%u\n",
5878 adapter->hwmon_info.be_on_die_temp * 1000);
5881 static SENSOR_DEVICE_ATTR(temp1_input, 0444,
5882 be_hwmon_show_temp, NULL, 1);
5884 static struct attribute *be_hwmon_attrs[] = {
5885 &sensor_dev_attr_temp1_input.dev_attr.attr,
5886 NULL
5889 ATTRIBUTE_GROUPS(be_hwmon);
5891 static char *mc_name(struct be_adapter *adapter)
5893 char *str = ""; /* default */
5895 switch (adapter->mc_type) {
5896 case UMC:
5897 str = "UMC";
5898 break;
5899 case FLEX10:
5900 str = "FLEX10";
5901 break;
5902 case vNIC1:
5903 str = "vNIC-1";
5904 break;
5905 case nPAR:
5906 str = "nPAR";
5907 break;
5908 case UFP:
5909 str = "UFP";
5910 break;
5911 case vNIC2:
5912 str = "vNIC-2";
5913 break;
5914 default:
5915 str = "";
5918 return str;
5921 static inline char *func_name(struct be_adapter *adapter)
5923 return be_physfn(adapter) ? "PF" : "VF";
5926 static inline char *nic_name(struct pci_dev *pdev)
5928 switch (pdev->device) {
5929 case OC_DEVICE_ID1:
5930 return OC_NAME;
5931 case OC_DEVICE_ID2:
5932 return OC_NAME_BE;
5933 case OC_DEVICE_ID3:
5934 case OC_DEVICE_ID4:
5935 return OC_NAME_LANCER;
5936 case BE_DEVICE_ID2:
5937 return BE3_NAME;
5938 case OC_DEVICE_ID5:
5939 case OC_DEVICE_ID6:
5940 return OC_NAME_SH;
5941 default:
5942 return BE_NAME;
5946 static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id)
5948 struct be_adapter *adapter;
5949 struct net_device *netdev;
5950 int status = 0;
5952 dev_info(&pdev->dev, "%s version is %s\n", DRV_NAME, DRV_VER);
5954 status = pci_enable_device(pdev);
5955 if (status)
5956 goto do_none;
5958 status = pci_request_regions(pdev, DRV_NAME);
5959 if (status)
5960 goto disable_dev;
5961 pci_set_master(pdev);
5963 netdev = alloc_etherdev_mqs(sizeof(*adapter), MAX_TX_QS, MAX_RX_QS);
5964 if (!netdev) {
5965 status = -ENOMEM;
5966 goto rel_reg;
5968 adapter = netdev_priv(netdev);
5969 adapter->pdev = pdev;
5970 pci_set_drvdata(pdev, adapter);
5971 adapter->netdev = netdev;
5972 SET_NETDEV_DEV(netdev, &pdev->dev);
5974 status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
5975 if (!status) {
5976 netdev->features |= NETIF_F_HIGHDMA;
5977 } else {
5978 status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
5979 if (status) {
5980 dev_err(&pdev->dev, "Could not set PCI DMA Mask\n");
5981 goto free_netdev;
5985 status = pci_enable_pcie_error_reporting(pdev);
5986 if (!status)
5987 dev_info(&pdev->dev, "PCIe error reporting enabled\n");
5989 status = be_map_pci_bars(adapter);
5990 if (status)
5991 goto free_netdev;
5993 status = be_drv_init(adapter);
5994 if (status)
5995 goto unmap_bars;
5997 status = be_setup(adapter);
5998 if (status)
5999 goto drv_cleanup;
6001 be_netdev_init(netdev);
6002 status = register_netdev(netdev);
6003 if (status != 0)
6004 goto unsetup;
6006 be_roce_dev_add(adapter);
6008 be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6009 adapter->error_recovery.probe_time = jiffies;
6011 /* On-die temperature is not supported for VFs. */
6012 if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) {
6013 adapter->hwmon_info.hwmon_dev =
6014 devm_hwmon_device_register_with_groups(&pdev->dev,
6015 DRV_NAME,
6016 adapter,
6017 be_hwmon_groups);
6018 adapter->hwmon_info.be_on_die_temp = BE_INVALID_DIE_TEMP;
6021 dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev),
6022 func_name(adapter), mc_name(adapter), adapter->port_name);
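/* The banner printed above looks like (values illustrative only):
 *   be2net 0000:04:00.0: <nic_name>: PF UMC port 0
 */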
6024 return 0;
6026 unsetup:
6027 be_clear(adapter);
6028 drv_cleanup:
6029 be_drv_cleanup(adapter);
6030 unmap_bars:
6031 be_unmap_pci_bars(adapter);
6032 free_netdev:
6033 free_netdev(netdev);
6034 rel_reg:
6035 pci_release_regions(pdev);
6036 disable_dev:
6037 pci_disable_device(pdev);
6038 do_none:
6039 dev_err(&pdev->dev, "%s initialization failed\n", nic_name(pdev));
6040 return status;
6043 static int be_suspend(struct pci_dev *pdev, pm_message_t state)
6045 struct be_adapter *adapter = pci_get_drvdata(pdev);
6047 be_intr_set(adapter, false);
6048 be_cancel_err_detection(adapter);
6050 be_cleanup(adapter);
6052 pci_save_state(pdev);
6053 pci_disable_device(pdev);
6054 pci_set_power_state(pdev, pci_choose_state(pdev, state));
6055 return 0;
6058 static int be_pci_resume(struct pci_dev *pdev)
6060 struct be_adapter *adapter = pci_get_drvdata(pdev);
6061 int status = 0;
6063 status = pci_enable_device(pdev);
6064 if (status)
6065 return status;
6067 pci_restore_state(pdev);
6069 status = be_resume(adapter);
6070 if (status)
6071 return status;
6073 be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6075 return 0;
6079 /* An FLR will stop BE from DMAing any data. */
6081 static void be_shutdown(struct pci_dev *pdev)
6083 struct be_adapter *adapter = pci_get_drvdata(pdev);
6085 if (!adapter)
6086 return;
6088 be_roce_dev_shutdown(adapter);
6089 cancel_delayed_work_sync(&adapter->work);
6090 be_cancel_err_detection(adapter);
6092 netif_device_detach(adapter->netdev);
6094 be_cmd_reset_function(adapter);
6096 pci_disable_device(pdev);
6099 static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev,
6100 pci_channel_state_t state)
6102 struct be_adapter *adapter = pci_get_drvdata(pdev);
6104 dev_err(&adapter->pdev->dev, "EEH error detected\n");
6106 be_roce_dev_remove(adapter);
6108 if (!be_check_error(adapter, BE_ERROR_EEH)) {
6109 be_set_error(adapter, BE_ERROR_EEH);
6111 be_cancel_err_detection(adapter);
6113 be_cleanup(adapter);
6116 if (state == pci_channel_io_perm_failure)
6117 return PCI_ERS_RESULT_DISCONNECT;
6119 pci_disable_device(pdev);
6121 /* The error could cause the FW to trigger a flash debug dump.
6122 * Resetting the card while the flash dump is in progress can
6123 * prevent it from recovering; wait for the dump to finish.
6124 * Only the first function needs to wait, since one wait per
6125 * adapter is enough.
6127 if (pdev->devfn == 0)
6128 ssleep(30);
6130 return PCI_ERS_RESULT_NEED_RESET;
6133 static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
6135 struct be_adapter *adapter = pci_get_drvdata(pdev);
6136 int status;
6138 dev_info(&adapter->pdev->dev, "EEH reset\n");
6140 status = pci_enable_device(pdev);
6141 if (status)
6142 return PCI_ERS_RESULT_DISCONNECT;
6144 pci_set_master(pdev);
6145 pci_restore_state(pdev);
6147 /* Check if card is ok and fw is ready */
6148 dev_info(&adapter->pdev->dev,
6149 "Waiting for FW to be ready after EEH reset\n");
6150 status = be_fw_wait_ready(adapter);
6151 if (status)
6152 return PCI_ERS_RESULT_DISCONNECT;
6154 be_clear_error(adapter, BE_CLEAR_ALL);
6155 return PCI_ERS_RESULT_RECOVERED;
6158 static void be_eeh_resume(struct pci_dev *pdev)
6160 int status = 0;
6161 struct be_adapter *adapter = pci_get_drvdata(pdev);
6163 dev_info(&adapter->pdev->dev, "EEH resume\n");
6165 pci_save_state(pdev);
6167 status = be_resume(adapter);
6168 if (status)
6169 goto err;
6171 be_roce_dev_add(adapter);
6173 be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6174 return;
6175 err:
6176 dev_err(&adapter->pdev->dev, "EEH resume failed\n");
6179 static int be_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
6181 struct be_adapter *adapter = pci_get_drvdata(pdev);
6182 struct be_resources vft_res = {0};
6183 int status;
6185 if (!num_vfs)
6186 be_vf_clear(adapter);
6188 adapter->num_vfs = num_vfs;
6190 if (adapter->num_vfs == 0 && pci_vfs_assigned(pdev)) {
6191 dev_warn(&pdev->dev,
6192 "Cannot disable VFs while they are assigned\n");
6193 return -EBUSY;
6196 /* When the HW is in an SR-IOV capable configuration, the PF-pool
6197 * resources are distributed equally across the maximum number of VFs.
6198 * The user may enable only a subset of that maximum; in that case,
6199 * redistribute the resources across the requested num_vfs so that
6200 * each VF gets a larger share of resources.
6201 * This facility is not available in BE3 FW, and on Lancer chips the
6202 * FW performs the redistribution itself.
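/* Worked example (numbers purely illustrative): if the PF pool holds 64 RX
 * queues and the SR-IOV capability advertises 32 VFs, the default split
 * leaves each VF with 2 RX queues; if the user enables only num_vfs=8, the
 * redistribution below lets each of those 8 VFs use 8 RX queues instead.
 */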
6204 if (skyhawk_chip(adapter) && !pci_num_vf(pdev)) {
6205 be_calculate_vf_res(adapter, adapter->num_vfs,
6206 &vft_res);
6207 status = be_cmd_set_sriov_config(adapter, adapter->pool_res,
6208 adapter->num_vfs, &vft_res);
6209 if (status)
6210 dev_err(&pdev->dev,
6211 "Failed to optimize SR-IOV resources\n");
6214 status = be_get_resources(adapter);
6215 if (status)
6216 return be_cmd_status(status);
6218 /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
6219 rtnl_lock();
6220 status = be_update_queues(adapter);
6221 rtnl_unlock();
6222 if (status)
6223 return be_cmd_status(status);
6225 if (adapter->num_vfs)
6226 status = be_vf_setup(adapter);
6228 if (!status)
6229 return adapter->num_vfs;
6231 return 0;
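/* PCI/EEH error-recovery flow: the PCI core first calls .error_detected
 * (be_eeh_err_detected); if recovery is possible it resets the slot and
 * calls .slot_reset (be_eeh_reset), and finally .resume (be_eeh_resume) to
 * bring traffic back up. See the kernel's PCI error-recovery documentation
 * for the full state machine.
 */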
6234 static const struct pci_error_handlers be_eeh_handlers = {
6235 .error_detected = be_eeh_err_detected,
6236 .slot_reset = be_eeh_reset,
6237 .resume = be_eeh_resume,
6240 static struct pci_driver be_driver = {
6241 .name = DRV_NAME,
6242 .id_table = be_dev_ids,
6243 .probe = be_probe,
6244 .remove = be_remove,
6245 .suspend = be_suspend,
6246 .resume = be_pci_resume,
6247 .shutdown = be_shutdown,
6248 .sriov_configure = be_pci_sriov_configure,
6249 .err_handler = &be_eeh_handlers
6252 static int __init be_init_module(void)
6254 int status;
6256 if (rx_frag_size != 8192 && rx_frag_size != 4096 &&
6257 rx_frag_size != 2048) {
6258 printk(KERN_WARNING DRV_NAME
6259 " : Module param rx_frag_size must be 2048/4096/8192."
6260 " Using 2048\n");
6261 rx_frag_size = 2048;
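/* Illustrative usage (host and value hypothetical): the fragment size can
 * be chosen at load time, e.g. "modprobe be2net rx_frag_size=4096"; any
 * value other than 2048/4096/8192 falls back to 2048 as handled above.
 */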
6264 if (num_vfs > 0) {
6265 pr_info(DRV_NAME " : Module param num_vfs is obsolete.\n");
6266 pr_info(DRV_NAME " : Use sysfs method to enable VFs\n");
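/* Illustrative sysfs usage (bus address hypothetical): VFs are enabled
 * through the standard SR-IOV interface, e.g.
 *   echo 8 > /sys/bus/pci/devices/0000:04:00.0/sriov_numvfs
 * which reaches be_pci_sriov_configure() via the .sriov_configure hook.
 */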
6269 be_wq = create_singlethread_workqueue("be_wq");
6270 if (!be_wq) {
6271 pr_warn(DRV_NAME " : workqueue creation failed\n");
6272 return -ENOMEM;
6275 be_err_recovery_workq =
6276 create_singlethread_workqueue("be_err_recover");
6277 if (!be_err_recovery_workq)
6278 pr_warn(DRV_NAME " : Could not create error recovery workqueue\n");
6280 status = pci_register_driver(&be_driver);
6281 if (status) {
6282 destroy_workqueue(be_wq);
6283 be_destroy_err_recovery_workq();
6285 return status;
6287 module_init(be_init_module);
6289 static void __exit be_exit_module(void)
6291 pci_unregister_driver(&be_driver);
6293 be_destroy_err_recovery_workq();
6295 if (be_wq)
6296 destroy_workqueue(be_wq);
6298 module_exit(be_exit_module);