drivers/net/ethernet/emulex/benet/be_main.c
1 /*
2 * Copyright (C) 2005 - 2016 Broadcom
3 * All rights reserved.
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License version 2
7 * as published by the Free Software Foundation. The full GNU General
8 * Public License is included in this distribution in the file called COPYING.
10 * Contact Information:
11 * linux-drivers@emulex.com
13 * Emulex
14 * 3333 Susan Street
15 * Costa Mesa, CA 92626
18 #include <linux/prefetch.h>
19 #include <linux/module.h>
20 #include "be.h"
21 #include "be_cmds.h"
22 #include <asm/div64.h>
23 #include <linux/aer.h>
24 #include <linux/if_bridge.h>
25 #include <net/busy_poll.h>
26 #include <net/vxlan.h>
28 MODULE_VERSION(DRV_VER);
29 MODULE_DESCRIPTION(DRV_DESC " " DRV_VER);
30 MODULE_AUTHOR("Emulex Corporation");
31 MODULE_LICENSE("GPL");
33 /* num_vfs module param is obsolete.
34 * Use sysfs method to enable/disable VFs.
36 static unsigned int num_vfs;
37 module_param(num_vfs, uint, S_IRUGO);
38 MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize");
40 static ushort rx_frag_size = 2048;
41 module_param(rx_frag_size, ushort, S_IRUGO);
42 MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data.");
44 /* Per-module error detection/recovery workq shared across all functions.
45 * Each function schedules its own work request on this shared workq.
47 static struct workqueue_struct *be_err_recovery_workq;
49 static const struct pci_device_id be_dev_ids[] = {
50 { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) },
51 { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) },
52 { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID1) },
53 { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID2) },
54 { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID3)},
55 { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID4)},
56 { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID5)},
57 { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID6)},
58 { 0 }
60 MODULE_DEVICE_TABLE(pci, be_dev_ids);
62 /* Workqueue used by all functions for deferring cmd calls to the adapter */
63 static struct workqueue_struct *be_wq;
65 /* UE Status Low CSR */
66 static const char * const ue_status_low_desc[] = {
67 "CEV",
68 "CTX",
69 "DBUF",
70 "ERX",
71 "Host",
72 "MPU",
73 "NDMA",
74 "PTC ",
75 "RDMA ",
76 "RXF ",
77 "RXIPS ",
78 "RXULP0 ",
79 "RXULP1 ",
80 "RXULP2 ",
81 "TIM ",
82 "TPOST ",
83 "TPRE ",
84 "TXIPS ",
85 "TXULP0 ",
86 "TXULP1 ",
87 "UC ",
88 "WDMA ",
89 "TXULP2 ",
90 "HOST1 ",
91 "P0_OB_LINK ",
92 "P1_OB_LINK ",
93 "HOST_GPIO ",
94 "MBOX ",
95 "ERX2 ",
96 "SPARE ",
97 "JTAG ",
98 "MPU_INTPEND "
101 /* UE Status High CSR */
102 static const char * const ue_status_hi_desc[] = {
103 "LPCMEMHOST",
104 "MGMT_MAC",
105 "PCS0ONLINE",
106 "MPU_IRAM",
107 "PCS1ONLINE",
108 "PCTL0",
109 "PCTL1",
110 "PMEM",
111 "RR",
112 "TXPB",
113 "RXPP",
114 "XAUI",
115 "TXP",
116 "ARM",
117 "IPC",
118 "HOST2",
119 "HOST3",
120 "HOST4",
121 "HOST5",
122 "HOST6",
123 "HOST7",
124 "ECRC",
125 "Poison TLP",
126 "NETC",
127 "PERIPH",
128 "LLTXULP",
129 "D2P",
130 "RCON",
131 "LDMA",
132 "LLTXP",
133 "LLTXPB",
134 "Unknown"
137 #define BE_VF_IF_EN_FLAGS (BE_IF_FLAGS_UNTAGGED | \
138 BE_IF_FLAGS_BROADCAST | \
139 BE_IF_FLAGS_MULTICAST | \
140 BE_IF_FLAGS_PASS_L3L4_ERRORS)
142 static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
144 struct be_dma_mem *mem = &q->dma_mem;
146 if (mem->va) {
147 dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va,
148 mem->dma);
149 mem->va = NULL;
153 static int be_queue_alloc(struct be_adapter *adapter, struct be_queue_info *q,
154 u16 len, u16 entry_size)
156 struct be_dma_mem *mem = &q->dma_mem;
158 memset(q, 0, sizeof(*q));
159 q->len = len;
160 q->entry_size = entry_size;
161 mem->size = len * entry_size;
162 mem->va = dma_zalloc_coherent(&adapter->pdev->dev, mem->size, &mem->dma,
163 GFP_KERNEL);
164 if (!mem->va)
165 return -ENOMEM;
166 return 0;
169 static void be_reg_intr_set(struct be_adapter *adapter, bool enable)
171 u32 reg, enabled;
173 pci_read_config_dword(adapter->pdev, PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET,
174 &reg);
175 enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
177 if (!enabled && enable)
178 reg |= MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
179 else if (enabled && !enable)
180 reg &= ~MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
181 else
182 return;
184 pci_write_config_dword(adapter->pdev,
185 PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, reg);
188 static void be_intr_set(struct be_adapter *adapter, bool enable)
190 int status = 0;
192 /* On lancer interrupts can't be controlled via this register */
193 if (lancer_chip(adapter))
194 return;
196 if (be_check_error(adapter, BE_ERROR_EEH))
197 return;
199 status = be_cmd_intr_set(adapter, enable);
200 if (status)
201 be_reg_intr_set(adapter, enable);
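/* Doorbell helpers below: each composes the queue id and a posted/popped
 * count (plus arm/clear bits for EQs and CQs) into a 32-bit value written
 * to the adapter's doorbell BAR. The wmb() in the RQ/TXQ variants makes
 * posted descriptors visible to the device before the doorbell write.
 */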
204 static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted)
206 u32 val = 0;
208 if (be_check_error(adapter, BE_ERROR_HW))
209 return;
211 val |= qid & DB_RQ_RING_ID_MASK;
212 val |= posted << DB_RQ_NUM_POSTED_SHIFT;
214 wmb();
215 iowrite32(val, adapter->db + DB_RQ_OFFSET);
218 static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo,
219 u16 posted)
221 u32 val = 0;
223 if (be_check_error(adapter, BE_ERROR_HW))
224 return;
226 val |= txo->q.id & DB_TXULP_RING_ID_MASK;
227 val |= (posted & DB_TXULP_NUM_POSTED_MASK) << DB_TXULP_NUM_POSTED_SHIFT;
229 wmb();
230 iowrite32(val, adapter->db + txo->db_offset);
233 static void be_eq_notify(struct be_adapter *adapter, u16 qid,
234 bool arm, bool clear_int, u16 num_popped,
235 u32 eq_delay_mult_enc)
237 u32 val = 0;
239 val |= qid & DB_EQ_RING_ID_MASK;
240 val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT);
242 if (be_check_error(adapter, BE_ERROR_HW))
243 return;
245 if (arm)
246 val |= 1 << DB_EQ_REARM_SHIFT;
247 if (clear_int)
248 val |= 1 << DB_EQ_CLR_SHIFT;
249 val |= 1 << DB_EQ_EVNT_SHIFT;
250 val |= num_popped << DB_EQ_NUM_POPPED_SHIFT;
251 val |= eq_delay_mult_enc << DB_EQ_R2I_DLY_SHIFT;
252 iowrite32(val, adapter->db + DB_EQ_OFFSET);
255 void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
257 u32 val = 0;
259 val |= qid & DB_CQ_RING_ID_MASK;
260 val |= ((qid & DB_CQ_RING_ID_EXT_MASK) <<
261 DB_CQ_RING_ID_EXT_MASK_SHIFT);
263 if (be_check_error(adapter, BE_ERROR_HW))
264 return;
266 if (arm)
267 val |= 1 << DB_CQ_REARM_SHIFT;
268 val |= num_popped << DB_CQ_NUM_POPPED_SHIFT;
269 iowrite32(val, adapter->db + DB_CQ_OFFSET);
272 static int be_dev_mac_add(struct be_adapter *adapter, u8 *mac)
274 int i;
276 /* Check if mac has already been added as part of uc-list */
277 for (i = 0; i < adapter->uc_macs; i++) {
278 if (ether_addr_equal((u8 *)&adapter->uc_list[i * ETH_ALEN],
279 mac)) {
280 /* mac already added, skip addition */
281 adapter->pmac_id[0] = adapter->pmac_id[i + 1];
282 return 0;
286 return be_cmd_pmac_add(adapter, mac, adapter->if_handle,
287 &adapter->pmac_id[0], 0);
290 static void be_dev_mac_del(struct be_adapter *adapter, int pmac_id)
292 int i;
294 /* Skip deletion if the programmed mac is
295 * being used in uc-list
297 for (i = 0; i < adapter->uc_macs; i++) {
298 if (adapter->pmac_id[i + 1] == pmac_id)
299 return;
301 be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
304 static int be_mac_addr_set(struct net_device *netdev, void *p)
306 struct be_adapter *adapter = netdev_priv(netdev);
307 struct device *dev = &adapter->pdev->dev;
308 struct sockaddr *addr = p;
309 int status;
310 u8 mac[ETH_ALEN];
311 u32 old_pmac_id = adapter->pmac_id[0];
313 if (!is_valid_ether_addr(addr->sa_data))
314 return -EADDRNOTAVAIL;
316 /* Proceed further only if the user-provided MAC is different
317 * from the active MAC
319 if (ether_addr_equal(addr->sa_data, adapter->dev_mac))
320 return 0;
322 /* if device is not running, copy MAC to netdev->dev_addr */
323 if (!netif_running(netdev))
324 goto done;
326 /* The PMAC_ADD cmd may fail if the VF doesn't have FILTMGMT
327 * privilege or if PF did not provision the new MAC address.
328 * On BE3, this cmd will always fail if the VF doesn't have the
329 * FILTMGMT privilege. This failure is OK, only if the PF programmed
330 * the MAC for the VF.
332 mutex_lock(&adapter->rx_filter_lock);
333 status = be_dev_mac_add(adapter, (u8 *)addr->sa_data);
334 if (!status) {
336 /* Delete the old programmed MAC. This call may fail if the
337 * old MAC was already deleted by the PF driver.
339 if (adapter->pmac_id[0] != old_pmac_id)
340 be_dev_mac_del(adapter, old_pmac_id);
343 mutex_unlock(&adapter->rx_filter_lock);
344 /* Decide if the new MAC is successfully activated only after
345 * querying the FW
347 status = be_cmd_get_active_mac(adapter, adapter->pmac_id[0], mac,
348 adapter->if_handle, true, 0);
349 if (status)
350 goto err;
352 /* The MAC change did not happen, either due to lack of privilege
353 * or PF didn't pre-provision.
355 if (!ether_addr_equal(addr->sa_data, mac)) {
356 status = -EPERM;
357 goto err;
359 done:
360 ether_addr_copy(adapter->dev_mac, addr->sa_data);
361 ether_addr_copy(netdev->dev_addr, addr->sa_data);
362 dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
363 return 0;
364 err:
365 dev_warn(dev, "MAC address change to %pM failed\n", addr->sa_data);
366 return status;
369 /* BE2 supports only v0 cmd */
370 static void *hw_stats_from_cmd(struct be_adapter *adapter)
372 if (BE2_chip(adapter)) {
373 struct be_cmd_resp_get_stats_v0 *cmd = adapter->stats_cmd.va;
375 return &cmd->hw_stats;
376 } else if (BE3_chip(adapter)) {
377 struct be_cmd_resp_get_stats_v1 *cmd = adapter->stats_cmd.va;
379 return &cmd->hw_stats;
380 } else {
381 struct be_cmd_resp_get_stats_v2 *cmd = adapter->stats_cmd.va;
383 return &cmd->hw_stats;
387 /* BE2 supports only v0 cmd */
388 static void *be_erx_stats_from_cmd(struct be_adapter *adapter)
390 if (BE2_chip(adapter)) {
391 struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
393 return &hw_stats->erx;
394 } else if (BE3_chip(adapter)) {
395 struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
397 return &hw_stats->erx;
398 } else {
399 struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
401 return &hw_stats->erx;
405 static void populate_be_v0_stats(struct be_adapter *adapter)
407 struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
408 struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
409 struct be_rxf_stats_v0 *rxf_stats = &hw_stats->rxf;
410 struct be_port_rxf_stats_v0 *port_stats =
411 &rxf_stats->port[adapter->port_num];
412 struct be_drv_stats *drvs = &adapter->drv_stats;
414 be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
415 drvs->rx_pause_frames = port_stats->rx_pause_frames;
416 drvs->rx_crc_errors = port_stats->rx_crc_errors;
417 drvs->rx_control_frames = port_stats->rx_control_frames;
418 drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
419 drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
420 drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
421 drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
422 drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
423 drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
424 drvs->rxpp_fifo_overflow_drop = port_stats->rx_fifo_overflow;
425 drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
426 drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
427 drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
428 drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
429 drvs->rx_input_fifo_overflow_drop = port_stats->rx_input_fifo_overflow;
430 drvs->rx_dropped_header_too_small =
431 port_stats->rx_dropped_header_too_small;
432 drvs->rx_address_filtered =
433 port_stats->rx_address_filtered +
434 port_stats->rx_vlan_filtered;
435 drvs->rx_alignment_symbol_errors =
436 port_stats->rx_alignment_symbol_errors;
438 drvs->tx_pauseframes = port_stats->tx_pauseframes;
439 drvs->tx_controlframes = port_stats->tx_controlframes;
441 if (adapter->port_num)
442 drvs->jabber_events = rxf_stats->port1_jabber_events;
443 else
444 drvs->jabber_events = rxf_stats->port0_jabber_events;
445 drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
446 drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
447 drvs->forwarded_packets = rxf_stats->forwarded_packets;
448 drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
449 drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
450 drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
451 adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
454 static void populate_be_v1_stats(struct be_adapter *adapter)
456 struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
457 struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
458 struct be_rxf_stats_v1 *rxf_stats = &hw_stats->rxf;
459 struct be_port_rxf_stats_v1 *port_stats =
460 &rxf_stats->port[adapter->port_num];
461 struct be_drv_stats *drvs = &adapter->drv_stats;
463 be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
464 drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
465 drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
466 drvs->rx_pause_frames = port_stats->rx_pause_frames;
467 drvs->rx_crc_errors = port_stats->rx_crc_errors;
468 drvs->rx_control_frames = port_stats->rx_control_frames;
469 drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
470 drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
471 drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
472 drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
473 drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
474 drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
475 drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
476 drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
477 drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
478 drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
479 drvs->rx_dropped_header_too_small =
480 port_stats->rx_dropped_header_too_small;
481 drvs->rx_input_fifo_overflow_drop =
482 port_stats->rx_input_fifo_overflow_drop;
483 drvs->rx_address_filtered = port_stats->rx_address_filtered;
484 drvs->rx_alignment_symbol_errors =
485 port_stats->rx_alignment_symbol_errors;
486 drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
487 drvs->tx_pauseframes = port_stats->tx_pauseframes;
488 drvs->tx_controlframes = port_stats->tx_controlframes;
489 drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
490 drvs->jabber_events = port_stats->jabber_events;
491 drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
492 drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
493 drvs->forwarded_packets = rxf_stats->forwarded_packets;
494 drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
495 drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
496 drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
497 adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
500 static void populate_be_v2_stats(struct be_adapter *adapter)
502 struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
503 struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
504 struct be_rxf_stats_v2 *rxf_stats = &hw_stats->rxf;
505 struct be_port_rxf_stats_v2 *port_stats =
506 &rxf_stats->port[adapter->port_num];
507 struct be_drv_stats *drvs = &adapter->drv_stats;
509 be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
510 drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
511 drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
512 drvs->rx_pause_frames = port_stats->rx_pause_frames;
513 drvs->rx_crc_errors = port_stats->rx_crc_errors;
514 drvs->rx_control_frames = port_stats->rx_control_frames;
515 drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
516 drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
517 drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
518 drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
519 drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
520 drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
521 drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
522 drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
523 drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
524 drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
525 drvs->rx_dropped_header_too_small =
526 port_stats->rx_dropped_header_too_small;
527 drvs->rx_input_fifo_overflow_drop =
528 port_stats->rx_input_fifo_overflow_drop;
529 drvs->rx_address_filtered = port_stats->rx_address_filtered;
530 drvs->rx_alignment_symbol_errors =
531 port_stats->rx_alignment_symbol_errors;
532 drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
533 drvs->tx_pauseframes = port_stats->tx_pauseframes;
534 drvs->tx_controlframes = port_stats->tx_controlframes;
535 drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
536 drvs->jabber_events = port_stats->jabber_events;
537 drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
538 drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
539 drvs->forwarded_packets = rxf_stats->forwarded_packets;
540 drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
541 drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
542 drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
543 adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
544 if (be_roce_supported(adapter)) {
545 drvs->rx_roce_bytes_lsd = port_stats->roce_bytes_received_lsd;
546 drvs->rx_roce_bytes_msd = port_stats->roce_bytes_received_msd;
547 drvs->rx_roce_frames = port_stats->roce_frames_received;
548 drvs->roce_drops_crc = port_stats->roce_drops_crc;
549 drvs->roce_drops_payload_len =
550 port_stats->roce_drops_payload_len;
554 static void populate_lancer_stats(struct be_adapter *adapter)
556 struct be_drv_stats *drvs = &adapter->drv_stats;
557 struct lancer_pport_stats *pport_stats = pport_stats_from_cmd(adapter);
559 be_dws_le_to_cpu(pport_stats, sizeof(*pport_stats));
560 drvs->rx_pause_frames = pport_stats->rx_pause_frames_lo;
561 drvs->rx_crc_errors = pport_stats->rx_crc_errors_lo;
562 drvs->rx_control_frames = pport_stats->rx_control_frames_lo;
563 drvs->rx_in_range_errors = pport_stats->rx_in_range_errors;
564 drvs->rx_frame_too_long = pport_stats->rx_frames_too_long_lo;
565 drvs->rx_dropped_runt = pport_stats->rx_dropped_runt;
566 drvs->rx_ip_checksum_errs = pport_stats->rx_ip_checksum_errors;
567 drvs->rx_tcp_checksum_errs = pport_stats->rx_tcp_checksum_errors;
568 drvs->rx_udp_checksum_errs = pport_stats->rx_udp_checksum_errors;
569 drvs->rx_dropped_tcp_length =
570 pport_stats->rx_dropped_invalid_tcp_length;
571 drvs->rx_dropped_too_small = pport_stats->rx_dropped_too_small;
572 drvs->rx_dropped_too_short = pport_stats->rx_dropped_too_short;
573 drvs->rx_out_range_errors = pport_stats->rx_out_of_range_errors;
574 drvs->rx_dropped_header_too_small =
575 pport_stats->rx_dropped_header_too_small;
576 drvs->rx_input_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
577 drvs->rx_address_filtered =
578 pport_stats->rx_address_filtered +
579 pport_stats->rx_vlan_filtered;
580 drvs->rx_alignment_symbol_errors = pport_stats->rx_symbol_errors_lo;
581 drvs->rxpp_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
582 drvs->tx_pauseframes = pport_stats->tx_pause_frames_lo;
583 drvs->tx_controlframes = pport_stats->tx_control_frames_lo;
584 drvs->jabber_events = pport_stats->rx_jabbers;
585 drvs->forwarded_packets = pport_stats->num_forwards_lo;
586 drvs->rx_drops_mtu = pport_stats->rx_drops_mtu_lo;
587 drvs->rx_drops_too_many_frags =
588 pport_stats->rx_drops_too_many_frags_lo;
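/* Accumulate a 16-bit HW counter that wraps at 65535 into a 32-bit driver
 * counter: the low 16 bits of *acc mirror the latest HW reading, while the
 * high 16 bits count completed wrap-arounds.
 */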
591 static void accumulate_16bit_val(u32 *acc, u16 val)
593 #define lo(x) (x & 0xFFFF)
594 #define hi(x) (x & 0xFFFF0000)
595 bool wrapped = val < lo(*acc);
596 u32 newacc = hi(*acc) + val;
598 if (wrapped)
599 newacc += 65536;
600 ACCESS_ONCE(*acc) = newacc;
603 static void populate_erx_stats(struct be_adapter *adapter,
604 struct be_rx_obj *rxo, u32 erx_stat)
606 if (!BEx_chip(adapter))
607 rx_stats(rxo)->rx_drops_no_frags = erx_stat;
608 else
609 /* below erx HW counter can actually wrap around after
610 * 65535. Driver accumulates a 32-bit value
612 accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
613 (u16)erx_stat);
616 void be_parse_stats(struct be_adapter *adapter)
618 struct be_erx_stats_v2 *erx = be_erx_stats_from_cmd(adapter);
619 struct be_rx_obj *rxo;
620 int i;
621 u32 erx_stat;
623 if (lancer_chip(adapter)) {
624 populate_lancer_stats(adapter);
625 } else {
626 if (BE2_chip(adapter))
627 populate_be_v0_stats(adapter);
628 else if (BE3_chip(adapter))
629 /* for BE3 */
630 populate_be_v1_stats(adapter);
631 else
632 populate_be_v2_stats(adapter);
634 /* erx_v2 is longer than v0, v1. use v2 for v0, v1 access */
635 for_all_rx_queues(adapter, rxo, i) {
636 erx_stat = erx->rx_drops_no_fragments[rxo->q.id];
637 populate_erx_stats(adapter, rxo, erx_stat);
642 static struct rtnl_link_stats64 *be_get_stats64(struct net_device *netdev,
643 struct rtnl_link_stats64 *stats)
645 struct be_adapter *adapter = netdev_priv(netdev);
646 struct be_drv_stats *drvs = &adapter->drv_stats;
647 struct be_rx_obj *rxo;
648 struct be_tx_obj *txo;
649 u64 pkts, bytes;
650 unsigned int start;
651 int i;
653 for_all_rx_queues(adapter, rxo, i) {
654 const struct be_rx_stats *rx_stats = rx_stats(rxo);
656 do {
657 start = u64_stats_fetch_begin_irq(&rx_stats->sync);
658 pkts = rx_stats(rxo)->rx_pkts;
659 bytes = rx_stats(rxo)->rx_bytes;
660 } while (u64_stats_fetch_retry_irq(&rx_stats->sync, start));
661 stats->rx_packets += pkts;
662 stats->rx_bytes += bytes;
663 stats->multicast += rx_stats(rxo)->rx_mcast_pkts;
664 stats->rx_dropped += rx_stats(rxo)->rx_drops_no_skbs +
665 rx_stats(rxo)->rx_drops_no_frags;
668 for_all_tx_queues(adapter, txo, i) {
669 const struct be_tx_stats *tx_stats = tx_stats(txo);
671 do {
672 start = u64_stats_fetch_begin_irq(&tx_stats->sync);
673 pkts = tx_stats(txo)->tx_pkts;
674 bytes = tx_stats(txo)->tx_bytes;
675 } while (u64_stats_fetch_retry_irq(&tx_stats->sync, start));
676 stats->tx_packets += pkts;
677 stats->tx_bytes += bytes;
680 /* bad pkts received */
681 stats->rx_errors = drvs->rx_crc_errors +
682 drvs->rx_alignment_symbol_errors +
683 drvs->rx_in_range_errors +
684 drvs->rx_out_range_errors +
685 drvs->rx_frame_too_long +
686 drvs->rx_dropped_too_small +
687 drvs->rx_dropped_too_short +
688 drvs->rx_dropped_header_too_small +
689 drvs->rx_dropped_tcp_length +
690 drvs->rx_dropped_runt;
692 /* detailed rx errors */
693 stats->rx_length_errors = drvs->rx_in_range_errors +
694 drvs->rx_out_range_errors +
695 drvs->rx_frame_too_long;
697 stats->rx_crc_errors = drvs->rx_crc_errors;
699 /* frame alignment errors */
700 stats->rx_frame_errors = drvs->rx_alignment_symbol_errors;
702 /* receiver fifo overrun */
703 /* drops_no_pbuf is not per i/f, it's per BE card */
704 stats->rx_fifo_errors = drvs->rxpp_fifo_overflow_drop +
705 drvs->rx_input_fifo_overflow_drop +
706 drvs->rx_drops_no_pbuf;
707 return stats;
710 void be_link_status_update(struct be_adapter *adapter, u8 link_status)
712 struct net_device *netdev = adapter->netdev;
714 if (!(adapter->flags & BE_FLAGS_LINK_STATUS_INIT)) {
715 netif_carrier_off(netdev);
716 adapter->flags |= BE_FLAGS_LINK_STATUS_INIT;
719 if (link_status)
720 netif_carrier_on(netdev);
721 else
722 netif_carrier_off(netdev);
724 netdev_info(netdev, "Link is %s\n", link_status ? "Up" : "Down");
727 static int be_gso_hdr_len(struct sk_buff *skb)
729 if (skb->encapsulation)
730 return skb_inner_transport_offset(skb) +
731 inner_tcp_hdrlen(skb);
732 return skb_transport_offset(skb) + tcp_hdrlen(skb);
735 static void be_tx_stats_update(struct be_tx_obj *txo, struct sk_buff *skb)
737 struct be_tx_stats *stats = tx_stats(txo);
738 u32 tx_pkts = skb_shinfo(skb)->gso_segs ? : 1;
739 /* Account for headers which get duplicated in TSO pkt */
740 u32 dup_hdr_len = tx_pkts > 1 ? be_gso_hdr_len(skb) * (tx_pkts - 1) : 0;
742 u64_stats_update_begin(&stats->sync);
743 stats->tx_reqs++;
744 stats->tx_bytes += skb->len + dup_hdr_len;
745 stats->tx_pkts += tx_pkts;
746 if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
747 stats->tx_vxlan_offload_pkts += tx_pkts;
748 u64_stats_update_end(&stats->sync);
751 /* Returns number of WRBs needed for the skb */
752 static u32 skb_wrb_cnt(struct sk_buff *skb)
754 /* +1 for the header wrb */
755 return 1 + (skb_headlen(skb) ? 1 : 0) + skb_shinfo(skb)->nr_frags;
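/* Fill a WRB fragment descriptor: the 64-bit DMA address is split into
 * hi/lo 32-bit little-endian words and the length is masked to the
 * descriptor's length field width.
 */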
758 static inline void wrb_fill(struct be_eth_wrb *wrb, u64 addr, int len)
760 wrb->frag_pa_hi = cpu_to_le32(upper_32_bits(addr));
761 wrb->frag_pa_lo = cpu_to_le32(lower_32_bits(addr));
762 wrb->frag_len = cpu_to_le32(len & ETH_WRB_FRAG_LEN_MASK);
763 wrb->rsvd0 = 0;
766 /* A dummy wrb is just all zeros. Using a separate routine for dummy-wrb
767 * to avoid the swap and shift/mask operations in wrb_fill().
769 static inline void wrb_fill_dummy(struct be_eth_wrb *wrb)
771 wrb->frag_pa_hi = 0;
772 wrb->frag_pa_lo = 0;
773 wrb->frag_len = 0;
774 wrb->rsvd0 = 0;
777 static inline u16 be_get_tx_vlan_tag(struct be_adapter *adapter,
778 struct sk_buff *skb)
780 u8 vlan_prio;
781 u16 vlan_tag;
783 vlan_tag = skb_vlan_tag_get(skb);
784 vlan_prio = (vlan_tag & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
785 /* If vlan priority provided by OS is NOT in available bmap */
786 if (!(adapter->vlan_prio_bmap & (1 << vlan_prio)))
787 vlan_tag = (vlan_tag & ~VLAN_PRIO_MASK) |
788 adapter->recommended_prio_bits;
790 return vlan_tag;
793 /* Used only for IP tunnel packets */
794 static u16 skb_inner_ip_proto(struct sk_buff *skb)
796 return (inner_ip_hdr(skb)->version == 4) ?
797 inner_ip_hdr(skb)->protocol : inner_ipv6_hdr(skb)->nexthdr;
800 static u16 skb_ip_proto(struct sk_buff *skb)
802 return (ip_hdr(skb)->version == 4) ?
803 ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
806 static inline bool be_is_txq_full(struct be_tx_obj *txo)
808 return atomic_read(&txo->q.used) + BE_MAX_TX_FRAG_COUNT >= txo->q.len;
811 static inline bool be_can_txq_wake(struct be_tx_obj *txo)
813 return atomic_read(&txo->q.used) < txo->q.len / 2;
816 static inline bool be_is_tx_compl_pending(struct be_tx_obj *txo)
818 return atomic_read(&txo->q.used) > txo->pend_wrb_cnt;
821 static void be_get_wrb_params_from_skb(struct be_adapter *adapter,
822 struct sk_buff *skb,
823 struct be_wrb_params *wrb_params)
825 u16 proto;
827 if (skb_is_gso(skb)) {
828 BE_WRB_F_SET(wrb_params->features, LSO, 1);
829 wrb_params->lso_mss = skb_shinfo(skb)->gso_size;
830 if (skb_is_gso_v6(skb) && !lancer_chip(adapter))
831 BE_WRB_F_SET(wrb_params->features, LSO6, 1);
832 } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
833 if (skb->encapsulation) {
834 BE_WRB_F_SET(wrb_params->features, IPCS, 1);
835 proto = skb_inner_ip_proto(skb);
836 } else {
837 proto = skb_ip_proto(skb);
839 if (proto == IPPROTO_TCP)
840 BE_WRB_F_SET(wrb_params->features, TCPCS, 1);
841 else if (proto == IPPROTO_UDP)
842 BE_WRB_F_SET(wrb_params->features, UDPCS, 1);
845 if (skb_vlan_tag_present(skb)) {
846 BE_WRB_F_SET(wrb_params->features, VLAN, 1);
847 wrb_params->vlan_tag = be_get_tx_vlan_tag(adapter, skb);
850 BE_WRB_F_SET(wrb_params->features, CRC, 1);
853 static void wrb_fill_hdr(struct be_adapter *adapter,
854 struct be_eth_hdr_wrb *hdr,
855 struct be_wrb_params *wrb_params,
856 struct sk_buff *skb)
858 memset(hdr, 0, sizeof(*hdr));
860 SET_TX_WRB_HDR_BITS(crc, hdr,
861 BE_WRB_F_GET(wrb_params->features, CRC));
862 SET_TX_WRB_HDR_BITS(ipcs, hdr,
863 BE_WRB_F_GET(wrb_params->features, IPCS));
864 SET_TX_WRB_HDR_BITS(tcpcs, hdr,
865 BE_WRB_F_GET(wrb_params->features, TCPCS));
866 SET_TX_WRB_HDR_BITS(udpcs, hdr,
867 BE_WRB_F_GET(wrb_params->features, UDPCS));
869 SET_TX_WRB_HDR_BITS(lso, hdr,
870 BE_WRB_F_GET(wrb_params->features, LSO));
871 SET_TX_WRB_HDR_BITS(lso6, hdr,
872 BE_WRB_F_GET(wrb_params->features, LSO6));
873 SET_TX_WRB_HDR_BITS(lso_mss, hdr, wrb_params->lso_mss);
875 /* Hack to skip HW VLAN tagging needs evt = 1, compl = 0. When this
876 * hack is not needed, the evt bit is set while ringing DB.
878 SET_TX_WRB_HDR_BITS(event, hdr,
879 BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW));
880 SET_TX_WRB_HDR_BITS(vlan, hdr,
881 BE_WRB_F_GET(wrb_params->features, VLAN));
882 SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag);
884 SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb));
885 SET_TX_WRB_HDR_BITS(len, hdr, skb->len);
886 SET_TX_WRB_HDR_BITS(mgmt, hdr,
887 BE_WRB_F_GET(wrb_params->features, OS2BMC));
890 static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb,
891 bool unmap_single)
893 dma_addr_t dma;
894 u32 frag_len = le32_to_cpu(wrb->frag_len);
897 dma = (u64)le32_to_cpu(wrb->frag_pa_hi) << 32 |
898 (u64)le32_to_cpu(wrb->frag_pa_lo);
899 if (frag_len) {
900 if (unmap_single)
901 dma_unmap_single(dev, dma, frag_len, DMA_TO_DEVICE);
902 else
903 dma_unmap_page(dev, dma, frag_len, DMA_TO_DEVICE);
907 /* Grab a WRB header for xmit */
908 static u32 be_tx_get_wrb_hdr(struct be_tx_obj *txo)
910 u32 head = txo->q.head;
912 queue_head_inc(&txo->q);
913 return head;
916 /* Set up the WRB header for xmit */
917 static void be_tx_setup_wrb_hdr(struct be_adapter *adapter,
918 struct be_tx_obj *txo,
919 struct be_wrb_params *wrb_params,
920 struct sk_buff *skb, u16 head)
922 u32 num_frags = skb_wrb_cnt(skb);
923 struct be_queue_info *txq = &txo->q;
924 struct be_eth_hdr_wrb *hdr = queue_index_node(txq, head);
926 wrb_fill_hdr(adapter, hdr, wrb_params, skb);
927 be_dws_cpu_to_le(hdr, sizeof(*hdr));
929 BUG_ON(txo->sent_skb_list[head]);
930 txo->sent_skb_list[head] = skb;
931 txo->last_req_hdr = head;
932 atomic_add(num_frags, &txq->used);
933 txo->last_req_wrb_cnt = num_frags;
934 txo->pend_wrb_cnt += num_frags;
937 /* Setup a WRB fragment (buffer descriptor) for xmit */
938 static void be_tx_setup_wrb_frag(struct be_tx_obj *txo, dma_addr_t busaddr,
939 int len)
941 struct be_eth_wrb *wrb;
942 struct be_queue_info *txq = &txo->q;
944 wrb = queue_head_node(txq);
945 wrb_fill(wrb, busaddr, len);
946 queue_head_inc(txq);
949 /* Bring the queue back to the state it was in before be_xmit_enqueue() routine
950 * was invoked. The producer index is restored to the previous packet and the
951 * WRBs of the current packet are unmapped. Invoked to handle tx setup errors.
953 static void be_xmit_restore(struct be_adapter *adapter,
954 struct be_tx_obj *txo, u32 head, bool map_single,
955 u32 copied)
957 struct device *dev;
958 struct be_eth_wrb *wrb;
959 struct be_queue_info *txq = &txo->q;
961 dev = &adapter->pdev->dev;
962 txq->head = head;
964 /* skip the first wrb (hdr); it's not mapped */
965 queue_head_inc(txq);
966 while (copied) {
967 wrb = queue_head_node(txq);
968 unmap_tx_frag(dev, wrb, map_single);
969 map_single = false;
970 copied -= le32_to_cpu(wrb->frag_len);
971 queue_head_inc(txq);
974 txq->head = head;
977 /* Enqueue the given packet for transmit. This routine allocates WRBs for the
978 * packet, dma maps the packet buffers and sets up the WRBs. Returns the number
979 * of WRBs used up by the packet.
981 static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo,
982 struct sk_buff *skb,
983 struct be_wrb_params *wrb_params)
985 u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb);
986 struct device *dev = &adapter->pdev->dev;
987 struct be_queue_info *txq = &txo->q;
988 bool map_single = false;
989 u32 head = txq->head;
990 dma_addr_t busaddr;
991 int len;
993 head = be_tx_get_wrb_hdr(txo);
995 if (skb->len > skb->data_len) {
996 len = skb_headlen(skb);
998 busaddr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
999 if (dma_mapping_error(dev, busaddr))
1000 goto dma_err;
1001 map_single = true;
1002 be_tx_setup_wrb_frag(txo, busaddr, len);
1003 copied += len;
1006 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1007 const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
1008 len = skb_frag_size(frag);
1010 busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE);
1011 if (dma_mapping_error(dev, busaddr))
1012 goto dma_err;
1013 be_tx_setup_wrb_frag(txo, busaddr, len);
1014 copied += len;
1017 be_tx_setup_wrb_hdr(adapter, txo, wrb_params, skb, head);
1019 be_tx_stats_update(txo, skb);
1020 return wrb_cnt;
1022 dma_err:
1023 adapter->drv_stats.dma_map_errors++;
1024 be_xmit_restore(adapter, txo, head, map_single, copied);
1025 return 0;
1028 static inline int qnq_async_evt_rcvd(struct be_adapter *adapter)
1030 return adapter->flags & BE_FLAGS_QNQ_ASYNC_EVT_RCVD;
1033 static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter,
1034 struct sk_buff *skb,
1035 struct be_wrb_params
1036 *wrb_params)
1038 u16 vlan_tag = 0;
1040 skb = skb_share_check(skb, GFP_ATOMIC);
1041 if (unlikely(!skb))
1042 return skb;
1044 if (skb_vlan_tag_present(skb))
1045 vlan_tag = be_get_tx_vlan_tag(adapter, skb);
1047 if (qnq_async_evt_rcvd(adapter) && adapter->pvid) {
1048 if (!vlan_tag)
1049 vlan_tag = adapter->pvid;
1050 /* f/w workaround to set skip_hw_vlan = 1, informs the F/W to
1051 * skip VLAN insertion
1053 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1056 if (vlan_tag) {
1057 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1058 vlan_tag);
1059 if (unlikely(!skb))
1060 return skb;
1061 skb->vlan_tci = 0;
1064 /* Insert the outer VLAN, if any */
1065 if (adapter->qnq_vid) {
1066 vlan_tag = adapter->qnq_vid;
1067 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1068 vlan_tag);
1069 if (unlikely(!skb))
1070 return skb;
1071 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1074 return skb;
1077 static bool be_ipv6_exthdr_check(struct sk_buff *skb)
1079 struct ethhdr *eh = (struct ethhdr *)skb->data;
1080 u16 offset = ETH_HLEN;
1082 if (eh->h_proto == htons(ETH_P_IPV6)) {
1083 struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + offset);
1085 offset += sizeof(struct ipv6hdr);
1086 if (ip6h->nexthdr != NEXTHDR_TCP &&
1087 ip6h->nexthdr != NEXTHDR_UDP) {
1088 struct ipv6_opt_hdr *ehdr =
1089 (struct ipv6_opt_hdr *)(skb->data + offset);
1091 /* offending pkt: 2nd byte following IPv6 hdr is 0xff */
1092 if (ehdr->hdrlen == 0xff)
1093 return true;
1096 return false;
1099 static int be_vlan_tag_tx_chk(struct be_adapter *adapter, struct sk_buff *skb)
1101 return skb_vlan_tag_present(skb) || adapter->pvid || adapter->qnq_vid;
1104 static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb)
1106 return BE3_chip(adapter) && be_ipv6_exthdr_check(skb);
1109 static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
1110 struct sk_buff *skb,
1111 struct be_wrb_params
1112 *wrb_params)
1114 struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1115 unsigned int eth_hdr_len;
1116 struct iphdr *ip;
1118 /* For padded packets, BE HW modifies tot_len field in IP header
1119 * incorrectly when VLAN tag is inserted by HW.
1120 * For padded packets, Lancer computes incorrect checksum.
1122 eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ?
1123 VLAN_ETH_HLEN : ETH_HLEN;
1124 if (skb->len <= 60 &&
1125 (lancer_chip(adapter) || skb_vlan_tag_present(skb)) &&
1126 is_ipv4_pkt(skb)) {
1127 ip = (struct iphdr *)ip_hdr(skb);
1128 pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len));
1131 /* If vlan tag is already inlined in the packet, skip HW VLAN
1132 * tagging in pvid-tagging mode
1134 if (be_pvid_tagging_enabled(adapter) &&
1135 veh->h_vlan_proto == htons(ETH_P_8021Q))
1136 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1138 /* HW has a bug wherein it will calculate CSUM for VLAN
1139 * pkts even though it is disabled.
1140 * Manually insert VLAN in pkt.
1142 if (skb->ip_summed != CHECKSUM_PARTIAL &&
1143 skb_vlan_tag_present(skb)) {
1144 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1145 if (unlikely(!skb))
1146 goto err;
1149 /* HW may lockup when VLAN HW tagging is requested on
1150 * certain ipv6 packets. Drop such pkts if the HW workaround to
1151 * skip HW tagging is not enabled by FW.
1153 if (unlikely(be_ipv6_tx_stall_chk(adapter, skb) &&
1154 (adapter->pvid || adapter->qnq_vid) &&
1155 !qnq_async_evt_rcvd(adapter)))
1156 goto tx_drop;
1158 /* Manual VLAN tag insertion to prevent:
1159 * ASIC lockup when the ASIC inserts VLAN tag into
1160 * certain ipv6 packets. Insert VLAN tags in driver,
1161 * and set event, completion, vlan bits accordingly
1162 * in the Tx WRB.
1164 if (be_ipv6_tx_stall_chk(adapter, skb) &&
1165 be_vlan_tag_tx_chk(adapter, skb)) {
1166 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1167 if (unlikely(!skb))
1168 goto err;
1171 return skb;
1172 tx_drop:
1173 dev_kfree_skb_any(skb);
1174 err:
1175 return NULL;
1178 static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter,
1179 struct sk_buff *skb,
1180 struct be_wrb_params *wrb_params)
1182 int err;
1184 /* Lancer, SH and BE3 in SRIOV mode have a bug wherein
1185 * packets that are 32b or less may cause a transmit stall
1186 * on that port. The workaround is to pad such packets
1187 * (len <= 32 bytes) to a minimum length of 36b.
1189 if (skb->len <= 32) {
1190 if (skb_put_padto(skb, 36))
1191 return NULL;
1194 if (BEx_chip(adapter) || lancer_chip(adapter)) {
1195 skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params);
1196 if (!skb)
1197 return NULL;
1200 /* The stack can send us skbs with length greater than
1201 * what the HW can handle. Trim the extra bytes.
1203 WARN_ON_ONCE(skb->len > BE_MAX_GSO_SIZE);
1204 err = pskb_trim(skb, BE_MAX_GSO_SIZE);
1205 WARN_ON(err);
1207 return skb;
1210 static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo)
1212 struct be_queue_info *txq = &txo->q;
1213 struct be_eth_hdr_wrb *hdr = queue_index_node(txq, txo->last_req_hdr);
1215 /* Mark the last request eventable if it hasn't been marked already */
1216 if (!(hdr->dw[2] & cpu_to_le32(TX_HDR_WRB_EVT)))
1217 hdr->dw[2] |= cpu_to_le32(TX_HDR_WRB_EVT | TX_HDR_WRB_COMPL);
1219 /* compose a dummy wrb if there are odd set of wrbs to notify */
1220 if (!lancer_chip(adapter) && (txo->pend_wrb_cnt & 1)) {
1221 wrb_fill_dummy(queue_head_node(txq));
1222 queue_head_inc(txq);
1223 atomic_inc(&txq->used);
1224 txo->pend_wrb_cnt++;
1225 hdr->dw[2] &= ~cpu_to_le32(TX_HDR_WRB_NUM_MASK <<
1226 TX_HDR_WRB_NUM_SHIFT);
1227 hdr->dw[2] |= cpu_to_le32((txo->last_req_wrb_cnt + 1) <<
1228 TX_HDR_WRB_NUM_SHIFT);
1230 be_txq_notify(adapter, txo, txo->pend_wrb_cnt);
1231 txo->pend_wrb_cnt = 0;
1234 /* OS2BMC related */
1236 #define DHCP_CLIENT_PORT 68
1237 #define DHCP_SERVER_PORT 67
1238 #define NET_BIOS_PORT1 137
1239 #define NET_BIOS_PORT2 138
1240 #define DHCPV6_RAS_PORT 547
1242 #define is_mc_allowed_on_bmc(adapter, eh) \
1243 (!is_multicast_filt_enabled(adapter) && \
1244 is_multicast_ether_addr(eh->h_dest) && \
1245 !is_broadcast_ether_addr(eh->h_dest))
1247 #define is_bc_allowed_on_bmc(adapter, eh) \
1248 (!is_broadcast_filt_enabled(adapter) && \
1249 is_broadcast_ether_addr(eh->h_dest))
1251 #define is_arp_allowed_on_bmc(adapter, skb) \
1252 (is_arp(skb) && is_arp_filt_enabled(adapter))
1254 #define is_broadcast_packet(eh, adapter) \
1255 (is_multicast_ether_addr(eh->h_dest) && \
1256 !compare_ether_addr(eh->h_dest, adapter->netdev->broadcast))
1258 #define is_arp(skb) (skb->protocol == htons(ETH_P_ARP))
1260 #define is_arp_filt_enabled(adapter) \
1261 (adapter->bmc_filt_mask & (BMC_FILT_BROADCAST_ARP))
1263 #define is_dhcp_client_filt_enabled(adapter) \
1264 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_CLIENT)
1266 #define is_dhcp_srvr_filt_enabled(adapter) \
1267 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_SERVER)
1269 #define is_nbios_filt_enabled(adapter) \
1270 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_NET_BIOS)
1272 #define is_ipv6_na_filt_enabled(adapter) \
1273 (adapter->bmc_filt_mask & \
1274 BMC_FILT_MULTICAST_IPV6_NEIGH_ADVER)
1276 #define is_ipv6_ra_filt_enabled(adapter) \
1277 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RA)
1279 #define is_ipv6_ras_filt_enabled(adapter) \
1280 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RAS)
1282 #define is_broadcast_filt_enabled(adapter) \
1283 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST)
1285 #define is_multicast_filt_enabled(adapter) \
1286 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST)
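/* Using the filter masks above, decide whether a copy of this TX packet
 * should also be delivered to the BMC (OS2BMC). Only multicast/broadcast
 * frames and specific protocols (ARP, DHCP, NetBIOS, IPv6 ND/RA, DHCPv6)
 * are candidates, subject to the per-type BMC filter settings.
 */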
1288 static bool be_send_pkt_to_bmc(struct be_adapter *adapter,
1289 struct sk_buff **skb)
1291 struct ethhdr *eh = (struct ethhdr *)(*skb)->data;
1292 bool os2bmc = false;
1294 if (!be_is_os2bmc_enabled(adapter))
1295 goto done;
1297 if (!is_multicast_ether_addr(eh->h_dest))
1298 goto done;
1300 if (is_mc_allowed_on_bmc(adapter, eh) ||
1301 is_bc_allowed_on_bmc(adapter, eh) ||
1302 is_arp_allowed_on_bmc(adapter, (*skb))) {
1303 os2bmc = true;
1304 goto done;
1307 if ((*skb)->protocol == htons(ETH_P_IPV6)) {
1308 struct ipv6hdr *hdr = ipv6_hdr((*skb));
1309 u8 nexthdr = hdr->nexthdr;
1311 if (nexthdr == IPPROTO_ICMPV6) {
1312 struct icmp6hdr *icmp6 = icmp6_hdr((*skb));
1314 switch (icmp6->icmp6_type) {
1315 case NDISC_ROUTER_ADVERTISEMENT:
1316 os2bmc = is_ipv6_ra_filt_enabled(adapter);
1317 goto done;
1318 case NDISC_NEIGHBOUR_ADVERTISEMENT:
1319 os2bmc = is_ipv6_na_filt_enabled(adapter);
1320 goto done;
1321 default:
1322 break;
1327 if (is_udp_pkt((*skb))) {
1328 struct udphdr *udp = udp_hdr((*skb));
1330 switch (ntohs(udp->dest)) {
1331 case DHCP_CLIENT_PORT:
1332 os2bmc = is_dhcp_client_filt_enabled(adapter);
1333 goto done;
1334 case DHCP_SERVER_PORT:
1335 os2bmc = is_dhcp_srvr_filt_enabled(adapter);
1336 goto done;
1337 case NET_BIOS_PORT1:
1338 case NET_BIOS_PORT2:
1339 os2bmc = is_nbios_filt_enabled(adapter);
1340 goto done;
1341 case DHCPV6_RAS_PORT:
1342 os2bmc = is_ipv6_ras_filt_enabled(adapter);
1343 goto done;
1344 default:
1345 break;
1348 done:
1349 /* For VLAN packets destined to the BMC, the ASIC expects
1350 * the VLAN tag to be inline in the packet.
1352 if (os2bmc)
1353 *skb = be_insert_vlan_in_pkt(adapter, *skb, NULL);
1355 return os2bmc;
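/* Main transmit entry point: apply HW workarounds to the skb, map its
 * fragments into WRBs, optionally enqueue a second copy flagged for the
 * BMC (OS2BMC), and ring the TX doorbell when a flush is due.
 */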
1358 static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
1360 struct be_adapter *adapter = netdev_priv(netdev);
1361 u16 q_idx = skb_get_queue_mapping(skb);
1362 struct be_tx_obj *txo = &adapter->tx_obj[q_idx];
1363 struct be_wrb_params wrb_params = { 0 };
1364 bool flush = !skb->xmit_more;
1365 u16 wrb_cnt;
1367 skb = be_xmit_workarounds(adapter, skb, &wrb_params);
1368 if (unlikely(!skb))
1369 goto drop;
1371 be_get_wrb_params_from_skb(adapter, skb, &wrb_params);
1373 wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1374 if (unlikely(!wrb_cnt)) {
1375 dev_kfree_skb_any(skb);
1376 goto drop;
1379 /* if os2bmc is enabled and if the pkt is destined to bmc,
1380 * enqueue the pkt a 2nd time with mgmt bit set.
1382 if (be_send_pkt_to_bmc(adapter, &skb)) {
1383 BE_WRB_F_SET(wrb_params.features, OS2BMC, 1);
1384 wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1385 if (unlikely(!wrb_cnt))
1386 goto drop;
1387 else
1388 skb_get(skb);
1391 if (be_is_txq_full(txo)) {
1392 netif_stop_subqueue(netdev, q_idx);
1393 tx_stats(txo)->tx_stops++;
1396 if (flush || __netif_subqueue_stopped(netdev, q_idx))
1397 be_xmit_flush(adapter, txo);
1399 return NETDEV_TX_OK;
1400 drop:
1401 tx_stats(txo)->tx_drv_drops++;
1402 /* Flush the already enqueued tx requests */
1403 if (flush && txo->pend_wrb_cnt)
1404 be_xmit_flush(adapter, txo);
1406 return NETDEV_TX_OK;
1409 static int be_change_mtu(struct net_device *netdev, int new_mtu)
1411 struct be_adapter *adapter = netdev_priv(netdev);
1412 struct device *dev = &adapter->pdev->dev;
1414 if (new_mtu < BE_MIN_MTU || new_mtu > BE_MAX_MTU) {
1415 dev_info(dev, "MTU must be between %d and %d bytes\n",
1416 BE_MIN_MTU, BE_MAX_MTU);
1417 return -EINVAL;
1420 dev_info(dev, "MTU changed from %d to %d bytes\n",
1421 netdev->mtu, new_mtu);
1422 netdev->mtu = new_mtu;
1423 return 0;
1426 static inline bool be_in_all_promisc(struct be_adapter *adapter)
1428 return (adapter->if_flags & BE_IF_FLAGS_ALL_PROMISCUOUS) ==
1429 BE_IF_FLAGS_ALL_PROMISCUOUS;
1432 static int be_set_vlan_promisc(struct be_adapter *adapter)
1434 struct device *dev = &adapter->pdev->dev;
1435 int status;
1437 if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS)
1438 return 0;
1440 status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, ON);
1441 if (!status) {
1442 dev_info(dev, "Enabled VLAN promiscuous mode\n");
1443 adapter->if_flags |= BE_IF_FLAGS_VLAN_PROMISCUOUS;
1444 } else {
1445 dev_err(dev, "Failed to enable VLAN promiscuous mode\n");
1447 return status;
1450 static int be_clear_vlan_promisc(struct be_adapter *adapter)
1452 struct device *dev = &adapter->pdev->dev;
1453 int status;
1455 status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, OFF);
1456 if (!status) {
1457 dev_info(dev, "Disabling VLAN promiscuous mode\n");
1458 adapter->if_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
1460 return status;
1464 * A max of 64 (BE_NUM_VLANS_SUPPORTED) vlans can be configured in BE.
1465 * If the user configures more, place BE in vlan promiscuous mode.
1467 static int be_vid_config(struct be_adapter *adapter)
1469 struct device *dev = &adapter->pdev->dev;
1470 u16 vids[BE_NUM_VLANS_SUPPORTED];
1471 u16 num = 0, i = 0;
1472 int status = 0;
1474 /* No need to change the VLAN state if the I/F is in promiscuous */
1475 if (adapter->netdev->flags & IFF_PROMISC)
1476 return 0;
1478 if (adapter->vlans_added > be_max_vlans(adapter))
1479 return be_set_vlan_promisc(adapter);
1481 if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
1482 status = be_clear_vlan_promisc(adapter);
1483 if (status)
1484 return status;
1486 /* Construct VLAN Table to give to HW */
1487 for_each_set_bit(i, adapter->vids, VLAN_N_VID)
1488 vids[num++] = cpu_to_le16(i);
1490 status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0);
1491 if (status) {
1492 dev_err(dev, "Setting HW VLAN filtering failed\n");
1493 /* Set to VLAN promisc mode as setting VLAN filter failed */
1494 if (addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_VLANS ||
1495 addl_status(status) ==
1496 MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
1497 return be_set_vlan_promisc(adapter);
1499 return status;
1502 static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1504 struct be_adapter *adapter = netdev_priv(netdev);
1505 int status = 0;
1507 mutex_lock(&adapter->rx_filter_lock);
1509 /* Packets with VID 0 are always received by Lancer by default */
1510 if (lancer_chip(adapter) && vid == 0)
1511 goto done;
1513 if (test_bit(vid, adapter->vids))
1514 goto done;
1516 set_bit(vid, adapter->vids);
1517 adapter->vlans_added++;
1519 status = be_vid_config(adapter);
1520 done:
1521 mutex_unlock(&adapter->rx_filter_lock);
1522 return status;
1525 static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid)
1527 struct be_adapter *adapter = netdev_priv(netdev);
1528 int status = 0;
1530 mutex_lock(&adapter->rx_filter_lock);
1532 /* Packets with VID 0 are always received by Lancer by default */
1533 if (lancer_chip(adapter) && vid == 0)
1534 goto done;
1536 if (!test_bit(vid, adapter->vids))
1537 goto done;
1539 clear_bit(vid, adapter->vids);
1540 adapter->vlans_added--;
1542 status = be_vid_config(adapter);
1543 done:
1544 mutex_unlock(&adapter->rx_filter_lock);
1545 return status;
1548 static void be_set_all_promisc(struct be_adapter *adapter)
1550 be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, ON);
1551 adapter->if_flags |= BE_IF_FLAGS_ALL_PROMISCUOUS;
1554 static void be_set_mc_promisc(struct be_adapter *adapter)
1556 int status;
1558 if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS)
1559 return;
1561 status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MCAST_PROMISCUOUS, ON);
1562 if (!status)
1563 adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS;
1566 static void be_set_uc_promisc(struct be_adapter *adapter)
1568 int status;
1570 if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS)
1571 return;
1573 status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, ON);
1574 if (!status)
1575 adapter->if_flags |= BE_IF_FLAGS_PROMISCUOUS;
1578 static void be_clear_uc_promisc(struct be_adapter *adapter)
1580 int status;
1582 if (!(adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS))
1583 return;
1585 status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, OFF);
1586 if (!status)
1587 adapter->if_flags &= ~BE_IF_FLAGS_PROMISCUOUS;
1590 /* The below 2 functions are the callback args for __dev_mc_sync/dev_uc_sync().
1591 * We use a single callback function for both sync and unsync. We really don't
1592 * add/remove addresses through this callback. But, we use it to detect changes
1593 * to the uc/mc lists. The entire uc/mc list is programmed in be_set_rx_mode().
1595 static int be_uc_list_update(struct net_device *netdev,
1596 const unsigned char *addr)
1598 struct be_adapter *adapter = netdev_priv(netdev);
1600 adapter->update_uc_list = true;
1601 return 0;
1604 static int be_mc_list_update(struct net_device *netdev,
1605 const unsigned char *addr)
1607 struct be_adapter *adapter = netdev_priv(netdev);
1609 adapter->update_mc_list = true;
1610 return 0;
1613 static void be_set_mc_list(struct be_adapter *adapter)
1615 struct net_device *netdev = adapter->netdev;
1616 struct netdev_hw_addr *ha;
1617 bool mc_promisc = false;
1618 int status;
1620 netif_addr_lock_bh(netdev);
1621 __dev_mc_sync(netdev, be_mc_list_update, be_mc_list_update);
1623 if (netdev->flags & IFF_PROMISC) {
1624 adapter->update_mc_list = false;
1625 } else if (netdev->flags & IFF_ALLMULTI ||
1626 netdev_mc_count(netdev) > be_max_mc(adapter)) {
1627 /* Enable multicast promisc if num configured exceeds
1628 * what we support
1630 mc_promisc = true;
1631 adapter->update_mc_list = false;
1632 } else if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS) {
1633 /* Update mc-list unconditionally if the iface was previously
1634 * in mc-promisc mode and now is out of that mode.
1636 adapter->update_mc_list = true;
1639 if (adapter->update_mc_list) {
1640 int i = 0;
1642 /* cache the mc-list in adapter */
1643 netdev_for_each_mc_addr(ha, netdev) {
1644 ether_addr_copy(adapter->mc_list[i].mac, ha->addr);
1645 i++;
1647 adapter->mc_count = netdev_mc_count(netdev);
1649 netif_addr_unlock_bh(netdev);
1651 if (mc_promisc) {
1652 be_set_mc_promisc(adapter);
1653 } else if (adapter->update_mc_list) {
1654 status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
1655 if (!status)
1656 adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
1657 else
1658 be_set_mc_promisc(adapter);
1660 adapter->update_mc_list = false;
1664 static void be_clear_mc_list(struct be_adapter *adapter)
1666 struct net_device *netdev = adapter->netdev;
1668 __dev_mc_unsync(netdev, NULL);
1669 be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, OFF);
1670 adapter->mc_count = 0;
1673 static int be_uc_mac_add(struct be_adapter *adapter, int uc_idx)
1675 if (ether_addr_equal((u8 *)&adapter->uc_list[uc_idx * ETH_ALEN],
1676 adapter->dev_mac)) {
1677 adapter->pmac_id[uc_idx + 1] = adapter->pmac_id[0];
1678 return 0;
1681 return be_cmd_pmac_add(adapter,
1682 (u8 *)&adapter->uc_list[uc_idx * ETH_ALEN],
1683 adapter->if_handle,
1684 &adapter->pmac_id[uc_idx + 1], 0);
1687 static void be_uc_mac_del(struct be_adapter *adapter, int pmac_id)
1689 if (pmac_id == adapter->pmac_id[0])
1690 return;
1692 be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
1695 static void be_set_uc_list(struct be_adapter *adapter)
1697 struct net_device *netdev = adapter->netdev;
1698 struct netdev_hw_addr *ha;
1699 bool uc_promisc = false;
1700 int curr_uc_macs = 0, i;
1702 netif_addr_lock_bh(netdev);
1703 __dev_uc_sync(netdev, be_uc_list_update, be_uc_list_update);
1705 if (netdev->flags & IFF_PROMISC) {
1706 adapter->update_uc_list = false;
1707 } else if (netdev_uc_count(netdev) > (be_max_uc(adapter) - 1)) {
1708 uc_promisc = true;
1709 adapter->update_uc_list = false;
1710 } else if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS) {
1711 /* Update uc-list unconditionally if the iface was previously
1712 * in uc-promisc mode and now is out of that mode.
1714 adapter->update_uc_list = true;
1717 if (adapter->update_uc_list) {
1718 i = 1; /* First slot is claimed by the Primary MAC */
1720 /* cache the uc-list in adapter array */
1721 netdev_for_each_uc_addr(ha, netdev) {
1722 ether_addr_copy(adapter->uc_list[i].mac, ha->addr);
1723 i++;
1725 curr_uc_macs = netdev_uc_count(netdev);
1727 netif_addr_unlock_bh(netdev);
1729 if (uc_promisc) {
1730 be_set_uc_promisc(adapter);
1731 } else if (adapter->update_uc_list) {
1732 be_clear_uc_promisc(adapter);
1734 for (i = 0; i < adapter->uc_macs; i++)
1735 be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1737 for (i = 0; i < curr_uc_macs; i++)
1738 be_uc_mac_add(adapter, i);
1739 adapter->uc_macs = curr_uc_macs;
1740 adapter->update_uc_list = false;
1744 static void be_clear_uc_list(struct be_adapter *adapter)
1746 struct net_device *netdev = adapter->netdev;
1747 int i;
1749 __dev_uc_unsync(netdev, NULL);
1750 for (i = 0; i < adapter->uc_macs; i++)
1751 be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1753 adapter->uc_macs = 0;
1756 static void __be_set_rx_mode(struct be_adapter *adapter)
1758 struct net_device *netdev = adapter->netdev;
1760 mutex_lock(&adapter->rx_filter_lock);
1762 if (netdev->flags & IFF_PROMISC) {
1763 if (!be_in_all_promisc(adapter))
1764 be_set_all_promisc(adapter);
1765 } else if (be_in_all_promisc(adapter)) {
1766 /* We need to re-program the vlan-list or clear
1767 * vlan-promisc mode (if needed) when the interface
1768 * comes out of promisc mode.
1770 be_vid_config(adapter);
1773 be_set_uc_list(adapter);
1774 be_set_mc_list(adapter);
1776 mutex_unlock(&adapter->rx_filter_lock);
1779 static void be_work_set_rx_mode(struct work_struct *work)
1781 struct be_cmd_work *cmd_work =
1782 container_of(work, struct be_cmd_work, work);
1784 __be_set_rx_mode(cmd_work->adapter);
1785 kfree(cmd_work);
1788 static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
1790 struct be_adapter *adapter = netdev_priv(netdev);
1791 struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1792 int status;
1794 if (!sriov_enabled(adapter))
1795 return -EPERM;
1797 if (!is_valid_ether_addr(mac) || vf >= adapter->num_vfs)
1798 return -EINVAL;
1800 /* Proceed further only if user provided MAC is different
1801 * from active MAC
1803 if (ether_addr_equal(mac, vf_cfg->mac_addr))
1804 return 0;
1806 if (BEx_chip(adapter)) {
1807 be_cmd_pmac_del(adapter, vf_cfg->if_handle, vf_cfg->pmac_id,
1808 vf + 1);
1810 status = be_cmd_pmac_add(adapter, mac, vf_cfg->if_handle,
1811 &vf_cfg->pmac_id, vf + 1);
1812 } else {
1813 status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
1814 vf + 1);
1817 if (status) {
1818 dev_err(&adapter->pdev->dev, "MAC %pM set on VF %d Failed: %#x",
1819 mac, vf, status);
1820 return be_cmd_status(status);
1823 ether_addr_copy(vf_cfg->mac_addr, mac);
1825 return 0;
1828 static int be_get_vf_config(struct net_device *netdev, int vf,
1829 struct ifla_vf_info *vi)
1831 struct be_adapter *adapter = netdev_priv(netdev);
1832 struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1834 if (!sriov_enabled(adapter))
1835 return -EPERM;
1837 if (vf >= adapter->num_vfs)
1838 return -EINVAL;
1840 vi->vf = vf;
1841 vi->max_tx_rate = vf_cfg->tx_rate;
1842 vi->min_tx_rate = 0;
1843 vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK;
1844 vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT;
1845 memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN);
1846 vi->linkstate = adapter->vf_cfg[vf].plink_tracking;
1847 vi->spoofchk = adapter->vf_cfg[vf].spoofchk;
1849 return 0;
1852 static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan)
1854 struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1855 u16 vids[BE_NUM_VLANS_SUPPORTED];
1856 int vf_if_id = vf_cfg->if_handle;
1857 int status;
1859 /* Enable Transparent VLAN Tagging */
1860 status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0, 0);
1861 if (status)
1862 return status;
1864 /* Clear pre-programmed VLAN filters on VF if any, if TVT is enabled */
1865 vids[0] = 0;
1866 status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1);
1867 if (!status)
1868 dev_info(&adapter->pdev->dev,
1869 "Cleared guest VLANs on VF%d", vf);
1871 /* After TVT is enabled, disallow VFs to program VLAN filters */
1872 if (vf_cfg->privileges & BE_PRIV_FILTMGMT) {
1873 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges &
1874 ~BE_PRIV_FILTMGMT, vf + 1);
1875 if (!status)
1876 vf_cfg->privileges &= ~BE_PRIV_FILTMGMT;
1878 return 0;
1881 static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
1883 struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1884 struct device *dev = &adapter->pdev->dev;
1885 int status;
1887 /* Reset Transparent VLAN Tagging. */
1888 status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1,
1889 vf_cfg->if_handle, 0, 0);
1890 if (status)
1891 return status;
1893 /* Allow VFs to program VLAN filtering */
1894 if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
1895 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges |
1896 BE_PRIV_FILTMGMT, vf + 1);
1897 if (!status) {
1898 vf_cfg->privileges |= BE_PRIV_FILTMGMT;
1899 dev_info(dev, "VF%d: FILTMGMT priv enabled", vf);
1903 dev_info(dev,
1904 "Disable/re-enable i/f in VM to clear Transparent VLAN tag");
1905 return 0;
1908 static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
1909 __be16 vlan_proto)
1911 struct be_adapter *adapter = netdev_priv(netdev);
1912 struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1913 int status;
1915 if (!sriov_enabled(adapter))
1916 return -EPERM;
1918 if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7)
1919 return -EINVAL;
1921 if (vlan_proto != htons(ETH_P_8021Q))
1922 return -EPROTONOSUPPORT;
1924 if (vlan || qos) {
1925 vlan |= qos << VLAN_PRIO_SHIFT;
1926 status = be_set_vf_tvt(adapter, vf, vlan);
1927 } else {
1928 status = be_clear_vf_tvt(adapter, vf);
1931 if (status) {
1932 dev_err(&adapter->pdev->dev,
1933 "VLAN %d config on VF %d failed : %#x\n", vlan, vf,
1934 status);
1935 return be_cmd_status(status);
1938 vf_cfg->vlan_tag = vlan;
1939 return 0;
1942 static int be_set_vf_tx_rate(struct net_device *netdev, int vf,
1943 int min_tx_rate, int max_tx_rate)
1945 struct be_adapter *adapter = netdev_priv(netdev);
1946 struct device *dev = &adapter->pdev->dev;
1947 int percent_rate, status = 0;
1948 u16 link_speed = 0;
1949 u8 link_status;
1951 if (!sriov_enabled(adapter))
1952 return -EPERM;
1954 if (vf >= adapter->num_vfs)
1955 return -EINVAL;
1957 if (min_tx_rate)
1958 return -EINVAL;
1960 if (!max_tx_rate)
1961 goto config_qos;
1963 status = be_cmd_link_status_query(adapter, &link_speed,
1964 &link_status, 0);
1965 if (status)
1966 goto err;
1968 if (!link_status) {
1969 dev_err(dev, "TX-rate setting not allowed when link is down\n");
1970 status = -ENETDOWN;
1971 goto err;
1974 if (max_tx_rate < 100 || max_tx_rate > link_speed) {
1975 dev_err(dev, "TX-rate must be between 100 and %d Mbps\n",
1976 link_speed);
1977 status = -EINVAL;
1978 goto err;
1981 /* On Skyhawk the QOS setting must be done only as a % value */
1982 percent_rate = link_speed / 100;
1983 if (skyhawk_chip(adapter) && (max_tx_rate % percent_rate)) {
1984 dev_err(dev, "TX-rate must be a multiple of %d Mbps\n",
1985 percent_rate);
1986 status = -EINVAL;
1987 goto err;
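/* Example of the checks above: with link_speed = 10000 Mbps,
 * percent_rate = 100, so on Skyhawk a requested max_tx_rate of
 * 2550 Mbps is rejected while 2500 Mbps (a whole multiple of 1%
 * of the link speed) is accepted and passed to be_cmd_config_qos().
 */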
1990 config_qos:
1991 status = be_cmd_config_qos(adapter, max_tx_rate, link_speed, vf + 1);
1992 if (status)
1993 goto err;
1995 adapter->vf_cfg[vf].tx_rate = max_tx_rate;
1996 return 0;
1998 err:
1999 dev_err(dev, "TX-rate setting of %dMbps on VF%d failed\n",
2000 max_tx_rate, vf);
2001 return be_cmd_status(status);
2004 static int be_set_vf_link_state(struct net_device *netdev, int vf,
2005 int link_state)
2007 struct be_adapter *adapter = netdev_priv(netdev);
2008 int status;
2010 if (!sriov_enabled(adapter))
2011 return -EPERM;
2013 if (vf >= adapter->num_vfs)
2014 return -EINVAL;
2016 status = be_cmd_set_logical_link_config(adapter, link_state, vf+1);
2017 if (status) {
2018 dev_err(&adapter->pdev->dev,
2019 "Link state change on VF %d failed: %#x\n", vf, status);
2020 return be_cmd_status(status);
2023 adapter->vf_cfg[vf].plink_tracking = link_state;
2025 return 0;
2028 static int be_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable)
2030 struct be_adapter *adapter = netdev_priv(netdev);
2031 struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
2032 u8 spoofchk;
2033 int status;
2035 if (!sriov_enabled(adapter))
2036 return -EPERM;
2038 if (vf >= adapter->num_vfs)
2039 return -EINVAL;
2041 if (BEx_chip(adapter))
2042 return -EOPNOTSUPP;
2044 if (enable == vf_cfg->spoofchk)
2045 return 0;
2047 spoofchk = enable ? ENABLE_MAC_SPOOFCHK : DISABLE_MAC_SPOOFCHK;
2049 status = be_cmd_set_hsw_config(adapter, 0, vf + 1, vf_cfg->if_handle,
2050 0, spoofchk);
2051 if (status) {
2052 dev_err(&adapter->pdev->dev,
2053 "Spoofchk change on VF %d failed: %#x\n", vf, status);
2054 return be_cmd_status(status);
2057 vf_cfg->spoofchk = enable;
2058 return 0;
2061 static void be_aic_update(struct be_aic_obj *aic, u64 rx_pkts, u64 tx_pkts,
2062 ulong now)
2064 aic->rx_pkts_prev = rx_pkts;
2065 aic->tx_reqs_prev = tx_pkts;
2066 aic->jiffies = now;
2069 static int be_get_new_eqd(struct be_eq_obj *eqo)
2071 struct be_adapter *adapter = eqo->adapter;
2072 int eqd, start;
2073 struct be_aic_obj *aic;
2074 struct be_rx_obj *rxo;
2075 struct be_tx_obj *txo;
2076 u64 rx_pkts = 0, tx_pkts = 0;
2077 ulong now;
2078 u32 pps, delta;
2079 int i;
2081 aic = &adapter->aic_obj[eqo->idx];
2082 if (!aic->enable) {
2083 if (aic->jiffies)
2084 aic->jiffies = 0;
2085 eqd = aic->et_eqd;
2086 return eqd;
2089 for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
2090 do {
2091 start = u64_stats_fetch_begin_irq(&rxo->stats.sync);
2092 rx_pkts += rxo->stats.rx_pkts;
2093 } while (u64_stats_fetch_retry_irq(&rxo->stats.sync, start));
2096 for_all_tx_queues_on_eq(adapter, eqo, txo, i) {
2097 do {
2098 start = u64_stats_fetch_begin_irq(&txo->stats.sync);
2099 tx_pkts += txo->stats.tx_reqs;
2100 } while (u64_stats_fetch_retry_irq(&txo->stats.sync, start));
2103 /* Skip if the counters wrapped around or this is the first calculation */
2104 now = jiffies;
2105 if (!aic->jiffies || time_before(now, aic->jiffies) ||
2106 rx_pkts < aic->rx_pkts_prev ||
2107 tx_pkts < aic->tx_reqs_prev) {
2108 be_aic_update(aic, rx_pkts, tx_pkts, now);
2109 return aic->prev_eqd;
2112 delta = jiffies_to_msecs(now - aic->jiffies);
2113 if (delta == 0)
2114 return aic->prev_eqd;
2116 pps = (((u32)(rx_pkts - aic->rx_pkts_prev) * 1000) / delta) +
2117 (((u32)(tx_pkts - aic->tx_reqs_prev) * 1000) / delta);
2118 eqd = (pps / 15000) << 2;
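/* Example: at a combined RX+TX rate of 150,000 pkts/sec,
 * eqd = (150000 / 15000) << 2 = 40, which is then clamped to the
 * [aic->min_eqd, aic->max_eqd] range below; anything under
 * 30,000 pkts/sec yields eqd < 8 and the delay is turned off.
 */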
2120 if (eqd < 8)
2121 eqd = 0;
2122 eqd = min_t(u32, eqd, aic->max_eqd);
2123 eqd = max_t(u32, eqd, aic->min_eqd);
2125 be_aic_update(aic, rx_pkts, tx_pkts, now);
2127 return eqd;
2130 /* For Skyhawk-R only */
2131 static u32 be_get_eq_delay_mult_enc(struct be_eq_obj *eqo)
2133 struct be_adapter *adapter = eqo->adapter;
2134 struct be_aic_obj *aic = &adapter->aic_obj[eqo->idx];
2135 ulong now = jiffies;
2136 int eqd;
2137 u32 mult_enc;
2139 if (!aic->enable)
2140 return 0;
2142 if (jiffies_to_msecs(now - aic->jiffies) < 1)
2143 eqd = aic->prev_eqd;
2144 else
2145 eqd = be_get_new_eqd(eqo);
2147 if (eqd > 100)
2148 mult_enc = R2I_DLY_ENC_1;
2149 else if (eqd > 60)
2150 mult_enc = R2I_DLY_ENC_2;
2151 else if (eqd > 20)
2152 mult_enc = R2I_DLY_ENC_3;
2153 else
2154 mult_enc = R2I_DLY_ENC_0;
2156 aic->prev_eqd = eqd;
2158 return mult_enc;
2161 void be_eqd_update(struct be_adapter *adapter, bool force_update)
2163 struct be_set_eqd set_eqd[MAX_EVT_QS];
2164 struct be_aic_obj *aic;
2165 struct be_eq_obj *eqo;
2166 int i, num = 0, eqd;
2168 for_all_evt_queues(adapter, eqo, i) {
2169 aic = &adapter->aic_obj[eqo->idx];
2170 eqd = be_get_new_eqd(eqo);
2171 if (force_update || eqd != aic->prev_eqd) {
2172 set_eqd[num].delay_multiplier = (eqd * 65)/100;
2173 set_eqd[num].eq_id = eqo->q.id;
2174 aic->prev_eqd = eqd;
2175 num++;
2179 if (num)
2180 be_cmd_modify_eqd(adapter, set_eqd, num);
2183 static void be_rx_stats_update(struct be_rx_obj *rxo,
2184 struct be_rx_compl_info *rxcp)
2186 struct be_rx_stats *stats = rx_stats(rxo);
2188 u64_stats_update_begin(&stats->sync);
2189 stats->rx_compl++;
2190 stats->rx_bytes += rxcp->pkt_size;
2191 stats->rx_pkts++;
2192 if (rxcp->tunneled)
2193 stats->rx_vxlan_offload_pkts++;
2194 if (rxcp->pkt_type == BE_MULTICAST_PACKET)
2195 stats->rx_mcast_pkts++;
2196 if (rxcp->err)
2197 stats->rx_compl_err++;
2198 u64_stats_update_end(&stats->sync);
2201 static inline bool csum_passed(struct be_rx_compl_info *rxcp)
2203 /* L4 checksum is not reliable for non-TCP/UDP packets.
2204 * Also ignore ipcksm for ipv6 pkts
2206 return (rxcp->tcpf || rxcp->udpf) && rxcp->l4_csum &&
2207 (rxcp->ip_csum || rxcp->ipv6) && !rxcp->err;
2210 static struct be_rx_page_info *get_rx_page_info(struct be_rx_obj *rxo)
2212 struct be_adapter *adapter = rxo->adapter;
2213 struct be_rx_page_info *rx_page_info;
2214 struct be_queue_info *rxq = &rxo->q;
2215 u32 frag_idx = rxq->tail;
2217 rx_page_info = &rxo->page_info_tbl[frag_idx];
2218 BUG_ON(!rx_page_info->page);
2220 if (rx_page_info->last_frag) {
2221 dma_unmap_page(&adapter->pdev->dev,
2222 dma_unmap_addr(rx_page_info, bus),
2223 adapter->big_page_size, DMA_FROM_DEVICE);
2224 rx_page_info->last_frag = false;
2225 } else {
2226 dma_sync_single_for_cpu(&adapter->pdev->dev,
2227 dma_unmap_addr(rx_page_info, bus),
2228 rx_frag_size, DMA_FROM_DEVICE);
2231 queue_tail_inc(rxq);
2232 atomic_dec(&rxq->used);
2233 return rx_page_info;
2236 /* Throw away the data in the Rx completion */
2237 static void be_rx_compl_discard(struct be_rx_obj *rxo,
2238 struct be_rx_compl_info *rxcp)
2240 struct be_rx_page_info *page_info;
2241 u16 i, num_rcvd = rxcp->num_rcvd;
2243 for (i = 0; i < num_rcvd; i++) {
2244 page_info = get_rx_page_info(rxo);
2245 put_page(page_info->page);
2246 memset(page_info, 0, sizeof(*page_info));
2251 * skb_fill_rx_data forms a complete skb for an ether frame
2252 * indicated by rxcp.
2254 static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
2255 struct be_rx_compl_info *rxcp)
2257 struct be_rx_page_info *page_info;
2258 u16 i, j;
2259 u16 hdr_len, curr_frag_len, remaining;
2260 u8 *start;
2262 page_info = get_rx_page_info(rxo);
2263 start = page_address(page_info->page) + page_info->page_offset;
2264 prefetch(start);
2266 /* Copy data in the first descriptor of this completion */
2267 curr_frag_len = min(rxcp->pkt_size, rx_frag_size);
2269 skb->len = curr_frag_len;
2270 if (curr_frag_len <= BE_HDR_LEN) { /* tiny packet */
2271 memcpy(skb->data, start, curr_frag_len);
2272 /* Complete packet has now been moved to data */
2273 put_page(page_info->page);
2274 skb->data_len = 0;
2275 skb->tail += curr_frag_len;
2276 } else {
2277 hdr_len = ETH_HLEN;
2278 memcpy(skb->data, start, hdr_len);
2279 skb_shinfo(skb)->nr_frags = 1;
2280 skb_frag_set_page(skb, 0, page_info->page);
2281 skb_shinfo(skb)->frags[0].page_offset =
2282 page_info->page_offset + hdr_len;
2283 skb_frag_size_set(&skb_shinfo(skb)->frags[0],
2284 curr_frag_len - hdr_len);
2285 skb->data_len = curr_frag_len - hdr_len;
2286 skb->truesize += rx_frag_size;
2287 skb->tail += hdr_len;
2289 page_info->page = NULL;
2291 if (rxcp->pkt_size <= rx_frag_size) {
2292 BUG_ON(rxcp->num_rcvd != 1);
2293 return;
2296 /* More frags present for this completion */
2297 remaining = rxcp->pkt_size - curr_frag_len;
2298 for (i = 1, j = 0; i < rxcp->num_rcvd; i++) {
2299 page_info = get_rx_page_info(rxo);
2300 curr_frag_len = min(remaining, rx_frag_size);
2302 /* Coalesce all frags from the same physical page in one slot */
2303 if (page_info->page_offset == 0) {
2304 /* Fresh page */
2305 j++;
2306 skb_frag_set_page(skb, j, page_info->page);
2307 skb_shinfo(skb)->frags[j].page_offset =
2308 page_info->page_offset;
2309 skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2310 skb_shinfo(skb)->nr_frags++;
2311 } else {
2312 put_page(page_info->page);
2315 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2316 skb->len += curr_frag_len;
2317 skb->data_len += curr_frag_len;
2318 skb->truesize += rx_frag_size;
2319 remaining -= curr_frag_len;
2320 page_info->page = NULL;
2322 BUG_ON(j > MAX_SKB_FRAGS);
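/* Example: a 5000-byte frame with rx_frag_size = 2048 spans three RX
 * fragments; the routine above copies the Ethernet header into the skb's
 * linear area, attaches the rest of the first fragment as frag[0], and
 * appends the remaining fragments, coalescing those that come from the
 * same physical page into a single frag slot.
 */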
2325 /* Process the RX completion indicated by rxcp when GRO is disabled */
2326 static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
2327 struct be_rx_compl_info *rxcp)
2329 struct be_adapter *adapter = rxo->adapter;
2330 struct net_device *netdev = adapter->netdev;
2331 struct sk_buff *skb;
2333 skb = netdev_alloc_skb_ip_align(netdev, BE_RX_SKB_ALLOC_SIZE);
2334 if (unlikely(!skb)) {
2335 rx_stats(rxo)->rx_drops_no_skbs++;
2336 be_rx_compl_discard(rxo, rxcp);
2337 return;
2340 skb_fill_rx_data(rxo, skb, rxcp);
2342 if (likely((netdev->features & NETIF_F_RXCSUM) && csum_passed(rxcp)))
2343 skb->ip_summed = CHECKSUM_UNNECESSARY;
2344 else
2345 skb_checksum_none_assert(skb);
2347 skb->protocol = eth_type_trans(skb, netdev);
2348 skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2349 if (netdev->features & NETIF_F_RXHASH)
2350 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2352 skb->csum_level = rxcp->tunneled;
2353 skb_mark_napi_id(skb, napi);
2355 if (rxcp->vlanf)
2356 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2358 netif_receive_skb(skb);
2361 /* Process the RX completion indicated by rxcp when GRO is enabled */
2362 static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
2363 struct napi_struct *napi,
2364 struct be_rx_compl_info *rxcp)
2366 struct be_adapter *adapter = rxo->adapter;
2367 struct be_rx_page_info *page_info;
2368 struct sk_buff *skb = NULL;
2369 u16 remaining, curr_frag_len;
2370 u16 i, j;
2372 skb = napi_get_frags(napi);
2373 if (!skb) {
2374 be_rx_compl_discard(rxo, rxcp);
2375 return;
2378 remaining = rxcp->pkt_size;
2379 for (i = 0, j = -1; i < rxcp->num_rcvd; i++) {
2380 page_info = get_rx_page_info(rxo);
2382 curr_frag_len = min(remaining, rx_frag_size);
2384 /* Coalesce all frags from the same physical page in one slot */
2385 if (i == 0 || page_info->page_offset == 0) {
2386 /* First frag or Fresh page */
2387 j++;
2388 skb_frag_set_page(skb, j, page_info->page);
2389 skb_shinfo(skb)->frags[j].page_offset =
2390 page_info->page_offset;
2391 skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2392 } else {
2393 put_page(page_info->page);
2395 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2396 skb->truesize += rx_frag_size;
2397 remaining -= curr_frag_len;
2398 memset(page_info, 0, sizeof(*page_info));
2400 BUG_ON(j > MAX_SKB_FRAGS);
2402 skb_shinfo(skb)->nr_frags = j + 1;
2403 skb->len = rxcp->pkt_size;
2404 skb->data_len = rxcp->pkt_size;
2405 skb->ip_summed = CHECKSUM_UNNECESSARY;
2406 skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2407 if (adapter->netdev->features & NETIF_F_RXHASH)
2408 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2410 skb->csum_level = rxcp->tunneled;
2412 if (rxcp->vlanf)
2413 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2415 napi_gro_frags(napi);
2418 static void be_parse_rx_compl_v1(struct be_eth_rx_compl *compl,
2419 struct be_rx_compl_info *rxcp)
2421 rxcp->pkt_size = GET_RX_COMPL_V1_BITS(pktsize, compl);
2422 rxcp->vlanf = GET_RX_COMPL_V1_BITS(vtp, compl);
2423 rxcp->err = GET_RX_COMPL_V1_BITS(err, compl);
2424 rxcp->tcpf = GET_RX_COMPL_V1_BITS(tcpf, compl);
2425 rxcp->udpf = GET_RX_COMPL_V1_BITS(udpf, compl);
2426 rxcp->ip_csum = GET_RX_COMPL_V1_BITS(ipcksm, compl);
2427 rxcp->l4_csum = GET_RX_COMPL_V1_BITS(l4_cksm, compl);
2428 rxcp->ipv6 = GET_RX_COMPL_V1_BITS(ip_version, compl);
2429 rxcp->num_rcvd = GET_RX_COMPL_V1_BITS(numfrags, compl);
2430 rxcp->pkt_type = GET_RX_COMPL_V1_BITS(cast_enc, compl);
2431 rxcp->rss_hash = GET_RX_COMPL_V1_BITS(rsshash, compl);
2432 if (rxcp->vlanf) {
2433 rxcp->qnq = GET_RX_COMPL_V1_BITS(qnq, compl);
2434 rxcp->vlan_tag = GET_RX_COMPL_V1_BITS(vlan_tag, compl);
2436 rxcp->port = GET_RX_COMPL_V1_BITS(port, compl);
2437 rxcp->tunneled =
2438 GET_RX_COMPL_V1_BITS(tunneled, compl);
2441 static void be_parse_rx_compl_v0(struct be_eth_rx_compl *compl,
2442 struct be_rx_compl_info *rxcp)
2444 rxcp->pkt_size = GET_RX_COMPL_V0_BITS(pktsize, compl);
2445 rxcp->vlanf = GET_RX_COMPL_V0_BITS(vtp, compl);
2446 rxcp->err = GET_RX_COMPL_V0_BITS(err, compl);
2447 rxcp->tcpf = GET_RX_COMPL_V0_BITS(tcpf, compl);
2448 rxcp->udpf = GET_RX_COMPL_V0_BITS(udpf, compl);
2449 rxcp->ip_csum = GET_RX_COMPL_V0_BITS(ipcksm, compl);
2450 rxcp->l4_csum = GET_RX_COMPL_V0_BITS(l4_cksm, compl);
2451 rxcp->ipv6 = GET_RX_COMPL_V0_BITS(ip_version, compl);
2452 rxcp->num_rcvd = GET_RX_COMPL_V0_BITS(numfrags, compl);
2453 rxcp->pkt_type = GET_RX_COMPL_V0_BITS(cast_enc, compl);
2454 rxcp->rss_hash = GET_RX_COMPL_V0_BITS(rsshash, compl);
2455 if (rxcp->vlanf) {
2456 rxcp->qnq = GET_RX_COMPL_V0_BITS(qnq, compl);
2457 rxcp->vlan_tag = GET_RX_COMPL_V0_BITS(vlan_tag, compl);
2459 rxcp->port = GET_RX_COMPL_V0_BITS(port, compl);
2460 rxcp->ip_frag = GET_RX_COMPL_V0_BITS(ip_frag, compl);
2463 static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo)
2465 struct be_eth_rx_compl *compl = queue_tail_node(&rxo->cq);
2466 struct be_rx_compl_info *rxcp = &rxo->rxcp;
2467 struct be_adapter *adapter = rxo->adapter;
2469 /* For checking the valid bit it is Ok to use either definition as the
2470 * valid bit is at the same position in both v0 and v1 Rx compl */
2471 if (compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] == 0)
2472 return NULL;
2474 rmb();
2475 be_dws_le_to_cpu(compl, sizeof(*compl));
2477 if (adapter->be3_native)
2478 be_parse_rx_compl_v1(compl, rxcp);
2479 else
2480 be_parse_rx_compl_v0(compl, rxcp);
2482 if (rxcp->ip_frag)
2483 rxcp->l4_csum = 0;
2485 if (rxcp->vlanf) {
2486 /* In QNQ modes, if qnq bit is not set, then the packet was
2487 * tagged only with the transparent outer vlan-tag and must
2488 * not be treated as a vlan packet by host
2490 if (be_is_qnq_mode(adapter) && !rxcp->qnq)
2491 rxcp->vlanf = 0;
2493 if (!lancer_chip(adapter))
2494 rxcp->vlan_tag = swab16(rxcp->vlan_tag);
2496 if (adapter->pvid == (rxcp->vlan_tag & VLAN_VID_MASK) &&
2497 !test_bit(rxcp->vlan_tag, adapter->vids))
2498 rxcp->vlanf = 0;
2501 /* As the compl has been parsed, reset it; we won't touch it again */
2502 compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] = 0;
2504 queue_tail_inc(&rxo->cq);
2505 return rxcp;
2508 static inline struct page *be_alloc_pages(u32 size, gfp_t gfp)
2510 u32 order = get_order(size);
2512 if (order > 0)
2513 gfp |= __GFP_COMP;
2514 return alloc_pages(gfp, order);
2518 * Allocate a page, split it into fragments of size rx_frag_size and post as
2519 * receive buffers to BE
2521 static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed)
2523 struct be_adapter *adapter = rxo->adapter;
2524 struct be_rx_page_info *page_info = NULL, *prev_page_info = NULL;
2525 struct be_queue_info *rxq = &rxo->q;
2526 struct page *pagep = NULL;
2527 struct device *dev = &adapter->pdev->dev;
2528 struct be_eth_rx_d *rxd;
2529 u64 page_dmaaddr = 0, frag_dmaaddr;
2530 u32 posted, page_offset = 0, notify = 0;
2532 page_info = &rxo->page_info_tbl[rxq->head];
2533 for (posted = 0; posted < frags_needed && !page_info->page; posted++) {
2534 if (!pagep) {
2535 pagep = be_alloc_pages(adapter->big_page_size, gfp);
2536 if (unlikely(!pagep)) {
2537 rx_stats(rxo)->rx_post_fail++;
2538 break;
2540 page_dmaaddr = dma_map_page(dev, pagep, 0,
2541 adapter->big_page_size,
2542 DMA_FROM_DEVICE);
2543 if (dma_mapping_error(dev, page_dmaaddr)) {
2544 put_page(pagep);
2545 pagep = NULL;
2546 adapter->drv_stats.dma_map_errors++;
2547 break;
2549 page_offset = 0;
2550 } else {
2551 get_page(pagep);
2552 page_offset += rx_frag_size;
2554 page_info->page_offset = page_offset;
2555 page_info->page = pagep;
2557 rxd = queue_head_node(rxq);
2558 frag_dmaaddr = page_dmaaddr + page_info->page_offset;
2559 rxd->fragpa_lo = cpu_to_le32(frag_dmaaddr & 0xFFFFFFFF);
2560 rxd->fragpa_hi = cpu_to_le32(upper_32_bits(frag_dmaaddr));
2562 /* Any space left in the current big page for another frag? */
2563 if ((page_offset + rx_frag_size + rx_frag_size) >
2564 adapter->big_page_size) {
2565 pagep = NULL;
2566 page_info->last_frag = true;
2567 dma_unmap_addr_set(page_info, bus, page_dmaaddr);
2568 } else {
2569 dma_unmap_addr_set(page_info, bus, frag_dmaaddr);
2572 prev_page_info = page_info;
2573 queue_head_inc(rxq);
2574 page_info = &rxo->page_info_tbl[rxq->head];
2577 /* Mark the last frag of a page when we break out of the above loop
2578 * with no more slots available in the RXQ
2580 if (pagep) {
2581 prev_page_info->last_frag = true;
2582 dma_unmap_addr_set(prev_page_info, bus, page_dmaaddr);
2585 if (posted) {
2586 atomic_add(posted, &rxq->used);
2587 if (rxo->rx_post_starved)
2588 rxo->rx_post_starved = false;
2589 do {
2590 notify = min(MAX_NUM_POST_ERX_DB, posted);
2591 be_rxq_notify(adapter, rxq->id, notify);
2592 posted -= notify;
2593 } while (posted);
2594 } else if (atomic_read(&rxq->used) == 0) {
2595 /* Let be_worker replenish when memory is available */
2596 rxo->rx_post_starved = true;
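/* Example of the splitting above: with 4K pages and rx_frag_size = 2048,
 * big_page_size is 4096, so each allocated page is posted as two
 * 2048-byte fragments; the page is DMA-mapped once, its second fragment
 * is marked last_frag, and the page is unmapped only when that last
 * fragment is pulled out of the ring by get_rx_page_info().
 */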
2600 static struct be_tx_compl_info *be_tx_compl_get(struct be_tx_obj *txo)
2602 struct be_queue_info *tx_cq = &txo->cq;
2603 struct be_tx_compl_info *txcp = &txo->txcp;
2604 struct be_eth_tx_compl *compl = queue_tail_node(tx_cq);
2606 if (compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0)
2607 return NULL;
2609 /* Ensure load ordering of valid bit dword and other dwords below */
2610 rmb();
2611 be_dws_le_to_cpu(compl, sizeof(*compl));
2613 txcp->status = GET_TX_COMPL_BITS(status, compl);
2614 txcp->end_index = GET_TX_COMPL_BITS(wrb_index, compl);
2616 compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0;
2617 queue_tail_inc(tx_cq);
2618 return txcp;
2621 static u16 be_tx_compl_process(struct be_adapter *adapter,
2622 struct be_tx_obj *txo, u16 last_index)
2624 struct sk_buff **sent_skbs = txo->sent_skb_list;
2625 struct be_queue_info *txq = &txo->q;
2626 struct sk_buff *skb = NULL;
2627 bool unmap_skb_hdr = false;
2628 struct be_eth_wrb *wrb;
2629 u16 num_wrbs = 0;
2630 u32 frag_index;
2632 do {
2633 if (sent_skbs[txq->tail]) {
2634 /* Free skb from prev req */
2635 if (skb)
2636 dev_consume_skb_any(skb);
2637 skb = sent_skbs[txq->tail];
2638 sent_skbs[txq->tail] = NULL;
2639 queue_tail_inc(txq); /* skip hdr wrb */
2640 num_wrbs++;
2641 unmap_skb_hdr = true;
2643 wrb = queue_tail_node(txq);
2644 frag_index = txq->tail;
2645 unmap_tx_frag(&adapter->pdev->dev, wrb,
2646 (unmap_skb_hdr && skb_headlen(skb)));
2647 unmap_skb_hdr = false;
2648 queue_tail_inc(txq);
2649 num_wrbs++;
2650 } while (frag_index != last_index);
2651 dev_consume_skb_any(skb);
2653 return num_wrbs;
2656 /* Return the number of events in the event queue */
2657 static inline int events_get(struct be_eq_obj *eqo)
2659 struct be_eq_entry *eqe;
2660 int num = 0;
2662 do {
2663 eqe = queue_tail_node(&eqo->q);
2664 if (eqe->evt == 0)
2665 break;
2667 rmb();
2668 eqe->evt = 0;
2669 num++;
2670 queue_tail_inc(&eqo->q);
2671 } while (true);
2673 return num;
2676 /* Leaves the EQ in disarmed state */
2677 static void be_eq_clean(struct be_eq_obj *eqo)
2679 int num = events_get(eqo);
2681 be_eq_notify(eqo->adapter, eqo->q.id, false, true, num, 0);
2684 /* Free posted rx buffers that were not used */
2685 static void be_rxq_clean(struct be_rx_obj *rxo)
2687 struct be_queue_info *rxq = &rxo->q;
2688 struct be_rx_page_info *page_info;
2690 while (atomic_read(&rxq->used) > 0) {
2691 page_info = get_rx_page_info(rxo);
2692 put_page(page_info->page);
2693 memset(page_info, 0, sizeof(*page_info));
2695 BUG_ON(atomic_read(&rxq->used));
2696 rxq->tail = 0;
2697 rxq->head = 0;
2700 static void be_rx_cq_clean(struct be_rx_obj *rxo)
2702 struct be_queue_info *rx_cq = &rxo->cq;
2703 struct be_rx_compl_info *rxcp;
2704 struct be_adapter *adapter = rxo->adapter;
2705 int flush_wait = 0;
2707 /* Consume pending rx completions.
2708 * Wait for the flush completion (identified by zero num_rcvd)
2709 * to arrive. Notify CQ even when there are no more CQ entries
2710 * for HW to flush partially coalesced CQ entries.
2711 * In Lancer, there is no need to wait for flush compl.
2713 for (;;) {
2714 rxcp = be_rx_compl_get(rxo);
2715 if (!rxcp) {
2716 if (lancer_chip(adapter))
2717 break;
2719 if (flush_wait++ > 50 ||
2720 be_check_error(adapter,
2721 BE_ERROR_HW)) {
2722 dev_warn(&adapter->pdev->dev,
2723 "did not receive flush compl\n");
2724 break;
2726 be_cq_notify(adapter, rx_cq->id, true, 0);
2727 mdelay(1);
2728 } else {
2729 be_rx_compl_discard(rxo, rxcp);
2730 be_cq_notify(adapter, rx_cq->id, false, 1);
2731 if (rxcp->num_rcvd == 0)
2732 break;
2736 /* After cleanup, leave the CQ in unarmed state */
2737 be_cq_notify(adapter, rx_cq->id, false, 0);
2740 static void be_tx_compl_clean(struct be_adapter *adapter)
2742 struct device *dev = &adapter->pdev->dev;
2743 u16 cmpl = 0, timeo = 0, num_wrbs = 0;
2744 struct be_tx_compl_info *txcp;
2745 struct be_queue_info *txq;
2746 u32 end_idx, notified_idx;
2747 struct be_tx_obj *txo;
2748 int i, pending_txqs;
2750 /* Stop polling for compls when HW has been silent for 10ms */
2751 do {
2752 pending_txqs = adapter->num_tx_qs;
2754 for_all_tx_queues(adapter, txo, i) {
2755 cmpl = 0;
2756 num_wrbs = 0;
2757 txq = &txo->q;
2758 while ((txcp = be_tx_compl_get(txo))) {
2759 num_wrbs +=
2760 be_tx_compl_process(adapter, txo,
2761 txcp->end_index);
2762 cmpl++;
2764 if (cmpl) {
2765 be_cq_notify(adapter, txo->cq.id, false, cmpl);
2766 atomic_sub(num_wrbs, &txq->used);
2767 timeo = 0;
2769 if (!be_is_tx_compl_pending(txo))
2770 pending_txqs--;
2773 if (pending_txqs == 0 || ++timeo > 10 ||
2774 be_check_error(adapter, BE_ERROR_HW))
2775 break;
2777 mdelay(1);
2778 } while (true);
2780 /* Free enqueued TX that was never notified to HW */
2781 for_all_tx_queues(adapter, txo, i) {
2782 txq = &txo->q;
2784 if (atomic_read(&txq->used)) {
2785 dev_info(dev, "txq%d: cleaning %d pending tx-wrbs\n",
2786 i, atomic_read(&txq->used));
2787 notified_idx = txq->tail;
2788 end_idx = txq->tail;
2789 index_adv(&end_idx, atomic_read(&txq->used) - 1,
2790 txq->len);
2791 /* Use the tx-compl process logic to handle requests
2792 * that were not sent to the HW.
2794 num_wrbs = be_tx_compl_process(adapter, txo, end_idx);
2795 atomic_sub(num_wrbs, &txq->used);
2796 BUG_ON(atomic_read(&txq->used));
2797 txo->pend_wrb_cnt = 0;
2798 /* Since hw was never notified of these requests,
2799 * reset TXQ indices
2801 txq->head = notified_idx;
2802 txq->tail = notified_idx;
2807 static void be_evt_queues_destroy(struct be_adapter *adapter)
2809 struct be_eq_obj *eqo;
2810 int i;
2812 for_all_evt_queues(adapter, eqo, i) {
2813 if (eqo->q.created) {
2814 be_eq_clean(eqo);
2815 be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ);
2816 netif_napi_del(&eqo->napi);
2817 free_cpumask_var(eqo->affinity_mask);
2819 be_queue_free(adapter, &eqo->q);
2823 static int be_evt_queues_create(struct be_adapter *adapter)
2825 struct be_queue_info *eq;
2826 struct be_eq_obj *eqo;
2827 struct be_aic_obj *aic;
2828 int i, rc;
2830 /* need enough EQs to service both RX and TX queues */
2831 adapter->num_evt_qs = min_t(u16, num_irqs(adapter),
2832 max(adapter->cfg_num_rx_irqs,
2833 adapter->cfg_num_tx_irqs));
2835 for_all_evt_queues(adapter, eqo, i) {
2836 int numa_node = dev_to_node(&adapter->pdev->dev);
2838 aic = &adapter->aic_obj[i];
2839 eqo->adapter = adapter;
2840 eqo->idx = i;
2841 aic->max_eqd = BE_MAX_EQD;
2842 aic->enable = true;
2844 eq = &eqo->q;
2845 rc = be_queue_alloc(adapter, eq, EVNT_Q_LEN,
2846 sizeof(struct be_eq_entry));
2847 if (rc)
2848 return rc;
2850 rc = be_cmd_eq_create(adapter, eqo);
2851 if (rc)
2852 return rc;
2854 if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
2855 return -ENOMEM;
2856 cpumask_set_cpu(cpumask_local_spread(i, numa_node),
2857 eqo->affinity_mask);
2858 netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
2859 BE_NAPI_WEIGHT);
2861 return 0;
2864 static void be_mcc_queues_destroy(struct be_adapter *adapter)
2866 struct be_queue_info *q;
2868 q = &adapter->mcc_obj.q;
2869 if (q->created)
2870 be_cmd_q_destroy(adapter, q, QTYPE_MCCQ);
2871 be_queue_free(adapter, q);
2873 q = &adapter->mcc_obj.cq;
2874 if (q->created)
2875 be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2876 be_queue_free(adapter, q);
2879 /* Must be called only after TX qs are created as MCC shares TX EQ */
2880 static int be_mcc_queues_create(struct be_adapter *adapter)
2882 struct be_queue_info *q, *cq;
2884 cq = &adapter->mcc_obj.cq;
2885 if (be_queue_alloc(adapter, cq, MCC_CQ_LEN,
2886 sizeof(struct be_mcc_compl)))
2887 goto err;
2889 /* Use the default EQ for MCC completions */
2890 if (be_cmd_cq_create(adapter, cq, &mcc_eqo(adapter)->q, true, 0))
2891 goto mcc_cq_free;
2893 q = &adapter->mcc_obj.q;
2894 if (be_queue_alloc(adapter, q, MCC_Q_LEN, sizeof(struct be_mcc_wrb)))
2895 goto mcc_cq_destroy;
2897 if (be_cmd_mccq_create(adapter, q, cq))
2898 goto mcc_q_free;
2900 return 0;
2902 mcc_q_free:
2903 be_queue_free(adapter, q);
2904 mcc_cq_destroy:
2905 be_cmd_q_destroy(adapter, cq, QTYPE_CQ);
2906 mcc_cq_free:
2907 be_queue_free(adapter, cq);
2908 err:
2909 return -1;
2912 static void be_tx_queues_destroy(struct be_adapter *adapter)
2914 struct be_queue_info *q;
2915 struct be_tx_obj *txo;
2916 u8 i;
2918 for_all_tx_queues(adapter, txo, i) {
2919 q = &txo->q;
2920 if (q->created)
2921 be_cmd_q_destroy(adapter, q, QTYPE_TXQ);
2922 be_queue_free(adapter, q);
2924 q = &txo->cq;
2925 if (q->created)
2926 be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2927 be_queue_free(adapter, q);
2931 static int be_tx_qs_create(struct be_adapter *adapter)
2933 struct be_queue_info *cq;
2934 struct be_tx_obj *txo;
2935 struct be_eq_obj *eqo;
2936 int status, i;
2938 adapter->num_tx_qs = min(adapter->num_evt_qs, adapter->cfg_num_tx_irqs);
2940 for_all_tx_queues(adapter, txo, i) {
2941 cq = &txo->cq;
2942 status = be_queue_alloc(adapter, cq, TX_CQ_LEN,
2943 sizeof(struct be_eth_tx_compl));
2944 if (status)
2945 return status;
2947 u64_stats_init(&txo->stats.sync);
2948 u64_stats_init(&txo->stats.sync_compl);
2950 /* If num_evt_qs is less than num_tx_qs, then more than
2951 * one txq shares an eq
2953 eqo = &adapter->eq_obj[i % adapter->num_evt_qs];
2954 status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3);
2955 if (status)
2956 return status;
2958 status = be_queue_alloc(adapter, &txo->q, TX_Q_LEN,
2959 sizeof(struct be_eth_wrb));
2960 if (status)
2961 return status;
2963 status = be_cmd_txq_create(adapter, txo);
2964 if (status)
2965 return status;
2967 netif_set_xps_queue(adapter->netdev, eqo->affinity_mask,
2968 eqo->idx);
2971 dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
2972 adapter->num_tx_qs);
2973 return 0;
2976 static void be_rx_cqs_destroy(struct be_adapter *adapter)
2978 struct be_queue_info *q;
2979 struct be_rx_obj *rxo;
2980 int i;
2982 for_all_rx_queues(adapter, rxo, i) {
2983 q = &rxo->cq;
2984 if (q->created)
2985 be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2986 be_queue_free(adapter, q);
2990 static int be_rx_cqs_create(struct be_adapter *adapter)
2992 struct be_queue_info *eq, *cq;
2993 struct be_rx_obj *rxo;
2994 int rc, i;
2996 adapter->num_rss_qs =
2997 min(adapter->num_evt_qs, adapter->cfg_num_rx_irqs);
2999 /* We'll use RSS only if at least 2 RSS rings are supported. */
3000 if (adapter->num_rss_qs < 2)
3001 adapter->num_rss_qs = 0;
3003 adapter->num_rx_qs = adapter->num_rss_qs + adapter->need_def_rxq;
3005 /* When the interface is not capable of RSS rings (and there is no
3006 * need to create a default RXQ) we'll still need one RXQ
3008 if (adapter->num_rx_qs == 0)
3009 adapter->num_rx_qs = 1;
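/* Example: with cfg_num_rx_irqs = 8 and 8 event queues, 8 RSS rings
 * are created (plus a default RXQ only if need_def_rxq is set);
 * with a single vector RSS is disabled above and one plain RXQ is
 * used instead.
 */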
3011 adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE;
3012 for_all_rx_queues(adapter, rxo, i) {
3013 rxo->adapter = adapter;
3014 cq = &rxo->cq;
3015 rc = be_queue_alloc(adapter, cq, RX_CQ_LEN,
3016 sizeof(struct be_eth_rx_compl));
3017 if (rc)
3018 return rc;
3020 u64_stats_init(&rxo->stats.sync);
3021 eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
3022 rc = be_cmd_cq_create(adapter, cq, eq, false, 3);
3023 if (rc)
3024 return rc;
3027 dev_info(&adapter->pdev->dev,
3028 "created %d RX queue(s)\n", adapter->num_rx_qs);
3029 return 0;
3032 static irqreturn_t be_intx(int irq, void *dev)
3034 struct be_eq_obj *eqo = dev;
3035 struct be_adapter *adapter = eqo->adapter;
3036 int num_evts = 0;
3038 /* IRQ is not expected when NAPI is scheduled as the EQ
3039 * will not be armed.
3040 * But, this can happen on Lancer INTx where it takes
3041 * a while to de-assert INTx or in BE2 where occasionally
3042 * an interrupt may be raised even when EQ is unarmed.
3043 * If NAPI is already scheduled, then counting & notifying
3044 * events will orphan them.
3046 if (napi_schedule_prep(&eqo->napi)) {
3047 num_evts = events_get(eqo);
3048 __napi_schedule(&eqo->napi);
3049 if (num_evts)
3050 eqo->spurious_intr = 0;
3052 be_eq_notify(adapter, eqo->q.id, false, true, num_evts, 0);
3054 /* Return IRQ_HANDLED only for the first spurious intr
3055 * after a valid intr to stop the kernel from branding
3056 * this irq as a bad one!
3058 if (num_evts || eqo->spurious_intr++ == 0)
3059 return IRQ_HANDLED;
3060 else
3061 return IRQ_NONE;
3064 static irqreturn_t be_msix(int irq, void *dev)
3066 struct be_eq_obj *eqo = dev;
3068 be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
3069 napi_schedule(&eqo->napi);
3070 return IRQ_HANDLED;
3073 static inline bool do_gro(struct be_rx_compl_info *rxcp)
3075 return (rxcp->tcpf && !rxcp->err && rxcp->l4_csum) ? true : false;
3078 static int be_process_rx(struct be_rx_obj *rxo, struct napi_struct *napi,
3079 int budget, int polling)
3081 struct be_adapter *adapter = rxo->adapter;
3082 struct be_queue_info *rx_cq = &rxo->cq;
3083 struct be_rx_compl_info *rxcp;
3084 u32 work_done;
3085 u32 frags_consumed = 0;
3087 for (work_done = 0; work_done < budget; work_done++) {
3088 rxcp = be_rx_compl_get(rxo);
3089 if (!rxcp)
3090 break;
3092 /* Is it a flush compl that has no data */
3093 if (unlikely(rxcp->num_rcvd == 0))
3094 goto loop_continue;
3096 /* Discard compl with partial DMA (Lancer B0) */
3097 if (unlikely(!rxcp->pkt_size)) {
3098 be_rx_compl_discard(rxo, rxcp);
3099 goto loop_continue;
3102 /* On BE drop pkts that arrive due to imperfect filtering in
3103 * promiscuous mode on some SKUs
3105 if (unlikely(rxcp->port != adapter->port_num &&
3106 !lancer_chip(adapter))) {
3107 be_rx_compl_discard(rxo, rxcp);
3108 goto loop_continue;
3111 /* Don't do gro when we're busy_polling */
3112 if (do_gro(rxcp) && polling != BUSY_POLLING)
3113 be_rx_compl_process_gro(rxo, napi, rxcp);
3114 else
3115 be_rx_compl_process(rxo, napi, rxcp);
3117 loop_continue:
3118 frags_consumed += rxcp->num_rcvd;
3119 be_rx_stats_update(rxo, rxcp);
3122 if (work_done) {
3123 be_cq_notify(adapter, rx_cq->id, true, work_done);
3125 /* When an rx-obj gets into post_starved state, just
3126 * let be_worker do the posting.
3128 if (atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM &&
3129 !rxo->rx_post_starved)
3130 be_post_rx_frags(rxo, GFP_ATOMIC,
3131 max_t(u32, MAX_RX_POST,
3132 frags_consumed));
3135 return work_done;
3138 static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status)
3140 switch (status) {
3141 case BE_TX_COMP_HDR_PARSE_ERR:
3142 tx_stats(txo)->tx_hdr_parse_err++;
3143 break;
3144 case BE_TX_COMP_NDMA_ERR:
3145 tx_stats(txo)->tx_dma_err++;
3146 break;
3147 case BE_TX_COMP_ACL_ERR:
3148 tx_stats(txo)->tx_spoof_check_err++;
3149 break;
3153 static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status)
3155 switch (status) {
3156 case LANCER_TX_COMP_LSO_ERR:
3157 tx_stats(txo)->tx_tso_err++;
3158 break;
3159 case LANCER_TX_COMP_HSW_DROP_MAC_ERR:
3160 case LANCER_TX_COMP_HSW_DROP_VLAN_ERR:
3161 tx_stats(txo)->tx_spoof_check_err++;
3162 break;
3163 case LANCER_TX_COMP_QINQ_ERR:
3164 tx_stats(txo)->tx_qinq_err++;
3165 break;
3166 case LANCER_TX_COMP_PARITY_ERR:
3167 tx_stats(txo)->tx_internal_parity_err++;
3168 break;
3169 case LANCER_TX_COMP_DMA_ERR:
3170 tx_stats(txo)->tx_dma_err++;
3171 break;
3175 static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo,
3176 int idx)
3178 int num_wrbs = 0, work_done = 0;
3179 struct be_tx_compl_info *txcp;
3181 while ((txcp = be_tx_compl_get(txo))) {
3182 num_wrbs += be_tx_compl_process(adapter, txo, txcp->end_index);
3183 work_done++;
3185 if (txcp->status) {
3186 if (lancer_chip(adapter))
3187 lancer_update_tx_err(txo, txcp->status);
3188 else
3189 be_update_tx_err(txo, txcp->status);
3193 if (work_done) {
3194 be_cq_notify(adapter, txo->cq.id, true, work_done);
3195 atomic_sub(num_wrbs, &txo->q.used);
3197 /* As Tx wrbs have been freed up, wake up netdev queue
3198 * if it was stopped due to lack of tx wrbs. */
3199 if (__netif_subqueue_stopped(adapter->netdev, idx) &&
3200 be_can_txq_wake(txo)) {
3201 netif_wake_subqueue(adapter->netdev, idx);
3204 u64_stats_update_begin(&tx_stats(txo)->sync_compl);
3205 tx_stats(txo)->tx_compl += work_done;
3206 u64_stats_update_end(&tx_stats(txo)->sync_compl);
3210 #ifdef CONFIG_NET_RX_BUSY_POLL
3211 static inline bool be_lock_napi(struct be_eq_obj *eqo)
3213 bool status = true;
3215 spin_lock(&eqo->lock); /* BH is already disabled */
3216 if (eqo->state & BE_EQ_LOCKED) {
3217 WARN_ON(eqo->state & BE_EQ_NAPI);
3218 eqo->state |= BE_EQ_NAPI_YIELD;
3219 status = false;
3220 } else {
3221 eqo->state = BE_EQ_NAPI;
3223 spin_unlock(&eqo->lock);
3224 return status;
3227 static inline void be_unlock_napi(struct be_eq_obj *eqo)
3229 spin_lock(&eqo->lock); /* BH is already disabled */
3231 WARN_ON(eqo->state & (BE_EQ_POLL | BE_EQ_NAPI_YIELD));
3232 eqo->state = BE_EQ_IDLE;
3234 spin_unlock(&eqo->lock);
3237 static inline bool be_lock_busy_poll(struct be_eq_obj *eqo)
3239 bool status = true;
3241 spin_lock_bh(&eqo->lock);
3242 if (eqo->state & BE_EQ_LOCKED) {
3243 eqo->state |= BE_EQ_POLL_YIELD;
3244 status = false;
3245 } else {
3246 eqo->state |= BE_EQ_POLL;
3248 spin_unlock_bh(&eqo->lock);
3249 return status;
3252 static inline void be_unlock_busy_poll(struct be_eq_obj *eqo)
3254 spin_lock_bh(&eqo->lock);
3256 WARN_ON(eqo->state & (BE_EQ_NAPI));
3257 eqo->state = BE_EQ_IDLE;
3259 spin_unlock_bh(&eqo->lock);
3262 static inline void be_enable_busy_poll(struct be_eq_obj *eqo)
3264 spin_lock_init(&eqo->lock);
3265 eqo->state = BE_EQ_IDLE;
3268 static inline void be_disable_busy_poll(struct be_eq_obj *eqo)
3270 local_bh_disable();
3272 /* It's enough to just acquire napi lock on the eqo to stop
3273 * be_busy_poll() from processing any queues.
3275 while (!be_lock_napi(eqo))
3276 mdelay(1);
3278 local_bh_enable();
3281 #else /* CONFIG_NET_RX_BUSY_POLL */
3283 static inline bool be_lock_napi(struct be_eq_obj *eqo)
3285 return true;
3288 static inline void be_unlock_napi(struct be_eq_obj *eqo)
3292 static inline bool be_lock_busy_poll(struct be_eq_obj *eqo)
3294 return false;
3297 static inline void be_unlock_busy_poll(struct be_eq_obj *eqo)
3301 static inline void be_enable_busy_poll(struct be_eq_obj *eqo)
3305 static inline void be_disable_busy_poll(struct be_eq_obj *eqo)
3308 #endif /* CONFIG_NET_RX_BUSY_POLL */
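/* Summary of the locking scheme above: eqo->state records whether the EQ
 * is idle, owned by NAPI (BE_EQ_NAPI) or owned by busy-poll (BE_EQ_POLL);
 * whichever path finds the lock already held sets its *_YIELD flag and
 * backs off instead of processing. With CONFIG_NET_RX_BUSY_POLL disabled,
 * the stubs make be_lock_napi() always succeed so be_poll() processes the
 * queues unconditionally.
 */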
3310 int be_poll(struct napi_struct *napi, int budget)
3312 struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3313 struct be_adapter *adapter = eqo->adapter;
3314 int max_work = 0, work, i, num_evts;
3315 struct be_rx_obj *rxo;
3316 struct be_tx_obj *txo;
3317 u32 mult_enc = 0;
3319 num_evts = events_get(eqo);
3321 for_all_tx_queues_on_eq(adapter, eqo, txo, i)
3322 be_process_tx(adapter, txo, i);
3324 if (be_lock_napi(eqo)) {
3325 /* This loop will iterate twice for EQ0 in which
3326 * completions of the last RXQ (default one) are also processed.
3327 * For other EQs the loop iterates only once
3329 for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3330 work = be_process_rx(rxo, napi, budget, NAPI_POLLING);
3331 max_work = max(work, max_work);
3333 be_unlock_napi(eqo);
3334 } else {
3335 max_work = budget;
3338 if (is_mcc_eqo(eqo))
3339 be_process_mcc(adapter);
3341 if (max_work < budget) {
3342 napi_complete(napi);
3344 /* Skyhawk EQ_DB has a provision to set the rearm to interrupt
3345 * delay via a delay multiplier encoding value
3347 if (skyhawk_chip(adapter))
3348 mult_enc = be_get_eq_delay_mult_enc(eqo);
3350 be_eq_notify(adapter, eqo->q.id, true, false, num_evts,
3351 mult_enc);
3352 } else {
3353 /* As we'll continue in polling mode, count and clear events */
3354 be_eq_notify(adapter, eqo->q.id, false, false, num_evts, 0);
3356 return max_work;
3359 #ifdef CONFIG_NET_RX_BUSY_POLL
3360 static int be_busy_poll(struct napi_struct *napi)
3362 struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3363 struct be_adapter *adapter = eqo->adapter;
3364 struct be_rx_obj *rxo;
3365 int i, work = 0;
3367 if (!be_lock_busy_poll(eqo))
3368 return LL_FLUSH_BUSY;
3370 for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3371 work = be_process_rx(rxo, napi, 4, BUSY_POLLING);
3372 if (work)
3373 break;
3376 be_unlock_busy_poll(eqo);
3377 return work;
3379 #endif
3381 void be_detect_error(struct be_adapter *adapter)
3383 u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0;
3384 u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
3385 u32 i;
3386 struct device *dev = &adapter->pdev->dev;
3388 if (be_check_error(adapter, BE_ERROR_HW))
3389 return;
3391 if (lancer_chip(adapter)) {
3392 sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
3393 if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
3394 be_set_error(adapter, BE_ERROR_UE);
3395 sliport_err1 = ioread32(adapter->db +
3396 SLIPORT_ERROR1_OFFSET);
3397 sliport_err2 = ioread32(adapter->db +
3398 SLIPORT_ERROR2_OFFSET);
3399 /* Do not log error messages if it's a FW reset */
3400 if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 &&
3401 sliport_err2 == SLIPORT_ERROR_FW_RESET2) {
3402 dev_info(dev, "Firmware update in progress\n");
3403 } else {
3404 dev_err(dev, "Error detected in the card\n");
3405 dev_err(dev, "ERR: sliport status 0x%x\n",
3406 sliport_status);
3407 dev_err(dev, "ERR: sliport error1 0x%x\n",
3408 sliport_err1);
3409 dev_err(dev, "ERR: sliport error2 0x%x\n",
3410 sliport_err2);
3413 } else {
3414 ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW);
3415 ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH);
3416 ue_lo_mask = ioread32(adapter->pcicfg +
3417 PCICFG_UE_STATUS_LOW_MASK);
3418 ue_hi_mask = ioread32(adapter->pcicfg +
3419 PCICFG_UE_STATUS_HI_MASK);
3421 ue_lo = (ue_lo & ~ue_lo_mask);
3422 ue_hi = (ue_hi & ~ue_hi_mask);
3424 /* On certain platforms BE hardware can indicate spurious UEs.
3425 * Allow HW to stop working completely in case of a real UE.
3426 * Hence not setting the hw_error for UE detection.
3429 if (ue_lo || ue_hi) {
3430 dev_err(dev, "Error detected in the adapter");
3431 if (skyhawk_chip(adapter))
3432 be_set_error(adapter, BE_ERROR_UE);
3434 for (i = 0; ue_lo; ue_lo >>= 1, i++) {
3435 if (ue_lo & 1)
3436 dev_err(dev, "UE: %s bit set\n",
3437 ue_status_low_desc[i]);
3439 for (i = 0; ue_hi; ue_hi >>= 1, i++) {
3440 if (ue_hi & 1)
3441 dev_err(dev, "UE: %s bit set\n",
3442 ue_status_hi_desc[i]);
3448 static void be_msix_disable(struct be_adapter *adapter)
3450 if (msix_enabled(adapter)) {
3451 pci_disable_msix(adapter->pdev);
3452 adapter->num_msix_vec = 0;
3453 adapter->num_msix_roce_vec = 0;
3457 static int be_msix_enable(struct be_adapter *adapter)
3459 unsigned int i, max_roce_eqs;
3460 struct device *dev = &adapter->pdev->dev;
3461 int num_vec;
3463 /* If RoCE is supported, program the max number of vectors that
3464 * could be used for NIC and RoCE, else, just program the number
3465 * we'll use initially.
3467 if (be_roce_supported(adapter)) {
3468 max_roce_eqs =
3469 be_max_func_eqs(adapter) - be_max_nic_eqs(adapter);
3470 max_roce_eqs = min(max_roce_eqs, num_online_cpus());
3471 num_vec = be_max_any_irqs(adapter) + max_roce_eqs;
3472 } else {
3473 num_vec = max(adapter->cfg_num_rx_irqs,
3474 adapter->cfg_num_tx_irqs);
3477 for (i = 0; i < num_vec; i++)
3478 adapter->msix_entries[i].entry = i;
3480 num_vec = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
3481 MIN_MSIX_VECTORS, num_vec);
3482 if (num_vec < 0)
3483 goto fail;
3485 if (be_roce_supported(adapter) && num_vec > MIN_MSIX_VECTORS) {
3486 adapter->num_msix_roce_vec = num_vec / 2;
3487 dev_info(dev, "enabled %d MSI-x vector(s) for RoCE\n",
3488 adapter->num_msix_roce_vec);
3491 adapter->num_msix_vec = num_vec - adapter->num_msix_roce_vec;
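/* Example: if pci_enable_msix_range() grants 10 vectors on a
 * RoCE-capable function, 5 are reserved for RoCE and the remaining
 * 5 stay with the NIC event queues; on a NIC-only function all
 * granted vectors are kept for the NIC.
 */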
3493 dev_info(dev, "enabled %d MSI-x vector(s) for NIC\n",
3494 adapter->num_msix_vec);
3495 return 0;
3497 fail:
3498 dev_warn(dev, "MSIx enable failed\n");
3500 /* INTx is not supported in VFs, so fail probe if enable_msix fails */
3501 if (be_virtfn(adapter))
3502 return num_vec;
3503 return 0;
3506 static inline int be_msix_vec_get(struct be_adapter *adapter,
3507 struct be_eq_obj *eqo)
3509 return adapter->msix_entries[eqo->msix_idx].vector;
3512 static int be_msix_register(struct be_adapter *adapter)
3514 struct net_device *netdev = adapter->netdev;
3515 struct be_eq_obj *eqo;
3516 int status, i, vec;
3518 for_all_evt_queues(adapter, eqo, i) {
3519 sprintf(eqo->desc, "%s-q%d", netdev->name, i);
3520 vec = be_msix_vec_get(adapter, eqo);
3521 status = request_irq(vec, be_msix, 0, eqo->desc, eqo);
3522 if (status)
3523 goto err_msix;
3525 irq_set_affinity_hint(vec, eqo->affinity_mask);
3528 return 0;
3529 err_msix:
3530 for (i--; i >= 0; i--) {
3531 eqo = &adapter->eq_obj[i];
3532 free_irq(be_msix_vec_get(adapter, eqo), eqo);
3534 dev_warn(&adapter->pdev->dev, "MSIX Request IRQ failed - err %d\n",
3535 status);
3536 be_msix_disable(adapter);
3537 return status;
3540 static int be_irq_register(struct be_adapter *adapter)
3542 struct net_device *netdev = adapter->netdev;
3543 int status;
3545 if (msix_enabled(adapter)) {
3546 status = be_msix_register(adapter);
3547 if (status == 0)
3548 goto done;
3549 /* INTx is not supported for VF */
3550 if (be_virtfn(adapter))
3551 return status;
3554 /* INTx: only the first EQ is used */
3555 netdev->irq = adapter->pdev->irq;
3556 status = request_irq(netdev->irq, be_intx, IRQF_SHARED, netdev->name,
3557 &adapter->eq_obj[0]);
3558 if (status) {
3559 dev_err(&adapter->pdev->dev,
3560 "INTx request IRQ failed - err %d\n", status);
3561 return status;
3563 done:
3564 adapter->isr_registered = true;
3565 return 0;
3568 static void be_irq_unregister(struct be_adapter *adapter)
3570 struct net_device *netdev = adapter->netdev;
3571 struct be_eq_obj *eqo;
3572 int i, vec;
3574 if (!adapter->isr_registered)
3575 return;
3577 /* INTx */
3578 if (!msix_enabled(adapter)) {
3579 free_irq(netdev->irq, &adapter->eq_obj[0]);
3580 goto done;
3583 /* MSIx */
3584 for_all_evt_queues(adapter, eqo, i) {
3585 vec = be_msix_vec_get(adapter, eqo);
3586 irq_set_affinity_hint(vec, NULL);
3587 free_irq(vec, eqo);
3590 done:
3591 adapter->isr_registered = false;
3594 static void be_rx_qs_destroy(struct be_adapter *adapter)
3596 struct rss_info *rss = &adapter->rss_info;
3597 struct be_queue_info *q;
3598 struct be_rx_obj *rxo;
3599 int i;
3601 for_all_rx_queues(adapter, rxo, i) {
3602 q = &rxo->q;
3603 if (q->created) {
3604 /* If RXQs are destroyed while in an "out of buffer"
3605 * state, there is a possibility of an HW stall on
3606 * Lancer. So, post 64 buffers to each queue to relieve
3607 * the "out of buffer" condition.
3608 * Make sure there's space in the RXQ before posting.
3610 if (lancer_chip(adapter)) {
3611 be_rx_cq_clean(rxo);
3612 if (atomic_read(&q->used) == 0)
3613 be_post_rx_frags(rxo, GFP_KERNEL,
3614 MAX_RX_POST);
3617 be_cmd_rxq_destroy(adapter, q);
3618 be_rx_cq_clean(rxo);
3619 be_rxq_clean(rxo);
3621 be_queue_free(adapter, q);
3624 if (rss->rss_flags) {
3625 rss->rss_flags = RSS_ENABLE_NONE;
3626 be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3627 128, rss->rss_hkey);
3631 static void be_disable_if_filters(struct be_adapter *adapter)
3633 be_dev_mac_del(adapter, adapter->pmac_id[0]);
3634 be_clear_uc_list(adapter);
3635 be_clear_mc_list(adapter);
3637 /* The IFACE flags are enabled in the open path and cleared
3638 * in the close path. When a VF gets detached from the host and
3639 * assigned to a VM the following happens:
3640 * - VF's IFACE flags get cleared in the detach path
3641 * - IFACE create is issued by the VF in the attach path
3642 * Due to a bug in the BE3/Skyhawk-R FW
3643 * (Lancer FW doesn't have the bug), the IFACE capability flags
3644 * specified along with the IFACE create cmd issued by a VF are not
3645 * honoured by FW. As a consequence, if a *new* driver
3646 * (that enables/disables IFACE flags in open/close)
3647 * is loaded in the host and an *old* driver is used by a VM/VF,
3648 * the IFACE gets created *without* the needed flags.
3649 * To avoid this, disable RX-filter flags only for Lancer.
3651 if (lancer_chip(adapter)) {
3652 be_cmd_rx_filter(adapter, BE_IF_ALL_FILT_FLAGS, OFF);
3653 adapter->if_flags &= ~BE_IF_ALL_FILT_FLAGS;
3657 static int be_close(struct net_device *netdev)
3659 struct be_adapter *adapter = netdev_priv(netdev);
3660 struct be_eq_obj *eqo;
3661 int i;
3663 /* This protection is needed as be_close() may be called even when the
3664 * adapter is in cleared state (after eeh perm failure)
3666 if (!(adapter->flags & BE_FLAGS_SETUP_DONE))
3667 return 0;
3669 /* Before attempting cleanup ensure all the pending cmds in the
3670 * config_wq have finished execution
3672 flush_workqueue(be_wq);
3674 be_disable_if_filters(adapter);
3676 if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
3677 for_all_evt_queues(adapter, eqo, i) {
3678 napi_disable(&eqo->napi);
3679 be_disable_busy_poll(eqo);
3681 adapter->flags &= ~BE_FLAGS_NAPI_ENABLED;
3684 be_async_mcc_disable(adapter);
3686 /* Wait for all pending tx completions to arrive so that
3687 * all tx skbs are freed.
3689 netif_tx_disable(netdev);
3690 be_tx_compl_clean(adapter);
3692 be_rx_qs_destroy(adapter);
3694 for_all_evt_queues(adapter, eqo, i) {
3695 if (msix_enabled(adapter))
3696 synchronize_irq(be_msix_vec_get(adapter, eqo));
3697 else
3698 synchronize_irq(netdev->irq);
3699 be_eq_clean(eqo);
3702 be_irq_unregister(adapter);
3704 return 0;
3707 static int be_rx_qs_create(struct be_adapter *adapter)
3709 struct rss_info *rss = &adapter->rss_info;
3710 u8 rss_key[RSS_HASH_KEY_LEN];
3711 struct be_rx_obj *rxo;
3712 int rc, i, j;
3714 for_all_rx_queues(adapter, rxo, i) {
3715 rc = be_queue_alloc(adapter, &rxo->q, RX_Q_LEN,
3716 sizeof(struct be_eth_rx_d));
3717 if (rc)
3718 return rc;
3721 if (adapter->need_def_rxq || !adapter->num_rss_qs) {
3722 rxo = default_rxo(adapter);
3723 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3724 rx_frag_size, adapter->if_handle,
3725 false, &rxo->rss_id);
3726 if (rc)
3727 return rc;
3730 for_all_rss_queues(adapter, rxo, i) {
3731 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3732 rx_frag_size, adapter->if_handle,
3733 true, &rxo->rss_id);
3734 if (rc)
3735 return rc;
3738 if (be_multi_rxq(adapter)) {
3739 for (j = 0; j < RSS_INDIR_TABLE_LEN; j += adapter->num_rss_qs) {
3740 for_all_rss_queues(adapter, rxo, i) {
3741 if ((j + i) >= RSS_INDIR_TABLE_LEN)
3742 break;
3743 rss->rsstable[j + i] = rxo->rss_id;
3744 rss->rss_queue[j + i] = i;
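/* Example: with 4 RSS rings the loops above fill rss_queue[] with
 * the repeating pattern 0,1,2,3,... and rsstable[] with the
 * corresponding rings' rss_id values, so hashed flows are spread
 * round-robin across all RSS rings.
 */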
3747 rss->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 |
3748 RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6;
3750 if (!BEx_chip(adapter))
3751 rss->rss_flags |= RSS_ENABLE_UDP_IPV4 |
3752 RSS_ENABLE_UDP_IPV6;
3754 netdev_rss_key_fill(rss_key, RSS_HASH_KEY_LEN);
3755 rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3756 RSS_INDIR_TABLE_LEN, rss_key);
3757 if (rc) {
3758 rss->rss_flags = RSS_ENABLE_NONE;
3759 return rc;
3762 memcpy(rss->rss_hkey, rss_key, RSS_HASH_KEY_LEN);
3763 } else {
3764 /* Disable RSS, if only default RX Q is created */
3765 rss->rss_flags = RSS_ENABLE_NONE;
3769 /* Post 1 less than RXQ-len to avoid head being equal to tail,
3770 * which is a queue empty condition
3772 for_all_rx_queues(adapter, rxo, i)
3773 be_post_rx_frags(rxo, GFP_KERNEL, RX_Q_LEN - 1);
3775 return 0;
3778 static int be_enable_if_filters(struct be_adapter *adapter)
3780 int status;
3782 status = be_cmd_rx_filter(adapter, BE_IF_FILT_FLAGS_BASIC, ON);
3783 if (status)
3784 return status;
3786 /* For BE3 VFs, the PF programs the initial MAC address */
3787 if (!(BEx_chip(adapter) && be_virtfn(adapter))) {
3788 status = be_dev_mac_add(adapter, adapter->netdev->dev_addr);
3789 if (status)
3790 return status;
3791 ether_addr_copy(adapter->dev_mac, adapter->netdev->dev_addr);
3794 if (adapter->vlans_added)
3795 be_vid_config(adapter);
3797 __be_set_rx_mode(adapter);
3799 return 0;
3802 static int be_open(struct net_device *netdev)
3804 struct be_adapter *adapter = netdev_priv(netdev);
3805 struct be_eq_obj *eqo;
3806 struct be_rx_obj *rxo;
3807 struct be_tx_obj *txo;
3808 u8 link_status;
3809 int status, i;
3811 status = be_rx_qs_create(adapter);
3812 if (status)
3813 goto err;
3815 status = be_enable_if_filters(adapter);
3816 if (status)
3817 goto err;
3819 status = be_irq_register(adapter);
3820 if (status)
3821 goto err;
3823 for_all_rx_queues(adapter, rxo, i)
3824 be_cq_notify(adapter, rxo->cq.id, true, 0);
3826 for_all_tx_queues(adapter, txo, i)
3827 be_cq_notify(adapter, txo->cq.id, true, 0);
3829 be_async_mcc_enable(adapter);
3831 for_all_evt_queues(adapter, eqo, i) {
3832 napi_enable(&eqo->napi);
3833 be_enable_busy_poll(eqo);
3834 be_eq_notify(adapter, eqo->q.id, true, true, 0, 0);
3836 adapter->flags |= BE_FLAGS_NAPI_ENABLED;
3838 status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
3839 if (!status)
3840 be_link_status_update(adapter, link_status);
3842 netif_tx_start_all_queues(netdev);
3843 if (skyhawk_chip(adapter))
3844 udp_tunnel_get_rx_info(netdev);
3846 return 0;
3847 err:
3848 be_close(adapter->netdev);
3849 return -EIO;
3852 static void be_vf_eth_addr_generate(struct be_adapter *adapter, u8 *mac)
3854 u32 addr;
3856 addr = jhash(adapter->netdev->dev_addr, ETH_ALEN, 0);
3858 mac[5] = (u8)(addr & 0xFF);
3859 mac[4] = (u8)((addr >> 8) & 0xFF);
3860 mac[3] = (u8)((addr >> 16) & 0xFF);
3861 /* Use the OUI from the current MAC address */
3862 memcpy(mac, adapter->netdev->dev_addr, 3);
3866 * Generate a seed MAC address from the PF MAC Address using jhash.
3867 * MAC addresses for VFs are assigned incrementally starting from the seed.
3868 * These addresses are programmed in the ASIC by the PF and the VF driver
3869 * queries for the MAC address during its probe.
3871 static int be_vf_eth_addr_config(struct be_adapter *adapter)
3873 u32 vf;
3874 int status = 0;
3875 u8 mac[ETH_ALEN];
3876 struct be_vf_cfg *vf_cfg;
3878 be_vf_eth_addr_generate(adapter, mac);
3880 for_all_vfs(adapter, vf_cfg, vf) {
3881 if (BEx_chip(adapter))
3882 status = be_cmd_pmac_add(adapter, mac,
3883 vf_cfg->if_handle,
3884 &vf_cfg->pmac_id, vf + 1);
3885 else
3886 status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
3887 vf + 1);
3889 if (status)
3890 dev_err(&adapter->pdev->dev,
3891 "Mac address assignment failed for VF %d\n",
3892 vf);
3893 else
3894 memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3896 mac[5] += 1;
3898 return status;
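/* Example (hypothetical PF MAC 00:00:c9:aa:bb:cc): the seed keeps the
 * 00:00:c9 OUI and replaces the low three bytes with values derived from
 * jhash() of the PF MAC; VF0 is given the seed itself and each subsequent
 * VF gets the previous address with mac[5] incremented by one.
 */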
3901 static int be_vfs_mac_query(struct be_adapter *adapter)
3903 int status, vf;
3904 u8 mac[ETH_ALEN];
3905 struct be_vf_cfg *vf_cfg;
3907 for_all_vfs(adapter, vf_cfg, vf) {
3908 status = be_cmd_get_active_mac(adapter, vf_cfg->pmac_id,
3909 mac, vf_cfg->if_handle,
3910 false, vf+1);
3911 if (status)
3912 return status;
3913 memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3915 return 0;
3918 static void be_vf_clear(struct be_adapter *adapter)
3920 struct be_vf_cfg *vf_cfg;
3921 u32 vf;
3923 if (pci_vfs_assigned(adapter->pdev)) {
3924 dev_warn(&adapter->pdev->dev,
3925 "VFs are assigned to VMs: not disabling VFs\n");
3926 goto done;
3929 pci_disable_sriov(adapter->pdev);
3931 for_all_vfs(adapter, vf_cfg, vf) {
3932 if (BEx_chip(adapter))
3933 be_cmd_pmac_del(adapter, vf_cfg->if_handle,
3934 vf_cfg->pmac_id, vf + 1);
3935 else
3936 be_cmd_set_mac(adapter, NULL, vf_cfg->if_handle,
3937 vf + 1);
3939 be_cmd_if_destroy(adapter, vf_cfg->if_handle, vf + 1);
3942 if (BE3_chip(adapter))
3943 be_cmd_set_hsw_config(adapter, 0, 0,
3944 adapter->if_handle,
3945 PORT_FWD_TYPE_PASSTHRU, 0);
3946 done:
3947 kfree(adapter->vf_cfg);
3948 adapter->num_vfs = 0;
3949 adapter->flags &= ~BE_FLAGS_SRIOV_ENABLED;
3952 static void be_clear_queues(struct be_adapter *adapter)
3954 be_mcc_queues_destroy(adapter);
3955 be_rx_cqs_destroy(adapter);
3956 be_tx_queues_destroy(adapter);
3957 be_evt_queues_destroy(adapter);
3960 static void be_cancel_worker(struct be_adapter *adapter)
3962 if (adapter->flags & BE_FLAGS_WORKER_SCHEDULED) {
3963 cancel_delayed_work_sync(&adapter->work);
3964 adapter->flags &= ~BE_FLAGS_WORKER_SCHEDULED;
3968 static void be_cancel_err_detection(struct be_adapter *adapter)
3970 struct be_error_recovery *err_rec = &adapter->error_recovery;
3972 if (!be_err_recovery_workq)
3973 return;
3975 if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) {
3976 cancel_delayed_work_sync(&err_rec->err_detection_work);
3977 adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED;
3981 static void be_disable_vxlan_offloads(struct be_adapter *adapter)
3983 struct net_device *netdev = adapter->netdev;
3985 if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)
3986 be_cmd_manage_iface(adapter, adapter->if_handle,
3987 OP_CONVERT_TUNNEL_TO_NORMAL);
3989 if (adapter->vxlan_port)
3990 be_cmd_set_vxlan_port(adapter, 0);
3992 adapter->flags &= ~BE_FLAGS_VXLAN_OFFLOADS;
3993 adapter->vxlan_port = 0;
3995 netdev->hw_enc_features = 0;
3996 netdev->hw_features &= ~(NETIF_F_GSO_UDP_TUNNEL);
3997 netdev->features &= ~(NETIF_F_GSO_UDP_TUNNEL);
4000 static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs,
4001 struct be_resources *vft_res)
4003 struct be_resources res = adapter->pool_res;
4004 u32 vf_if_cap_flags = res.vf_if_cap_flags;
4005 struct be_resources res_mod = {0};
4006 u16 num_vf_qs = 1;
4008 /* Distribute the queue resources among the PF and its VFs */
4009 if (num_vfs) {
4010 /* Divide the rx queues evenly among the VFs and the PF, capped
4011 * at VF-EQ-count. Any remainder queues belong to the PF.
4013 num_vf_qs = min(SH_VF_MAX_NIC_EQS,
4014 res.max_rss_qs / (num_vfs + 1));
4016 /* Skyhawk-R chip supports only MAX_PORT_RSS_TABLES
4017 * RSS Tables per port. Provide RSS on VFs only if the number of
4018 * VFs requested is less than its PF pool's RSS Tables limit.
4020 if (num_vfs >= be_max_pf_pool_rss_tables(adapter))
4021 num_vf_qs = 1;
4024 /* Resource with fields set to all '1's by GET_PROFILE_CONFIG cmd,
4025 * which are modifiable using SET_PROFILE_CONFIG cmd.
4027 be_cmd_get_profile_config(adapter, &res_mod, NULL, ACTIVE_PROFILE_TYPE,
4028 RESOURCE_MODIFIABLE, 0);
4030 /* If RSS IFACE capability flags are modifiable for a VF, set the
4031 * capability flag as valid and set RSS and DEFQ_RSS IFACE flags if
4032 * more than 1 RSSQ is available for a VF.
4033 * Otherwise, provision only 1 queue pair for VF.
4035 if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_RSS) {
4036 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4037 if (num_vf_qs > 1) {
4038 vf_if_cap_flags |= BE_IF_FLAGS_RSS;
4039 if (res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS)
4040 vf_if_cap_flags |= BE_IF_FLAGS_DEFQ_RSS;
4041 } else {
4042 vf_if_cap_flags &= ~(BE_IF_FLAGS_RSS |
4043 BE_IF_FLAGS_DEFQ_RSS);
4045 } else {
4046 num_vf_qs = 1;
4049 if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
4050 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4051 vf_if_cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4054 vft_res->vf_if_cap_flags = vf_if_cap_flags;
4055 vft_res->max_rx_qs = num_vf_qs;
4056 vft_res->max_rss_qs = num_vf_qs;
4057 vft_res->max_tx_qs = res.max_tx_qs / (num_vfs + 1);
4058 vft_res->max_cq_count = res.max_cq_count / (num_vfs + 1);
4060 /* Distribute unicast MACs, VLANs, IFACE count and MCCQ count equally
4061 * among the PF and its VFs, if the fields are changeable
4063 if (res_mod.max_uc_mac == FIELD_MODIFIABLE)
4064 vft_res->max_uc_mac = res.max_uc_mac / (num_vfs + 1);
4066 if (res_mod.max_vlans == FIELD_MODIFIABLE)
4067 vft_res->max_vlans = res.max_vlans / (num_vfs + 1);
4069 if (res_mod.max_iface_count == FIELD_MODIFIABLE)
4070 vft_res->max_iface_count = res.max_iface_count / (num_vfs + 1);
4072 if (res_mod.max_mcc_count == FIELD_MODIFIABLE)
4073 vft_res->max_mcc_count = res.max_mcc_count / (num_vfs + 1);
4076 static void be_if_destroy(struct be_adapter *adapter)
4078 be_cmd_if_destroy(adapter, adapter->if_handle, 0);
4080 kfree(adapter->pmac_id);
4081 adapter->pmac_id = NULL;
4083 kfree(adapter->mc_list);
4084 adapter->mc_list = NULL;
4086 kfree(adapter->uc_list);
4087 adapter->uc_list = NULL;
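/* Undo be_setup(): stop the worker, clear VFs, re-spread the PF-pool
 * resources across all supported VFs (Skyhawk PF only), disable VxLAN
 * offloads and tear down the interface, queues and MSI-X vectors.
 */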
4090 static int be_clear(struct be_adapter *adapter)
4092 struct pci_dev *pdev = adapter->pdev;
4093 struct be_resources vft_res = {0};
4095 be_cancel_worker(adapter);
4097 flush_workqueue(be_wq);
4099 if (sriov_enabled(adapter))
4100 be_vf_clear(adapter);
4102 /* Re-configure FW to distribute resources evenly across the max-supported
4103 * number of VFs, but only when VFs are not already enabled.
4105 if (skyhawk_chip(adapter) && be_physfn(adapter) &&
4106 !pci_vfs_assigned(pdev)) {
4107 be_calculate_vf_res(adapter,
4108 pci_sriov_get_totalvfs(pdev),
4109 &vft_res);
4110 be_cmd_set_sriov_config(adapter, adapter->pool_res,
4111 pci_sriov_get_totalvfs(pdev),
4112 &vft_res);
4115 be_disable_vxlan_offloads(adapter);
4117 be_if_destroy(adapter);
4119 be_clear_queues(adapter);
4121 be_msix_disable(adapter);
4122 adapter->flags &= ~BE_FLAGS_SETUP_DONE;
4123 return 0;
4126 static int be_vfs_if_create(struct be_adapter *adapter)
4128 struct be_resources res = {0};
4129 u32 cap_flags, en_flags, vf;
4130 struct be_vf_cfg *vf_cfg;
4131 int status;
4133 /* If a FW profile exists, then cap_flags are updated */
4134 cap_flags = BE_VF_IF_EN_FLAGS;
4136 for_all_vfs(adapter, vf_cfg, vf) {
4137 if (!BE3_chip(adapter)) {
4138 status = be_cmd_get_profile_config(adapter, &res, NULL,
4139 ACTIVE_PROFILE_TYPE,
4140 RESOURCE_LIMITS,
4141 vf + 1);
4142 if (!status) {
4143 cap_flags = res.if_cap_flags;
4144 /* Prevent VFs from enabling VLAN promiscuous
4145 * mode
4147 cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4151 /* PF should enable IF flags during proxy if_create call */
4152 en_flags = cap_flags & BE_VF_IF_EN_FLAGS;
4153 status = be_cmd_if_create(adapter, cap_flags, en_flags,
4154 &vf_cfg->if_handle, vf + 1);
4155 if (status)
4156 return status;
4159 return 0;
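/* Allocate the per-VF config array; if_handle and pmac_id are initialized
 * to -1 to mark them as not yet created.
 */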
4162 static int be_vf_setup_init(struct be_adapter *adapter)
4164 struct be_vf_cfg *vf_cfg;
4165 int vf;
4167 adapter->vf_cfg = kcalloc(adapter->num_vfs, sizeof(*vf_cfg),
4168 GFP_KERNEL);
4169 if (!adapter->vf_cfg)
4170 return -ENOMEM;
4172 for_all_vfs(adapter, vf_cfg, vf) {
4173 vf_cfg->if_handle = -1;
4174 vf_cfg->pmac_id = -1;
4176 return 0;
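/* Main SR-IOV bring-up path: create (or rediscover) a proxy interface and
 * MAC for each VF, grant the FILTMGMT privilege where possible, read the
 * spoof-check setting, and finally enable SR-IOV on the PCI device if it
 * is not already enabled.
 */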
4179 static int be_vf_setup(struct be_adapter *adapter)
4181 struct device *dev = &adapter->pdev->dev;
4182 struct be_vf_cfg *vf_cfg;
4183 int status, old_vfs, vf;
4184 bool spoofchk;
4186 old_vfs = pci_num_vf(adapter->pdev);
4188 status = be_vf_setup_init(adapter);
4189 if (status)
4190 goto err;
4192 if (old_vfs) {
4193 for_all_vfs(adapter, vf_cfg, vf) {
4194 status = be_cmd_get_if_id(adapter, vf_cfg, vf);
4195 if (status)
4196 goto err;
4199 status = be_vfs_mac_query(adapter);
4200 if (status)
4201 goto err;
4202 } else {
4203 status = be_vfs_if_create(adapter);
4204 if (status)
4205 goto err;
4207 status = be_vf_eth_addr_config(adapter);
4208 if (status)
4209 goto err;
4212 for_all_vfs(adapter, vf_cfg, vf) {
4213 /* Allow VFs to program MAC/VLAN filters */
4214 status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges,
4215 vf + 1);
4216 if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
4217 status = be_cmd_set_fn_privileges(adapter,
4218 vf_cfg->privileges |
4219 BE_PRIV_FILTMGMT,
4220 vf + 1);
4221 if (!status) {
4222 vf_cfg->privileges |= BE_PRIV_FILTMGMT;
4223 dev_info(dev, "VF%d has FILTMGMT privilege\n",
4224 vf);
4228 /* Allow full available bandwidth */
4229 if (!old_vfs)
4230 be_cmd_config_qos(adapter, 0, 0, vf + 1);
4232 status = be_cmd_get_hsw_config(adapter, NULL, vf + 1,
4233 vf_cfg->if_handle, NULL,
4234 &spoofchk);
4235 if (!status)
4236 vf_cfg->spoofchk = spoofchk;
4238 if (!old_vfs) {
4239 be_cmd_enable_vf(adapter, vf + 1);
4240 be_cmd_set_logical_link_config(adapter,
4241 IFLA_VF_LINK_STATE_AUTO,
4242 vf+1);
4246 if (!old_vfs) {
4247 status = pci_enable_sriov(adapter->pdev, adapter->num_vfs);
4248 if (status) {
4249 dev_err(dev, "SRIOV enable failed\n");
4250 adapter->num_vfs = 0;
4251 goto err;
4255 if (BE3_chip(adapter)) {
4256 /* On BE3, enable VEB only when SRIOV is enabled */
4257 status = be_cmd_set_hsw_config(adapter, 0, 0,
4258 adapter->if_handle,
4259 PORT_FWD_TYPE_VEB, 0);
4260 if (status)
4261 goto err;
4264 adapter->flags |= BE_FLAGS_SRIOV_ENABLED;
4265 return 0;
4266 err:
4267 dev_err(dev, "VF setup failed\n");
4268 be_vf_clear(adapter);
4269 return status;
4272 /* Converting function_mode bits on BE3 to SH mc_type enums */
4274 static u8 be_convert_mc_type(u32 function_mode)
4276 if (function_mode & VNIC_MODE && function_mode & QNQ_MODE)
4277 return vNIC1;
4278 else if (function_mode & QNQ_MODE)
4279 return FLEX10;
4280 else if (function_mode & VNIC_MODE)
4281 return vNIC2;
4282 else if (function_mode & UMC_ENABLED)
4283 return UMC;
4284 else
4285 return MC_NONE;
4288 /* On BE2/BE3 FW does not suggest the supported limits */
4289 static void BEx_get_resources(struct be_adapter *adapter,
4290 struct be_resources *res)
4292 bool use_sriov = adapter->num_vfs ? 1 : 0;
4294 if (be_physfn(adapter))
4295 res->max_uc_mac = BE_UC_PMAC_COUNT;
4296 else
4297 res->max_uc_mac = BE_VF_UC_PMAC_COUNT;
4299 adapter->mc_type = be_convert_mc_type(adapter->function_mode);
4301 if (be_is_mc(adapter)) {
4302 /* Assuming that there are 4 channels per port
4303 * when multi-channel is enabled
4305 if (be_is_qnq_mode(adapter))
4306 res->max_vlans = BE_NUM_VLANS_SUPPORTED/8;
4307 else
4308 /* In a non-qnq multichannel mode, the pvid
4309 * takes up one vlan entry
4311 res->max_vlans = (BE_NUM_VLANS_SUPPORTED / 4) - 1;
4312 } else {
4313 res->max_vlans = BE_NUM_VLANS_SUPPORTED;
4316 res->max_mcast_mac = BE_MAX_MC;
4318 /* 1) For BE3 1Gb ports, FW does not support multiple TXQs
4319 * 2) Create multiple TX rings on a BE3-R multi-channel interface
4320 * *only* if it is RSS-capable.
4322 if (BE2_chip(adapter) || use_sriov || (adapter->port_num > 1) ||
4323 be_virtfn(adapter) ||
4324 (be_is_mc(adapter) &&
4325 !(adapter->function_caps & BE_FUNCTION_CAPS_RSS))) {
4326 res->max_tx_qs = 1;
4327 } else if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) {
4328 struct be_resources super_nic_res = {0};
4330 /* On a SuperNIC profile, the driver needs to use the
4331 * GET_PROFILE_CONFIG cmd to query the per-function TXQ limits
4333 be_cmd_get_profile_config(adapter, &super_nic_res, NULL,
4334 ACTIVE_PROFILE_TYPE, RESOURCE_LIMITS,
4336 /* Some old versions of BE3 FW don't report max_tx_qs value */
4337 res->max_tx_qs = super_nic_res.max_tx_qs ? : BE3_MAX_TX_QS;
4338 } else {
4339 res->max_tx_qs = BE3_MAX_TX_QS;
4342 if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
4343 !use_sriov && be_physfn(adapter))
4344 res->max_rss_qs = (adapter->be3_native) ?
4345 BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
4346 res->max_rx_qs = res->max_rss_qs + 1;
4348 if (be_physfn(adapter))
4349 res->max_evt_qs = (be_max_vfs(adapter) > 0) ?
4350 BE3_SRIOV_MAX_EVT_QS : BE3_MAX_EVT_QS;
4351 else
4352 res->max_evt_qs = 1;
4354 res->if_cap_flags = BE_IF_CAP_FLAGS_WANT;
4355 res->if_cap_flags &= ~BE_IF_FLAGS_DEFQ_RSS;
4356 if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS))
4357 res->if_cap_flags &= ~BE_IF_FLAGS_RSS;
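/* Reset the adapter soft state that be_setup() (re)derives from FW/HW */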
4360 static void be_setup_init(struct be_adapter *adapter)
4362 adapter->vlan_prio_bmap = 0xff;
4363 adapter->phy.link_speed = -1;
4364 adapter->if_handle = -1;
4365 adapter->be3_native = false;
4366 adapter->if_flags = 0;
4367 adapter->phy_state = BE_UNKNOWN_PHY_STATE;
4368 if (be_physfn(adapter))
4369 adapter->cmd_privileges = MAX_PRIVILEGES;
4370 else
4371 adapter->cmd_privileges = MIN_PRIVILEGES;
4374 /* HW supports only MAX_PORT_RSS_TABLES RSS Policy Tables per port.
4375 * However, this HW limitation is not exposed to the host via any SLI cmd.
4376 * As a result, in the case of SRIOV and in particular multi-partition configs
4377 * the driver needs to calculate a proportional share of RSS Tables per PF-pool
4378 * for distribution between the VFs. This self-imposed limit will determine the
4379 * number of VFs for which RSS can be enabled.
4381 static void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter)
4383 struct be_port_resources port_res = {0};
4384 u8 rss_tables_on_port;
4385 u16 max_vfs = be_max_vfs(adapter);
4387 be_cmd_get_profile_config(adapter, NULL, &port_res, SAVED_PROFILE_TYPE,
4388 RESOURCE_LIMITS, 0);
4390 rss_tables_on_port = MAX_PORT_RSS_TABLES - port_res.nic_pfs;
4392 /* Each PF Pool's RSS Tables limit =
4393 * PF's Max VFs / Total_Max_VFs on Port * RSS Tables on Port
4395 adapter->pool_res.max_rss_tables =
4396 max_vfs * rss_tables_on_port / port_res.max_vfs;
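/* Read the SR-IOV PF-pool resource limits from FW. If VFs survived a
 * previous driver unload, use the PCI TotalVFs value and the existing VF
 * count instead of the pool limits.
 */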
4399 static int be_get_sriov_config(struct be_adapter *adapter)
4401 struct be_resources res = {0};
4402 int max_vfs, old_vfs;
4404 be_cmd_get_profile_config(adapter, &res, NULL, ACTIVE_PROFILE_TYPE,
4405 RESOURCE_LIMITS, 0);
4407 /* Some old versions of BE3 FW don't report max_vfs value */
4408 if (BE3_chip(adapter) && !res.max_vfs) {
4409 max_vfs = pci_sriov_get_totalvfs(adapter->pdev);
4410 res.max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0;
4413 adapter->pool_res = res;
4415 /* If during previous unload of the driver, the VFs were not disabled,
4416 * then we cannot rely on the PF POOL limits for the TotalVFs value.
4417 * Instead use the TotalVFs value stored in the pci-dev struct.
4419 old_vfs = pci_num_vf(adapter->pdev);
4420 if (old_vfs) {
4421 dev_info(&adapter->pdev->dev, "%d VFs are already enabled\n",
4422 old_vfs);
4424 adapter->pool_res.max_vfs =
4425 pci_sriov_get_totalvfs(adapter->pdev);
4426 adapter->num_vfs = old_vfs;
4429 if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4430 be_calculate_pf_pool_rss_tables(adapter);
4431 dev_info(&adapter->pdev->dev,
4432 "RSS can be enabled for all VFs if num_vfs <= %d\n",
4433 be_max_pf_pool_rss_tables(adapter));
4435 return 0;
4438 static void be_alloc_sriov_res(struct be_adapter *adapter)
4440 int old_vfs = pci_num_vf(adapter->pdev);
4441 struct be_resources vft_res = {0};
4442 int status;
4444 be_get_sriov_config(adapter);
4446 if (!old_vfs)
4447 pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter));
4449 /* When the HW is in SRIOV capable configuration, the PF-pool
4450 * resources are given to PF during driver load, if there are no
4451 * old VFs. This facility is not available in BE3 FW.
4452 * Also, this is done by FW in Lancer chip.
4454 if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4455 be_calculate_vf_res(adapter, 0, &vft_res);
4456 status = be_cmd_set_sriov_config(adapter, adapter->pool_res, 0,
4457 &vft_res);
4458 if (status)
4459 dev_err(&adapter->pdev->dev,
4460 "Failed to optimize SRIOV resources\n");
4464 static int be_get_resources(struct be_adapter *adapter)
4466 struct device *dev = &adapter->pdev->dev;
4467 struct be_resources res = {0};
4468 int status;
4470 /* For Lancer, SH etc., read per-function resource limits from FW.
4471 * GET_FUNC_CONFIG returns per function guaranteed limits.
4472 * GET_PROFILE_CONFIG returns PCI-E related limits and PF-pool limits
4474 if (BEx_chip(adapter)) {
4475 BEx_get_resources(adapter, &res);
4476 } else {
4477 status = be_cmd_get_func_config(adapter, &res);
4478 if (status)
4479 return status;
4481 /* If a default RXQ must be created, we'll use up one RSSQ */
4482 if (res.max_rss_qs && res.max_rss_qs == res.max_rx_qs &&
4483 !(res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS))
4484 res.max_rss_qs -= 1;
4487 /* If RoCE is supported, stash away half the EQs for RoCE */
4488 res.max_nic_evt_qs = be_roce_supported(adapter) ?
4489 res.max_evt_qs / 2 : res.max_evt_qs;
4490 adapter->res = res;
4492 /* If FW supports RSS default queue, then skip creating non-RSS
4493 * queue for non-IP traffic.
4495 adapter->need_def_rxq = (be_if_cap_flags(adapter) &
4496 BE_IF_FLAGS_DEFQ_RSS) ? 0 : 1;
4498 dev_info(dev, "Max: txqs %d, rxqs %d, rss %d, eqs %d, vfs %d\n",
4499 be_max_txqs(adapter), be_max_rxqs(adapter),
4500 be_max_rss(adapter), be_max_nic_eqs(adapter),
4501 be_max_vfs(adapter));
4502 dev_info(dev, "Max: uc-macs %d, mc-macs %d, vlans %d\n",
4503 be_max_uc(adapter), be_max_mc(adapter),
4504 be_max_vlans(adapter));
4506 /* Ensure RX and TX queues are created in pairs at init time */
4507 adapter->cfg_num_rx_irqs =
4508 min_t(u16, netif_get_num_default_rss_queues(),
4509 be_max_qp_irqs(adapter));
4510 adapter->cfg_num_tx_irqs = adapter->cfg_num_rx_irqs;
4511 return 0;
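/* Query controller attributes, FW config, WoL capability, port name and the
 * active profile from FW; results are cached in the adapter struct.
 */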
4514 static int be_get_config(struct be_adapter *adapter)
4516 int status, level;
4517 u16 profile_id;
4519 status = be_cmd_get_cntl_attributes(adapter);
4520 if (status)
4521 return status;
4523 status = be_cmd_query_fw_cfg(adapter);
4524 if (status)
4525 return status;
4527 if (!lancer_chip(adapter) && be_physfn(adapter))
4528 be_cmd_get_fat_dump_len(adapter, &adapter->fat_dump_len);
4530 if (BEx_chip(adapter)) {
4531 level = be_cmd_get_fw_log_level(adapter);
4532 adapter->msg_enable =
4533 level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0;
4536 be_cmd_get_acpi_wol_cap(adapter);
4537 pci_enable_wake(adapter->pdev, PCI_D3hot, adapter->wol_en);
4538 pci_enable_wake(adapter->pdev, PCI_D3cold, adapter->wol_en);
4540 be_cmd_query_port_name(adapter);
4542 if (be_physfn(adapter)) {
4543 status = be_cmd_get_active_profile(adapter, &profile_id);
4544 if (!status)
4545 dev_info(&adapter->pdev->dev,
4546 "Using profile 0x%x\n", profile_id);
4549 return 0;
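/* If the netdev does not yet have a MAC address, read the permanent MAC
 * from FW and use it for both dev_addr and perm_addr.
 */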
4552 static int be_mac_setup(struct be_adapter *adapter)
4554 u8 mac[ETH_ALEN];
4555 int status;
4557 if (is_zero_ether_addr(adapter->netdev->dev_addr)) {
4558 status = be_cmd_get_perm_mac(adapter, mac);
4559 if (status)
4560 return status;
4562 memcpy(adapter->netdev->dev_addr, mac, ETH_ALEN);
4563 memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN);
4566 return 0;
4569 static void be_schedule_worker(struct be_adapter *adapter)
4571 queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
4572 adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
4575 static void be_destroy_err_recovery_workq(void)
4577 if (!be_err_recovery_workq)
4578 return;
4580 flush_workqueue(be_err_recovery_workq);
4581 destroy_workqueue(be_err_recovery_workq);
4582 be_err_recovery_workq = NULL;
4585 static void be_schedule_err_detection(struct be_adapter *adapter, u32 delay)
4587 struct be_error_recovery *err_rec = &adapter->error_recovery;
4589 if (!be_err_recovery_workq)
4590 return;
4592 queue_delayed_work(be_err_recovery_workq, &err_rec->err_detection_work,
4593 msecs_to_jiffies(delay));
4594 adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED;
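/* Create the event queues, TX queues, RX CQs and MCC queues and publish the
 * resulting queue counts to the stack; the caller must hold rtnl_lock for
 * the real_num_tx/rx_queues updates.
 */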
4597 static int be_setup_queues(struct be_adapter *adapter)
4599 struct net_device *netdev = adapter->netdev;
4600 int status;
4602 status = be_evt_queues_create(adapter);
4603 if (status)
4604 goto err;
4606 status = be_tx_qs_create(adapter);
4607 if (status)
4608 goto err;
4610 status = be_rx_cqs_create(adapter);
4611 if (status)
4612 goto err;
4614 status = be_mcc_queues_create(adapter);
4615 if (status)
4616 goto err;
4618 status = netif_set_real_num_rx_queues(netdev, adapter->num_rx_qs);
4619 if (status)
4620 goto err;
4622 status = netif_set_real_num_tx_queues(netdev, adapter->num_tx_qs);
4623 if (status)
4624 goto err;
4626 return 0;
4627 err:
4628 dev_err(&adapter->pdev->dev, "queue_setup failed\n");
4629 return status;
4632 static int be_if_create(struct be_adapter *adapter)
4634 u32 en_flags = BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS;
4635 u32 cap_flags = be_if_cap_flags(adapter);
4636 int status;
4638 /* alloc required memory for other filtering fields */
4639 adapter->pmac_id = kcalloc(be_max_uc(adapter),
4640 sizeof(*adapter->pmac_id), GFP_KERNEL);
4641 if (!adapter->pmac_id)
4642 return -ENOMEM;
4644 adapter->mc_list = kcalloc(be_max_mc(adapter),
4645 sizeof(*adapter->mc_list), GFP_KERNEL);
4646 if (!adapter->mc_list)
4647 return -ENOMEM;
4649 adapter->uc_list = kcalloc(be_max_uc(adapter),
4650 sizeof(*adapter->uc_list), GFP_KERNEL);
4651 if (!adapter->uc_list)
4652 return -ENOMEM;
4654 if (adapter->cfg_num_rx_irqs == 1)
4655 cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS);
4657 en_flags &= cap_flags;
4658 /* will enable all the needed filter flags in be_open() */
4659 status = be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags,
4660 &adapter->if_handle, 0);
4662 if (status)
4663 return status;
4665 return 0;
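/* Re-create the interface and queues with the current cfg_num_*_irqs values.
 * Used when the queue/IRQ requirements change at runtime, for example when
 * SR-IOV is enabled or disabled via be_pci_sriov_configure() below.
 */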
4668 int be_update_queues(struct be_adapter *adapter)
4670 struct net_device *netdev = adapter->netdev;
4671 int status;
4673 if (netif_running(netdev))
4674 be_close(netdev);
4676 be_cancel_worker(adapter);
4678 /* If any vectors have been shared with RoCE we cannot re-program
4679 * the MSIx table.
4681 if (!adapter->num_msix_roce_vec)
4682 be_msix_disable(adapter);
4684 be_clear_queues(adapter);
4685 status = be_cmd_if_destroy(adapter, adapter->if_handle, 0);
4686 if (status)
4687 return status;
4689 if (!msix_enabled(adapter)) {
4690 status = be_msix_enable(adapter);
4691 if (status)
4692 return status;
4695 status = be_if_create(adapter);
4696 if (status)
4697 return status;
4699 status = be_setup_queues(adapter);
4700 if (status)
4701 return status;
4703 be_schedule_worker(adapter);
4705 if (netif_running(netdev))
4706 status = be_open(netdev);
4708 return status;
4711 static inline int fw_major_num(const char *fw_ver)
4713 int fw_major = 0, i;
4715 i = sscanf(fw_ver, "%d.", &fw_major);
4716 if (i != 1)
4717 return 0;
4719 return fw_major;
4722 /* If it is error recovery, FLR the PF.
4723 * Else, if any VFs are already enabled, don't FLR the PF.
4725 static bool be_reset_required(struct be_adapter *adapter)
4727 if (be_error_recovering(adapter))
4728 return true;
4729 else
4730 return pci_num_vf(adapter->pdev) == 0;
4733 /* Wait for the FW to be ready and perform the required initialization */
4734 static int be_func_init(struct be_adapter *adapter)
4736 int status;
4738 status = be_fw_wait_ready(adapter);
4739 if (status)
4740 return status;
4742 /* FW is now ready; clear errors to allow cmds/doorbell */
4743 be_clear_error(adapter, BE_CLEAR_ALL);
4745 if (be_reset_required(adapter)) {
4746 status = be_cmd_reset_function(adapter);
4747 if (status)
4748 return status;
4750 /* Wait for interrupts to quiesce after an FLR */
4751 msleep(100);
4754 /* Tell FW we're ready to fire cmds */
4755 status = be_cmd_fw_init(adapter);
4756 if (status)
4757 return status;
4759 /* Allow interrupts for other ULPs running on NIC function */
4760 be_intr_set(adapter, true);
4762 return 0;
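/* Top-level adapter initialization: wait for FW readiness, discover resource
 * limits, enable MSI-X, create the default interface and queues, program the
 * MAC and flow-control settings and, if requested, set up SR-IOV VFs.
 */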
4765 static int be_setup(struct be_adapter *adapter)
4767 struct device *dev = &adapter->pdev->dev;
4768 int status;
4770 status = be_func_init(adapter);
4771 if (status)
4772 return status;
4774 be_setup_init(adapter);
4776 if (!lancer_chip(adapter))
4777 be_cmd_req_native_mode(adapter);
4779 /* invoke this cmd first to get pf_num and vf_num which are needed
4780 * for issuing profile related cmds
4782 if (!BEx_chip(adapter)) {
4783 status = be_cmd_get_func_config(adapter, NULL);
4784 if (status)
4785 return status;
4788 status = be_get_config(adapter);
4789 if (status)
4790 goto err;
4792 if (!BE2_chip(adapter) && be_physfn(adapter))
4793 be_alloc_sriov_res(adapter);
4795 status = be_get_resources(adapter);
4796 if (status)
4797 goto err;
4799 status = be_msix_enable(adapter);
4800 if (status)
4801 goto err;
4803 /* will enable all the needed filter flags in be_open() */
4804 status = be_if_create(adapter);
4805 if (status)
4806 goto err;
4808 /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
4809 rtnl_lock();
4810 status = be_setup_queues(adapter);
4811 rtnl_unlock();
4812 if (status)
4813 goto err;
4815 be_cmd_get_fn_privileges(adapter, &adapter->cmd_privileges, 0);
4817 status = be_mac_setup(adapter);
4818 if (status)
4819 goto err;
4821 be_cmd_get_fw_ver(adapter);
4822 dev_info(dev, "FW version is %s\n", adapter->fw_ver);
4824 if (BE2_chip(adapter) && fw_major_num(adapter->fw_ver) < 4) {
4825 dev_err(dev, "Firmware on card is old(%s), IRQs may not work",
4826 adapter->fw_ver);
4827 dev_err(dev, "Please upgrade firmware to version >= 4.0\n");
4830 status = be_cmd_set_flow_control(adapter, adapter->tx_fc,
4831 adapter->rx_fc);
4832 if (status)
4833 be_cmd_get_flow_control(adapter, &adapter->tx_fc,
4834 &adapter->rx_fc);
4836 dev_info(&adapter->pdev->dev, "HW Flow control - TX:%d RX:%d\n",
4837 adapter->tx_fc, adapter->rx_fc);
4839 if (be_physfn(adapter))
4840 be_cmd_set_logical_link_config(adapter,
4841 IFLA_VF_LINK_STATE_AUTO, 0);
4843 /* BE3 EVB echoes broadcast/multicast packets back to the PF's vport,
4844 * confusing a Linux bridge or OVS that it might be connected to.
4845 * Set the EVB to PASSTHRU mode which effectively disables the EVB
4846 * when SRIOV is not enabled.
4848 if (BE3_chip(adapter))
4849 be_cmd_set_hsw_config(adapter, 0, 0, adapter->if_handle,
4850 PORT_FWD_TYPE_PASSTHRU, 0);
4852 if (adapter->num_vfs)
4853 be_vf_setup(adapter);
4855 status = be_cmd_get_phy_info(adapter);
4856 if (!status && be_pause_supported(adapter))
4857 adapter->phy.fc_autoneg = 1;
4859 if (be_physfn(adapter) && !lancer_chip(adapter))
4860 be_cmd_set_features(adapter);
4862 be_schedule_worker(adapter);
4863 adapter->flags |= BE_FLAGS_SETUP_DONE;
4864 return 0;
4865 err:
4866 be_clear(adapter);
4867 return status;
4870 #ifdef CONFIG_NET_POLL_CONTROLLER
4871 static void be_netpoll(struct net_device *netdev)
4873 struct be_adapter *adapter = netdev_priv(netdev);
4874 struct be_eq_obj *eqo;
4875 int i;
4877 for_all_evt_queues(adapter, eqo, i) {
4878 be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
4879 napi_schedule(&eqo->napi);
4882 #endif
4884 int be_load_fw(struct be_adapter *adapter, u8 *fw_file)
4886 const struct firmware *fw;
4887 int status;
4889 if (!netif_running(adapter->netdev)) {
4890 dev_err(&adapter->pdev->dev,
4891 "Firmware load not allowed (interface is down)\n");
4892 return -ENETDOWN;
4895 status = request_firmware(&fw, fw_file, &adapter->pdev->dev);
4896 if (status)
4897 goto fw_exit;
4899 dev_info(&adapter->pdev->dev, "Flashing firmware file %s\n", fw_file);
4901 if (lancer_chip(adapter))
4902 status = lancer_fw_download(adapter, fw);
4903 else
4904 status = be_fw_download(adapter, fw);
4906 if (!status)
4907 be_cmd_get_fw_ver(adapter);
4909 fw_exit:
4910 release_firmware(fw);
4911 return status;
4914 static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
4915 u16 flags)
4917 struct be_adapter *adapter = netdev_priv(dev);
4918 struct nlattr *attr, *br_spec;
4919 int rem;
4920 int status = 0;
4921 u16 mode = 0;
4923 if (!sriov_enabled(adapter))
4924 return -EOPNOTSUPP;
4926 br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
4927 if (!br_spec)
4928 return -EINVAL;
4930 nla_for_each_nested(attr, br_spec, rem) {
4931 if (nla_type(attr) != IFLA_BRIDGE_MODE)
4932 continue;
4934 if (nla_len(attr) < sizeof(mode))
4935 return -EINVAL;
4937 mode = nla_get_u16(attr);
4938 if (BE3_chip(adapter) && mode == BRIDGE_MODE_VEPA)
4939 return -EOPNOTSUPP;
4941 if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
4942 return -EINVAL;
4944 status = be_cmd_set_hsw_config(adapter, 0, 0,
4945 adapter->if_handle,
4946 mode == BRIDGE_MODE_VEPA ?
4947 PORT_FWD_TYPE_VEPA :
4948 PORT_FWD_TYPE_VEB, 0);
4949 if (status)
4950 goto err;
4952 dev_info(&adapter->pdev->dev, "enabled switch mode: %s\n",
4953 mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4955 return status;
4957 err:
4958 dev_err(&adapter->pdev->dev, "Failed to set switch mode %s\n",
4959 mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4961 return status;
4964 static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
4965 struct net_device *dev, u32 filter_mask,
4966 int nlflags)
4968 struct be_adapter *adapter = netdev_priv(dev);
4969 int status = 0;
4970 u8 hsw_mode;
4972 /* BE and Lancer chips support VEB mode only */
4973 if (BEx_chip(adapter) || lancer_chip(adapter)) {
4974 /* VEB is disabled in non-SR-IOV profiles on BE3/Lancer */
4975 if (!pci_sriov_get_totalvfs(adapter->pdev))
4976 return 0;
4977 hsw_mode = PORT_FWD_TYPE_VEB;
4978 } else {
4979 status = be_cmd_get_hsw_config(adapter, NULL, 0,
4980 adapter->if_handle, &hsw_mode,
4981 NULL);
4982 if (status)
4983 return 0;
4985 if (hsw_mode == PORT_FWD_TYPE_PASSTHRU)
4986 return 0;
4989 return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
4990 hsw_mode == PORT_FWD_TYPE_VEPA ?
4991 BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB,
4992 0, 0, nlflags, filter_mask, NULL);
4995 static struct be_cmd_work *be_alloc_work(struct be_adapter *adapter,
4996 void (*func)(struct work_struct *))
4998 struct be_cmd_work *work;
5000 work = kzalloc(sizeof(*work), GFP_ATOMIC);
5001 if (!work) {
5002 dev_err(&adapter->pdev->dev,
5003 "be_work memory allocation failed\n");
5004 return NULL;
5007 INIT_WORK(&work->work, func);
5008 work->adapter = adapter;
5009 return work;
5012 /* VxLAN offload Notes:
5014 * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't
5015 * distinguish various types of transports (VxLAN, GRE, NVGRE ..). So, offload
5016 * is expected to work across all types of IP tunnels once exported. Skyhawk
5017 * supports offloads for either VxLAN or NVGRE, exclusively. So we export VxLAN
5018 * offloads in hw_enc_features only when a VxLAN port is added. If other (non
5019 * VxLAN) tunnels are configured while VxLAN offloads are enabled, offloads for
5020 * those other tunnels are unexported on the fly through ndo_features_check().
5022 * Skyhawk supports VxLAN offloads only for one UDP dport. So, if the stack
5023 * adds more than one port, disable offloads and don't re-enable them again
5024 * until after all the tunnels are removed.
5026 static void be_work_add_vxlan_port(struct work_struct *work)
5028 struct be_cmd_work *cmd_work =
5029 container_of(work, struct be_cmd_work, work);
5030 struct be_adapter *adapter = cmd_work->adapter;
5031 struct net_device *netdev = adapter->netdev;
5032 struct device *dev = &adapter->pdev->dev;
5033 __be16 port = cmd_work->info.vxlan_port;
5034 int status;
5036 if (adapter->vxlan_port == port && adapter->vxlan_port_count) {
5037 adapter->vxlan_port_aliases++;
5038 goto done;
5041 if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS) {
5042 dev_info(dev,
5043 "Only one UDP port supported for VxLAN offloads\n");
5044 dev_info(dev, "Disabling VxLAN offloads\n");
5045 adapter->vxlan_port_count++;
5046 goto err;
5049 if (adapter->vxlan_port_count++ >= 1)
5050 goto done;
5052 status = be_cmd_manage_iface(adapter, adapter->if_handle,
5053 OP_CONVERT_NORMAL_TO_TUNNEL);
5054 if (status) {
5055 dev_warn(dev, "Failed to convert normal interface to tunnel\n");
5056 goto err;
5059 status = be_cmd_set_vxlan_port(adapter, port);
5060 if (status) {
5061 dev_warn(dev, "Failed to add VxLAN port\n");
5062 goto err;
5064 adapter->flags |= BE_FLAGS_VXLAN_OFFLOADS;
5065 adapter->vxlan_port = port;
5067 netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
5068 NETIF_F_TSO | NETIF_F_TSO6 |
5069 NETIF_F_GSO_UDP_TUNNEL;
5070 netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
5071 netdev->features |= NETIF_F_GSO_UDP_TUNNEL;
5073 dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
5074 be16_to_cpu(port));
5075 goto done;
5076 err:
5077 be_disable_vxlan_offloads(adapter);
5078 done:
5079 kfree(cmd_work);
5082 static void be_work_del_vxlan_port(struct work_struct *work)
5084 struct be_cmd_work *cmd_work =
5085 container_of(work, struct be_cmd_work, work);
5086 struct be_adapter *adapter = cmd_work->adapter;
5087 __be16 port = cmd_work->info.vxlan_port;
5089 if (adapter->vxlan_port != port)
5090 goto done;
5092 if (adapter->vxlan_port_aliases) {
5093 adapter->vxlan_port_aliases--;
5094 goto out;
5097 be_disable_vxlan_offloads(adapter);
5099 dev_info(&adapter->pdev->dev,
5100 "Disabled VxLAN offloads for UDP port %d\n",
5101 be16_to_cpu(port));
5102 done:
5103 adapter->vxlan_port_count--;
5104 out:
5105 kfree(cmd_work);
5108 static void be_cfg_vxlan_port(struct net_device *netdev,
5109 struct udp_tunnel_info *ti,
5110 void (*func)(struct work_struct *))
5112 struct be_adapter *adapter = netdev_priv(netdev);
5113 struct be_cmd_work *cmd_work;
5115 if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
5116 return;
5118 if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
5119 return;
5121 cmd_work = be_alloc_work(adapter, func);
5122 if (cmd_work) {
5123 cmd_work->info.vxlan_port = ti->port;
5124 queue_work(be_wq, &cmd_work->work);
5128 static void be_del_vxlan_port(struct net_device *netdev,
5129 struct udp_tunnel_info *ti)
5131 be_cfg_vxlan_port(netdev, ti, be_work_del_vxlan_port);
5134 static void be_add_vxlan_port(struct net_device *netdev,
5135 struct udp_tunnel_info *ti)
5137 be_cfg_vxlan_port(netdev, ti, be_work_add_vxlan_port);
5140 static netdev_features_t be_features_check(struct sk_buff *skb,
5141 struct net_device *dev,
5142 netdev_features_t features)
5144 struct be_adapter *adapter = netdev_priv(dev);
5145 u8 l4_hdr = 0;
5147 /* The code below restricts offload features for some tunneled and
5148 * Q-in-Q packets.
5149 * Offload features for normal (non tunnel) packets are unchanged.
5151 features = vlan_features_check(skb, features);
5152 if (!skb->encapsulation ||
5153 !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS))
5154 return features;
5156 /* It's an encapsulated packet and VxLAN offloads are enabled. We
5157 * should disable tunnel offload features if it's not a VxLAN packet,
5158 * as tunnel offloads have been enabled only for VxLAN. This is done to
5159 * allow other tunneled traffic such as GRE to work fine while VxLAN
5160 * offloads are configured in Skyhawk-R.
5162 switch (vlan_get_protocol(skb)) {
5163 case htons(ETH_P_IP):
5164 l4_hdr = ip_hdr(skb)->protocol;
5165 break;
5166 case htons(ETH_P_IPV6):
5167 l4_hdr = ipv6_hdr(skb)->nexthdr;
5168 break;
5169 default:
5170 return features;
5173 if (l4_hdr != IPPROTO_UDP ||
5174 skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
5175 skb->inner_protocol != htons(ETH_P_TEB) ||
5176 skb_inner_mac_header(skb) - skb_transport_header(skb) !=
5177 sizeof(struct udphdr) + sizeof(struct vxlanhdr))
5178 return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
5180 return features;
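/* Compose the physical port id from the HBA port number (plus one) followed
 * by the controller serial-number words in reverse order.
 */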
5183 static int be_get_phys_port_id(struct net_device *dev,
5184 struct netdev_phys_item_id *ppid)
5186 int i, id_len = CNTL_SERIAL_NUM_WORDS * CNTL_SERIAL_NUM_WORD_SZ + 1;
5187 struct be_adapter *adapter = netdev_priv(dev);
5188 u8 *id;
5190 if (MAX_PHYS_ITEM_ID_LEN < id_len)
5191 return -ENOSPC;
5193 ppid->id[0] = adapter->hba_port_num + 1;
5194 id = &ppid->id[1];
5195 for (i = CNTL_SERIAL_NUM_WORDS - 1; i >= 0;
5196 i--, id += CNTL_SERIAL_NUM_WORD_SZ)
5197 memcpy(id, &adapter->serial_num[i], CNTL_SERIAL_NUM_WORD_SZ);
5199 ppid->id_len = id_len;
5201 return 0;
5204 static void be_set_rx_mode(struct net_device *dev)
5206 struct be_adapter *adapter = netdev_priv(dev);
5207 struct be_cmd_work *work;
5209 work = be_alloc_work(adapter, be_work_set_rx_mode);
5210 if (work)
5211 queue_work(be_wq, &work->work);
5214 static const struct net_device_ops be_netdev_ops = {
5215 .ndo_open = be_open,
5216 .ndo_stop = be_close,
5217 .ndo_start_xmit = be_xmit,
5218 .ndo_set_rx_mode = be_set_rx_mode,
5219 .ndo_set_mac_address = be_mac_addr_set,
5220 .ndo_change_mtu = be_change_mtu,
5221 .ndo_get_stats64 = be_get_stats64,
5222 .ndo_validate_addr = eth_validate_addr,
5223 .ndo_vlan_rx_add_vid = be_vlan_add_vid,
5224 .ndo_vlan_rx_kill_vid = be_vlan_rem_vid,
5225 .ndo_set_vf_mac = be_set_vf_mac,
5226 .ndo_set_vf_vlan = be_set_vf_vlan,
5227 .ndo_set_vf_rate = be_set_vf_tx_rate,
5228 .ndo_get_vf_config = be_get_vf_config,
5229 .ndo_set_vf_link_state = be_set_vf_link_state,
5230 .ndo_set_vf_spoofchk = be_set_vf_spoofchk,
5231 #ifdef CONFIG_NET_POLL_CONTROLLER
5232 .ndo_poll_controller = be_netpoll,
5233 #endif
5234 .ndo_bridge_setlink = be_ndo_bridge_setlink,
5235 .ndo_bridge_getlink = be_ndo_bridge_getlink,
5236 #ifdef CONFIG_NET_RX_BUSY_POLL
5237 .ndo_busy_poll = be_busy_poll,
5238 #endif
5239 .ndo_udp_tunnel_add = be_add_vxlan_port,
5240 .ndo_udp_tunnel_del = be_del_vxlan_port,
5241 .ndo_features_check = be_features_check,
5242 .ndo_get_phys_port_id = be_get_phys_port_id,
5245 static void be_netdev_init(struct net_device *netdev)
5247 struct be_adapter *adapter = netdev_priv(netdev);
5249 netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5250 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
5251 NETIF_F_HW_VLAN_CTAG_TX;
5252 if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))
5253 netdev->hw_features |= NETIF_F_RXHASH;
5255 netdev->features |= netdev->hw_features |
5256 NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER;
5258 netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5259 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
5261 netdev->priv_flags |= IFF_UNICAST_FLT;
5263 netdev->flags |= IFF_MULTICAST;
5265 netif_set_gso_max_size(netdev, BE_MAX_GSO_SIZE - ETH_HLEN);
5267 netdev->netdev_ops = &be_netdev_ops;
5269 netdev->ethtool_ops = &be_ethtool_ops;
5272 static void be_cleanup(struct be_adapter *adapter)
5274 struct net_device *netdev = adapter->netdev;
5276 rtnl_lock();
5277 netif_device_detach(netdev);
5278 if (netif_running(netdev))
5279 be_close(netdev);
5280 rtnl_unlock();
5282 be_clear(adapter);
5285 static int be_resume(struct be_adapter *adapter)
5287 struct net_device *netdev = adapter->netdev;
5288 int status;
5290 status = be_setup(adapter);
5291 if (status)
5292 return status;
5294 rtnl_lock();
5295 if (netif_running(netdev))
5296 status = be_open(netdev);
5297 rtnl_unlock();
5299 if (status)
5300 return status;
5302 netif_device_attach(netdev);
5304 return 0;
5307 static void be_soft_reset(struct be_adapter *adapter)
5309 u32 val;
5311 dev_info(&adapter->pdev->dev, "Initiating chip soft reset\n");
5312 val = ioread32(adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5313 val |= SLIPORT_SOFTRESET_SR_MASK;
5314 iowrite32(val, adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
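/* Decide whether a detected recoverable-error POST stage may actually be
 * acted upon: enforce an initial idle time after probe, a minimum interval
 * between recoveries and no repetition of the same error code.
 */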
5317 static bool be_err_is_recoverable(struct be_adapter *adapter)
5319 struct be_error_recovery *err_rec = &adapter->error_recovery;
5320 unsigned long initial_idle_time =
5321 msecs_to_jiffies(ERR_RECOVERY_IDLE_TIME);
5322 unsigned long recovery_interval =
5323 msecs_to_jiffies(ERR_RECOVERY_INTERVAL);
5324 u16 ue_err_code;
5325 u32 val;
5327 val = be_POST_stage_get(adapter);
5328 if ((val & POST_STAGE_RECOVERABLE_ERR) != POST_STAGE_RECOVERABLE_ERR)
5329 return false;
5330 ue_err_code = val & POST_ERR_RECOVERY_CODE_MASK;
5331 if (ue_err_code == 0)
5332 return false;
5334 dev_err(&adapter->pdev->dev, "Recoverable HW error code: 0x%x\n",
5335 ue_err_code);
5337 if (jiffies - err_rec->probe_time <= initial_idle_time) {
5338 dev_err(&adapter->pdev->dev,
5339 "Cannot recover within %lu sec from driver load\n",
5340 jiffies_to_msecs(initial_idle_time) / MSEC_PER_SEC);
5341 return false;
5344 if (err_rec->last_recovery_time &&
5345 (jiffies - err_rec->last_recovery_time <= recovery_interval)) {
5346 dev_err(&adapter->pdev->dev,
5347 "Cannot recover within %lu sec from last recovery\n",
5348 jiffies_to_msecs(recovery_interval) / MSEC_PER_SEC);
5349 return false;
5352 if (ue_err_code == err_rec->last_err_code) {
5353 dev_err(&adapter->pdev->dev,
5354 "Cannot recover from a consecutive TPE error\n");
5355 return false;
5358 err_rec->last_recovery_time = jiffies;
5359 err_rec->last_err_code = ue_err_code;
5360 return true;
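/* BEx/Skyhawk TPE error recovery state machine. Each call advances one state
 * (NONE -> DETECT -> RESET (PF0 only) -> PRE_POLL -> REINIT) and sets
 * resched_delay so that the error-detection task re-runs after the required
 * wait; only PF0 performs the chip soft reset.
 */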
5363 static int be_tpe_recover(struct be_adapter *adapter)
5365 struct be_error_recovery *err_rec = &adapter->error_recovery;
5366 int status = -EAGAIN;
5367 u32 val;
5369 switch (err_rec->recovery_state) {
5370 case ERR_RECOVERY_ST_NONE:
5371 err_rec->recovery_state = ERR_RECOVERY_ST_DETECT;
5372 err_rec->resched_delay = ERR_RECOVERY_UE_DETECT_DURATION;
5373 break;
5375 case ERR_RECOVERY_ST_DETECT:
5376 val = be_POST_stage_get(adapter);
5377 if ((val & POST_STAGE_RECOVERABLE_ERR) !=
5378 POST_STAGE_RECOVERABLE_ERR) {
5379 dev_err(&adapter->pdev->dev,
5380 "Unrecoverable HW error detected: 0x%x\n", val);
5381 status = -EINVAL;
5382 err_rec->resched_delay = 0;
5383 break;
5386 dev_err(&adapter->pdev->dev, "Recoverable HW error detected\n");
5388 /* Only PF0 initiates Chip Soft Reset. But PF0 must wait UE2SR
5389 * milliseconds before it checks for final error status in
5390 * SLIPORT_SEMAPHORE to determine if the recovery criteria are met.
5391 * If they are, then PF0 initiates a Soft Reset.
5393 if (adapter->pf_num == 0) {
5394 err_rec->recovery_state = ERR_RECOVERY_ST_RESET;
5395 err_rec->resched_delay = err_rec->ue_to_reset_time -
5396 ERR_RECOVERY_UE_DETECT_DURATION;
5397 break;
5400 err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5401 err_rec->resched_delay = err_rec->ue_to_poll_time -
5402 ERR_RECOVERY_UE_DETECT_DURATION;
5403 break;
5405 case ERR_RECOVERY_ST_RESET:
5406 if (!be_err_is_recoverable(adapter)) {
5407 dev_err(&adapter->pdev->dev,
5408 "Failed to meet recovery criteria\n");
5409 status = -EIO;
5410 err_rec->resched_delay = 0;
5411 break;
5413 be_soft_reset(adapter);
5414 err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5415 err_rec->resched_delay = err_rec->ue_to_poll_time -
5416 err_rec->ue_to_reset_time;
5417 break;
5419 case ERR_RECOVERY_ST_PRE_POLL:
5420 err_rec->recovery_state = ERR_RECOVERY_ST_REINIT;
5421 err_rec->resched_delay = 0;
5422 status = 0; /* done */
5423 break;
5425 default:
5426 status = -EINVAL;
5427 err_rec->resched_delay = 0;
5428 break;
5431 return status;
5434 static int be_err_recover(struct be_adapter *adapter)
5436 int status;
5438 if (!lancer_chip(adapter)) {
5439 if (!adapter->error_recovery.recovery_supported ||
5440 adapter->priv_flags & BE_DISABLE_TPE_RECOVERY)
5441 return -EIO;
5442 status = be_tpe_recover(adapter);
5443 if (status)
5444 goto err;
5447 /* Wait for adapter to reach quiescent state before
5448 * destroying queues
5450 status = be_fw_wait_ready(adapter);
5451 if (status)
5452 goto err;
5454 adapter->flags |= BE_FLAGS_TRY_RECOVERY;
5456 be_cleanup(adapter);
5458 status = be_resume(adapter);
5459 if (status)
5460 goto err;
5462 adapter->flags &= ~BE_FLAGS_TRY_RECOVERY;
5464 err:
5465 return status;
5468 static void be_err_detection_task(struct work_struct *work)
5470 struct be_error_recovery *err_rec =
5471 container_of(work, struct be_error_recovery,
5472 err_detection_work.work);
5473 struct be_adapter *adapter =
5474 container_of(err_rec, struct be_adapter,
5475 error_recovery);
5476 u32 resched_delay = ERR_RECOVERY_DETECTION_DELAY;
5477 struct device *dev = &adapter->pdev->dev;
5478 int recovery_status;
5480 be_detect_error(adapter);
5481 if (!be_check_error(adapter, BE_ERROR_HW))
5482 goto reschedule_task;
5484 recovery_status = be_err_recover(adapter);
5485 if (!recovery_status) {
5486 err_rec->recovery_retries = 0;
5487 err_rec->recovery_state = ERR_RECOVERY_ST_NONE;
5488 dev_info(dev, "Adapter recovery successful\n");
5489 goto reschedule_task;
5490 } else if (!lancer_chip(adapter) && err_rec->resched_delay) {
5491 /* BEx/SH recovery state machine */
5492 if (adapter->pf_num == 0 &&
5493 err_rec->recovery_state > ERR_RECOVERY_ST_DETECT)
5494 dev_err(&adapter->pdev->dev,
5495 "Adapter recovery in progress\n");
5496 resched_delay = err_rec->resched_delay;
5497 goto reschedule_task;
5498 } else if (lancer_chip(adapter) && be_virtfn(adapter)) {
5499 /* For VFs, check every second whether the PF has
5500 * allocated resources.
5502 dev_err(dev, "Re-trying adapter recovery\n");
5503 goto reschedule_task;
5504 } else if (lancer_chip(adapter) && err_rec->recovery_retries++ <
5505 ERR_RECOVERY_MAX_RETRY_COUNT) {
5506 /* In case of another error during recovery, it takes 30 sec
5507 * for adapter to come out of error. Retry error recovery after
5508 * this time interval.
5510 dev_err(&adapter->pdev->dev, "Re-trying adapter recovery\n");
5511 resched_delay = ERR_RECOVERY_RETRY_DELAY;
5512 goto reschedule_task;
5513 } else {
5514 dev_err(dev, "Adapter recovery failed\n");
5515 dev_err(dev, "Please reboot server to recover\n");
5518 return;
5520 reschedule_task:
5521 be_schedule_err_detection(adapter, resched_delay);
5524 static void be_log_sfp_info(struct be_adapter *adapter)
5526 int status;
5528 status = be_cmd_query_sfp_info(adapter);
5529 if (!status) {
5530 dev_err(&adapter->pdev->dev,
5531 "Port %c: %s Vendor: %s part no: %s",
5532 adapter->port_name,
5533 be_misconfig_evt_port_state[adapter->phy_state],
5534 adapter->phy.vendor_name,
5535 adapter->phy.vendor_pn);
5537 adapter->flags &= ~BE_FLAGS_PHY_MISCONFIGURED;
5540 static void be_worker(struct work_struct *work)
5542 struct be_adapter *adapter =
5543 container_of(work, struct be_adapter, work.work);
5544 struct be_rx_obj *rxo;
5545 int i;
5547 if (be_physfn(adapter) &&
5548 MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0)
5549 be_cmd_get_die_temperature(adapter);
5551 /* when interrupts are not yet enabled, just reap any pending
5552 * mcc completions
5554 if (!netif_running(adapter->netdev)) {
5555 local_bh_disable();
5556 be_process_mcc(adapter);
5557 local_bh_enable();
5558 goto reschedule;
5561 if (!adapter->stats_cmd_sent) {
5562 if (lancer_chip(adapter))
5563 lancer_cmd_get_pport_stats(adapter,
5564 &adapter->stats_cmd);
5565 else
5566 be_cmd_get_stats(adapter, &adapter->stats_cmd);
5569 for_all_rx_queues(adapter, rxo, i) {
5570 /* Replenish RX-queues starved due to memory
5571 * allocation failures.
5573 if (rxo->rx_post_starved)
5574 be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST);
5577 /* EQ-delay update for Skyhawk is done while notifying EQ */
5578 if (!skyhawk_chip(adapter))
5579 be_eqd_update(adapter, false);
5581 if (adapter->flags & BE_FLAGS_PHY_MISCONFIGURED)
5582 be_log_sfp_info(adapter);
5584 reschedule:
5585 adapter->work_counter++;
5586 queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
5589 static void be_unmap_pci_bars(struct be_adapter *adapter)
5591 if (adapter->csr)
5592 pci_iounmap(adapter->pdev, adapter->csr);
5593 if (adapter->db)
5594 pci_iounmap(adapter->pdev, adapter->db);
5595 if (adapter->pcicfg && adapter->pcicfg_mapped)
5596 pci_iounmap(adapter->pdev, adapter->pcicfg);
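/* Doorbell registers live in BAR 0 on Lancer and on VFs, and in BAR 4
 * otherwise.
 */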
5599 static int db_bar(struct be_adapter *adapter)
5601 if (lancer_chip(adapter) || be_virtfn(adapter))
5602 return 0;
5603 else
5604 return 4;
5607 static int be_roce_map_pci_bars(struct be_adapter *adapter)
5609 if (skyhawk_chip(adapter)) {
5610 adapter->roce_db.size = 4096;
5611 adapter->roce_db.io_addr = pci_resource_start(adapter->pdev,
5612 db_bar(adapter));
5613 adapter->roce_db.total_size = pci_resource_len(adapter->pdev,
5614 db_bar(adapter));
5616 return 0;
5619 static int be_map_pci_bars(struct be_adapter *adapter)
5621 struct pci_dev *pdev = adapter->pdev;
5622 u8 __iomem *addr;
5623 u32 sli_intf;
5625 pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf);
5626 adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >>
5627 SLI_INTF_FAMILY_SHIFT;
5628 adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0;
5630 if (BEx_chip(adapter) && be_physfn(adapter)) {
5631 adapter->csr = pci_iomap(pdev, 2, 0);
5632 if (!adapter->csr)
5633 return -ENOMEM;
5636 addr = pci_iomap(pdev, db_bar(adapter), 0);
5637 if (!addr)
5638 goto pci_map_err;
5639 adapter->db = addr;
5641 if (skyhawk_chip(adapter) || BEx_chip(adapter)) {
5642 if (be_physfn(adapter)) {
5643 /* PCICFG is the 2nd BAR in BE2 */
5644 addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0);
5645 if (!addr)
5646 goto pci_map_err;
5647 adapter->pcicfg = addr;
5648 adapter->pcicfg_mapped = true;
5649 } else {
5650 adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
5651 adapter->pcicfg_mapped = false;
5655 be_roce_map_pci_bars(adapter);
5656 return 0;
5658 pci_map_err:
5659 dev_err(&pdev->dev, "Error in mapping PCI BARs\n");
5660 be_unmap_pci_bars(adapter);
5661 return -ENOMEM;
5664 static void be_drv_cleanup(struct be_adapter *adapter)
5666 struct be_dma_mem *mem = &adapter->mbox_mem_alloced;
5667 struct device *dev = &adapter->pdev->dev;
5669 if (mem->va)
5670 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5672 mem = &adapter->rx_filter;
5673 if (mem->va)
5674 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5676 mem = &adapter->stats_cmd;
5677 if (mem->va)
5678 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5681 /* Allocate and initialize various fields in be_adapter struct */
5682 static int be_drv_init(struct be_adapter *adapter)
5684 struct be_dma_mem *mbox_mem_alloc = &adapter->mbox_mem_alloced;
5685 struct be_dma_mem *mbox_mem_align = &adapter->mbox_mem;
5686 struct be_dma_mem *rx_filter = &adapter->rx_filter;
5687 struct be_dma_mem *stats_cmd = &adapter->stats_cmd;
5688 struct device *dev = &adapter->pdev->dev;
5689 int status = 0;
5691 mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16;
5692 mbox_mem_alloc->va = dma_zalloc_coherent(dev, mbox_mem_alloc->size,
5693 &mbox_mem_alloc->dma,
5694 GFP_KERNEL);
5695 if (!mbox_mem_alloc->va)
5696 return -ENOMEM;
5698 mbox_mem_align->size = sizeof(struct be_mcc_mailbox);
5699 mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16);
5700 mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16);
5702 rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
5703 rx_filter->va = dma_zalloc_coherent(dev, rx_filter->size,
5704 &rx_filter->dma, GFP_KERNEL);
5705 if (!rx_filter->va) {
5706 status = -ENOMEM;
5707 goto free_mbox;
5710 if (lancer_chip(adapter))
5711 stats_cmd->size = sizeof(struct lancer_cmd_req_pport_stats);
5712 else if (BE2_chip(adapter))
5713 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v0);
5714 else if (BE3_chip(adapter))
5715 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v1);
5716 else
5717 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v2);
5718 stats_cmd->va = dma_zalloc_coherent(dev, stats_cmd->size,
5719 &stats_cmd->dma, GFP_KERNEL);
5720 if (!stats_cmd->va) {
5721 status = -ENOMEM;
5722 goto free_rx_filter;
5725 mutex_init(&adapter->mbox_lock);
5726 mutex_init(&adapter->mcc_lock);
5727 mutex_init(&adapter->rx_filter_lock);
5728 spin_lock_init(&adapter->mcc_cq_lock);
5729 init_completion(&adapter->et_cmd_compl);
5731 pci_save_state(adapter->pdev);
5733 INIT_DELAYED_WORK(&adapter->work, be_worker);
5735 adapter->error_recovery.recovery_state = ERR_RECOVERY_ST_NONE;
5736 adapter->error_recovery.resched_delay = 0;
5737 INIT_DELAYED_WORK(&adapter->error_recovery.err_detection_work,
5738 be_err_detection_task);
5740 adapter->rx_fc = true;
5741 adapter->tx_fc = true;
5743 /* Must be a power of 2 or else MODULO will BUG_ON */
5744 adapter->be_get_temp_freq = 64;
5746 return 0;
5748 free_rx_filter:
5749 dma_free_coherent(dev, rx_filter->size, rx_filter->va, rx_filter->dma);
5750 free_mbox:
5751 dma_free_coherent(dev, mbox_mem_alloc->size, mbox_mem_alloc->va,
5752 mbox_mem_alloc->dma);
5753 return status;
5756 static void be_remove(struct pci_dev *pdev)
5758 struct be_adapter *adapter = pci_get_drvdata(pdev);
5760 if (!adapter)
5761 return;
5763 be_roce_dev_remove(adapter);
5764 be_intr_set(adapter, false);
5766 be_cancel_err_detection(adapter);
5768 unregister_netdev(adapter->netdev);
5770 be_clear(adapter);
5772 if (!pci_vfs_assigned(adapter->pdev))
5773 be_cmd_reset_function(adapter);
5775 /* tell fw we're done with firing cmds */
5776 be_cmd_fw_clean(adapter);
5778 be_unmap_pci_bars(adapter);
5779 be_drv_cleanup(adapter);
5781 pci_disable_pcie_error_reporting(pdev);
5783 pci_release_regions(pdev);
5784 pci_disable_device(pdev);
5786 free_netdev(adapter->netdev);
5789 static ssize_t be_hwmon_show_temp(struct device *dev,
5790 struct device_attribute *dev_attr,
5791 char *buf)
5793 struct be_adapter *adapter = dev_get_drvdata(dev);
5795 /* Unit: millidegree Celsius */
5796 if (adapter->hwmon_info.be_on_die_temp == BE_INVALID_DIE_TEMP)
5797 return -EIO;
5798 else
5799 return sprintf(buf, "%u\n",
5800 adapter->hwmon_info.be_on_die_temp * 1000);
5803 static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO,
5804 be_hwmon_show_temp, NULL, 1);
5806 static struct attribute *be_hwmon_attrs[] = {
5807 &sensor_dev_attr_temp1_input.dev_attr.attr,
5808 NULL
5811 ATTRIBUTE_GROUPS(be_hwmon);
5813 static char *mc_name(struct be_adapter *adapter)
5815 char *str = ""; /* default */
5817 switch (adapter->mc_type) {
5818 case UMC:
5819 str = "UMC";
5820 break;
5821 case FLEX10:
5822 str = "FLEX10";
5823 break;
5824 case vNIC1:
5825 str = "vNIC-1";
5826 break;
5827 case nPAR:
5828 str = "nPAR";
5829 break;
5830 case UFP:
5831 str = "UFP";
5832 break;
5833 case vNIC2:
5834 str = "vNIC-2";
5835 break;
5836 default:
5837 str = "";
5840 return str;
5843 static inline char *func_name(struct be_adapter *adapter)
5845 return be_physfn(adapter) ? "PF" : "VF";
5848 static inline char *nic_name(struct pci_dev *pdev)
5850 switch (pdev->device) {
5851 case OC_DEVICE_ID1:
5852 return OC_NAME;
5853 case OC_DEVICE_ID2:
5854 return OC_NAME_BE;
5855 case OC_DEVICE_ID3:
5856 case OC_DEVICE_ID4:
5857 return OC_NAME_LANCER;
5858 case BE_DEVICE_ID2:
5859 return BE3_NAME;
5860 case OC_DEVICE_ID5:
5861 case OC_DEVICE_ID6:
5862 return OC_NAME_SH;
5863 default:
5864 return BE_NAME;
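/* PCI probe: enable the device, map BARs, allocate the netdev and driver
 * state, bring the adapter up via be_setup() and register the netdev;
 * errors unwind through the labels at the bottom.
 */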
5868 static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id)
5870 struct be_adapter *adapter;
5871 struct net_device *netdev;
5872 int status = 0;
5874 dev_info(&pdev->dev, "%s version is %s\n", DRV_NAME, DRV_VER);
5876 status = pci_enable_device(pdev);
5877 if (status)
5878 goto do_none;
5880 status = pci_request_regions(pdev, DRV_NAME);
5881 if (status)
5882 goto disable_dev;
5883 pci_set_master(pdev);
5885 netdev = alloc_etherdev_mqs(sizeof(*adapter), MAX_TX_QS, MAX_RX_QS);
5886 if (!netdev) {
5887 status = -ENOMEM;
5888 goto rel_reg;
5890 adapter = netdev_priv(netdev);
5891 adapter->pdev = pdev;
5892 pci_set_drvdata(pdev, adapter);
5893 adapter->netdev = netdev;
5894 SET_NETDEV_DEV(netdev, &pdev->dev);
5896 status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
5897 if (!status) {
5898 netdev->features |= NETIF_F_HIGHDMA;
5899 } else {
5900 status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
5901 if (status) {
5902 dev_err(&pdev->dev, "Could not set PCI DMA Mask\n");
5903 goto free_netdev;
5907 status = pci_enable_pcie_error_reporting(pdev);
5908 if (!status)
5909 dev_info(&pdev->dev, "PCIe error reporting enabled\n");
5911 status = be_map_pci_bars(adapter);
5912 if (status)
5913 goto free_netdev;
5915 status = be_drv_init(adapter);
5916 if (status)
5917 goto unmap_bars;
5919 status = be_setup(adapter);
5920 if (status)
5921 goto drv_cleanup;
5923 be_netdev_init(netdev);
5924 status = register_netdev(netdev);
5925 if (status != 0)
5926 goto unsetup;
5928 be_roce_dev_add(adapter);
5930 be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5931 adapter->error_recovery.probe_time = jiffies;
5933 /* On-die temperature is not supported for VFs. */
5934 if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) {
5935 adapter->hwmon_info.hwmon_dev =
5936 devm_hwmon_device_register_with_groups(&pdev->dev,
5937 DRV_NAME,
5938 adapter,
5939 be_hwmon_groups);
5940 adapter->hwmon_info.be_on_die_temp = BE_INVALID_DIE_TEMP;
5943 dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev),
5944 func_name(adapter), mc_name(adapter), adapter->port_name);
5946 return 0;
5948 unsetup:
5949 be_clear(adapter);
5950 drv_cleanup:
5951 be_drv_cleanup(adapter);
5952 unmap_bars:
5953 be_unmap_pci_bars(adapter);
5954 free_netdev:
5955 free_netdev(netdev);
5956 rel_reg:
5957 pci_release_regions(pdev);
5958 disable_dev:
5959 pci_disable_device(pdev);
5960 do_none:
5961 dev_err(&pdev->dev, "%s initialization failed\n", nic_name(pdev));
5962 return status;
5965 static int be_suspend(struct pci_dev *pdev, pm_message_t state)
5967 struct be_adapter *adapter = pci_get_drvdata(pdev);
5969 be_intr_set(adapter, false);
5970 be_cancel_err_detection(adapter);
5972 be_cleanup(adapter);
5974 pci_save_state(pdev);
5975 pci_disable_device(pdev);
5976 pci_set_power_state(pdev, pci_choose_state(pdev, state));
5977 return 0;
5980 static int be_pci_resume(struct pci_dev *pdev)
5982 struct be_adapter *adapter = pci_get_drvdata(pdev);
5983 int status = 0;
5985 status = pci_enable_device(pdev);
5986 if (status)
5987 return status;
5989 pci_restore_state(pdev);
5991 status = be_resume(adapter);
5992 if (status)
5993 return status;
5995 be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5997 return 0;
6001 * An FLR will stop BE from DMAing any data.
6003 static void be_shutdown(struct pci_dev *pdev)
6005 struct be_adapter *adapter = pci_get_drvdata(pdev);
6007 if (!adapter)
6008 return;
6010 be_roce_dev_shutdown(adapter);
6011 cancel_delayed_work_sync(&adapter->work);
6012 be_cancel_err_detection(adapter);
6014 netif_device_detach(adapter->netdev);
6016 be_cmd_reset_function(adapter);
6018 pci_disable_device(pdev);
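/* PCI error (EEH/AER) handlers: detach and clean up on error, re-enable the
 * device on slot reset and bring the adapter back up in the resume callback.
 */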
6021 static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev,
6022 pci_channel_state_t state)
6024 struct be_adapter *adapter = pci_get_drvdata(pdev);
6026 dev_err(&adapter->pdev->dev, "EEH error detected\n");
6028 be_roce_dev_remove(adapter);
6030 if (!be_check_error(adapter, BE_ERROR_EEH)) {
6031 be_set_error(adapter, BE_ERROR_EEH);
6033 be_cancel_err_detection(adapter);
6035 be_cleanup(adapter);
6038 if (state == pci_channel_io_perm_failure)
6039 return PCI_ERS_RESULT_DISCONNECT;
6041 pci_disable_device(pdev);
6043 /* The error could cause the FW to trigger a flash debug dump.
6044 * Resetting the card while flash dump is in progress
6045 * can cause it not to recover; wait for it to finish.
6046 * Wait only for first function as it is needed only once per
6047 * adapter.
6049 if (pdev->devfn == 0)
6050 ssleep(30);
6052 return PCI_ERS_RESULT_NEED_RESET;
6055 static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
6057 struct be_adapter *adapter = pci_get_drvdata(pdev);
6058 int status;
6060 dev_info(&adapter->pdev->dev, "EEH reset\n");
6062 status = pci_enable_device(pdev);
6063 if (status)
6064 return PCI_ERS_RESULT_DISCONNECT;
6066 pci_set_master(pdev);
6067 pci_restore_state(pdev);
6069 /* Check if card is ok and fw is ready */
6070 dev_info(&adapter->pdev->dev,
6071 "Waiting for FW to be ready after EEH reset\n");
6072 status = be_fw_wait_ready(adapter);
6073 if (status)
6074 return PCI_ERS_RESULT_DISCONNECT;
6076 pci_cleanup_aer_uncorrect_error_status(pdev);
6077 be_clear_error(adapter, BE_CLEAR_ALL);
6078 return PCI_ERS_RESULT_RECOVERED;
6081 static void be_eeh_resume(struct pci_dev *pdev)
6082 {
6083 int status = 0;
6084 struct be_adapter *adapter = pci_get_drvdata(pdev);
6086 dev_info(&adapter->pdev->dev, "EEH resume\n");
6088 pci_save_state(pdev);
6090 status = be_resume(adapter);
6091 if (status)
6092 goto err;
6094 be_roce_dev_add(adapter);
6096 be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6097 return;
6098 err:
6099 dev_err(&adapter->pdev->dev, "EEH resume failed\n");
6100 }
6102 static int be_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
6103 {
6104 struct be_adapter *adapter = pci_get_drvdata(pdev);
6105 struct be_resources vft_res = {0};
6106 int status;
6108 if (!num_vfs)
6109 be_vf_clear(adapter);
6111 adapter->num_vfs = num_vfs;
6113 if (adapter->num_vfs == 0 && pci_vfs_assigned(pdev)) {
6114 dev_warn(&pdev->dev,
6115 "Cannot disable VFs while they are assigned\n");
6116 return -EBUSY;
6117 }
6119 /* When the HW is in SRIOV capable configuration, the PF-pool resources
6120 * are equally distributed across the max-number of VFs. The user may
6121 * request only a subset of the max-vfs to be enabled.
6122 * Based on num_vfs, redistribute the resources across num_vfs so that
6123 * each VF has access to a larger share of resources.
6124 * This facility is not available in BE3 FW.
6125 * On Lancer chips, the FW performs this redistribution itself.
6126 */
6127 if (skyhawk_chip(adapter) && !pci_num_vf(pdev)) {
6128 be_calculate_vf_res(adapter, adapter->num_vfs,
6129 &vft_res);
6130 status = be_cmd_set_sriov_config(adapter, adapter->pool_res,
6131 adapter->num_vfs, &vft_res);
6132 if (status)
6133 dev_err(&pdev->dev,
6134 "Failed to optimize SR-IOV resources\n");
6137 status = be_get_resources(adapter);
6138 if (status)
6139 return be_cmd_status(status);
6141 /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
6142 rtnl_lock();
6143 status = be_update_queues(adapter);
6144 rtnl_unlock();
6145 if (status)
6146 return be_cmd_status(status);
6148 if (adapter->num_vfs)
6149 status = be_vf_setup(adapter);
6151 if (!status)
6152 return adapter->num_vfs;
6154 return 0;
6155 }
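/* This callback is invoked by the PCI core when sriov_numvfs is written,
 * for example (illustrative only, the BDF below is a placeholder):
 *
 *   echo 4 > /sys/bus/pci/devices/0000:03:00.0/sriov_numvfs   # enable 4 VFs
 *   echo 0 > /sys/bus/pci/devices/0000:03:00.0/sriov_numvfs   # disable VFs
 *
 * A positive return value is the number of VFs actually enabled.
 */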
6157 static const struct pci_error_handlers be_eeh_handlers = {
6158 .error_detected = be_eeh_err_detected,
6159 .slot_reset = be_eeh_reset,
6160 .resume = be_eeh_resume,
6161 };
6163 static struct pci_driver be_driver = {
6164 .name = DRV_NAME,
6165 .id_table = be_dev_ids,
6166 .probe = be_probe,
6167 .remove = be_remove,
6168 .suspend = be_suspend,
6169 .resume = be_pci_resume,
6170 .shutdown = be_shutdown,
6171 .sriov_configure = be_pci_sriov_configure,
6172 .err_handler = &be_eeh_handlers
6173 };
6175 static int __init be_init_module(void)
6176 {
6177 int status;
6179 if (rx_frag_size != 8192 && rx_frag_size != 4096 &&
6180 rx_frag_size != 2048) {
6181 printk(KERN_WARNING DRV_NAME
6182 " : Module param rx_frag_size must be 2048/4096/8192."
6183 " Using 2048\n");
6184 rx_frag_size = 2048;
6185 }
6187 if (num_vfs > 0) {
6188 pr_info(DRV_NAME " : Module param num_vfs is obsolete.\n");
6189 pr_info(DRV_NAME " : Use sysfs method to enable VFs\n");
6190 }
6192 be_wq = create_singlethread_workqueue("be_wq");
6193 if (!be_wq) {
6194 pr_warn(DRV_NAME " : workqueue creation failed\n");
6195 return -1;
6196 }
6198 be_err_recovery_workq =
6199 create_singlethread_workqueue("be_err_recover");
6200 if (!be_err_recovery_workq)
6201 pr_warn(DRV_NAME " : Could not create error recovery workqueue\n");
6203 status = pci_register_driver(&be_driver);
6204 if (status) {
6205 destroy_workqueue(be_wq);
6206 be_destroy_err_recovery_workq();
6207 }
6208 return status;
6209 }
6210 module_init(be_init_module);
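/* Typical load-time usage (illustrative): the only remaining tunable is
 * rx_frag_size, e.g. "modprobe be2net rx_frag_size=4096"; num_vfs is
 * obsolete, and VFs should instead be enabled through the sriov_numvfs
 * sysfs file handled by be_pci_sriov_configure() above.
 */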
6212 static void __exit be_exit_module(void)
6213 {
6214 pci_unregister_driver(&be_driver);
6216 be_destroy_err_recovery_workq();
6218 if (be_wq)
6219 destroy_workqueue(be_wq);
6220 }
6221 module_exit(be_exit_module);