1 /*******************************************************************************
3 Intel(R) Gigabit Ethernet Linux driver
4 Copyright(c) 2007-2012 Intel Corporation.
6 This program is free software; you can redistribute it and/or modify it
7 under the terms and conditions of the GNU General Public License,
8 version 2, as published by the Free Software Foundation.
10 This program is distributed in the hope it will be useful, but WITHOUT
11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 more details.
15 You should have received a copy of the GNU General Public License along with
16 this program; if not, write to the Free Software Foundation, Inc.,
17 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
19 The full GNU General Public License is included in this distribution in
20 the file called "COPYING".
22 Contact Information:
23 e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24 Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
26 *******************************************************************************/
28 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
30 #include <linux/module.h>
31 #include <linux/types.h>
32 #include <linux/init.h>
33 #include <linux/bitops.h>
34 #include <linux/vmalloc.h>
35 #include <linux/pagemap.h>
36 #include <linux/netdevice.h>
37 #include <linux/ipv6.h>
38 #include <linux/slab.h>
39 #include <net/checksum.h>
40 #include <net/ip6_checksum.h>
41 #include <linux/net_tstamp.h>
42 #include <linux/mii.h>
43 #include <linux/ethtool.h>
44 #include <linux/if.h>
45 #include <linux/if_vlan.h>
46 #include <linux/pci.h>
47 #include <linux/pci-aspm.h>
48 #include <linux/delay.h>
49 #include <linux/interrupt.h>
50 #include <linux/ip.h>
51 #include <linux/tcp.h>
52 #include <linux/sctp.h>
53 #include <linux/if_ether.h>
54 #include <linux/aer.h>
55 #include <linux/prefetch.h>
56 #include <linux/pm_runtime.h>
57 #ifdef CONFIG_IGB_DCA
58 #include <linux/dca.h>
59 #endif
60 #include "igb.h"
62 #define MAJ 3
63 #define MIN 2
64 #define BUILD 10
65 #define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
66 __stringify(BUILD) "-k"
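/* Added note (not in the original source): with the values defined above,
 * DRV_VERSION expands to the string "3.2.10-k". */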
67 char igb_driver_name[] = "igb";
68 char igb_driver_version[] = DRV_VERSION;
69 static const char igb_driver_string[] =
70 "Intel(R) Gigabit Ethernet Network Driver";
71 static const char igb_copyright[] = "Copyright (c) 2007-2012 Intel Corporation.";
73 static const struct e1000_info *igb_info_tbl[] = {
74 [board_82575] = &e1000_82575_info,
77 static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
78 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
79 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
80 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
81 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
82 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
83 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
84 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
85 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
86 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
87 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
88 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
89 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
90 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
91 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
92 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
93 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
94 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
95 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
96 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
97 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
98 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
99 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
100 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
101 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
102 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
103 /* required last entry */
104 {0, }
107 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
109 void igb_reset(struct igb_adapter *);
110 static int igb_setup_all_tx_resources(struct igb_adapter *);
111 static int igb_setup_all_rx_resources(struct igb_adapter *);
112 static void igb_free_all_tx_resources(struct igb_adapter *);
113 static void igb_free_all_rx_resources(struct igb_adapter *);
114 static void igb_setup_mrqc(struct igb_adapter *);
115 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
116 static void __devexit igb_remove(struct pci_dev *pdev);
117 static void igb_init_hw_timer(struct igb_adapter *adapter);
118 static int igb_sw_init(struct igb_adapter *);
119 static int igb_open(struct net_device *);
120 static int igb_close(struct net_device *);
121 static void igb_configure_tx(struct igb_adapter *);
122 static void igb_configure_rx(struct igb_adapter *);
123 static void igb_clean_all_tx_rings(struct igb_adapter *);
124 static void igb_clean_all_rx_rings(struct igb_adapter *);
125 static void igb_clean_tx_ring(struct igb_ring *);
126 static void igb_clean_rx_ring(struct igb_ring *);
127 static void igb_set_rx_mode(struct net_device *);
128 static void igb_update_phy_info(unsigned long);
129 static void igb_watchdog(unsigned long);
130 static void igb_watchdog_task(struct work_struct *);
131 static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
132 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
133 struct rtnl_link_stats64 *stats);
134 static int igb_change_mtu(struct net_device *, int);
135 static int igb_set_mac(struct net_device *, void *);
136 static void igb_set_uta(struct igb_adapter *adapter);
137 static irqreturn_t igb_intr(int irq, void *);
138 static irqreturn_t igb_intr_msi(int irq, void *);
139 static irqreturn_t igb_msix_other(int irq, void *);
140 static irqreturn_t igb_msix_ring(int irq, void *);
141 #ifdef CONFIG_IGB_DCA
142 static void igb_update_dca(struct igb_q_vector *);
143 static void igb_setup_dca(struct igb_adapter *);
144 #endif /* CONFIG_IGB_DCA */
145 static int igb_poll(struct napi_struct *, int);
146 static bool igb_clean_tx_irq(struct igb_q_vector *);
147 static bool igb_clean_rx_irq(struct igb_q_vector *, int);
148 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
149 static void igb_tx_timeout(struct net_device *);
150 static void igb_reset_task(struct work_struct *);
151 static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features);
152 static int igb_vlan_rx_add_vid(struct net_device *, u16);
153 static int igb_vlan_rx_kill_vid(struct net_device *, u16);
154 static void igb_restore_vlan(struct igb_adapter *);
155 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
156 static void igb_ping_all_vfs(struct igb_adapter *);
157 static void igb_msg_task(struct igb_adapter *);
158 static void igb_vmm_control(struct igb_adapter *);
159 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
160 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
161 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
162 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
163 int vf, u16 vlan, u8 qos);
164 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
165 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
166 struct ifla_vf_info *ivi);
167 static void igb_check_vf_rate_limit(struct igb_adapter *);
169 #ifdef CONFIG_PCI_IOV
170 static int igb_vf_configure(struct igb_adapter *adapter, int vf);
171 static int igb_find_enabled_vfs(struct igb_adapter *adapter);
172 static int igb_check_vf_assignment(struct igb_adapter *adapter);
173 #endif
175 #ifdef CONFIG_PM
176 #ifdef CONFIG_PM_SLEEP
177 static int igb_suspend(struct device *);
178 #endif
179 static int igb_resume(struct device *);
180 #ifdef CONFIG_PM_RUNTIME
181 static int igb_runtime_suspend(struct device *dev);
182 static int igb_runtime_resume(struct device *dev);
183 static int igb_runtime_idle(struct device *dev);
184 #endif
185 static const struct dev_pm_ops igb_pm_ops = {
186 SET_SYSTEM_SLEEP_PM_OPS(igb_suspend, igb_resume)
187 SET_RUNTIME_PM_OPS(igb_runtime_suspend, igb_runtime_resume,
188 igb_runtime_idle)
190 #endif
191 static void igb_shutdown(struct pci_dev *);
192 #ifdef CONFIG_IGB_DCA
193 static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
194 static struct notifier_block dca_notifier = {
195 .notifier_call = igb_notify_dca,
196 .next = NULL,
197 .priority = 0
199 #endif
200 #ifdef CONFIG_NET_POLL_CONTROLLER
201 /* for netdump / net console */
202 static void igb_netpoll(struct net_device *);
203 #endif
204 #ifdef CONFIG_PCI_IOV
205 static unsigned int max_vfs = 0;
206 module_param(max_vfs, uint, 0);
207 MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
208 "per physical function");
209 #endif /* CONFIG_PCI_IOV */
211 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
212 pci_channel_state_t);
213 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
214 static void igb_io_resume(struct pci_dev *);
216 static struct pci_error_handlers igb_err_handler = {
217 .error_detected = igb_io_error_detected,
218 .slot_reset = igb_io_slot_reset,
219 .resume = igb_io_resume,
222 static void igb_init_dmac(struct igb_adapter *adapter, u32 pba);
224 static struct pci_driver igb_driver = {
225 .name = igb_driver_name,
226 .id_table = igb_pci_tbl,
227 .probe = igb_probe,
228 .remove = __devexit_p(igb_remove),
229 #ifdef CONFIG_PM
230 .driver.pm = &igb_pm_ops,
231 #endif
232 .shutdown = igb_shutdown,
233 .err_handler = &igb_err_handler
236 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
237 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
238 MODULE_LICENSE("GPL");
239 MODULE_VERSION(DRV_VERSION);
241 struct igb_reg_info {
242 u32 ofs;
243 char *name;
246 static const struct igb_reg_info igb_reg_info_tbl[] = {
248 /* General Registers */
249 {E1000_CTRL, "CTRL"},
250 {E1000_STATUS, "STATUS"},
251 {E1000_CTRL_EXT, "CTRL_EXT"},
253 /* Interrupt Registers */
254 {E1000_ICR, "ICR"},
256 /* RX Registers */
257 {E1000_RCTL, "RCTL"},
258 {E1000_RDLEN(0), "RDLEN"},
259 {E1000_RDH(0), "RDH"},
260 {E1000_RDT(0), "RDT"},
261 {E1000_RXDCTL(0), "RXDCTL"},
262 {E1000_RDBAL(0), "RDBAL"},
263 {E1000_RDBAH(0), "RDBAH"},
265 /* TX Registers */
266 {E1000_TCTL, "TCTL"},
267 {E1000_TDBAL(0), "TDBAL"},
268 {E1000_TDBAH(0), "TDBAH"},
269 {E1000_TDLEN(0), "TDLEN"},
270 {E1000_TDH(0), "TDH"},
271 {E1000_TDT(0), "TDT"},
272 {E1000_TXDCTL(0), "TXDCTL"},
273 {E1000_TDFH, "TDFH"},
274 {E1000_TDFT, "TDFT"},
275 {E1000_TDFHS, "TDFHS"},
276 {E1000_TDFPC, "TDFPC"},
278 /* List Terminator */
283 * igb_regdump - register printout routine
285 static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
287 int n = 0;
288 char rname[16];
289 u32 regs[8];
291 switch (reginfo->ofs) {
292 case E1000_RDLEN(0):
293 for (n = 0; n < 4; n++)
294 regs[n] = rd32(E1000_RDLEN(n));
295 break;
296 case E1000_RDH(0):
297 for (n = 0; n < 4; n++)
298 regs[n] = rd32(E1000_RDH(n));
299 break;
300 case E1000_RDT(0):
301 for (n = 0; n < 4; n++)
302 regs[n] = rd32(E1000_RDT(n));
303 break;
304 case E1000_RXDCTL(0):
305 for (n = 0; n < 4; n++)
306 regs[n] = rd32(E1000_RXDCTL(n));
307 break;
308 case E1000_RDBAL(0):
309 for (n = 0; n < 4; n++)
310 regs[n] = rd32(E1000_RDBAL(n));
311 break;
312 case E1000_RDBAH(0):
313 for (n = 0; n < 4; n++)
314 regs[n] = rd32(E1000_RDBAH(n));
315 break;
316 case E1000_TDBAL(0):
317 for (n = 0; n < 4; n++)
318 			regs[n] = rd32(E1000_TDBAL(n));
319 break;
320 case E1000_TDBAH(0):
321 for (n = 0; n < 4; n++)
322 regs[n] = rd32(E1000_TDBAH(n));
323 break;
324 case E1000_TDLEN(0):
325 for (n = 0; n < 4; n++)
326 regs[n] = rd32(E1000_TDLEN(n));
327 break;
328 case E1000_TDH(0):
329 for (n = 0; n < 4; n++)
330 regs[n] = rd32(E1000_TDH(n));
331 break;
332 case E1000_TDT(0):
333 for (n = 0; n < 4; n++)
334 regs[n] = rd32(E1000_TDT(n));
335 break;
336 case E1000_TXDCTL(0):
337 for (n = 0; n < 4; n++)
338 regs[n] = rd32(E1000_TXDCTL(n));
339 break;
340 default:
341 pr_info("%-15s %08x\n", reginfo->name, rd32(reginfo->ofs));
342 return;
345 snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
346 pr_info("%-15s %08x %08x %08x %08x\n", rname, regs[0], regs[1],
347 regs[2], regs[3]);
351 * igb_dump - Print registers, tx-rings and rx-rings
353 static void igb_dump(struct igb_adapter *adapter)
355 struct net_device *netdev = adapter->netdev;
356 struct e1000_hw *hw = &adapter->hw;
357 struct igb_reg_info *reginfo;
358 struct igb_ring *tx_ring;
359 union e1000_adv_tx_desc *tx_desc;
360 struct my_u0 { u64 a; u64 b; } *u0;
361 struct igb_ring *rx_ring;
362 union e1000_adv_rx_desc *rx_desc;
363 u32 staterr;
364 u16 i, n;
366 if (!netif_msg_hw(adapter))
367 return;
369 /* Print netdevice Info */
370 if (netdev) {
371 dev_info(&adapter->pdev->dev, "Net device Info\n");
372 pr_info("Device Name state trans_start "
373 "last_rx\n");
374 pr_info("%-15s %016lX %016lX %016lX\n", netdev->name,
375 netdev->state, netdev->trans_start, netdev->last_rx);
378 /* Print Registers */
379 dev_info(&adapter->pdev->dev, "Register Dump\n");
380 pr_info(" Register Name Value\n");
381 for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
382 reginfo->name; reginfo++) {
383 igb_regdump(hw, reginfo);
386 /* Print TX Ring Summary */
387 if (!netdev || !netif_running(netdev))
388 goto exit;
390 dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
391 pr_info("Queue [NTU] [NTC] [bi(ntc)->dma ] leng ntw timestamp\n");
392 for (n = 0; n < adapter->num_tx_queues; n++) {
393 struct igb_tx_buffer *buffer_info;
394 tx_ring = adapter->tx_ring[n];
395 buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_clean];
396 pr_info(" %5d %5X %5X %016llX %04X %p %016llX\n",
397 n, tx_ring->next_to_use, tx_ring->next_to_clean,
398 (u64)buffer_info->dma,
399 buffer_info->length,
400 buffer_info->next_to_watch,
401 (u64)buffer_info->time_stamp);
404 /* Print TX Rings */
405 if (!netif_msg_tx_done(adapter))
406 goto rx_ring_summary;
408 dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
410 /* Transmit Descriptor Formats
412 * Advanced Transmit Descriptor
413 * +--------------------------------------------------------------+
414 * 0 | Buffer Address [63:0] |
415 * +--------------------------------------------------------------+
416 * 8 | PAYLEN | PORTS |CC|IDX | STA | DCMD |DTYP|MAC|RSV| DTALEN |
417 * +--------------------------------------------------------------+
418 * 63 46 45 40 39 38 36 35 32 31 24 15 0
421 for (n = 0; n < adapter->num_tx_queues; n++) {
422 tx_ring = adapter->tx_ring[n];
423 pr_info("------------------------------------\n");
424 pr_info("TX QUEUE INDEX = %d\n", tx_ring->queue_index);
425 pr_info("------------------------------------\n");
426 pr_info("T [desc] [address 63:0 ] [PlPOCIStDDM Ln] "
427 "[bi->dma ] leng ntw timestamp "
428 "bi->skb\n");
430 for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
431 const char *next_desc;
432 struct igb_tx_buffer *buffer_info;
433 tx_desc = IGB_TX_DESC(tx_ring, i);
434 buffer_info = &tx_ring->tx_buffer_info[i];
435 u0 = (struct my_u0 *)tx_desc;
436 if (i == tx_ring->next_to_use &&
437 i == tx_ring->next_to_clean)
438 next_desc = " NTC/U";
439 else if (i == tx_ring->next_to_use)
440 next_desc = " NTU";
441 else if (i == tx_ring->next_to_clean)
442 next_desc = " NTC";
443 else
444 next_desc = "";
446 pr_info("T [0x%03X] %016llX %016llX %016llX"
447 " %04X %p %016llX %p%s\n", i,
448 le64_to_cpu(u0->a),
449 le64_to_cpu(u0->b),
450 (u64)buffer_info->dma,
451 buffer_info->length,
452 buffer_info->next_to_watch,
453 (u64)buffer_info->time_stamp,
454 buffer_info->skb, next_desc);
456 if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
457 print_hex_dump(KERN_INFO, "",
458 DUMP_PREFIX_ADDRESS,
459 16, 1, phys_to_virt(buffer_info->dma),
460 buffer_info->length, true);
464 /* Print RX Rings Summary */
465 rx_ring_summary:
466 dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
467 pr_info("Queue [NTU] [NTC]\n");
468 for (n = 0; n < adapter->num_rx_queues; n++) {
469 rx_ring = adapter->rx_ring[n];
470 pr_info(" %5d %5X %5X\n",
471 n, rx_ring->next_to_use, rx_ring->next_to_clean);
474 /* Print RX Rings */
475 if (!netif_msg_rx_status(adapter))
476 goto exit;
478 dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
480 /* Advanced Receive Descriptor (Read) Format
481 * 63 1 0
482 * +-----------------------------------------------------+
483 * 0 | Packet Buffer Address [63:1] |A0/NSE|
484 * +----------------------------------------------+------+
485 * 8 | Header Buffer Address [63:1] | DD |
486 * +-----------------------------------------------------+
489 * Advanced Receive Descriptor (Write-Back) Format
491 * 63 48 47 32 31 30 21 20 17 16 4 3 0
492 * +------------------------------------------------------+
493 * 0 | Packet IP |SPH| HDR_LEN | RSV|Packet| RSS |
494 * | Checksum Ident | | | | Type | Type |
495 * +------------------------------------------------------+
496 * 8 | VLAN Tag | Length | Extended Error | Extended Status |
497 * +------------------------------------------------------+
498 * 63 48 47 32 31 20 19 0
501 for (n = 0; n < adapter->num_rx_queues; n++) {
502 rx_ring = adapter->rx_ring[n];
503 pr_info("------------------------------------\n");
504 pr_info("RX QUEUE INDEX = %d\n", rx_ring->queue_index);
505 pr_info("------------------------------------\n");
506 pr_info("R [desc] [ PktBuf A0] [ HeadBuf DD] "
507 "[bi->dma ] [bi->skb] <-- Adv Rx Read format\n");
508 pr_info("RWB[desc] [PcsmIpSHl PtRs] [vl er S cks ln] -----"
509 "----------- [bi->skb] <-- Adv Rx Write-Back format\n");
511 for (i = 0; i < rx_ring->count; i++) {
512 const char *next_desc;
513 struct igb_rx_buffer *buffer_info;
514 buffer_info = &rx_ring->rx_buffer_info[i];
515 rx_desc = IGB_RX_DESC(rx_ring, i);
516 u0 = (struct my_u0 *)rx_desc;
517 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
519 if (i == rx_ring->next_to_use)
520 next_desc = " NTU";
521 else if (i == rx_ring->next_to_clean)
522 next_desc = " NTC";
523 else
524 next_desc = "";
526 if (staterr & E1000_RXD_STAT_DD) {
527 /* Descriptor Done */
528 pr_info("%s[0x%03X] %016llX %016llX -------"
529 "--------- %p%s\n", "RWB", i,
530 le64_to_cpu(u0->a),
531 le64_to_cpu(u0->b),
532 buffer_info->skb, next_desc);
533 } else {
534 pr_info("%s[0x%03X] %016llX %016llX %016llX"
535 " %p%s\n", "R ", i,
536 le64_to_cpu(u0->a),
537 le64_to_cpu(u0->b),
538 (u64)buffer_info->dma,
539 buffer_info->skb, next_desc);
541 if (netif_msg_pktdata(adapter)) {
542 print_hex_dump(KERN_INFO, "",
543 DUMP_PREFIX_ADDRESS,
544 16, 1,
545 phys_to_virt(buffer_info->dma),
546 IGB_RX_HDR_LEN, true);
547 print_hex_dump(KERN_INFO, "",
548 DUMP_PREFIX_ADDRESS,
549 16, 1,
550 phys_to_virt(
551 buffer_info->page_dma +
552 buffer_info->page_offset),
553 PAGE_SIZE/2, true);
559 exit:
560 return;
565 * igb_read_clock - read raw cycle counter (to be used by time counter)
567 static cycle_t igb_read_clock(const struct cyclecounter *tc)
569 struct igb_adapter *adapter =
570 container_of(tc, struct igb_adapter, cycles);
571 struct e1000_hw *hw = &adapter->hw;
572 u64 stamp = 0;
573 int shift = 0;
576 * The timestamp latches on lowest register read. For the 82580
577 * the lowest register is SYSTIMR instead of SYSTIML. However we never
578 * adjusted TIMINCA so SYSTIMR will just read as all 0s so ignore it.
580 if (hw->mac.type >= e1000_82580) {
581 stamp = rd32(E1000_SYSTIMR) >> 8;
582 shift = IGB_82580_TSYNC_SHIFT;
585 stamp |= (u64)rd32(E1000_SYSTIML) << shift;
586 stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
587 return stamp;
591 * igb_get_hw_dev - return device
592 * used by hardware layer to print debugging information
594 struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
596 struct igb_adapter *adapter = hw->back;
597 return adapter->netdev;
601 * igb_init_module - Driver Registration Routine
603 * igb_init_module is the first routine called when the driver is
604 * loaded. All it does is register with the PCI subsystem.
606 static int __init igb_init_module(void)
608 int ret;
609 pr_info("%s - version %s\n",
610 igb_driver_string, igb_driver_version);
612 pr_info("%s\n", igb_copyright);
614 #ifdef CONFIG_IGB_DCA
615 dca_register_notify(&dca_notifier);
616 #endif
617 ret = pci_register_driver(&igb_driver);
618 return ret;
621 module_init(igb_init_module);
624 * igb_exit_module - Driver Exit Cleanup Routine
626 * igb_exit_module is called just before the driver is removed
627 * from memory.
629 static void __exit igb_exit_module(void)
631 #ifdef CONFIG_IGB_DCA
632 dca_unregister_notify(&dca_notifier);
633 #endif
634 pci_unregister_driver(&igb_driver);
637 module_exit(igb_exit_module);
639 #define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
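/* Added illustration (not in the original source): Q_IDX_82576 interleaves
 * ring indices into the pool-major layout used when VFs own queue pairs,
 * e.g. i = 0, 1, 2, 3 maps to register indices 0, 8, 1, 9. */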
641 * igb_cache_ring_register - Descriptor ring to register mapping
642 * @adapter: board private structure to initialize
644 * Once we know the feature-set enabled for the device, we'll cache
645 * the register offset the descriptor ring is assigned to.
647 static void igb_cache_ring_register(struct igb_adapter *adapter)
649 int i = 0, j = 0;
650 u32 rbase_offset = adapter->vfs_allocated_count;
652 switch (adapter->hw.mac.type) {
653 case e1000_82576:
654 /* The queues are allocated for virtualization such that VF 0
655 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
656 * In order to avoid collision we start at the first free queue
657 * and continue consuming queues in the same sequence
659 if (adapter->vfs_allocated_count) {
660 for (; i < adapter->rss_queues; i++)
661 adapter->rx_ring[i]->reg_idx = rbase_offset +
662 Q_IDX_82576(i);
664 case e1000_82575:
665 case e1000_82580:
666 case e1000_i350:
667 default:
668 for (; i < adapter->num_rx_queues; i++)
669 adapter->rx_ring[i]->reg_idx = rbase_offset + i;
670 for (; j < adapter->num_tx_queues; j++)
671 adapter->tx_ring[j]->reg_idx = rbase_offset + j;
672 break;
676 static void igb_free_queues(struct igb_adapter *adapter)
678 int i;
680 for (i = 0; i < adapter->num_tx_queues; i++) {
681 kfree(adapter->tx_ring[i]);
682 adapter->tx_ring[i] = NULL;
684 for (i = 0; i < adapter->num_rx_queues; i++) {
685 kfree(adapter->rx_ring[i]);
686 adapter->rx_ring[i] = NULL;
688 adapter->num_rx_queues = 0;
689 adapter->num_tx_queues = 0;
693 * igb_alloc_queues - Allocate memory for all rings
694 * @adapter: board private structure to initialize
696 * We allocate one ring per queue at run-time since we don't know the
697 * number of queues at compile-time.
699 static int igb_alloc_queues(struct igb_adapter *adapter)
701 struct igb_ring *ring;
702 int i;
703 int orig_node = adapter->node;
705 for (i = 0; i < adapter->num_tx_queues; i++) {
706 if (orig_node == -1) {
707 int cur_node = next_online_node(adapter->node);
708 if (cur_node == MAX_NUMNODES)
709 cur_node = first_online_node;
710 adapter->node = cur_node;
712 ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
713 adapter->node);
714 if (!ring)
715 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
716 if (!ring)
717 goto err;
718 ring->count = adapter->tx_ring_count;
719 ring->queue_index = i;
720 ring->dev = &adapter->pdev->dev;
721 ring->netdev = adapter->netdev;
722 ring->numa_node = adapter->node;
723 /* For 82575, context index must be unique per ring. */
724 if (adapter->hw.mac.type == e1000_82575)
725 set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags);
726 adapter->tx_ring[i] = ring;
728 /* Restore the adapter's original node */
729 adapter->node = orig_node;
731 for (i = 0; i < adapter->num_rx_queues; i++) {
732 if (orig_node == -1) {
733 int cur_node = next_online_node(adapter->node);
734 if (cur_node == MAX_NUMNODES)
735 cur_node = first_online_node;
736 adapter->node = cur_node;
738 ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
739 adapter->node);
740 if (!ring)
741 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
742 if (!ring)
743 goto err;
744 ring->count = adapter->rx_ring_count;
745 ring->queue_index = i;
746 ring->dev = &adapter->pdev->dev;
747 ring->netdev = adapter->netdev;
748 ring->numa_node = adapter->node;
749 /* set flag indicating ring supports SCTP checksum offload */
750 if (adapter->hw.mac.type >= e1000_82576)
751 set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags);
753 /* On i350, loopback VLAN packets have the tag byte-swapped. */
754 if (adapter->hw.mac.type == e1000_i350)
755 set_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags);
757 adapter->rx_ring[i] = ring;
759 /* Restore the adapter's original node */
760 adapter->node = orig_node;
762 igb_cache_ring_register(adapter);
764 return 0;
766 err:
767 /* Restore the adapter's original node */
768 adapter->node = orig_node;
769 igb_free_queues(adapter);
771 return -ENOMEM;
775 * igb_write_ivar - configure ivar for given MSI-X vector
776 * @hw: pointer to the HW structure
777 * @msix_vector: vector number we are allocating to a given ring
778 * @index: row index of IVAR register to write within IVAR table
779 * @offset: column offset of in IVAR, should be multiple of 8
781 * This function is intended to handle the writing of the IVAR register
782 * for adapters 82576 and newer. The IVAR table consists of 2 columns,
783  *  each containing a cause allocation for an Rx and Tx ring, and a
784 * variable number of rows depending on the number of queues supported.
786 static void igb_write_ivar(struct e1000_hw *hw, int msix_vector,
787 int index, int offset)
789 u32 ivar = array_rd32(E1000_IVAR0, index);
791 /* clear any bits that are currently set */
792 ivar &= ~((u32)0xFF << offset);
794 /* write vector and valid bit */
795 ivar |= (msix_vector | E1000_IVAR_VALID) << offset;
797 array_wr32(E1000_IVAR0, index, ivar);
800 #define IGB_N0_QUEUE -1
801 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
803 struct igb_adapter *adapter = q_vector->adapter;
804 struct e1000_hw *hw = &adapter->hw;
805 int rx_queue = IGB_N0_QUEUE;
806 int tx_queue = IGB_N0_QUEUE;
807 u32 msixbm = 0;
809 if (q_vector->rx.ring)
810 rx_queue = q_vector->rx.ring->reg_idx;
811 if (q_vector->tx.ring)
812 tx_queue = q_vector->tx.ring->reg_idx;
814 switch (hw->mac.type) {
815 case e1000_82575:
816 /* The 82575 assigns vectors using a bitmask, which matches the
817 bitmask for the EICR/EIMS/EIMC registers. To assign one
818 or more queues to a vector, we write the appropriate bits
819 into the MSIXBM register for that vector. */
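		/* Added example for clarity: with rx_queue = 2 and tx_queue = 2,
		 * this vector's bitmask becomes (E1000_EICR_RX_QUEUE0 << 2) |
		 * (E1000_EICR_TX_QUEUE0 << 2). */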
820 if (rx_queue > IGB_N0_QUEUE)
821 msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
822 if (tx_queue > IGB_N0_QUEUE)
823 msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
824 if (!adapter->msix_entries && msix_vector == 0)
825 msixbm |= E1000_EIMS_OTHER;
826 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
827 q_vector->eims_value = msixbm;
828 break;
829 case e1000_82576:
831 * 82576 uses a table that essentially consists of 2 columns
832 * with 8 rows. The ordering is column-major so we use the
833 * lower 3 bits as the row index, and the 4th bit as the
834 * column offset.
836 if (rx_queue > IGB_N0_QUEUE)
837 igb_write_ivar(hw, msix_vector,
838 rx_queue & 0x7,
839 (rx_queue & 0x8) << 1);
840 if (tx_queue > IGB_N0_QUEUE)
841 igb_write_ivar(hw, msix_vector,
842 tx_queue & 0x7,
843 ((tx_queue & 0x8) << 1) + 8);
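		/* Added worked example: in the 82576 layout, rx_queue 10 lands
		 * in IVAR row (10 & 0x7) = 2 at bit offset ((10 & 0x8) << 1) = 16,
		 * and the matching Tx entry sits 8 bits higher at offset 24. */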
844 q_vector->eims_value = 1 << msix_vector;
845 break;
846 case e1000_82580:
847 case e1000_i350:
849 * On 82580 and newer adapters the scheme is similar to 82576
850 * however instead of ordering column-major we have things
851 * ordered row-major. So we traverse the table by using
852 * bit 0 as the column offset, and the remaining bits as the
853 * row index.
855 if (rx_queue > IGB_N0_QUEUE)
856 igb_write_ivar(hw, msix_vector,
857 rx_queue >> 1,
858 (rx_queue & 0x1) << 4);
859 if (tx_queue > IGB_N0_QUEUE)
860 igb_write_ivar(hw, msix_vector,
861 tx_queue >> 1,
862 ((tx_queue & 0x1) << 4) + 8);
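		/* Added worked example: on 82580/i350, rx_queue 5 maps to IVAR
		 * row (5 >> 1) = 2 at bit offset ((5 & 0x1) << 4) = 16, with the
		 * Tx entry again 8 bits higher at offset 24. */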
863 q_vector->eims_value = 1 << msix_vector;
864 break;
865 default:
866 BUG();
867 break;
870 /* add q_vector eims value to global eims_enable_mask */
871 adapter->eims_enable_mask |= q_vector->eims_value;
873 /* configure q_vector to set itr on first interrupt */
874 q_vector->set_itr = 1;
878 * igb_configure_msix - Configure MSI-X hardware
880 * igb_configure_msix sets up the hardware to properly
881 * generate MSI-X interrupts.
883 static void igb_configure_msix(struct igb_adapter *adapter)
885 u32 tmp;
886 int i, vector = 0;
887 struct e1000_hw *hw = &adapter->hw;
889 adapter->eims_enable_mask = 0;
891 /* set vector for other causes, i.e. link changes */
892 switch (hw->mac.type) {
893 case e1000_82575:
894 tmp = rd32(E1000_CTRL_EXT);
895 /* enable MSI-X PBA support*/
896 tmp |= E1000_CTRL_EXT_PBA_CLR;
898 /* Auto-Mask interrupts upon ICR read. */
899 tmp |= E1000_CTRL_EXT_EIAME;
900 tmp |= E1000_CTRL_EXT_IRCA;
902 wr32(E1000_CTRL_EXT, tmp);
904 /* enable msix_other interrupt */
905 array_wr32(E1000_MSIXBM(0), vector++,
906 E1000_EIMS_OTHER);
907 adapter->eims_other = E1000_EIMS_OTHER;
909 break;
911 case e1000_82576:
912 case e1000_82580:
913 case e1000_i350:
914 /* Turn on MSI-X capability first, or our settings
915 * won't stick. And it will take days to debug. */
916 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
917 E1000_GPIE_PBA | E1000_GPIE_EIAME |
918 E1000_GPIE_NSICR);
920 /* enable msix_other interrupt */
921 adapter->eims_other = 1 << vector;
922 tmp = (vector++ | E1000_IVAR_VALID) << 8;
924 wr32(E1000_IVAR_MISC, tmp);
925 break;
926 default:
927 /* do nothing, since nothing else supports MSI-X */
928 break;
929 } /* switch (hw->mac.type) */
931 adapter->eims_enable_mask |= adapter->eims_other;
933 for (i = 0; i < adapter->num_q_vectors; i++)
934 igb_assign_vector(adapter->q_vector[i], vector++);
936 wrfl();
940 * igb_request_msix - Initialize MSI-X interrupts
942 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
943 * kernel.
945 static int igb_request_msix(struct igb_adapter *adapter)
947 struct net_device *netdev = adapter->netdev;
948 struct e1000_hw *hw = &adapter->hw;
949 int i, err = 0, vector = 0;
951 err = request_irq(adapter->msix_entries[vector].vector,
952 igb_msix_other, 0, netdev->name, adapter);
953 if (err)
954 goto out;
955 vector++;
957 for (i = 0; i < adapter->num_q_vectors; i++) {
958 struct igb_q_vector *q_vector = adapter->q_vector[i];
960 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
962 if (q_vector->rx.ring && q_vector->tx.ring)
963 sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
964 q_vector->rx.ring->queue_index);
965 else if (q_vector->tx.ring)
966 sprintf(q_vector->name, "%s-tx-%u", netdev->name,
967 q_vector->tx.ring->queue_index);
968 else if (q_vector->rx.ring)
969 sprintf(q_vector->name, "%s-rx-%u", netdev->name,
970 q_vector->rx.ring->queue_index);
971 else
972 sprintf(q_vector->name, "%s-unused", netdev->name);
974 err = request_irq(adapter->msix_entries[vector].vector,
975 igb_msix_ring, 0, q_vector->name,
976 q_vector);
977 if (err)
978 goto out;
979 vector++;
982 igb_configure_msix(adapter);
983 return 0;
984 out:
985 return err;
988 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
990 if (adapter->msix_entries) {
991 pci_disable_msix(adapter->pdev);
992 kfree(adapter->msix_entries);
993 adapter->msix_entries = NULL;
994 } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
995 pci_disable_msi(adapter->pdev);
1000 * igb_free_q_vectors - Free memory allocated for interrupt vectors
1001 * @adapter: board private structure to initialize
1003 * This function frees the memory allocated to the q_vectors. In addition if
1004 * NAPI is enabled it will delete any references to the NAPI struct prior
1005 * to freeing the q_vector.
1007 static void igb_free_q_vectors(struct igb_adapter *adapter)
1009 int v_idx;
1011 for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1012 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1013 adapter->q_vector[v_idx] = NULL;
1014 if (!q_vector)
1015 continue;
1016 netif_napi_del(&q_vector->napi);
1017 kfree(q_vector);
1019 adapter->num_q_vectors = 0;
1023 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
1025 * This function resets the device so that it has 0 rx queues, tx queues, and
1026 * MSI-X interrupts allocated.
1028 static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
1030 igb_free_queues(adapter);
1031 igb_free_q_vectors(adapter);
1032 igb_reset_interrupt_capability(adapter);
1036 * igb_set_interrupt_capability - set MSI or MSI-X if supported
1038 * Attempt to configure interrupts using the best available
1039 * capabilities of the hardware and kernel.
1041 static int igb_set_interrupt_capability(struct igb_adapter *adapter)
1043 int err;
1044 int numvecs, i;
1046 /* Number of supported queues. */
1047 adapter->num_rx_queues = adapter->rss_queues;
1048 if (adapter->vfs_allocated_count)
1049 adapter->num_tx_queues = 1;
1050 else
1051 adapter->num_tx_queues = adapter->rss_queues;
1053 /* start with one vector for every rx queue */
1054 numvecs = adapter->num_rx_queues;
1056 /* if tx handler is separate add 1 for every tx queue */
1057 if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
1058 numvecs += adapter->num_tx_queues;
1060 /* store the number of vectors reserved for queues */
1061 adapter->num_q_vectors = numvecs;
1063 /* add 1 vector for link status interrupts */
1064 numvecs++;
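	/* Added example, assuming 4 RSS queues with IGB_FLAG_QUEUE_PAIRS set
	 * and no VFs: numvecs = 4 queue vectors + 1 link vector = 5 MSI-X
	 * entries requested below. */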
1065 adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
1066 GFP_KERNEL);
1067 if (!adapter->msix_entries)
1068 goto msi_only;
1070 for (i = 0; i < numvecs; i++)
1071 adapter->msix_entries[i].entry = i;
1073 err = pci_enable_msix(adapter->pdev,
1074 adapter->msix_entries,
1075 numvecs);
1076 if (err == 0)
1077 goto out;
1079 igb_reset_interrupt_capability(adapter);
1081 /* If we can't do MSI-X, try MSI */
1082 msi_only:
1083 #ifdef CONFIG_PCI_IOV
1084 /* disable SR-IOV for non MSI-X configurations */
1085 if (adapter->vf_data) {
1086 struct e1000_hw *hw = &adapter->hw;
1087 /* disable iov and allow time for transactions to clear */
1088 pci_disable_sriov(adapter->pdev);
1089 msleep(500);
1091 kfree(adapter->vf_data);
1092 adapter->vf_data = NULL;
1093 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1094 wrfl();
1095 msleep(100);
1096 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
1098 #endif
1099 adapter->vfs_allocated_count = 0;
1100 adapter->rss_queues = 1;
1101 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1102 adapter->num_rx_queues = 1;
1103 adapter->num_tx_queues = 1;
1104 adapter->num_q_vectors = 1;
1105 if (!pci_enable_msi(adapter->pdev))
1106 adapter->flags |= IGB_FLAG_HAS_MSI;
1107 out:
1108 /* Notify the stack of the (possibly) reduced queue counts. */
1109 netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
1110 return netif_set_real_num_rx_queues(adapter->netdev,
1111 adapter->num_rx_queues);
1115 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1116 * @adapter: board private structure to initialize
1118 * We allocate one q_vector per queue interrupt. If allocation fails we
1119 * return -ENOMEM.
1121 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1123 struct igb_q_vector *q_vector;
1124 struct e1000_hw *hw = &adapter->hw;
1125 int v_idx;
1126 int orig_node = adapter->node;
1128 for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1129 if ((adapter->num_q_vectors == (adapter->num_rx_queues +
1130 adapter->num_tx_queues)) &&
1131 (adapter->num_rx_queues == v_idx))
1132 adapter->node = orig_node;
1133 if (orig_node == -1) {
1134 int cur_node = next_online_node(adapter->node);
1135 if (cur_node == MAX_NUMNODES)
1136 cur_node = first_online_node;
1137 adapter->node = cur_node;
1139 q_vector = kzalloc_node(sizeof(struct igb_q_vector), GFP_KERNEL,
1140 adapter->node);
1141 if (!q_vector)
1142 q_vector = kzalloc(sizeof(struct igb_q_vector),
1143 GFP_KERNEL);
1144 if (!q_vector)
1145 goto err_out;
1146 q_vector->adapter = adapter;
1147 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
1148 q_vector->itr_val = IGB_START_ITR;
1149 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
1150 adapter->q_vector[v_idx] = q_vector;
1152 /* Restore the adapter's original node */
1153 adapter->node = orig_node;
1155 return 0;
1157 err_out:
1158 /* Restore the adapter's original node */
1159 adapter->node = orig_node;
1160 igb_free_q_vectors(adapter);
1161 return -ENOMEM;
1164 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
1165 int ring_idx, int v_idx)
1167 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1169 q_vector->rx.ring = adapter->rx_ring[ring_idx];
1170 q_vector->rx.ring->q_vector = q_vector;
1171 q_vector->rx.count++;
1172 q_vector->itr_val = adapter->rx_itr_setting;
1173 if (q_vector->itr_val && q_vector->itr_val <= 3)
1174 q_vector->itr_val = IGB_START_ITR;
1177 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
1178 int ring_idx, int v_idx)
1180 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1182 q_vector->tx.ring = adapter->tx_ring[ring_idx];
1183 q_vector->tx.ring->q_vector = q_vector;
1184 q_vector->tx.count++;
1185 q_vector->itr_val = adapter->tx_itr_setting;
1186 q_vector->tx.work_limit = adapter->tx_work_limit;
1187 if (q_vector->itr_val && q_vector->itr_val <= 3)
1188 q_vector->itr_val = IGB_START_ITR;
1192 * igb_map_ring_to_vector - maps allocated queues to vectors
1194 * This function maps the recently allocated queues to vectors.
1196 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
1198 int i;
1199 int v_idx = 0;
1201 if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
1202 (adapter->num_q_vectors < adapter->num_tx_queues))
1203 return -ENOMEM;
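	/* Added note: with 4 Rx and 4 Tx queues and 8 q_vectors, each ring gets
	 * its own vector via the first branch below; with only 4 q_vectors the
	 * else branch pairs Rx ring i and Tx ring i onto the same vector. */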
1205 if (adapter->num_q_vectors >=
1206 (adapter->num_rx_queues + adapter->num_tx_queues)) {
1207 for (i = 0; i < adapter->num_rx_queues; i++)
1208 igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1209 for (i = 0; i < adapter->num_tx_queues; i++)
1210 igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1211 } else {
1212 for (i = 0; i < adapter->num_rx_queues; i++) {
1213 if (i < adapter->num_tx_queues)
1214 igb_map_tx_ring_to_vector(adapter, i, v_idx);
1215 igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1217 for (; i < adapter->num_tx_queues; i++)
1218 igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1220 return 0;
1224 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1226 * This function initializes the interrupts and allocates all of the queues.
1228 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
1230 struct pci_dev *pdev = adapter->pdev;
1231 int err;
1233 err = igb_set_interrupt_capability(adapter);
1234 if (err)
1235 return err;
1237 err = igb_alloc_q_vectors(adapter);
1238 if (err) {
1239 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
1240 goto err_alloc_q_vectors;
1243 err = igb_alloc_queues(adapter);
1244 if (err) {
1245 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1246 goto err_alloc_queues;
1249 err = igb_map_ring_to_vector(adapter);
1250 if (err) {
1251 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
1252 goto err_map_queues;
1256 return 0;
1257 err_map_queues:
1258 igb_free_queues(adapter);
1259 err_alloc_queues:
1260 igb_free_q_vectors(adapter);
1261 err_alloc_q_vectors:
1262 igb_reset_interrupt_capability(adapter);
1263 return err;
1267 * igb_request_irq - initialize interrupts
1269 * Attempts to configure interrupts using the best available
1270 * capabilities of the hardware and kernel.
1272 static int igb_request_irq(struct igb_adapter *adapter)
1274 struct net_device *netdev = adapter->netdev;
1275 struct pci_dev *pdev = adapter->pdev;
1276 int err = 0;
1278 if (adapter->msix_entries) {
1279 err = igb_request_msix(adapter);
1280 if (!err)
1281 goto request_done;
1282 /* fall back to MSI */
1283 igb_clear_interrupt_scheme(adapter);
1284 if (!pci_enable_msi(pdev))
1285 adapter->flags |= IGB_FLAG_HAS_MSI;
1286 igb_free_all_tx_resources(adapter);
1287 igb_free_all_rx_resources(adapter);
1288 adapter->num_tx_queues = 1;
1289 adapter->num_rx_queues = 1;
1290 adapter->num_q_vectors = 1;
1291 err = igb_alloc_q_vectors(adapter);
1292 if (err) {
1293 dev_err(&pdev->dev,
1294 "Unable to allocate memory for vectors\n");
1295 goto request_done;
1297 err = igb_alloc_queues(adapter);
1298 if (err) {
1299 dev_err(&pdev->dev,
1300 "Unable to allocate memory for queues\n");
1301 igb_free_q_vectors(adapter);
1302 goto request_done;
1304 igb_setup_all_tx_resources(adapter);
1305 igb_setup_all_rx_resources(adapter);
1308 igb_assign_vector(adapter->q_vector[0], 0);
1310 if (adapter->flags & IGB_FLAG_HAS_MSI) {
1311 err = request_irq(pdev->irq, igb_intr_msi, 0,
1312 netdev->name, adapter);
1313 if (!err)
1314 goto request_done;
1316 /* fall back to legacy interrupts */
1317 igb_reset_interrupt_capability(adapter);
1318 adapter->flags &= ~IGB_FLAG_HAS_MSI;
1321 err = request_irq(pdev->irq, igb_intr, IRQF_SHARED,
1322 netdev->name, adapter);
1324 if (err)
1325 dev_err(&pdev->dev, "Error %d getting interrupt\n",
1326 err);
1328 request_done:
1329 return err;
1332 static void igb_free_irq(struct igb_adapter *adapter)
1334 if (adapter->msix_entries) {
1335 int vector = 0, i;
1337 free_irq(adapter->msix_entries[vector++].vector, adapter);
1339 for (i = 0; i < adapter->num_q_vectors; i++)
1340 free_irq(adapter->msix_entries[vector++].vector,
1341 adapter->q_vector[i]);
1342 } else {
1343 free_irq(adapter->pdev->irq, adapter);
1348 * igb_irq_disable - Mask off interrupt generation on the NIC
1349 * @adapter: board private structure
1351 static void igb_irq_disable(struct igb_adapter *adapter)
1353 struct e1000_hw *hw = &adapter->hw;
1356 * we need to be careful when disabling interrupts. The VFs are also
1357 * mapped into these registers and so clearing the bits can cause
1358 * issues on the VF drivers so we only need to clear what we set
1360 if (adapter->msix_entries) {
1361 u32 regval = rd32(E1000_EIAM);
1362 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1363 wr32(E1000_EIMC, adapter->eims_enable_mask);
1364 regval = rd32(E1000_EIAC);
1365 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1368 wr32(E1000_IAM, 0);
1369 wr32(E1000_IMC, ~0);
1370 wrfl();
1371 if (adapter->msix_entries) {
1372 int i;
1373 for (i = 0; i < adapter->num_q_vectors; i++)
1374 synchronize_irq(adapter->msix_entries[i].vector);
1375 } else {
1376 synchronize_irq(adapter->pdev->irq);
1381 * igb_irq_enable - Enable default interrupt generation settings
1382 * @adapter: board private structure
1384 static void igb_irq_enable(struct igb_adapter *adapter)
1386 struct e1000_hw *hw = &adapter->hw;
1388 if (adapter->msix_entries) {
1389 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_DRSTA;
1390 u32 regval = rd32(E1000_EIAC);
1391 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1392 regval = rd32(E1000_EIAM);
1393 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1394 wr32(E1000_EIMS, adapter->eims_enable_mask);
1395 if (adapter->vfs_allocated_count) {
1396 wr32(E1000_MBVFIMR, 0xFF);
1397 ims |= E1000_IMS_VMMB;
1399 wr32(E1000_IMS, ims);
1400 } else {
1401 wr32(E1000_IMS, IMS_ENABLE_MASK |
1402 E1000_IMS_DRSTA);
1403 wr32(E1000_IAM, IMS_ENABLE_MASK |
1404 E1000_IMS_DRSTA);
1408 static void igb_update_mng_vlan(struct igb_adapter *adapter)
1410 struct e1000_hw *hw = &adapter->hw;
1411 u16 vid = adapter->hw.mng_cookie.vlan_id;
1412 u16 old_vid = adapter->mng_vlan_id;
1414 if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1415 /* add VID to filter table */
1416 igb_vfta_set(hw, vid, true);
1417 adapter->mng_vlan_id = vid;
1418 } else {
1419 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1422 if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1423 (vid != old_vid) &&
1424 !test_bit(old_vid, adapter->active_vlans)) {
1425 /* remove VID from filter table */
1426 igb_vfta_set(hw, old_vid, false);
1431 * igb_release_hw_control - release control of the h/w to f/w
1432 * @adapter: address of board private structure
1434 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1435 * For ASF and Pass Through versions of f/w this means that the
1436 * driver is no longer loaded.
1439 static void igb_release_hw_control(struct igb_adapter *adapter)
1441 struct e1000_hw *hw = &adapter->hw;
1442 u32 ctrl_ext;
1444 /* Let firmware take over control of h/w */
1445 ctrl_ext = rd32(E1000_CTRL_EXT);
1446 wr32(E1000_CTRL_EXT,
1447 ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1451 * igb_get_hw_control - get control of the h/w from f/w
1452 * @adapter: address of board private structure
1454 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1455 * For ASF and Pass Through versions of f/w this means that
1456 * the driver is loaded.
1459 static void igb_get_hw_control(struct igb_adapter *adapter)
1461 struct e1000_hw *hw = &adapter->hw;
1462 u32 ctrl_ext;
1464 /* Let firmware know the driver has taken over */
1465 ctrl_ext = rd32(E1000_CTRL_EXT);
1466 wr32(E1000_CTRL_EXT,
1467 ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1471 * igb_configure - configure the hardware for RX and TX
1472 * @adapter: private board structure
1474 static void igb_configure(struct igb_adapter *adapter)
1476 struct net_device *netdev = adapter->netdev;
1477 int i;
1479 igb_get_hw_control(adapter);
1480 igb_set_rx_mode(netdev);
1482 igb_restore_vlan(adapter);
1484 igb_setup_tctl(adapter);
1485 igb_setup_mrqc(adapter);
1486 igb_setup_rctl(adapter);
1488 igb_configure_tx(adapter);
1489 igb_configure_rx(adapter);
1491 igb_rx_fifo_flush_82575(&adapter->hw);
1493 /* call igb_desc_unused which always leaves
1494 * at least 1 descriptor unused to make sure
1495 * next_to_use != next_to_clean */
1496 for (i = 0; i < adapter->num_rx_queues; i++) {
1497 struct igb_ring *ring = adapter->rx_ring[i];
1498 igb_alloc_rx_buffers(ring, igb_desc_unused(ring));
1503 * igb_power_up_link - Power up the phy/serdes link
1504 * @adapter: address of board private structure
1506 void igb_power_up_link(struct igb_adapter *adapter)
1508 if (adapter->hw.phy.media_type == e1000_media_type_copper)
1509 igb_power_up_phy_copper(&adapter->hw);
1510 else
1511 igb_power_up_serdes_link_82575(&adapter->hw);
1512 igb_reset_phy(&adapter->hw);
1516 * igb_power_down_link - Power down the phy/serdes link
1517 * @adapter: address of board private structure
1519 static void igb_power_down_link(struct igb_adapter *adapter)
1521 if (adapter->hw.phy.media_type == e1000_media_type_copper)
1522 igb_power_down_phy_copper_82575(&adapter->hw);
1523 else
1524 igb_shutdown_serdes_link_82575(&adapter->hw);
1528 * igb_up - Open the interface and prepare it to handle traffic
1529 * @adapter: board private structure
1531 int igb_up(struct igb_adapter *adapter)
1533 struct e1000_hw *hw = &adapter->hw;
1534 int i;
1536 /* hardware has been reset, we need to reload some things */
1537 igb_configure(adapter);
1539 clear_bit(__IGB_DOWN, &adapter->state);
1541 for (i = 0; i < adapter->num_q_vectors; i++)
1542 napi_enable(&(adapter->q_vector[i]->napi));
1544 if (adapter->msix_entries)
1545 igb_configure_msix(adapter);
1546 else
1547 igb_assign_vector(adapter->q_vector[0], 0);
1549 /* Clear any pending interrupts. */
1550 rd32(E1000_ICR);
1551 igb_irq_enable(adapter);
1553 /* notify VFs that reset has been completed */
1554 if (adapter->vfs_allocated_count) {
1555 u32 reg_data = rd32(E1000_CTRL_EXT);
1556 reg_data |= E1000_CTRL_EXT_PFRSTD;
1557 wr32(E1000_CTRL_EXT, reg_data);
1560 netif_tx_start_all_queues(adapter->netdev);
1562 /* start the watchdog. */
1563 hw->mac.get_link_status = 1;
1564 schedule_work(&adapter->watchdog_task);
1566 return 0;
1569 void igb_down(struct igb_adapter *adapter)
1571 struct net_device *netdev = adapter->netdev;
1572 struct e1000_hw *hw = &adapter->hw;
1573 u32 tctl, rctl;
1574 int i;
1576 /* signal that we're down so the interrupt handler does not
1577 * reschedule our watchdog timer */
1578 set_bit(__IGB_DOWN, &adapter->state);
1580 /* disable receives in the hardware */
1581 rctl = rd32(E1000_RCTL);
1582 wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1583 /* flush and sleep below */
1585 netif_tx_stop_all_queues(netdev);
1587 /* disable transmits in the hardware */
1588 tctl = rd32(E1000_TCTL);
1589 tctl &= ~E1000_TCTL_EN;
1590 wr32(E1000_TCTL, tctl);
1591 /* flush both disables and wait for them to finish */
1592 wrfl();
1593 msleep(10);
1595 for (i = 0; i < adapter->num_q_vectors; i++)
1596 napi_disable(&(adapter->q_vector[i]->napi));
1598 igb_irq_disable(adapter);
1600 del_timer_sync(&adapter->watchdog_timer);
1601 del_timer_sync(&adapter->phy_info_timer);
1603 netif_carrier_off(netdev);
1605 /* record the stats before reset*/
1606 spin_lock(&adapter->stats64_lock);
1607 igb_update_stats(adapter, &adapter->stats64);
1608 spin_unlock(&adapter->stats64_lock);
1610 adapter->link_speed = 0;
1611 adapter->link_duplex = 0;
1613 if (!pci_channel_offline(adapter->pdev))
1614 igb_reset(adapter);
1615 igb_clean_all_tx_rings(adapter);
1616 igb_clean_all_rx_rings(adapter);
1617 #ifdef CONFIG_IGB_DCA
1619 /* since we reset the hardware DCA settings were cleared */
1620 igb_setup_dca(adapter);
1621 #endif
1624 void igb_reinit_locked(struct igb_adapter *adapter)
1626 WARN_ON(in_interrupt());
1627 while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1628 msleep(1);
1629 igb_down(adapter);
1630 igb_up(adapter);
1631 clear_bit(__IGB_RESETTING, &adapter->state);
1634 void igb_reset(struct igb_adapter *adapter)
1636 struct pci_dev *pdev = adapter->pdev;
1637 struct e1000_hw *hw = &adapter->hw;
1638 struct e1000_mac_info *mac = &hw->mac;
1639 struct e1000_fc_info *fc = &hw->fc;
1640 u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1641 u16 hwm;
1643 	/* Repartition the PBA for MTUs greater than 9k.
1644 * To take effect CTRL.RST is required.
1646 switch (mac->type) {
1647 case e1000_i350:
1648 case e1000_82580:
1649 pba = rd32(E1000_RXPBS);
1650 pba = igb_rxpbs_adjust_82580(pba);
1651 break;
1652 case e1000_82576:
1653 pba = rd32(E1000_RXPBS);
1654 pba &= E1000_RXPBS_SIZE_MASK_82576;
1655 break;
1656 case e1000_82575:
1657 default:
1658 pba = E1000_PBA_34K;
1659 break;
1662 if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1663 (mac->type < e1000_82576)) {
1664 /* adjust PBA for jumbo frames */
1665 wr32(E1000_PBA, pba);
1667 /* To maintain wire speed transmits, the Tx FIFO should be
1668 * large enough to accommodate two full transmit packets,
1669 * rounded up to the next 1KB and expressed in KB. Likewise,
1670 * the Rx FIFO should be large enough to accommodate at least
1671 * one full receive packet and is similarly rounded up and
1672 * expressed in KB. */
1673 pba = rd32(E1000_PBA);
1674 /* upper 16 bits has Tx packet buffer allocation size in KB */
1675 tx_space = pba >> 16;
1676 /* lower 16 bits has Rx packet buffer allocation size in KB */
1677 pba &= 0xffff;
1678 /* the tx fifo also stores 16 bytes of information about the tx
1679 * but don't include ethernet FCS because hardware appends it */
1680 min_tx_space = (adapter->max_frame_size +
1681 sizeof(union e1000_adv_tx_desc) -
1682 ETH_FCS_LEN) * 2;
1683 min_tx_space = ALIGN(min_tx_space, 1024);
1684 min_tx_space >>= 10;
1685 /* software strips receive CRC, so leave room for it */
1686 min_rx_space = adapter->max_frame_size;
1687 min_rx_space = ALIGN(min_rx_space, 1024);
1688 min_rx_space >>= 10;
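		/* Added worked example (assuming a 9018-byte max frame for a
		 * 9000-byte MTU and the 16-byte advanced Tx descriptor):
		 * min_tx_space = ALIGN((9018 + 16 - 4) * 2, 1024) >> 10 = 18 KB
		 * and min_rx_space = ALIGN(9018, 1024) >> 10 = 9 KB. */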
1690 /* If current Tx allocation is less than the min Tx FIFO size,
1691 * and the min Tx FIFO size is less than the current Rx FIFO
1692 * allocation, take space away from current Rx allocation */
1693 if (tx_space < min_tx_space &&
1694 ((min_tx_space - tx_space) < pba)) {
1695 pba = pba - (min_tx_space - tx_space);
1697 /* if short on rx space, rx wins and must trump tx
1698 * adjustment */
1699 if (pba < min_rx_space)
1700 pba = min_rx_space;
1702 wr32(E1000_PBA, pba);
1705 /* flow control settings */
1706 /* The high water mark must be low enough to fit one full frame
1707 * (or the size used for early receive) above it in the Rx FIFO.
1708 * Set it to the lower of:
1709 * - 90% of the Rx FIFO size, or
1710 * - the full Rx FIFO size minus one full frame */
1711 hwm = min(((pba << 10) * 9 / 10),
1712 ((pba << 10) - 2 * adapter->max_frame_size));
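	/* Added worked example (assuming pba = 34 KB and a 1522-byte max
	 * frame): 90% of the FIFO is 31334 bytes and the FIFO minus two full
	 * frames is 31772 bytes, so hwm = 31334 before the 16-byte rounding
	 * applied below. */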
1714 fc->high_water = hwm & 0xFFF0; /* 16-byte granularity */
1715 fc->low_water = fc->high_water - 16;
1716 fc->pause_time = 0xFFFF;
1717 fc->send_xon = 1;
1718 fc->current_mode = fc->requested_mode;
1720 /* disable receive for all VFs and wait one second */
1721 if (adapter->vfs_allocated_count) {
1722 int i;
1723 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1724 adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
1726 /* ping all the active vfs to let them know we are going down */
1727 igb_ping_all_vfs(adapter);
1729 /* disable transmits and receives */
1730 wr32(E1000_VFRE, 0);
1731 wr32(E1000_VFTE, 0);
1734 /* Allow time for pending master requests to run */
1735 hw->mac.ops.reset_hw(hw);
1736 wr32(E1000_WUC, 0);
1738 if (hw->mac.ops.init_hw(hw))
1739 dev_err(&pdev->dev, "Hardware Error\n");
1741 igb_init_dmac(adapter, pba);
1742 if (!netif_running(adapter->netdev))
1743 igb_power_down_link(adapter);
1745 igb_update_mng_vlan(adapter);
1747 /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1748 wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1750 igb_get_phy_info(hw);
1753 static netdev_features_t igb_fix_features(struct net_device *netdev,
1754 netdev_features_t features)
1757 * Since there is no support for separate rx/tx vlan accel
1758 * enable/disable make sure tx flag is always in same state as rx.
1760 if (features & NETIF_F_HW_VLAN_RX)
1761 features |= NETIF_F_HW_VLAN_TX;
1762 else
1763 features &= ~NETIF_F_HW_VLAN_TX;
1765 return features;
1768 static int igb_set_features(struct net_device *netdev,
1769 netdev_features_t features)
1771 netdev_features_t changed = netdev->features ^ features;
1773 if (changed & NETIF_F_HW_VLAN_RX)
1774 igb_vlan_mode(netdev, features);
1776 return 0;
1779 static const struct net_device_ops igb_netdev_ops = {
1780 .ndo_open = igb_open,
1781 .ndo_stop = igb_close,
1782 .ndo_start_xmit = igb_xmit_frame,
1783 .ndo_get_stats64 = igb_get_stats64,
1784 .ndo_set_rx_mode = igb_set_rx_mode,
1785 .ndo_set_mac_address = igb_set_mac,
1786 .ndo_change_mtu = igb_change_mtu,
1787 .ndo_do_ioctl = igb_ioctl,
1788 .ndo_tx_timeout = igb_tx_timeout,
1789 .ndo_validate_addr = eth_validate_addr,
1790 .ndo_vlan_rx_add_vid = igb_vlan_rx_add_vid,
1791 .ndo_vlan_rx_kill_vid = igb_vlan_rx_kill_vid,
1792 .ndo_set_vf_mac = igb_ndo_set_vf_mac,
1793 .ndo_set_vf_vlan = igb_ndo_set_vf_vlan,
1794 .ndo_set_vf_tx_rate = igb_ndo_set_vf_bw,
1795 .ndo_get_vf_config = igb_ndo_get_vf_config,
1796 #ifdef CONFIG_NET_POLL_CONTROLLER
1797 .ndo_poll_controller = igb_netpoll,
1798 #endif
1799 .ndo_fix_features = igb_fix_features,
1800 .ndo_set_features = igb_set_features,
1804 * igb_probe - Device Initialization Routine
1805 * @pdev: PCI device information struct
1806 * @ent: entry in igb_pci_tbl
1808 * Returns 0 on success, negative on failure
1810 * igb_probe initializes an adapter identified by a pci_dev structure.
1811 * The OS initialization, configuring of the adapter private structure,
1812 * and a hardware reset occur.
1814 static int __devinit igb_probe(struct pci_dev *pdev,
1815 const struct pci_device_id *ent)
1817 struct net_device *netdev;
1818 struct igb_adapter *adapter;
1819 struct e1000_hw *hw;
1820 u16 eeprom_data = 0;
1821 s32 ret_val;
1822 static int global_quad_port_a; /* global quad port a indication */
1823 const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1824 unsigned long mmio_start, mmio_len;
1825 int err, pci_using_dac;
1826 u16 eeprom_apme_mask = IGB_EEPROM_APME;
1827 u8 part_str[E1000_PBANUM_LENGTH];
1829 /* Catch broken hardware that put the wrong VF device ID in
1830 * the PCIe SR-IOV capability.
1832 if (pdev->is_virtfn) {
1833 WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
1834 pci_name(pdev), pdev->vendor, pdev->device);
1835 return -EINVAL;
1838 err = pci_enable_device_mem(pdev);
1839 if (err)
1840 return err;
1842 pci_using_dac = 0;
1843 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1844 if (!err) {
1845 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1846 if (!err)
1847 pci_using_dac = 1;
1848 } else {
1849 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1850 if (err) {
1851 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1852 if (err) {
1853 dev_err(&pdev->dev, "No usable DMA "
1854 "configuration, aborting\n");
1855 goto err_dma;
1860 err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1861 IORESOURCE_MEM),
1862 igb_driver_name);
1863 if (err)
1864 goto err_pci_reg;
1866 pci_enable_pcie_error_reporting(pdev);
1868 pci_set_master(pdev);
1869 pci_save_state(pdev);
1871 err = -ENOMEM;
1872 netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1873 IGB_MAX_TX_QUEUES);
1874 if (!netdev)
1875 goto err_alloc_etherdev;
1877 SET_NETDEV_DEV(netdev, &pdev->dev);
1879 pci_set_drvdata(pdev, netdev);
1880 adapter = netdev_priv(netdev);
1881 adapter->netdev = netdev;
1882 adapter->pdev = pdev;
1883 hw = &adapter->hw;
1884 hw->back = adapter;
1885 adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1887 mmio_start = pci_resource_start(pdev, 0);
1888 mmio_len = pci_resource_len(pdev, 0);
1890 err = -EIO;
1891 hw->hw_addr = ioremap(mmio_start, mmio_len);
1892 if (!hw->hw_addr)
1893 goto err_ioremap;
1895 netdev->netdev_ops = &igb_netdev_ops;
1896 igb_set_ethtool_ops(netdev);
1897 netdev->watchdog_timeo = 5 * HZ;
1899 strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1901 netdev->mem_start = mmio_start;
1902 netdev->mem_end = mmio_start + mmio_len;
1904 /* PCI config space info */
1905 hw->vendor_id = pdev->vendor;
1906 hw->device_id = pdev->device;
1907 hw->revision_id = pdev->revision;
1908 hw->subsystem_vendor_id = pdev->subsystem_vendor;
1909 hw->subsystem_device_id = pdev->subsystem_device;
1911 /* Copy the default MAC, PHY and NVM function pointers */
1912 memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1913 memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1914 memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1915 /* Initialize skew-specific constants */
1916 err = ei->get_invariants(hw);
1917 if (err)
1918 goto err_sw_init;
1920 /* setup the private structure */
1921 err = igb_sw_init(adapter);
1922 if (err)
1923 goto err_sw_init;
1925 igb_get_bus_info_pcie(hw);
1927 hw->phy.autoneg_wait_to_complete = false;
1929 /* Copper options */
1930 if (hw->phy.media_type == e1000_media_type_copper) {
1931 hw->phy.mdix = AUTO_ALL_MODES;
1932 hw->phy.disable_polarity_correction = false;
1933 hw->phy.ms_type = e1000_ms_hw_default;
1936 if (igb_check_reset_block(hw))
1937 dev_info(&pdev->dev,
1938 "PHY reset is blocked due to SOL/IDER session.\n");
1941 * features is initialized to 0 on allocation; it might already have
1942 * bits set by igb_sw_init, so we should use an OR instead of an
1943 * assignment.
1945 netdev->features |= NETIF_F_SG |
1946 NETIF_F_IP_CSUM |
1947 NETIF_F_IPV6_CSUM |
1948 NETIF_F_TSO |
1949 NETIF_F_TSO6 |
1950 NETIF_F_RXHASH |
1951 NETIF_F_RXCSUM |
1952 NETIF_F_HW_VLAN_RX |
1953 NETIF_F_HW_VLAN_TX;
1955 /* copy netdev features into list of user selectable features */
1956 netdev->hw_features |= netdev->features;
1958 /* set this bit last since it cannot be part of hw_features */
1959 netdev->features |= NETIF_F_HW_VLAN_FILTER;
1961 netdev->vlan_features |= NETIF_F_TSO |
1962 NETIF_F_TSO6 |
1963 NETIF_F_IP_CSUM |
1964 NETIF_F_IPV6_CSUM |
1965 NETIF_F_SG;
1967 if (pci_using_dac) {
1968 netdev->features |= NETIF_F_HIGHDMA;
1969 netdev->vlan_features |= NETIF_F_HIGHDMA;
1972 if (hw->mac.type >= e1000_82576) {
1973 netdev->hw_features |= NETIF_F_SCTP_CSUM;
1974 netdev->features |= NETIF_F_SCTP_CSUM;
1977 netdev->priv_flags |= IFF_UNICAST_FLT;
1979 adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1981 /* before reading the NVM, reset the controller to put the device in a
1982 * known good starting state */
1983 hw->mac.ops.reset_hw(hw);
1985 /* make sure the NVM is good */
1986 if (hw->nvm.ops.validate(hw) < 0) {
1987 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1988 err = -EIO;
1989 goto err_eeprom;
1992 /* copy the MAC address out of the NVM */
1993 if (hw->mac.ops.read_mac_addr(hw))
1994 dev_err(&pdev->dev, "NVM Read Error\n");
1996 memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1997 memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1999 if (!is_valid_ether_addr(netdev->perm_addr)) {
2000 dev_err(&pdev->dev, "Invalid MAC Address\n");
2001 err = -EIO;
2002 goto err_eeprom;
2005 setup_timer(&adapter->watchdog_timer, igb_watchdog,
2006 (unsigned long) adapter);
2007 setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
2008 (unsigned long) adapter);
2010 INIT_WORK(&adapter->reset_task, igb_reset_task);
2011 INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
2013 /* Initialize link properties that are user-changeable */
2014 adapter->fc_autoneg = true;
2015 hw->mac.autoneg = true;
2016 hw->phy.autoneg_advertised = 0x2f;
2018 hw->fc.requested_mode = e1000_fc_default;
2019 hw->fc.current_mode = e1000_fc_default;
2021 igb_validate_mdi_setting(hw);
2023 /* Initial Wake on LAN setting.  If APM wake is enabled in the EEPROM,
2024 * enable the ACPI Magic Packet filter
2027 if (hw->bus.func == 0)
2028 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
2029 else if (hw->mac.type >= e1000_82580)
2030 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
2031 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
2032 &eeprom_data);
2033 else if (hw->bus.func == 1)
2034 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
2036 if (eeprom_data & eeprom_apme_mask)
2037 adapter->eeprom_wol |= E1000_WUFC_MAG;
2039 /* now that we have the eeprom settings, apply the special cases where
2040 * the eeprom may be wrong or the board simply won't support wake on
2041 * lan on a particular port */
2042 switch (pdev->device) {
2043 case E1000_DEV_ID_82575GB_QUAD_COPPER:
2044 adapter->eeprom_wol = 0;
2045 break;
2046 case E1000_DEV_ID_82575EB_FIBER_SERDES:
2047 case E1000_DEV_ID_82576_FIBER:
2048 case E1000_DEV_ID_82576_SERDES:
2049 /* Wake events are only supported on port A for dual fiber
2050 * adapters, regardless of the eeprom setting */
2051 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
2052 adapter->eeprom_wol = 0;
2053 break;
2054 case E1000_DEV_ID_82576_QUAD_COPPER:
2055 case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
2056 /* if quad port adapter, disable WoL on all but port A */
2057 if (global_quad_port_a != 0)
2058 adapter->eeprom_wol = 0;
2059 else
2060 adapter->flags |= IGB_FLAG_QUAD_PORT_A;
2061 /* Reset for multiple quad port adapters */
2062 if (++global_quad_port_a == 4)
2063 global_quad_port_a = 0;
2064 break;
2067 /* initialize the wol settings based on the eeprom settings */
2068 adapter->wol = adapter->eeprom_wol;
2069 device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
2071 /* reset the hardware with the new settings */
2072 igb_reset(adapter);
2074 /* let the f/w know that the h/w is now under the control of the
2075 * driver. */
2076 igb_get_hw_control(adapter);
2078 strcpy(netdev->name, "eth%d");
2079 err = register_netdev(netdev);
2080 if (err)
2081 goto err_register;
2083 /* carrier off reporting is important to ethtool even BEFORE open */
2084 netif_carrier_off(netdev);
2086 #ifdef CONFIG_IGB_DCA
2087 if (dca_add_requester(&pdev->dev) == 0) {
2088 adapter->flags |= IGB_FLAG_DCA_ENABLED;
2089 dev_info(&pdev->dev, "DCA enabled\n");
2090 igb_setup_dca(adapter);
2093 #endif
2094 /* do hw tstamp init after resetting */
2095 igb_init_hw_timer(adapter);
2097 dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
2098 /* print bus type/speed/width info */
2099 dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
2100 netdev->name,
2101 ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2102 (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2103 "unknown"),
2104 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2105 (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2106 (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2107 "unknown"),
2108 netdev->dev_addr);
2110 ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2111 if (ret_val)
2112 strcpy(part_str, "Unknown");
2113 dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2114 dev_info(&pdev->dev,
2115 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2116 adapter->msix_entries ? "MSI-X" :
2117 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2118 adapter->num_rx_queues, adapter->num_tx_queues);
2119 switch (hw->mac.type) {
2120 case e1000_i350:
2121 igb_set_eee_i350(hw);
2122 break;
2123 default:
2124 break;
2127 pm_runtime_put_noidle(&pdev->dev);
2128 return 0;
2130 err_register:
2131 igb_release_hw_control(adapter);
2132 err_eeprom:
2133 if (!igb_check_reset_block(hw))
2134 igb_reset_phy(hw);
2136 if (hw->flash_address)
2137 iounmap(hw->flash_address);
2138 err_sw_init:
2139 igb_clear_interrupt_scheme(adapter);
2140 iounmap(hw->hw_addr);
2141 err_ioremap:
2142 free_netdev(netdev);
2143 err_alloc_etherdev:
2144 pci_release_selected_regions(pdev,
2145 pci_select_bars(pdev, IORESOURCE_MEM));
2146 err_pci_reg:
2147 err_dma:
2148 pci_disable_device(pdev);
2149 return err;
2153 * igb_remove - Device Removal Routine
2154 * @pdev: PCI device information struct
2156 * igb_remove is called by the PCI subsystem to alert the driver
2157 * that it should release a PCI device.  This could be caused by a
2158 * Hot-Plug event, or because the driver is going to be removed from
2159 * memory.
2161 static void __devexit igb_remove(struct pci_dev *pdev)
2163 struct net_device *netdev = pci_get_drvdata(pdev);
2164 struct igb_adapter *adapter = netdev_priv(netdev);
2165 struct e1000_hw *hw = &adapter->hw;
2167 pm_runtime_get_noresume(&pdev->dev);
2170 * The watchdog timer may be rescheduled, so explicitly
2171 * disable watchdog from being rescheduled.
2173 set_bit(__IGB_DOWN, &adapter->state);
2174 del_timer_sync(&adapter->watchdog_timer);
2175 del_timer_sync(&adapter->phy_info_timer);
2177 cancel_work_sync(&adapter->reset_task);
2178 cancel_work_sync(&adapter->watchdog_task);
2180 #ifdef CONFIG_IGB_DCA
2181 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2182 dev_info(&pdev->dev, "DCA disabled\n");
2183 dca_remove_requester(&pdev->dev);
2184 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2185 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2187 #endif
2189 /* Release control of h/w to f/w. If f/w is AMT enabled, this
2190 * would have already happened in close and is redundant. */
2191 igb_release_hw_control(adapter);
2193 unregister_netdev(netdev);
2195 igb_clear_interrupt_scheme(adapter);
2197 #ifdef CONFIG_PCI_IOV
2198 /* reclaim resources allocated to VFs */
2199 if (adapter->vf_data) {
2200 /* disable iov and allow time for transactions to clear */
2201 if (!igb_check_vf_assignment(adapter)) {
2202 pci_disable_sriov(pdev);
2203 msleep(500);
2204 } else {
2205 dev_info(&pdev->dev, "VF(s) assigned to guests!\n");
2208 kfree(adapter->vf_data);
2209 adapter->vf_data = NULL;
2210 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2211 wrfl();
2212 msleep(100);
2213 dev_info(&pdev->dev, "IOV Disabled\n");
2215 #endif
2217 iounmap(hw->hw_addr);
2218 if (hw->flash_address)
2219 iounmap(hw->flash_address);
2220 pci_release_selected_regions(pdev,
2221 pci_select_bars(pdev, IORESOURCE_MEM));
2223 kfree(adapter->shadow_vfta);
2224 free_netdev(netdev);
2226 pci_disable_pcie_error_reporting(pdev);
2228 pci_disable_device(pdev);
2232 * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2233 * @adapter: board private structure to initialize
2235 * This function initializes the vf specific data storage and then attempts to
2236 * allocate the VFs.  It is ordered this way because it is much more
2237 * expensive time-wise to disable SR-IOV than it is to allocate and free
2238 * the memory for the VFs.
2240 static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2242 #ifdef CONFIG_PCI_IOV
2243 struct pci_dev *pdev = adapter->pdev;
2244 int old_vfs = igb_find_enabled_vfs(adapter);
2245 int i;
2247 if (old_vfs) {
2248 dev_info(&pdev->dev, "%d pre-allocated VFs found - override max_vfs setting of %d\n", old_vfs, max_vfs);
2250 adapter->vfs_allocated_count = old_vfs;
2253 if (!adapter->vfs_allocated_count)
2254 return;
2256 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2257 sizeof(struct vf_data_storage), GFP_KERNEL);
2258 /* if allocation failed then we do not support SR-IOV */
2259 if (!adapter->vf_data) {
2260 adapter->vfs_allocated_count = 0;
2261 dev_err(&pdev->dev, "Unable to allocate memory for VF Data Storage\n");
2263 goto out;
2266 if (!old_vfs) {
2267 if (pci_enable_sriov(pdev, adapter->vfs_allocated_count))
2268 goto err_out;
2270 dev_info(&pdev->dev, "%d VFs allocated\n",
2271 adapter->vfs_allocated_count);
2272 for (i = 0; i < adapter->vfs_allocated_count; i++)
2273 igb_vf_configure(adapter, i);
2275 /* DMA Coalescing is not supported in IOV mode. */
2276 adapter->flags &= ~IGB_FLAG_DMAC;
2277 goto out;
2278 err_out:
2279 kfree(adapter->vf_data);
2280 adapter->vf_data = NULL;
2281 adapter->vfs_allocated_count = 0;
2282 out:
2283 return;
2284 #endif /* CONFIG_PCI_IOV */
2288 * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2289 * @adapter: board private structure to initialize
2291 * igb_init_hw_timer initializes the function pointer and values for the hw
2292 * timer found in hardware.
2294 static void igb_init_hw_timer(struct igb_adapter *adapter)
2296 struct e1000_hw *hw = &adapter->hw;
2298 switch (hw->mac.type) {
2299 case e1000_i350:
2300 case e1000_82580:
2301 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2302 adapter->cycles.read = igb_read_clock;
2303 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2304 adapter->cycles.mult = 1;
2306 * The 82580 timesync updates the system timer by 8 ns every 8 ns
2307 * and the value cannot be shifted. Instead we need to shift
2308 * the registers to generate a 64-bit timer value. As a result
2309 * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2310 * 24 in order to generate a larger value for synchronization.
2312 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
2313 /* disable system timer temporarily by setting bit 31 */
2314 wr32(E1000_TSAUXC, 0x80000000);
2315 wrfl();
2317 /* Set registers so that rollover occurs soon to test this. */
2318 wr32(E1000_SYSTIMR, 0x00000000);
2319 wr32(E1000_SYSTIML, 0x80000000);
2320 wr32(E1000_SYSTIMH, 0x000000FF);
2321 wrfl();
2323 /* enable system timer by clearing bit 31 */
2324 wr32(E1000_TSAUXC, 0x0);
2325 wrfl();
2327 timecounter_init(&adapter->clock,
2328 &adapter->cycles,
2329 ktime_to_ns(ktime_get_real()));
2331 * Synchronize our NIC clock against system wall clock. NIC
2332 * time stamp reading requires ~3us per sample and each sample
2333 * was pretty stable even under load, so only 10 samples are
2334 * needed for each offset comparison.
2336 memset(&adapter->compare, 0, sizeof(adapter->compare));
2337 adapter->compare.source = &adapter->clock;
2338 adapter->compare.target = ktime_get_real;
2339 adapter->compare.num_samples = 10;
2340 timecompare_update(&adapter->compare, 0);
2341 break;
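/* Illustrative note (not part of the original source): with
 * cycles.mult = 1 and cycles.shift = IGB_82580_TSYNC_SHIFT the generic
 * timecounter converts a raw delta to nanoseconds as
 *
 *	ns = (delta * mult) >> shift
 *
 * Since SYSTIM on 82580/i350 already counts in nanoseconds, the read
 * callback (igb_read_clock(), set a few lines above) is expected to
 * return the register value shifted left by the same amount, so the
 * shift cancels out and the timecounter sees plain nanoseconds while
 * the low bits keep room for the SYSTIMR residue.
 */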
2342 case e1000_82576:
2344 * Initialize hardware timer: we keep it running just in case
2345 * that some program needs it later on.
2347 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2348 adapter->cycles.read = igb_read_clock;
2349 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2350 adapter->cycles.mult = 1;
2352 * Scale the NIC clock cycle by a large factor so that
2353 * relatively small clock corrections can be added or
2354 * subtracted at each clock tick. The drawbacks of a large
2355 * factor are a) that the clock register overflows more quickly
2356 * (not such a big deal) and b) that the increment per tick has
2357 * to fit into 24 bits. As a result we need to use a shift of
2358 * 19 so we can fit a value of 16 into the TIMINCA register.
2360 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2361 wr32(E1000_TIMINCA,
2362 (1 << E1000_TIMINCA_16NS_SHIFT) |
2363 (16 << IGB_82576_TSYNC_SHIFT));
2365 /* Set registers so that rollover occurs soon to test this. */
2366 wr32(E1000_SYSTIML, 0x00000000);
2367 wr32(E1000_SYSTIMH, 0xFF800000);
2368 wrfl();
2370 timecounter_init(&adapter->clock,
2371 &adapter->cycles,
2372 ktime_to_ns(ktime_get_real()));
2374 * Synchronize our NIC clock against system wall clock. NIC
2375 * time stamp reading requires ~3us per sample and each sample
2376 * was pretty stable even under load, so only 10 samples are
2377 * needed for each offset comparison.
2379 memset(&adapter->compare, 0, sizeof(adapter->compare));
2380 adapter->compare.source = &adapter->clock;
2381 adapter->compare.target = ktime_get_real;
2382 adapter->compare.num_samples = 10;
2383 timecompare_update(&adapter->compare, 0);
2384 break;
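/* Illustrative arithmetic (not part of the original source): the
 * TIMINCA write above programs an increment of 16 << 19 per hardware
 * tick; assuming one tick every 16 ns (as the E1000_TIMINCA_16NS_SHIFT
 * name suggests), SYSTIM advances by 2^19 counts per nanosecond, so
 * with mult = 1 and shift = 19 the timecounter recovers
 *
 *	ns = counter >> 19
 *
 * leaving 19 fractional bits for the fine-grained corrections the
 * comment above describes.
 */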
2385 case e1000_82575:
2386 /* 82575 does not support timesync */
2387 default:
2388 break;
2394 * igb_sw_init - Initialize general software structures (struct igb_adapter)
2395 * @adapter: board private structure to initialize
2397 * igb_sw_init initializes the Adapter private data structure.
2398 * Fields are initialized based on PCI device information and
2399 * OS network device settings (MTU size).
2401 static int __devinit igb_sw_init(struct igb_adapter *adapter)
2403 struct e1000_hw *hw = &adapter->hw;
2404 struct net_device *netdev = adapter->netdev;
2405 struct pci_dev *pdev = adapter->pdev;
2407 pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2409 /* set default ring sizes */
2410 adapter->tx_ring_count = IGB_DEFAULT_TXD;
2411 adapter->rx_ring_count = IGB_DEFAULT_RXD;
2413 /* set default ITR values */
2414 adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2415 adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2417 /* set default work limits */
2418 adapter->tx_work_limit = IGB_DEFAULT_TX_WORK;
2420 adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
2421 VLAN_HLEN;
2422 adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
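/* Worked example (illustrative): with the default 1500 byte MTU this
 * gives max_frame_size = 1500 + 14 (ETH_HLEN) + 4 (ETH_FCS_LEN) +
 * 4 (VLAN_HLEN) = 1522 bytes, and min_frame_size = 60 (ETH_ZLEN) +
 * 4 (ETH_FCS_LEN) = 64 bytes, the smallest legal Ethernet frame.
 */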
2424 adapter->node = -1;
2426 spin_lock_init(&adapter->stats64_lock);
2427 #ifdef CONFIG_PCI_IOV
2428 switch (hw->mac.type) {
2429 case e1000_82576:
2430 case e1000_i350:
2431 if (max_vfs > 7) {
2432 dev_warn(&pdev->dev,
2433 "Maximum of 7 VFs per PF, using max\n");
2434 adapter->vfs_allocated_count = 7;
2435 } else
2436 adapter->vfs_allocated_count = max_vfs;
2437 break;
2438 default:
2439 break;
2441 #endif /* CONFIG_PCI_IOV */
2442 adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2443 /* i350 cannot do RSS and SR-IOV at the same time */
2444 if (hw->mac.type == e1000_i350 && adapter->vfs_allocated_count)
2445 adapter->rss_queues = 1;
2448 * If rss_queues > 4, or if VFs are going to be allocated while more
2449 * than one rss_queue is in use, combine the queues into queue pairs
2450 * in order to conserve interrupts due to the limited supply
2452 if ((adapter->rss_queues > 4) ||
2453 ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2454 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
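/* Illustrative note: pairing places one Tx and one Rx ring on the same
 * interrupt vector, so e.g. 8 RSS queues need roughly 8 vectors plus
 * one for link/other events instead of 16 + 1, which keeps the
 * configuration within the adapter's limited MSI-X vector budget.
 */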
2456 /* Setup and initialize a copy of the hw vlan table array */
2457 adapter->shadow_vfta = kzalloc(sizeof(u32) *
2458 E1000_VLAN_FILTER_TBL_SIZE,
2459 GFP_ATOMIC);
2461 /* This call may decrease the number of queues */
2462 if (igb_init_interrupt_scheme(adapter)) {
2463 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2464 return -ENOMEM;
2467 igb_probe_vfs(adapter);
2469 /* Explicitly disable IRQ since the NIC can be in any state. */
2470 igb_irq_disable(adapter);
2472 if (hw->mac.type == e1000_i350)
2473 adapter->flags &= ~IGB_FLAG_DMAC;
2475 set_bit(__IGB_DOWN, &adapter->state);
2476 return 0;
2480 * igb_open - Called when a network interface is made active
2481 * @netdev: network interface device structure
2483 * Returns 0 on success, negative value on failure
2485 * The open entry point is called when a network interface is made
2486 * active by the system (IFF_UP). At this point all resources needed
2487 * for transmit and receive operations are allocated, the interrupt
2488 * handler is registered with the OS, the watchdog timer is started,
2489 * and the stack is notified that the interface is ready.
2491 static int __igb_open(struct net_device *netdev, bool resuming)
2493 struct igb_adapter *adapter = netdev_priv(netdev);
2494 struct e1000_hw *hw = &adapter->hw;
2495 struct pci_dev *pdev = adapter->pdev;
2496 int err;
2497 int i;
2499 /* disallow open during test */
2500 if (test_bit(__IGB_TESTING, &adapter->state)) {
2501 WARN_ON(resuming);
2502 return -EBUSY;
2505 if (!resuming)
2506 pm_runtime_get_sync(&pdev->dev);
2508 netif_carrier_off(netdev);
2510 /* allocate transmit descriptors */
2511 err = igb_setup_all_tx_resources(adapter);
2512 if (err)
2513 goto err_setup_tx;
2515 /* allocate receive descriptors */
2516 err = igb_setup_all_rx_resources(adapter);
2517 if (err)
2518 goto err_setup_rx;
2520 igb_power_up_link(adapter);
2522 /* before we allocate an interrupt, we must be ready to handle it.
2523 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2524 * as soon as we call pci_request_irq, so we have to setup our
2525 * clean_rx handler before we do so. */
2526 igb_configure(adapter);
2528 err = igb_request_irq(adapter);
2529 if (err)
2530 goto err_req_irq;
2532 /* From here on the code is the same as igb_up() */
2533 clear_bit(__IGB_DOWN, &adapter->state);
2535 for (i = 0; i < adapter->num_q_vectors; i++)
2536 napi_enable(&(adapter->q_vector[i]->napi));
2538 /* Clear any pending interrupts. */
2539 rd32(E1000_ICR);
2541 igb_irq_enable(adapter);
2543 /* notify VFs that reset has been completed */
2544 if (adapter->vfs_allocated_count) {
2545 u32 reg_data = rd32(E1000_CTRL_EXT);
2546 reg_data |= E1000_CTRL_EXT_PFRSTD;
2547 wr32(E1000_CTRL_EXT, reg_data);
2550 netif_tx_start_all_queues(netdev);
2552 if (!resuming)
2553 pm_runtime_put(&pdev->dev);
2555 /* start the watchdog. */
2556 hw->mac.get_link_status = 1;
2557 schedule_work(&adapter->watchdog_task);
2559 return 0;
2561 err_req_irq:
2562 igb_release_hw_control(adapter);
2563 igb_power_down_link(adapter);
2564 igb_free_all_rx_resources(adapter);
2565 err_setup_rx:
2566 igb_free_all_tx_resources(adapter);
2567 err_setup_tx:
2568 igb_reset(adapter);
2569 if (!resuming)
2570 pm_runtime_put(&pdev->dev);
2572 return err;
2575 static int igb_open(struct net_device *netdev)
2577 return __igb_open(netdev, false);
2581 * igb_close - Disables a network interface
2582 * @netdev: network interface device structure
2584 * Returns 0, this is not allowed to fail
2586 * The close entry point is called when an interface is de-activated
2587 * by the OS. The hardware is still under the driver's control, but
2588 * needs to be disabled. A global MAC reset is issued to stop the
2589 * hardware, and all transmit and receive resources are freed.
2591 static int __igb_close(struct net_device *netdev, bool suspending)
2593 struct igb_adapter *adapter = netdev_priv(netdev);
2594 struct pci_dev *pdev = adapter->pdev;
2596 WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2598 if (!suspending)
2599 pm_runtime_get_sync(&pdev->dev);
2601 igb_down(adapter);
2602 igb_free_irq(adapter);
2604 igb_free_all_tx_resources(adapter);
2605 igb_free_all_rx_resources(adapter);
2607 if (!suspending)
2608 pm_runtime_put_sync(&pdev->dev);
2609 return 0;
2612 static int igb_close(struct net_device *netdev)
2614 return __igb_close(netdev, false);
2618 * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2619 * @tx_ring: tx descriptor ring (for a specific queue) to setup
2621 * Return 0 on success, negative on failure
2623 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2625 struct device *dev = tx_ring->dev;
2626 int orig_node = dev_to_node(dev);
2627 int size;
2629 size = sizeof(struct igb_tx_buffer) * tx_ring->count;
2630 tx_ring->tx_buffer_info = vzalloc_node(size, tx_ring->numa_node);
2631 if (!tx_ring->tx_buffer_info)
2632 tx_ring->tx_buffer_info = vzalloc(size);
2633 if (!tx_ring->tx_buffer_info)
2634 goto err;
2636 /* round up to nearest 4K */
2637 tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2638 tx_ring->size = ALIGN(tx_ring->size, 4096);
2640 set_dev_node(dev, tx_ring->numa_node);
2641 tx_ring->desc = dma_alloc_coherent(dev,
2642 tx_ring->size,
2643 &tx_ring->dma,
2644 GFP_KERNEL);
2645 set_dev_node(dev, orig_node);
2646 if (!tx_ring->desc)
2647 tx_ring->desc = dma_alloc_coherent(dev,
2648 tx_ring->size,
2649 &tx_ring->dma,
2650 GFP_KERNEL);
2652 if (!tx_ring->desc)
2653 goto err;
2655 tx_ring->next_to_use = 0;
2656 tx_ring->next_to_clean = 0;
2658 return 0;
2660 err:
2661 vfree(tx_ring->tx_buffer_info);
2662 dev_err(dev,
2663 "Unable to allocate memory for the transmit descriptor ring\n");
2664 return -ENOMEM;
2668 * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2669 * (Descriptors) for all queues
2670 * @adapter: board private structure
2672 * Return 0 on success, negative on failure
2674 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2676 struct pci_dev *pdev = adapter->pdev;
2677 int i, err = 0;
2679 for (i = 0; i < adapter->num_tx_queues; i++) {
2680 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2681 if (err) {
2682 dev_err(&pdev->dev,
2683 "Allocation for Tx Queue %u failed\n", i);
2684 for (i--; i >= 0; i--)
2685 igb_free_tx_resources(adapter->tx_ring[i]);
2686 break;
2690 return err;
2694 * igb_setup_tctl - configure the transmit control registers
2695 * @adapter: Board private structure
2697 void igb_setup_tctl(struct igb_adapter *adapter)
2699 struct e1000_hw *hw = &adapter->hw;
2700 u32 tctl;
2702 /* disable queue 0 which is enabled by default on 82575 and 82576 */
2703 wr32(E1000_TXDCTL(0), 0);
2705 /* Program the Transmit Control Register */
2706 tctl = rd32(E1000_TCTL);
2707 tctl &= ~E1000_TCTL_CT;
2708 tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2709 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2711 igb_config_collision_dist(hw);
2713 /* Enable transmits */
2714 tctl |= E1000_TCTL_EN;
2716 wr32(E1000_TCTL, tctl);
2720 * igb_configure_tx_ring - Configure transmit ring after Reset
2721 * @adapter: board private structure
2722 * @ring: tx ring to configure
2724 * Configure a transmit ring after a reset.
2726 void igb_configure_tx_ring(struct igb_adapter *adapter,
2727 struct igb_ring *ring)
2729 struct e1000_hw *hw = &adapter->hw;
2730 u32 txdctl = 0;
2731 u64 tdba = ring->dma;
2732 int reg_idx = ring->reg_idx;
2734 /* disable the queue */
2735 wr32(E1000_TXDCTL(reg_idx), 0);
2736 wrfl();
2737 mdelay(10);
2739 wr32(E1000_TDLEN(reg_idx),
2740 ring->count * sizeof(union e1000_adv_tx_desc));
2741 wr32(E1000_TDBAL(reg_idx),
2742 tdba & 0x00000000ffffffffULL);
2743 wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2745 ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2746 wr32(E1000_TDH(reg_idx), 0);
2747 writel(0, ring->tail);
2749 txdctl |= IGB_TX_PTHRESH;
2750 txdctl |= IGB_TX_HTHRESH << 8;
2751 txdctl |= IGB_TX_WTHRESH << 16;
2753 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2754 wr32(E1000_TXDCTL(reg_idx), txdctl);
2758 * igb_configure_tx - Configure transmit Unit after Reset
2759 * @adapter: board private structure
2761 * Configure the Tx unit of the MAC after a reset.
2763 static void igb_configure_tx(struct igb_adapter *adapter)
2765 int i;
2767 for (i = 0; i < adapter->num_tx_queues; i++)
2768 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2772 * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2773 * @rx_ring: rx descriptor ring (for a specific queue) to setup
2775 * Returns 0 on success, negative on failure
2777 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2779 struct device *dev = rx_ring->dev;
2780 int orig_node = dev_to_node(dev);
2781 int size, desc_len;
2783 size = sizeof(struct igb_rx_buffer) * rx_ring->count;
2784 rx_ring->rx_buffer_info = vzalloc_node(size, rx_ring->numa_node);
2785 if (!rx_ring->rx_buffer_info)
2786 rx_ring->rx_buffer_info = vzalloc(size);
2787 if (!rx_ring->rx_buffer_info)
2788 goto err;
2790 desc_len = sizeof(union e1000_adv_rx_desc);
2792 /* Round up to nearest 4K */
2793 rx_ring->size = rx_ring->count * desc_len;
2794 rx_ring->size = ALIGN(rx_ring->size, 4096);
2796 set_dev_node(dev, rx_ring->numa_node);
2797 rx_ring->desc = dma_alloc_coherent(dev,
2798 rx_ring->size,
2799 &rx_ring->dma,
2800 GFP_KERNEL);
2801 set_dev_node(dev, orig_node);
2802 if (!rx_ring->desc)
2803 rx_ring->desc = dma_alloc_coherent(dev,
2804 rx_ring->size,
2805 &rx_ring->dma,
2806 GFP_KERNEL);
2808 if (!rx_ring->desc)
2809 goto err;
2811 rx_ring->next_to_clean = 0;
2812 rx_ring->next_to_use = 0;
2814 return 0;
2816 err:
2817 vfree(rx_ring->rx_buffer_info);
2818 rx_ring->rx_buffer_info = NULL;
2819 dev_err(dev, "Unable to allocate memory for the receive descriptor ring\n");
2821 return -ENOMEM;
2825 * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2826 * (Descriptors) for all queues
2827 * @adapter: board private structure
2829 * Return 0 on success, negative on failure
2831 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2833 struct pci_dev *pdev = adapter->pdev;
2834 int i, err = 0;
2836 for (i = 0; i < adapter->num_rx_queues; i++) {
2837 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2838 if (err) {
2839 dev_err(&pdev->dev,
2840 "Allocation for Rx Queue %u failed\n", i);
2841 for (i--; i >= 0; i--)
2842 igb_free_rx_resources(adapter->rx_ring[i]);
2843 break;
2847 return err;
2851 * igb_setup_mrqc - configure the multiple receive queue control registers
2852 * @adapter: Board private structure
2854 static void igb_setup_mrqc(struct igb_adapter *adapter)
2856 struct e1000_hw *hw = &adapter->hw;
2857 u32 mrqc, rxcsum;
2858 u32 j, num_rx_queues, shift = 0, shift2 = 0;
2859 union e1000_reta {
2860 u32 dword;
2861 u8 bytes[4];
2862 } reta;
2863 static const u8 rsshash[40] = {
2864 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2865 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2866 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2867 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2869 /* Fill out hash function seeds */
2870 for (j = 0; j < 10; j++) {
2871 u32 rsskey = rsshash[(j * 4)];
2872 rsskey |= rsshash[(j * 4) + 1] << 8;
2873 rsskey |= rsshash[(j * 4) + 2] << 16;
2874 rsskey |= rsshash[(j * 4) + 3] << 24;
2875 array_wr32(E1000_RSSRK(0), j, rsskey);
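/* Worked example (illustrative): for j = 0 the bytes rsshash[0..3] =
 * 0x6d, 0x5a, 0x56, 0xda are packed little-endian, so RSSRK(0) is
 * written with 0xda565a6d; the remaining nine registers are filled the
 * same way from the rest of the 40-byte key.
 */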
2878 num_rx_queues = adapter->rss_queues;
2880 if (adapter->vfs_allocated_count) {
2881 /* 82575 and 82576 supports 2 RSS queues for VMDq */
2882 switch (hw->mac.type) {
2883 case e1000_i350:
2884 case e1000_82580:
2885 num_rx_queues = 1;
2886 shift = 0;
2887 break;
2888 case e1000_82576:
2889 shift = 3;
2890 num_rx_queues = 2;
2891 break;
2892 case e1000_82575:
2893 shift = 2;
2894 shift2 = 6;
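/* fall through */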
2895 default:
2896 break;
2898 } else {
2899 if (hw->mac.type == e1000_82575)
2900 shift = 6;
2903 for (j = 0; j < (32 * 4); j++) {
2904 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2905 if (shift2)
2906 reta.bytes[j & 3] |= num_rx_queues << shift2;
2907 if ((j & 3) == 3)
2908 wr32(E1000_RETA(j >> 2), reta.dword);
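/* Worked example (illustrative): with no VFs and 4 RSS queues on an
 * 82580/i350, shift and shift2 are both 0, so the 128 RETA bytes
 * simply cycle 0, 1, 2, 3, 0, 1, ... and incoming flows are spread
 * evenly across the four rings; the non-zero shift/shift2 values above
 * appear to place the same pattern in the bit positions the older
 * 82575/82576 parts expect.
 */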
2912 * Disable raw packet checksumming so that RSS hash is placed in
2913 * descriptor on writeback. No need to enable TCP/UDP/IP checksum
2914 * offloads as they are enabled by default
2916 rxcsum = rd32(E1000_RXCSUM);
2917 rxcsum |= E1000_RXCSUM_PCSD;
2919 if (adapter->hw.mac.type >= e1000_82576)
2920 /* Enable Receive Checksum Offload for SCTP */
2921 rxcsum |= E1000_RXCSUM_CRCOFL;
2923 /* Don't need to set TUOFL or IPOFL, they default to 1 */
2924 wr32(E1000_RXCSUM, rxcsum);
2926 /* If VMDq is enabled then we set the appropriate mode for that, else
2927 * we default to RSS so that an RSS hash is calculated per packet even
2928 * if we are only using one queue */
2929 if (adapter->vfs_allocated_count) {
2930 if (hw->mac.type > e1000_82575) {
2931 /* Set the default pool for the PF's first queue */
2932 u32 vtctl = rd32(E1000_VT_CTL);
2933 vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2934 E1000_VT_CTL_DISABLE_DEF_POOL);
2935 vtctl |= adapter->vfs_allocated_count <<
2936 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2937 wr32(E1000_VT_CTL, vtctl);
2939 if (adapter->rss_queues > 1)
2940 mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2941 else
2942 mrqc = E1000_MRQC_ENABLE_VMDQ;
2943 } else {
2944 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2946 igb_vmm_control(adapter);
2949 * Generate RSS hash based on TCP port numbers and/or
2950 * IPv4/v6 src and dst addresses since UDP cannot be
2951 * hashed reliably due to IP fragmentation
2953 mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2954 E1000_MRQC_RSS_FIELD_IPV4_TCP |
2955 E1000_MRQC_RSS_FIELD_IPV6 |
2956 E1000_MRQC_RSS_FIELD_IPV6_TCP |
2957 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2959 wr32(E1000_MRQC, mrqc);
2963 * igb_setup_rctl - configure the receive control registers
2964 * @adapter: Board private structure
2966 void igb_setup_rctl(struct igb_adapter *adapter)
2968 struct e1000_hw *hw = &adapter->hw;
2969 u32 rctl;
2971 rctl = rd32(E1000_RCTL);
2973 rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2974 rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2976 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2977 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2980 * enable stripping of CRC. It's unlikely this will break BMC
2981 * redirection as it did with e1000. Newer features require
2982 * that the HW strips the CRC.
2984 rctl |= E1000_RCTL_SECRC;
2986 /* disable store bad packets and clear size bits. */
2987 rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2989 /* enable LPE to prevent packets larger than max_frame_size */
2990 rctl |= E1000_RCTL_LPE;
2992 /* disable queue 0 to prevent tail write w/o re-config */
2993 wr32(E1000_RXDCTL(0), 0);
2995 /* Attention!!! For SR-IOV PF driver operations you must enable
2996 * queue drop for all VF and PF queues to prevent head of line blocking
2997 * if an un-trusted VF does not provide descriptors to hardware.
2999 if (adapter->vfs_allocated_count) {
3000 /* set all queue drop enable bits */
3001 wr32(E1000_QDE, ALL_QUEUES);
3004 wr32(E1000_RCTL, rctl);
3007 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
3008 int vfn)
3010 struct e1000_hw *hw = &adapter->hw;
3011 u32 vmolr;
3013 /* if this isn't the PF, check whether the VF has VLANs enabled and,
3014 * if so, increase the size to make room for the vlan tag */
3015 if (vfn < adapter->vfs_allocated_count &&
3016 adapter->vf_data[vfn].vlans_enabled)
3017 size += VLAN_TAG_SIZE;
3019 vmolr = rd32(E1000_VMOLR(vfn));
3020 vmolr &= ~E1000_VMOLR_RLPML_MASK;
3021 vmolr |= size | E1000_VMOLR_LPE;
3022 wr32(E1000_VMOLR(vfn), vmolr);
3024 return 0;
3028 * igb_rlpml_set - set maximum receive packet size
3029 * @adapter: board private structure
3031 * Configure maximum receivable packet size.
3033 static void igb_rlpml_set(struct igb_adapter *adapter)
3035 u32 max_frame_size = adapter->max_frame_size;
3036 struct e1000_hw *hw = &adapter->hw;
3037 u16 pf_id = adapter->vfs_allocated_count;
3039 if (pf_id) {
3040 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
3042 * If we're in VMDQ or SR-IOV mode, then set global RLPML
3043 * to our max jumbo frame size, in case we need to enable
3044 * jumbo frames on one of the rings later.
3045 * This will not pass over-length frames into the default
3046 * queue because it's gated by the VMOLR.RLPML.
3048 max_frame_size = MAX_JUMBO_FRAME_SIZE;
3051 wr32(E1000_RLPML, max_frame_size);
3054 static inline void igb_set_vmolr(struct igb_adapter *adapter,
3055 int vfn, bool aupe)
3057 struct e1000_hw *hw = &adapter->hw;
3058 u32 vmolr;
3061 * This register exists only on 82576 and newer, so on older hardware
3062 * we should exit and do nothing
3064 if (hw->mac.type < e1000_82576)
3065 return;
3067 vmolr = rd32(E1000_VMOLR(vfn));
3068 vmolr |= E1000_VMOLR_STRVLAN; /* Strip vlan tags */
3069 if (aupe)
3070 vmolr |= E1000_VMOLR_AUPE; /* Accept untagged packets */
3071 else
3072 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
3074 /* clear all bits that might not be set */
3075 vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
3077 if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
3078 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
3080 * for VMDq only allow the VFs and pool 0 to accept broadcast and
3081 * multicast packets
3083 if (vfn <= adapter->vfs_allocated_count)
3084 vmolr |= E1000_VMOLR_BAM; /* Accept broadcast */
3086 wr32(E1000_VMOLR(vfn), vmolr);
3090 * igb_configure_rx_ring - Configure a receive ring after Reset
3091 * @adapter: board private structure
3092 * @ring: receive ring to be configured
3094 * Configure the Rx unit of the MAC after a reset.
3096 void igb_configure_rx_ring(struct igb_adapter *adapter,
3097 struct igb_ring *ring)
3099 struct e1000_hw *hw = &adapter->hw;
3100 u64 rdba = ring->dma;
3101 int reg_idx = ring->reg_idx;
3102 u32 srrctl = 0, rxdctl = 0;
3104 /* disable the queue */
3105 wr32(E1000_RXDCTL(reg_idx), 0);
3107 /* Set DMA base address registers */
3108 wr32(E1000_RDBAL(reg_idx),
3109 rdba & 0x00000000ffffffffULL);
3110 wr32(E1000_RDBAH(reg_idx), rdba >> 32);
3111 wr32(E1000_RDLEN(reg_idx),
3112 ring->count * sizeof(union e1000_adv_rx_desc));
3114 /* initialize head and tail */
3115 ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
3116 wr32(E1000_RDH(reg_idx), 0);
3117 writel(0, ring->tail);
3119 /* set descriptor configuration */
3120 srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3121 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
3122 srrctl |= IGB_RXBUFFER_16384 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3123 #else
3124 srrctl |= (PAGE_SIZE / 2) >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3125 #endif
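/* Illustrative note: on a typical 4 KiB page system PAGE_SIZE / 2 is
 * 2048, which is below IGB_RXBUFFER_16384, so the packet buffer size
 * field ends up describing half-page (2 KiB) receive buffers; the
 * header size programmed above from IGB_RX_HDR_LEN is used by the
 * header-split descriptor type selected just below.
 */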
3126 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3127 if (hw->mac.type >= e1000_82580)
3128 srrctl |= E1000_SRRCTL_TIMESTAMP;
3129 /* Only set Drop Enable if we are supporting multiple queues */
3130 if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
3131 srrctl |= E1000_SRRCTL_DROP_EN;
3133 wr32(E1000_SRRCTL(reg_idx), srrctl);
3135 /* set filtering for VMDQ pools */
3136 igb_set_vmolr(adapter, reg_idx & 0x7, true);
3138 rxdctl |= IGB_RX_PTHRESH;
3139 rxdctl |= IGB_RX_HTHRESH << 8;
3140 rxdctl |= IGB_RX_WTHRESH << 16;
3142 /* enable receive descriptor fetching */
3143 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3144 wr32(E1000_RXDCTL(reg_idx), rxdctl);
3148 * igb_configure_rx - Configure receive Unit after Reset
3149 * @adapter: board private structure
3151 * Configure the Rx unit of the MAC after a reset.
3153 static void igb_configure_rx(struct igb_adapter *adapter)
3155 int i;
3157 /* set UTA to appropriate mode */
3158 igb_set_uta(adapter);
3160 /* set the correct pool for the PF default MAC address in entry 0 */
3161 igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
3162 adapter->vfs_allocated_count);
3164 /* Setup the HW Rx Head and Tail Descriptor Pointers and
3165 * the Base and Length of the Rx Descriptor Ring */
3166 for (i = 0; i < adapter->num_rx_queues; i++)
3167 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
3171 * igb_free_tx_resources - Free Tx Resources per Queue
3172 * @tx_ring: Tx descriptor ring for a specific queue
3174 * Free all transmit software resources
3176 void igb_free_tx_resources(struct igb_ring *tx_ring)
3178 igb_clean_tx_ring(tx_ring);
3180 vfree(tx_ring->tx_buffer_info);
3181 tx_ring->tx_buffer_info = NULL;
3183 /* if not set, then don't free */
3184 if (!tx_ring->desc)
3185 return;
3187 dma_free_coherent(tx_ring->dev, tx_ring->size,
3188 tx_ring->desc, tx_ring->dma);
3190 tx_ring->desc = NULL;
3194 * igb_free_all_tx_resources - Free Tx Resources for All Queues
3195 * @adapter: board private structure
3197 * Free all transmit software resources
3199 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3201 int i;
3203 for (i = 0; i < adapter->num_tx_queues; i++)
3204 igb_free_tx_resources(adapter->tx_ring[i]);
3207 void igb_unmap_and_free_tx_resource(struct igb_ring *ring,
3208 struct igb_tx_buffer *tx_buffer)
3210 if (tx_buffer->skb) {
3211 dev_kfree_skb_any(tx_buffer->skb);
3212 if (tx_buffer->dma)
3213 dma_unmap_single(ring->dev,
3214 tx_buffer->dma,
3215 tx_buffer->length,
3216 DMA_TO_DEVICE);
3217 } else if (tx_buffer->dma) {
3218 dma_unmap_page(ring->dev,
3219 tx_buffer->dma,
3220 tx_buffer->length,
3221 DMA_TO_DEVICE);
3223 tx_buffer->next_to_watch = NULL;
3224 tx_buffer->skb = NULL;
3225 tx_buffer->dma = 0;
3226 /* buffer_info must be completely set up in the transmit path */
3230 * igb_clean_tx_ring - Free Tx Buffers
3231 * @tx_ring: ring to be cleaned
3233 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3235 struct igb_tx_buffer *buffer_info;
3236 unsigned long size;
3237 u16 i;
3239 if (!tx_ring->tx_buffer_info)
3240 return;
3241 /* Free all the Tx ring sk_buffs */
3243 for (i = 0; i < tx_ring->count; i++) {
3244 buffer_info = &tx_ring->tx_buffer_info[i];
3245 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3247 netdev_tx_reset_queue(txring_txq(tx_ring));
3249 size = sizeof(struct igb_tx_buffer) * tx_ring->count;
3250 memset(tx_ring->tx_buffer_info, 0, size);
3252 /* Zero out the descriptor ring */
3253 memset(tx_ring->desc, 0, tx_ring->size);
3255 tx_ring->next_to_use = 0;
3256 tx_ring->next_to_clean = 0;
3260 * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3261 * @adapter: board private structure
3263 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3265 int i;
3267 for (i = 0; i < adapter->num_tx_queues; i++)
3268 igb_clean_tx_ring(adapter->tx_ring[i]);
3272 * igb_free_rx_resources - Free Rx Resources
3273 * @rx_ring: ring to clean the resources from
3275 * Free all receive software resources
3277 void igb_free_rx_resources(struct igb_ring *rx_ring)
3279 igb_clean_rx_ring(rx_ring);
3281 vfree(rx_ring->rx_buffer_info);
3282 rx_ring->rx_buffer_info = NULL;
3284 /* if not set, then don't free */
3285 if (!rx_ring->desc)
3286 return;
3288 dma_free_coherent(rx_ring->dev, rx_ring->size,
3289 rx_ring->desc, rx_ring->dma);
3291 rx_ring->desc = NULL;
3295 * igb_free_all_rx_resources - Free Rx Resources for All Queues
3296 * @adapter: board private structure
3298 * Free all receive software resources
3300 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3302 int i;
3304 for (i = 0; i < adapter->num_rx_queues; i++)
3305 igb_free_rx_resources(adapter->rx_ring[i]);
3309 * igb_clean_rx_ring - Free Rx Buffers per Queue
3310 * @rx_ring: ring to free buffers from
3312 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3314 unsigned long size;
3315 u16 i;
3317 if (!rx_ring->rx_buffer_info)
3318 return;
3320 /* Free all the Rx ring sk_buffs */
3321 for (i = 0; i < rx_ring->count; i++) {
3322 struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
3323 if (buffer_info->dma) {
3324 dma_unmap_single(rx_ring->dev,
3325 buffer_info->dma,
3326 IGB_RX_HDR_LEN,
3327 DMA_FROM_DEVICE);
3328 buffer_info->dma = 0;
3331 if (buffer_info->skb) {
3332 dev_kfree_skb(buffer_info->skb);
3333 buffer_info->skb = NULL;
3335 if (buffer_info->page_dma) {
3336 dma_unmap_page(rx_ring->dev,
3337 buffer_info->page_dma,
3338 PAGE_SIZE / 2,
3339 DMA_FROM_DEVICE);
3340 buffer_info->page_dma = 0;
3342 if (buffer_info->page) {
3343 put_page(buffer_info->page);
3344 buffer_info->page = NULL;
3345 buffer_info->page_offset = 0;
3349 size = sizeof(struct igb_rx_buffer) * rx_ring->count;
3350 memset(rx_ring->rx_buffer_info, 0, size);
3352 /* Zero out the descriptor ring */
3353 memset(rx_ring->desc, 0, rx_ring->size);
3355 rx_ring->next_to_clean = 0;
3356 rx_ring->next_to_use = 0;
3360 * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3361 * @adapter: board private structure
3363 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3365 int i;
3367 for (i = 0; i < adapter->num_rx_queues; i++)
3368 igb_clean_rx_ring(adapter->rx_ring[i]);
3372 * igb_set_mac - Change the Ethernet Address of the NIC
3373 * @netdev: network interface device structure
3374 * @p: pointer to an address structure
3376 * Returns 0 on success, negative on failure
3378 static int igb_set_mac(struct net_device *netdev, void *p)
3380 struct igb_adapter *adapter = netdev_priv(netdev);
3381 struct e1000_hw *hw = &adapter->hw;
3382 struct sockaddr *addr = p;
3384 if (!is_valid_ether_addr(addr->sa_data))
3385 return -EADDRNOTAVAIL;
3387 memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3388 memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3390 /* set the correct pool for the new PF MAC address in entry 0 */
3391 igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3392 adapter->vfs_allocated_count);
3394 return 0;
3398 * igb_write_mc_addr_list - write multicast addresses to MTA
3399 * @netdev: network interface device structure
3401 * Writes multicast address list to the MTA hash table.
3402 * Returns: -ENOMEM on failure
3403 * 0 on no addresses written
3404 * X on writing X addresses to MTA
3406 static int igb_write_mc_addr_list(struct net_device *netdev)
3408 struct igb_adapter *adapter = netdev_priv(netdev);
3409 struct e1000_hw *hw = &adapter->hw;
3410 struct netdev_hw_addr *ha;
3411 u8 *mta_list;
3412 int i;
3414 if (netdev_mc_empty(netdev)) {
3415 /* nothing to program, so clear mc list */
3416 igb_update_mc_addr_list(hw, NULL, 0);
3417 igb_restore_vf_multicasts(adapter);
3418 return 0;
3421 mta_list = kzalloc(netdev_mc_count(netdev) * ETH_ALEN, GFP_ATOMIC);
3422 if (!mta_list)
3423 return -ENOMEM;
3425 /* The shared function expects a packed array of only addresses. */
3426 i = 0;
3427 netdev_for_each_mc_addr(ha, netdev)
3428 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3430 igb_update_mc_addr_list(hw, mta_list, i);
3431 kfree(mta_list);
3433 return netdev_mc_count(netdev);
3437 * igb_write_uc_addr_list - write unicast addresses to RAR table
3438 * @netdev: network interface device structure
3440 * Writes unicast address list to the RAR table.
3441 * Returns: -ENOMEM on failure/insufficient address space
3442 * 0 on no addresses written
3443 * X on writing X addresses to the RAR table
3445 static int igb_write_uc_addr_list(struct net_device *netdev)
3447 struct igb_adapter *adapter = netdev_priv(netdev);
3448 struct e1000_hw *hw = &adapter->hw;
3449 unsigned int vfn = adapter->vfs_allocated_count;
3450 unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
3451 int count = 0;
3453 /* return ENOMEM indicating insufficient memory for addresses */
3454 if (netdev_uc_count(netdev) > rar_entries)
3455 return -ENOMEM;
3457 if (!netdev_uc_empty(netdev) && rar_entries) {
3458 struct netdev_hw_addr *ha;
3460 netdev_for_each_uc_addr(ha, netdev) {
3461 if (!rar_entries)
3462 break;
3463 igb_rar_set_qsel(adapter, ha->addr,
3464 rar_entries--,
3465 vfn);
3466 count++;
3469 /* write the addresses in reverse order to avoid write combining */
3470 for (; rar_entries > 0 ; rar_entries--) {
3471 wr32(E1000_RAH(rar_entries), 0);
3472 wr32(E1000_RAL(rar_entries), 0);
3474 wrfl();
3476 return count;
3480 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3481 * @netdev: network interface device structure
3483 * The set_rx_mode entry point is called whenever the unicast or multicast
3484 * address lists or the network interface flags are updated. This routine is
3485 * responsible for configuring the hardware for proper unicast, multicast,
3486 * promiscuous mode, and all-multi behavior.
3488 static void igb_set_rx_mode(struct net_device *netdev)
3490 struct igb_adapter *adapter = netdev_priv(netdev);
3491 struct e1000_hw *hw = &adapter->hw;
3492 unsigned int vfn = adapter->vfs_allocated_count;
3493 u32 rctl, vmolr = 0;
3494 int count;
3496 /* Check for Promiscuous and All Multicast modes */
3497 rctl = rd32(E1000_RCTL);
3499 /* clear the affected bits */
3500 rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3502 if (netdev->flags & IFF_PROMISC) {
3503 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3504 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3505 } else {
3506 if (netdev->flags & IFF_ALLMULTI) {
3507 rctl |= E1000_RCTL_MPE;
3508 vmolr |= E1000_VMOLR_MPME;
3509 } else {
3511 * Write addresses to the MTA; if the attempt fails
3512 * then we should just turn on promiscuous mode so
3513 * that we can at least receive multicast traffic
3515 count = igb_write_mc_addr_list(netdev);
3516 if (count < 0) {
3517 rctl |= E1000_RCTL_MPE;
3518 vmolr |= E1000_VMOLR_MPME;
3519 } else if (count) {
3520 vmolr |= E1000_VMOLR_ROMPE;
3524 * Write addresses to available RAR registers; if there is not
3525 * sufficient space to store all the addresses then enable
3526 * unicast promiscuous mode
3528 count = igb_write_uc_addr_list(netdev);
3529 if (count < 0) {
3530 rctl |= E1000_RCTL_UPE;
3531 vmolr |= E1000_VMOLR_ROPE;
3533 rctl |= E1000_RCTL_VFE;
3535 wr32(E1000_RCTL, rctl);
3538 * In order to support SR-IOV and eventually VMDq it is necessary to set
3539 * the VMOLR to enable the appropriate modes. Without this workaround
3540 * we will have issues with VLAN tag stripping not being done for frames
3541 * that are only arriving because we are the default pool
3543 if (hw->mac.type < e1000_82576)
3544 return;
3546 vmolr |= rd32(E1000_VMOLR(vfn)) &
3547 ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3548 wr32(E1000_VMOLR(vfn), vmolr);
3549 igb_restore_vf_multicasts(adapter);
3552 static void igb_check_wvbr(struct igb_adapter *adapter)
3554 struct e1000_hw *hw = &adapter->hw;
3555 u32 wvbr = 0;
3557 switch (hw->mac.type) {
3558 case e1000_82576:
3559 case e1000_i350:
3560 if (!(wvbr = rd32(E1000_WVBR)))
3561 return;
3562 break;
3563 default:
3564 break;
3567 adapter->wvbr |= wvbr;
3570 #define IGB_STAGGERED_QUEUE_OFFSET 8
3572 static void igb_spoof_check(struct igb_adapter *adapter)
3574 int j;
3576 if (!adapter->wvbr)
3577 return;
3579 for (j = 0; j < adapter->vfs_allocated_count; j++) {
3580 if (adapter->wvbr & (1 << j) ||
3581 adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3582 dev_warn(&adapter->pdev->dev,
3583 "Spoof event(s) detected on VF %d\n", j);
3584 adapter->wvbr &=
3585 ~((1 << j) |
3586 (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3591 /* Need to wait a few seconds after link up to get diagnostic information from
3592 * the phy */
3593 static void igb_update_phy_info(unsigned long data)
3595 struct igb_adapter *adapter = (struct igb_adapter *) data;
3596 igb_get_phy_info(&adapter->hw);
3600 * igb_has_link - check shared code for link and determine up/down
3601 * @adapter: pointer to driver private info
3603 bool igb_has_link(struct igb_adapter *adapter)
3605 struct e1000_hw *hw = &adapter->hw;
3606 bool link_active = false;
3607 s32 ret_val = 0;
3609 /* get_link_status is set on LSC (link status) interrupt or
3610 * rx sequence error interrupt. get_link_status will stay
3611 * false until the e1000_check_for_link establishes link
3612 * for copper adapters ONLY
3614 switch (hw->phy.media_type) {
3615 case e1000_media_type_copper:
3616 if (hw->mac.get_link_status) {
3617 ret_val = hw->mac.ops.check_for_link(hw);
3618 link_active = !hw->mac.get_link_status;
3619 } else {
3620 link_active = true;
3622 break;
3623 case e1000_media_type_internal_serdes:
3624 ret_val = hw->mac.ops.check_for_link(hw);
3625 link_active = hw->mac.serdes_has_link;
3626 break;
3627 default:
3628 case e1000_media_type_unknown:
3629 break;
3632 return link_active;
3635 static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event)
3637 bool ret = false;
3638 u32 ctrl_ext, thstat;
3640 /* check for thermal sensor event on i350, copper only */
3641 if (hw->mac.type == e1000_i350) {
3642 thstat = rd32(E1000_THSTAT);
3643 ctrl_ext = rd32(E1000_CTRL_EXT);
3645 if ((hw->phy.media_type == e1000_media_type_copper) &&
3646 !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) {
3647 ret = !!(thstat & event);
3651 return ret;
3655 * igb_watchdog - Timer Call-back
3656 * @data: pointer to adapter cast into an unsigned long
3658 static void igb_watchdog(unsigned long data)
3660 struct igb_adapter *adapter = (struct igb_adapter *)data;
3661 /* Do the rest outside of interrupt context */
3662 schedule_work(&adapter->watchdog_task);
3665 static void igb_watchdog_task(struct work_struct *work)
3667 struct igb_adapter *adapter = container_of(work,
3668 struct igb_adapter,
3669 watchdog_task);
3670 struct e1000_hw *hw = &adapter->hw;
3671 struct net_device *netdev = adapter->netdev;
3672 u32 link;
3673 int i;
3675 link = igb_has_link(adapter);
3676 if (link) {
3677 /* Cancel scheduled suspend requests. */
3678 pm_runtime_resume(netdev->dev.parent);
3680 if (!netif_carrier_ok(netdev)) {
3681 u32 ctrl;
3682 hw->mac.ops.get_speed_and_duplex(hw,
3683 &adapter->link_speed,
3684 &adapter->link_duplex);
3686 ctrl = rd32(E1000_CTRL);
3687 /* Link status message must follow this format */
3688 printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s "
3689 "Duplex, Flow Control: %s\n",
3690 netdev->name,
3691 adapter->link_speed,
3692 adapter->link_duplex == FULL_DUPLEX ?
3693 "Full" : "Half",
3694 (ctrl & E1000_CTRL_TFCE) &&
3695 (ctrl & E1000_CTRL_RFCE) ? "RX/TX" :
3696 (ctrl & E1000_CTRL_RFCE) ? "RX" :
3697 (ctrl & E1000_CTRL_TFCE) ? "TX" : "None");
3699 /* check for thermal sensor event */
3700 if (igb_thermal_sensor_event(hw,
3701 E1000_THSTAT_LINK_THROTTLE)) {
3702 netdev_info(netdev, "The network adapter link speed was downshifted because it overheated\n");
3707 /* adjust timeout factor according to speed/duplex */
3708 adapter->tx_timeout_factor = 1;
3709 switch (adapter->link_speed) {
3710 case SPEED_10:
3711 adapter->tx_timeout_factor = 14;
3712 break;
3713 case SPEED_100:
3714 /* maybe add some timeout factor ? */
3715 break;
3718 netif_carrier_on(netdev);
3720 igb_ping_all_vfs(adapter);
3721 igb_check_vf_rate_limit(adapter);
3723 /* link state has changed, schedule phy info update */
3724 if (!test_bit(__IGB_DOWN, &adapter->state))
3725 mod_timer(&adapter->phy_info_timer,
3726 round_jiffies(jiffies + 2 * HZ));
3728 } else {
3729 if (netif_carrier_ok(netdev)) {
3730 adapter->link_speed = 0;
3731 adapter->link_duplex = 0;
3733 /* check for thermal sensor event */
3734 if (igb_thermal_sensor_event(hw,
3735 E1000_THSTAT_PWR_DOWN)) {
3736 netdev_err(netdev, "The network adapter was stopped because it overheated\n");
3740 /* Link status message must follow this format */
3741 printk(KERN_INFO "igb: %s NIC Link is Down\n",
3742 netdev->name);
3743 netif_carrier_off(netdev);
3745 igb_ping_all_vfs(adapter);
3747 /* link state has changed, schedule phy info update */
3748 if (!test_bit(__IGB_DOWN, &adapter->state))
3749 mod_timer(&adapter->phy_info_timer,
3750 round_jiffies(jiffies + 2 * HZ));
3752 pm_schedule_suspend(netdev->dev.parent,
3753 MSEC_PER_SEC * 5);
3757 spin_lock(&adapter->stats64_lock);
3758 igb_update_stats(adapter, &adapter->stats64);
3759 spin_unlock(&adapter->stats64_lock);
3761 for (i = 0; i < adapter->num_tx_queues; i++) {
3762 struct igb_ring *tx_ring = adapter->tx_ring[i];
3763 if (!netif_carrier_ok(netdev)) {
3764 /* We've lost link, so the controller stops DMA,
3765 * but we've got queued Tx work that's never going
3766 * to get done, so reset controller to flush Tx.
3767 * (Do the reset outside of interrupt context). */
3768 if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3769 adapter->tx_timeout_count++;
3770 schedule_work(&adapter->reset_task);
3771 /* return immediately since reset is imminent */
3772 return;
3776 /* Force detection of hung controller every watchdog period */
3777 set_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
3780 /* Cause software interrupt to ensure rx ring is cleaned */
3781 if (adapter->msix_entries) {
3782 u32 eics = 0;
3783 for (i = 0; i < adapter->num_q_vectors; i++)
3784 eics |= adapter->q_vector[i]->eims_value;
3785 wr32(E1000_EICS, eics);
3786 } else {
3787 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3790 igb_spoof_check(adapter);
3792 /* Reset the timer */
3793 if (!test_bit(__IGB_DOWN, &adapter->state))
3794 mod_timer(&adapter->watchdog_timer,
3795 round_jiffies(jiffies + 2 * HZ));
3798 enum latency_range {
3799 lowest_latency = 0,
3800 low_latency = 1,
3801 bulk_latency = 2,
3802 latency_invalid = 255
3806 * igb_update_ring_itr - update the dynamic ITR value based on packet size
3808 * Stores a new ITR value based strictly on packet size. This
3809 * algorithm is less sophisticated than that used in igb_update_itr,
3810 * due to the difficulty of synchronizing statistics across multiple
3811 * receive rings. The divisors and thresholds used by this function
3812 * were determined based on theoretical maximum wire speed and testing
3813 * data, in order to minimize response time while increasing bulk
3814 * throughput.
3815 * This functionality is controlled by the InterruptThrottleRate module
3816 * parameter (see igb_param.c)
3817 * NOTE: This function is called only when operating in a multiqueue
3818 * receive environment.
3819 * @q_vector: pointer to q_vector
3821 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3823 int new_val = q_vector->itr_val;
3824 int avg_wire_size = 0;
3825 struct igb_adapter *adapter = q_vector->adapter;
3826 unsigned int packets;
3828 /* For non-gigabit speeds, just fix the interrupt rate at 4000
3829 * ints/sec - ITR timer value of 120 ticks.
3831 if (adapter->link_speed != SPEED_1000) {
3832 new_val = IGB_4K_ITR;
3833 goto set_itr_val;
3836 packets = q_vector->rx.total_packets;
3837 if (packets)
3838 avg_wire_size = q_vector->rx.total_bytes / packets;
3840 packets = q_vector->tx.total_packets;
3841 if (packets)
3842 avg_wire_size = max_t(u32, avg_wire_size,
3843 q_vector->tx.total_bytes / packets);
3845 /* if avg_wire_size isn't set no work was done */
3846 if (!avg_wire_size)
3847 goto clear_counts;
3849 /* Add 24 bytes to size to account for CRC, preamble, and gap */
3850 avg_wire_size += 24;
3852 /* Don't starve jumbo frames */
3853 avg_wire_size = min(avg_wire_size, 3000);
3855 /* Give a little boost to mid-size frames */
3856 if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3857 new_val = avg_wire_size / 3;
3858 else
3859 new_val = avg_wire_size / 2;
3861 /* conservative mode (itr 3) eliminates the lowest_latency setting */
3862 if (new_val < IGB_20K_ITR &&
3863 ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
3864 (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
3865 new_val = IGB_20K_ITR;
3867 set_itr_val:
3868 if (new_val != q_vector->itr_val) {
3869 q_vector->itr_val = new_val;
3870 q_vector->set_itr = 1;
3872 clear_counts:
3873 q_vector->rx.total_bytes = 0;
3874 q_vector->rx.total_packets = 0;
3875 q_vector->tx.total_bytes = 0;
3876 q_vector->tx.total_packets = 0;
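/* Worked example (editorial illustration, not part of the original
 * driver): back-to-back 1514-byte frames give
 *	avg_wire_size = 1514 + 24 = 1538
 * which lies outside the 300..1200 mid-size window, so new_val = 1538/2 = 769;
 * minimum-size 64-byte frames give (64 + 24)/2 = 44 instead.  In this
 * driver a larger new_val means a longer interval (fewer interrupts per
 * second), which is why the conservative-mode check above raises small
 * values up to IGB_20K_ITR.
 */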
3880 * igb_update_itr - update the dynamic ITR value based on statistics
3881  * Stores a new ITR value based on packet and byte
3882 * counts during the last interrupt. The advantage of per interrupt
3883 * computation is faster updates and more accurate ITR for the current
3884 * traffic pattern. Constants in this function were computed
3885 * based on theoretical maximum wire speed and thresholds were set based
3886 * on testing data as well as attempting to minimize response time
3887 * while increasing bulk throughput.
3888  * This functionality is controlled by the InterruptThrottleRate module
3889 * parameter (see igb_param.c)
3890 * NOTE: These calculations are only valid when operating in a single-
3891 * queue environment.
3892 * @q_vector: pointer to q_vector
3893 * @ring_container: ring info to update the itr for
3895 static void igb_update_itr(struct igb_q_vector *q_vector,
3896 struct igb_ring_container *ring_container)
3898 unsigned int packets = ring_container->total_packets;
3899 unsigned int bytes = ring_container->total_bytes;
3900 u8 itrval = ring_container->itr;
3902 /* no packets, exit with status unchanged */
3903 if (packets == 0)
3904 return;
3906 switch (itrval) {
3907 case lowest_latency:
3908 /* handle TSO and jumbo frames */
3909 if (bytes/packets > 8000)
3910 itrval = bulk_latency;
3911 else if ((packets < 5) && (bytes > 512))
3912 itrval = low_latency;
3913 break;
3914 case low_latency: /* 50 usec aka 20000 ints/s */
3915 if (bytes > 10000) {
3916 /* this if handles the TSO accounting */
3917 if (bytes/packets > 8000) {
3918 itrval = bulk_latency;
3919 } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3920 itrval = bulk_latency;
3921 } else if ((packets > 35)) {
3922 itrval = lowest_latency;
3924 } else if (bytes/packets > 2000) {
3925 itrval = bulk_latency;
3926 } else if (packets <= 2 && bytes < 512) {
3927 itrval = lowest_latency;
3929 break;
3930 case bulk_latency: /* 250 usec aka 4000 ints/s */
3931 if (bytes > 25000) {
3932 if (packets > 35)
3933 itrval = low_latency;
3934 } else if (bytes < 1500) {
3935 itrval = low_latency;
3937 break;
3940 /* clear work counters since we have the values we need */
3941 ring_container->total_bytes = 0;
3942 ring_container->total_packets = 0;
3944 /* write updated itr to ring container */
3945 ring_container->itr = itrval;
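/* Worked example (editorial illustration): a container currently at
 * low_latency that saw 40 packets / 50000 bytes since the last update
 * takes the "bytes > 10000" branch; 50000/40 = 1250 > 1200, so it is
 * demoted to bulk_latency.  With 40 packets totalling only 4000 bytes,
 * none of the low_latency conditions match and the state is unchanged.
 */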
3948 static void igb_set_itr(struct igb_q_vector *q_vector)
3950 struct igb_adapter *adapter = q_vector->adapter;
3951 u32 new_itr = q_vector->itr_val;
3952 u8 current_itr = 0;
3954 /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3955 if (adapter->link_speed != SPEED_1000) {
3956 current_itr = 0;
3957 new_itr = IGB_4K_ITR;
3958 goto set_itr_now;
3961 igb_update_itr(q_vector, &q_vector->tx);
3962 igb_update_itr(q_vector, &q_vector->rx);
3964 current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
3966 /* conservative mode (itr 3) eliminates the lowest_latency setting */
3967 if (current_itr == lowest_latency &&
3968 ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
3969 (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
3970 current_itr = low_latency;
3972 switch (current_itr) {
3973 /* counts and packets in update_itr are dependent on these numbers */
3974 case lowest_latency:
3975 new_itr = IGB_70K_ITR; /* 70,000 ints/sec */
3976 break;
3977 case low_latency:
3978 new_itr = IGB_20K_ITR; /* 20,000 ints/sec */
3979 break;
3980 case bulk_latency:
3981 new_itr = IGB_4K_ITR; /* 4,000 ints/sec */
3982 break;
3983 default:
3984 break;
3987 set_itr_now:
3988 if (new_itr != q_vector->itr_val) {
3989 /* this attempts to bias the interrupt rate towards Bulk
3990 * by adding intermediate steps when interrupt rate is
3991 * increasing */
3992 new_itr = new_itr > q_vector->itr_val ?
3993 max((new_itr * q_vector->itr_val) /
3994 (new_itr + (q_vector->itr_val >> 2)),
3995 new_itr) :
3996 new_itr;
3997 /* Don't write the value here; it resets the adapter's
3998 * internal timer, and causes us to delay far longer than
3999 * we should between interrupts. Instead, we write the ITR
4000 * value at the beginning of the next interrupt so the timing
4001 * ends up being correct.
4003 q_vector->itr_val = new_itr;
4004 q_vector->set_itr = 1;
4008 static void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens,
4009 u32 type_tucmd, u32 mss_l4len_idx)
4011 struct e1000_adv_tx_context_desc *context_desc;
4012 u16 i = tx_ring->next_to_use;
4014 context_desc = IGB_TX_CTXTDESC(tx_ring, i);
4016 i++;
4017 tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
4019 /* set bits to identify this as an advanced context descriptor */
4020 type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
4022 /* For 82575, context index must be unique per ring. */
4023 if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4024 mss_l4len_idx |= tx_ring->reg_idx << 4;
4026 context_desc->vlan_macip_lens = cpu_to_le32(vlan_macip_lens);
4027 context_desc->seqnum_seed = 0;
4028 context_desc->type_tucmd_mlhl = cpu_to_le32(type_tucmd);
4029 context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
4032 static int igb_tso(struct igb_ring *tx_ring,
4033 struct igb_tx_buffer *first,
4034 u8 *hdr_len)
4036 struct sk_buff *skb = first->skb;
4037 u32 vlan_macip_lens, type_tucmd;
4038 u32 mss_l4len_idx, l4len;
4040 if (!skb_is_gso(skb))
4041 return 0;
4043 if (skb_header_cloned(skb)) {
4044 int err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
4045 if (err)
4046 return err;
4049 /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
4050 type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;
4052 if (first->protocol == __constant_htons(ETH_P_IP)) {
4053 struct iphdr *iph = ip_hdr(skb);
4054 iph->tot_len = 0;
4055 iph->check = 0;
4056 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
4057 iph->daddr, 0,
4058 IPPROTO_TCP,
4060 type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4061 first->tx_flags |= IGB_TX_FLAGS_TSO |
4062 IGB_TX_FLAGS_CSUM |
4063 IGB_TX_FLAGS_IPV4;
4064 } else if (skb_is_gso_v6(skb)) {
4065 ipv6_hdr(skb)->payload_len = 0;
4066 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
4067 &ipv6_hdr(skb)->daddr,
4068 0, IPPROTO_TCP, 0);
4069 first->tx_flags |= IGB_TX_FLAGS_TSO |
4070 IGB_TX_FLAGS_CSUM;
4073 /* compute header lengths */
4074 l4len = tcp_hdrlen(skb);
4075 *hdr_len = skb_transport_offset(skb) + l4len;
4077 /* update gso size and bytecount with header size */
4078 first->gso_segs = skb_shinfo(skb)->gso_segs;
4079 first->bytecount += (first->gso_segs - 1) * *hdr_len;
4081 /* MSS L4LEN IDX */
4082 mss_l4len_idx = l4len << E1000_ADVTXD_L4LEN_SHIFT;
4083 mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT;
4085 /* VLAN MACLEN IPLEN */
4086 vlan_macip_lens = skb_network_header_len(skb);
4087 vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4088 vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4090 igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4092 return 1;
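/* Worked example (editorial illustration): for a TSO skb carrying
 * 14900 bytes of TCP payload with gso_size 1448 and a 14 + 20 + 20 byte
 * Ethernet/IPv4/TCP header, hdr_len = 54 and gso_segs = 11 (the stack's
 * ceil of 14900/1448).  Each wire segment repeats the 54-byte header, so
 * the bytecount used for statistics is increased by (11 - 1) * 54 = 540
 * bytes over skb->len, and l4len/gso_size are packed into the single
 * mss_l4len_idx field of the context descriptor via the shifts above.
 */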
4095 static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first)
4097 struct sk_buff *skb = first->skb;
4098 u32 vlan_macip_lens = 0;
4099 u32 mss_l4len_idx = 0;
4100 u32 type_tucmd = 0;
4102 if (skb->ip_summed != CHECKSUM_PARTIAL) {
4103 if (!(first->tx_flags & IGB_TX_FLAGS_VLAN))
4104 return;
4105 } else {
4106 u8 l4_hdr = 0;
4107 switch (first->protocol) {
4108 case __constant_htons(ETH_P_IP):
4109 vlan_macip_lens |= skb_network_header_len(skb);
4110 type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4111 l4_hdr = ip_hdr(skb)->protocol;
4112 break;
4113 case __constant_htons(ETH_P_IPV6):
4114 vlan_macip_lens |= skb_network_header_len(skb);
4115 l4_hdr = ipv6_hdr(skb)->nexthdr;
4116 break;
4117 default:
4118 if (unlikely(net_ratelimit())) {
4119 dev_warn(tx_ring->dev,
4120 "partial checksum but proto=%x!\n",
4121 first->protocol);
4123 break;
4126 switch (l4_hdr) {
4127 case IPPROTO_TCP:
4128 type_tucmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4129 mss_l4len_idx = tcp_hdrlen(skb) <<
4130 E1000_ADVTXD_L4LEN_SHIFT;
4131 break;
4132 case IPPROTO_SCTP:
4133 type_tucmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4134 mss_l4len_idx = sizeof(struct sctphdr) <<
4135 E1000_ADVTXD_L4LEN_SHIFT;
4136 break;
4137 case IPPROTO_UDP:
4138 mss_l4len_idx = sizeof(struct udphdr) <<
4139 E1000_ADVTXD_L4LEN_SHIFT;
4140 break;
4141 default:
4142 if (unlikely(net_ratelimit())) {
4143 dev_warn(tx_ring->dev,
4144 "partial checksum but l4 proto=%x!\n",
4145 l4_hdr);
4147 break;
4150 /* update TX checksum flag */
4151 first->tx_flags |= IGB_TX_FLAGS_CSUM;
4154 vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4155 vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4157 igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4160 static __le32 igb_tx_cmd_type(u32 tx_flags)
4162 /* set type for advanced descriptor with frame checksum insertion */
4163 __le32 cmd_type = cpu_to_le32(E1000_ADVTXD_DTYP_DATA |
4164 E1000_ADVTXD_DCMD_IFCS |
4165 E1000_ADVTXD_DCMD_DEXT);
4167 /* set HW vlan bit if vlan is present */
4168 if (tx_flags & IGB_TX_FLAGS_VLAN)
4169 cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_VLE);
4171 /* set timestamp bit if present */
4172 if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4173 cmd_type |= cpu_to_le32(E1000_ADVTXD_MAC_TSTAMP);
4175 /* set segmentation bits for TSO */
4176 if (tx_flags & IGB_TX_FLAGS_TSO)
4177 cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_TSE);
4179 return cmd_type;
4182 static void igb_tx_olinfo_status(struct igb_ring *tx_ring,
4183 union e1000_adv_tx_desc *tx_desc,
4184 u32 tx_flags, unsigned int paylen)
4186 u32 olinfo_status = paylen << E1000_ADVTXD_PAYLEN_SHIFT;
4188 /* 82575 requires a unique index per ring if any offload is enabled */
4189 if ((tx_flags & (IGB_TX_FLAGS_CSUM | IGB_TX_FLAGS_VLAN)) &&
4190 test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4191 olinfo_status |= tx_ring->reg_idx << 4;
4193 /* insert L4 checksum */
4194 if (tx_flags & IGB_TX_FLAGS_CSUM) {
4195 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4197 /* insert IPv4 checksum */
4198 if (tx_flags & IGB_TX_FLAGS_IPV4)
4199 olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4202 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4206 * The largest size we can write to the descriptor is 65535. In order to
4207 * maintain a power of two alignment we have to limit ourselves to 32K.
4209 #define IGB_MAX_TXD_PWR 15
4210 #define IGB_MAX_DATA_PER_TXD (1<<IGB_MAX_TXD_PWR)
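/* Illustrative note (editorial, not in the original source): a single
 * 45000-byte fragment exceeds IGB_MAX_DATA_PER_TXD (32768), so
 * igb_tx_map() below emits one 32768-byte descriptor followed by a
 * 45000 - 32768 = 12232-byte descriptor for the remainder, keeping each
 * descriptor length within the 32K power-of-two limit described above.
 */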
4212 static void igb_tx_map(struct igb_ring *tx_ring,
4213 struct igb_tx_buffer *first,
4214 const u8 hdr_len)
4216 struct sk_buff *skb = first->skb;
4217 struct igb_tx_buffer *tx_buffer_info;
4218 union e1000_adv_tx_desc *tx_desc;
4219 dma_addr_t dma;
4220 struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
4221 unsigned int data_len = skb->data_len;
4222 unsigned int size = skb_headlen(skb);
4223 unsigned int paylen = skb->len - hdr_len;
4224 __le32 cmd_type;
4225 u32 tx_flags = first->tx_flags;
4226 u16 i = tx_ring->next_to_use;
4228 tx_desc = IGB_TX_DESC(tx_ring, i);
4230 igb_tx_olinfo_status(tx_ring, tx_desc, tx_flags, paylen);
4231 cmd_type = igb_tx_cmd_type(tx_flags);
4233 dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
4234 if (dma_mapping_error(tx_ring->dev, dma))
4235 goto dma_error;
4237 /* record length, and DMA address */
4238 first->length = size;
4239 first->dma = dma;
4240 tx_desc->read.buffer_addr = cpu_to_le64(dma);
4242 for (;;) {
4243 while (unlikely(size > IGB_MAX_DATA_PER_TXD)) {
4244 tx_desc->read.cmd_type_len =
4245 cmd_type | cpu_to_le32(IGB_MAX_DATA_PER_TXD);
4247 i++;
4248 tx_desc++;
4249 if (i == tx_ring->count) {
4250 tx_desc = IGB_TX_DESC(tx_ring, 0);
4251 i = 0;
4254 dma += IGB_MAX_DATA_PER_TXD;
4255 size -= IGB_MAX_DATA_PER_TXD;
4257 tx_desc->read.olinfo_status = 0;
4258 tx_desc->read.buffer_addr = cpu_to_le64(dma);
4261 if (likely(!data_len))
4262 break;
4264 tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(size);
4266 i++;
4267 tx_desc++;
4268 if (i == tx_ring->count) {
4269 tx_desc = IGB_TX_DESC(tx_ring, 0);
4270 i = 0;
4273 size = skb_frag_size(frag);
4274 data_len -= size;
4276 dma = skb_frag_dma_map(tx_ring->dev, frag, 0,
4277 size, DMA_TO_DEVICE);
4278 if (dma_mapping_error(tx_ring->dev, dma))
4279 goto dma_error;
4281 tx_buffer_info = &tx_ring->tx_buffer_info[i];
4282 tx_buffer_info->length = size;
4283 tx_buffer_info->dma = dma;
4285 tx_desc->read.olinfo_status = 0;
4286 tx_desc->read.buffer_addr = cpu_to_le64(dma);
4288 frag++;
4291 netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
4293 /* write last descriptor with RS and EOP bits */
4294 cmd_type |= cpu_to_le32(size) | cpu_to_le32(IGB_TXD_DCMD);
4295 tx_desc->read.cmd_type_len = cmd_type;
4297 /* set the timestamp */
4298 first->time_stamp = jiffies;
4301 * Force memory writes to complete before letting h/w know there
4302 * are new descriptors to fetch. (Only applicable for weak-ordered
4303 * memory model archs, such as IA-64).
4305 * We also need this memory barrier to make certain all of the
4306 * status bits have been updated before next_to_watch is written.
4308 wmb();
4310 /* set next_to_watch value indicating a packet is present */
4311 first->next_to_watch = tx_desc;
4313 i++;
4314 if (i == tx_ring->count)
4315 i = 0;
4317 tx_ring->next_to_use = i;
4319 writel(i, tx_ring->tail);
4321 /* we need this if more than one processor can write to our tail
4322 	 * at a time; it synchronizes IO on IA64/Altix systems */
4323 mmiowb();
4325 return;
4327 dma_error:
4328 dev_err(tx_ring->dev, "TX DMA map failed\n");
4330 /* clear dma mappings for failed tx_buffer_info map */
4331 for (;;) {
4332 tx_buffer_info = &tx_ring->tx_buffer_info[i];
4333 igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info);
4334 if (tx_buffer_info == first)
4335 break;
4336 if (i == 0)
4337 i = tx_ring->count;
4338 i--;
4341 tx_ring->next_to_use = i;
4344 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4346 struct net_device *netdev = tx_ring->netdev;
4348 netif_stop_subqueue(netdev, tx_ring->queue_index);
4350 /* Herbert's original patch had:
4351 * smp_mb__after_netif_stop_queue();
4352 * but since that doesn't exist yet, just open code it. */
4353 smp_mb();
4355 	/* We need to check again in case another CPU has just
4356 * made room available. */
4357 if (igb_desc_unused(tx_ring) < size)
4358 return -EBUSY;
4360 /* A reprieve! */
4361 netif_wake_subqueue(netdev, tx_ring->queue_index);
4363 u64_stats_update_begin(&tx_ring->tx_syncp2);
4364 tx_ring->tx_stats.restart_queue2++;
4365 u64_stats_update_end(&tx_ring->tx_syncp2);
4367 return 0;
4370 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4372 if (igb_desc_unused(tx_ring) >= size)
4373 return 0;
4374 return __igb_maybe_stop_tx(tx_ring, size);
4377 netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
4378 struct igb_ring *tx_ring)
4380 struct igb_tx_buffer *first;
4381 int tso;
4382 u32 tx_flags = 0;
4383 __be16 protocol = vlan_get_protocol(skb);
4384 u8 hdr_len = 0;
4386 /* need: 1 descriptor per page,
4387 * + 2 desc gap to keep tail from touching head,
4388 * + 1 desc for skb->data,
4389 * + 1 desc for context descriptor,
4390 * otherwise try next time */
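	/* Editorial example: an skb with 3 page fragments needs
	 * 3 + 4 = 7 free descriptors before transmission is attempted;
	 * with fewer available, igb_maybe_stop_tx() stops the subqueue
	 * and we return NETDEV_TX_BUSY below.
	 */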
4391 if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4392 /* this is a hard error */
4393 return NETDEV_TX_BUSY;
4396 /* record the location of the first descriptor for this packet */
4397 first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
4398 first->skb = skb;
4399 first->bytecount = skb->len;
4400 first->gso_segs = 1;
4402 if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4403 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4404 tx_flags |= IGB_TX_FLAGS_TSTAMP;
4407 if (vlan_tx_tag_present(skb)) {
4408 tx_flags |= IGB_TX_FLAGS_VLAN;
4409 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4412 /* record initial flags and protocol */
4413 first->tx_flags = tx_flags;
4414 first->protocol = protocol;
4416 tso = igb_tso(tx_ring, first, &hdr_len);
4417 if (tso < 0)
4418 goto out_drop;
4419 else if (!tso)
4420 igb_tx_csum(tx_ring, first);
4422 igb_tx_map(tx_ring, first, hdr_len);
4424 /* Make sure there is space in the ring for the next send. */
4425 igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4427 return NETDEV_TX_OK;
4429 out_drop:
4430 igb_unmap_and_free_tx_resource(tx_ring, first);
4432 return NETDEV_TX_OK;
4435 static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
4436 struct sk_buff *skb)
4438 unsigned int r_idx = skb->queue_mapping;
4440 if (r_idx >= adapter->num_tx_queues)
4441 r_idx = r_idx % adapter->num_tx_queues;
4443 return adapter->tx_ring[r_idx];
4446 static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
4447 struct net_device *netdev)
4449 struct igb_adapter *adapter = netdev_priv(netdev);
4451 if (test_bit(__IGB_DOWN, &adapter->state)) {
4452 dev_kfree_skb_any(skb);
4453 return NETDEV_TX_OK;
4456 if (skb->len <= 0) {
4457 dev_kfree_skb_any(skb);
4458 return NETDEV_TX_OK;
4462 * The minimum packet size with TCTL.PSP set is 17 so pad the skb
4463 * in order to meet this minimum size requirement.
4465 if (skb->len < 17) {
4466 if (skb_padto(skb, 17))
4467 return NETDEV_TX_OK;
4468 skb->len = 17;
4471 return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb));
4475 * igb_tx_timeout - Respond to a Tx Hang
4476 * @netdev: network interface device structure
4478 static void igb_tx_timeout(struct net_device *netdev)
4480 struct igb_adapter *adapter = netdev_priv(netdev);
4481 struct e1000_hw *hw = &adapter->hw;
4483 /* Do the reset outside of interrupt context */
4484 adapter->tx_timeout_count++;
4486 if (hw->mac.type >= e1000_82580)
4487 hw->dev_spec._82575.global_device_reset = true;
4489 schedule_work(&adapter->reset_task);
4490 wr32(E1000_EICS,
4491 (adapter->eims_enable_mask & ~adapter->eims_other));
4494 static void igb_reset_task(struct work_struct *work)
4496 struct igb_adapter *adapter;
4497 adapter = container_of(work, struct igb_adapter, reset_task);
4499 igb_dump(adapter);
4500 netdev_err(adapter->netdev, "Reset adapter\n");
4501 igb_reinit_locked(adapter);
4505 * igb_get_stats64 - Get System Network Statistics
4506 * @netdev: network interface device structure
4507 * @stats: rtnl_link_stats64 pointer
4510 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4511 struct rtnl_link_stats64 *stats)
4513 struct igb_adapter *adapter = netdev_priv(netdev);
4515 spin_lock(&adapter->stats64_lock);
4516 igb_update_stats(adapter, &adapter->stats64);
4517 memcpy(stats, &adapter->stats64, sizeof(*stats));
4518 spin_unlock(&adapter->stats64_lock);
4520 return stats;
4524 * igb_change_mtu - Change the Maximum Transfer Unit
4525 * @netdev: network interface device structure
4526 * @new_mtu: new value for maximum frame size
4528 * Returns 0 on success, negative on failure
4530 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4532 struct igb_adapter *adapter = netdev_priv(netdev);
4533 struct pci_dev *pdev = adapter->pdev;
4534 int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
4536 if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4537 dev_err(&pdev->dev, "Invalid MTU setting\n");
4538 return -EINVAL;
4541 #define MAX_STD_JUMBO_FRAME_SIZE 9238
4542 if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4543 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4544 return -EINVAL;
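	/* Editorial arithmetic note: max_frame adds ETH_HLEN (14) +
	 * ETH_FCS_LEN (4) + VLAN_HLEN (4) = 22 bytes of overhead, so the
	 * MAX_STD_JUMBO_FRAME_SIZE limit of 9238 corresponds to the
	 * 9216-byte MTU mentioned in the error message above.
	 */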
4547 while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4548 msleep(1);
4550 /* igb_down has a dependency on max_frame_size */
4551 adapter->max_frame_size = max_frame;
4553 if (netif_running(netdev))
4554 igb_down(adapter);
4556 dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4557 netdev->mtu, new_mtu);
4558 netdev->mtu = new_mtu;
4560 if (netif_running(netdev))
4561 igb_up(adapter);
4562 else
4563 igb_reset(adapter);
4565 clear_bit(__IGB_RESETTING, &adapter->state);
4567 return 0;
4571 * igb_update_stats - Update the board statistics counters
4572 * @adapter: board private structure
4575 void igb_update_stats(struct igb_adapter *adapter,
4576 struct rtnl_link_stats64 *net_stats)
4578 struct e1000_hw *hw = &adapter->hw;
4579 struct pci_dev *pdev = adapter->pdev;
4580 u32 reg, mpc;
4581 u16 phy_tmp;
4582 int i;
4583 u64 bytes, packets;
4584 unsigned int start;
4585 u64 _bytes, _packets;
4587 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4590 * Prevent stats update while adapter is being reset, or if the pci
4591 * connection is down.
4593 if (adapter->link_speed == 0)
4594 return;
4595 if (pci_channel_offline(pdev))
4596 return;
4598 bytes = 0;
4599 packets = 0;
4600 for (i = 0; i < adapter->num_rx_queues; i++) {
4601 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4602 struct igb_ring *ring = adapter->rx_ring[i];
4604 ring->rx_stats.drops += rqdpc_tmp;
4605 net_stats->rx_fifo_errors += rqdpc_tmp;
4607 do {
4608 start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4609 _bytes = ring->rx_stats.bytes;
4610 _packets = ring->rx_stats.packets;
4611 } while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4612 bytes += _bytes;
4613 packets += _packets;
4616 net_stats->rx_bytes = bytes;
4617 net_stats->rx_packets = packets;
4619 bytes = 0;
4620 packets = 0;
4621 for (i = 0; i < adapter->num_tx_queues; i++) {
4622 struct igb_ring *ring = adapter->tx_ring[i];
4623 do {
4624 start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4625 _bytes = ring->tx_stats.bytes;
4626 _packets = ring->tx_stats.packets;
4627 } while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4628 bytes += _bytes;
4629 packets += _packets;
4631 net_stats->tx_bytes = bytes;
4632 net_stats->tx_packets = packets;
4634 /* read stats registers */
4635 adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4636 adapter->stats.gprc += rd32(E1000_GPRC);
4637 adapter->stats.gorc += rd32(E1000_GORCL);
4638 rd32(E1000_GORCH); /* clear GORCL */
4639 adapter->stats.bprc += rd32(E1000_BPRC);
4640 adapter->stats.mprc += rd32(E1000_MPRC);
4641 adapter->stats.roc += rd32(E1000_ROC);
4643 adapter->stats.prc64 += rd32(E1000_PRC64);
4644 adapter->stats.prc127 += rd32(E1000_PRC127);
4645 adapter->stats.prc255 += rd32(E1000_PRC255);
4646 adapter->stats.prc511 += rd32(E1000_PRC511);
4647 adapter->stats.prc1023 += rd32(E1000_PRC1023);
4648 adapter->stats.prc1522 += rd32(E1000_PRC1522);
4649 adapter->stats.symerrs += rd32(E1000_SYMERRS);
4650 adapter->stats.sec += rd32(E1000_SEC);
4652 mpc = rd32(E1000_MPC);
4653 adapter->stats.mpc += mpc;
4654 net_stats->rx_fifo_errors += mpc;
4655 adapter->stats.scc += rd32(E1000_SCC);
4656 adapter->stats.ecol += rd32(E1000_ECOL);
4657 adapter->stats.mcc += rd32(E1000_MCC);
4658 adapter->stats.latecol += rd32(E1000_LATECOL);
4659 adapter->stats.dc += rd32(E1000_DC);
4660 adapter->stats.rlec += rd32(E1000_RLEC);
4661 adapter->stats.xonrxc += rd32(E1000_XONRXC);
4662 adapter->stats.xontxc += rd32(E1000_XONTXC);
4663 adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4664 adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4665 adapter->stats.fcruc += rd32(E1000_FCRUC);
4666 adapter->stats.gptc += rd32(E1000_GPTC);
4667 adapter->stats.gotc += rd32(E1000_GOTCL);
4668 rd32(E1000_GOTCH); /* clear GOTCL */
4669 adapter->stats.rnbc += rd32(E1000_RNBC);
4670 adapter->stats.ruc += rd32(E1000_RUC);
4671 adapter->stats.rfc += rd32(E1000_RFC);
4672 adapter->stats.rjc += rd32(E1000_RJC);
4673 adapter->stats.tor += rd32(E1000_TORH);
4674 adapter->stats.tot += rd32(E1000_TOTH);
4675 adapter->stats.tpr += rd32(E1000_TPR);
4677 adapter->stats.ptc64 += rd32(E1000_PTC64);
4678 adapter->stats.ptc127 += rd32(E1000_PTC127);
4679 adapter->stats.ptc255 += rd32(E1000_PTC255);
4680 adapter->stats.ptc511 += rd32(E1000_PTC511);
4681 adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4682 adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4684 adapter->stats.mptc += rd32(E1000_MPTC);
4685 adapter->stats.bptc += rd32(E1000_BPTC);
4687 adapter->stats.tpt += rd32(E1000_TPT);
4688 adapter->stats.colc += rd32(E1000_COLC);
4690 adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4691 /* read internal phy specific stats */
4692 reg = rd32(E1000_CTRL_EXT);
4693 if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4694 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4695 adapter->stats.tncrs += rd32(E1000_TNCRS);
4698 adapter->stats.tsctc += rd32(E1000_TSCTC);
4699 adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4701 adapter->stats.iac += rd32(E1000_IAC);
4702 adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4703 adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4704 adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4705 adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4706 adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4707 adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4708 adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4709 adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4711 /* Fill out the OS statistics structure */
4712 net_stats->multicast = adapter->stats.mprc;
4713 net_stats->collisions = adapter->stats.colc;
4715 /* Rx Errors */
4717 /* RLEC on some newer hardware can be incorrect so build
4718 * our own version based on RUC and ROC */
4719 net_stats->rx_errors = adapter->stats.rxerrc +
4720 adapter->stats.crcerrs + adapter->stats.algnerrc +
4721 adapter->stats.ruc + adapter->stats.roc +
4722 adapter->stats.cexterr;
4723 net_stats->rx_length_errors = adapter->stats.ruc +
4724 adapter->stats.roc;
4725 net_stats->rx_crc_errors = adapter->stats.crcerrs;
4726 net_stats->rx_frame_errors = adapter->stats.algnerrc;
4727 net_stats->rx_missed_errors = adapter->stats.mpc;
4729 /* Tx Errors */
4730 net_stats->tx_errors = adapter->stats.ecol +
4731 adapter->stats.latecol;
4732 net_stats->tx_aborted_errors = adapter->stats.ecol;
4733 net_stats->tx_window_errors = adapter->stats.latecol;
4734 net_stats->tx_carrier_errors = adapter->stats.tncrs;
4736 /* Tx Dropped needs to be maintained elsewhere */
4738 /* Phy Stats */
4739 if (hw->phy.media_type == e1000_media_type_copper) {
4740 if ((adapter->link_speed == SPEED_1000) &&
4741 (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4742 phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4743 adapter->phy_stats.idle_errors += phy_tmp;
4747 /* Management Stats */
4748 adapter->stats.mgptc += rd32(E1000_MGTPTC);
4749 adapter->stats.mgprc += rd32(E1000_MGTPRC);
4750 adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4752 /* OS2BMC Stats */
4753 reg = rd32(E1000_MANC);
4754 if (reg & E1000_MANC_EN_BMC2OS) {
4755 adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
4756 adapter->stats.o2bspc += rd32(E1000_O2BSPC);
4757 adapter->stats.b2ospc += rd32(E1000_B2OSPC);
4758 adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
4762 static irqreturn_t igb_msix_other(int irq, void *data)
4764 struct igb_adapter *adapter = data;
4765 struct e1000_hw *hw = &adapter->hw;
4766 u32 icr = rd32(E1000_ICR);
4767 /* reading ICR causes bit 31 of EICR to be cleared */
4769 if (icr & E1000_ICR_DRSTA)
4770 schedule_work(&adapter->reset_task);
4772 if (icr & E1000_ICR_DOUTSYNC) {
4773 /* HW is reporting DMA is out of sync */
4774 adapter->stats.doosync++;
4775 		/* The DMA Out of Sync is also an indication of a spoof event
4776 * in IOV mode. Check the Wrong VM Behavior register to
4777 * see if it is really a spoof event. */
4778 igb_check_wvbr(adapter);
4781 /* Check for a mailbox event */
4782 if (icr & E1000_ICR_VMMB)
4783 igb_msg_task(adapter);
4785 if (icr & E1000_ICR_LSC) {
4786 hw->mac.get_link_status = 1;
4787 /* guard against interrupt when we're going down */
4788 if (!test_bit(__IGB_DOWN, &adapter->state))
4789 mod_timer(&adapter->watchdog_timer, jiffies + 1);
4792 wr32(E1000_EIMS, adapter->eims_other);
4794 return IRQ_HANDLED;
4797 static void igb_write_itr(struct igb_q_vector *q_vector)
4799 struct igb_adapter *adapter = q_vector->adapter;
4800 u32 itr_val = q_vector->itr_val & 0x7FFC;
4802 if (!q_vector->set_itr)
4803 return;
4805 if (!itr_val)
4806 itr_val = 0x4;
4808 if (adapter->hw.mac.type == e1000_82575)
4809 itr_val |= itr_val << 16;
4810 else
4811 itr_val |= E1000_EITR_CNT_IGNR;
4813 writel(itr_val, q_vector->itr_register);
4814 q_vector->set_itr = 0;
4817 static irqreturn_t igb_msix_ring(int irq, void *data)
4819 struct igb_q_vector *q_vector = data;
4821 /* Write the ITR value calculated from the previous interrupt. */
4822 igb_write_itr(q_vector);
4824 napi_schedule(&q_vector->napi);
4826 return IRQ_HANDLED;
4829 #ifdef CONFIG_IGB_DCA
4830 static void igb_update_dca(struct igb_q_vector *q_vector)
4832 struct igb_adapter *adapter = q_vector->adapter;
4833 struct e1000_hw *hw = &adapter->hw;
4834 int cpu = get_cpu();
4836 if (q_vector->cpu == cpu)
4837 goto out_no_update;
4839 if (q_vector->tx.ring) {
4840 int q = q_vector->tx.ring->reg_idx;
4841 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4842 if (hw->mac.type == e1000_82575) {
4843 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4844 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4845 } else {
4846 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4847 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4848 E1000_DCA_TXCTRL_CPUID_SHIFT;
4850 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4851 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4853 if (q_vector->rx.ring) {
4854 int q = q_vector->rx.ring->reg_idx;
4855 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4856 if (hw->mac.type == e1000_82575) {
4857 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4858 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4859 } else {
4860 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4861 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4862 E1000_DCA_RXCTRL_CPUID_SHIFT;
4864 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4865 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4866 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4867 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4869 q_vector->cpu = cpu;
4870 out_no_update:
4871 put_cpu();
4874 static void igb_setup_dca(struct igb_adapter *adapter)
4876 struct e1000_hw *hw = &adapter->hw;
4877 int i;
4879 if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4880 return;
4882 /* Always use CB2 mode, difference is masked in the CB driver. */
4883 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4885 for (i = 0; i < adapter->num_q_vectors; i++) {
4886 adapter->q_vector[i]->cpu = -1;
4887 igb_update_dca(adapter->q_vector[i]);
4891 static int __igb_notify_dca(struct device *dev, void *data)
4893 struct net_device *netdev = dev_get_drvdata(dev);
4894 struct igb_adapter *adapter = netdev_priv(netdev);
4895 struct pci_dev *pdev = adapter->pdev;
4896 struct e1000_hw *hw = &adapter->hw;
4897 unsigned long event = *(unsigned long *)data;
4899 switch (event) {
4900 case DCA_PROVIDER_ADD:
4901 /* if already enabled, don't do it again */
4902 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4903 break;
4904 if (dca_add_requester(dev) == 0) {
4905 adapter->flags |= IGB_FLAG_DCA_ENABLED;
4906 dev_info(&pdev->dev, "DCA enabled\n");
4907 igb_setup_dca(adapter);
4908 break;
4910 /* Fall Through since DCA is disabled. */
4911 case DCA_PROVIDER_REMOVE:
4912 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4913 /* without this a class_device is left
4914 * hanging around in the sysfs model */
4915 dca_remove_requester(dev);
4916 dev_info(&pdev->dev, "DCA disabled\n");
4917 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4918 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4920 break;
4923 return 0;
4926 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4927 void *p)
4929 int ret_val;
4931 ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4932 __igb_notify_dca);
4934 return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4936 #endif /* CONFIG_IGB_DCA */
4938 #ifdef CONFIG_PCI_IOV
4939 static int igb_vf_configure(struct igb_adapter *adapter, int vf)
4941 unsigned char mac_addr[ETH_ALEN];
4942 struct pci_dev *pdev = adapter->pdev;
4943 struct e1000_hw *hw = &adapter->hw;
4944 struct pci_dev *pvfdev;
4945 unsigned int device_id;
4946 u16 thisvf_devfn;
4948 random_ether_addr(mac_addr);
4949 igb_set_vf_mac(adapter, vf, mac_addr);
4951 switch (adapter->hw.mac.type) {
4952 case e1000_82576:
4953 device_id = IGB_82576_VF_DEV_ID;
4954 /* VF Stride for 82576 is 2 */
4955 thisvf_devfn = (pdev->devfn + 0x80 + (vf << 1)) |
4956 (pdev->devfn & 1);
4957 break;
4958 case e1000_i350:
4959 device_id = IGB_I350_VF_DEV_ID;
4960 /* VF Stride for I350 is 4 */
4961 thisvf_devfn = (pdev->devfn + 0x80 + (vf << 2)) |
4962 (pdev->devfn & 3);
4963 break;
4964 default:
4965 device_id = 0;
4966 thisvf_devfn = 0;
4967 break;
4970 pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
4971 while (pvfdev) {
4972 if (pvfdev->devfn == thisvf_devfn)
4973 break;
4974 pvfdev = pci_get_device(hw->vendor_id,
4975 device_id, pvfdev);
4978 if (pvfdev)
4979 adapter->vf_data[vf].vfdev = pvfdev;
4980 else
4981 dev_err(&pdev->dev,
4982 "Couldn't find pci dev ptr for VF %4.4x\n",
4983 thisvf_devfn);
4984 return pvfdev != NULL;
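/* Editorial example of the devfn arithmetic above (addresses are
 * illustrative): with the PF at devfn 0x00, VF 3 on an 82576 (stride 2)
 * is expected at 0x00 + 0x80 + (3 << 1) = 0x86, while on an i350
 * (stride 4) it would be at 0x80 + (3 << 2) = 0x8c; the OR with
 * (pdev->devfn & 1) or (pdev->devfn & 3) is a no-op for a PF at
 * devfn 0x00.
 */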
4987 static int igb_find_enabled_vfs(struct igb_adapter *adapter)
4989 struct e1000_hw *hw = &adapter->hw;
4990 struct pci_dev *pdev = adapter->pdev;
4991 struct pci_dev *pvfdev;
4992 u16 vf_devfn = 0;
4993 u16 vf_stride;
4994 unsigned int device_id;
4995 int vfs_found = 0;
4997 switch (adapter->hw.mac.type) {
4998 case e1000_82576:
4999 device_id = IGB_82576_VF_DEV_ID;
5000 /* VF Stride for 82576 is 2 */
5001 vf_stride = 2;
5002 break;
5003 case e1000_i350:
5004 device_id = IGB_I350_VF_DEV_ID;
5005 /* VF Stride for I350 is 4 */
5006 vf_stride = 4;
5007 break;
5008 default:
5009 device_id = 0;
5010 vf_stride = 0;
5011 break;
5014 vf_devfn = pdev->devfn + 0x80;
5015 pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
5016 while (pvfdev) {
5017 if (pvfdev->devfn == vf_devfn &&
5018 (pvfdev->bus->number >= pdev->bus->number))
5019 vfs_found++;
5020 vf_devfn += vf_stride;
5021 pvfdev = pci_get_device(hw->vendor_id,
5022 device_id, pvfdev);
5025 return vfs_found;
5028 static int igb_check_vf_assignment(struct igb_adapter *adapter)
5030 int i;
5031 for (i = 0; i < adapter->vfs_allocated_count; i++) {
5032 if (adapter->vf_data[i].vfdev) {
5033 if (adapter->vf_data[i].vfdev->dev_flags &
5034 PCI_DEV_FLAGS_ASSIGNED)
5035 return true;
5038 return false;
5041 #endif
5042 static void igb_ping_all_vfs(struct igb_adapter *adapter)
5044 struct e1000_hw *hw = &adapter->hw;
5045 u32 ping;
5046 int i;
5048 for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
5049 ping = E1000_PF_CONTROL_MSG;
5050 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
5051 ping |= E1000_VT_MSGTYPE_CTS;
5052 igb_write_mbx(hw, &ping, 1, i);
5056 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5058 struct e1000_hw *hw = &adapter->hw;
5059 u32 vmolr = rd32(E1000_VMOLR(vf));
5060 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5062 vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
5063 IGB_VF_FLAG_MULTI_PROMISC);
5064 vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5066 if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
5067 vmolr |= E1000_VMOLR_MPME;
5068 vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
5069 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
5070 } else {
5072 * if we have hashes and we are clearing a multicast promisc
5073 * flag we need to write the hashes to the MTA as this step
5074 * was previously skipped
5076 if (vf_data->num_vf_mc_hashes > 30) {
5077 vmolr |= E1000_VMOLR_MPME;
5078 } else if (vf_data->num_vf_mc_hashes) {
5079 int j;
5080 vmolr |= E1000_VMOLR_ROMPE;
5081 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5082 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5086 wr32(E1000_VMOLR(vf), vmolr);
5088 /* there are flags left unprocessed, likely not supported */
5089 if (*msgbuf & E1000_VT_MSGINFO_MASK)
5090 return -EINVAL;
5092 return 0;
5096 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
5097 u32 *msgbuf, u32 vf)
5099 int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5100 u16 *hash_list = (u16 *)&msgbuf[1];
5101 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5102 int i;
5104 /* salt away the number of multicast addresses assigned
5105 	 * to this VF for later use to restore when the PF multicast
5106 * list changes
5108 vf_data->num_vf_mc_hashes = n;
5110 /* only up to 30 hash values supported */
5111 if (n > 30)
5112 n = 30;
5114 /* store the hashes for later use */
5115 for (i = 0; i < n; i++)
5116 vf_data->vf_mc_hashes[i] = hash_list[i];
5118 /* Flush and reset the mta with the new values */
5119 igb_set_rx_mode(adapter->netdev);
5121 return 0;
5124 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
5126 struct e1000_hw *hw = &adapter->hw;
5127 struct vf_data_storage *vf_data;
5128 int i, j;
5130 for (i = 0; i < adapter->vfs_allocated_count; i++) {
5131 u32 vmolr = rd32(E1000_VMOLR(i));
5132 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5134 vf_data = &adapter->vf_data[i];
5136 if ((vf_data->num_vf_mc_hashes > 30) ||
5137 (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
5138 vmolr |= E1000_VMOLR_MPME;
5139 } else if (vf_data->num_vf_mc_hashes) {
5140 vmolr |= E1000_VMOLR_ROMPE;
5141 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5142 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5144 wr32(E1000_VMOLR(i), vmolr);
5148 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
5150 struct e1000_hw *hw = &adapter->hw;
5151 u32 pool_mask, reg, vid;
5152 int i;
5154 pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5156 /* Find the vlan filter for this id */
5157 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5158 reg = rd32(E1000_VLVF(i));
5160 /* remove the vf from the pool */
5161 reg &= ~pool_mask;
5163 /* if pool is empty then remove entry from vfta */
5164 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
5165 (reg & E1000_VLVF_VLANID_ENABLE)) {
5166 reg = 0;
5167 vid = reg & E1000_VLVF_VLANID_MASK;
5168 igb_vfta_set(hw, vid, false);
5171 wr32(E1000_VLVF(i), reg);
5174 adapter->vf_data[vf].vlans_enabled = 0;
5177 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
5179 struct e1000_hw *hw = &adapter->hw;
5180 u32 reg, i;
5182 /* The vlvf table only exists on 82576 hardware and newer */
5183 if (hw->mac.type < e1000_82576)
5184 return -1;
5186 /* we only need to do this if VMDq is enabled */
5187 if (!adapter->vfs_allocated_count)
5188 return -1;
5190 /* Find the vlan filter for this id */
5191 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5192 reg = rd32(E1000_VLVF(i));
5193 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
5194 vid == (reg & E1000_VLVF_VLANID_MASK))
5195 break;
5198 if (add) {
5199 if (i == E1000_VLVF_ARRAY_SIZE) {
5200 /* Did not find a matching VLAN ID entry that was
5201 * enabled. Search for a free filter entry, i.e.
5202 * one without the enable bit set
5204 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5205 reg = rd32(E1000_VLVF(i));
5206 if (!(reg & E1000_VLVF_VLANID_ENABLE))
5207 break;
5210 if (i < E1000_VLVF_ARRAY_SIZE) {
5211 /* Found an enabled/available entry */
5212 reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5214 /* if !enabled we need to set this up in vfta */
5215 if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
5216 /* add VID to filter table */
5217 igb_vfta_set(hw, vid, true);
5218 reg |= E1000_VLVF_VLANID_ENABLE;
5220 reg &= ~E1000_VLVF_VLANID_MASK;
5221 reg |= vid;
5222 wr32(E1000_VLVF(i), reg);
5224 /* do not modify RLPML for PF devices */
5225 if (vf >= adapter->vfs_allocated_count)
5226 return 0;
5228 if (!adapter->vf_data[vf].vlans_enabled) {
5229 u32 size;
5230 reg = rd32(E1000_VMOLR(vf));
5231 size = reg & E1000_VMOLR_RLPML_MASK;
5232 size += 4;
5233 reg &= ~E1000_VMOLR_RLPML_MASK;
5234 reg |= size;
5235 wr32(E1000_VMOLR(vf), reg);
5238 adapter->vf_data[vf].vlans_enabled++;
5240 } else {
5241 if (i < E1000_VLVF_ARRAY_SIZE) {
5242 /* remove vf from the pool */
5243 reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
5244 /* if pool is empty then remove entry from vfta */
5245 if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
5246 reg = 0;
5247 igb_vfta_set(hw, vid, false);
5249 wr32(E1000_VLVF(i), reg);
5251 /* do not modify RLPML for PF devices */
5252 if (vf >= adapter->vfs_allocated_count)
5253 return 0;
5255 adapter->vf_data[vf].vlans_enabled--;
5256 if (!adapter->vf_data[vf].vlans_enabled) {
5257 u32 size;
5258 reg = rd32(E1000_VMOLR(vf));
5259 size = reg & E1000_VMOLR_RLPML_MASK;
5260 size -= 4;
5261 reg &= ~E1000_VMOLR_RLPML_MASK;
5262 reg |= size;
5263 wr32(E1000_VMOLR(vf), reg);
5267 return 0;
5270 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
5272 struct e1000_hw *hw = &adapter->hw;
5274 if (vid)
5275 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
5276 else
5277 wr32(E1000_VMVIR(vf), 0);
5280 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
5281 int vf, u16 vlan, u8 qos)
5283 int err = 0;
5284 struct igb_adapter *adapter = netdev_priv(netdev);
5286 if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
5287 return -EINVAL;
5288 if (vlan || qos) {
5289 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
5290 if (err)
5291 goto out;
5292 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
5293 igb_set_vmolr(adapter, vf, !vlan);
5294 adapter->vf_data[vf].pf_vlan = vlan;
5295 adapter->vf_data[vf].pf_qos = qos;
5296 dev_info(&adapter->pdev->dev,
5297 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5298 if (test_bit(__IGB_DOWN, &adapter->state)) {
5299 dev_warn(&adapter->pdev->dev,
5300 "The VF VLAN has been set,"
5301 " but the PF device is not up.\n");
5302 dev_warn(&adapter->pdev->dev,
5303 "Bring the PF device up before"
5304 " attempting to use the VF device.\n");
5306 } else {
5307 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5308 false, vf);
5309 igb_set_vmvir(adapter, vlan, vf);
5310 igb_set_vmolr(adapter, vf, true);
5311 adapter->vf_data[vf].pf_vlan = 0;
5312 adapter->vf_data[vf].pf_qos = 0;
5314 out:
5315 return err;
5318 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5320 int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5321 int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5323 return igb_vlvf_set(adapter, vid, add, vf);
5326 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5328 /* clear flags - except flag that indicates PF has set the MAC */
5329 adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5330 adapter->vf_data[vf].last_nack = jiffies;
5332 /* reset offloads to defaults */
5333 igb_set_vmolr(adapter, vf, true);
5335 /* reset vlans for device */
5336 igb_clear_vf_vfta(adapter, vf);
5337 if (adapter->vf_data[vf].pf_vlan)
5338 igb_ndo_set_vf_vlan(adapter->netdev, vf,
5339 adapter->vf_data[vf].pf_vlan,
5340 adapter->vf_data[vf].pf_qos);
5341 else
5342 igb_clear_vf_vfta(adapter, vf);
5344 /* reset multicast table array for vf */
5345 adapter->vf_data[vf].num_vf_mc_hashes = 0;
5347 /* Flush and reset the mta with the new values */
5348 igb_set_rx_mode(adapter->netdev);
5351 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5353 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5355 /* generate a new mac address as we were hotplug removed/added */
5356 if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5357 random_ether_addr(vf_mac);
5359 /* process remaining reset events */
5360 igb_vf_reset(adapter, vf);
5363 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5365 struct e1000_hw *hw = &adapter->hw;
5366 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5367 int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5368 u32 reg, msgbuf[3];
5369 u8 *addr = (u8 *)(&msgbuf[1]);
5371 /* process all the same items cleared in a function level reset */
5372 igb_vf_reset(adapter, vf);
5374 /* set vf mac address */
5375 igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5377 /* enable transmit and receive for vf */
5378 reg = rd32(E1000_VFTE);
5379 wr32(E1000_VFTE, reg | (1 << vf));
5380 reg = rd32(E1000_VFRE);
5381 wr32(E1000_VFRE, reg | (1 << vf));
5383 adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5385 /* reply to reset with ack and vf mac address */
5386 msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5387 memcpy(addr, vf_mac, 6);
5388 igb_write_mbx(hw, msgbuf, 3, vf);
5391 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5394 * The VF MAC Address is stored in a packed array of bytes
5395 * starting at the second 32 bit word of the msg array
5397 unsigned char *addr = (char *)&msg[1];
5398 int err = -1;
5400 if (is_valid_ether_addr(addr))
5401 err = igb_set_vf_mac(adapter, vf, addr);
5403 return err;
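/* Editorial illustration of the layout assumed above (example address is
 * made up): for E1000_VF_SET_MAC_ADDR the six MAC bytes start at msg[1],
 * so 00:1b:21:aa:bb:cc is seen on a little-endian host as
 * msg[1] = 0xaa211b00 with 0xccbb in the low 16 bits of msg[2].
 */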
5406 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5408 struct e1000_hw *hw = &adapter->hw;
5409 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5410 u32 msg = E1000_VT_MSGTYPE_NACK;
5412 /* if device isn't clear to send it shouldn't be reading either */
5413 if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5414 time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5415 igb_write_mbx(hw, &msg, 1, vf);
5416 vf_data->last_nack = jiffies;
5420 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5422 struct pci_dev *pdev = adapter->pdev;
5423 u32 msgbuf[E1000_VFMAILBOX_SIZE];
5424 struct e1000_hw *hw = &adapter->hw;
5425 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5426 s32 retval;
5428 retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5430 if (retval) {
5431 		/* if receive failed revoke VF CTS status and restart init */
5432 dev_err(&pdev->dev, "Error receiving message from VF\n");
5433 vf_data->flags &= ~IGB_VF_FLAG_CTS;
5434 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5435 return;
5436 goto out;
5439 /* this is a message we already processed, do nothing */
5440 if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5441 return;
5444 * until the vf completes a reset it should not be
5445 * allowed to start any configuration.
5448 if (msgbuf[0] == E1000_VF_RESET) {
5449 igb_vf_reset_msg(adapter, vf);
5450 return;
5453 if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5454 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5455 return;
5456 retval = -1;
5457 goto out;
5460 switch ((msgbuf[0] & 0xFFFF)) {
5461 case E1000_VF_SET_MAC_ADDR:
5462 retval = -EINVAL;
5463 if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5464 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5465 else
5466 dev_warn(&pdev->dev,
5467 "VF %d attempted to override administratively "
5468 "set MAC address\nReload the VF driver to "
5469 "resume operations\n", vf);
5470 break;
5471 case E1000_VF_SET_PROMISC:
5472 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5473 break;
5474 case E1000_VF_SET_MULTICAST:
5475 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5476 break;
5477 case E1000_VF_SET_LPE:
5478 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5479 break;
5480 case E1000_VF_SET_VLAN:
5481 retval = -1;
5482 if (vf_data->pf_vlan)
5483 dev_warn(&pdev->dev,
5484 "VF %d attempted to override administratively "
5485 "set VLAN tag\nReload the VF driver to "
5486 "resume operations\n", vf);
5487 else
5488 retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5489 break;
5490 default:
5491 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5492 retval = -1;
5493 break;
5496 msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5497 out:
5498 /* notify the VF of the results of what it sent us */
5499 if (retval)
5500 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5501 else
5502 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5504 igb_write_mbx(hw, msgbuf, 1, vf);
5507 static void igb_msg_task(struct igb_adapter *adapter)
5509 struct e1000_hw *hw = &adapter->hw;
5510 u32 vf;
5512 for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5513 /* process any reset requests */
5514 if (!igb_check_for_rst(hw, vf))
5515 igb_vf_reset_event(adapter, vf);
5517 /* process any messages pending */
5518 if (!igb_check_for_msg(hw, vf))
5519 igb_rcv_msg_from_vf(adapter, vf);
5521 /* process any acks */
5522 if (!igb_check_for_ack(hw, vf))
5523 igb_rcv_ack_from_vf(adapter, vf);
5528 * igb_set_uta - Set unicast filter table address
5529 * @adapter: board private structure
5531 * The unicast table address is a register array of 32-bit registers.
5532  * The table is meant to be used in a way similar to how the MTA is used;
5533  * however, due to certain limitations in the hardware it is necessary to
5534 * set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5535 * enable bit to allow vlan tag stripping when promiscuous mode is enabled
5537 static void igb_set_uta(struct igb_adapter *adapter)
5539 struct e1000_hw *hw = &adapter->hw;
5540 int i;
5542 /* The UTA table only exists on 82576 hardware and newer */
5543 if (hw->mac.type < e1000_82576)
5544 return;
5546 /* we only need to do this if VMDq is enabled */
5547 if (!adapter->vfs_allocated_count)
5548 return;
5550 for (i = 0; i < hw->mac.uta_reg_count; i++)
5551 array_wr32(E1000_UTA, i, ~0);
5555 * igb_intr_msi - Interrupt Handler
5556 * @irq: interrupt number
5557 * @data: pointer to a network interface device structure
5559 static irqreturn_t igb_intr_msi(int irq, void *data)
5561 struct igb_adapter *adapter = data;
5562 struct igb_q_vector *q_vector = adapter->q_vector[0];
5563 struct e1000_hw *hw = &adapter->hw;
5564 	/* reading ICR disables interrupts using IAM */
5565 u32 icr = rd32(E1000_ICR);
5567 igb_write_itr(q_vector);
5569 if (icr & E1000_ICR_DRSTA)
5570 schedule_work(&adapter->reset_task);
5572 if (icr & E1000_ICR_DOUTSYNC) {
5573 /* HW is reporting DMA is out of sync */
5574 adapter->stats.doosync++;
5577 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5578 hw->mac.get_link_status = 1;
5579 if (!test_bit(__IGB_DOWN, &adapter->state))
5580 mod_timer(&adapter->watchdog_timer, jiffies + 1);
5583 napi_schedule(&q_vector->napi);
5585 return IRQ_HANDLED;
5589 * igb_intr - Legacy Interrupt Handler
5590 * @irq: interrupt number
5591 * @data: pointer to a network interface device structure
5593 static irqreturn_t igb_intr(int irq, void *data)
5595 struct igb_adapter *adapter = data;
5596 struct igb_q_vector *q_vector = adapter->q_vector[0];
5597 struct e1000_hw *hw = &adapter->hw;
5598 /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked. No
5599 * need for the IMC write */
5600 u32 icr = rd32(E1000_ICR);
5602 /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5603 * not set, then the adapter didn't send an interrupt */
5604 if (!(icr & E1000_ICR_INT_ASSERTED))
5605 return IRQ_NONE;
5607 igb_write_itr(q_vector);
5609 if (icr & E1000_ICR_DRSTA)
5610 schedule_work(&adapter->reset_task);
5612 if (icr & E1000_ICR_DOUTSYNC) {
5613 /* HW is reporting DMA is out of sync */
5614 adapter->stats.doosync++;
5617 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5618 hw->mac.get_link_status = 1;
5619 /* guard against interrupt when we're going down */
5620 if (!test_bit(__IGB_DOWN, &adapter->state))
5621 mod_timer(&adapter->watchdog_timer, jiffies + 1);
5624 napi_schedule(&q_vector->napi);
5626 return IRQ_HANDLED;
5629 static void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5631 struct igb_adapter *adapter = q_vector->adapter;
5632 struct e1000_hw *hw = &adapter->hw;
5634 if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) ||
5635 (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) {
5636 if ((adapter->num_q_vectors == 1) && !adapter->vf_data)
5637 igb_set_itr(q_vector);
5638 else
5639 igb_update_ring_itr(q_vector);
5642 if (!test_bit(__IGB_DOWN, &adapter->state)) {
5643 if (adapter->msix_entries)
5644 wr32(E1000_EIMS, q_vector->eims_value);
5645 else
5646 igb_irq_enable(adapter);
5651 * igb_poll - NAPI Rx polling callback
5652 * @napi: napi polling structure
5653 * @budget: count of how many packets we should handle
5655 static int igb_poll(struct napi_struct *napi, int budget)
5657 struct igb_q_vector *q_vector = container_of(napi,
5658 struct igb_q_vector,
5659 napi);
5660 bool clean_complete = true;
5662 #ifdef CONFIG_IGB_DCA
5663 if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5664 igb_update_dca(q_vector);
5665 #endif
5666 if (q_vector->tx.ring)
5667 clean_complete = igb_clean_tx_irq(q_vector);
5669 if (q_vector->rx.ring)
5670 clean_complete &= igb_clean_rx_irq(q_vector, budget);
5672 /* If all work not completed, return budget and keep polling */
5673 if (!clean_complete)
5674 return budget;
5676 /* If not enough Rx work done, exit the polling mode */
5677 napi_complete(napi);
5678 igb_ring_irq_enable(q_vector);
5680 return 0;
5684 * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5685 * @adapter: board private structure
5686 * @shhwtstamps: timestamp structure to update
5687 * @regval: unsigned 64bit system time value.
5689 * We need to convert the system time value stored in the RX/TXSTMP registers
5690 * into a hwtstamp which can be used by the upper level timestamping functions
5692 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
5693 struct skb_shared_hwtstamps *shhwtstamps,
5694 u64 regval)
5696 u64 ns;
5699 	 * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL; shift this up by
5700 	 * 24 bits to match the clock shift we set up earlier.
5702 if (adapter->hw.mac.type >= e1000_82580)
5703 regval <<= IGB_82580_TSYNC_SHIFT;
5705 ns = timecounter_cyc2time(&adapter->clock, regval);
5706 timecompare_update(&adapter->compare, ns);
5707 memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
5708 shhwtstamps->hwtstamp = ns_to_ktime(ns);
5709 shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
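/* Editorial note: per the comment above the shift is 24 bits
 * (IGB_82580_TSYNC_SHIFT), so on 82580-class parts a raw timestamp of 1
 * (1 ns at bit 0) is scaled to 1 << 24 = 16777216 before being fed to
 * timecounter_cyc2time(), matching the cyclecounter shift chosen when
 * the clock was initialised.
 */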
5713 * igb_tx_hwtstamp - utility function which checks for TX time stamp
5714 * @q_vector: pointer to q_vector containing needed info
5715 * @buffer: pointer to igb_tx_buffer structure
5717 * If we were asked to do hardware stamping and such a time stamp is
5718  * available, then it must have been for this skb here because we
5719 * allow only one such packet into the queue.
5721 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector,
5722 struct igb_tx_buffer *buffer_info)
5724 struct igb_adapter *adapter = q_vector->adapter;
5725 struct e1000_hw *hw = &adapter->hw;
5726 struct skb_shared_hwtstamps shhwtstamps;
5727 u64 regval;
5729 /* if skb does not support hw timestamp or TX stamp not valid exit */
5730 if (likely(!(buffer_info->tx_flags & IGB_TX_FLAGS_TSTAMP)) ||
5731 !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5732 return;
5734 regval = rd32(E1000_TXSTMPL);
5735 regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5737 igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5738 skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5742 * igb_clean_tx_irq - Reclaim resources after transmit completes
5743 * @q_vector: pointer to q_vector containing needed info
5744 * returns true if ring is completely cleaned
5746 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5748 struct igb_adapter *adapter = q_vector->adapter;
5749 struct igb_ring *tx_ring = q_vector->tx.ring;
5750 struct igb_tx_buffer *tx_buffer;
5751 union e1000_adv_tx_desc *tx_desc, *eop_desc;
5752 unsigned int total_bytes = 0, total_packets = 0;
5753 unsigned int budget = q_vector->tx.work_limit;
5754 unsigned int i = tx_ring->next_to_clean;
5756 if (test_bit(__IGB_DOWN, &adapter->state))
5757 return true;
5759 tx_buffer = &tx_ring->tx_buffer_info[i];
5760 tx_desc = IGB_TX_DESC(tx_ring, i);
5761 i -= tx_ring->count;
5763 for (; budget; budget--) {
5764 eop_desc = tx_buffer->next_to_watch;
5766 /* prevent any other reads prior to eop_desc */
5767 rmb();
5769 /* if next_to_watch is not set then there is no work pending */
5770 if (!eop_desc)
5771 break;
5773 /* if DD is not set pending work has not been completed */
5774 if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
5775 break;
5777 /* clear next_to_watch to prevent false hangs */
5778 tx_buffer->next_to_watch = NULL;
5780 /* update the statistics for this packet */
5781 total_bytes += tx_buffer->bytecount;
5782 total_packets += tx_buffer->gso_segs;
5784 /* retrieve hardware timestamp */
5785 igb_tx_hwtstamp(q_vector, tx_buffer);
5787 /* free the skb */
5788 dev_kfree_skb_any(tx_buffer->skb);
5789 tx_buffer->skb = NULL;
5791 /* unmap skb header data */
5792 dma_unmap_single(tx_ring->dev,
5793 tx_buffer->dma,
5794 tx_buffer->length,
5795 DMA_TO_DEVICE);
5797 /* clear last DMA location and unmap remaining buffers */
5798 while (tx_desc != eop_desc) {
5799 tx_buffer->dma = 0;
5801 tx_buffer++;
5802 tx_desc++;
5803 i++;
5804 if (unlikely(!i)) {
5805 i -= tx_ring->count;
5806 tx_buffer = tx_ring->tx_buffer_info;
5807 tx_desc = IGB_TX_DESC(tx_ring, 0);
5810 /* unmap any remaining paged data */
5811 if (tx_buffer->dma) {
5812 dma_unmap_page(tx_ring->dev,
5813 tx_buffer->dma,
5814 tx_buffer->length,
5815 DMA_TO_DEVICE);
5819 /* clear last DMA location */
5820 tx_buffer->dma = 0;
5822 /* move us one more past the eop_desc for start of next pkt */
5823 tx_buffer++;
5824 tx_desc++;
5825 i++;
5826 if (unlikely(!i)) {
5827 i -= tx_ring->count;
5828 tx_buffer = tx_ring->tx_buffer_info;
5829 tx_desc = IGB_TX_DESC(tx_ring, 0);
5833 netdev_tx_completed_queue(txring_txq(tx_ring),
5834 total_packets, total_bytes);
5835 i += tx_ring->count;
5836 tx_ring->next_to_clean = i;
5837 u64_stats_update_begin(&tx_ring->tx_syncp);
5838 tx_ring->tx_stats.bytes += total_bytes;
5839 tx_ring->tx_stats.packets += total_packets;
5840 u64_stats_update_end(&tx_ring->tx_syncp);
5841 q_vector->tx.total_bytes += total_bytes;
5842 q_vector->tx.total_packets += total_packets;
5844 if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
5845 struct e1000_hw *hw = &adapter->hw;
5847 eop_desc = tx_buffer->next_to_watch;
5849 /* Detect a transmit hang in hardware, this serializes the
5850 * check with the clearing of time_stamp and movement of i */
5851 clear_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
5852 if (eop_desc &&
5853 time_after(jiffies, tx_buffer->time_stamp +
5854 (adapter->tx_timeout_factor * HZ)) &&
5855 !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5857 /* detected Tx unit hang */
5858 dev_err(tx_ring->dev,
5859 "Detected Tx Unit Hang\n"
5860 " Tx Queue <%d>\n"
5861 " TDH <%x>\n"
5862 " TDT <%x>\n"
5863 " next_to_use <%x>\n"
5864 " next_to_clean <%x>\n"
5865 "buffer_info[next_to_clean]\n"
5866 " time_stamp <%lx>\n"
5867 " next_to_watch <%p>\n"
5868 " jiffies <%lx>\n"
5869 " desc.status <%x>\n",
5870 tx_ring->queue_index,
5871 rd32(E1000_TDH(tx_ring->reg_idx)),
5872 readl(tx_ring->tail),
5873 tx_ring->next_to_use,
5874 tx_ring->next_to_clean,
5875 tx_buffer->time_stamp,
5876 eop_desc,
5877 jiffies,
5878 eop_desc->wb.status);
5879 netif_stop_subqueue(tx_ring->netdev,
5880 tx_ring->queue_index);
5882 /* we are about to reset, no point in enabling stuff */
5883 return true;
5887 if (unlikely(total_packets &&
5888 netif_carrier_ok(tx_ring->netdev) &&
5889 igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5890 /* Make sure that anybody stopping the queue after this
5891 * sees the new next_to_clean.
5893 smp_mb();
5894 if (__netif_subqueue_stopped(tx_ring->netdev,
5895 tx_ring->queue_index) &&
5896 !(test_bit(__IGB_DOWN, &adapter->state))) {
5897 netif_wake_subqueue(tx_ring->netdev,
5898 tx_ring->queue_index);
5900 u64_stats_update_begin(&tx_ring->tx_syncp);
5901 tx_ring->tx_stats.restart_queue++;
5902 u64_stats_update_end(&tx_ring->tx_syncp);
5906 return !!budget;
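/* igb_rx_checksum - set skb->ip_summed based on the Rx descriptor status:
 * skip verification when the IXSM bit is set or RXCSUM is disabled, count
 * L4 checksum errors (except for the short-SCTP-frame erratum handled
 * below), and report CHECKSUM_UNNECESSARY only for frames whose TCP/UDP
 * checksum the hardware validated.
 */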
5909 static inline void igb_rx_checksum(struct igb_ring *ring,
5910 union e1000_adv_rx_desc *rx_desc,
5911 struct sk_buff *skb)
5913 skb_checksum_none_assert(skb);
5915 /* Ignore Checksum bit is set */
5916 if (igb_test_staterr(rx_desc, E1000_RXD_STAT_IXSM))
5917 return;
5919 /* Rx checksum disabled via ethtool */
5920 if (!(ring->netdev->features & NETIF_F_RXCSUM))
5921 return;
5923 /* TCP/UDP checksum error bit is set */
5924 if (igb_test_staterr(rx_desc,
5925 E1000_RXDEXT_STATERR_TCPE |
5926 E1000_RXDEXT_STATERR_IPE)) {
5928 * work around errata with sctp packets where the TCPE aka
5929 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5930 * packets (i.e. let the stack check the crc32c)
5932 if (!((skb->len == 60) &&
5933 test_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) {
5934 u64_stats_update_begin(&ring->rx_syncp);
5935 ring->rx_stats.csum_err++;
5936 u64_stats_update_end(&ring->rx_syncp);
5938 /* let the stack verify checksum errors */
5939 return;
5941 /* It must be a TCP or UDP packet with a valid checksum */
5942 if (igb_test_staterr(rx_desc, E1000_RXD_STAT_TCPCS |
5943 E1000_RXD_STAT_UDPCS))
5944 skb->ip_summed = CHECKSUM_UNNECESSARY;
5946 dev_dbg(ring->dev, "cksum success: bits %08X\n",
5947 le32_to_cpu(rx_desc->wb.upper.status_error));
5950 static inline void igb_rx_hash(struct igb_ring *ring,
5951 union e1000_adv_rx_desc *rx_desc,
5952 struct sk_buff *skb)
5954 if (ring->netdev->features & NETIF_F_RXHASH)
5955 skb->rxhash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss);
5958 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector,
5959 union e1000_adv_rx_desc *rx_desc,
5960 struct sk_buff *skb)
5962 struct igb_adapter *adapter = q_vector->adapter;
5963 struct e1000_hw *hw = &adapter->hw;
5964 u64 regval;
5966 if (!igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP |
5967 E1000_RXDADV_STAT_TS))
5968 return;
5971 * If this bit is set, then the RX registers contain the time stamp. No
5972 * other packet will be time stamped until we read these registers, so
5973 * read the registers to make them available again. Because only one
5974 * packet can be time stamped at a time, we know that the register
5975 * values must belong to this one here and therefore we don't need to
5976 * compare any of the additional attributes stored for it.
5978 * If nothing went wrong, then it should have a shared tx_flags that we
5979 * can turn into a skb_shared_hwtstamps.
5981 if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
5982 u32 *stamp = (u32 *)skb->data;
5983 regval = le32_to_cpu(*(stamp + 2));
5984 regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5985 skb_pull(skb, IGB_TS_HDR_LEN);
5986 } else {
5987 if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5988 return;
5990 regval = rd32(E1000_RXSTMPL);
5991 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5994 igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
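/* igb_rx_vlan - if the descriptor carries a VLAN tag, extract it (using the
 * byte-swapped form for loopback frames on MACs that store it in network
 * order) and attach it to the skb for hardware-accelerated VLAN handling.
 */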
5997 static void igb_rx_vlan(struct igb_ring *ring,
5998 union e1000_adv_rx_desc *rx_desc,
5999 struct sk_buff *skb)
6001 if (igb_test_staterr(rx_desc, E1000_RXD_STAT_VP)) {
6002 u16 vid;
6003 if (igb_test_staterr(rx_desc, E1000_RXDEXT_STATERR_LB) &&
6004 test_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags))
6005 vid = be16_to_cpu(rx_desc->wb.upper.vlan);
6006 else
6007 vid = le16_to_cpu(rx_desc->wb.upper.vlan);
6009 __vlan_hwaccel_put_tag(skb, vid);
6013 static inline u16 igb_get_hlen(union e1000_adv_rx_desc *rx_desc)
6015 /* HW will not DMA in data larger than the given buffer, even if it
6016 * parses the (NFS, of course) header to be larger. In that case, it
6017 * fills the header buffer and spills the rest into the page.
6019 u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
6020 E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
6021 if (hlen > IGB_RX_HDR_LEN)
6022 hlen = IGB_RX_HDR_LEN;
6023 return hlen;
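/* igb_clean_rx_irq - process completed Rx descriptors up to the NAPI budget,
 * handing frames to the stack via GRO and refilling buffers in batches of
 * IGB_RX_BUFFER_WRITE; returns true when the ring was cleaned without
 * exhausting the budget.
 */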
6026 static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
6028 struct igb_ring *rx_ring = q_vector->rx.ring;
6029 union e1000_adv_rx_desc *rx_desc;
6030 const int current_node = numa_node_id();
6031 unsigned int total_bytes = 0, total_packets = 0;
6032 u16 cleaned_count = igb_desc_unused(rx_ring);
6033 u16 i = rx_ring->next_to_clean;
6035 rx_desc = IGB_RX_DESC(rx_ring, i);
6037 while (igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) {
6038 struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
6039 struct sk_buff *skb = buffer_info->skb;
6040 union e1000_adv_rx_desc *next_rxd;
6042 buffer_info->skb = NULL;
6043 prefetch(skb->data);
6045 i++;
6046 if (i == rx_ring->count)
6047 i = 0;
6049 next_rxd = IGB_RX_DESC(rx_ring, i);
6050 prefetch(next_rxd);
6053 * This memory barrier is needed to keep us from reading
6054 * any other fields out of the rx_desc until we know the
6055 * RXD_STAT_DD bit is set
6057 rmb();
6059 if (!skb_is_nonlinear(skb)) {
6060 __skb_put(skb, igb_get_hlen(rx_desc));
6061 dma_unmap_single(rx_ring->dev, buffer_info->dma,
6062 IGB_RX_HDR_LEN,
6063 DMA_FROM_DEVICE);
6064 buffer_info->dma = 0;
6067 if (rx_desc->wb.upper.length) {
6068 u16 length = le16_to_cpu(rx_desc->wb.upper.length);
6070 skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
6071 buffer_info->page,
6072 buffer_info->page_offset,
6073 length);
6075 skb->len += length;
6076 skb->data_len += length;
6077 skb->truesize += PAGE_SIZE / 2;
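/* The skb frag above inherits our reference to the page. Keep the page for
 * reuse (taking an extra reference) only if we are the sole owner and it is
 * local to this NUMA node; otherwise forget it and allocate a fresh one.
 */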
6079 if ((page_count(buffer_info->page) != 1) ||
6080 (page_to_nid(buffer_info->page) != current_node))
6081 buffer_info->page = NULL;
6082 else
6083 get_page(buffer_info->page);
6085 dma_unmap_page(rx_ring->dev, buffer_info->page_dma,
6086 PAGE_SIZE / 2, DMA_FROM_DEVICE);
6087 buffer_info->page_dma = 0;
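/* Not end-of-packet: the frame continues in the next descriptor, so swap
 * skbs with the next buffer slot - the partially assembled skb rides along
 * there while this slot inherits the next slot's fresh skb for refill.
 */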
6090 if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP)) {
6091 struct igb_rx_buffer *next_buffer;
6092 next_buffer = &rx_ring->rx_buffer_info[i];
6093 buffer_info->skb = next_buffer->skb;
6094 buffer_info->dma = next_buffer->dma;
6095 next_buffer->skb = skb;
6096 next_buffer->dma = 0;
6097 goto next_desc;
6100 if (igb_test_staterr(rx_desc,
6101 E1000_RXDEXT_ERR_FRAME_ERR_MASK)) {
6102 dev_kfree_skb_any(skb);
6103 goto next_desc;
6106 igb_rx_hwtstamp(q_vector, rx_desc, skb);
6107 igb_rx_hash(rx_ring, rx_desc, skb);
6108 igb_rx_checksum(rx_ring, rx_desc, skb);
6109 igb_rx_vlan(rx_ring, rx_desc, skb);
6111 total_bytes += skb->len;
6112 total_packets++;
6114 skb->protocol = eth_type_trans(skb, rx_ring->netdev);
6116 napi_gro_receive(&q_vector->napi, skb);
6118 budget--;
6119 next_desc:
6120 if (!budget)
6121 break;
6123 cleaned_count++;
6124 /* return some buffers to hardware, one at a time is too slow */
6125 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
6126 igb_alloc_rx_buffers(rx_ring, cleaned_count);
6127 cleaned_count = 0;
6130 /* use prefetched values */
6131 rx_desc = next_rxd;
6134 rx_ring->next_to_clean = i;
6135 u64_stats_update_begin(&rx_ring->rx_syncp);
6136 rx_ring->rx_stats.packets += total_packets;
6137 rx_ring->rx_stats.bytes += total_bytes;
6138 u64_stats_update_end(&rx_ring->rx_syncp);
6139 q_vector->rx.total_packets += total_packets;
6140 q_vector->rx.total_bytes += total_bytes;
6142 if (cleaned_count)
6143 igb_alloc_rx_buffers(rx_ring, cleaned_count);
6145 return !!budget;
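/* igb_alloc_mapped_skb - make sure this Rx buffer has a header skb allocated
 * and DMA mapped; bumps alloc_failed and returns false if either step fails.
 */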
6148 static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring,
6149 struct igb_rx_buffer *bi)
6151 struct sk_buff *skb = bi->skb;
6152 dma_addr_t dma = bi->dma;
6154 if (dma)
6155 return true;
6157 if (likely(!skb)) {
6158 skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
6159 IGB_RX_HDR_LEN);
6160 bi->skb = skb;
6161 if (!skb) {
6162 rx_ring->rx_stats.alloc_failed++;
6163 return false;
6166 /* initialize skb for ring */
6167 skb_record_rx_queue(skb, rx_ring->queue_index);
6170 dma = dma_map_single(rx_ring->dev, skb->data,
6171 IGB_RX_HDR_LEN, DMA_FROM_DEVICE);
6173 if (dma_mapping_error(rx_ring->dev, dma)) {
6174 rx_ring->rx_stats.alloc_failed++;
6175 return false;
6178 bi->dma = dma;
6179 return true;
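/* igb_alloc_mapped_page - map half a page for packet data; page_offset is
 * XORed with PAGE_SIZE/2 so successive mappings of a recycled page alternate
 * between its two halves.
 */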
6182 static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
6183 struct igb_rx_buffer *bi)
6185 struct page *page = bi->page;
6186 dma_addr_t page_dma = bi->page_dma;
6187 unsigned int page_offset = bi->page_offset ^ (PAGE_SIZE / 2);
6189 if (page_dma)
6190 return true;
6192 if (!page) {
6193 page = alloc_page(GFP_ATOMIC | __GFP_COLD);
6194 bi->page = page;
6195 if (unlikely(!page)) {
6196 rx_ring->rx_stats.alloc_failed++;
6197 return false;
6201 page_dma = dma_map_page(rx_ring->dev, page,
6202 page_offset, PAGE_SIZE / 2,
6203 DMA_FROM_DEVICE);
6205 if (dma_mapping_error(rx_ring->dev, page_dma)) {
6206 rx_ring->rx_stats.alloc_failed++;
6207 return false;
6210 bi->page_dma = page_dma;
6211 bi->page_offset = page_offset;
6212 return true;
6216 * igb_alloc_rx_buffers - Replace used receive buffers; packet split
6217 * @rx_ring: rx descriptor ring to refill with new buffers
 * @cleaned_count: number of descriptors to refill
6219 void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
6221 union e1000_adv_rx_desc *rx_desc;
6222 struct igb_rx_buffer *bi;
6223 u16 i = rx_ring->next_to_use;
6225 rx_desc = IGB_RX_DESC(rx_ring, i);
6226 bi = &rx_ring->rx_buffer_info[i];
6227 i -= rx_ring->count;
6229 while (cleaned_count--) {
6230 if (!igb_alloc_mapped_skb(rx_ring, bi))
6231 break;
6233 /* Refresh the desc even if buffer_addrs didn't change
6234 * because each write-back erases this info. */
6235 rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
6237 if (!igb_alloc_mapped_page(rx_ring, bi))
6238 break;
6240 rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
6242 rx_desc++;
6243 bi++;
6244 i++;
6245 if (unlikely(!i)) {
6246 rx_desc = IGB_RX_DESC(rx_ring, 0);
6247 bi = rx_ring->rx_buffer_info;
6248 i -= rx_ring->count;
6251 /* clear the hdr_addr for the next_to_use descriptor */
6252 rx_desc->read.hdr_addr = 0;
6255 i += rx_ring->count;
6257 if (rx_ring->next_to_use != i) {
6258 rx_ring->next_to_use = i;
6260 /* Force memory writes to complete before letting h/w
6261 * know there are new descriptors to fetch. (Only
6262 * applicable for weak-ordered memory model archs,
6263 * such as IA-64). */
6264 wmb();
6265 writel(i, rx_ring->tail);
6270 * igb_mii_ioctl - handle MII ioctl requests
6271 * @netdev: network interface device structure
6272 * @ifr: interface request structure carrying the MII register data
6273 * @cmd: ioctl command (SIOCGMIIPHY, SIOCGMIIREG or SIOCSMIIREG)
6275 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6277 struct igb_adapter *adapter = netdev_priv(netdev);
6278 struct mii_ioctl_data *data = if_mii(ifr);
6280 if (adapter->hw.phy.media_type != e1000_media_type_copper)
6281 return -EOPNOTSUPP;
6283 switch (cmd) {
6284 case SIOCGMIIPHY:
6285 data->phy_id = adapter->hw.phy.addr;
6286 break;
6287 case SIOCGMIIREG:
6288 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
6289 &data->val_out))
6290 return -EIO;
6291 break;
6292 case SIOCSMIIREG:
6293 default:
6294 return -EOPNOTSUPP;
6296 return 0;
6300 * igb_hwtstamp_ioctl - control hardware time stamping
6301 * @netdev: network interface device structure
6302 * @ifr: interface request structure carrying the hwtstamp_config
6303 * @cmd: ioctl command (SIOCSHWTSTAMP)
6305 * Outgoing time stamping can be enabled and disabled. Play nice and
6306 disable it when requested, although it shouldn't cause any overhead
6307 * when no packet needs it. At most one packet in the queue may be
6308 * marked for time stamping, otherwise it would be impossible to tell
6309 * for sure to which packet the hardware time stamp belongs.
6311 * Incoming time stamping has to be configured via the hardware
6312 * filters. Not all combinations are supported, in particular event
6313 * type has to be specified. Matching the kind of event packet is
6314 * not supported, with the exception of "all V2 events regardless of
6315 * level 2 or 4".
6318 static int igb_hwtstamp_ioctl(struct net_device *netdev,
6319 struct ifreq *ifr, int cmd)
6321 struct igb_adapter *adapter = netdev_priv(netdev);
6322 struct e1000_hw *hw = &adapter->hw;
6323 struct hwtstamp_config config;
6324 u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
6325 u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6326 u32 tsync_rx_cfg = 0;
6327 bool is_l4 = false;
6328 bool is_l2 = false;
6329 u32 regval;
6331 if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
6332 return -EFAULT;
6334 /* reserved for future extensions */
6335 if (config.flags)
6336 return -EINVAL;
6338 switch (config.tx_type) {
6339 case HWTSTAMP_TX_OFF:
6340 tsync_tx_ctl = 0;
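/* fall through - OFF and ON are both valid; OFF simply clears the Tx
 * timestamp enable before falling through to the break */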
6341 case HWTSTAMP_TX_ON:
6342 break;
6343 default:
6344 return -ERANGE;
6347 switch (config.rx_filter) {
6348 case HWTSTAMP_FILTER_NONE:
6349 tsync_rx_ctl = 0;
6350 break;
6351 case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
6352 case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
6353 case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
6354 case HWTSTAMP_FILTER_ALL:
6356 * register TSYNCRXCFG must be set, therefore it is not
6357 * possible to time stamp both Sync and Delay_Req messages
6358 * => fall back to time stamping all packets
6360 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6361 config.rx_filter = HWTSTAMP_FILTER_ALL;
6362 break;
6363 case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
6364 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6365 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
6366 is_l4 = true;
6367 break;
6368 case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
6369 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6370 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
6371 is_l4 = true;
6372 break;
6373 case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
6374 case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
6375 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6376 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
6377 is_l2 = true;
6378 is_l4 = true;
6379 config.rx_filter = HWTSTAMP_FILTER_SOME;
6380 break;
6381 case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
6382 case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
6383 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6384 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
6385 is_l2 = true;
6386 is_l4 = true;
6387 config.rx_filter = HWTSTAMP_FILTER_SOME;
6388 break;
6389 case HWTSTAMP_FILTER_PTP_V2_EVENT:
6390 case HWTSTAMP_FILTER_PTP_V2_SYNC:
6391 case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
6392 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
6393 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
6394 is_l2 = true;
6395 is_l4 = true;
6396 break;
6397 default:
6398 return -ERANGE;
6401 if (hw->mac.type == e1000_82575) {
6402 if (tsync_rx_ctl | tsync_tx_ctl)
6403 return -EINVAL;
6404 return 0;
6408 * Per-packet timestamping only works if all packets are
6409 * timestamped, so enable timestamping in all packets as
6410 * long as one rx filter was configured.
6412 if ((hw->mac.type >= e1000_82580) && tsync_rx_ctl) {
6413 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6414 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6417 /* enable/disable TX */
6418 regval = rd32(E1000_TSYNCTXCTL);
6419 regval &= ~E1000_TSYNCTXCTL_ENABLED;
6420 regval |= tsync_tx_ctl;
6421 wr32(E1000_TSYNCTXCTL, regval);
6423 /* enable/disable RX */
6424 regval = rd32(E1000_TSYNCRXCTL);
6425 regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
6426 regval |= tsync_rx_ctl;
6427 wr32(E1000_TSYNCRXCTL, regval);
6429 /* define which PTP packets are time stamped */
6430 wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
6432 /* define ethertype filter for timestamped packets */
6433 if (is_l2)
6434 wr32(E1000_ETQF(3),
6435 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
6436 E1000_ETQF_1588 | /* enable timestamping */
6437 ETH_P_1588)); /* 1588 eth protocol type */
6438 else
6439 wr32(E1000_ETQF(3), 0);
6441 #define PTP_PORT 319
6442 /* L4 Queue Filter[3]: filter by destination port and protocol */
6443 if (is_l4) {
6444 u32 ftqf = (IPPROTO_UDP /* UDP */
6445 | E1000_FTQF_VF_BP /* VF not compared */
6446 | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6447 | E1000_FTQF_MASK); /* mask all inputs */
6448 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6450 wr32(E1000_IMIR(3), htons(PTP_PORT));
6451 wr32(E1000_IMIREXT(3),
6452 (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6453 if (hw->mac.type == e1000_82576) {
6454 /* enable source port check */
6455 wr32(E1000_SPQF(3), htons(PTP_PORT));
6456 ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6458 wr32(E1000_FTQF(3), ftqf);
6459 } else {
6460 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
6462 wrfl();
6464 adapter->hwtstamp_config = config;
6466 /* clear TX/RX time stamp registers, just to be sure */
6467 regval = rd32(E1000_TXSTMPH);
6468 regval = rd32(E1000_RXSTMPH);
6470 return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
6471 -EFAULT : 0;
6475 * igb_ioctl - dispatch device-specific ioctl requests
6476 * @netdev: network interface device structure
6477 * @ifr: interface request structure
6478 * @cmd: ioctl command
6480 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6482 switch (cmd) {
6483 case SIOCGMIIPHY:
6484 case SIOCGMIIREG:
6485 case SIOCSMIIREG:
6486 return igb_mii_ioctl(netdev, ifr, cmd);
6487 case SIOCSHWTSTAMP:
6488 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
6489 default:
6490 return -EOPNOTSUPP;
6494 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6496 struct igb_adapter *adapter = hw->back;
6497 u16 cap_offset;
6499 cap_offset = adapter->pdev->pcie_cap;
6500 if (!cap_offset)
6501 return -E1000_ERR_CONFIG;
6503 pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6505 return 0;
6508 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6510 struct igb_adapter *adapter = hw->back;
6511 u16 cap_offset;
6513 cap_offset = adapter->pdev->pcie_cap;
6514 if (!cap_offset)
6515 return -E1000_ERR_CONFIG;
6517 pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6519 return 0;
6522 static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features)
6524 struct igb_adapter *adapter = netdev_priv(netdev);
6525 struct e1000_hw *hw = &adapter->hw;
6526 u32 ctrl, rctl;
6527 bool enable = !!(features & NETIF_F_HW_VLAN_RX);
6529 if (enable) {
6530 /* enable VLAN tag insert/strip */
6531 ctrl = rd32(E1000_CTRL);
6532 ctrl |= E1000_CTRL_VME;
6533 wr32(E1000_CTRL, ctrl);
6535 /* Disable CFI check */
6536 rctl = rd32(E1000_RCTL);
6537 rctl &= ~E1000_RCTL_CFIEN;
6538 wr32(E1000_RCTL, rctl);
6539 } else {
6540 /* disable VLAN tag insert/strip */
6541 ctrl = rd32(E1000_CTRL);
6542 ctrl &= ~E1000_CTRL_VME;
6543 wr32(E1000_CTRL, ctrl);
6546 igb_rlpml_set(adapter);
6549 static int igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6551 struct igb_adapter *adapter = netdev_priv(netdev);
6552 struct e1000_hw *hw = &adapter->hw;
6553 int pf_id = adapter->vfs_allocated_count;
6555 /* attempt to add filter to vlvf array */
6556 igb_vlvf_set(adapter, vid, true, pf_id);
6558 /* add the filter since PF can receive vlans w/o entry in vlvf */
6559 igb_vfta_set(hw, vid, true);
6561 set_bit(vid, adapter->active_vlans);
6563 return 0;
6566 static int igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6568 struct igb_adapter *adapter = netdev_priv(netdev);
6569 struct e1000_hw *hw = &adapter->hw;
6570 int pf_id = adapter->vfs_allocated_count;
6571 s32 err;
6573 /* remove vlan from VLVF table array */
6574 err = igb_vlvf_set(adapter, vid, false, pf_id);
6576 /* if vid was not present in VLVF just remove it from table */
6577 if (err)
6578 igb_vfta_set(hw, vid, false);
6580 clear_bit(vid, adapter->active_vlans);
6582 return 0;
6585 static void igb_restore_vlan(struct igb_adapter *adapter)
6587 u16 vid;
6589 igb_vlan_mode(adapter->netdev, adapter->netdev->features);
6591 for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
6592 igb_vlan_rx_add_vid(adapter->netdev, vid);
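/* igb_set_spd_dplx - validate and apply a forced speed/duplex setting from
 * ethtool; 1000 Mbps full duplex still uses autonegotiation, advertising
 * only that single mode.
 */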
6595 int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx)
6597 struct pci_dev *pdev = adapter->pdev;
6598 struct e1000_mac_info *mac = &adapter->hw.mac;
6600 mac->autoneg = 0;
6602 /* Make sure dplx is at most 1 bit and lsb of speed is not set
6603 * for the switch() below to work */
6604 if ((spd & 1) || (dplx & ~1))
6605 goto err_inval;
6607 /* Fiber NICs only allow 1000 Mbps full duplex */
6608 if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6609 spd != SPEED_1000 &&
6610 dplx != DUPLEX_FULL)
6611 goto err_inval;
6613 switch (spd + dplx) {
6614 case SPEED_10 + DUPLEX_HALF:
6615 mac->forced_speed_duplex = ADVERTISE_10_HALF;
6616 break;
6617 case SPEED_10 + DUPLEX_FULL:
6618 mac->forced_speed_duplex = ADVERTISE_10_FULL;
6619 break;
6620 case SPEED_100 + DUPLEX_HALF:
6621 mac->forced_speed_duplex = ADVERTISE_100_HALF;
6622 break;
6623 case SPEED_100 + DUPLEX_FULL:
6624 mac->forced_speed_duplex = ADVERTISE_100_FULL;
6625 break;
6626 case SPEED_1000 + DUPLEX_FULL:
6627 mac->autoneg = 1;
6628 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6629 break;
6630 case SPEED_1000 + DUPLEX_HALF: /* not supported */
6631 default:
6632 goto err_inval;
6634 return 0;
6636 err_inval:
6637 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6638 return -EINVAL;
6641 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake,
6642 bool runtime)
6644 struct net_device *netdev = pci_get_drvdata(pdev);
6645 struct igb_adapter *adapter = netdev_priv(netdev);
6646 struct e1000_hw *hw = &adapter->hw;
6647 u32 ctrl, rctl, status;
6648 u32 wufc = runtime ? E1000_WUFC_LNKC : adapter->wol;
6649 #ifdef CONFIG_PM
6650 int retval = 0;
6651 #endif
6653 netif_device_detach(netdev);
6655 if (netif_running(netdev))
6656 __igb_close(netdev, true);
6658 igb_clear_interrupt_scheme(adapter);
6660 #ifdef CONFIG_PM
6661 retval = pci_save_state(pdev);
6662 if (retval)
6663 return retval;
6664 #endif
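/* waking on a link-status change only makes sense if the link is currently
 * down, so drop the LNKC filter from the wake-up flags when the link is up */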
6666 status = rd32(E1000_STATUS);
6667 if (status & E1000_STATUS_LU)
6668 wufc &= ~E1000_WUFC_LNKC;
6670 if (wufc) {
6671 igb_setup_rctl(adapter);
6672 igb_set_rx_mode(netdev);
6674 /* turn on all-multi mode if wake on multicast is enabled */
6675 if (wufc & E1000_WUFC_MC) {
6676 rctl = rd32(E1000_RCTL);
6677 rctl |= E1000_RCTL_MPE;
6678 wr32(E1000_RCTL, rctl);
6681 ctrl = rd32(E1000_CTRL);
6682 /* advertise wake from D3Cold */
6683 #define E1000_CTRL_ADVD3WUC 0x00100000
6684 /* phy power management enable */
6685 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6686 ctrl |= E1000_CTRL_ADVD3WUC;
6687 wr32(E1000_CTRL, ctrl);
6689 /* Allow time for pending master requests to run */
6690 igb_disable_pcie_master(hw);
6692 wr32(E1000_WUC, E1000_WUC_PME_EN);
6693 wr32(E1000_WUFC, wufc);
6694 } else {
6695 wr32(E1000_WUC, 0);
6696 wr32(E1000_WUFC, 0);
6699 *enable_wake = wufc || adapter->en_mng_pt;
6700 if (!*enable_wake)
6701 igb_power_down_link(adapter);
6702 else
6703 igb_power_up_link(adapter);
6705 /* Release control of h/w to f/w. If f/w is AMT enabled, this
6706 * would have already happened in close and is redundant. */
6707 igb_release_hw_control(adapter);
6709 pci_disable_device(pdev);
6711 return 0;
6714 #ifdef CONFIG_PM
6715 #ifdef CONFIG_PM_SLEEP
6716 static int igb_suspend(struct device *dev)
6718 int retval;
6719 bool wake;
6720 struct pci_dev *pdev = to_pci_dev(dev);
6722 retval = __igb_shutdown(pdev, &wake, 0);
6723 if (retval)
6724 return retval;
6726 if (wake) {
6727 pci_prepare_to_sleep(pdev);
6728 } else {
6729 pci_wake_from_d3(pdev, false);
6730 pci_set_power_state(pdev, PCI_D3hot);
6733 return 0;
6735 #endif /* CONFIG_PM_SLEEP */
6737 static int igb_resume(struct device *dev)
6739 struct pci_dev *pdev = to_pci_dev(dev);
6740 struct net_device *netdev = pci_get_drvdata(pdev);
6741 struct igb_adapter *adapter = netdev_priv(netdev);
6742 struct e1000_hw *hw = &adapter->hw;
6743 u32 err;
6745 pci_set_power_state(pdev, PCI_D0);
6746 pci_restore_state(pdev);
6747 pci_save_state(pdev);
6749 err = pci_enable_device_mem(pdev);
6750 if (err) {
6751 dev_err(&pdev->dev,
6752 "igb: Cannot enable PCI device from suspend\n");
6753 return err;
6755 pci_set_master(pdev);
6757 pci_enable_wake(pdev, PCI_D3hot, 0);
6758 pci_enable_wake(pdev, PCI_D3cold, 0);
6760 if (!rtnl_is_locked()) {
6762 * shut up ASSERT_RTNL() warning in
6763 * netif_set_real_num_tx/rx_queues.
6765 rtnl_lock();
6766 err = igb_init_interrupt_scheme(adapter);
6767 rtnl_unlock();
6768 } else {
6769 err = igb_init_interrupt_scheme(adapter);
6771 if (err) {
6772 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6773 return -ENOMEM;
6776 igb_reset(adapter);
6778 /* let the f/w know that the h/w is now under the control of the
6779 * driver. */
6780 igb_get_hw_control(adapter);
6782 wr32(E1000_WUS, ~0);
6784 if (netdev->flags & IFF_UP) {
6785 err = __igb_open(netdev, true);
6786 if (err)
6787 return err;
6790 netif_device_attach(netdev);
6791 return 0;
6794 #ifdef CONFIG_PM_RUNTIME
6795 static int igb_runtime_idle(struct device *dev)
6797 struct pci_dev *pdev = to_pci_dev(dev);
6798 struct net_device *netdev = pci_get_drvdata(pdev);
6799 struct igb_adapter *adapter = netdev_priv(netdev);
6801 if (!igb_has_link(adapter))
6802 pm_schedule_suspend(dev, MSEC_PER_SEC * 5);
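/* -EBUSY keeps the device active for now; when there is no link a runtime
 * suspend has merely been scheduled for five seconds from now. */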
6804 return -EBUSY;
6807 static int igb_runtime_suspend(struct device *dev)
6809 struct pci_dev *pdev = to_pci_dev(dev);
6810 int retval;
6811 bool wake;
6813 retval = __igb_shutdown(pdev, &wake, 1);
6814 if (retval)
6815 return retval;
6817 if (wake) {
6818 pci_prepare_to_sleep(pdev);
6819 } else {
6820 pci_wake_from_d3(pdev, false);
6821 pci_set_power_state(pdev, PCI_D3hot);
6824 return 0;
6827 static int igb_runtime_resume(struct device *dev)
6829 return igb_resume(dev);
6831 #endif /* CONFIG_PM_RUNTIME */
6832 #endif
6834 static void igb_shutdown(struct pci_dev *pdev)
6836 bool wake;
6838 __igb_shutdown(pdev, &wake, 0);
6840 if (system_state == SYSTEM_POWER_OFF) {
6841 pci_wake_from_d3(pdev, wake);
6842 pci_set_power_state(pdev, PCI_D3hot);
6846 #ifdef CONFIG_NET_POLL_CONTROLLER
6848 * Polling 'interrupt' - used by things like netconsole to send skbs
6849 * without having to re-enable interrupts. It's not called while
6850 * the interrupt routine is executing.
6852 static void igb_netpoll(struct net_device *netdev)
6854 struct igb_adapter *adapter = netdev_priv(netdev);
6855 struct e1000_hw *hw = &adapter->hw;
6856 struct igb_q_vector *q_vector;
6857 int i;
6859 for (i = 0; i < adapter->num_q_vectors; i++) {
6860 q_vector = adapter->q_vector[i];
6861 if (adapter->msix_entries)
6862 wr32(E1000_EIMC, q_vector->eims_value);
6863 else
6864 igb_irq_disable(adapter);
6865 napi_schedule(&q_vector->napi);
6868 #endif /* CONFIG_NET_POLL_CONTROLLER */
6871 * igb_io_error_detected - called when PCI error is detected
6872 * @pdev: Pointer to PCI device
6873 * @state: The current pci connection state
6875 * This function is called after a PCI bus error affecting
6876 * this device has been detected.
6878 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6879 pci_channel_state_t state)
6881 struct net_device *netdev = pci_get_drvdata(pdev);
6882 struct igb_adapter *adapter = netdev_priv(netdev);
6884 netif_device_detach(netdev);
6886 if (state == pci_channel_io_perm_failure)
6887 return PCI_ERS_RESULT_DISCONNECT;
6889 if (netif_running(netdev))
6890 igb_down(adapter);
6891 pci_disable_device(pdev);
6893 /* Request a slot reset. */
6894 return PCI_ERS_RESULT_NEED_RESET;
6898 * igb_io_slot_reset - called after the pci bus has been reset.
6899 * @pdev: Pointer to PCI device
6901 * Restart the card from scratch, as if from a cold-boot. Implementation
6902 * resembles the first-half of the igb_resume routine.
6904 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6906 struct net_device *netdev = pci_get_drvdata(pdev);
6907 struct igb_adapter *adapter = netdev_priv(netdev);
6908 struct e1000_hw *hw = &adapter->hw;
6909 pci_ers_result_t result;
6910 int err;
6912 if (pci_enable_device_mem(pdev)) {
6913 dev_err(&pdev->dev,
6914 "Cannot re-enable PCI device after reset.\n");
6915 result = PCI_ERS_RESULT_DISCONNECT;
6916 } else {
6917 pci_set_master(pdev);
6918 pci_restore_state(pdev);
6919 pci_save_state(pdev);
6921 pci_enable_wake(pdev, PCI_D3hot, 0);
6922 pci_enable_wake(pdev, PCI_D3cold, 0);
6924 igb_reset(adapter);
6925 wr32(E1000_WUS, ~0);
6926 result = PCI_ERS_RESULT_RECOVERED;
6929 err = pci_cleanup_aer_uncorrect_error_status(pdev);
6930 if (err) {
6931 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6932 "failed 0x%0x\n", err);
6933 /* non-fatal, continue */
6936 return result;
6940 * igb_io_resume - called when traffic can start flowing again.
6941 * @pdev: Pointer to PCI device
6943 * This callback is called when the error recovery driver tells us that
6944 * its OK to resume normal operation. Implementation resembles the
6945 * second-half of the igb_resume routine.
6947 static void igb_io_resume(struct pci_dev *pdev)
6949 struct net_device *netdev = pci_get_drvdata(pdev);
6950 struct igb_adapter *adapter = netdev_priv(netdev);
6952 if (netif_running(netdev)) {
6953 if (igb_up(adapter)) {
6954 dev_err(&pdev->dev, "igb_up failed after reset\n");
6955 return;
6959 netif_device_attach(netdev);
6961 /* let the f/w know that the h/w is now under the control of the
6962 * driver. */
6963 igb_get_hw_control(adapter);
6966 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6967 u8 qsel)
6969 u32 rar_low, rar_high;
6970 struct e1000_hw *hw = &adapter->hw;
6972 /* HW expects these in little endian so we reverse the byte order
6973 * from network order (big endian) to little endian
6975 rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6976 ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6977 rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6979 /* Indicate to hardware the Address is Valid. */
6980 rar_high |= E1000_RAH_AV;
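/* select the pool/queue: the 82575 stores the pool number as a value in the
 * RAH pool field, while later MACs use a one-hot pool-select bitmask, hence
 * the multiply vs. shift below. */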
6982 if (hw->mac.type == e1000_82575)
6983 rar_high |= E1000_RAH_POOL_1 * qsel;
6984 else
6985 rar_high |= E1000_RAH_POOL_1 << qsel;
6987 wr32(E1000_RAL(index), rar_low);
6988 wrfl();
6989 wr32(E1000_RAH(index), rar_high);
6990 wrfl();
6993 static int igb_set_vf_mac(struct igb_adapter *adapter,
6994 int vf, unsigned char *mac_addr)
6996 struct e1000_hw *hw = &adapter->hw;
6997 /* VF MAC addresses start at the end of the receive address registers
6998  * and move towards the first, so a collision should not be possible */
6999 int rar_entry = hw->mac.rar_entry_count - (vf + 1);
7001 memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
7003 igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
7005 return 0;
7008 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
7010 struct igb_adapter *adapter = netdev_priv(netdev);
7011 if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
7012 return -EINVAL;
7013 adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
7014 dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
7015 dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
7016 " change effective.");
7017 if (test_bit(__IGB_DOWN, &adapter->state)) {
7018 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
7019 " but the PF device is not up.\n");
7020 dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
7021 " attempting to use the VF device.\n");
7023 return igb_set_vf_mac(adapter, vf, mac);
7026 static int igb_link_mbps(int internal_link_speed)
7028 switch (internal_link_speed) {
7029 case SPEED_100:
7030 return 100;
7031 case SPEED_1000:
7032 return 1000;
7033 default:
7034 return 0;
7038 static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
7039 int link_speed)
7041 int rf_dec, rf_int;
7042 u32 bcnrc_val;
7044 if (tx_rate != 0) {
7045 /* Calculate the rate factor values to set */
7046 rf_int = link_speed / tx_rate;
7047 rf_dec = (link_speed - (rf_int * tx_rate));
7048 rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;
7050 bcnrc_val = E1000_RTTBCNRC_RS_ENA;
7051 bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
7052 E1000_RTTBCNRC_RF_INT_MASK);
7053 bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
7054 } else {
7055 bcnrc_val = 0;
7058 wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
7059 wr32(E1000_RTTBCNRC, bcnrc_val);
7062 static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
7064 int actual_link_speed, i;
7065 bool reset_rate = false;
7067 /* VF TX rate limit was not set or not supported */
7068 if ((adapter->vf_rate_link_speed == 0) ||
7069 (adapter->hw.mac.type != e1000_82576))
7070 return;
7072 actual_link_speed = igb_link_mbps(adapter->link_speed);
7073 if (actual_link_speed != adapter->vf_rate_link_speed) {
7074 reset_rate = true;
7075 adapter->vf_rate_link_speed = 0;
7076 dev_info(&adapter->pdev->dev,
7077 "Link speed has been changed. VF Transmit "
7078 "rate is disabled\n");
7081 for (i = 0; i < adapter->vfs_allocated_count; i++) {
7082 if (reset_rate)
7083 adapter->vf_data[i].tx_rate = 0;
7085 igb_set_vf_rate_limit(&adapter->hw, i,
7086 adapter->vf_data[i].tx_rate,
7087 actual_link_speed);
7091 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
7093 struct igb_adapter *adapter = netdev_priv(netdev);
7094 struct e1000_hw *hw = &adapter->hw;
7095 int actual_link_speed;
7097 if (hw->mac.type != e1000_82576)
7098 return -EOPNOTSUPP;
7100 actual_link_speed = igb_link_mbps(adapter->link_speed);
7101 if ((vf >= adapter->vfs_allocated_count) ||
7102 (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
7103 (tx_rate < 0) || (tx_rate > actual_link_speed))
7104 return -EINVAL;
7106 adapter->vf_rate_link_speed = actual_link_speed;
7107 adapter->vf_data[vf].tx_rate = (u16)tx_rate;
7108 igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
7110 return 0;
7113 static int igb_ndo_get_vf_config(struct net_device *netdev,
7114 int vf, struct ifla_vf_info *ivi)
7116 struct igb_adapter *adapter = netdev_priv(netdev);
7117 if (vf >= adapter->vfs_allocated_count)
7118 return -EINVAL;
7119 ivi->vf = vf;
7120 memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
7121 ivi->tx_rate = adapter->vf_data[vf].tx_rate;
7122 ivi->vlan = adapter->vf_data[vf].pf_vlan;
7123 ivi->qos = adapter->vf_data[vf].pf_qos;
7124 return 0;
7127 static void igb_vmm_control(struct igb_adapter *adapter)
7129 struct e1000_hw *hw = &adapter->hw;
7130 u32 reg;
7132 switch (hw->mac.type) {
7133 case e1000_82575:
7134 default:
7135 /* replication is not supported for 82575 */
7136 return;
7137 case e1000_82576:
7138 /* notify HW that the MAC is adding vlan tags */
7139 reg = rd32(E1000_DTXCTL);
7140 reg |= E1000_DTXCTL_VLAN_ADDED;
7141 wr32(E1000_DTXCTL, reg);
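/* fall through - the 82576 also needs the replication VLAN tag stripping
 * enabled below */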
7142 case e1000_82580:
7143 /* enable replication vlan tag stripping */
7144 reg = rd32(E1000_RPLOLR);
7145 reg |= E1000_RPLOLR_STRVLAN;
7146 wr32(E1000_RPLOLR, reg);
7147 case e1000_i350:
7148 /* none of the above registers are supported by i350 */
7149 break;
7152 if (adapter->vfs_allocated_count) {
7153 igb_vmdq_set_loopback_pf(hw, true);
7154 igb_vmdq_set_replication_pf(hw, true);
7155 igb_vmdq_set_anti_spoofing_pf(hw, true,
7156 adapter->vfs_allocated_count);
7157 } else {
7158 igb_vmdq_set_loopback_pf(hw, false);
7159 igb_vmdq_set_replication_pf(hw, false);
7163 static void igb_init_dmac(struct igb_adapter *adapter, u32 pba)
7165 struct e1000_hw *hw = &adapter->hw;
7166 u32 dmac_thr;
7167 u16 hwm;
7169 if (hw->mac.type > e1000_82580) {
7170 if (adapter->flags & IGB_FLAG_DMAC) {
7171 u32 reg;
7173 /* force threshold to 0. */
7174 wr32(E1000_DMCTXTH, 0);
7177 * DMA Coalescing high water mark needs to be greater
7178 * than the Rx threshold. Set hwm to PBA - max frame
7179 * size in 16B units, capping it at PBA - 6KB.
7181 hwm = 64 * pba - adapter->max_frame_size / 16;
7182 if (hwm < 64 * (pba - 6))
7183 hwm = 64 * (pba - 6);
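/* pba is in kB; multiplying by 64 expresses it in the 16-byte units used by
 * the high-water threshold (1 kB = 64 * 16 B). */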
7184 reg = rd32(E1000_FCRTC);
7185 reg &= ~E1000_FCRTC_RTH_COAL_MASK;
7186 reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
7187 & E1000_FCRTC_RTH_COAL_MASK);
7188 wr32(E1000_FCRTC, reg);
7191 * Set the DMA Coalescing Rx threshold to PBA - 2 * max
7192 * frame size, capping it at PBA - 10KB.
7194 dmac_thr = pba - adapter->max_frame_size / 512;
7195 if (dmac_thr < pba - 10)
7196 dmac_thr = pba - 10;
7197 reg = rd32(E1000_DMACR);
7198 reg &= ~E1000_DMACR_DMACTHR_MASK;
7199 reg |= ((dmac_thr << E1000_DMACR_DMACTHR_SHIFT)
7200 & E1000_DMACR_DMACTHR_MASK);
7202 /* transition to L0x or L1 if available..*/
7203 reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
7205 * watchdog timer = +/-1000 usec, in 32 usec intervals
7206 reg |= (1000 >> 5);
7207 wr32(E1000_DMACR, reg);
7210 * no lower threshold to disable
7211 * coalescing (smart FIFO) - UTRESH=0
7213 wr32(E1000_DMCRTRH, 0);
7215 reg = (IGB_DMCTLX_DCFLUSH_DIS | 0x4);
7217 wr32(E1000_DMCTLX, reg);
7220 * free space in tx packet buffer to wake from
7221 * DMA coal
7223 wr32(E1000_DMCTXTH, (IGB_MIN_TXPBSIZE -
7224 (IGB_TX_BUF_4096 + adapter->max_frame_size)) >> 6);
7227 * make low power state decision controlled
7228 * by DMA coal
7230 reg = rd32(E1000_PCIEMISC);
7231 reg &= ~E1000_PCIEMISC_LX_DECISION;
7232 wr32(E1000_PCIEMISC, reg);
7233 } /* endif adapter->dmac is not disabled */
7234 } else if (hw->mac.type == e1000_82580) {
7235 u32 reg = rd32(E1000_PCIEMISC);
7236 wr32(E1000_PCIEMISC, reg & ~E1000_PCIEMISC_LX_DECISION);
7237 wr32(E1000_DMACR, 0);
7241 /* igb_main.c */