include: replace linux/module.h with "struct module" wherever possible
[linux-2.6/next.git] / drivers / net / igb / igb_main.c
blob40d4c405fd7e0218703c091619a632b8525cd2c6
1 /*******************************************************************************
3 Intel(R) Gigabit Ethernet Linux driver
4 Copyright(c) 2007-2011 Intel Corporation.
6 This program is free software; you can redistribute it and/or modify it
7 under the terms and conditions of the GNU General Public License,
8 version 2, as published by the Free Software Foundation.
10 This program is distributed in the hope it will be useful, but WITHOUT
11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 more details.
15 You should have received a copy of the GNU General Public License along with
16 this program; if not, write to the Free Software Foundation, Inc.,
17 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
19 The full GNU General Public License is included in this distribution in
20 the file called "COPYING".
22 Contact Information:
23 e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24 Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
26 *******************************************************************************/
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/init.h>
31 #include <linux/bitops.h>
32 #include <linux/vmalloc.h>
33 #include <linux/pagemap.h>
34 #include <linux/netdevice.h>
35 #include <linux/ipv6.h>
36 #include <linux/slab.h>
37 #include <net/checksum.h>
38 #include <net/ip6_checksum.h>
39 #include <linux/net_tstamp.h>
40 #include <linux/mii.h>
41 #include <linux/ethtool.h>
42 #include <linux/if_vlan.h>
43 #include <linux/pci.h>
44 #include <linux/pci-aspm.h>
45 #include <linux/delay.h>
46 #include <linux/interrupt.h>
47 #include <linux/if_ether.h>
48 #include <linux/aer.h>
49 #include <linux/prefetch.h>
50 #ifdef CONFIG_IGB_DCA
51 #include <linux/dca.h>
52 #endif
53 #include "igb.h"
55 #define MAJ 3
56 #define MIN 0
57 #define BUILD 6
58 #define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
59 __stringify(BUILD) "-k"
60 char igb_driver_name[] = "igb";
61 char igb_driver_version[] = DRV_VERSION;
62 static const char igb_driver_string[] =
63 "Intel(R) Gigabit Ethernet Network Driver";
64 static const char igb_copyright[] = "Copyright (c) 2007-2011 Intel Corporation.";
66 static const struct e1000_info *igb_info_tbl[] = {
67 [board_82575] = &e1000_82575_info,
70 static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
71 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
72 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
73 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
74 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
75 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
76 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
77 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
78 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
79 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
80 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
81 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
82 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
83 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
84 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
85 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
86 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
87 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
88 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
89 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
90 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
91 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
92 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
93 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
94 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
95 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
96 /* required last entry */
97 {0, }
100 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
102 void igb_reset(struct igb_adapter *);
103 static int igb_setup_all_tx_resources(struct igb_adapter *);
104 static int igb_setup_all_rx_resources(struct igb_adapter *);
105 static void igb_free_all_tx_resources(struct igb_adapter *);
106 static void igb_free_all_rx_resources(struct igb_adapter *);
107 static void igb_setup_mrqc(struct igb_adapter *);
108 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
109 static void __devexit igb_remove(struct pci_dev *pdev);
110 static void igb_init_hw_timer(struct igb_adapter *adapter);
111 static int igb_sw_init(struct igb_adapter *);
112 static int igb_open(struct net_device *);
113 static int igb_close(struct net_device *);
114 static void igb_configure_tx(struct igb_adapter *);
115 static void igb_configure_rx(struct igb_adapter *);
116 static void igb_clean_all_tx_rings(struct igb_adapter *);
117 static void igb_clean_all_rx_rings(struct igb_adapter *);
118 static void igb_clean_tx_ring(struct igb_ring *);
119 static void igb_clean_rx_ring(struct igb_ring *);
120 static void igb_set_rx_mode(struct net_device *);
121 static void igb_update_phy_info(unsigned long);
122 static void igb_watchdog(unsigned long);
123 static void igb_watchdog_task(struct work_struct *);
124 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
125 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
126 struct rtnl_link_stats64 *stats);
127 static int igb_change_mtu(struct net_device *, int);
128 static int igb_set_mac(struct net_device *, void *);
129 static void igb_set_uta(struct igb_adapter *adapter);
130 static irqreturn_t igb_intr(int irq, void *);
131 static irqreturn_t igb_intr_msi(int irq, void *);
132 static irqreturn_t igb_msix_other(int irq, void *);
133 static irqreturn_t igb_msix_ring(int irq, void *);
134 #ifdef CONFIG_IGB_DCA
135 static void igb_update_dca(struct igb_q_vector *);
136 static void igb_setup_dca(struct igb_adapter *);
137 #endif /* CONFIG_IGB_DCA */
138 static bool igb_clean_tx_irq(struct igb_q_vector *);
139 static int igb_poll(struct napi_struct *, int);
140 static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
141 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
142 static void igb_tx_timeout(struct net_device *);
143 static void igb_reset_task(struct work_struct *);
144 static void igb_vlan_mode(struct net_device *netdev, u32 features);
145 static void igb_vlan_rx_add_vid(struct net_device *, u16);
146 static void igb_vlan_rx_kill_vid(struct net_device *, u16);
147 static void igb_restore_vlan(struct igb_adapter *);
148 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
149 static void igb_ping_all_vfs(struct igb_adapter *);
150 static void igb_msg_task(struct igb_adapter *);
151 static void igb_vmm_control(struct igb_adapter *);
152 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
153 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
154 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
155 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
156 int vf, u16 vlan, u8 qos);
157 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
158 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
159 struct ifla_vf_info *ivi);
160 static void igb_check_vf_rate_limit(struct igb_adapter *);
#ifdef CONFIG_PM
static int igb_suspend(struct pci_dev *pdev, pm_message_t state);
static int igb_resume(struct pci_dev *pdev);
#endif
static void igb_shutdown(struct pci_dev *pdev);

#ifdef CONFIG_IGB_DCA
static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
			  void *p);

/*
 * DCA notifier block.  Only the callback is named; the remaining members
 * (.next, .priority) are zero-initialised, matching the explicit NULL / 0.
 */
static struct notifier_block dca_notifier = {
	.notifier_call	= igb_notify_dca,
};
#endif

#ifdef CONFIG_NET_POLL_CONTROLLER
/* for netdump / net console */
static void igb_netpoll(struct net_device *netdev);
#endif

#ifdef CONFIG_PCI_IOV
/* Module parameter; static storage already starts at zero. */
static unsigned int max_vfs;
module_param(max_vfs, uint, 0);
MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
		 "per physical function");
#endif /* CONFIG_PCI_IOV */
186 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
187 pci_channel_state_t);
188 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
189 static void igb_io_resume(struct pci_dev *);
191 static struct pci_error_handlers igb_err_handler = {
192 .error_detected = igb_io_error_detected,
193 .slot_reset = igb_io_slot_reset,
194 .resume = igb_io_resume,
198 static struct pci_driver igb_driver = {
199 .name = igb_driver_name,
200 .id_table = igb_pci_tbl,
201 .probe = igb_probe,
202 .remove = __devexit_p(igb_remove),
203 #ifdef CONFIG_PM
204 /* Power Management Hooks */
205 .suspend = igb_suspend,
206 .resume = igb_resume,
207 #endif
208 .shutdown = igb_shutdown,
209 .err_handler = &igb_err_handler
212 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
213 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
214 MODULE_LICENSE("GPL");
215 MODULE_VERSION(DRV_VERSION);
217 struct igb_reg_info {
218 u32 ofs;
219 char *name;
222 static const struct igb_reg_info igb_reg_info_tbl[] = {
224 /* General Registers */
225 {E1000_CTRL, "CTRL"},
226 {E1000_STATUS, "STATUS"},
227 {E1000_CTRL_EXT, "CTRL_EXT"},
229 /* Interrupt Registers */
230 {E1000_ICR, "ICR"},
232 /* RX Registers */
233 {E1000_RCTL, "RCTL"},
234 {E1000_RDLEN(0), "RDLEN"},
235 {E1000_RDH(0), "RDH"},
236 {E1000_RDT(0), "RDT"},
237 {E1000_RXDCTL(0), "RXDCTL"},
238 {E1000_RDBAL(0), "RDBAL"},
239 {E1000_RDBAH(0), "RDBAH"},
241 /* TX Registers */
242 {E1000_TCTL, "TCTL"},
243 {E1000_TDBAL(0), "TDBAL"},
244 {E1000_TDBAH(0), "TDBAH"},
245 {E1000_TDLEN(0), "TDLEN"},
246 {E1000_TDH(0), "TDH"},
247 {E1000_TDT(0), "TDT"},
248 {E1000_TXDCTL(0), "TXDCTL"},
249 {E1000_TDFH, "TDFH"},
250 {E1000_TDFT, "TDFT"},
251 {E1000_TDFHS, "TDFHS"},
252 {E1000_TDFPC, "TDFPC"},
254 /* List Terminator */
259 * igb_regdump - register printout routine
261 static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
263 int n = 0;
264 char rname[16];
265 u32 regs[8];
267 switch (reginfo->ofs) {
268 case E1000_RDLEN(0):
269 for (n = 0; n < 4; n++)
270 regs[n] = rd32(E1000_RDLEN(n));
271 break;
272 case E1000_RDH(0):
273 for (n = 0; n < 4; n++)
274 regs[n] = rd32(E1000_RDH(n));
275 break;
276 case E1000_RDT(0):
277 for (n = 0; n < 4; n++)
278 regs[n] = rd32(E1000_RDT(n));
279 break;
280 case E1000_RXDCTL(0):
281 for (n = 0; n < 4; n++)
282 regs[n] = rd32(E1000_RXDCTL(n));
283 break;
284 case E1000_RDBAL(0):
285 for (n = 0; n < 4; n++)
286 regs[n] = rd32(E1000_RDBAL(n));
287 break;
288 case E1000_RDBAH(0):
289 for (n = 0; n < 4; n++)
290 regs[n] = rd32(E1000_RDBAH(n));
291 break;
292 case E1000_TDBAL(0):
293 for (n = 0; n < 4; n++)
294 regs[n] = rd32(E1000_RDBAL(n));
295 break;
296 case E1000_TDBAH(0):
297 for (n = 0; n < 4; n++)
298 regs[n] = rd32(E1000_TDBAH(n));
299 break;
300 case E1000_TDLEN(0):
301 for (n = 0; n < 4; n++)
302 regs[n] = rd32(E1000_TDLEN(n));
303 break;
304 case E1000_TDH(0):
305 for (n = 0; n < 4; n++)
306 regs[n] = rd32(E1000_TDH(n));
307 break;
308 case E1000_TDT(0):
309 for (n = 0; n < 4; n++)
310 regs[n] = rd32(E1000_TDT(n));
311 break;
312 case E1000_TXDCTL(0):
313 for (n = 0; n < 4; n++)
314 regs[n] = rd32(E1000_TXDCTL(n));
315 break;
316 default:
317 printk(KERN_INFO "%-15s %08x\n",
318 reginfo->name, rd32(reginfo->ofs));
319 return;
322 snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
323 printk(KERN_INFO "%-15s ", rname);
324 for (n = 0; n < 4; n++)
325 printk(KERN_CONT "%08x ", regs[n]);
326 printk(KERN_CONT "\n");
330 * igb_dump - Print registers, tx-rings and rx-rings
332 static void igb_dump(struct igb_adapter *adapter)
334 struct net_device *netdev = adapter->netdev;
335 struct e1000_hw *hw = &adapter->hw;
336 struct igb_reg_info *reginfo;
337 int n = 0;
338 struct igb_ring *tx_ring;
339 union e1000_adv_tx_desc *tx_desc;
340 struct my_u0 { u64 a; u64 b; } *u0;
341 struct igb_buffer *buffer_info;
342 struct igb_ring *rx_ring;
343 union e1000_adv_rx_desc *rx_desc;
344 u32 staterr;
345 int i = 0;
347 if (!netif_msg_hw(adapter))
348 return;
350 /* Print netdevice Info */
351 if (netdev) {
352 dev_info(&adapter->pdev->dev, "Net device Info\n");
353 printk(KERN_INFO "Device Name state "
354 "trans_start last_rx\n");
355 printk(KERN_INFO "%-15s %016lX %016lX %016lX\n",
356 netdev->name,
357 netdev->state,
358 netdev->trans_start,
359 netdev->last_rx);
362 /* Print Registers */
363 dev_info(&adapter->pdev->dev, "Register Dump\n");
364 printk(KERN_INFO " Register Name Value\n");
365 for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
366 reginfo->name; reginfo++) {
367 igb_regdump(hw, reginfo);
370 /* Print TX Ring Summary */
371 if (!netdev || !netif_running(netdev))
372 goto exit;
374 dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
375 printk(KERN_INFO "Queue [NTU] [NTC] [bi(ntc)->dma ]"
376 " leng ntw timestamp\n");
377 for (n = 0; n < adapter->num_tx_queues; n++) {
378 tx_ring = adapter->tx_ring[n];
379 buffer_info = &tx_ring->buffer_info[tx_ring->next_to_clean];
380 printk(KERN_INFO " %5d %5X %5X %016llX %04X %3X %016llX\n",
381 n, tx_ring->next_to_use, tx_ring->next_to_clean,
382 (u64)buffer_info->dma,
383 buffer_info->length,
384 buffer_info->next_to_watch,
385 (u64)buffer_info->time_stamp);
388 /* Print TX Rings */
389 if (!netif_msg_tx_done(adapter))
390 goto rx_ring_summary;
392 dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
394 /* Transmit Descriptor Formats
396 * Advanced Transmit Descriptor
397 * +--------------------------------------------------------------+
398 * 0 | Buffer Address [63:0] |
399 * +--------------------------------------------------------------+
400 * 8 | PAYLEN | PORTS |CC|IDX | STA | DCMD |DTYP|MAC|RSV| DTALEN |
401 * +--------------------------------------------------------------+
402 * 63 46 45 40 39 38 36 35 32 31 24 15 0
405 for (n = 0; n < adapter->num_tx_queues; n++) {
406 tx_ring = adapter->tx_ring[n];
407 printk(KERN_INFO "------------------------------------\n");
408 printk(KERN_INFO "TX QUEUE INDEX = %d\n", tx_ring->queue_index);
409 printk(KERN_INFO "------------------------------------\n");
410 printk(KERN_INFO "T [desc] [address 63:0 ] "
411 "[PlPOCIStDDM Ln] [bi->dma ] "
412 "leng ntw timestamp bi->skb\n");
414 for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
415 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
416 buffer_info = &tx_ring->buffer_info[i];
417 u0 = (struct my_u0 *)tx_desc;
418 printk(KERN_INFO "T [0x%03X] %016llX %016llX %016llX"
419 " %04X %3X %016llX %p", i,
420 le64_to_cpu(u0->a),
421 le64_to_cpu(u0->b),
422 (u64)buffer_info->dma,
423 buffer_info->length,
424 buffer_info->next_to_watch,
425 (u64)buffer_info->time_stamp,
426 buffer_info->skb);
427 if (i == tx_ring->next_to_use &&
428 i == tx_ring->next_to_clean)
429 printk(KERN_CONT " NTC/U\n");
430 else if (i == tx_ring->next_to_use)
431 printk(KERN_CONT " NTU\n");
432 else if (i == tx_ring->next_to_clean)
433 printk(KERN_CONT " NTC\n");
434 else
435 printk(KERN_CONT "\n");
437 if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
438 print_hex_dump(KERN_INFO, "",
439 DUMP_PREFIX_ADDRESS,
440 16, 1, phys_to_virt(buffer_info->dma),
441 buffer_info->length, true);
445 /* Print RX Rings Summary */
446 rx_ring_summary:
447 dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
448 printk(KERN_INFO "Queue [NTU] [NTC]\n");
449 for (n = 0; n < adapter->num_rx_queues; n++) {
450 rx_ring = adapter->rx_ring[n];
451 printk(KERN_INFO " %5d %5X %5X\n", n,
452 rx_ring->next_to_use, rx_ring->next_to_clean);
455 /* Print RX Rings */
456 if (!netif_msg_rx_status(adapter))
457 goto exit;
459 dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
461 /* Advanced Receive Descriptor (Read) Format
462 * 63 1 0
463 * +-----------------------------------------------------+
464 * 0 | Packet Buffer Address [63:1] |A0/NSE|
465 * +----------------------------------------------+------+
466 * 8 | Header Buffer Address [63:1] | DD |
467 * +-----------------------------------------------------+
470 * Advanced Receive Descriptor (Write-Back) Format
472 * 63 48 47 32 31 30 21 20 17 16 4 3 0
473 * +------------------------------------------------------+
474 * 0 | Packet IP |SPH| HDR_LEN | RSV|Packet| RSS |
475 * | Checksum Ident | | | | Type | Type |
476 * +------------------------------------------------------+
477 * 8 | VLAN Tag | Length | Extended Error | Extended Status |
478 * +------------------------------------------------------+
479 * 63 48 47 32 31 20 19 0
482 for (n = 0; n < adapter->num_rx_queues; n++) {
483 rx_ring = adapter->rx_ring[n];
484 printk(KERN_INFO "------------------------------------\n");
485 printk(KERN_INFO "RX QUEUE INDEX = %d\n", rx_ring->queue_index);
486 printk(KERN_INFO "------------------------------------\n");
487 printk(KERN_INFO "R [desc] [ PktBuf A0] "
488 "[ HeadBuf DD] [bi->dma ] [bi->skb] "
489 "<-- Adv Rx Read format\n");
490 printk(KERN_INFO "RWB[desc] [PcsmIpSHl PtRs] "
491 "[vl er S cks ln] ---------------- [bi->skb] "
492 "<-- Adv Rx Write-Back format\n");
494 for (i = 0; i < rx_ring->count; i++) {
495 buffer_info = &rx_ring->buffer_info[i];
496 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
497 u0 = (struct my_u0 *)rx_desc;
498 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
499 if (staterr & E1000_RXD_STAT_DD) {
500 /* Descriptor Done */
501 printk(KERN_INFO "RWB[0x%03X] %016llX "
502 "%016llX ---------------- %p", i,
503 le64_to_cpu(u0->a),
504 le64_to_cpu(u0->b),
505 buffer_info->skb);
506 } else {
507 printk(KERN_INFO "R [0x%03X] %016llX "
508 "%016llX %016llX %p", i,
509 le64_to_cpu(u0->a),
510 le64_to_cpu(u0->b),
511 (u64)buffer_info->dma,
512 buffer_info->skb);
514 if (netif_msg_pktdata(adapter)) {
515 print_hex_dump(KERN_INFO, "",
516 DUMP_PREFIX_ADDRESS,
517 16, 1,
518 phys_to_virt(buffer_info->dma),
519 rx_ring->rx_buffer_len, true);
520 if (rx_ring->rx_buffer_len
521 < IGB_RXBUFFER_1024)
522 print_hex_dump(KERN_INFO, "",
523 DUMP_PREFIX_ADDRESS,
524 16, 1,
525 phys_to_virt(
526 buffer_info->page_dma +
527 buffer_info->page_offset),
528 PAGE_SIZE/2, true);
532 if (i == rx_ring->next_to_use)
533 printk(KERN_CONT " NTU\n");
534 else if (i == rx_ring->next_to_clean)
535 printk(KERN_CONT " NTC\n");
536 else
537 printk(KERN_CONT "\n");
542 exit:
543 return;
548 * igb_read_clock - read raw cycle counter (to be used by time counter)
550 static cycle_t igb_read_clock(const struct cyclecounter *tc)
552 struct igb_adapter *adapter =
553 container_of(tc, struct igb_adapter, cycles);
554 struct e1000_hw *hw = &adapter->hw;
555 u64 stamp = 0;
556 int shift = 0;
559 * The timestamp latches on lowest register read. For the 82580
560 * the lowest register is SYSTIMR instead of SYSTIML. However we never
561 * adjusted TIMINCA so SYSTIMR will just read as all 0s so ignore it.
563 if (hw->mac.type == e1000_82580) {
564 stamp = rd32(E1000_SYSTIMR) >> 8;
565 shift = IGB_82580_TSYNC_SHIFT;
568 stamp |= (u64)rd32(E1000_SYSTIML) << shift;
569 stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
570 return stamp;
574 * igb_get_hw_dev - return device
575 * used by hardware layer to print debugging information
577 struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
579 struct igb_adapter *adapter = hw->back;
580 return adapter->netdev;
584 * igb_init_module - Driver Registration Routine
586 * igb_init_module is the first routine called when the driver is
587 * loaded. All it does is register with the PCI subsystem.
589 static int __init igb_init_module(void)
591 int ret;
592 printk(KERN_INFO "%s - version %s\n",
593 igb_driver_string, igb_driver_version);
595 printk(KERN_INFO "%s\n", igb_copyright);
597 #ifdef CONFIG_IGB_DCA
598 dca_register_notify(&dca_notifier);
599 #endif
600 ret = pci_register_driver(&igb_driver);
601 return ret;
604 module_init(igb_init_module);
607 * igb_exit_module - Driver Exit Cleanup Routine
609 * igb_exit_module is called just before the driver is removed
610 * from memory.
612 static void __exit igb_exit_module(void)
614 #ifdef CONFIG_IGB_DCA
615 dca_unregister_notify(&dca_notifier);
616 #endif
617 pci_unregister_driver(&igb_driver);
620 module_exit(igb_exit_module);
/*
 * Map a logical queue number to its 82576 register index: even queues map
 * to 0, 1, 2, ... and odd queues to 8, 9, 10, ...  The argument is
 * parenthesised so that expressions such as (a | b) expand correctly; it
 * is still evaluated twice, so avoid side effects.
 */
#define Q_IDX_82576(i) ((((i) & 0x1) << 3) + ((i) >> 1))
624 * igb_cache_ring_register - Descriptor ring to register mapping
625 * @adapter: board private structure to initialize
627 * Once we know the feature-set enabled for the device, we'll cache
628 * the register offset the descriptor ring is assigned to.
630 static void igb_cache_ring_register(struct igb_adapter *adapter)
632 int i = 0, j = 0;
633 u32 rbase_offset = adapter->vfs_allocated_count;
635 switch (adapter->hw.mac.type) {
636 case e1000_82576:
637 /* The queues are allocated for virtualization such that VF 0
638 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
639 * In order to avoid collision we start at the first free queue
640 * and continue consuming queues in the same sequence
642 if (adapter->vfs_allocated_count) {
643 for (; i < adapter->rss_queues; i++)
644 adapter->rx_ring[i]->reg_idx = rbase_offset +
645 Q_IDX_82576(i);
647 case e1000_82575:
648 case e1000_82580:
649 case e1000_i350:
650 default:
651 for (; i < adapter->num_rx_queues; i++)
652 adapter->rx_ring[i]->reg_idx = rbase_offset + i;
653 for (; j < adapter->num_tx_queues; j++)
654 adapter->tx_ring[j]->reg_idx = rbase_offset + j;
655 break;
659 static void igb_free_queues(struct igb_adapter *adapter)
661 int i;
663 for (i = 0; i < adapter->num_tx_queues; i++) {
664 kfree(adapter->tx_ring[i]);
665 adapter->tx_ring[i] = NULL;
667 for (i = 0; i < adapter->num_rx_queues; i++) {
668 kfree(adapter->rx_ring[i]);
669 adapter->rx_ring[i] = NULL;
671 adapter->num_rx_queues = 0;
672 adapter->num_tx_queues = 0;
676 * igb_alloc_queues - Allocate memory for all rings
677 * @adapter: board private structure to initialize
679 * We allocate one ring per queue at run-time since we don't know the
680 * number of queues at compile-time.
682 static int igb_alloc_queues(struct igb_adapter *adapter)
684 struct igb_ring *ring;
685 int i;
687 for (i = 0; i < adapter->num_tx_queues; i++) {
688 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
689 if (!ring)
690 goto err;
691 ring->count = adapter->tx_ring_count;
692 ring->queue_index = i;
693 ring->dev = &adapter->pdev->dev;
694 ring->netdev = adapter->netdev;
695 /* For 82575, context index must be unique per ring. */
696 if (adapter->hw.mac.type == e1000_82575)
697 ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
698 adapter->tx_ring[i] = ring;
701 for (i = 0; i < adapter->num_rx_queues; i++) {
702 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
703 if (!ring)
704 goto err;
705 ring->count = adapter->rx_ring_count;
706 ring->queue_index = i;
707 ring->dev = &adapter->pdev->dev;
708 ring->netdev = adapter->netdev;
709 ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
710 ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
711 /* set flag indicating ring supports SCTP checksum offload */
712 if (adapter->hw.mac.type >= e1000_82576)
713 ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
714 adapter->rx_ring[i] = ring;
717 igb_cache_ring_register(adapter);
719 return 0;
721 err:
722 igb_free_queues(adapter);
724 return -ENOMEM;
727 #define IGB_N0_QUEUE -1
728 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
730 u32 msixbm = 0;
731 struct igb_adapter *adapter = q_vector->adapter;
732 struct e1000_hw *hw = &adapter->hw;
733 u32 ivar, index;
734 int rx_queue = IGB_N0_QUEUE;
735 int tx_queue = IGB_N0_QUEUE;
737 if (q_vector->rx_ring)
738 rx_queue = q_vector->rx_ring->reg_idx;
739 if (q_vector->tx_ring)
740 tx_queue = q_vector->tx_ring->reg_idx;
742 switch (hw->mac.type) {
743 case e1000_82575:
744 /* The 82575 assigns vectors using a bitmask, which matches the
745 bitmask for the EICR/EIMS/EIMC registers. To assign one
746 or more queues to a vector, we write the appropriate bits
747 into the MSIXBM register for that vector. */
748 if (rx_queue > IGB_N0_QUEUE)
749 msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
750 if (tx_queue > IGB_N0_QUEUE)
751 msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
752 if (!adapter->msix_entries && msix_vector == 0)
753 msixbm |= E1000_EIMS_OTHER;
754 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
755 q_vector->eims_value = msixbm;
756 break;
757 case e1000_82576:
758 /* 82576 uses a table-based method for assigning vectors.
759 Each queue has a single entry in the table to which we write
760 a vector number along with a "valid" bit. Sadly, the layout
761 of the table is somewhat counterintuitive. */
762 if (rx_queue > IGB_N0_QUEUE) {
763 index = (rx_queue & 0x7);
764 ivar = array_rd32(E1000_IVAR0, index);
765 if (rx_queue < 8) {
766 /* vector goes into low byte of register */
767 ivar = ivar & 0xFFFFFF00;
768 ivar |= msix_vector | E1000_IVAR_VALID;
769 } else {
770 /* vector goes into third byte of register */
771 ivar = ivar & 0xFF00FFFF;
772 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
774 array_wr32(E1000_IVAR0, index, ivar);
776 if (tx_queue > IGB_N0_QUEUE) {
777 index = (tx_queue & 0x7);
778 ivar = array_rd32(E1000_IVAR0, index);
779 if (tx_queue < 8) {
780 /* vector goes into second byte of register */
781 ivar = ivar & 0xFFFF00FF;
782 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
783 } else {
784 /* vector goes into high byte of register */
785 ivar = ivar & 0x00FFFFFF;
786 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
788 array_wr32(E1000_IVAR0, index, ivar);
790 q_vector->eims_value = 1 << msix_vector;
791 break;
792 case e1000_82580:
793 case e1000_i350:
794 /* 82580 uses the same table-based approach as 82576 but has fewer
795 entries as a result we carry over for queues greater than 4. */
796 if (rx_queue > IGB_N0_QUEUE) {
797 index = (rx_queue >> 1);
798 ivar = array_rd32(E1000_IVAR0, index);
799 if (rx_queue & 0x1) {
800 /* vector goes into third byte of register */
801 ivar = ivar & 0xFF00FFFF;
802 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
803 } else {
804 /* vector goes into low byte of register */
805 ivar = ivar & 0xFFFFFF00;
806 ivar |= msix_vector | E1000_IVAR_VALID;
808 array_wr32(E1000_IVAR0, index, ivar);
810 if (tx_queue > IGB_N0_QUEUE) {
811 index = (tx_queue >> 1);
812 ivar = array_rd32(E1000_IVAR0, index);
813 if (tx_queue & 0x1) {
814 /* vector goes into high byte of register */
815 ivar = ivar & 0x00FFFFFF;
816 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
817 } else {
818 /* vector goes into second byte of register */
819 ivar = ivar & 0xFFFF00FF;
820 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
822 array_wr32(E1000_IVAR0, index, ivar);
824 q_vector->eims_value = 1 << msix_vector;
825 break;
826 default:
827 BUG();
828 break;
831 /* add q_vector eims value to global eims_enable_mask */
832 adapter->eims_enable_mask |= q_vector->eims_value;
834 /* configure q_vector to set itr on first interrupt */
835 q_vector->set_itr = 1;
839 * igb_configure_msix - Configure MSI-X hardware
841 * igb_configure_msix sets up the hardware to properly
842 * generate MSI-X interrupts.
844 static void igb_configure_msix(struct igb_adapter *adapter)
846 u32 tmp;
847 int i, vector = 0;
848 struct e1000_hw *hw = &adapter->hw;
850 adapter->eims_enable_mask = 0;
852 /* set vector for other causes, i.e. link changes */
853 switch (hw->mac.type) {
854 case e1000_82575:
855 tmp = rd32(E1000_CTRL_EXT);
856 /* enable MSI-X PBA support*/
857 tmp |= E1000_CTRL_EXT_PBA_CLR;
859 /* Auto-Mask interrupts upon ICR read. */
860 tmp |= E1000_CTRL_EXT_EIAME;
861 tmp |= E1000_CTRL_EXT_IRCA;
863 wr32(E1000_CTRL_EXT, tmp);
865 /* enable msix_other interrupt */
866 array_wr32(E1000_MSIXBM(0), vector++,
867 E1000_EIMS_OTHER);
868 adapter->eims_other = E1000_EIMS_OTHER;
870 break;
872 case e1000_82576:
873 case e1000_82580:
874 case e1000_i350:
875 /* Turn on MSI-X capability first, or our settings
876 * won't stick. And it will take days to debug. */
877 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
878 E1000_GPIE_PBA | E1000_GPIE_EIAME |
879 E1000_GPIE_NSICR);
881 /* enable msix_other interrupt */
882 adapter->eims_other = 1 << vector;
883 tmp = (vector++ | E1000_IVAR_VALID) << 8;
885 wr32(E1000_IVAR_MISC, tmp);
886 break;
887 default:
888 /* do nothing, since nothing else supports MSI-X */
889 break;
890 } /* switch (hw->mac.type) */
892 adapter->eims_enable_mask |= adapter->eims_other;
894 for (i = 0; i < adapter->num_q_vectors; i++)
895 igb_assign_vector(adapter->q_vector[i], vector++);
897 wrfl();
901 * igb_request_msix - Initialize MSI-X interrupts
903 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
904 * kernel.
906 static int igb_request_msix(struct igb_adapter *adapter)
908 struct net_device *netdev = adapter->netdev;
909 struct e1000_hw *hw = &adapter->hw;
910 int i, err = 0, vector = 0;
912 err = request_irq(adapter->msix_entries[vector].vector,
913 igb_msix_other, 0, netdev->name, adapter);
914 if (err)
915 goto out;
916 vector++;
918 for (i = 0; i < adapter->num_q_vectors; i++) {
919 struct igb_q_vector *q_vector = adapter->q_vector[i];
921 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
923 if (q_vector->rx_ring && q_vector->tx_ring)
924 sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
925 q_vector->rx_ring->queue_index);
926 else if (q_vector->tx_ring)
927 sprintf(q_vector->name, "%s-tx-%u", netdev->name,
928 q_vector->tx_ring->queue_index);
929 else if (q_vector->rx_ring)
930 sprintf(q_vector->name, "%s-rx-%u", netdev->name,
931 q_vector->rx_ring->queue_index);
932 else
933 sprintf(q_vector->name, "%s-unused", netdev->name);
935 err = request_irq(adapter->msix_entries[vector].vector,
936 igb_msix_ring, 0, q_vector->name,
937 q_vector);
938 if (err)
939 goto out;
940 vector++;
943 igb_configure_msix(adapter);
944 return 0;
945 out:
946 return err;
949 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
951 if (adapter->msix_entries) {
952 pci_disable_msix(adapter->pdev);
953 kfree(adapter->msix_entries);
954 adapter->msix_entries = NULL;
955 } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
956 pci_disable_msi(adapter->pdev);
961 * igb_free_q_vectors - Free memory allocated for interrupt vectors
962 * @adapter: board private structure to initialize
964 * This function frees the memory allocated to the q_vectors. In addition if
965 * NAPI is enabled it will delete any references to the NAPI struct prior
966 * to freeing the q_vector.
968 static void igb_free_q_vectors(struct igb_adapter *adapter)
970 int v_idx;
972 for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
973 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
974 adapter->q_vector[v_idx] = NULL;
975 if (!q_vector)
976 continue;
977 netif_napi_del(&q_vector->napi);
978 kfree(q_vector);
980 adapter->num_q_vectors = 0;
/**
 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
 * @adapter: board private structure
 *
 * Releases, in this order, the ring structures, the q_vectors and the
 * MSI-X/MSI resources, leaving the device with 0 rx queues, 0 tx queues
 * and no MSI-X interrupts allocated.
 */
static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
{
	igb_free_queues(adapter);

	igb_free_q_vectors(adapter);

	igb_reset_interrupt_capability(adapter);
}
997 * igb_set_interrupt_capability - set MSI or MSI-X if supported
999 * Attempt to configure interrupts using the best available
1000 * capabilities of the hardware and kernel.
1002 static int igb_set_interrupt_capability(struct igb_adapter *adapter)
1004 int err;
1005 int numvecs, i;
1007 /* Number of supported queues. */
1008 adapter->num_rx_queues = adapter->rss_queues;
1009 if (adapter->vfs_allocated_count)
1010 adapter->num_tx_queues = 1;
1011 else
1012 adapter->num_tx_queues = adapter->rss_queues;
1014 /* start with one vector for every rx queue */
1015 numvecs = adapter->num_rx_queues;
1017 /* if tx handler is separate add 1 for every tx queue */
1018 if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
1019 numvecs += adapter->num_tx_queues;
1021 /* store the number of vectors reserved for queues */
1022 adapter->num_q_vectors = numvecs;
1024 /* add 1 vector for link status interrupts */
1025 numvecs++;
1026 adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
1027 GFP_KERNEL);
1028 if (!adapter->msix_entries)
1029 goto msi_only;
1031 for (i = 0; i < numvecs; i++)
1032 adapter->msix_entries[i].entry = i;
1034 err = pci_enable_msix(adapter->pdev,
1035 adapter->msix_entries,
1036 numvecs);
1037 if (err == 0)
1038 goto out;
1040 igb_reset_interrupt_capability(adapter);
1042 /* If we can't do MSI-X, try MSI */
1043 msi_only:
1044 #ifdef CONFIG_PCI_IOV
1045 /* disable SR-IOV for non MSI-X configurations */
1046 if (adapter->vf_data) {
1047 struct e1000_hw *hw = &adapter->hw;
1048 /* disable iov and allow time for transactions to clear */
1049 pci_disable_sriov(adapter->pdev);
1050 msleep(500);
1052 kfree(adapter->vf_data);
1053 adapter->vf_data = NULL;
1054 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1055 wrfl();
1056 msleep(100);
1057 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
1059 #endif
1060 adapter->vfs_allocated_count = 0;
1061 adapter->rss_queues = 1;
1062 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1063 adapter->num_rx_queues = 1;
1064 adapter->num_tx_queues = 1;
1065 adapter->num_q_vectors = 1;
1066 if (!pci_enable_msi(adapter->pdev))
1067 adapter->flags |= IGB_FLAG_HAS_MSI;
1068 out:
1069 /* Notify the stack of the (possibly) reduced queue counts. */
1070 netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
1071 return netif_set_real_num_rx_queues(adapter->netdev,
1072 adapter->num_rx_queues);
1076 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1077 * @adapter: board private structure to initialize
1079 * We allocate one q_vector per queue interrupt. If allocation fails we
1080 * return -ENOMEM.
1082 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1084 struct igb_q_vector *q_vector;
1085 struct e1000_hw *hw = &adapter->hw;
1086 int v_idx;
1088 for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1089 q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
1090 if (!q_vector)
1091 goto err_out;
1092 q_vector->adapter = adapter;
1093 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
1094 q_vector->itr_val = IGB_START_ITR;
1095 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
1096 adapter->q_vector[v_idx] = q_vector;
1098 return 0;
1100 err_out:
1101 igb_free_q_vectors(adapter);
1102 return -ENOMEM;
1105 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
1106 int ring_idx, int v_idx)
1108 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1110 q_vector->rx_ring = adapter->rx_ring[ring_idx];
1111 q_vector->rx_ring->q_vector = q_vector;
1112 q_vector->itr_val = adapter->rx_itr_setting;
1113 if (q_vector->itr_val && q_vector->itr_val <= 3)
1114 q_vector->itr_val = IGB_START_ITR;
1117 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
1118 int ring_idx, int v_idx)
1120 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1122 q_vector->tx_ring = adapter->tx_ring[ring_idx];
1123 q_vector->tx_ring->q_vector = q_vector;
1124 q_vector->itr_val = adapter->tx_itr_setting;
1125 if (q_vector->itr_val && q_vector->itr_val <= 3)
1126 q_vector->itr_val = IGB_START_ITR;
1130 * igb_map_ring_to_vector - maps allocated queues to vectors
1132 * This function maps the recently allocated queues to vectors.
1134 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
1136 int i;
1137 int v_idx = 0;
1139 if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
1140 (adapter->num_q_vectors < adapter->num_tx_queues))
1141 return -ENOMEM;
1143 if (adapter->num_q_vectors >=
1144 (adapter->num_rx_queues + adapter->num_tx_queues)) {
1145 for (i = 0; i < adapter->num_rx_queues; i++)
1146 igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1147 for (i = 0; i < adapter->num_tx_queues; i++)
1148 igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1149 } else {
1150 for (i = 0; i < adapter->num_rx_queues; i++) {
1151 if (i < adapter->num_tx_queues)
1152 igb_map_tx_ring_to_vector(adapter, i, v_idx);
1153 igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1155 for (; i < adapter->num_tx_queues; i++)
1156 igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1158 return 0;
1162 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1164 * This function initializes the interrupts and allocates all of the queues.
1166 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
1168 struct pci_dev *pdev = adapter->pdev;
1169 int err;
1171 err = igb_set_interrupt_capability(adapter);
1172 if (err)
1173 return err;
1175 err = igb_alloc_q_vectors(adapter);
1176 if (err) {
1177 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
1178 goto err_alloc_q_vectors;
1181 err = igb_alloc_queues(adapter);
1182 if (err) {
1183 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1184 goto err_alloc_queues;
1187 err = igb_map_ring_to_vector(adapter);
1188 if (err) {
1189 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
1190 goto err_map_queues;
1194 return 0;
1195 err_map_queues:
1196 igb_free_queues(adapter);
1197 err_alloc_queues:
1198 igb_free_q_vectors(adapter);
1199 err_alloc_q_vectors:
1200 igb_reset_interrupt_capability(adapter);
1201 return err;
1205 * igb_request_irq - initialize interrupts
1207 * Attempts to configure interrupts using the best available
1208 * capabilities of the hardware and kernel.
1210 static int igb_request_irq(struct igb_adapter *adapter)
1212 struct net_device *netdev = adapter->netdev;
1213 struct pci_dev *pdev = adapter->pdev;
1214 int err = 0;
1216 if (adapter->msix_entries) {
1217 err = igb_request_msix(adapter);
1218 if (!err)
1219 goto request_done;
1220 /* fall back to MSI */
1221 igb_clear_interrupt_scheme(adapter);
1222 if (!pci_enable_msi(adapter->pdev))
1223 adapter->flags |= IGB_FLAG_HAS_MSI;
1224 igb_free_all_tx_resources(adapter);
1225 igb_free_all_rx_resources(adapter);
1226 adapter->num_tx_queues = 1;
1227 adapter->num_rx_queues = 1;
1228 adapter->num_q_vectors = 1;
1229 err = igb_alloc_q_vectors(adapter);
1230 if (err) {
1231 dev_err(&pdev->dev,
1232 "Unable to allocate memory for vectors\n");
1233 goto request_done;
1235 err = igb_alloc_queues(adapter);
1236 if (err) {
1237 dev_err(&pdev->dev,
1238 "Unable to allocate memory for queues\n");
1239 igb_free_q_vectors(adapter);
1240 goto request_done;
1242 igb_setup_all_tx_resources(adapter);
1243 igb_setup_all_rx_resources(adapter);
1244 } else {
1245 igb_assign_vector(adapter->q_vector[0], 0);
1248 if (adapter->flags & IGB_FLAG_HAS_MSI) {
1249 err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
1250 netdev->name, adapter);
1251 if (!err)
1252 goto request_done;
1254 /* fall back to legacy interrupts */
1255 igb_reset_interrupt_capability(adapter);
1256 adapter->flags &= ~IGB_FLAG_HAS_MSI;
1259 err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
1260 netdev->name, adapter);
1262 if (err)
1263 dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
1264 err);
1266 request_done:
1267 return err;
1270 static void igb_free_irq(struct igb_adapter *adapter)
1272 if (adapter->msix_entries) {
1273 int vector = 0, i;
1275 free_irq(adapter->msix_entries[vector++].vector, adapter);
1277 for (i = 0; i < adapter->num_q_vectors; i++) {
1278 struct igb_q_vector *q_vector = adapter->q_vector[i];
1279 free_irq(adapter->msix_entries[vector++].vector,
1280 q_vector);
1282 } else {
1283 free_irq(adapter->pdev->irq, adapter);
1288 * igb_irq_disable - Mask off interrupt generation on the NIC
1289 * @adapter: board private structure
1291 static void igb_irq_disable(struct igb_adapter *adapter)
1293 struct e1000_hw *hw = &adapter->hw;
1296 * we need to be careful when disabling interrupts. The VFs are also
1297 * mapped into these registers and so clearing the bits can cause
1298 * issues on the VF drivers so we only need to clear what we set
1300 if (adapter->msix_entries) {
1301 u32 regval = rd32(E1000_EIAM);
1302 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1303 wr32(E1000_EIMC, adapter->eims_enable_mask);
1304 regval = rd32(E1000_EIAC);
1305 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1308 wr32(E1000_IAM, 0);
1309 wr32(E1000_IMC, ~0);
1310 wrfl();
1311 if (adapter->msix_entries) {
1312 int i;
1313 for (i = 0; i < adapter->num_q_vectors; i++)
1314 synchronize_irq(adapter->msix_entries[i].vector);
1315 } else {
1316 synchronize_irq(adapter->pdev->irq);
1321 * igb_irq_enable - Enable default interrupt generation settings
1322 * @adapter: board private structure
1324 static void igb_irq_enable(struct igb_adapter *adapter)
1326 struct e1000_hw *hw = &adapter->hw;
1328 if (adapter->msix_entries) {
1329 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
1330 u32 regval = rd32(E1000_EIAC);
1331 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1332 regval = rd32(E1000_EIAM);
1333 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1334 wr32(E1000_EIMS, adapter->eims_enable_mask);
1335 if (adapter->vfs_allocated_count) {
1336 wr32(E1000_MBVFIMR, 0xFF);
1337 ims |= E1000_IMS_VMMB;
1339 if (adapter->hw.mac.type == e1000_82580)
1340 ims |= E1000_IMS_DRSTA;
1342 wr32(E1000_IMS, ims);
1343 } else {
1344 wr32(E1000_IMS, IMS_ENABLE_MASK |
1345 E1000_IMS_DRSTA);
1346 wr32(E1000_IAM, IMS_ENABLE_MASK |
1347 E1000_IMS_DRSTA);
1351 static void igb_update_mng_vlan(struct igb_adapter *adapter)
1353 struct e1000_hw *hw = &adapter->hw;
1354 u16 vid = adapter->hw.mng_cookie.vlan_id;
1355 u16 old_vid = adapter->mng_vlan_id;
1357 if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1358 /* add VID to filter table */
1359 igb_vfta_set(hw, vid, true);
1360 adapter->mng_vlan_id = vid;
1361 } else {
1362 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1365 if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1366 (vid != old_vid) &&
1367 !test_bit(old_vid, adapter->active_vlans)) {
1368 /* remove VID from filter table */
1369 igb_vfta_set(hw, old_vid, false);
1374 * igb_release_hw_control - release control of the h/w to f/w
1375 * @adapter: address of board private structure
1377 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1378 * For ASF and Pass Through versions of f/w this means that the
1379 * driver is no longer loaded.
1382 static void igb_release_hw_control(struct igb_adapter *adapter)
1384 struct e1000_hw *hw = &adapter->hw;
1385 u32 ctrl_ext;
1387 /* Let firmware take over control of h/w */
1388 ctrl_ext = rd32(E1000_CTRL_EXT);
1389 wr32(E1000_CTRL_EXT,
1390 ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1394 * igb_get_hw_control - get control of the h/w from f/w
1395 * @adapter: address of board private structure
1397 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1398 * For ASF and Pass Through versions of f/w this means that
1399 * the driver is loaded.
1402 static void igb_get_hw_control(struct igb_adapter *adapter)
1404 struct e1000_hw *hw = &adapter->hw;
1405 u32 ctrl_ext;
1407 /* Let firmware know the driver has taken over */
1408 ctrl_ext = rd32(E1000_CTRL_EXT);
1409 wr32(E1000_CTRL_EXT,
1410 ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1414 * igb_configure - configure the hardware for RX and TX
1415 * @adapter: private board structure
1417 static void igb_configure(struct igb_adapter *adapter)
1419 struct net_device *netdev = adapter->netdev;
1420 int i;
1422 igb_get_hw_control(adapter);
1423 igb_set_rx_mode(netdev);
1425 igb_restore_vlan(adapter);
1427 igb_setup_tctl(adapter);
1428 igb_setup_mrqc(adapter);
1429 igb_setup_rctl(adapter);
1431 igb_configure_tx(adapter);
1432 igb_configure_rx(adapter);
1434 igb_rx_fifo_flush_82575(&adapter->hw);
1436 /* call igb_desc_unused which always leaves
1437 * at least 1 descriptor unused to make sure
1438 * next_to_use != next_to_clean */
1439 for (i = 0; i < adapter->num_rx_queues; i++) {
1440 struct igb_ring *ring = adapter->rx_ring[i];
1441 igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
1446 * igb_power_up_link - Power up the phy/serdes link
1447 * @adapter: address of board private structure
1449 void igb_power_up_link(struct igb_adapter *adapter)
1451 if (adapter->hw.phy.media_type == e1000_media_type_copper)
1452 igb_power_up_phy_copper(&adapter->hw);
1453 else
1454 igb_power_up_serdes_link_82575(&adapter->hw);
1458 * igb_power_down_link - Power down the phy/serdes link
1459 * @adapter: address of board private structure
1461 static void igb_power_down_link(struct igb_adapter *adapter)
1463 if (adapter->hw.phy.media_type == e1000_media_type_copper)
1464 igb_power_down_phy_copper_82575(&adapter->hw);
1465 else
1466 igb_shutdown_serdes_link_82575(&adapter->hw);
1470 * igb_up - Open the interface and prepare it to handle traffic
1471 * @adapter: board private structure
1473 int igb_up(struct igb_adapter *adapter)
1475 struct e1000_hw *hw = &adapter->hw;
1476 int i;
1478 /* hardware has been reset, we need to reload some things */
1479 igb_configure(adapter);
1481 clear_bit(__IGB_DOWN, &adapter->state);
1483 for (i = 0; i < adapter->num_q_vectors; i++) {
1484 struct igb_q_vector *q_vector = adapter->q_vector[i];
1485 napi_enable(&q_vector->napi);
1487 if (adapter->msix_entries)
1488 igb_configure_msix(adapter);
1489 else
1490 igb_assign_vector(adapter->q_vector[0], 0);
1492 /* Clear any pending interrupts. */
1493 rd32(E1000_ICR);
1494 igb_irq_enable(adapter);
1496 /* notify VFs that reset has been completed */
1497 if (adapter->vfs_allocated_count) {
1498 u32 reg_data = rd32(E1000_CTRL_EXT);
1499 reg_data |= E1000_CTRL_EXT_PFRSTD;
1500 wr32(E1000_CTRL_EXT, reg_data);
1503 netif_tx_start_all_queues(adapter->netdev);
1505 /* start the watchdog. */
1506 hw->mac.get_link_status = 1;
1507 schedule_work(&adapter->watchdog_task);
1509 return 0;
1512 void igb_down(struct igb_adapter *adapter)
1514 struct net_device *netdev = adapter->netdev;
1515 struct e1000_hw *hw = &adapter->hw;
1516 u32 tctl, rctl;
1517 int i;
1519 /* signal that we're down so the interrupt handler does not
1520 * reschedule our watchdog timer */
1521 set_bit(__IGB_DOWN, &adapter->state);
1523 /* disable receives in the hardware */
1524 rctl = rd32(E1000_RCTL);
1525 wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1526 /* flush and sleep below */
1528 netif_tx_stop_all_queues(netdev);
1530 /* disable transmits in the hardware */
1531 tctl = rd32(E1000_TCTL);
1532 tctl &= ~E1000_TCTL_EN;
1533 wr32(E1000_TCTL, tctl);
1534 /* flush both disables and wait for them to finish */
1535 wrfl();
1536 msleep(10);
1538 for (i = 0; i < adapter->num_q_vectors; i++) {
1539 struct igb_q_vector *q_vector = adapter->q_vector[i];
1540 napi_disable(&q_vector->napi);
1543 igb_irq_disable(adapter);
1545 del_timer_sync(&adapter->watchdog_timer);
1546 del_timer_sync(&adapter->phy_info_timer);
1548 netif_carrier_off(netdev);
1550 /* record the stats before reset*/
1551 spin_lock(&adapter->stats64_lock);
1552 igb_update_stats(adapter, &adapter->stats64);
1553 spin_unlock(&adapter->stats64_lock);
1555 adapter->link_speed = 0;
1556 adapter->link_duplex = 0;
1558 if (!pci_channel_offline(adapter->pdev))
1559 igb_reset(adapter);
1560 igb_clean_all_tx_rings(adapter);
1561 igb_clean_all_rx_rings(adapter);
1562 #ifdef CONFIG_IGB_DCA
1564 /* since we reset the hardware DCA settings were cleared */
1565 igb_setup_dca(adapter);
1566 #endif
1569 void igb_reinit_locked(struct igb_adapter *adapter)
1571 WARN_ON(in_interrupt());
1572 while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1573 msleep(1);
1574 igb_down(adapter);
1575 igb_up(adapter);
1576 clear_bit(__IGB_RESETTING, &adapter->state);
1579 void igb_reset(struct igb_adapter *adapter)
1581 struct pci_dev *pdev = adapter->pdev;
1582 struct e1000_hw *hw = &adapter->hw;
1583 struct e1000_mac_info *mac = &hw->mac;
1584 struct e1000_fc_info *fc = &hw->fc;
1585 u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1586 u16 hwm;
1588 /* Repartition Pba for greater than 9k mtu
1589 * To take effect CTRL.RST is required.
1591 switch (mac->type) {
1592 case e1000_i350:
1593 case e1000_82580:
1594 pba = rd32(E1000_RXPBS);
1595 pba = igb_rxpbs_adjust_82580(pba);
1596 break;
1597 case e1000_82576:
1598 pba = rd32(E1000_RXPBS);
1599 pba &= E1000_RXPBS_SIZE_MASK_82576;
1600 break;
1601 case e1000_82575:
1602 default:
1603 pba = E1000_PBA_34K;
1604 break;
1607 if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1608 (mac->type < e1000_82576)) {
1609 /* adjust PBA for jumbo frames */
1610 wr32(E1000_PBA, pba);
1612 /* To maintain wire speed transmits, the Tx FIFO should be
1613 * large enough to accommodate two full transmit packets,
1614 * rounded up to the next 1KB and expressed in KB. Likewise,
1615 * the Rx FIFO should be large enough to accommodate at least
1616 * one full receive packet and is similarly rounded up and
1617 * expressed in KB. */
1618 pba = rd32(E1000_PBA);
1619 /* upper 16 bits has Tx packet buffer allocation size in KB */
1620 tx_space = pba >> 16;
1621 /* lower 16 bits has Rx packet buffer allocation size in KB */
1622 pba &= 0xffff;
1623 /* the tx fifo also stores 16 bytes of information about the tx
1624 * but don't include ethernet FCS because hardware appends it */
1625 min_tx_space = (adapter->max_frame_size +
1626 sizeof(union e1000_adv_tx_desc) -
1627 ETH_FCS_LEN) * 2;
1628 min_tx_space = ALIGN(min_tx_space, 1024);
1629 min_tx_space >>= 10;
1630 /* software strips receive CRC, so leave room for it */
1631 min_rx_space = adapter->max_frame_size;
1632 min_rx_space = ALIGN(min_rx_space, 1024);
1633 min_rx_space >>= 10;
1635 /* If current Tx allocation is less than the min Tx FIFO size,
1636 * and the min Tx FIFO size is less than the current Rx FIFO
1637 * allocation, take space away from current Rx allocation */
1638 if (tx_space < min_tx_space &&
1639 ((min_tx_space - tx_space) < pba)) {
1640 pba = pba - (min_tx_space - tx_space);
1642 /* if short on rx space, rx wins and must trump tx
1643 * adjustment */
1644 if (pba < min_rx_space)
1645 pba = min_rx_space;
1647 wr32(E1000_PBA, pba);
1650 /* flow control settings */
1651 /* The high water mark must be low enough to fit one full frame
1652 * (or the size used for early receive) above it in the Rx FIFO.
1653 * Set it to the lower of:
1654 * - 90% of the Rx FIFO size, or
1655 * - the full Rx FIFO size minus one full frame */
1656 hwm = min(((pba << 10) * 9 / 10),
1657 ((pba << 10) - 2 * adapter->max_frame_size));
1659 fc->high_water = hwm & 0xFFF0; /* 16-byte granularity */
1660 fc->low_water = fc->high_water - 16;
1661 fc->pause_time = 0xFFFF;
1662 fc->send_xon = 1;
1663 fc->current_mode = fc->requested_mode;
1665 /* disable receive for all VFs and wait one second */
1666 if (adapter->vfs_allocated_count) {
1667 int i;
1668 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1669 adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
1671 /* ping all the active vfs to let them know we are going down */
1672 igb_ping_all_vfs(adapter);
1674 /* disable transmits and receives */
1675 wr32(E1000_VFRE, 0);
1676 wr32(E1000_VFTE, 0);
1679 /* Allow time for pending master requests to run */
1680 hw->mac.ops.reset_hw(hw);
1681 wr32(E1000_WUC, 0);
1683 if (hw->mac.ops.init_hw(hw))
1684 dev_err(&pdev->dev, "Hardware Error\n");
1685 if (hw->mac.type > e1000_82580) {
1686 if (adapter->flags & IGB_FLAG_DMAC) {
1687 u32 reg;
1690 * DMA Coalescing high water mark needs to be higher
1691 * than * the * Rx threshold. The Rx threshold is
1692 * currently * pba - 6, so we * should use a high water
1693 * mark of pba * - 4. */
1694 hwm = (pba - 4) << 10;
1696 reg = (((pba-6) << E1000_DMACR_DMACTHR_SHIFT)
1697 & E1000_DMACR_DMACTHR_MASK);
1699 /* transition to L0x or L1 if available..*/
1700 reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
1702 /* watchdog timer= +-1000 usec in 32usec intervals */
1703 reg |= (1000 >> 5);
1704 wr32(E1000_DMACR, reg);
1706 /* no lower threshold to disable coalescing(smart fifb)
1707 * -UTRESH=0*/
1708 wr32(E1000_DMCRTRH, 0);
1710 /* set hwm to PBA - 2 * max frame size */
1711 wr32(E1000_FCRTC, hwm);
1714 * This sets the time to wait before requesting tran-
1715 * sition to * low power state to number of usecs needed
1716 * to receive 1 512 * byte frame at gigabit line rate
1718 reg = rd32(E1000_DMCTLX);
1719 reg |= IGB_DMCTLX_DCFLUSH_DIS;
1721 /* Delay 255 usec before entering Lx state. */
1722 reg |= 0xFF;
1723 wr32(E1000_DMCTLX, reg);
1725 /* free space in Tx packet buffer to wake from DMAC */
1726 wr32(E1000_DMCTXTH,
1727 (IGB_MIN_TXPBSIZE -
1728 (IGB_TX_BUF_4096 + adapter->max_frame_size))
1729 >> 6);
1731 /* make low power state decision controlled by DMAC */
1732 reg = rd32(E1000_PCIEMISC);
1733 reg |= E1000_PCIEMISC_LX_DECISION;
1734 wr32(E1000_PCIEMISC, reg);
1735 } /* end if IGB_FLAG_DMAC set */
1737 if (hw->mac.type == e1000_82580) {
1738 u32 reg = rd32(E1000_PCIEMISC);
1739 wr32(E1000_PCIEMISC,
1740 reg & ~E1000_PCIEMISC_LX_DECISION);
1742 if (!netif_running(adapter->netdev))
1743 igb_power_down_link(adapter);
1745 igb_update_mng_vlan(adapter);
1747 /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1748 wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1750 igb_get_phy_info(hw);
1753 static u32 igb_fix_features(struct net_device *netdev, u32 features)
1756 * Since there is no support for separate rx/tx vlan accel
1757 * enable/disable make sure tx flag is always in same state as rx.
1759 if (features & NETIF_F_HW_VLAN_RX)
1760 features |= NETIF_F_HW_VLAN_TX;
1761 else
1762 features &= ~NETIF_F_HW_VLAN_TX;
1764 return features;
1767 static int igb_set_features(struct net_device *netdev, u32 features)
1769 struct igb_adapter *adapter = netdev_priv(netdev);
1770 int i;
1771 u32 changed = netdev->features ^ features;
1773 for (i = 0; i < adapter->num_rx_queues; i++) {
1774 if (features & NETIF_F_RXCSUM)
1775 adapter->rx_ring[i]->flags |= IGB_RING_FLAG_RX_CSUM;
1776 else
1777 adapter->rx_ring[i]->flags &= ~IGB_RING_FLAG_RX_CSUM;
1780 if (changed & NETIF_F_HW_VLAN_RX)
1781 igb_vlan_mode(netdev, features);
1783 return 0;
1786 static const struct net_device_ops igb_netdev_ops = {
1787 .ndo_open = igb_open,
1788 .ndo_stop = igb_close,
1789 .ndo_start_xmit = igb_xmit_frame_adv,
1790 .ndo_get_stats64 = igb_get_stats64,
1791 .ndo_set_rx_mode = igb_set_rx_mode,
1792 .ndo_set_multicast_list = igb_set_rx_mode,
1793 .ndo_set_mac_address = igb_set_mac,
1794 .ndo_change_mtu = igb_change_mtu,
1795 .ndo_do_ioctl = igb_ioctl,
1796 .ndo_tx_timeout = igb_tx_timeout,
1797 .ndo_validate_addr = eth_validate_addr,
1798 .ndo_vlan_rx_add_vid = igb_vlan_rx_add_vid,
1799 .ndo_vlan_rx_kill_vid = igb_vlan_rx_kill_vid,
1800 .ndo_set_vf_mac = igb_ndo_set_vf_mac,
1801 .ndo_set_vf_vlan = igb_ndo_set_vf_vlan,
1802 .ndo_set_vf_tx_rate = igb_ndo_set_vf_bw,
1803 .ndo_get_vf_config = igb_ndo_get_vf_config,
1804 #ifdef CONFIG_NET_POLL_CONTROLLER
1805 .ndo_poll_controller = igb_netpoll,
1806 #endif
1807 .ndo_fix_features = igb_fix_features,
1808 .ndo_set_features = igb_set_features,
1812 * igb_probe - Device Initialization Routine
1813 * @pdev: PCI device information struct
1814 * @ent: entry in igb_pci_tbl
1816 * Returns 0 on success, negative on failure
1818 * igb_probe initializes an adapter identified by a pci_dev structure.
1819 * The OS initialization, configuring of the adapter private structure,
1820 * and a hardware reset occur.
1822 static int __devinit igb_probe(struct pci_dev *pdev,
1823 const struct pci_device_id *ent)
1825 struct net_device *netdev;
1826 struct igb_adapter *adapter;
1827 struct e1000_hw *hw;
1828 u16 eeprom_data = 0;
1829 s32 ret_val;
1830 static int global_quad_port_a; /* global quad port a indication */
1831 const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1832 unsigned long mmio_start, mmio_len;
1833 int err, pci_using_dac;
1834 u16 eeprom_apme_mask = IGB_EEPROM_APME;
1835 u8 part_str[E1000_PBANUM_LENGTH];
1837 /* Catch broken hardware that put the wrong VF device ID in
1838 * the PCIe SR-IOV capability.
1840 if (pdev->is_virtfn) {
1841 WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
1842 pci_name(pdev), pdev->vendor, pdev->device);
1843 return -EINVAL;
1846 err = pci_enable_device_mem(pdev);
1847 if (err)
1848 return err;
1850 pci_using_dac = 0;
1851 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1852 if (!err) {
1853 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1854 if (!err)
1855 pci_using_dac = 1;
1856 } else {
1857 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1858 if (err) {
1859 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1860 if (err) {
1861 dev_err(&pdev->dev, "No usable DMA "
1862 "configuration, aborting\n");
1863 goto err_dma;
1868 err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1869 IORESOURCE_MEM),
1870 igb_driver_name);
1871 if (err)
1872 goto err_pci_reg;
1874 pci_enable_pcie_error_reporting(pdev);
1876 pci_set_master(pdev);
1877 pci_save_state(pdev);
1879 err = -ENOMEM;
1880 netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1881 IGB_ABS_MAX_TX_QUEUES);
1882 if (!netdev)
1883 goto err_alloc_etherdev;
1885 SET_NETDEV_DEV(netdev, &pdev->dev);
1887 pci_set_drvdata(pdev, netdev);
1888 adapter = netdev_priv(netdev);
1889 adapter->netdev = netdev;
1890 adapter->pdev = pdev;
1891 hw = &adapter->hw;
1892 hw->back = adapter;
1893 adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1895 mmio_start = pci_resource_start(pdev, 0);
1896 mmio_len = pci_resource_len(pdev, 0);
1898 err = -EIO;
1899 hw->hw_addr = ioremap(mmio_start, mmio_len);
1900 if (!hw->hw_addr)
1901 goto err_ioremap;
1903 netdev->netdev_ops = &igb_netdev_ops;
1904 igb_set_ethtool_ops(netdev);
1905 netdev->watchdog_timeo = 5 * HZ;
1907 strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1909 netdev->mem_start = mmio_start;
1910 netdev->mem_end = mmio_start + mmio_len;
1912 /* PCI config space info */
1913 hw->vendor_id = pdev->vendor;
1914 hw->device_id = pdev->device;
1915 hw->revision_id = pdev->revision;
1916 hw->subsystem_vendor_id = pdev->subsystem_vendor;
1917 hw->subsystem_device_id = pdev->subsystem_device;
1919 /* Copy the default MAC, PHY and NVM function pointers */
1920 memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1921 memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1922 memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1923 /* Initialize skew-specific constants */
1924 err = ei->get_invariants(hw);
1925 if (err)
1926 goto err_sw_init;
1928 /* setup the private structure */
1929 err = igb_sw_init(adapter);
1930 if (err)
1931 goto err_sw_init;
1933 igb_get_bus_info_pcie(hw);
1935 hw->phy.autoneg_wait_to_complete = false;
1937 /* Copper options */
1938 if (hw->phy.media_type == e1000_media_type_copper) {
1939 hw->phy.mdix = AUTO_ALL_MODES;
1940 hw->phy.disable_polarity_correction = false;
1941 hw->phy.ms_type = e1000_ms_hw_default;
1944 if (igb_check_reset_block(hw))
1945 dev_info(&pdev->dev,
1946 "PHY reset is blocked due to SOL/IDER session.\n");
1948 netdev->hw_features = NETIF_F_SG |
1949 NETIF_F_IP_CSUM |
1950 NETIF_F_IPV6_CSUM |
1951 NETIF_F_TSO |
1952 NETIF_F_TSO6 |
1953 NETIF_F_RXCSUM |
1954 NETIF_F_HW_VLAN_RX;
1956 netdev->features = netdev->hw_features |
1957 NETIF_F_HW_VLAN_TX |
1958 NETIF_F_HW_VLAN_FILTER;
1960 netdev->vlan_features |= NETIF_F_TSO;
1961 netdev->vlan_features |= NETIF_F_TSO6;
1962 netdev->vlan_features |= NETIF_F_IP_CSUM;
1963 netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1964 netdev->vlan_features |= NETIF_F_SG;
1966 if (pci_using_dac) {
1967 netdev->features |= NETIF_F_HIGHDMA;
1968 netdev->vlan_features |= NETIF_F_HIGHDMA;
1971 if (hw->mac.type >= e1000_82576) {
1972 netdev->hw_features |= NETIF_F_SCTP_CSUM;
1973 netdev->features |= NETIF_F_SCTP_CSUM;
1976 adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1978 /* before reading the NVM, reset the controller to put the device in a
1979 * known good starting state */
1980 hw->mac.ops.reset_hw(hw);
1982 /* make sure the NVM is good */
1983 if (hw->nvm.ops.validate(hw) < 0) {
1984 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1985 err = -EIO;
1986 goto err_eeprom;
1989 /* copy the MAC address out of the NVM */
1990 if (hw->mac.ops.read_mac_addr(hw))
1991 dev_err(&pdev->dev, "NVM Read Error\n");
1993 memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1994 memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1996 if (!is_valid_ether_addr(netdev->perm_addr)) {
1997 dev_err(&pdev->dev, "Invalid MAC Address\n");
1998 err = -EIO;
1999 goto err_eeprom;
2002 setup_timer(&adapter->watchdog_timer, igb_watchdog,
2003 (unsigned long) adapter);
2004 setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
2005 (unsigned long) adapter);
2007 INIT_WORK(&adapter->reset_task, igb_reset_task);
2008 INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
2010 /* Initialize link properties that are user-changeable */
2011 adapter->fc_autoneg = true;
2012 hw->mac.autoneg = true;
2013 hw->phy.autoneg_advertised = 0x2f;
2015 hw->fc.requested_mode = e1000_fc_default;
2016 hw->fc.current_mode = e1000_fc_default;
2018 igb_validate_mdi_setting(hw);
2020 /* Initial Wake on LAN setting If APM wake is enabled in the EEPROM,
2021 * enable the ACPI Magic Packet filter
2024 if (hw->bus.func == 0)
2025 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
2026 else if (hw->mac.type >= e1000_82580)
2027 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
2028 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
2029 &eeprom_data);
2030 else if (hw->bus.func == 1)
2031 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
2033 if (eeprom_data & eeprom_apme_mask)
2034 adapter->eeprom_wol |= E1000_WUFC_MAG;
2036 /* now that we have the eeprom settings, apply the special cases where
2037 * the eeprom may be wrong or the board simply won't support wake on
2038 * lan on a particular port */
2039 switch (pdev->device) {
2040 case E1000_DEV_ID_82575GB_QUAD_COPPER:
2041 adapter->eeprom_wol = 0;
2042 break;
2043 case E1000_DEV_ID_82575EB_FIBER_SERDES:
2044 case E1000_DEV_ID_82576_FIBER:
2045 case E1000_DEV_ID_82576_SERDES:
2046 /* Wake events only supported on port A for dual fiber
2047 * regardless of eeprom setting */
2048 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
2049 adapter->eeprom_wol = 0;
2050 break;
2051 case E1000_DEV_ID_82576_QUAD_COPPER:
2052 case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
2053 /* if quad port adapter, disable WoL on all but port A */
2054 if (global_quad_port_a != 0)
2055 adapter->eeprom_wol = 0;
2056 else
2057 adapter->flags |= IGB_FLAG_QUAD_PORT_A;
2058 /* Reset for multiple quad port adapters */
2059 if (++global_quad_port_a == 4)
2060 global_quad_port_a = 0;
2061 break;
2064 /* initialize the wol settings based on the eeprom settings */
2065 adapter->wol = adapter->eeprom_wol;
2066 device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
2068 /* reset the hardware with the new settings */
2069 igb_reset(adapter);
2071 /* let the f/w know that the h/w is now under the control of the
2072 * driver. */
2073 igb_get_hw_control(adapter);
2075 strcpy(netdev->name, "eth%d");
2076 err = register_netdev(netdev);
2077 if (err)
2078 goto err_register;
2080 igb_vlan_mode(netdev, netdev->features);
2082 /* carrier off reporting is important to ethtool even BEFORE open */
2083 netif_carrier_off(netdev);
2085 #ifdef CONFIG_IGB_DCA
2086 if (dca_add_requester(&pdev->dev) == 0) {
2087 adapter->flags |= IGB_FLAG_DCA_ENABLED;
2088 dev_info(&pdev->dev, "DCA enabled\n");
2089 igb_setup_dca(adapter);
2092 #endif
2093 /* do hw tstamp init after resetting */
2094 igb_init_hw_timer(adapter);
2096 dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
2097 /* print bus type/speed/width info */
2098 dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
2099 netdev->name,
2100 ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2101 (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2102 "unknown"),
2103 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2104 (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2105 (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2106 "unknown"),
2107 netdev->dev_addr);
2109 ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2110 if (ret_val)
2111 strcpy(part_str, "Unknown");
2112 dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2113 dev_info(&pdev->dev,
2114 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2115 adapter->msix_entries ? "MSI-X" :
2116 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2117 adapter->num_rx_queues, adapter->num_tx_queues);
2118 switch (hw->mac.type) {
2119 case e1000_i350:
2120 igb_set_eee_i350(hw);
2121 break;
2122 default:
2123 break;
2125 return 0;
2127 err_register:
2128 igb_release_hw_control(adapter);
2129 err_eeprom:
2130 if (!igb_check_reset_block(hw))
2131 igb_reset_phy(hw);
2133 if (hw->flash_address)
2134 iounmap(hw->flash_address);
2135 err_sw_init:
2136 igb_clear_interrupt_scheme(adapter);
2137 iounmap(hw->hw_addr);
2138 err_ioremap:
2139 free_netdev(netdev);
2140 err_alloc_etherdev:
2141 pci_release_selected_regions(pdev,
2142 pci_select_bars(pdev, IORESOURCE_MEM));
2143 err_pci_reg:
2144 err_dma:
2145 pci_disable_device(pdev);
2146 return err;
2150 * igb_remove - Device Removal Routine
2151 * @pdev: PCI device information struct
2153 * igb_remove is called by the PCI subsystem to alert the driver
2154 * that it should release a PCI device. The could be caused by a
2155 * Hot-Plug event, or because the driver is going to be removed from
2156 * memory.
2158 static void __devexit igb_remove(struct pci_dev *pdev)
2160 struct net_device *netdev = pci_get_drvdata(pdev);
2161 struct igb_adapter *adapter = netdev_priv(netdev);
2162 struct e1000_hw *hw = &adapter->hw;
2165 * The watchdog timer may be rescheduled, so explicitly
2166 * disable watchdog from being rescheduled.
2168 set_bit(__IGB_DOWN, &adapter->state);
2169 del_timer_sync(&adapter->watchdog_timer);
2170 del_timer_sync(&adapter->phy_info_timer);
2172 cancel_work_sync(&adapter->reset_task);
2173 cancel_work_sync(&adapter->watchdog_task);
2175 #ifdef CONFIG_IGB_DCA
2176 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2177 dev_info(&pdev->dev, "DCA disabled\n");
2178 dca_remove_requester(&pdev->dev);
2179 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2180 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2182 #endif
2184 /* Release control of h/w to f/w. If f/w is AMT enabled, this
2185 * would have already happened in close and is redundant. */
2186 igb_release_hw_control(adapter);
2188 unregister_netdev(netdev);
2190 igb_clear_interrupt_scheme(adapter);
2192 #ifdef CONFIG_PCI_IOV
2193 /* reclaim resources allocated to VFs */
2194 if (adapter->vf_data) {
2195 /* disable iov and allow time for transactions to clear */
2196 pci_disable_sriov(pdev);
2197 msleep(500);
2199 kfree(adapter->vf_data);
2200 adapter->vf_data = NULL;
2201 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2202 wrfl();
2203 msleep(100);
2204 dev_info(&pdev->dev, "IOV Disabled\n");
2206 #endif
2208 iounmap(hw->hw_addr);
2209 if (hw->flash_address)
2210 iounmap(hw->flash_address);
2211 pci_release_selected_regions(pdev,
2212 pci_select_bars(pdev, IORESOURCE_MEM));
2214 free_netdev(netdev);
2216 pci_disable_pcie_error_reporting(pdev);
2218 pci_disable_device(pdev);
2222 * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2223 * @adapter: board private structure to initialize
2225 * This function initializes the vf specific data storage and then attempts to
2226 * allocate the VFs. The reason for ordering it this way is because it is much
2227 * mor expensive time wise to disable SR-IOV than it is to allocate and free
2228 * the memory for the VFs.
2230 static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2232 #ifdef CONFIG_PCI_IOV
2233 struct pci_dev *pdev = adapter->pdev;
2235 if (adapter->vfs_allocated_count) {
2236 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2237 sizeof(struct vf_data_storage),
2238 GFP_KERNEL);
2239 /* if allocation failed then we do not support SR-IOV */
2240 if (!adapter->vf_data) {
2241 adapter->vfs_allocated_count = 0;
2242 dev_err(&pdev->dev, "Unable to allocate memory for VF "
2243 "Data Storage\n");
2247 if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
2248 kfree(adapter->vf_data);
2249 adapter->vf_data = NULL;
2250 #endif /* CONFIG_PCI_IOV */
2251 adapter->vfs_allocated_count = 0;
2252 #ifdef CONFIG_PCI_IOV
2253 } else {
2254 unsigned char mac_addr[ETH_ALEN];
2255 int i;
2256 dev_info(&pdev->dev, "%d vfs allocated\n",
2257 adapter->vfs_allocated_count);
2258 for (i = 0; i < adapter->vfs_allocated_count; i++) {
2259 random_ether_addr(mac_addr);
2260 igb_set_vf_mac(adapter, i, mac_addr);
2262 /* DMA Coalescing is not supported in IOV mode. */
2263 if (adapter->flags & IGB_FLAG_DMAC)
2264 adapter->flags &= ~IGB_FLAG_DMAC;
2266 #endif /* CONFIG_PCI_IOV */
2271 * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2272 * @adapter: board private structure to initialize
2274 * igb_init_hw_timer initializes the function pointer and values for the hw
2275 * timer found in hardware.
2277 static void igb_init_hw_timer(struct igb_adapter *adapter)
2279 struct e1000_hw *hw = &adapter->hw;
2281 switch (hw->mac.type) {
2282 case e1000_i350:
2283 case e1000_82580:
2284 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2285 adapter->cycles.read = igb_read_clock;
2286 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2287 adapter->cycles.mult = 1;
2289 * The 82580 timesync updates the system timer every 8ns by 8ns
2290 * and the value cannot be shifted. Instead we need to shift
2291 * the registers to generate a 64bit timer value. As a result
2292 * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2293 * 24 in order to generate a larger value for synchronization.
2295 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
2296 /* disable system timer temporarily by setting bit 31 */
2297 wr32(E1000_TSAUXC, 0x80000000);
2298 wrfl();
2300 /* Set registers so that rollover occurs soon to test this. */
2301 wr32(E1000_SYSTIMR, 0x00000000);
2302 wr32(E1000_SYSTIML, 0x80000000);
2303 wr32(E1000_SYSTIMH, 0x000000FF);
2304 wrfl();
2306 /* enable system timer by clearing bit 31 */
2307 wr32(E1000_TSAUXC, 0x0);
2308 wrfl();
2310 timecounter_init(&adapter->clock,
2311 &adapter->cycles,
2312 ktime_to_ns(ktime_get_real()));
2314 * Synchronize our NIC clock against system wall clock. NIC
2315 * time stamp reading requires ~3us per sample, each sample
2316 * was pretty stable even under load => only require 10
2317 * samples for each offset comparison.
2319 memset(&adapter->compare, 0, sizeof(adapter->compare));
2320 adapter->compare.source = &adapter->clock;
2321 adapter->compare.target = ktime_get_real;
2322 adapter->compare.num_samples = 10;
2323 timecompare_update(&adapter->compare, 0);
2324 break;
2325 case e1000_82576:
2327 * Initialize hardware timer: we keep it running just in case
2328 * that some program needs it later on.
2330 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2331 adapter->cycles.read = igb_read_clock;
2332 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2333 adapter->cycles.mult = 1;
2335 * Scale the NIC clock cycle by a large factor so that
2336 * relatively small clock corrections can be added or
2337 * subtracted at each clock tick. The drawbacks of a large
2338 * factor are a) that the clock register overflows more quickly
2339 * (not such a big deal) and b) that the increment per tick has
2340 * to fit into 24 bits. As a result we need to use a shift of
2341 * 19 so we can fit a value of 16 into the TIMINCA register.
2343 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2344 wr32(E1000_TIMINCA,
2345 (1 << E1000_TIMINCA_16NS_SHIFT) |
2346 (16 << IGB_82576_TSYNC_SHIFT));
2348 /* Set registers so that rollover occurs soon to test this. */
2349 wr32(E1000_SYSTIML, 0x00000000);
2350 wr32(E1000_SYSTIMH, 0xFF800000);
2351 wrfl();
2353 timecounter_init(&adapter->clock,
2354 &adapter->cycles,
2355 ktime_to_ns(ktime_get_real()));
2357 * Synchronize our NIC clock against system wall clock. NIC
2358 * time stamp reading requires ~3us per sample, each sample
2359 * was pretty stable even under load => only require 10
2360 * samples for each offset comparison.
2362 memset(&adapter->compare, 0, sizeof(adapter->compare));
2363 adapter->compare.source = &adapter->clock;
2364 adapter->compare.target = ktime_get_real;
2365 adapter->compare.num_samples = 10;
2366 timecompare_update(&adapter->compare, 0);
2367 break;
2368 case e1000_82575:
2369 /* 82575 does not support timesync */
2370 default:
2371 break;
2377 * igb_sw_init - Initialize general software structures (struct igb_adapter)
2378 * @adapter: board private structure to initialize
2380 * igb_sw_init initializes the Adapter private data structure.
2381 * Fields are initialized based on PCI device information and
2382 * OS network device settings (MTU size).
2384 static int __devinit igb_sw_init(struct igb_adapter *adapter)
2386 struct e1000_hw *hw = &adapter->hw;
2387 struct net_device *netdev = adapter->netdev;
2388 struct pci_dev *pdev = adapter->pdev;
2390 pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2392 adapter->tx_ring_count = IGB_DEFAULT_TXD;
2393 adapter->rx_ring_count = IGB_DEFAULT_RXD;
2394 adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2395 adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2397 adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
2398 adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
2400 spin_lock_init(&adapter->stats64_lock);
2401 #ifdef CONFIG_PCI_IOV
2402 switch (hw->mac.type) {
2403 case e1000_82576:
2404 case e1000_i350:
2405 if (max_vfs > 7) {
2406 dev_warn(&pdev->dev,
2407 "Maximum of 7 VFs per PF, using max\n");
2408 adapter->vfs_allocated_count = 7;
2409 } else
2410 adapter->vfs_allocated_count = max_vfs;
2411 break;
2412 default:
2413 break;
2415 #endif /* CONFIG_PCI_IOV */
2416 adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2417 /* i350 cannot do RSS and SR-IOV at the same time */
2418 if (hw->mac.type == e1000_i350 && adapter->vfs_allocated_count)
2419 adapter->rss_queues = 1;
2422 * if rss_queues > 4 or vfs are going to be allocated with rss_queues
2423 * then we should combine the queues into a queue pair in order to
2424 * conserve interrupts due to limited supply
2426 if ((adapter->rss_queues > 4) ||
2427 ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2428 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2430 /* This call may decrease the number of queues */
2431 if (igb_init_interrupt_scheme(adapter)) {
2432 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2433 return -ENOMEM;
2436 igb_probe_vfs(adapter);
2438 /* Explicitly disable IRQ since the NIC can be in any state. */
2439 igb_irq_disable(adapter);
2441 if (hw->mac.type == e1000_i350)
2442 adapter->flags &= ~IGB_FLAG_DMAC;
2444 set_bit(__IGB_DOWN, &adapter->state);
2445 return 0;
2449 * igb_open - Called when a network interface is made active
2450 * @netdev: network interface device structure
2452 * Returns 0 on success, negative value on failure
2454 * The open entry point is called when a network interface is made
2455 * active by the system (IFF_UP). At this point all resources needed
2456 * for transmit and receive operations are allocated, the interrupt
2457 * handler is registered with the OS, the watchdog timer is started,
2458 * and the stack is notified that the interface is ready.
2460 static int igb_open(struct net_device *netdev)
2462 struct igb_adapter *adapter = netdev_priv(netdev);
2463 struct e1000_hw *hw = &adapter->hw;
2464 int err;
2465 int i;
2467 /* disallow open during test */
2468 if (test_bit(__IGB_TESTING, &adapter->state))
2469 return -EBUSY;
2471 netif_carrier_off(netdev);
2473 /* allocate transmit descriptors */
2474 err = igb_setup_all_tx_resources(adapter);
2475 if (err)
2476 goto err_setup_tx;
2478 /* allocate receive descriptors */
2479 err = igb_setup_all_rx_resources(adapter);
2480 if (err)
2481 goto err_setup_rx;
2483 igb_power_up_link(adapter);
2485 /* before we allocate an interrupt, we must be ready to handle it.
2486 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2487 * as soon as we call pci_request_irq, so we have to setup our
2488 * clean_rx handler before we do so. */
2489 igb_configure(adapter);
2491 err = igb_request_irq(adapter);
2492 if (err)
2493 goto err_req_irq;
2495 /* From here on the code is the same as igb_up() */
2496 clear_bit(__IGB_DOWN, &adapter->state);
2498 for (i = 0; i < adapter->num_q_vectors; i++) {
2499 struct igb_q_vector *q_vector = adapter->q_vector[i];
2500 napi_enable(&q_vector->napi);
2503 /* Clear any pending interrupts. */
2504 rd32(E1000_ICR);
2506 igb_irq_enable(adapter);
2508 /* notify VFs that reset has been completed */
2509 if (adapter->vfs_allocated_count) {
2510 u32 reg_data = rd32(E1000_CTRL_EXT);
2511 reg_data |= E1000_CTRL_EXT_PFRSTD;
2512 wr32(E1000_CTRL_EXT, reg_data);
2515 netif_tx_start_all_queues(netdev);
2517 /* start the watchdog. */
2518 hw->mac.get_link_status = 1;
2519 schedule_work(&adapter->watchdog_task);
2521 return 0;
2523 err_req_irq:
2524 igb_release_hw_control(adapter);
2525 igb_power_down_link(adapter);
2526 igb_free_all_rx_resources(adapter);
2527 err_setup_rx:
2528 igb_free_all_tx_resources(adapter);
2529 err_setup_tx:
2530 igb_reset(adapter);
2532 return err;
2536 * igb_close - Disables a network interface
2537 * @netdev: network interface device structure
2539 * Returns 0, this is not allowed to fail
2541 * The close entry point is called when an interface is de-activated
2542 * by the OS. The hardware is still under the driver's control, but
2543 * needs to be disabled. A global MAC reset is issued to stop the
2544 * hardware, and all transmit and receive resources are freed.
2546 static int igb_close(struct net_device *netdev)
2548 struct igb_adapter *adapter = netdev_priv(netdev);
2550 WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2551 igb_down(adapter);
2553 igb_free_irq(adapter);
2555 igb_free_all_tx_resources(adapter);
2556 igb_free_all_rx_resources(adapter);
2558 return 0;
2562 * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2563 * @tx_ring: tx descriptor ring (for a specific queue) to setup
2565 * Return 0 on success, negative on failure
2567 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2569 struct device *dev = tx_ring->dev;
2570 int size;
2572 size = sizeof(struct igb_buffer) * tx_ring->count;
2573 tx_ring->buffer_info = vzalloc(size);
2574 if (!tx_ring->buffer_info)
2575 goto err;
2577 /* round up to nearest 4K */
2578 tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2579 tx_ring->size = ALIGN(tx_ring->size, 4096);
2581 tx_ring->desc = dma_alloc_coherent(dev,
2582 tx_ring->size,
2583 &tx_ring->dma,
2584 GFP_KERNEL);
2586 if (!tx_ring->desc)
2587 goto err;
2589 tx_ring->next_to_use = 0;
2590 tx_ring->next_to_clean = 0;
2591 return 0;
2593 err:
2594 vfree(tx_ring->buffer_info);
2595 dev_err(dev,
2596 "Unable to allocate memory for the transmit descriptor ring\n");
2597 return -ENOMEM;
2601 * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2602 * (Descriptors) for all queues
2603 * @adapter: board private structure
2605 * Return 0 on success, negative on failure
2607 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2609 struct pci_dev *pdev = adapter->pdev;
2610 int i, err = 0;
2612 for (i = 0; i < adapter->num_tx_queues; i++) {
2613 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2614 if (err) {
2615 dev_err(&pdev->dev,
2616 "Allocation for Tx Queue %u failed\n", i);
2617 for (i--; i >= 0; i--)
2618 igb_free_tx_resources(adapter->tx_ring[i]);
2619 break;
2623 for (i = 0; i < IGB_ABS_MAX_TX_QUEUES; i++) {
2624 int r_idx = i % adapter->num_tx_queues;
2625 adapter->multi_tx_table[i] = adapter->tx_ring[r_idx];
2627 return err;
2631 * igb_setup_tctl - configure the transmit control registers
2632 * @adapter: Board private structure
2634 void igb_setup_tctl(struct igb_adapter *adapter)
2636 struct e1000_hw *hw = &adapter->hw;
2637 u32 tctl;
2639 /* disable queue 0 which is enabled by default on 82575 and 82576 */
2640 wr32(E1000_TXDCTL(0), 0);
2642 /* Program the Transmit Control Register */
2643 tctl = rd32(E1000_TCTL);
2644 tctl &= ~E1000_TCTL_CT;
2645 tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2646 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2648 igb_config_collision_dist(hw);
2650 /* Enable transmits */
2651 tctl |= E1000_TCTL_EN;
2653 wr32(E1000_TCTL, tctl);
2657 * igb_configure_tx_ring - Configure transmit ring after Reset
2658 * @adapter: board private structure
2659 * @ring: tx ring to configure
2661 * Configure a transmit ring after a reset.
2663 void igb_configure_tx_ring(struct igb_adapter *adapter,
2664 struct igb_ring *ring)
2666 struct e1000_hw *hw = &adapter->hw;
2667 u32 txdctl;
2668 u64 tdba = ring->dma;
2669 int reg_idx = ring->reg_idx;
2671 /* disable the queue */
2672 txdctl = rd32(E1000_TXDCTL(reg_idx));
2673 wr32(E1000_TXDCTL(reg_idx),
2674 txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
2675 wrfl();
2676 mdelay(10);
2678 wr32(E1000_TDLEN(reg_idx),
2679 ring->count * sizeof(union e1000_adv_tx_desc));
2680 wr32(E1000_TDBAL(reg_idx),
2681 tdba & 0x00000000ffffffffULL);
2682 wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2684 ring->head = hw->hw_addr + E1000_TDH(reg_idx);
2685 ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2686 writel(0, ring->head);
2687 writel(0, ring->tail);
2689 txdctl |= IGB_TX_PTHRESH;
2690 txdctl |= IGB_TX_HTHRESH << 8;
2691 txdctl |= IGB_TX_WTHRESH << 16;
2693 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2694 wr32(E1000_TXDCTL(reg_idx), txdctl);
2698 * igb_configure_tx - Configure transmit Unit after Reset
2699 * @adapter: board private structure
2701 * Configure the Tx unit of the MAC after a reset.
2703 static void igb_configure_tx(struct igb_adapter *adapter)
2705 int i;
2707 for (i = 0; i < adapter->num_tx_queues; i++)
2708 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2712 * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2713 * @rx_ring: rx descriptor ring (for a specific queue) to setup
2715 * Returns 0 on success, negative on failure
2717 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2719 struct device *dev = rx_ring->dev;
2720 int size, desc_len;
2722 size = sizeof(struct igb_buffer) * rx_ring->count;
2723 rx_ring->buffer_info = vzalloc(size);
2724 if (!rx_ring->buffer_info)
2725 goto err;
2727 desc_len = sizeof(union e1000_adv_rx_desc);
2729 /* Round up to nearest 4K */
2730 rx_ring->size = rx_ring->count * desc_len;
2731 rx_ring->size = ALIGN(rx_ring->size, 4096);
2733 rx_ring->desc = dma_alloc_coherent(dev,
2734 rx_ring->size,
2735 &rx_ring->dma,
2736 GFP_KERNEL);
2738 if (!rx_ring->desc)
2739 goto err;
2741 rx_ring->next_to_clean = 0;
2742 rx_ring->next_to_use = 0;
2744 return 0;
2746 err:
2747 vfree(rx_ring->buffer_info);
2748 rx_ring->buffer_info = NULL;
2749 dev_err(dev, "Unable to allocate memory for the receive descriptor"
2750 " ring\n");
2751 return -ENOMEM;
2755 * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2756 * (Descriptors) for all queues
2757 * @adapter: board private structure
2759 * Return 0 on success, negative on failure
2761 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2763 struct pci_dev *pdev = adapter->pdev;
2764 int i, err = 0;
2766 for (i = 0; i < adapter->num_rx_queues; i++) {
2767 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2768 if (err) {
2769 dev_err(&pdev->dev,
2770 "Allocation for Rx Queue %u failed\n", i);
2771 for (i--; i >= 0; i--)
2772 igb_free_rx_resources(adapter->rx_ring[i]);
2773 break;
2777 return err;
2781 * igb_setup_mrqc - configure the multiple receive queue control registers
2782 * @adapter: Board private structure
2784 static void igb_setup_mrqc(struct igb_adapter *adapter)
2786 struct e1000_hw *hw = &adapter->hw;
2787 u32 mrqc, rxcsum;
2788 u32 j, num_rx_queues, shift = 0, shift2 = 0;
2789 union e1000_reta {
2790 u32 dword;
2791 u8 bytes[4];
2792 } reta;
2793 static const u8 rsshash[40] = {
2794 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2795 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2796 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2797 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2799 /* Fill out hash function seeds */
2800 for (j = 0; j < 10; j++) {
2801 u32 rsskey = rsshash[(j * 4)];
2802 rsskey |= rsshash[(j * 4) + 1] << 8;
2803 rsskey |= rsshash[(j * 4) + 2] << 16;
2804 rsskey |= rsshash[(j * 4) + 3] << 24;
2805 array_wr32(E1000_RSSRK(0), j, rsskey);
2808 num_rx_queues = adapter->rss_queues;
2810 if (adapter->vfs_allocated_count) {
2811 /* 82575 and 82576 supports 2 RSS queues for VMDq */
2812 switch (hw->mac.type) {
2813 case e1000_i350:
2814 case e1000_82580:
2815 num_rx_queues = 1;
2816 shift = 0;
2817 break;
2818 case e1000_82576:
2819 shift = 3;
2820 num_rx_queues = 2;
2821 break;
2822 case e1000_82575:
2823 shift = 2;
2824 shift2 = 6;
2825 default:
2826 break;
2828 } else {
2829 if (hw->mac.type == e1000_82575)
2830 shift = 6;
2833 for (j = 0; j < (32 * 4); j++) {
2834 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2835 if (shift2)
2836 reta.bytes[j & 3] |= num_rx_queues << shift2;
2837 if ((j & 3) == 3)
2838 wr32(E1000_RETA(j >> 2), reta.dword);
2842 * Disable raw packet checksumming so that RSS hash is placed in
2843 * descriptor on writeback. No need to enable TCP/UDP/IP checksum
2844 * offloads as they are enabled by default
2846 rxcsum = rd32(E1000_RXCSUM);
2847 rxcsum |= E1000_RXCSUM_PCSD;
2849 if (adapter->hw.mac.type >= e1000_82576)
2850 /* Enable Receive Checksum Offload for SCTP */
2851 rxcsum |= E1000_RXCSUM_CRCOFL;
2853 /* Don't need to set TUOFL or IPOFL, they default to 1 */
2854 wr32(E1000_RXCSUM, rxcsum);
2856 /* If VMDq is enabled then we set the appropriate mode for that, else
2857 * we default to RSS so that an RSS hash is calculated per packet even
2858 * if we are only using one queue */
2859 if (adapter->vfs_allocated_count) {
2860 if (hw->mac.type > e1000_82575) {
2861 /* Set the default pool for the PF's first queue */
2862 u32 vtctl = rd32(E1000_VT_CTL);
2863 vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2864 E1000_VT_CTL_DISABLE_DEF_POOL);
2865 vtctl |= adapter->vfs_allocated_count <<
2866 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2867 wr32(E1000_VT_CTL, vtctl);
2869 if (adapter->rss_queues > 1)
2870 mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2871 else
2872 mrqc = E1000_MRQC_ENABLE_VMDQ;
2873 } else {
2874 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2876 igb_vmm_control(adapter);
2879 * Generate RSS hash based on TCP port numbers and/or
2880 * IPv4/v6 src and dst addresses since UDP cannot be
2881 * hashed reliably due to IP fragmentation
2883 mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2884 E1000_MRQC_RSS_FIELD_IPV4_TCP |
2885 E1000_MRQC_RSS_FIELD_IPV6 |
2886 E1000_MRQC_RSS_FIELD_IPV6_TCP |
2887 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2889 wr32(E1000_MRQC, mrqc);
2893 * igb_setup_rctl - configure the receive control registers
2894 * @adapter: Board private structure
2896 void igb_setup_rctl(struct igb_adapter *adapter)
2898 struct e1000_hw *hw = &adapter->hw;
2899 u32 rctl;
2901 rctl = rd32(E1000_RCTL);
2903 rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2904 rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2906 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2907 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2910 * enable stripping of CRC. It's unlikely this will break BMC
2911 * redirection as it did with e1000. Newer features require
2912 * that the HW strips the CRC.
2914 rctl |= E1000_RCTL_SECRC;
2916 /* disable store bad packets and clear size bits. */
2917 rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2919 /* enable LPE to prevent packets larger than max_frame_size */
2920 rctl |= E1000_RCTL_LPE;
2922 /* disable queue 0 to prevent tail write w/o re-config */
2923 wr32(E1000_RXDCTL(0), 0);
2925 /* Attention!!! For SR-IOV PF driver operations you must enable
2926 * queue drop for all VF and PF queues to prevent head of line blocking
2927 * if an un-trusted VF does not provide descriptors to hardware.
2929 if (adapter->vfs_allocated_count) {
2930 /* set all queue drop enable bits */
2931 wr32(E1000_QDE, ALL_QUEUES);
2934 wr32(E1000_RCTL, rctl);
2937 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2938 int vfn)
2940 struct e1000_hw *hw = &adapter->hw;
2941 u32 vmolr;
2943 /* if it isn't the PF check to see if VFs are enabled and
2944 * increase the size to support vlan tags */
2945 if (vfn < adapter->vfs_allocated_count &&
2946 adapter->vf_data[vfn].vlans_enabled)
2947 size += VLAN_TAG_SIZE;
2949 vmolr = rd32(E1000_VMOLR(vfn));
2950 vmolr &= ~E1000_VMOLR_RLPML_MASK;
2951 vmolr |= size | E1000_VMOLR_LPE;
2952 wr32(E1000_VMOLR(vfn), vmolr);
2954 return 0;
2958 * igb_rlpml_set - set maximum receive packet size
2959 * @adapter: board private structure
2961 * Configure maximum receivable packet size.
2963 static void igb_rlpml_set(struct igb_adapter *adapter)
2965 u32 max_frame_size;
2966 struct e1000_hw *hw = &adapter->hw;
2967 u16 pf_id = adapter->vfs_allocated_count;
2969 max_frame_size = adapter->max_frame_size + VLAN_TAG_SIZE;
2971 /* if vfs are enabled we set RLPML to the largest possible request
2972 * size and set the VMOLR RLPML to the size we need */
2973 if (pf_id) {
2974 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2975 max_frame_size = MAX_JUMBO_FRAME_SIZE;
2978 wr32(E1000_RLPML, max_frame_size);
2981 static inline void igb_set_vmolr(struct igb_adapter *adapter,
2982 int vfn, bool aupe)
2984 struct e1000_hw *hw = &adapter->hw;
2985 u32 vmolr;
2988 * This register exists only on 82576 and newer so if we are older then
2989 * we should exit and do nothing
2991 if (hw->mac.type < e1000_82576)
2992 return;
2994 vmolr = rd32(E1000_VMOLR(vfn));
2995 vmolr |= E1000_VMOLR_STRVLAN; /* Strip vlan tags */
2996 if (aupe)
2997 vmolr |= E1000_VMOLR_AUPE; /* Accept untagged packets */
2998 else
2999 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
3001 /* clear all bits that might not be set */
3002 vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
3004 if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
3005 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
3007 * for VMDq only allow the VFs and pool 0 to accept broadcast and
3008 * multicast packets
3010 if (vfn <= adapter->vfs_allocated_count)
3011 vmolr |= E1000_VMOLR_BAM; /* Accept broadcast */
3013 wr32(E1000_VMOLR(vfn), vmolr);
3017 * igb_configure_rx_ring - Configure a receive ring after Reset
3018 * @adapter: board private structure
3019 * @ring: receive ring to be configured
3021 * Configure the Rx unit of the MAC after a reset.
3023 void igb_configure_rx_ring(struct igb_adapter *adapter,
3024 struct igb_ring *ring)
3026 struct e1000_hw *hw = &adapter->hw;
3027 u64 rdba = ring->dma;
3028 int reg_idx = ring->reg_idx;
3029 u32 srrctl, rxdctl;
3031 /* disable the queue */
3032 rxdctl = rd32(E1000_RXDCTL(reg_idx));
3033 wr32(E1000_RXDCTL(reg_idx),
3034 rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
3036 /* Set DMA base address registers */
3037 wr32(E1000_RDBAL(reg_idx),
3038 rdba & 0x00000000ffffffffULL);
3039 wr32(E1000_RDBAH(reg_idx), rdba >> 32);
3040 wr32(E1000_RDLEN(reg_idx),
3041 ring->count * sizeof(union e1000_adv_rx_desc));
3043 /* initialize head and tail */
3044 ring->head = hw->hw_addr + E1000_RDH(reg_idx);
3045 ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
3046 writel(0, ring->head);
3047 writel(0, ring->tail);
3049 /* set descriptor configuration */
3050 if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
3051 srrctl = ALIGN(ring->rx_buffer_len, 64) <<
3052 E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3053 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
3054 srrctl |= IGB_RXBUFFER_16384 >>
3055 E1000_SRRCTL_BSIZEPKT_SHIFT;
3056 #else
3057 srrctl |= (PAGE_SIZE / 2) >>
3058 E1000_SRRCTL_BSIZEPKT_SHIFT;
3059 #endif
3060 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3061 } else {
3062 srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
3063 E1000_SRRCTL_BSIZEPKT_SHIFT;
3064 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
3066 if (hw->mac.type == e1000_82580)
3067 srrctl |= E1000_SRRCTL_TIMESTAMP;
3068 /* Only set Drop Enable if we are supporting multiple queues */
3069 if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
3070 srrctl |= E1000_SRRCTL_DROP_EN;
3072 wr32(E1000_SRRCTL(reg_idx), srrctl);
3074 /* set filtering for VMDQ pools */
3075 igb_set_vmolr(adapter, reg_idx & 0x7, true);
3077 /* enable receive descriptor fetching */
3078 rxdctl = rd32(E1000_RXDCTL(reg_idx));
3079 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3080 rxdctl &= 0xFFF00000;
3081 rxdctl |= IGB_RX_PTHRESH;
3082 rxdctl |= IGB_RX_HTHRESH << 8;
3083 rxdctl |= IGB_RX_WTHRESH << 16;
3084 wr32(E1000_RXDCTL(reg_idx), rxdctl);
3088 * igb_configure_rx - Configure receive Unit after Reset
3089 * @adapter: board private structure
3091 * Configure the Rx unit of the MAC after a reset.
3093 static void igb_configure_rx(struct igb_adapter *adapter)
3095 int i;
3097 /* set UTA to appropriate mode */
3098 igb_set_uta(adapter);
3100 /* set the correct pool for the PF default MAC address in entry 0 */
3101 igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
3102 adapter->vfs_allocated_count);
3104 /* Setup the HW Rx Head and Tail Descriptor Pointers and
3105 * the Base and Length of the Rx Descriptor Ring */
3106 for (i = 0; i < adapter->num_rx_queues; i++)
3107 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
3111 * igb_free_tx_resources - Free Tx Resources per Queue
3112 * @tx_ring: Tx descriptor ring for a specific queue
3114 * Free all transmit software resources
3116 void igb_free_tx_resources(struct igb_ring *tx_ring)
3118 igb_clean_tx_ring(tx_ring);
3120 vfree(tx_ring->buffer_info);
3121 tx_ring->buffer_info = NULL;
3123 /* if not set, then don't free */
3124 if (!tx_ring->desc)
3125 return;
3127 dma_free_coherent(tx_ring->dev, tx_ring->size,
3128 tx_ring->desc, tx_ring->dma);
3130 tx_ring->desc = NULL;
3134 * igb_free_all_tx_resources - Free Tx Resources for All Queues
3135 * @adapter: board private structure
3137 * Free all transmit software resources
3139 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3141 int i;
3143 for (i = 0; i < adapter->num_tx_queues; i++)
3144 igb_free_tx_resources(adapter->tx_ring[i]);
3147 void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
3148 struct igb_buffer *buffer_info)
3150 if (buffer_info->dma) {
3151 if (buffer_info->mapped_as_page)
3152 dma_unmap_page(tx_ring->dev,
3153 buffer_info->dma,
3154 buffer_info->length,
3155 DMA_TO_DEVICE);
3156 else
3157 dma_unmap_single(tx_ring->dev,
3158 buffer_info->dma,
3159 buffer_info->length,
3160 DMA_TO_DEVICE);
3161 buffer_info->dma = 0;
3163 if (buffer_info->skb) {
3164 dev_kfree_skb_any(buffer_info->skb);
3165 buffer_info->skb = NULL;
3167 buffer_info->time_stamp = 0;
3168 buffer_info->length = 0;
3169 buffer_info->next_to_watch = 0;
3170 buffer_info->mapped_as_page = false;
3174 * igb_clean_tx_ring - Free Tx Buffers
3175 * @tx_ring: ring to be cleaned
3177 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3179 struct igb_buffer *buffer_info;
3180 unsigned long size;
3181 unsigned int i;
3183 if (!tx_ring->buffer_info)
3184 return;
3185 /* Free all the Tx ring sk_buffs */
3187 for (i = 0; i < tx_ring->count; i++) {
3188 buffer_info = &tx_ring->buffer_info[i];
3189 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3192 size = sizeof(struct igb_buffer) * tx_ring->count;
3193 memset(tx_ring->buffer_info, 0, size);
3195 /* Zero out the descriptor ring */
3196 memset(tx_ring->desc, 0, tx_ring->size);
3198 tx_ring->next_to_use = 0;
3199 tx_ring->next_to_clean = 0;
3203 * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3204 * @adapter: board private structure
3206 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3208 int i;
3210 for (i = 0; i < adapter->num_tx_queues; i++)
3211 igb_clean_tx_ring(adapter->tx_ring[i]);
3215 * igb_free_rx_resources - Free Rx Resources
3216 * @rx_ring: ring to clean the resources from
3218 * Free all receive software resources
3220 void igb_free_rx_resources(struct igb_ring *rx_ring)
3222 igb_clean_rx_ring(rx_ring);
3224 vfree(rx_ring->buffer_info);
3225 rx_ring->buffer_info = NULL;
3227 /* if not set, then don't free */
3228 if (!rx_ring->desc)
3229 return;
3231 dma_free_coherent(rx_ring->dev, rx_ring->size,
3232 rx_ring->desc, rx_ring->dma);
3234 rx_ring->desc = NULL;
3238 * igb_free_all_rx_resources - Free Rx Resources for All Queues
3239 * @adapter: board private structure
3241 * Free all receive software resources
3243 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3245 int i;
3247 for (i = 0; i < adapter->num_rx_queues; i++)
3248 igb_free_rx_resources(adapter->rx_ring[i]);
3252 * igb_clean_rx_ring - Free Rx Buffers per Queue
3253 * @rx_ring: ring to free buffers from
3255 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3257 struct igb_buffer *buffer_info;
3258 unsigned long size;
3259 unsigned int i;
3261 if (!rx_ring->buffer_info)
3262 return;
3264 /* Free all the Rx ring sk_buffs */
3265 for (i = 0; i < rx_ring->count; i++) {
3266 buffer_info = &rx_ring->buffer_info[i];
3267 if (buffer_info->dma) {
3268 dma_unmap_single(rx_ring->dev,
3269 buffer_info->dma,
3270 rx_ring->rx_buffer_len,
3271 DMA_FROM_DEVICE);
3272 buffer_info->dma = 0;
3275 if (buffer_info->skb) {
3276 dev_kfree_skb(buffer_info->skb);
3277 buffer_info->skb = NULL;
3279 if (buffer_info->page_dma) {
3280 dma_unmap_page(rx_ring->dev,
3281 buffer_info->page_dma,
3282 PAGE_SIZE / 2,
3283 DMA_FROM_DEVICE);
3284 buffer_info->page_dma = 0;
3286 if (buffer_info->page) {
3287 put_page(buffer_info->page);
3288 buffer_info->page = NULL;
3289 buffer_info->page_offset = 0;
3293 size = sizeof(struct igb_buffer) * rx_ring->count;
3294 memset(rx_ring->buffer_info, 0, size);
3296 /* Zero out the descriptor ring */
3297 memset(rx_ring->desc, 0, rx_ring->size);
3299 rx_ring->next_to_clean = 0;
3300 rx_ring->next_to_use = 0;
3304 * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3305 * @adapter: board private structure
3307 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3309 int i;
3311 for (i = 0; i < adapter->num_rx_queues; i++)
3312 igb_clean_rx_ring(adapter->rx_ring[i]);
3316 * igb_set_mac - Change the Ethernet Address of the NIC
3317 * @netdev: network interface device structure
3318 * @p: pointer to an address structure
3320 * Returns 0 on success, negative on failure
3322 static int igb_set_mac(struct net_device *netdev, void *p)
3324 struct igb_adapter *adapter = netdev_priv(netdev);
3325 struct e1000_hw *hw = &adapter->hw;
3326 struct sockaddr *addr = p;
3328 if (!is_valid_ether_addr(addr->sa_data))
3329 return -EADDRNOTAVAIL;
3331 memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3332 memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3334 /* set the correct pool for the new PF MAC address in entry 0 */
3335 igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3336 adapter->vfs_allocated_count);
3338 return 0;
3342 * igb_write_mc_addr_list - write multicast addresses to MTA
3343 * @netdev: network interface device structure
3345 * Writes multicast address list to the MTA hash table.
3346 * Returns: -ENOMEM on failure
3347 * 0 on no addresses written
3348 * X on writing X addresses to MTA
3350 static int igb_write_mc_addr_list(struct net_device *netdev)
3352 struct igb_adapter *adapter = netdev_priv(netdev);
3353 struct e1000_hw *hw = &adapter->hw;
3354 struct netdev_hw_addr *ha;
3355 u8 *mta_list;
3356 int i;
3358 if (netdev_mc_empty(netdev)) {
3359 /* nothing to program, so clear mc list */
3360 igb_update_mc_addr_list(hw, NULL, 0);
3361 igb_restore_vf_multicasts(adapter);
3362 return 0;
3365 mta_list = kzalloc(netdev_mc_count(netdev) * 6, GFP_ATOMIC);
3366 if (!mta_list)
3367 return -ENOMEM;
3369 /* The shared function expects a packed array of only addresses. */
3370 i = 0;
3371 netdev_for_each_mc_addr(ha, netdev)
3372 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3374 igb_update_mc_addr_list(hw, mta_list, i);
3375 kfree(mta_list);
3377 return netdev_mc_count(netdev);
3381 * igb_write_uc_addr_list - write unicast addresses to RAR table
3382 * @netdev: network interface device structure
3384 * Writes unicast address list to the RAR table.
3385 * Returns: -ENOMEM on failure/insufficient address space
3386 * 0 on no addresses written
3387 * X on writing X addresses to the RAR table
3389 static int igb_write_uc_addr_list(struct net_device *netdev)
3391 struct igb_adapter *adapter = netdev_priv(netdev);
3392 struct e1000_hw *hw = &adapter->hw;
3393 unsigned int vfn = adapter->vfs_allocated_count;
3394 unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
3395 int count = 0;
3397 /* return ENOMEM indicating insufficient memory for addresses */
3398 if (netdev_uc_count(netdev) > rar_entries)
3399 return -ENOMEM;
3401 if (!netdev_uc_empty(netdev) && rar_entries) {
3402 struct netdev_hw_addr *ha;
3404 netdev_for_each_uc_addr(ha, netdev) {
3405 if (!rar_entries)
3406 break;
3407 igb_rar_set_qsel(adapter, ha->addr,
3408 rar_entries--,
3409 vfn);
3410 count++;
3413 /* write the addresses in reverse order to avoid write combining */
3414 for (; rar_entries > 0 ; rar_entries--) {
3415 wr32(E1000_RAH(rar_entries), 0);
3416 wr32(E1000_RAL(rar_entries), 0);
3418 wrfl();
3420 return count;
3424 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3425 * @netdev: network interface device structure
3427 * The set_rx_mode entry point is called whenever the unicast or multicast
3428 * address lists or the network interface flags are updated. This routine is
3429 * responsible for configuring the hardware for proper unicast, multicast,
3430 * promiscuous mode, and all-multi behavior.
3432 static void igb_set_rx_mode(struct net_device *netdev)
3434 struct igb_adapter *adapter = netdev_priv(netdev);
3435 struct e1000_hw *hw = &adapter->hw;
3436 unsigned int vfn = adapter->vfs_allocated_count;
3437 u32 rctl, vmolr = 0;
3438 int count;
3440 /* Check for Promiscuous and All Multicast modes */
3441 rctl = rd32(E1000_RCTL);
3443 /* clear the effected bits */
3444 rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3446 if (netdev->flags & IFF_PROMISC) {
3447 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3448 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3449 } else {
3450 if (netdev->flags & IFF_ALLMULTI) {
3451 rctl |= E1000_RCTL_MPE;
3452 vmolr |= E1000_VMOLR_MPME;
3453 } else {
3455 * Write addresses to the MTA, if the attempt fails
3456 * then we should just turn on promiscuous mode so
3457 * that we can at least receive multicast traffic
3459 count = igb_write_mc_addr_list(netdev);
3460 if (count < 0) {
3461 rctl |= E1000_RCTL_MPE;
3462 vmolr |= E1000_VMOLR_MPME;
3463 } else if (count) {
3464 vmolr |= E1000_VMOLR_ROMPE;
3468 * Write addresses to available RAR registers, if there is not
3469 * sufficient space to store all the addresses then enable
3470 * unicast promiscuous mode
3472 count = igb_write_uc_addr_list(netdev);
3473 if (count < 0) {
3474 rctl |= E1000_RCTL_UPE;
3475 vmolr |= E1000_VMOLR_ROPE;
3477 rctl |= E1000_RCTL_VFE;
3479 wr32(E1000_RCTL, rctl);
3482 * In order to support SR-IOV and eventually VMDq it is necessary to set
3483 * the VMOLR to enable the appropriate modes. Without this workaround
3484 * we will have issues with VLAN tag stripping not being done for frames
3485 * that are only arriving because we are the default pool
3487 if (hw->mac.type < e1000_82576)
3488 return;
3490 vmolr |= rd32(E1000_VMOLR(vfn)) &
3491 ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3492 wr32(E1000_VMOLR(vfn), vmolr);
3493 igb_restore_vf_multicasts(adapter);
3496 static void igb_check_wvbr(struct igb_adapter *adapter)
3498 struct e1000_hw *hw = &adapter->hw;
3499 u32 wvbr = 0;
3501 switch (hw->mac.type) {
3502 case e1000_82576:
3503 case e1000_i350:
3504 if (!(wvbr = rd32(E1000_WVBR)))
3505 return;
3506 break;
3507 default:
3508 break;
3511 adapter->wvbr |= wvbr;
3514 #define IGB_STAGGERED_QUEUE_OFFSET 8
3516 static void igb_spoof_check(struct igb_adapter *adapter)
3518 int j;
3520 if (!adapter->wvbr)
3521 return;
3523 for(j = 0; j < adapter->vfs_allocated_count; j++) {
3524 if (adapter->wvbr & (1 << j) ||
3525 adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3526 dev_warn(&adapter->pdev->dev,
3527 "Spoof event(s) detected on VF %d\n", j);
3528 adapter->wvbr &=
3529 ~((1 << j) |
3530 (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3535 /* Need to wait a few seconds after link up to get diagnostic information from
3536 * the phy */
3537 static void igb_update_phy_info(unsigned long data)
3539 struct igb_adapter *adapter = (struct igb_adapter *) data;
3540 igb_get_phy_info(&adapter->hw);
3544 * igb_has_link - check shared code for link and determine up/down
3545 * @adapter: pointer to driver private info
3547 bool igb_has_link(struct igb_adapter *adapter)
3549 struct e1000_hw *hw = &adapter->hw;
3550 bool link_active = false;
3551 s32 ret_val = 0;
3553 /* get_link_status is set on LSC (link status) interrupt or
3554 * rx sequence error interrupt. get_link_status will stay
3555 * false until the e1000_check_for_link establishes link
3556 * for copper adapters ONLY
3558 switch (hw->phy.media_type) {
3559 case e1000_media_type_copper:
3560 if (hw->mac.get_link_status) {
3561 ret_val = hw->mac.ops.check_for_link(hw);
3562 link_active = !hw->mac.get_link_status;
3563 } else {
3564 link_active = true;
3566 break;
3567 case e1000_media_type_internal_serdes:
3568 ret_val = hw->mac.ops.check_for_link(hw);
3569 link_active = hw->mac.serdes_has_link;
3570 break;
3571 default:
3572 case e1000_media_type_unknown:
3573 break;
3576 return link_active;
3579 static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event)
3581 bool ret = false;
3582 u32 ctrl_ext, thstat;
3584 /* check for thermal sensor event on i350, copper only */
3585 if (hw->mac.type == e1000_i350) {
3586 thstat = rd32(E1000_THSTAT);
3587 ctrl_ext = rd32(E1000_CTRL_EXT);
3589 if ((hw->phy.media_type == e1000_media_type_copper) &&
3590 !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) {
3591 ret = !!(thstat & event);
3595 return ret;
3599 * igb_watchdog - Timer Call-back
3600 * @data: pointer to adapter cast into an unsigned long
3602 static void igb_watchdog(unsigned long data)
3604 struct igb_adapter *adapter = (struct igb_adapter *)data;
3605 /* Do the rest outside of interrupt context */
3606 schedule_work(&adapter->watchdog_task);
3609 static void igb_watchdog_task(struct work_struct *work)
3611 struct igb_adapter *adapter = container_of(work,
3612 struct igb_adapter,
3613 watchdog_task);
3614 struct e1000_hw *hw = &adapter->hw;
3615 struct net_device *netdev = adapter->netdev;
3616 u32 link;
3617 int i;
3619 link = igb_has_link(adapter);
3620 if (link) {
3621 if (!netif_carrier_ok(netdev)) {
3622 u32 ctrl;
3623 hw->mac.ops.get_speed_and_duplex(hw,
3624 &adapter->link_speed,
3625 &adapter->link_duplex);
3627 ctrl = rd32(E1000_CTRL);
3628 /* Links status message must follow this format */
3629 printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3630 "Flow Control: %s\n",
3631 netdev->name,
3632 adapter->link_speed,
3633 adapter->link_duplex == FULL_DUPLEX ?
3634 "Full Duplex" : "Half Duplex",
3635 ((ctrl & E1000_CTRL_TFCE) &&
3636 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3637 ((ctrl & E1000_CTRL_RFCE) ? "RX" :
3638 ((ctrl & E1000_CTRL_TFCE) ? "TX" : "None")));
3640 /* check for thermal sensor event */
3641 if (igb_thermal_sensor_event(hw, E1000_THSTAT_LINK_THROTTLE)) {
3642 printk(KERN_INFO "igb: %s The network adapter "
3643 "link speed was downshifted "
3644 "because it overheated.\n",
3645 netdev->name);
3648 /* adjust timeout factor according to speed/duplex */
3649 adapter->tx_timeout_factor = 1;
3650 switch (adapter->link_speed) {
3651 case SPEED_10:
3652 adapter->tx_timeout_factor = 14;
3653 break;
3654 case SPEED_100:
3655 /* maybe add some timeout factor ? */
3656 break;
3659 netif_carrier_on(netdev);
3661 igb_ping_all_vfs(adapter);
3662 igb_check_vf_rate_limit(adapter);
3664 /* link state has changed, schedule phy info update */
3665 if (!test_bit(__IGB_DOWN, &adapter->state))
3666 mod_timer(&adapter->phy_info_timer,
3667 round_jiffies(jiffies + 2 * HZ));
3669 } else {
3670 if (netif_carrier_ok(netdev)) {
3671 adapter->link_speed = 0;
3672 adapter->link_duplex = 0;
3674 /* check for thermal sensor event */
3675 if (igb_thermal_sensor_event(hw, E1000_THSTAT_PWR_DOWN)) {
3676 printk(KERN_ERR "igb: %s The network adapter "
3677 "was stopped because it "
3678 "overheated.\n",
3679 netdev->name);
3682 /* Links status message must follow this format */
3683 printk(KERN_INFO "igb: %s NIC Link is Down\n",
3684 netdev->name);
3685 netif_carrier_off(netdev);
3687 igb_ping_all_vfs(adapter);
3689 /* link state has changed, schedule phy info update */
3690 if (!test_bit(__IGB_DOWN, &adapter->state))
3691 mod_timer(&adapter->phy_info_timer,
3692 round_jiffies(jiffies + 2 * HZ));
3696 spin_lock(&adapter->stats64_lock);
3697 igb_update_stats(adapter, &adapter->stats64);
3698 spin_unlock(&adapter->stats64_lock);
3700 for (i = 0; i < adapter->num_tx_queues; i++) {
3701 struct igb_ring *tx_ring = adapter->tx_ring[i];
3702 if (!netif_carrier_ok(netdev)) {
3703 /* We've lost link, so the controller stops DMA,
3704 * but we've got queued Tx work that's never going
3705 * to get done, so reset controller to flush Tx.
3706 * (Do the reset outside of interrupt context). */
3707 if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3708 adapter->tx_timeout_count++;
3709 schedule_work(&adapter->reset_task);
3710 /* return immediately since reset is imminent */
3711 return;
3715 /* Force detection of hung controller every watchdog period */
3716 tx_ring->detect_tx_hung = true;
3719 /* Cause software interrupt to ensure rx ring is cleaned */
3720 if (adapter->msix_entries) {
3721 u32 eics = 0;
3722 for (i = 0; i < adapter->num_q_vectors; i++) {
3723 struct igb_q_vector *q_vector = adapter->q_vector[i];
3724 eics |= q_vector->eims_value;
3726 wr32(E1000_EICS, eics);
3727 } else {
3728 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3731 igb_spoof_check(adapter);
3733 /* Reset the timer */
3734 if (!test_bit(__IGB_DOWN, &adapter->state))
3735 mod_timer(&adapter->watchdog_timer,
3736 round_jiffies(jiffies + 2 * HZ));
/* Latency classes used by the dynamic ITR code (igb_update_itr/igb_set_itr) */
enum latency_range {
	lowest_latency	= 0,
	low_latency	= 1,
	bulk_latency	= 2,
	latency_invalid	= 255
};
3747 * igb_update_ring_itr - update the dynamic ITR value based on packet size
3749 * Stores a new ITR value based on strictly on packet size. This
3750 * algorithm is less sophisticated than that used in igb_update_itr,
3751 * due to the difficulty of synchronizing statistics across multiple
3752 * receive rings. The divisors and thresholds used by this function
3753 * were determined based on theoretical maximum wire speed and testing
3754 * data, in order to minimize response time while increasing bulk
3755 * throughput.
3756 * This functionality is controlled by the InterruptThrottleRate module
3757 * parameter (see igb_param.c)
3758 * NOTE: This function is called only when operating in a multiqueue
3759 * receive environment.
3760 * @q_vector: pointer to q_vector
3762 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3764 int new_val = q_vector->itr_val;
3765 int avg_wire_size = 0;
3766 struct igb_adapter *adapter = q_vector->adapter;
3767 struct igb_ring *ring;
3768 unsigned int packets;
3770 /* For non-gigabit speeds, just fix the interrupt rate at 4000
3771 * ints/sec - ITR timer value of 120 ticks.
3773 if (adapter->link_speed != SPEED_1000) {
3774 new_val = 976;
3775 goto set_itr_val;
3778 ring = q_vector->rx_ring;
3779 if (ring) {
3780 packets = ACCESS_ONCE(ring->total_packets);
3782 if (packets)
3783 avg_wire_size = ring->total_bytes / packets;
3786 ring = q_vector->tx_ring;
3787 if (ring) {
3788 packets = ACCESS_ONCE(ring->total_packets);
3790 if (packets)
3791 avg_wire_size = max_t(u32, avg_wire_size,
3792 ring->total_bytes / packets);
3795 /* if avg_wire_size isn't set no work was done */
3796 if (!avg_wire_size)
3797 goto clear_counts;
3799 /* Add 24 bytes to size to account for CRC, preamble, and gap */
3800 avg_wire_size += 24;
3802 /* Don't starve jumbo frames */
3803 avg_wire_size = min(avg_wire_size, 3000);
3805 /* Give a little boost to mid-size frames */
3806 if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3807 new_val = avg_wire_size / 3;
3808 else
3809 new_val = avg_wire_size / 2;
3811 /* when in itr mode 3 do not exceed 20K ints/sec */
3812 if (adapter->rx_itr_setting == 3 && new_val < 196)
3813 new_val = 196;
3815 set_itr_val:
3816 if (new_val != q_vector->itr_val) {
3817 q_vector->itr_val = new_val;
3818 q_vector->set_itr = 1;
3820 clear_counts:
3821 if (q_vector->rx_ring) {
3822 q_vector->rx_ring->total_bytes = 0;
3823 q_vector->rx_ring->total_packets = 0;
3825 if (q_vector->tx_ring) {
3826 q_vector->tx_ring->total_bytes = 0;
3827 q_vector->tx_ring->total_packets = 0;
3832 * igb_update_itr - update the dynamic ITR value based on statistics
3833 * Stores a new ITR value based on packets and byte
3834 * counts during the last interrupt. The advantage of per interrupt
3835 * computation is faster updates and more accurate ITR for the current
3836 * traffic pattern. Constants in this function were computed
3837 * based on theoretical maximum wire speed and thresholds were set based
3838 * on testing data as well as attempting to minimize response time
3839 * while increasing bulk throughput.
3840 * this functionality is controlled by the InterruptThrottleRate module
3841 * parameter (see igb_param.c)
3842 * NOTE: These calculations are only valid when operating in a single-
3843 * queue environment.
3844 * @adapter: pointer to adapter
3845 * @itr_setting: current q_vector->itr_val
3846 * @packets: the number of packets during this measurement interval
3847 * @bytes: the number of bytes during this measurement interval
3849 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3850 int packets, int bytes)
3852 unsigned int retval = itr_setting;
3854 if (packets == 0)
3855 goto update_itr_done;
3857 switch (itr_setting) {
3858 case lowest_latency:
3859 /* handle TSO and jumbo frames */
3860 if (bytes/packets > 8000)
3861 retval = bulk_latency;
3862 else if ((packets < 5) && (bytes > 512))
3863 retval = low_latency;
3864 break;
3865 case low_latency: /* 50 usec aka 20000 ints/s */
3866 if (bytes > 10000) {
3867 /* this if handles the TSO accounting */
3868 if (bytes/packets > 8000) {
3869 retval = bulk_latency;
3870 } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3871 retval = bulk_latency;
3872 } else if ((packets > 35)) {
3873 retval = lowest_latency;
3875 } else if (bytes/packets > 2000) {
3876 retval = bulk_latency;
3877 } else if (packets <= 2 && bytes < 512) {
3878 retval = lowest_latency;
3880 break;
3881 case bulk_latency: /* 250 usec aka 4000 ints/s */
3882 if (bytes > 25000) {
3883 if (packets > 35)
3884 retval = low_latency;
3885 } else if (bytes < 1500) {
3886 retval = low_latency;
3888 break;
3891 update_itr_done:
3892 return retval;
3895 static void igb_set_itr(struct igb_adapter *adapter)
3897 struct igb_q_vector *q_vector = adapter->q_vector[0];
3898 u16 current_itr;
3899 u32 new_itr = q_vector->itr_val;
3901 /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3902 if (adapter->link_speed != SPEED_1000) {
3903 current_itr = 0;
3904 new_itr = 4000;
3905 goto set_itr_now;
3908 adapter->rx_itr = igb_update_itr(adapter,
3909 adapter->rx_itr,
3910 q_vector->rx_ring->total_packets,
3911 q_vector->rx_ring->total_bytes);
3913 adapter->tx_itr = igb_update_itr(adapter,
3914 adapter->tx_itr,
3915 q_vector->tx_ring->total_packets,
3916 q_vector->tx_ring->total_bytes);
3917 current_itr = max(adapter->rx_itr, adapter->tx_itr);
3919 /* conservative mode (itr 3) eliminates the lowest_latency setting */
3920 if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3921 current_itr = low_latency;
3923 switch (current_itr) {
3924 /* counts and packets in update_itr are dependent on these numbers */
3925 case lowest_latency:
3926 new_itr = 56; /* aka 70,000 ints/sec */
3927 break;
3928 case low_latency:
3929 new_itr = 196; /* aka 20,000 ints/sec */
3930 break;
3931 case bulk_latency:
3932 new_itr = 980; /* aka 4,000 ints/sec */
3933 break;
3934 default:
3935 break;
3938 set_itr_now:
3939 q_vector->rx_ring->total_bytes = 0;
3940 q_vector->rx_ring->total_packets = 0;
3941 q_vector->tx_ring->total_bytes = 0;
3942 q_vector->tx_ring->total_packets = 0;
3944 if (new_itr != q_vector->itr_val) {
3945 /* this attempts to bias the interrupt rate towards Bulk
3946 * by adding intermediate steps when interrupt rate is
3947 * increasing */
3948 new_itr = new_itr > q_vector->itr_val ?
3949 max((new_itr * q_vector->itr_val) /
3950 (new_itr + (q_vector->itr_val >> 2)),
3951 new_itr) :
3952 new_itr;
3953 /* Don't write the value here; it resets the adapter's
3954 * internal timer, and causes us to delay far longer than
3955 * we should between interrupts. Instead, we write the ITR
3956 * value at the beginning of the next interrupt so the timing
3957 * ends up being correct.
3959 q_vector->itr_val = new_itr;
3960 q_vector->set_itr = 1;
/*
 * Per-packet transmit flags.  The low bits are feature flags; when
 * IGB_TX_FLAGS_VLAN is set the VLAN tag is carried in the upper 16 bits.
 */
#define IGB_TX_FLAGS_CSUM	0x01
#define IGB_TX_FLAGS_VLAN	0x02
#define IGB_TX_FLAGS_TSO	0x04
#define IGB_TX_FLAGS_IPV4	0x08
#define IGB_TX_FLAGS_TSTAMP	0x10

#define IGB_TX_FLAGS_VLAN_MASK	0xffff0000
#define IGB_TX_FLAGS_VLAN_SHIFT	16
3972 static inline int igb_tso_adv(struct igb_ring *tx_ring,
3973 struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3975 struct e1000_adv_tx_context_desc *context_desc;
3976 unsigned int i;
3977 int err;
3978 struct igb_buffer *buffer_info;
3979 u32 info = 0, tu_cmd = 0;
3980 u32 mss_l4len_idx;
3981 u8 l4len;
3983 if (skb_header_cloned(skb)) {
3984 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3985 if (err)
3986 return err;
3989 l4len = tcp_hdrlen(skb);
3990 *hdr_len += l4len;
3992 if (skb->protocol == htons(ETH_P_IP)) {
3993 struct iphdr *iph = ip_hdr(skb);
3994 iph->tot_len = 0;
3995 iph->check = 0;
3996 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3997 iph->daddr, 0,
3998 IPPROTO_TCP,
4000 } else if (skb_is_gso_v6(skb)) {
4001 ipv6_hdr(skb)->payload_len = 0;
4002 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
4003 &ipv6_hdr(skb)->daddr,
4004 0, IPPROTO_TCP, 0);
4007 i = tx_ring->next_to_use;
4009 buffer_info = &tx_ring->buffer_info[i];
4010 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
4011 /* VLAN MACLEN IPLEN */
4012 if (tx_flags & IGB_TX_FLAGS_VLAN)
4013 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
4014 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
4015 *hdr_len += skb_network_offset(skb);
4016 info |= skb_network_header_len(skb);
4017 *hdr_len += skb_network_header_len(skb);
4018 context_desc->vlan_macip_lens = cpu_to_le32(info);
4020 /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
4021 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
4023 if (skb->protocol == htons(ETH_P_IP))
4024 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
4025 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4027 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
4029 /* MSS L4LEN IDX */
4030 mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
4031 mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
4033 /* For 82575, context index must be unique per ring. */
4034 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
4035 mss_l4len_idx |= tx_ring->reg_idx << 4;
4037 context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
4038 context_desc->seqnum_seed = 0;
4040 buffer_info->time_stamp = jiffies;
4041 buffer_info->next_to_watch = i;
4042 buffer_info->dma = 0;
4043 i++;
4044 if (i == tx_ring->count)
4045 i = 0;
4047 tx_ring->next_to_use = i;
4049 return true;
4052 static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
4053 struct sk_buff *skb, u32 tx_flags)
4055 struct e1000_adv_tx_context_desc *context_desc;
4056 struct device *dev = tx_ring->dev;
4057 struct igb_buffer *buffer_info;
4058 u32 info = 0, tu_cmd = 0;
4059 unsigned int i;
4061 if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
4062 (tx_flags & IGB_TX_FLAGS_VLAN)) {
4063 i = tx_ring->next_to_use;
4064 buffer_info = &tx_ring->buffer_info[i];
4065 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
4067 if (tx_flags & IGB_TX_FLAGS_VLAN)
4068 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
4070 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
4071 if (skb->ip_summed == CHECKSUM_PARTIAL)
4072 info |= skb_network_header_len(skb);
4074 context_desc->vlan_macip_lens = cpu_to_le32(info);
4076 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
4078 if (skb->ip_summed == CHECKSUM_PARTIAL) {
4079 __be16 protocol;
4081 if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
4082 const struct vlan_ethhdr *vhdr =
4083 (const struct vlan_ethhdr*)skb->data;
4085 protocol = vhdr->h_vlan_encapsulated_proto;
4086 } else {
4087 protocol = skb->protocol;
4090 switch (protocol) {
4091 case cpu_to_be16(ETH_P_IP):
4092 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
4093 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
4094 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4095 else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
4096 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4097 break;
4098 case cpu_to_be16(ETH_P_IPV6):
4099 /* XXX what about other V6 headers?? */
4100 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
4101 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4102 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
4103 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4104 break;
4105 default:
4106 if (unlikely(net_ratelimit()))
4107 dev_warn(dev,
4108 "partial checksum but proto=%x!\n",
4109 skb->protocol);
4110 break;
4114 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
4115 context_desc->seqnum_seed = 0;
4116 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
4117 context_desc->mss_l4len_idx =
4118 cpu_to_le32(tx_ring->reg_idx << 4);
4120 buffer_info->time_stamp = jiffies;
4121 buffer_info->next_to_watch = i;
4122 buffer_info->dma = 0;
4124 i++;
4125 if (i == tx_ring->count)
4126 i = 0;
4127 tx_ring->next_to_use = i;
4129 return true;
4131 return false;
/* A single Tx data descriptor must describe fewer than 2^16 bytes */
#define IGB_MAX_TXD_PWR		16
#define IGB_MAX_DATA_PER_TXD	(1 << IGB_MAX_TXD_PWR)
4137 static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
4138 unsigned int first)
4140 struct igb_buffer *buffer_info;
4141 struct device *dev = tx_ring->dev;
4142 unsigned int hlen = skb_headlen(skb);
4143 unsigned int count = 0, i;
4144 unsigned int f;
4145 u16 gso_segs = skb_shinfo(skb)->gso_segs ?: 1;
4147 i = tx_ring->next_to_use;
4149 buffer_info = &tx_ring->buffer_info[i];
4150 BUG_ON(hlen >= IGB_MAX_DATA_PER_TXD);
4151 buffer_info->length = hlen;
4152 /* set time_stamp *before* dma to help avoid a possible race */
4153 buffer_info->time_stamp = jiffies;
4154 buffer_info->next_to_watch = i;
4155 buffer_info->dma = dma_map_single(dev, skb->data, hlen,
4156 DMA_TO_DEVICE);
4157 if (dma_mapping_error(dev, buffer_info->dma))
4158 goto dma_error;
4160 for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
4161 struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[f];
4162 unsigned int len = frag->size;
4164 count++;
4165 i++;
4166 if (i == tx_ring->count)
4167 i = 0;
4169 buffer_info = &tx_ring->buffer_info[i];
4170 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
4171 buffer_info->length = len;
4172 buffer_info->time_stamp = jiffies;
4173 buffer_info->next_to_watch = i;
4174 buffer_info->mapped_as_page = true;
4175 buffer_info->dma = dma_map_page(dev,
4176 frag->page,
4177 frag->page_offset,
4178 len,
4179 DMA_TO_DEVICE);
4180 if (dma_mapping_error(dev, buffer_info->dma))
4181 goto dma_error;
4185 tx_ring->buffer_info[i].skb = skb;
4186 tx_ring->buffer_info[i].tx_flags = skb_shinfo(skb)->tx_flags;
4187 /* multiply data chunks by size of headers */
4188 tx_ring->buffer_info[i].bytecount = ((gso_segs - 1) * hlen) + skb->len;
4189 tx_ring->buffer_info[i].gso_segs = gso_segs;
4190 tx_ring->buffer_info[first].next_to_watch = i;
4192 return ++count;
4194 dma_error:
4195 dev_err(dev, "TX DMA map failed\n");
4197 /* clear timestamp and dma mappings for failed buffer_info mapping */
4198 buffer_info->dma = 0;
4199 buffer_info->time_stamp = 0;
4200 buffer_info->length = 0;
4201 buffer_info->next_to_watch = 0;
4202 buffer_info->mapped_as_page = false;
4204 /* clear timestamp and dma mappings for remaining portion of packet */
4205 while (count--) {
4206 if (i == 0)
4207 i = tx_ring->count;
4208 i--;
4209 buffer_info = &tx_ring->buffer_info[i];
4210 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
4213 return 0;
4216 static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
4217 u32 tx_flags, int count, u32 paylen,
4218 u8 hdr_len)
4220 union e1000_adv_tx_desc *tx_desc;
4221 struct igb_buffer *buffer_info;
4222 u32 olinfo_status = 0, cmd_type_len;
4223 unsigned int i = tx_ring->next_to_use;
4225 cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
4226 E1000_ADVTXD_DCMD_DEXT);
4228 if (tx_flags & IGB_TX_FLAGS_VLAN)
4229 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
4231 if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4232 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
4234 if (tx_flags & IGB_TX_FLAGS_TSO) {
4235 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
4237 /* insert tcp checksum */
4238 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4240 /* insert ip checksum */
4241 if (tx_flags & IGB_TX_FLAGS_IPV4)
4242 olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4244 } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
4245 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4248 if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
4249 (tx_flags & (IGB_TX_FLAGS_CSUM |
4250 IGB_TX_FLAGS_TSO |
4251 IGB_TX_FLAGS_VLAN)))
4252 olinfo_status |= tx_ring->reg_idx << 4;
4254 olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
4256 do {
4257 buffer_info = &tx_ring->buffer_info[i];
4258 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
4259 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
4260 tx_desc->read.cmd_type_len =
4261 cpu_to_le32(cmd_type_len | buffer_info->length);
4262 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4263 count--;
4264 i++;
4265 if (i == tx_ring->count)
4266 i = 0;
4267 } while (count > 0);
4269 tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
4270 /* Force memory writes to complete before letting h/w
4271 * know there are new descriptors to fetch. (Only
4272 * applicable for weak-ordered memory model archs,
4273 * such as IA-64). */
4274 wmb();
4276 tx_ring->next_to_use = i;
4277 writel(i, tx_ring->tail);
4278 /* we need this if more than one processor can write to our tail
4279 * at a time, it syncronizes IO on IA64/Altix systems */
4280 mmiowb();
4283 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4285 struct net_device *netdev = tx_ring->netdev;
4287 netif_stop_subqueue(netdev, tx_ring->queue_index);
4289 /* Herbert's original patch had:
4290 * smp_mb__after_netif_stop_queue();
4291 * but since that doesn't exist yet, just open code it. */
4292 smp_mb();
4294 /* We need to check again in a case another CPU has just
4295 * made room available. */
4296 if (igb_desc_unused(tx_ring) < size)
4297 return -EBUSY;
4299 /* A reprieve! */
4300 netif_wake_subqueue(netdev, tx_ring->queue_index);
4302 u64_stats_update_begin(&tx_ring->tx_syncp2);
4303 tx_ring->tx_stats.restart_queue2++;
4304 u64_stats_update_end(&tx_ring->tx_syncp2);
4306 return 0;
/*
 * igb_maybe_stop_tx - stop the queue unless @size descriptors are free
 *
 * Returns 0 when enough descriptors are available, otherwise the result
 * of __igb_maybe_stop_tx().
 */
static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
{
	if (igb_desc_unused(tx_ring) < size)
		return __igb_maybe_stop_tx(tx_ring, size);

	return 0;
}
4316 netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
4317 struct igb_ring *tx_ring)
4319 int tso = 0, count;
4320 u32 tx_flags = 0;
4321 u16 first;
4322 u8 hdr_len = 0;
4324 /* need: 1 descriptor per page,
4325 * + 2 desc gap to keep tail from touching head,
4326 * + 1 desc for skb->data,
4327 * + 1 desc for context descriptor,
4328 * otherwise try next time */
4329 if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4330 /* this is a hard error */
4331 return NETDEV_TX_BUSY;
4334 if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4335 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4336 tx_flags |= IGB_TX_FLAGS_TSTAMP;
4339 if (vlan_tx_tag_present(skb)) {
4340 tx_flags |= IGB_TX_FLAGS_VLAN;
4341 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4344 if (skb->protocol == htons(ETH_P_IP))
4345 tx_flags |= IGB_TX_FLAGS_IPV4;
4347 first = tx_ring->next_to_use;
4348 if (skb_is_gso(skb)) {
4349 tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
4351 if (tso < 0) {
4352 dev_kfree_skb_any(skb);
4353 return NETDEV_TX_OK;
4357 if (tso)
4358 tx_flags |= IGB_TX_FLAGS_TSO;
4359 else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
4360 (skb->ip_summed == CHECKSUM_PARTIAL))
4361 tx_flags |= IGB_TX_FLAGS_CSUM;
4364 * count reflects descriptors mapped, if 0 or less then mapping error
4365 * has occurred and we need to rewind the descriptor queue
4367 count = igb_tx_map_adv(tx_ring, skb, first);
4368 if (!count) {
4369 dev_kfree_skb_any(skb);
4370 tx_ring->buffer_info[first].time_stamp = 0;
4371 tx_ring->next_to_use = first;
4372 return NETDEV_TX_OK;
4375 igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
4377 /* Make sure there is space in the ring for the next send. */
4378 igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4380 return NETDEV_TX_OK;
4383 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
4384 struct net_device *netdev)
4386 struct igb_adapter *adapter = netdev_priv(netdev);
4387 struct igb_ring *tx_ring;
4388 int r_idx = 0;
4390 if (test_bit(__IGB_DOWN, &adapter->state)) {
4391 dev_kfree_skb_any(skb);
4392 return NETDEV_TX_OK;
4395 if (skb->len <= 0) {
4396 dev_kfree_skb_any(skb);
4397 return NETDEV_TX_OK;
4400 r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
4401 tx_ring = adapter->multi_tx_table[r_idx];
4403 /* This goes back to the question of how to logically map a tx queue
4404 * to a flow. Right now, performance is impacted slightly negatively
4405 * if using multiple tx queues. If the stack breaks away from a
4406 * single qdisc implementation, we can look at this again. */
4407 return igb_xmit_frame_ring_adv(skb, tx_ring);
4411 * igb_tx_timeout - Respond to a Tx Hang
4412 * @netdev: network interface device structure
4414 static void igb_tx_timeout(struct net_device *netdev)
4416 struct igb_adapter *adapter = netdev_priv(netdev);
4417 struct e1000_hw *hw = &adapter->hw;
4419 /* Do the reset outside of interrupt context */
4420 adapter->tx_timeout_count++;
4422 if (hw->mac.type == e1000_82580)
4423 hw->dev_spec._82575.global_device_reset = true;
4425 schedule_work(&adapter->reset_task);
4426 wr32(E1000_EICS,
4427 (adapter->eims_enable_mask & ~adapter->eims_other));
4430 static void igb_reset_task(struct work_struct *work)
4432 struct igb_adapter *adapter;
4433 adapter = container_of(work, struct igb_adapter, reset_task);
4435 igb_dump(adapter);
4436 netdev_err(adapter->netdev, "Reset adapter\n");
4437 igb_reinit_locked(adapter);
4441 * igb_get_stats64 - Get System Network Statistics
4442 * @netdev: network interface device structure
4443 * @stats: rtnl_link_stats64 pointer
4446 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4447 struct rtnl_link_stats64 *stats)
4449 struct igb_adapter *adapter = netdev_priv(netdev);
4451 spin_lock(&adapter->stats64_lock);
4452 igb_update_stats(adapter, &adapter->stats64);
4453 memcpy(stats, &adapter->stats64, sizeof(*stats));
4454 spin_unlock(&adapter->stats64_lock);
4456 return stats;
4460 * igb_change_mtu - Change the Maximum Transfer Unit
4461 * @netdev: network interface device structure
4462 * @new_mtu: new value for maximum frame size
4464 * Returns 0 on success, negative on failure
4466 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4468 struct igb_adapter *adapter = netdev_priv(netdev);
4469 struct pci_dev *pdev = adapter->pdev;
4470 int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
4471 u32 rx_buffer_len, i;
4473 if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4474 dev_err(&pdev->dev, "Invalid MTU setting\n");
4475 return -EINVAL;
4478 if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4479 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4480 return -EINVAL;
4483 while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4484 msleep(1);
4486 /* igb_down has a dependency on max_frame_size */
4487 adapter->max_frame_size = max_frame;
4489 /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
4490 * means we reserve 2 more, this pushes us to allocate from the next
4491 * larger slab size.
4492 * i.e. RXBUFFER_2048 --> size-4096 slab
4495 if (adapter->hw.mac.type == e1000_82580)
4496 max_frame += IGB_TS_HDR_LEN;
4498 if (max_frame <= IGB_RXBUFFER_1024)
4499 rx_buffer_len = IGB_RXBUFFER_1024;
4500 else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
4501 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
4502 else
4503 rx_buffer_len = IGB_RXBUFFER_128;
4505 if ((max_frame == ETH_FRAME_LEN + ETH_FCS_LEN + IGB_TS_HDR_LEN) ||
4506 (max_frame == MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN))
4507 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN;
4509 if ((adapter->hw.mac.type == e1000_82580) &&
4510 (rx_buffer_len == IGB_RXBUFFER_128))
4511 rx_buffer_len += IGB_RXBUFFER_64;
4513 if (netif_running(netdev))
4514 igb_down(adapter);
4516 dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4517 netdev->mtu, new_mtu);
4518 netdev->mtu = new_mtu;
4520 for (i = 0; i < adapter->num_rx_queues; i++)
4521 adapter->rx_ring[i]->rx_buffer_len = rx_buffer_len;
4523 if (netif_running(netdev))
4524 igb_up(adapter);
4525 else
4526 igb_reset(adapter);
4528 clear_bit(__IGB_RESETTING, &adapter->state);
4530 return 0;
4534 * igb_update_stats - Update the board statistics counters
4535 * @adapter: board private structure
4538 void igb_update_stats(struct igb_adapter *adapter,
4539 struct rtnl_link_stats64 *net_stats)
4541 struct e1000_hw *hw = &adapter->hw;
4542 struct pci_dev *pdev = adapter->pdev;
4543 u32 reg, mpc;
4544 u16 phy_tmp;
4545 int i;
4546 u64 bytes, packets;
4547 unsigned int start;
4548 u64 _bytes, _packets;
4550 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4553 * Prevent stats update while adapter is being reset, or if the pci
4554 * connection is down.
4556 if (adapter->link_speed == 0)
4557 return;
4558 if (pci_channel_offline(pdev))
4559 return;
4561 bytes = 0;
4562 packets = 0;
4563 for (i = 0; i < adapter->num_rx_queues; i++) {
4564 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4565 struct igb_ring *ring = adapter->rx_ring[i];
4567 ring->rx_stats.drops += rqdpc_tmp;
4568 net_stats->rx_fifo_errors += rqdpc_tmp;
4570 do {
4571 start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4572 _bytes = ring->rx_stats.bytes;
4573 _packets = ring->rx_stats.packets;
4574 } while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4575 bytes += _bytes;
4576 packets += _packets;
4579 net_stats->rx_bytes = bytes;
4580 net_stats->rx_packets = packets;
4582 bytes = 0;
4583 packets = 0;
4584 for (i = 0; i < adapter->num_tx_queues; i++) {
4585 struct igb_ring *ring = adapter->tx_ring[i];
4586 do {
4587 start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4588 _bytes = ring->tx_stats.bytes;
4589 _packets = ring->tx_stats.packets;
4590 } while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4591 bytes += _bytes;
4592 packets += _packets;
4594 net_stats->tx_bytes = bytes;
4595 net_stats->tx_packets = packets;
4597 /* read stats registers */
4598 adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4599 adapter->stats.gprc += rd32(E1000_GPRC);
4600 adapter->stats.gorc += rd32(E1000_GORCL);
4601 rd32(E1000_GORCH); /* clear GORCL */
4602 adapter->stats.bprc += rd32(E1000_BPRC);
4603 adapter->stats.mprc += rd32(E1000_MPRC);
4604 adapter->stats.roc += rd32(E1000_ROC);
4606 adapter->stats.prc64 += rd32(E1000_PRC64);
4607 adapter->stats.prc127 += rd32(E1000_PRC127);
4608 adapter->stats.prc255 += rd32(E1000_PRC255);
4609 adapter->stats.prc511 += rd32(E1000_PRC511);
4610 adapter->stats.prc1023 += rd32(E1000_PRC1023);
4611 adapter->stats.prc1522 += rd32(E1000_PRC1522);
4612 adapter->stats.symerrs += rd32(E1000_SYMERRS);
4613 adapter->stats.sec += rd32(E1000_SEC);
4615 mpc = rd32(E1000_MPC);
4616 adapter->stats.mpc += mpc;
4617 net_stats->rx_fifo_errors += mpc;
4618 adapter->stats.scc += rd32(E1000_SCC);
4619 adapter->stats.ecol += rd32(E1000_ECOL);
4620 adapter->stats.mcc += rd32(E1000_MCC);
4621 adapter->stats.latecol += rd32(E1000_LATECOL);
4622 adapter->stats.dc += rd32(E1000_DC);
4623 adapter->stats.rlec += rd32(E1000_RLEC);
4624 adapter->stats.xonrxc += rd32(E1000_XONRXC);
4625 adapter->stats.xontxc += rd32(E1000_XONTXC);
4626 adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4627 adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4628 adapter->stats.fcruc += rd32(E1000_FCRUC);
4629 adapter->stats.gptc += rd32(E1000_GPTC);
4630 adapter->stats.gotc += rd32(E1000_GOTCL);
4631 rd32(E1000_GOTCH); /* clear GOTCL */
4632 adapter->stats.rnbc += rd32(E1000_RNBC);
4633 adapter->stats.ruc += rd32(E1000_RUC);
4634 adapter->stats.rfc += rd32(E1000_RFC);
4635 adapter->stats.rjc += rd32(E1000_RJC);
4636 adapter->stats.tor += rd32(E1000_TORH);
4637 adapter->stats.tot += rd32(E1000_TOTH);
4638 adapter->stats.tpr += rd32(E1000_TPR);
4640 adapter->stats.ptc64 += rd32(E1000_PTC64);
4641 adapter->stats.ptc127 += rd32(E1000_PTC127);
4642 adapter->stats.ptc255 += rd32(E1000_PTC255);
4643 adapter->stats.ptc511 += rd32(E1000_PTC511);
4644 adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4645 adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4647 adapter->stats.mptc += rd32(E1000_MPTC);
4648 adapter->stats.bptc += rd32(E1000_BPTC);
4650 adapter->stats.tpt += rd32(E1000_TPT);
4651 adapter->stats.colc += rd32(E1000_COLC);
4653 adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4654 /* read internal phy specific stats */
4655 reg = rd32(E1000_CTRL_EXT);
4656 if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4657 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4658 adapter->stats.tncrs += rd32(E1000_TNCRS);
4661 adapter->stats.tsctc += rd32(E1000_TSCTC);
4662 adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4664 adapter->stats.iac += rd32(E1000_IAC);
4665 adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4666 adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4667 adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4668 adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4669 adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4670 adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4671 adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4672 adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4674 /* Fill out the OS statistics structure */
4675 net_stats->multicast = adapter->stats.mprc;
4676 net_stats->collisions = adapter->stats.colc;
4678 /* Rx Errors */
4680 /* RLEC on some newer hardware can be incorrect so build
4681 * our own version based on RUC and ROC */
4682 net_stats->rx_errors = adapter->stats.rxerrc +
4683 adapter->stats.crcerrs + adapter->stats.algnerrc +
4684 adapter->stats.ruc + adapter->stats.roc +
4685 adapter->stats.cexterr;
4686 net_stats->rx_length_errors = adapter->stats.ruc +
4687 adapter->stats.roc;
4688 net_stats->rx_crc_errors = adapter->stats.crcerrs;
4689 net_stats->rx_frame_errors = adapter->stats.algnerrc;
4690 net_stats->rx_missed_errors = adapter->stats.mpc;
4692 /* Tx Errors */
4693 net_stats->tx_errors = adapter->stats.ecol +
4694 adapter->stats.latecol;
4695 net_stats->tx_aborted_errors = adapter->stats.ecol;
4696 net_stats->tx_window_errors = adapter->stats.latecol;
4697 net_stats->tx_carrier_errors = adapter->stats.tncrs;
4699 /* Tx Dropped needs to be maintained elsewhere */
4701 /* Phy Stats */
4702 if (hw->phy.media_type == e1000_media_type_copper) {
4703 if ((adapter->link_speed == SPEED_1000) &&
4704 (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4705 phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4706 adapter->phy_stats.idle_errors += phy_tmp;
4710 /* Management Stats */
4711 adapter->stats.mgptc += rd32(E1000_MGTPTC);
4712 adapter->stats.mgprc += rd32(E1000_MGTPRC);
4713 adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4715 /* OS2BMC Stats */
4716 reg = rd32(E1000_MANC);
4717 if (reg & E1000_MANC_EN_BMC2OS) {
4718 adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
4719 adapter->stats.o2bspc += rd32(E1000_O2BSPC);
4720 adapter->stats.b2ospc += rd32(E1000_B2OSPC);
4721 adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
4725 static irqreturn_t igb_msix_other(int irq, void *data)
4727 struct igb_adapter *adapter = data;
4728 struct e1000_hw *hw = &adapter->hw;
4729 u32 icr = rd32(E1000_ICR);
4730 /* reading ICR causes bit 31 of EICR to be cleared */
4732 if (icr & E1000_ICR_DRSTA)
4733 schedule_work(&adapter->reset_task);
4735 if (icr & E1000_ICR_DOUTSYNC) {
4736 /* HW is reporting DMA is out of sync */
4737 adapter->stats.doosync++;
4738 /* The DMA Out of Sync is also indication of a spoof event
4739 * in IOV mode. Check the Wrong VM Behavior register to
4740 * see if it is really a spoof event. */
4741 igb_check_wvbr(adapter);
4744 /* Check for a mailbox event */
4745 if (icr & E1000_ICR_VMMB)
4746 igb_msg_task(adapter);
4748 if (icr & E1000_ICR_LSC) {
4749 hw->mac.get_link_status = 1;
4750 /* guard against interrupt when we're going down */
4751 if (!test_bit(__IGB_DOWN, &adapter->state))
4752 mod_timer(&adapter->watchdog_timer, jiffies + 1);
4755 if (adapter->vfs_allocated_count)
4756 wr32(E1000_IMS, E1000_IMS_LSC |
4757 E1000_IMS_VMMB |
4758 E1000_IMS_DOUTSYNC);
4759 else
4760 wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
4761 wr32(E1000_EIMS, adapter->eims_other);
4763 return IRQ_HANDLED;
4766 static void igb_write_itr(struct igb_q_vector *q_vector)
4768 struct igb_adapter *adapter = q_vector->adapter;
4769 u32 itr_val = q_vector->itr_val & 0x7FFC;
4771 if (!q_vector->set_itr)
4772 return;
4774 if (!itr_val)
4775 itr_val = 0x4;
4777 if (adapter->hw.mac.type == e1000_82575)
4778 itr_val |= itr_val << 16;
4779 else
4780 itr_val |= 0x8000000;
4782 writel(itr_val, q_vector->itr_register);
4783 q_vector->set_itr = 0;
4786 static irqreturn_t igb_msix_ring(int irq, void *data)
4788 struct igb_q_vector *q_vector = data;
4790 /* Write the ITR value calculated from the previous interrupt. */
4791 igb_write_itr(q_vector);
4793 napi_schedule(&q_vector->napi);
4795 return IRQ_HANDLED;
4798 #ifdef CONFIG_IGB_DCA
4799 static void igb_update_dca(struct igb_q_vector *q_vector)
4801 struct igb_adapter *adapter = q_vector->adapter;
4802 struct e1000_hw *hw = &adapter->hw;
4803 int cpu = get_cpu();
4805 if (q_vector->cpu == cpu)
4806 goto out_no_update;
4808 if (q_vector->tx_ring) {
4809 int q = q_vector->tx_ring->reg_idx;
4810 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4811 if (hw->mac.type == e1000_82575) {
4812 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4813 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4814 } else {
4815 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4816 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4817 E1000_DCA_TXCTRL_CPUID_SHIFT;
4819 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4820 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4822 if (q_vector->rx_ring) {
4823 int q = q_vector->rx_ring->reg_idx;
4824 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4825 if (hw->mac.type == e1000_82575) {
4826 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4827 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4828 } else {
4829 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4830 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4831 E1000_DCA_RXCTRL_CPUID_SHIFT;
4833 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4834 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4835 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4836 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4838 q_vector->cpu = cpu;
4839 out_no_update:
4840 put_cpu();
4843 static void igb_setup_dca(struct igb_adapter *adapter)
4845 struct e1000_hw *hw = &adapter->hw;
4846 int i;
4848 if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4849 return;
4851 /* Always use CB2 mode, difference is masked in the CB driver. */
4852 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4854 for (i = 0; i < adapter->num_q_vectors; i++) {
4855 adapter->q_vector[i]->cpu = -1;
4856 igb_update_dca(adapter->q_vector[i]);
4860 static int __igb_notify_dca(struct device *dev, void *data)
4862 struct net_device *netdev = dev_get_drvdata(dev);
4863 struct igb_adapter *adapter = netdev_priv(netdev);
4864 struct pci_dev *pdev = adapter->pdev;
4865 struct e1000_hw *hw = &adapter->hw;
4866 unsigned long event = *(unsigned long *)data;
4868 switch (event) {
4869 case DCA_PROVIDER_ADD:
4870 /* if already enabled, don't do it again */
4871 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4872 break;
4873 if (dca_add_requester(dev) == 0) {
4874 adapter->flags |= IGB_FLAG_DCA_ENABLED;
4875 dev_info(&pdev->dev, "DCA enabled\n");
4876 igb_setup_dca(adapter);
4877 break;
4879 /* Fall Through since DCA is disabled. */
4880 case DCA_PROVIDER_REMOVE:
4881 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4882 /* without this a class_device is left
4883 * hanging around in the sysfs model */
4884 dca_remove_requester(dev);
4885 dev_info(&pdev->dev, "DCA disabled\n");
4886 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4887 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4889 break;
4892 return 0;
4895 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4896 void *p)
4898 int ret_val;
4900 ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4901 __igb_notify_dca);
4903 return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4905 #endif /* CONFIG_IGB_DCA */
4907 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4909 struct e1000_hw *hw = &adapter->hw;
4910 u32 ping;
4911 int i;
4913 for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4914 ping = E1000_PF_CONTROL_MSG;
4915 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4916 ping |= E1000_VT_MSGTYPE_CTS;
4917 igb_write_mbx(hw, &ping, 1, i);
4921 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4923 struct e1000_hw *hw = &adapter->hw;
4924 u32 vmolr = rd32(E1000_VMOLR(vf));
4925 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4927 vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
4928 IGB_VF_FLAG_MULTI_PROMISC);
4929 vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4931 if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4932 vmolr |= E1000_VMOLR_MPME;
4933 vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
4934 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4935 } else {
4937 * if we have hashes and we are clearing a multicast promisc
4938 * flag we need to write the hashes to the MTA as this step
4939 * was previously skipped
4941 if (vf_data->num_vf_mc_hashes > 30) {
4942 vmolr |= E1000_VMOLR_MPME;
4943 } else if (vf_data->num_vf_mc_hashes) {
4944 int j;
4945 vmolr |= E1000_VMOLR_ROMPE;
4946 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4947 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4951 wr32(E1000_VMOLR(vf), vmolr);
4953 /* there are flags left unprocessed, likely not supported */
4954 if (*msgbuf & E1000_VT_MSGINFO_MASK)
4955 return -EINVAL;
4957 return 0;
4961 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4962 u32 *msgbuf, u32 vf)
4964 int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4965 u16 *hash_list = (u16 *)&msgbuf[1];
4966 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4967 int i;
4969 /* salt away the number of multicast addresses assigned
4970 * to this VF for later use to restore when the PF multi cast
4971 * list changes
4973 vf_data->num_vf_mc_hashes = n;
4975 /* only up to 30 hash values supported */
4976 if (n > 30)
4977 n = 30;
4979 /* store the hashes for later use */
4980 for (i = 0; i < n; i++)
4981 vf_data->vf_mc_hashes[i] = hash_list[i];
4983 /* Flush and reset the mta with the new values */
4984 igb_set_rx_mode(adapter->netdev);
4986 return 0;
4989 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4991 struct e1000_hw *hw = &adapter->hw;
4992 struct vf_data_storage *vf_data;
4993 int i, j;
4995 for (i = 0; i < adapter->vfs_allocated_count; i++) {
4996 u32 vmolr = rd32(E1000_VMOLR(i));
4997 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4999 vf_data = &adapter->vf_data[i];
5001 if ((vf_data->num_vf_mc_hashes > 30) ||
5002 (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
5003 vmolr |= E1000_VMOLR_MPME;
5004 } else if (vf_data->num_vf_mc_hashes) {
5005 vmolr |= E1000_VMOLR_ROMPE;
5006 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5007 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5009 wr32(E1000_VMOLR(i), vmolr);
5013 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
5015 struct e1000_hw *hw = &adapter->hw;
5016 u32 pool_mask, reg, vid;
5017 int i;
5019 pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5021 /* Find the vlan filter for this id */
5022 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5023 reg = rd32(E1000_VLVF(i));
5025 /* remove the vf from the pool */
5026 reg &= ~pool_mask;
5028 /* if pool is empty then remove entry from vfta */
5029 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
5030 (reg & E1000_VLVF_VLANID_ENABLE)) {
5031 reg = 0;
5032 vid = reg & E1000_VLVF_VLANID_MASK;
5033 igb_vfta_set(hw, vid, false);
5036 wr32(E1000_VLVF(i), reg);
5039 adapter->vf_data[vf].vlans_enabled = 0;
5042 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
5044 struct e1000_hw *hw = &adapter->hw;
5045 u32 reg, i;
5047 /* The vlvf table only exists on 82576 hardware and newer */
5048 if (hw->mac.type < e1000_82576)
5049 return -1;
5051 /* we only need to do this if VMDq is enabled */
5052 if (!adapter->vfs_allocated_count)
5053 return -1;
5055 /* Find the vlan filter for this id */
5056 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5057 reg = rd32(E1000_VLVF(i));
5058 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
5059 vid == (reg & E1000_VLVF_VLANID_MASK))
5060 break;
5063 if (add) {
5064 if (i == E1000_VLVF_ARRAY_SIZE) {
5065 /* Did not find a matching VLAN ID entry that was
5066 * enabled. Search for a free filter entry, i.e.
5067 * one without the enable bit set
5069 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5070 reg = rd32(E1000_VLVF(i));
5071 if (!(reg & E1000_VLVF_VLANID_ENABLE))
5072 break;
5075 if (i < E1000_VLVF_ARRAY_SIZE) {
5076 /* Found an enabled/available entry */
5077 reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5079 /* if !enabled we need to set this up in vfta */
5080 if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
5081 /* add VID to filter table */
5082 igb_vfta_set(hw, vid, true);
5083 reg |= E1000_VLVF_VLANID_ENABLE;
5085 reg &= ~E1000_VLVF_VLANID_MASK;
5086 reg |= vid;
5087 wr32(E1000_VLVF(i), reg);
5089 /* do not modify RLPML for PF devices */
5090 if (vf >= adapter->vfs_allocated_count)
5091 return 0;
5093 if (!adapter->vf_data[vf].vlans_enabled) {
5094 u32 size;
5095 reg = rd32(E1000_VMOLR(vf));
5096 size = reg & E1000_VMOLR_RLPML_MASK;
5097 size += 4;
5098 reg &= ~E1000_VMOLR_RLPML_MASK;
5099 reg |= size;
5100 wr32(E1000_VMOLR(vf), reg);
5103 adapter->vf_data[vf].vlans_enabled++;
5104 return 0;
5106 } else {
5107 if (i < E1000_VLVF_ARRAY_SIZE) {
5108 /* remove vf from the pool */
5109 reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
5110 /* if pool is empty then remove entry from vfta */
5111 if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
5112 reg = 0;
5113 igb_vfta_set(hw, vid, false);
5115 wr32(E1000_VLVF(i), reg);
5117 /* do not modify RLPML for PF devices */
5118 if (vf >= adapter->vfs_allocated_count)
5119 return 0;
5121 adapter->vf_data[vf].vlans_enabled--;
5122 if (!adapter->vf_data[vf].vlans_enabled) {
5123 u32 size;
5124 reg = rd32(E1000_VMOLR(vf));
5125 size = reg & E1000_VMOLR_RLPML_MASK;
5126 size -= 4;
5127 reg &= ~E1000_VMOLR_RLPML_MASK;
5128 reg |= size;
5129 wr32(E1000_VMOLR(vf), reg);
5133 return 0;
5136 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
5138 struct e1000_hw *hw = &adapter->hw;
5140 if (vid)
5141 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
5142 else
5143 wr32(E1000_VMVIR(vf), 0);
5146 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
5147 int vf, u16 vlan, u8 qos)
5149 int err = 0;
5150 struct igb_adapter *adapter = netdev_priv(netdev);
5152 if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
5153 return -EINVAL;
5154 if (vlan || qos) {
5155 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
5156 if (err)
5157 goto out;
5158 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
5159 igb_set_vmolr(adapter, vf, !vlan);
5160 adapter->vf_data[vf].pf_vlan = vlan;
5161 adapter->vf_data[vf].pf_qos = qos;
5162 dev_info(&adapter->pdev->dev,
5163 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5164 if (test_bit(__IGB_DOWN, &adapter->state)) {
5165 dev_warn(&adapter->pdev->dev,
5166 "The VF VLAN has been set,"
5167 " but the PF device is not up.\n");
5168 dev_warn(&adapter->pdev->dev,
5169 "Bring the PF device up before"
5170 " attempting to use the VF device.\n");
5172 } else {
5173 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5174 false, vf);
5175 igb_set_vmvir(adapter, vlan, vf);
5176 igb_set_vmolr(adapter, vf, true);
5177 adapter->vf_data[vf].pf_vlan = 0;
5178 adapter->vf_data[vf].pf_qos = 0;
5180 out:
5181 return err;
5184 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5186 int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5187 int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5189 return igb_vlvf_set(adapter, vid, add, vf);
5192 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5194 /* clear flags - except flag that indicates PF has set the MAC */
5195 adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5196 adapter->vf_data[vf].last_nack = jiffies;
5198 /* reset offloads to defaults */
5199 igb_set_vmolr(adapter, vf, true);
5201 /* reset vlans for device */
5202 igb_clear_vf_vfta(adapter, vf);
5203 if (adapter->vf_data[vf].pf_vlan)
5204 igb_ndo_set_vf_vlan(adapter->netdev, vf,
5205 adapter->vf_data[vf].pf_vlan,
5206 adapter->vf_data[vf].pf_qos);
5207 else
5208 igb_clear_vf_vfta(adapter, vf);
5210 /* reset multicast table array for vf */
5211 adapter->vf_data[vf].num_vf_mc_hashes = 0;
5213 /* Flush and reset the mta with the new values */
5214 igb_set_rx_mode(adapter->netdev);
5217 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5219 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5221 /* generate a new mac address as we were hotplug removed/added */
5222 if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5223 random_ether_addr(vf_mac);
5225 /* process remaining reset events */
5226 igb_vf_reset(adapter, vf);
5229 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5231 struct e1000_hw *hw = &adapter->hw;
5232 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5233 int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5234 u32 reg, msgbuf[3];
5235 u8 *addr = (u8 *)(&msgbuf[1]);
5237 /* process all the same items cleared in a function level reset */
5238 igb_vf_reset(adapter, vf);
5240 /* set vf mac address */
5241 igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5243 /* enable transmit and receive for vf */
5244 reg = rd32(E1000_VFTE);
5245 wr32(E1000_VFTE, reg | (1 << vf));
5246 reg = rd32(E1000_VFRE);
5247 wr32(E1000_VFRE, reg | (1 << vf));
5249 adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5251 /* reply to reset with ack and vf mac address */
5252 msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5253 memcpy(addr, vf_mac, 6);
5254 igb_write_mbx(hw, msgbuf, 3, vf);
5257 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5260 * The VF MAC Address is stored in a packed array of bytes
5261 * starting at the second 32 bit word of the msg array
5263 unsigned char *addr = (char *)&msg[1];
5264 int err = -1;
5266 if (is_valid_ether_addr(addr))
5267 err = igb_set_vf_mac(adapter, vf, addr);
5269 return err;
5272 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5274 struct e1000_hw *hw = &adapter->hw;
5275 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5276 u32 msg = E1000_VT_MSGTYPE_NACK;
5278 /* if device isn't clear to send it shouldn't be reading either */
5279 if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5280 time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5281 igb_write_mbx(hw, &msg, 1, vf);
5282 vf_data->last_nack = jiffies;
5286 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5288 struct pci_dev *pdev = adapter->pdev;
5289 u32 msgbuf[E1000_VFMAILBOX_SIZE];
5290 struct e1000_hw *hw = &adapter->hw;
5291 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5292 s32 retval;
5294 retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5296 if (retval) {
5297 /* if receive failed revoke VF CTS stats and restart init */
5298 dev_err(&pdev->dev, "Error receiving message from VF\n");
5299 vf_data->flags &= ~IGB_VF_FLAG_CTS;
5300 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5301 return;
5302 goto out;
5305 /* this is a message we already processed, do nothing */
5306 if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5307 return;
5310 * until the vf completes a reset it should not be
5311 * allowed to start any configuration.
5314 if (msgbuf[0] == E1000_VF_RESET) {
5315 igb_vf_reset_msg(adapter, vf);
5316 return;
5319 if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5320 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5321 return;
5322 retval = -1;
5323 goto out;
5326 switch ((msgbuf[0] & 0xFFFF)) {
5327 case E1000_VF_SET_MAC_ADDR:
5328 retval = -EINVAL;
5329 if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5330 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5331 else
5332 dev_warn(&pdev->dev,
5333 "VF %d attempted to override administratively "
5334 "set MAC address\nReload the VF driver to "
5335 "resume operations\n", vf);
5336 break;
5337 case E1000_VF_SET_PROMISC:
5338 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5339 break;
5340 case E1000_VF_SET_MULTICAST:
5341 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5342 break;
5343 case E1000_VF_SET_LPE:
5344 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5345 break;
5346 case E1000_VF_SET_VLAN:
5347 retval = -1;
5348 if (vf_data->pf_vlan)
5349 dev_warn(&pdev->dev,
5350 "VF %d attempted to override administratively "
5351 "set VLAN tag\nReload the VF driver to "
5352 "resume operations\n", vf);
5353 else
5354 retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5355 break;
5356 default:
5357 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5358 retval = -1;
5359 break;
5362 msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5363 out:
5364 /* notify the VF of the results of what it sent us */
5365 if (retval)
5366 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5367 else
5368 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5370 igb_write_mbx(hw, msgbuf, 1, vf);
5373 static void igb_msg_task(struct igb_adapter *adapter)
5375 struct e1000_hw *hw = &adapter->hw;
5376 u32 vf;
5378 for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5379 /* process any reset requests */
5380 if (!igb_check_for_rst(hw, vf))
5381 igb_vf_reset_event(adapter, vf);
5383 /* process any messages pending */
5384 if (!igb_check_for_msg(hw, vf))
5385 igb_rcv_msg_from_vf(adapter, vf);
5387 /* process any acks */
5388 if (!igb_check_for_ack(hw, vf))
5389 igb_rcv_ack_from_vf(adapter, vf);
5394 * igb_set_uta - Set unicast filter table address
5395 * @adapter: board private structure
5397 * The unicast table address is a register array of 32-bit registers.
5398 * The table is meant to be used in a way similar to how the MTA is used
5399 * however due to certain limitations in the hardware it is necessary to
5400 * set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5401 * enable bit to allow vlan tag stripping when promiscuous mode is enabled
5403 static void igb_set_uta(struct igb_adapter *adapter)
5405 struct e1000_hw *hw = &adapter->hw;
5406 int i;
5408 /* The UTA table only exists on 82576 hardware and newer */
5409 if (hw->mac.type < e1000_82576)
5410 return;
5412 /* we only need to do this if VMDq is enabled */
5413 if (!adapter->vfs_allocated_count)
5414 return;
5416 for (i = 0; i < hw->mac.uta_reg_count; i++)
5417 array_wr32(E1000_UTA, i, ~0);
5421 * igb_intr_msi - Interrupt Handler
5422 * @irq: interrupt number
5423 * @data: pointer to a network interface device structure
5425 static irqreturn_t igb_intr_msi(int irq, void *data)
5427 struct igb_adapter *adapter = data;
5428 struct igb_q_vector *q_vector = adapter->q_vector[0];
5429 struct e1000_hw *hw = &adapter->hw;
5430 /* read ICR disables interrupts using IAM */
5431 u32 icr = rd32(E1000_ICR);
5433 igb_write_itr(q_vector);
5435 if (icr & E1000_ICR_DRSTA)
5436 schedule_work(&adapter->reset_task);
5438 if (icr & E1000_ICR_DOUTSYNC) {
5439 /* HW is reporting DMA is out of sync */
5440 adapter->stats.doosync++;
5443 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5444 hw->mac.get_link_status = 1;
5445 if (!test_bit(__IGB_DOWN, &adapter->state))
5446 mod_timer(&adapter->watchdog_timer, jiffies + 1);
5449 napi_schedule(&q_vector->napi);
5451 return IRQ_HANDLED;
5455 * igb_intr - Legacy Interrupt Handler
5456 * @irq: interrupt number
5457 * @data: pointer to a network interface device structure
5459 static irqreturn_t igb_intr(int irq, void *data)
5461 struct igb_adapter *adapter = data;
5462 struct igb_q_vector *q_vector = adapter->q_vector[0];
5463 struct e1000_hw *hw = &adapter->hw;
5464 /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked. No
5465 * need for the IMC write */
5466 u32 icr = rd32(E1000_ICR);
5467 if (!icr)
5468 return IRQ_NONE; /* Not our interrupt */
5470 igb_write_itr(q_vector);
5472 /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5473 * not set, then the adapter didn't send an interrupt */
5474 if (!(icr & E1000_ICR_INT_ASSERTED))
5475 return IRQ_NONE;
5477 if (icr & E1000_ICR_DRSTA)
5478 schedule_work(&adapter->reset_task);
5480 if (icr & E1000_ICR_DOUTSYNC) {
5481 /* HW is reporting DMA is out of sync */
5482 adapter->stats.doosync++;
5485 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5486 hw->mac.get_link_status = 1;
5487 /* guard against interrupt when we're going down */
5488 if (!test_bit(__IGB_DOWN, &adapter->state))
5489 mod_timer(&adapter->watchdog_timer, jiffies + 1);
5492 napi_schedule(&q_vector->napi);
5494 return IRQ_HANDLED;
5497 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5499 struct igb_adapter *adapter = q_vector->adapter;
5500 struct e1000_hw *hw = &adapter->hw;
5502 if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
5503 (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
5504 if (!adapter->msix_entries)
5505 igb_set_itr(adapter);
5506 else
5507 igb_update_ring_itr(q_vector);
5510 if (!test_bit(__IGB_DOWN, &adapter->state)) {
5511 if (adapter->msix_entries)
5512 wr32(E1000_EIMS, q_vector->eims_value);
5513 else
5514 igb_irq_enable(adapter);
5519 * igb_poll - NAPI Rx polling callback
5520 * @napi: napi polling structure
5521 * @budget: count of how many packets we should handle
5523 static int igb_poll(struct napi_struct *napi, int budget)
5525 struct igb_q_vector *q_vector = container_of(napi,
5526 struct igb_q_vector,
5527 napi);
5528 int tx_clean_complete = 1, work_done = 0;
5530 #ifdef CONFIG_IGB_DCA
5531 if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5532 igb_update_dca(q_vector);
5533 #endif
5534 if (q_vector->tx_ring)
5535 tx_clean_complete = igb_clean_tx_irq(q_vector);
5537 if (q_vector->rx_ring)
5538 igb_clean_rx_irq_adv(q_vector, &work_done, budget);
5540 if (!tx_clean_complete)
5541 work_done = budget;
5543 /* If not enough Rx work done, exit the polling mode */
5544 if (work_done < budget) {
5545 napi_complete(napi);
5546 igb_ring_irq_enable(q_vector);
5549 return work_done;
5553 * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5554 * @adapter: board private structure
5555 * @shhwtstamps: timestamp structure to update
5556 * @regval: unsigned 64bit system time value.
5558 * We need to convert the system time value stored in the RX/TXSTMP registers
5559 * into a hwtstamp which can be used by the upper level timestamping functions
5561 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
5562 struct skb_shared_hwtstamps *shhwtstamps,
5563 u64 regval)
5565 u64 ns;
5568 * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
5569 * 24 to match clock shift we setup earlier.
5571 if (adapter->hw.mac.type == e1000_82580)
5572 regval <<= IGB_82580_TSYNC_SHIFT;
5574 ns = timecounter_cyc2time(&adapter->clock, regval);
5575 timecompare_update(&adapter->compare, ns);
5576 memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
5577 shhwtstamps->hwtstamp = ns_to_ktime(ns);
5578 shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
5582 * igb_tx_hwtstamp - utility function which checks for TX time stamp
5583 * @q_vector: pointer to q_vector containing needed info
5584 * @buffer: pointer to igb_buffer structure
5586 * If we were asked to do hardware stamping and such a time stamp is
5587 * available, then it must have been for this skb here because we only
5588 * allow only one such packet into the queue.
5590 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct igb_buffer *buffer_info)
5592 struct igb_adapter *adapter = q_vector->adapter;
5593 struct e1000_hw *hw = &adapter->hw;
5594 struct skb_shared_hwtstamps shhwtstamps;
5595 u64 regval;
5597 /* if skb does not support hw timestamp or TX stamp not valid exit */
5598 if (likely(!(buffer_info->tx_flags & SKBTX_HW_TSTAMP)) ||
5599 !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5600 return;
5602 regval = rd32(E1000_TXSTMPL);
5603 regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5605 igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5606 skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5610 * igb_clean_tx_irq - Reclaim resources after transmit completes
5611 * @q_vector: pointer to q_vector containing needed info
5612 * returns true if ring is completely cleaned
5614 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5616 struct igb_adapter *adapter = q_vector->adapter;
5617 struct igb_ring *tx_ring = q_vector->tx_ring;
5618 struct net_device *netdev = tx_ring->netdev;
5619 struct e1000_hw *hw = &adapter->hw;
5620 struct igb_buffer *buffer_info;
5621 union e1000_adv_tx_desc *tx_desc, *eop_desc;
5622 unsigned int total_bytes = 0, total_packets = 0;
5623 unsigned int i, eop, count = 0;
5624 bool cleaned = false;
5626 i = tx_ring->next_to_clean;
5627 eop = tx_ring->buffer_info[i].next_to_watch;
5628 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5630 while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
5631 (count < tx_ring->count)) {
5632 rmb(); /* read buffer_info after eop_desc status */
5633 for (cleaned = false; !cleaned; count++) {
5634 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
5635 buffer_info = &tx_ring->buffer_info[i];
5636 cleaned = (i == eop);
5638 if (buffer_info->skb) {
5639 total_bytes += buffer_info->bytecount;
5640 /* gso_segs is currently only valid for tcp */
5641 total_packets += buffer_info->gso_segs;
5642 igb_tx_hwtstamp(q_vector, buffer_info);
5645 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
5646 tx_desc->wb.status = 0;
5648 i++;
5649 if (i == tx_ring->count)
5650 i = 0;
5652 eop = tx_ring->buffer_info[i].next_to_watch;
5653 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5656 tx_ring->next_to_clean = i;
5658 if (unlikely(count &&
5659 netif_carrier_ok(netdev) &&
5660 igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5661 /* Make sure that anybody stopping the queue after this
5662 * sees the new next_to_clean.
5664 smp_mb();
5665 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
5666 !(test_bit(__IGB_DOWN, &adapter->state))) {
5667 netif_wake_subqueue(netdev, tx_ring->queue_index);
5669 u64_stats_update_begin(&tx_ring->tx_syncp);
5670 tx_ring->tx_stats.restart_queue++;
5671 u64_stats_update_end(&tx_ring->tx_syncp);
5675 if (tx_ring->detect_tx_hung) {
5676 /* Detect a transmit hang in hardware, this serializes the
5677 * check with the clearing of time_stamp and movement of i */
5678 tx_ring->detect_tx_hung = false;
5679 if (tx_ring->buffer_info[i].time_stamp &&
5680 time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
5681 (adapter->tx_timeout_factor * HZ)) &&
5682 !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5684 /* detected Tx unit hang */
5685 dev_err(tx_ring->dev,
5686 "Detected Tx Unit Hang\n"
5687 " Tx Queue <%d>\n"
5688 " TDH <%x>\n"
5689 " TDT <%x>\n"
5690 " next_to_use <%x>\n"
5691 " next_to_clean <%x>\n"
5692 "buffer_info[next_to_clean]\n"
5693 " time_stamp <%lx>\n"
5694 " next_to_watch <%x>\n"
5695 " jiffies <%lx>\n"
5696 " desc.status <%x>\n",
5697 tx_ring->queue_index,
5698 readl(tx_ring->head),
5699 readl(tx_ring->tail),
5700 tx_ring->next_to_use,
5701 tx_ring->next_to_clean,
5702 tx_ring->buffer_info[eop].time_stamp,
5703 eop,
5704 jiffies,
5705 eop_desc->wb.status);
5706 netif_stop_subqueue(netdev, tx_ring->queue_index);
5709 tx_ring->total_bytes += total_bytes;
5710 tx_ring->total_packets += total_packets;
5711 u64_stats_update_begin(&tx_ring->tx_syncp);
5712 tx_ring->tx_stats.bytes += total_bytes;
5713 tx_ring->tx_stats.packets += total_packets;
5714 u64_stats_update_end(&tx_ring->tx_syncp);
5715 return count < tx_ring->count;
5718 static inline void igb_rx_checksum_adv(struct igb_ring *ring,
5719 u32 status_err, struct sk_buff *skb)
5721 skb_checksum_none_assert(skb);
5723 /* Ignore Checksum bit is set or checksum is disabled through ethtool */
5724 if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
5725 (status_err & E1000_RXD_STAT_IXSM))
5726 return;
5728 /* TCP/UDP checksum error bit is set */
5729 if (status_err &
5730 (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
5732 * work around errata with sctp packets where the TCPE aka
5733 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5734 * packets, (aka let the stack check the crc32c)
5736 if ((skb->len == 60) &&
5737 (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM)) {
5738 u64_stats_update_begin(&ring->rx_syncp);
5739 ring->rx_stats.csum_err++;
5740 u64_stats_update_end(&ring->rx_syncp);
5742 /* let the stack verify checksum errors */
5743 return;
5745 /* It must be a TCP or UDP packet with a valid checksum */
5746 if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
5747 skb->ip_summed = CHECKSUM_UNNECESSARY;
5749 dev_dbg(ring->dev, "cksum success: bits %08X\n", status_err);
5752 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
5753 struct sk_buff *skb)
5755 struct igb_adapter *adapter = q_vector->adapter;
5756 struct e1000_hw *hw = &adapter->hw;
5757 u64 regval;
5760 * If this bit is set, then the RX registers contain the time stamp. No
5761 * other packet will be time stamped until we read these registers, so
5762 * read the registers to make them available again. Because only one
5763 * packet can be time stamped at a time, we know that the register
5764 * values must belong to this one here and therefore we don't need to
5765 * compare any of the additional attributes stored for it.
5767 * If nothing went wrong, then it should have a shared tx_flags that we
5768 * can turn into a skb_shared_hwtstamps.
5770 if (staterr & E1000_RXDADV_STAT_TSIP) {
5771 u32 *stamp = (u32 *)skb->data;
5772 regval = le32_to_cpu(*(stamp + 2));
5773 regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5774 skb_pull(skb, IGB_TS_HDR_LEN);
5775 } else {
5776 if(!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5777 return;
5779 regval = rd32(E1000_RXSTMPL);
5780 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5783 igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5785 static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
5786 union e1000_adv_rx_desc *rx_desc)
5788 /* HW will not DMA in data larger than the given buffer, even if it
5789 * parses the (NFS, of course) header to be larger. In that case, it
5790 * fills the header buffer and spills the rest into the page.
5792 u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5793 E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5794 if (hlen > rx_ring->rx_buffer_len)
5795 hlen = rx_ring->rx_buffer_len;
5796 return hlen;
5799 static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
5800 int *work_done, int budget)
5802 struct igb_ring *rx_ring = q_vector->rx_ring;
5803 struct net_device *netdev = rx_ring->netdev;
5804 struct device *dev = rx_ring->dev;
5805 union e1000_adv_rx_desc *rx_desc , *next_rxd;
5806 struct igb_buffer *buffer_info , *next_buffer;
5807 struct sk_buff *skb;
5808 bool cleaned = false;
5809 int cleaned_count = 0;
5810 int current_node = numa_node_id();
5811 unsigned int total_bytes = 0, total_packets = 0;
5812 unsigned int i;
5813 u32 staterr;
5814 u16 length;
5816 i = rx_ring->next_to_clean;
5817 buffer_info = &rx_ring->buffer_info[i];
5818 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5819 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5821 while (staterr & E1000_RXD_STAT_DD) {
5822 if (*work_done >= budget)
5823 break;
5824 (*work_done)++;
5825 rmb(); /* read descriptor and rx_buffer_info after status DD */
5827 skb = buffer_info->skb;
5828 prefetch(skb->data - NET_IP_ALIGN);
5829 buffer_info->skb = NULL;
5831 i++;
5832 if (i == rx_ring->count)
5833 i = 0;
5835 next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
5836 prefetch(next_rxd);
5837 next_buffer = &rx_ring->buffer_info[i];
5839 length = le16_to_cpu(rx_desc->wb.upper.length);
5840 cleaned = true;
5841 cleaned_count++;
5843 if (buffer_info->dma) {
5844 dma_unmap_single(dev, buffer_info->dma,
5845 rx_ring->rx_buffer_len,
5846 DMA_FROM_DEVICE);
5847 buffer_info->dma = 0;
5848 if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
5849 skb_put(skb, length);
5850 goto send_up;
5852 skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
5855 if (length) {
5856 dma_unmap_page(dev, buffer_info->page_dma,
5857 PAGE_SIZE / 2, DMA_FROM_DEVICE);
5858 buffer_info->page_dma = 0;
5860 skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
5861 buffer_info->page,
5862 buffer_info->page_offset,
5863 length);
5865 if ((page_count(buffer_info->page) != 1) ||
5866 (page_to_nid(buffer_info->page) != current_node))
5867 buffer_info->page = NULL;
5868 else
5869 get_page(buffer_info->page);
5871 skb->len += length;
5872 skb->data_len += length;
5873 skb->truesize += length;
5876 if (!(staterr & E1000_RXD_STAT_EOP)) {
5877 buffer_info->skb = next_buffer->skb;
5878 buffer_info->dma = next_buffer->dma;
5879 next_buffer->skb = skb;
5880 next_buffer->dma = 0;
5881 goto next_desc;
5883 send_up:
5884 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
5885 dev_kfree_skb_irq(skb);
5886 goto next_desc;
5889 if (staterr & (E1000_RXDADV_STAT_TSIP | E1000_RXDADV_STAT_TS))
5890 igb_rx_hwtstamp(q_vector, staterr, skb);
5891 total_bytes += skb->len;
5892 total_packets++;
5894 igb_rx_checksum_adv(rx_ring, staterr, skb);
5896 skb->protocol = eth_type_trans(skb, netdev);
5897 skb_record_rx_queue(skb, rx_ring->queue_index);
5899 if (staterr & E1000_RXD_STAT_VP) {
5900 u16 vid = le16_to_cpu(rx_desc->wb.upper.vlan);
5902 __vlan_hwaccel_put_tag(skb, vid);
5904 napi_gro_receive(&q_vector->napi, skb);
5906 next_desc:
5907 rx_desc->wb.upper.status_error = 0;
5909 /* return some buffers to hardware, one at a time is too slow */
5910 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5911 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5912 cleaned_count = 0;
5915 /* use prefetched values */
5916 rx_desc = next_rxd;
5917 buffer_info = next_buffer;
5918 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5921 rx_ring->next_to_clean = i;
5922 cleaned_count = igb_desc_unused(rx_ring);
5924 if (cleaned_count)
5925 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5927 rx_ring->total_packets += total_packets;
5928 rx_ring->total_bytes += total_bytes;
5929 u64_stats_update_begin(&rx_ring->rx_syncp);
5930 rx_ring->rx_stats.packets += total_packets;
5931 rx_ring->rx_stats.bytes += total_bytes;
5932 u64_stats_update_end(&rx_ring->rx_syncp);
5933 return cleaned;
5937 * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
5938 * @adapter: address of board private structure
5940 void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
5942 struct net_device *netdev = rx_ring->netdev;
5943 union e1000_adv_rx_desc *rx_desc;
5944 struct igb_buffer *buffer_info;
5945 struct sk_buff *skb;
5946 unsigned int i;
5947 int bufsz;
5949 i = rx_ring->next_to_use;
5950 buffer_info = &rx_ring->buffer_info[i];
5952 bufsz = rx_ring->rx_buffer_len;
5954 while (cleaned_count--) {
5955 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5957 if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
5958 if (!buffer_info->page) {
5959 buffer_info->page = netdev_alloc_page(netdev);
5960 if (unlikely(!buffer_info->page)) {
5961 u64_stats_update_begin(&rx_ring->rx_syncp);
5962 rx_ring->rx_stats.alloc_failed++;
5963 u64_stats_update_end(&rx_ring->rx_syncp);
5964 goto no_buffers;
5966 buffer_info->page_offset = 0;
5967 } else {
5968 buffer_info->page_offset ^= PAGE_SIZE / 2;
5970 buffer_info->page_dma =
5971 dma_map_page(rx_ring->dev, buffer_info->page,
5972 buffer_info->page_offset,
5973 PAGE_SIZE / 2,
5974 DMA_FROM_DEVICE);
5975 if (dma_mapping_error(rx_ring->dev,
5976 buffer_info->page_dma)) {
5977 buffer_info->page_dma = 0;
5978 u64_stats_update_begin(&rx_ring->rx_syncp);
5979 rx_ring->rx_stats.alloc_failed++;
5980 u64_stats_update_end(&rx_ring->rx_syncp);
5981 goto no_buffers;
5985 skb = buffer_info->skb;
5986 if (!skb) {
5987 skb = netdev_alloc_skb_ip_align(netdev, bufsz);
5988 if (unlikely(!skb)) {
5989 u64_stats_update_begin(&rx_ring->rx_syncp);
5990 rx_ring->rx_stats.alloc_failed++;
5991 u64_stats_update_end(&rx_ring->rx_syncp);
5992 goto no_buffers;
5995 buffer_info->skb = skb;
5997 if (!buffer_info->dma) {
5998 buffer_info->dma = dma_map_single(rx_ring->dev,
5999 skb->data,
6000 bufsz,
6001 DMA_FROM_DEVICE);
6002 if (dma_mapping_error(rx_ring->dev,
6003 buffer_info->dma)) {
6004 buffer_info->dma = 0;
6005 u64_stats_update_begin(&rx_ring->rx_syncp);
6006 rx_ring->rx_stats.alloc_failed++;
6007 u64_stats_update_end(&rx_ring->rx_syncp);
6008 goto no_buffers;
6011 /* Refresh the desc even if buffer_addrs didn't change because
6012 * each write-back erases this info. */
6013 if (bufsz < IGB_RXBUFFER_1024) {
6014 rx_desc->read.pkt_addr =
6015 cpu_to_le64(buffer_info->page_dma);
6016 rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
6017 } else {
6018 rx_desc->read.pkt_addr = cpu_to_le64(buffer_info->dma);
6019 rx_desc->read.hdr_addr = 0;
6022 i++;
6023 if (i == rx_ring->count)
6024 i = 0;
6025 buffer_info = &rx_ring->buffer_info[i];
6028 no_buffers:
6029 if (rx_ring->next_to_use != i) {
6030 rx_ring->next_to_use = i;
6031 if (i == 0)
6032 i = (rx_ring->count - 1);
6033 else
6034 i--;
6036 /* Force memory writes to complete before letting h/w
6037 * know there are new descriptors to fetch. (Only
6038 * applicable for weak-ordered memory model archs,
6039 * such as IA-64). */
6040 wmb();
6041 writel(i, rx_ring->tail);
6046 * igb_mii_ioctl -
6047 * @netdev:
6048 * @ifreq:
6049 * @cmd:
6051 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6053 struct igb_adapter *adapter = netdev_priv(netdev);
6054 struct mii_ioctl_data *data = if_mii(ifr);
6056 if (adapter->hw.phy.media_type != e1000_media_type_copper)
6057 return -EOPNOTSUPP;
6059 switch (cmd) {
6060 case SIOCGMIIPHY:
6061 data->phy_id = adapter->hw.phy.addr;
6062 break;
6063 case SIOCGMIIREG:
6064 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
6065 &data->val_out))
6066 return -EIO;
6067 break;
6068 case SIOCSMIIREG:
6069 default:
6070 return -EOPNOTSUPP;
6072 return 0;
6076 * igb_hwtstamp_ioctl - control hardware time stamping
6077 * @netdev:
6078 * @ifreq:
6079 * @cmd:
6081 * Outgoing time stamping can be enabled and disabled. Play nice and
6082 * disable it when requested, although it shouldn't case any overhead
6083 * when no packet needs it. At most one packet in the queue may be
6084 * marked for time stamping, otherwise it would be impossible to tell
6085 * for sure to which packet the hardware time stamp belongs.
6087 * Incoming time stamping has to be configured via the hardware
6088 * filters. Not all combinations are supported, in particular event
6089 * type has to be specified. Matching the kind of event packet is
6090 * not supported, with the exception of "all V2 events regardless of
6091 * level 2 or 4".
6094 static int igb_hwtstamp_ioctl(struct net_device *netdev,
6095 struct ifreq *ifr, int cmd)
6097 struct igb_adapter *adapter = netdev_priv(netdev);
6098 struct e1000_hw *hw = &adapter->hw;
6099 struct hwtstamp_config config;
6100 u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
6101 u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6102 u32 tsync_rx_cfg = 0;
6103 bool is_l4 = false;
6104 bool is_l2 = false;
6105 u32 regval;
6107 if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
6108 return -EFAULT;
6110 /* reserved for future extensions */
6111 if (config.flags)
6112 return -EINVAL;
6114 switch (config.tx_type) {
6115 case HWTSTAMP_TX_OFF:
6116 tsync_tx_ctl = 0;
6117 case HWTSTAMP_TX_ON:
6118 break;
6119 default:
6120 return -ERANGE;
6123 switch (config.rx_filter) {
6124 case HWTSTAMP_FILTER_NONE:
6125 tsync_rx_ctl = 0;
6126 break;
6127 case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
6128 case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
6129 case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
6130 case HWTSTAMP_FILTER_ALL:
6132 * register TSYNCRXCFG must be set, therefore it is not
6133 * possible to time stamp both Sync and Delay_Req messages
6134 * => fall back to time stamping all packets
6136 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6137 config.rx_filter = HWTSTAMP_FILTER_ALL;
6138 break;
6139 case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
6140 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6141 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
6142 is_l4 = true;
6143 break;
6144 case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
6145 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6146 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
6147 is_l4 = true;
6148 break;
6149 case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
6150 case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
6151 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6152 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
6153 is_l2 = true;
6154 is_l4 = true;
6155 config.rx_filter = HWTSTAMP_FILTER_SOME;
6156 break;
6157 case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
6158 case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
6159 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6160 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
6161 is_l2 = true;
6162 is_l4 = true;
6163 config.rx_filter = HWTSTAMP_FILTER_SOME;
6164 break;
6165 case HWTSTAMP_FILTER_PTP_V2_EVENT:
6166 case HWTSTAMP_FILTER_PTP_V2_SYNC:
6167 case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
6168 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
6169 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
6170 is_l2 = true;
6171 break;
6172 default:
6173 return -ERANGE;
6176 if (hw->mac.type == e1000_82575) {
6177 if (tsync_rx_ctl | tsync_tx_ctl)
6178 return -EINVAL;
6179 return 0;
6183 * Per-packet timestamping only works if all packets are
6184 * timestamped, so enable timestamping in all packets as
6185 * long as one rx filter was configured.
6187 if ((hw->mac.type == e1000_82580) && tsync_rx_ctl) {
6188 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6189 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6192 /* enable/disable TX */
6193 regval = rd32(E1000_TSYNCTXCTL);
6194 regval &= ~E1000_TSYNCTXCTL_ENABLED;
6195 regval |= tsync_tx_ctl;
6196 wr32(E1000_TSYNCTXCTL, regval);
6198 /* enable/disable RX */
6199 regval = rd32(E1000_TSYNCRXCTL);
6200 regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
6201 regval |= tsync_rx_ctl;
6202 wr32(E1000_TSYNCRXCTL, regval);
6204 /* define which PTP packets are time stamped */
6205 wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
6207 /* define ethertype filter for timestamped packets */
6208 if (is_l2)
6209 wr32(E1000_ETQF(3),
6210 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
6211 E1000_ETQF_1588 | /* enable timestamping */
6212 ETH_P_1588)); /* 1588 eth protocol type */
6213 else
6214 wr32(E1000_ETQF(3), 0);
6216 #define PTP_PORT 319
6217 /* L4 Queue Filter[3]: filter by destination port and protocol */
6218 if (is_l4) {
6219 u32 ftqf = (IPPROTO_UDP /* UDP */
6220 | E1000_FTQF_VF_BP /* VF not compared */
6221 | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6222 | E1000_FTQF_MASK); /* mask all inputs */
6223 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6225 wr32(E1000_IMIR(3), htons(PTP_PORT));
6226 wr32(E1000_IMIREXT(3),
6227 (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6228 if (hw->mac.type == e1000_82576) {
6229 /* enable source port check */
6230 wr32(E1000_SPQF(3), htons(PTP_PORT));
6231 ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6233 wr32(E1000_FTQF(3), ftqf);
6234 } else {
6235 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
6237 wrfl();
6239 adapter->hwtstamp_config = config;
6241 /* clear TX/RX time stamp registers, just to be sure */
6242 regval = rd32(E1000_TXSTMPH);
6243 regval = rd32(E1000_RXSTMPH);
6245 return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
6246 -EFAULT : 0;
6250 * igb_ioctl -
6251 * @netdev:
6252 * @ifreq:
6253 * @cmd:
6255 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6257 switch (cmd) {
6258 case SIOCGMIIPHY:
6259 case SIOCGMIIREG:
6260 case SIOCSMIIREG:
6261 return igb_mii_ioctl(netdev, ifr, cmd);
6262 case SIOCSHWTSTAMP:
6263 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
6264 default:
6265 return -EOPNOTSUPP;
6269 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6271 struct igb_adapter *adapter = hw->back;
6272 u16 cap_offset;
6274 cap_offset = adapter->pdev->pcie_cap;
6275 if (!cap_offset)
6276 return -E1000_ERR_CONFIG;
6278 pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6280 return 0;
6283 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6285 struct igb_adapter *adapter = hw->back;
6286 u16 cap_offset;
6288 cap_offset = adapter->pdev->pcie_cap;
6289 if (!cap_offset)
6290 return -E1000_ERR_CONFIG;
6292 pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6294 return 0;
6297 static void igb_vlan_mode(struct net_device *netdev, u32 features)
6299 struct igb_adapter *adapter = netdev_priv(netdev);
6300 struct e1000_hw *hw = &adapter->hw;
6301 u32 ctrl, rctl;
6303 igb_irq_disable(adapter);
6305 if (features & NETIF_F_HW_VLAN_RX) {
6306 /* enable VLAN tag insert/strip */
6307 ctrl = rd32(E1000_CTRL);
6308 ctrl |= E1000_CTRL_VME;
6309 wr32(E1000_CTRL, ctrl);
6311 /* Disable CFI check */
6312 rctl = rd32(E1000_RCTL);
6313 rctl &= ~E1000_RCTL_CFIEN;
6314 wr32(E1000_RCTL, rctl);
6315 } else {
6316 /* disable VLAN tag insert/strip */
6317 ctrl = rd32(E1000_CTRL);
6318 ctrl &= ~E1000_CTRL_VME;
6319 wr32(E1000_CTRL, ctrl);
6322 igb_rlpml_set(adapter);
6324 if (!test_bit(__IGB_DOWN, &adapter->state))
6325 igb_irq_enable(adapter);
6328 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6330 struct igb_adapter *adapter = netdev_priv(netdev);
6331 struct e1000_hw *hw = &adapter->hw;
6332 int pf_id = adapter->vfs_allocated_count;
6334 /* attempt to add filter to vlvf array */
6335 igb_vlvf_set(adapter, vid, true, pf_id);
6337 /* add the filter since PF can receive vlans w/o entry in vlvf */
6338 igb_vfta_set(hw, vid, true);
6340 set_bit(vid, adapter->active_vlans);
6343 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6345 struct igb_adapter *adapter = netdev_priv(netdev);
6346 struct e1000_hw *hw = &adapter->hw;
6347 int pf_id = adapter->vfs_allocated_count;
6348 s32 err;
6350 igb_irq_disable(adapter);
6352 if (!test_bit(__IGB_DOWN, &adapter->state))
6353 igb_irq_enable(adapter);
6355 /* remove vlan from VLVF table array */
6356 err = igb_vlvf_set(adapter, vid, false, pf_id);
6358 /* if vid was not present in VLVF just remove it from table */
6359 if (err)
6360 igb_vfta_set(hw, vid, false);
6362 clear_bit(vid, adapter->active_vlans);
6365 static void igb_restore_vlan(struct igb_adapter *adapter)
6367 u16 vid;
6369 for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
6370 igb_vlan_rx_add_vid(adapter->netdev, vid);
6373 int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx)
6375 struct pci_dev *pdev = adapter->pdev;
6376 struct e1000_mac_info *mac = &adapter->hw.mac;
6378 mac->autoneg = 0;
6380 /* Make sure dplx is at most 1 bit and lsb of speed is not set
6381 * for the switch() below to work */
6382 if ((spd & 1) || (dplx & ~1))
6383 goto err_inval;
6385 /* Fiber NIC's only allow 1000 Gbps Full duplex */
6386 if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6387 spd != SPEED_1000 &&
6388 dplx != DUPLEX_FULL)
6389 goto err_inval;
6391 switch (spd + dplx) {
6392 case SPEED_10 + DUPLEX_HALF:
6393 mac->forced_speed_duplex = ADVERTISE_10_HALF;
6394 break;
6395 case SPEED_10 + DUPLEX_FULL:
6396 mac->forced_speed_duplex = ADVERTISE_10_FULL;
6397 break;
6398 case SPEED_100 + DUPLEX_HALF:
6399 mac->forced_speed_duplex = ADVERTISE_100_HALF;
6400 break;
6401 case SPEED_100 + DUPLEX_FULL:
6402 mac->forced_speed_duplex = ADVERTISE_100_FULL;
6403 break;
6404 case SPEED_1000 + DUPLEX_FULL:
6405 mac->autoneg = 1;
6406 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6407 break;
6408 case SPEED_1000 + DUPLEX_HALF: /* not supported */
6409 default:
6410 goto err_inval;
6412 return 0;
6414 err_inval:
6415 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6416 return -EINVAL;
6419 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
6421 struct net_device *netdev = pci_get_drvdata(pdev);
6422 struct igb_adapter *adapter = netdev_priv(netdev);
6423 struct e1000_hw *hw = &adapter->hw;
6424 u32 ctrl, rctl, status;
6425 u32 wufc = adapter->wol;
6426 #ifdef CONFIG_PM
6427 int retval = 0;
6428 #endif
6430 netif_device_detach(netdev);
6432 if (netif_running(netdev))
6433 igb_close(netdev);
6435 igb_clear_interrupt_scheme(adapter);
6437 #ifdef CONFIG_PM
6438 retval = pci_save_state(pdev);
6439 if (retval)
6440 return retval;
6441 #endif
6443 status = rd32(E1000_STATUS);
6444 if (status & E1000_STATUS_LU)
6445 wufc &= ~E1000_WUFC_LNKC;
6447 if (wufc) {
6448 igb_setup_rctl(adapter);
6449 igb_set_rx_mode(netdev);
6451 /* turn on all-multi mode if wake on multicast is enabled */
6452 if (wufc & E1000_WUFC_MC) {
6453 rctl = rd32(E1000_RCTL);
6454 rctl |= E1000_RCTL_MPE;
6455 wr32(E1000_RCTL, rctl);
6458 ctrl = rd32(E1000_CTRL);
6459 /* advertise wake from D3Cold */
6460 #define E1000_CTRL_ADVD3WUC 0x00100000
6461 /* phy power management enable */
6462 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6463 ctrl |= E1000_CTRL_ADVD3WUC;
6464 wr32(E1000_CTRL, ctrl);
6466 /* Allow time for pending master requests to run */
6467 igb_disable_pcie_master(hw);
6469 wr32(E1000_WUC, E1000_WUC_PME_EN);
6470 wr32(E1000_WUFC, wufc);
6471 } else {
6472 wr32(E1000_WUC, 0);
6473 wr32(E1000_WUFC, 0);
6476 *enable_wake = wufc || adapter->en_mng_pt;
6477 if (!*enable_wake)
6478 igb_power_down_link(adapter);
6479 else
6480 igb_power_up_link(adapter);
6482 /* Release control of h/w to f/w. If f/w is AMT enabled, this
6483 * would have already happened in close and is redundant. */
6484 igb_release_hw_control(adapter);
6486 pci_disable_device(pdev);
6488 return 0;
6491 #ifdef CONFIG_PM
6492 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
6494 int retval;
6495 bool wake;
6497 retval = __igb_shutdown(pdev, &wake);
6498 if (retval)
6499 return retval;
6501 if (wake) {
6502 pci_prepare_to_sleep(pdev);
6503 } else {
6504 pci_wake_from_d3(pdev, false);
6505 pci_set_power_state(pdev, PCI_D3hot);
6508 return 0;
6511 static int igb_resume(struct pci_dev *pdev)
6513 struct net_device *netdev = pci_get_drvdata(pdev);
6514 struct igb_adapter *adapter = netdev_priv(netdev);
6515 struct e1000_hw *hw = &adapter->hw;
6516 u32 err;
6518 pci_set_power_state(pdev, PCI_D0);
6519 pci_restore_state(pdev);
6520 pci_save_state(pdev);
6522 err = pci_enable_device_mem(pdev);
6523 if (err) {
6524 dev_err(&pdev->dev,
6525 "igb: Cannot enable PCI device from suspend\n");
6526 return err;
6528 pci_set_master(pdev);
6530 pci_enable_wake(pdev, PCI_D3hot, 0);
6531 pci_enable_wake(pdev, PCI_D3cold, 0);
6533 if (igb_init_interrupt_scheme(adapter)) {
6534 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6535 return -ENOMEM;
6538 igb_reset(adapter);
6540 /* let the f/w know that the h/w is now under the control of the
6541 * driver. */
6542 igb_get_hw_control(adapter);
6544 wr32(E1000_WUS, ~0);
6546 if (netif_running(netdev)) {
6547 err = igb_open(netdev);
6548 if (err)
6549 return err;
6552 netif_device_attach(netdev);
6554 return 0;
6556 #endif
6558 static void igb_shutdown(struct pci_dev *pdev)
6560 bool wake;
6562 __igb_shutdown(pdev, &wake);
6564 if (system_state == SYSTEM_POWER_OFF) {
6565 pci_wake_from_d3(pdev, wake);
6566 pci_set_power_state(pdev, PCI_D3hot);
6570 #ifdef CONFIG_NET_POLL_CONTROLLER
6572 * Polling 'interrupt' - used by things like netconsole to send skbs
6573 * without having to re-enable interrupts. It's not called while
6574 * the interrupt routine is executing.
6576 static void igb_netpoll(struct net_device *netdev)
6578 struct igb_adapter *adapter = netdev_priv(netdev);
6579 struct e1000_hw *hw = &adapter->hw;
6580 int i;
6582 if (!adapter->msix_entries) {
6583 struct igb_q_vector *q_vector = adapter->q_vector[0];
6584 igb_irq_disable(adapter);
6585 napi_schedule(&q_vector->napi);
6586 return;
6589 for (i = 0; i < adapter->num_q_vectors; i++) {
6590 struct igb_q_vector *q_vector = adapter->q_vector[i];
6591 wr32(E1000_EIMC, q_vector->eims_value);
6592 napi_schedule(&q_vector->napi);
6595 #endif /* CONFIG_NET_POLL_CONTROLLER */
6598 * igb_io_error_detected - called when PCI error is detected
6599 * @pdev: Pointer to PCI device
6600 * @state: The current pci connection state
6602 * This function is called after a PCI bus error affecting
6603 * this device has been detected.
6605 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6606 pci_channel_state_t state)
6608 struct net_device *netdev = pci_get_drvdata(pdev);
6609 struct igb_adapter *adapter = netdev_priv(netdev);
6611 netif_device_detach(netdev);
6613 if (state == pci_channel_io_perm_failure)
6614 return PCI_ERS_RESULT_DISCONNECT;
6616 if (netif_running(netdev))
6617 igb_down(adapter);
6618 pci_disable_device(pdev);
6620 /* Request a slot slot reset. */
6621 return PCI_ERS_RESULT_NEED_RESET;
6625 * igb_io_slot_reset - called after the pci bus has been reset.
6626 * @pdev: Pointer to PCI device
6628 * Restart the card from scratch, as if from a cold-boot. Implementation
6629 * resembles the first-half of the igb_resume routine.
6631 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6633 struct net_device *netdev = pci_get_drvdata(pdev);
6634 struct igb_adapter *adapter = netdev_priv(netdev);
6635 struct e1000_hw *hw = &adapter->hw;
6636 pci_ers_result_t result;
6637 int err;
6639 if (pci_enable_device_mem(pdev)) {
6640 dev_err(&pdev->dev,
6641 "Cannot re-enable PCI device after reset.\n");
6642 result = PCI_ERS_RESULT_DISCONNECT;
6643 } else {
6644 pci_set_master(pdev);
6645 pci_restore_state(pdev);
6646 pci_save_state(pdev);
6648 pci_enable_wake(pdev, PCI_D3hot, 0);
6649 pci_enable_wake(pdev, PCI_D3cold, 0);
6651 igb_reset(adapter);
6652 wr32(E1000_WUS, ~0);
6653 result = PCI_ERS_RESULT_RECOVERED;
6656 err = pci_cleanup_aer_uncorrect_error_status(pdev);
6657 if (err) {
6658 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6659 "failed 0x%0x\n", err);
6660 /* non-fatal, continue */
6663 return result;
6667 * igb_io_resume - called when traffic can start flowing again.
6668 * @pdev: Pointer to PCI device
6670 * This callback is called when the error recovery driver tells us that
6671 * its OK to resume normal operation. Implementation resembles the
6672 * second-half of the igb_resume routine.
6674 static void igb_io_resume(struct pci_dev *pdev)
6676 struct net_device *netdev = pci_get_drvdata(pdev);
6677 struct igb_adapter *adapter = netdev_priv(netdev);
6679 if (netif_running(netdev)) {
6680 if (igb_up(adapter)) {
6681 dev_err(&pdev->dev, "igb_up failed after reset\n");
6682 return;
6686 netif_device_attach(netdev);
6688 /* let the f/w know that the h/w is now under the control of the
6689 * driver. */
6690 igb_get_hw_control(adapter);
6693 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6694 u8 qsel)
6696 u32 rar_low, rar_high;
6697 struct e1000_hw *hw = &adapter->hw;
6699 /* HW expects these in little endian so we reverse the byte order
6700 * from network order (big endian) to little endian
6702 rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6703 ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6704 rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6706 /* Indicate to hardware the Address is Valid. */
6707 rar_high |= E1000_RAH_AV;
6709 if (hw->mac.type == e1000_82575)
6710 rar_high |= E1000_RAH_POOL_1 * qsel;
6711 else
6712 rar_high |= E1000_RAH_POOL_1 << qsel;
6714 wr32(E1000_RAL(index), rar_low);
6715 wrfl();
6716 wr32(E1000_RAH(index), rar_high);
6717 wrfl();
6720 static int igb_set_vf_mac(struct igb_adapter *adapter,
6721 int vf, unsigned char *mac_addr)
6723 struct e1000_hw *hw = &adapter->hw;
6724 /* VF MAC addresses start at end of receive addresses and moves
6725 * torwards the first, as a result a collision should not be possible */
6726 int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6728 memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6730 igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6732 return 0;
6735 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6737 struct igb_adapter *adapter = netdev_priv(netdev);
6738 if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6739 return -EINVAL;
6740 adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6741 dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
6742 dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
6743 " change effective.");
6744 if (test_bit(__IGB_DOWN, &adapter->state)) {
6745 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6746 " but the PF device is not up.\n");
6747 dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6748 " attempting to use the VF device.\n");
6750 return igb_set_vf_mac(adapter, vf, mac);
6753 static int igb_link_mbps(int internal_link_speed)
6755 switch (internal_link_speed) {
6756 case SPEED_100:
6757 return 100;
6758 case SPEED_1000:
6759 return 1000;
6760 default:
6761 return 0;
6765 static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
6766 int link_speed)
6768 int rf_dec, rf_int;
6769 u32 bcnrc_val;
6771 if (tx_rate != 0) {
6772 /* Calculate the rate factor values to set */
6773 rf_int = link_speed / tx_rate;
6774 rf_dec = (link_speed - (rf_int * tx_rate));
6775 rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;
6777 bcnrc_val = E1000_RTTBCNRC_RS_ENA;
6778 bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
6779 E1000_RTTBCNRC_RF_INT_MASK);
6780 bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
6781 } else {
6782 bcnrc_val = 0;
6785 wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
6786 wr32(E1000_RTTBCNRC, bcnrc_val);
6789 static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
6791 int actual_link_speed, i;
6792 bool reset_rate = false;
6794 /* VF TX rate limit was not set or not supported */
6795 if ((adapter->vf_rate_link_speed == 0) ||
6796 (adapter->hw.mac.type != e1000_82576))
6797 return;
6799 actual_link_speed = igb_link_mbps(adapter->link_speed);
6800 if (actual_link_speed != adapter->vf_rate_link_speed) {
6801 reset_rate = true;
6802 adapter->vf_rate_link_speed = 0;
6803 dev_info(&adapter->pdev->dev,
6804 "Link speed has been changed. VF Transmit "
6805 "rate is disabled\n");
6808 for (i = 0; i < adapter->vfs_allocated_count; i++) {
6809 if (reset_rate)
6810 adapter->vf_data[i].tx_rate = 0;
6812 igb_set_vf_rate_limit(&adapter->hw, i,
6813 adapter->vf_data[i].tx_rate,
6814 actual_link_speed);
6818 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
6820 struct igb_adapter *adapter = netdev_priv(netdev);
6821 struct e1000_hw *hw = &adapter->hw;
6822 int actual_link_speed;
6824 if (hw->mac.type != e1000_82576)
6825 return -EOPNOTSUPP;
6827 actual_link_speed = igb_link_mbps(adapter->link_speed);
6828 if ((vf >= adapter->vfs_allocated_count) ||
6829 (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
6830 (tx_rate < 0) || (tx_rate > actual_link_speed))
6831 return -EINVAL;
6833 adapter->vf_rate_link_speed = actual_link_speed;
6834 adapter->vf_data[vf].tx_rate = (u16)tx_rate;
6835 igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
6837 return 0;
6840 static int igb_ndo_get_vf_config(struct net_device *netdev,
6841 int vf, struct ifla_vf_info *ivi)
6843 struct igb_adapter *adapter = netdev_priv(netdev);
6844 if (vf >= adapter->vfs_allocated_count)
6845 return -EINVAL;
6846 ivi->vf = vf;
6847 memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
6848 ivi->tx_rate = adapter->vf_data[vf].tx_rate;
6849 ivi->vlan = adapter->vf_data[vf].pf_vlan;
6850 ivi->qos = adapter->vf_data[vf].pf_qos;
6851 return 0;
6854 static void igb_vmm_control(struct igb_adapter *adapter)
6856 struct e1000_hw *hw = &adapter->hw;
6857 u32 reg;
6859 switch (hw->mac.type) {
6860 case e1000_82575:
6861 default:
6862 /* replication is not supported for 82575 */
6863 return;
6864 case e1000_82576:
6865 /* notify HW that the MAC is adding vlan tags */
6866 reg = rd32(E1000_DTXCTL);
6867 reg |= E1000_DTXCTL_VLAN_ADDED;
6868 wr32(E1000_DTXCTL, reg);
6869 case e1000_82580:
6870 /* enable replication vlan tag stripping */
6871 reg = rd32(E1000_RPLOLR);
6872 reg |= E1000_RPLOLR_STRVLAN;
6873 wr32(E1000_RPLOLR, reg);
6874 case e1000_i350:
6875 /* none of the above registers are supported by i350 */
6876 break;
6879 if (adapter->vfs_allocated_count) {
6880 igb_vmdq_set_loopback_pf(hw, true);
6881 igb_vmdq_set_replication_pf(hw, true);
6882 igb_vmdq_set_anti_spoofing_pf(hw, true,
6883 adapter->vfs_allocated_count);
6884 } else {
6885 igb_vmdq_set_loopback_pf(hw, false);
6886 igb_vmdq_set_replication_pf(hw, false);
6890 /* igb_main.c */