[zen-stable.git] drivers/net/igb/igb_main.c
1 /*******************************************************************************
3 Intel(R) Gigabit Ethernet Linux driver
4 Copyright(c) 2007-2009 Intel Corporation.
6 This program is free software; you can redistribute it and/or modify it
7 under the terms and conditions of the GNU General Public License,
8 version 2, as published by the Free Software Foundation.
10 This program is distributed in the hope it will be useful, but WITHOUT
11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 more details.
15 You should have received a copy of the GNU General Public License along with
16 this program; if not, write to the Free Software Foundation, Inc.,
17 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
19 The full GNU General Public License is included in this distribution in
20 the file called "COPYING".
22 Contact Information:
23 e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24 Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
26 *******************************************************************************/
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/init.h>
31 #include <linux/vmalloc.h>
32 #include <linux/pagemap.h>
33 #include <linux/netdevice.h>
34 #include <linux/ipv6.h>
35 #include <linux/slab.h>
36 #include <net/checksum.h>
37 #include <net/ip6_checksum.h>
38 #include <linux/net_tstamp.h>
39 #include <linux/mii.h>
40 #include <linux/ethtool.h>
41 #include <linux/if_vlan.h>
42 #include <linux/pci.h>
43 #include <linux/pci-aspm.h>
44 #include <linux/delay.h>
45 #include <linux/interrupt.h>
46 #include <linux/if_ether.h>
47 #include <linux/aer.h>
48 #include <linux/prefetch.h>
49 #ifdef CONFIG_IGB_DCA
50 #include <linux/dca.h>
51 #endif
52 #include "igb.h"
54 #define MAJ 3
55 #define MIN 0
56 #define BUILD 6
57 #define KFIX 2
58 #define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
59 __stringify(BUILD) "-k" __stringify(KFIX)
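/* Note: with the values above, DRV_VERSION expands to "3.0.6-k2". */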
60 char igb_driver_name[] = "igb";
61 char igb_driver_version[] = DRV_VERSION;
62 static const char igb_driver_string[] =
63 "Intel(R) Gigabit Ethernet Network Driver";
64 static const char igb_copyright[] = "Copyright (c) 2007-2011 Intel Corporation.";
66 static const struct e1000_info *igb_info_tbl[] = {
67 [board_82575] = &e1000_82575_info,
68 };
70 static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
71 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
72 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
73 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
74 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
75 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
76 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
77 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
78 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
79 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
80 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
81 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
82 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
83 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
84 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
85 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
86 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
87 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
88 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
89 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
90 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
91 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
92 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
93 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
94 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
95 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
96 /* required last entry */
97 {0, }
98 };
100 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
102 void igb_reset(struct igb_adapter *);
103 static int igb_setup_all_tx_resources(struct igb_adapter *);
104 static int igb_setup_all_rx_resources(struct igb_adapter *);
105 static void igb_free_all_tx_resources(struct igb_adapter *);
106 static void igb_free_all_rx_resources(struct igb_adapter *);
107 static void igb_setup_mrqc(struct igb_adapter *);
108 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
109 static void __devexit igb_remove(struct pci_dev *pdev);
110 static void igb_init_hw_timer(struct igb_adapter *adapter);
111 static int igb_sw_init(struct igb_adapter *);
112 static int igb_open(struct net_device *);
113 static int igb_close(struct net_device *);
114 static void igb_configure_tx(struct igb_adapter *);
115 static void igb_configure_rx(struct igb_adapter *);
116 static void igb_clean_all_tx_rings(struct igb_adapter *);
117 static void igb_clean_all_rx_rings(struct igb_adapter *);
118 static void igb_clean_tx_ring(struct igb_ring *);
119 static void igb_clean_rx_ring(struct igb_ring *);
120 static void igb_set_rx_mode(struct net_device *);
121 static void igb_update_phy_info(unsigned long);
122 static void igb_watchdog(unsigned long);
123 static void igb_watchdog_task(struct work_struct *);
124 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
125 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
126 struct rtnl_link_stats64 *stats);
127 static int igb_change_mtu(struct net_device *, int);
128 static int igb_set_mac(struct net_device *, void *);
129 static void igb_set_uta(struct igb_adapter *adapter);
130 static irqreturn_t igb_intr(int irq, void *);
131 static irqreturn_t igb_intr_msi(int irq, void *);
132 static irqreturn_t igb_msix_other(int irq, void *);
133 static irqreturn_t igb_msix_ring(int irq, void *);
134 #ifdef CONFIG_IGB_DCA
135 static void igb_update_dca(struct igb_q_vector *);
136 static void igb_setup_dca(struct igb_adapter *);
137 #endif /* CONFIG_IGB_DCA */
138 static bool igb_clean_tx_irq(struct igb_q_vector *);
139 static int igb_poll(struct napi_struct *, int);
140 static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
141 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
142 static void igb_tx_timeout(struct net_device *);
143 static void igb_reset_task(struct work_struct *);
144 static void igb_vlan_rx_register(struct net_device *, struct vlan_group *);
145 static void igb_vlan_rx_add_vid(struct net_device *, u16);
146 static void igb_vlan_rx_kill_vid(struct net_device *, u16);
147 static void igb_restore_vlan(struct igb_adapter *);
148 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32, u8);
149 static void igb_ping_all_vfs(struct igb_adapter *);
150 static void igb_msg_task(struct igb_adapter *);
151 static void igb_vmm_control(struct igb_adapter *);
152 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
153 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
154 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
155 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
156 int vf, u16 vlan, u8 qos);
157 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
158 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
159 struct ifla_vf_info *ivi);
160 static void igb_check_vf_rate_limit(struct igb_adapter *);
162 #ifdef CONFIG_PM
163 static int igb_suspend(struct pci_dev *, pm_message_t);
164 static int igb_resume(struct pci_dev *);
165 #endif
166 static void igb_shutdown(struct pci_dev *);
167 #ifdef CONFIG_IGB_DCA
168 static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
169 static struct notifier_block dca_notifier = {
170 .notifier_call = igb_notify_dca,
171 .next = NULL,
172 .priority = 0
173 };
174 #endif
175 #ifdef CONFIG_NET_POLL_CONTROLLER
176 /* for netdump / net console */
177 static void igb_netpoll(struct net_device *);
178 #endif
179 #ifdef CONFIG_PCI_IOV
180 static unsigned int max_vfs = 0;
181 module_param(max_vfs, uint, 0);
182 MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
183 "per physical function");
184 #endif /* CONFIG_PCI_IOV */
186 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
187 pci_channel_state_t);
188 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
189 static void igb_io_resume(struct pci_dev *);
191 static struct pci_error_handlers igb_err_handler = {
192 .error_detected = igb_io_error_detected,
193 .slot_reset = igb_io_slot_reset,
194 .resume = igb_io_resume,
195 };
198 static struct pci_driver igb_driver = {
199 .name = igb_driver_name,
200 .id_table = igb_pci_tbl,
201 .probe = igb_probe,
202 .remove = __devexit_p(igb_remove),
203 #ifdef CONFIG_PM
204 /* Power Management Hooks */
205 .suspend = igb_suspend,
206 .resume = igb_resume,
207 #endif
208 .shutdown = igb_shutdown,
209 .err_handler = &igb_err_handler
210 };
212 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
213 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
214 MODULE_LICENSE("GPL");
215 MODULE_VERSION(DRV_VERSION);
217 struct igb_reg_info {
218 u32 ofs;
219 char *name;
220 };
222 static const struct igb_reg_info igb_reg_info_tbl[] = {
224 /* General Registers */
225 {E1000_CTRL, "CTRL"},
226 {E1000_STATUS, "STATUS"},
227 {E1000_CTRL_EXT, "CTRL_EXT"},
229 /* Interrupt Registers */
230 {E1000_ICR, "ICR"},
232 /* RX Registers */
233 {E1000_RCTL, "RCTL"},
234 {E1000_RDLEN(0), "RDLEN"},
235 {E1000_RDH(0), "RDH"},
236 {E1000_RDT(0), "RDT"},
237 {E1000_RXDCTL(0), "RXDCTL"},
238 {E1000_RDBAL(0), "RDBAL"},
239 {E1000_RDBAH(0), "RDBAH"},
241 /* TX Registers */
242 {E1000_TCTL, "TCTL"},
243 {E1000_TDBAL(0), "TDBAL"},
244 {E1000_TDBAH(0), "TDBAH"},
245 {E1000_TDLEN(0), "TDLEN"},
246 {E1000_TDH(0), "TDH"},
247 {E1000_TDT(0), "TDT"},
248 {E1000_TXDCTL(0), "TXDCTL"},
249 {E1000_TDFH, "TDFH"},
250 {E1000_TDFT, "TDFT"},
251 {E1000_TDFHS, "TDFHS"},
252 {E1000_TDFPC, "TDFPC"},
254 /* List Terminator */
255 {}
256 };
259 * igb_regdump - register printout routine
261 static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
263 int n = 0;
264 char rname[16];
265 u32 regs[8];
267 switch (reginfo->ofs) {
268 case E1000_RDLEN(0):
269 for (n = 0; n < 4; n++)
270 regs[n] = rd32(E1000_RDLEN(n));
271 break;
272 case E1000_RDH(0):
273 for (n = 0; n < 4; n++)
274 regs[n] = rd32(E1000_RDH(n));
275 break;
276 case E1000_RDT(0):
277 for (n = 0; n < 4; n++)
278 regs[n] = rd32(E1000_RDT(n));
279 break;
280 case E1000_RXDCTL(0):
281 for (n = 0; n < 4; n++)
282 regs[n] = rd32(E1000_RXDCTL(n));
283 break;
284 case E1000_RDBAL(0):
285 for (n = 0; n < 4; n++)
286 regs[n] = rd32(E1000_RDBAL(n));
287 break;
288 case E1000_RDBAH(0):
289 for (n = 0; n < 4; n++)
290 regs[n] = rd32(E1000_RDBAH(n));
291 break;
292 case E1000_TDBAL(0):
293 for (n = 0; n < 4; n++)
294 regs[n] = rd32(E1000_TDBAL(n));
295 break;
296 case E1000_TDBAH(0):
297 for (n = 0; n < 4; n++)
298 regs[n] = rd32(E1000_TDBAH(n));
299 break;
300 case E1000_TDLEN(0):
301 for (n = 0; n < 4; n++)
302 regs[n] = rd32(E1000_TDLEN(n));
303 break;
304 case E1000_TDH(0):
305 for (n = 0; n < 4; n++)
306 regs[n] = rd32(E1000_TDH(n));
307 break;
308 case E1000_TDT(0):
309 for (n = 0; n < 4; n++)
310 regs[n] = rd32(E1000_TDT(n));
311 break;
312 case E1000_TXDCTL(0):
313 for (n = 0; n < 4; n++)
314 regs[n] = rd32(E1000_TXDCTL(n));
315 break;
316 default:
317 printk(KERN_INFO "%-15s %08x\n",
318 reginfo->name, rd32(reginfo->ofs));
319 return;
322 snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
323 printk(KERN_INFO "%-15s ", rname);
324 for (n = 0; n < 4; n++)
325 printk(KERN_CONT "%08x ", regs[n]);
326 printk(KERN_CONT "\n");
330 * igb_dump - Print registers, tx-rings and rx-rings
332 static void igb_dump(struct igb_adapter *adapter)
334 struct net_device *netdev = adapter->netdev;
335 struct e1000_hw *hw = &adapter->hw;
336 struct igb_reg_info *reginfo;
337 int n = 0;
338 struct igb_ring *tx_ring;
339 union e1000_adv_tx_desc *tx_desc;
340 struct my_u0 { u64 a; u64 b; } *u0;
341 struct igb_buffer *buffer_info;
342 struct igb_ring *rx_ring;
343 union e1000_adv_rx_desc *rx_desc;
344 u32 staterr;
345 int i = 0;
347 if (!netif_msg_hw(adapter))
348 return;
350 /* Print netdevice Info */
351 if (netdev) {
352 dev_info(&adapter->pdev->dev, "Net device Info\n");
353 printk(KERN_INFO "Device Name state "
354 "trans_start last_rx\n");
355 printk(KERN_INFO "%-15s %016lX %016lX %016lX\n",
356 netdev->name,
357 netdev->state,
358 netdev->trans_start,
359 netdev->last_rx);
362 /* Print Registers */
363 dev_info(&adapter->pdev->dev, "Register Dump\n");
364 printk(KERN_INFO " Register Name Value\n");
365 for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
366 reginfo->name; reginfo++) {
367 igb_regdump(hw, reginfo);
370 /* Print TX Ring Summary */
371 if (!netdev || !netif_running(netdev))
372 goto exit;
374 dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
375 printk(KERN_INFO "Queue [NTU] [NTC] [bi(ntc)->dma ]"
376 " leng ntw timestamp\n");
377 for (n = 0; n < adapter->num_tx_queues; n++) {
378 tx_ring = adapter->tx_ring[n];
379 buffer_info = &tx_ring->buffer_info[tx_ring->next_to_clean];
380 printk(KERN_INFO " %5d %5X %5X %016llX %04X %3X %016llX\n",
381 n, tx_ring->next_to_use, tx_ring->next_to_clean,
382 (u64)buffer_info->dma,
383 buffer_info->length,
384 buffer_info->next_to_watch,
385 (u64)buffer_info->time_stamp);
388 /* Print TX Rings */
389 if (!netif_msg_tx_done(adapter))
390 goto rx_ring_summary;
392 dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
394 /* Transmit Descriptor Formats
396 * Advanced Transmit Descriptor
397 * +--------------------------------------------------------------+
398 * 0 | Buffer Address [63:0] |
399 * +--------------------------------------------------------------+
400 * 8 | PAYLEN | PORTS |CC|IDX | STA | DCMD |DTYP|MAC|RSV| DTALEN |
401 * +--------------------------------------------------------------+
402 * 63 46 45 40 39 38 36 35 32 31 24 15 0
405 for (n = 0; n < adapter->num_tx_queues; n++) {
406 tx_ring = adapter->tx_ring[n];
407 printk(KERN_INFO "------------------------------------\n");
408 printk(KERN_INFO "TX QUEUE INDEX = %d\n", tx_ring->queue_index);
409 printk(KERN_INFO "------------------------------------\n");
410 printk(KERN_INFO "T [desc] [address 63:0 ] "
411 "[PlPOCIStDDM Ln] [bi->dma ] "
412 "leng ntw timestamp bi->skb\n");
414 for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
415 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
416 buffer_info = &tx_ring->buffer_info[i];
417 u0 = (struct my_u0 *)tx_desc;
418 printk(KERN_INFO "T [0x%03X] %016llX %016llX %016llX"
419 " %04X %3X %016llX %p", i,
420 le64_to_cpu(u0->a),
421 le64_to_cpu(u0->b),
422 (u64)buffer_info->dma,
423 buffer_info->length,
424 buffer_info->next_to_watch,
425 (u64)buffer_info->time_stamp,
426 buffer_info->skb);
427 if (i == tx_ring->next_to_use &&
428 i == tx_ring->next_to_clean)
429 printk(KERN_CONT " NTC/U\n");
430 else if (i == tx_ring->next_to_use)
431 printk(KERN_CONT " NTU\n");
432 else if (i == tx_ring->next_to_clean)
433 printk(KERN_CONT " NTC\n");
434 else
435 printk(KERN_CONT "\n");
437 if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
438 print_hex_dump(KERN_INFO, "",
439 DUMP_PREFIX_ADDRESS,
440 16, 1, phys_to_virt(buffer_info->dma),
441 buffer_info->length, true);
445 /* Print RX Rings Summary */
446 rx_ring_summary:
447 dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
448 printk(KERN_INFO "Queue [NTU] [NTC]\n");
449 for (n = 0; n < adapter->num_rx_queues; n++) {
450 rx_ring = adapter->rx_ring[n];
451 printk(KERN_INFO " %5d %5X %5X\n", n,
452 rx_ring->next_to_use, rx_ring->next_to_clean);
455 /* Print RX Rings */
456 if (!netif_msg_rx_status(adapter))
457 goto exit;
459 dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
461 /* Advanced Receive Descriptor (Read) Format
462 * 63 1 0
463 * +-----------------------------------------------------+
464 * 0 | Packet Buffer Address [63:1] |A0/NSE|
465 * +----------------------------------------------+------+
466 * 8 | Header Buffer Address [63:1] | DD |
467 * +-----------------------------------------------------+
470 * Advanced Receive Descriptor (Write-Back) Format
472 * 63 48 47 32 31 30 21 20 17 16 4 3 0
473 * +------------------------------------------------------+
474 * 0 | Packet IP |SPH| HDR_LEN | RSV|Packet| RSS |
475 * | Checksum Ident | | | | Type | Type |
476 * +------------------------------------------------------+
477 * 8 | VLAN Tag | Length | Extended Error | Extended Status |
478 * +------------------------------------------------------+
479 * 63 48 47 32 31 20 19 0
482 for (n = 0; n < adapter->num_rx_queues; n++) {
483 rx_ring = adapter->rx_ring[n];
484 printk(KERN_INFO "------------------------------------\n");
485 printk(KERN_INFO "RX QUEUE INDEX = %d\n", rx_ring->queue_index);
486 printk(KERN_INFO "------------------------------------\n");
487 printk(KERN_INFO "R [desc] [ PktBuf A0] "
488 "[ HeadBuf DD] [bi->dma ] [bi->skb] "
489 "<-- Adv Rx Read format\n");
490 printk(KERN_INFO "RWB[desc] [PcsmIpSHl PtRs] "
491 "[vl er S cks ln] ---------------- [bi->skb] "
492 "<-- Adv Rx Write-Back format\n");
494 for (i = 0; i < rx_ring->count; i++) {
495 buffer_info = &rx_ring->buffer_info[i];
496 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
497 u0 = (struct my_u0 *)rx_desc;
498 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
499 if (staterr & E1000_RXD_STAT_DD) {
500 /* Descriptor Done */
501 printk(KERN_INFO "RWB[0x%03X] %016llX "
502 "%016llX ---------------- %p", i,
503 le64_to_cpu(u0->a),
504 le64_to_cpu(u0->b),
505 buffer_info->skb);
506 } else {
507 printk(KERN_INFO "R [0x%03X] %016llX "
508 "%016llX %016llX %p", i,
509 le64_to_cpu(u0->a),
510 le64_to_cpu(u0->b),
511 (u64)buffer_info->dma,
512 buffer_info->skb);
514 if (netif_msg_pktdata(adapter)) {
515 print_hex_dump(KERN_INFO, "",
516 DUMP_PREFIX_ADDRESS,
517 16, 1,
518 phys_to_virt(buffer_info->dma),
519 rx_ring->rx_buffer_len, true);
520 if (rx_ring->rx_buffer_len
521 < IGB_RXBUFFER_1024)
522 print_hex_dump(KERN_INFO, "",
523 DUMP_PREFIX_ADDRESS,
524 16, 1,
525 phys_to_virt(
526 buffer_info->page_dma +
527 buffer_info->page_offset),
528 PAGE_SIZE/2, true);
532 if (i == rx_ring->next_to_use)
533 printk(KERN_CONT " NTU\n");
534 else if (i == rx_ring->next_to_clean)
535 printk(KERN_CONT " NTC\n");
536 else
537 printk(KERN_CONT "\n");
542 exit:
543 return;
548 * igb_read_clock - read raw cycle counter (to be used by time counter)
550 static cycle_t igb_read_clock(const struct cyclecounter *tc)
552 struct igb_adapter *adapter =
553 container_of(tc, struct igb_adapter, cycles);
554 struct e1000_hw *hw = &adapter->hw;
555 u64 stamp = 0;
556 int shift = 0;
559 * The timestamp latches on lowest register read. For the 82580
560 * the lowest register is SYSTIMR instead of SYSTIML. However we never
561 * adjusted TIMINCA so SYSTIMR will just read as all 0s so ignore it.
563 if (hw->mac.type == e1000_82580) {
564 stamp = rd32(E1000_SYSTIMR) >> 8;
565 shift = IGB_82580_TSYNC_SHIFT;
568 stamp |= (u64)rd32(E1000_SYSTIML) << shift;
569 stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
570 return stamp;
574 * igb_get_hw_dev - return device
575 * used by hardware layer to print debugging information
577 struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
579 struct igb_adapter *adapter = hw->back;
580 return adapter->netdev;
584 * igb_init_module - Driver Registration Routine
586 * igb_init_module is the first routine called when the driver is
587 * loaded. All it does is register with the PCI subsystem.
589 static int __init igb_init_module(void)
591 int ret;
592 printk(KERN_INFO "%s - version %s\n",
593 igb_driver_string, igb_driver_version);
595 printk(KERN_INFO "%s\n", igb_copyright);
597 #ifdef CONFIG_IGB_DCA
598 dca_register_notify(&dca_notifier);
599 #endif
600 ret = pci_register_driver(&igb_driver);
601 return ret;
604 module_init(igb_init_module);
607 * igb_exit_module - Driver Exit Cleanup Routine
609 * igb_exit_module is called just before the driver is removed
610 * from memory.
612 static void __exit igb_exit_module(void)
614 #ifdef CONFIG_IGB_DCA
615 dca_unregister_notify(&dca_notifier);
616 #endif
617 pci_unregister_driver(&igb_driver);
620 module_exit(igb_exit_module);
622 #define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
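/*
 * Q_IDX_82576(i) maps logical queue i to its 82576 hardware index by
 * interleaving even and odd indices: 0 -> 0, 1 -> 8, 2 -> 1, 3 -> 9, ...
 * which is how a VF ends up owning queues n and n + 8.
 */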
624 * igb_cache_ring_register - Descriptor ring to register mapping
625 * @adapter: board private structure to initialize
627 * Once we know the feature-set enabled for the device, we'll cache
628 * the register offset the descriptor ring is assigned to.
630 static void igb_cache_ring_register(struct igb_adapter *adapter)
632 int i = 0, j = 0;
633 u32 rbase_offset = adapter->vfs_allocated_count;
635 switch (adapter->hw.mac.type) {
636 case e1000_82576:
637 /* The queues are allocated for virtualization such that VF 0
638 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
639 * In order to avoid collision we start at the first free queue
640 * and continue consuming queues in the same sequence
642 if (adapter->vfs_allocated_count) {
643 for (; i < adapter->rss_queues; i++)
644 adapter->rx_ring[i]->reg_idx = rbase_offset +
645 Q_IDX_82576(i);
647 case e1000_82575:
648 case e1000_82580:
649 case e1000_i350:
650 default:
651 for (; i < adapter->num_rx_queues; i++)
652 adapter->rx_ring[i]->reg_idx = rbase_offset + i;
653 for (; j < adapter->num_tx_queues; j++)
654 adapter->tx_ring[j]->reg_idx = rbase_offset + j;
655 break;
659 static void igb_free_queues(struct igb_adapter *adapter)
661 int i;
663 for (i = 0; i < adapter->num_tx_queues; i++) {
664 kfree(adapter->tx_ring[i]);
665 adapter->tx_ring[i] = NULL;
667 for (i = 0; i < adapter->num_rx_queues; i++) {
668 kfree(adapter->rx_ring[i]);
669 adapter->rx_ring[i] = NULL;
671 adapter->num_rx_queues = 0;
672 adapter->num_tx_queues = 0;
676 * igb_alloc_queues - Allocate memory for all rings
677 * @adapter: board private structure to initialize
679 * We allocate one ring per queue at run-time since we don't know the
680 * number of queues at compile-time.
682 static int igb_alloc_queues(struct igb_adapter *adapter)
684 struct igb_ring *ring;
685 int i;
687 for (i = 0; i < adapter->num_tx_queues; i++) {
688 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
689 if (!ring)
690 goto err;
691 ring->count = adapter->tx_ring_count;
692 ring->queue_index = i;
693 ring->dev = &adapter->pdev->dev;
694 ring->netdev = adapter->netdev;
695 /* For 82575, context index must be unique per ring. */
696 if (adapter->hw.mac.type == e1000_82575)
697 ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
698 adapter->tx_ring[i] = ring;
701 for (i = 0; i < adapter->num_rx_queues; i++) {
702 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
703 if (!ring)
704 goto err;
705 ring->count = adapter->rx_ring_count;
706 ring->queue_index = i;
707 ring->dev = &adapter->pdev->dev;
708 ring->netdev = adapter->netdev;
709 ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
710 ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
711 /* set flag indicating ring supports SCTP checksum offload */
712 if (adapter->hw.mac.type >= e1000_82576)
713 ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
714 adapter->rx_ring[i] = ring;
717 igb_cache_ring_register(adapter);
719 return 0;
721 err:
722 igb_free_queues(adapter);
724 return -ENOMEM;
727 #define IGB_N0_QUEUE -1
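/* IGB_N0_QUEUE is a sentinel meaning the q_vector has no ring of that type. */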
728 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
730 u32 msixbm = 0;
731 struct igb_adapter *adapter = q_vector->adapter;
732 struct e1000_hw *hw = &adapter->hw;
733 u32 ivar, index;
734 int rx_queue = IGB_N0_QUEUE;
735 int tx_queue = IGB_N0_QUEUE;
737 if (q_vector->rx_ring)
738 rx_queue = q_vector->rx_ring->reg_idx;
739 if (q_vector->tx_ring)
740 tx_queue = q_vector->tx_ring->reg_idx;
742 switch (hw->mac.type) {
743 case e1000_82575:
744 /* The 82575 assigns vectors using a bitmask, which matches the
745 bitmask for the EICR/EIMS/EIMC registers. To assign one
746 or more queues to a vector, we write the appropriate bits
747 into the MSIXBM register for that vector. */
748 if (rx_queue > IGB_N0_QUEUE)
749 msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
750 if (tx_queue > IGB_N0_QUEUE)
751 msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
752 if (!adapter->msix_entries && msix_vector == 0)
753 msixbm |= E1000_EIMS_OTHER;
754 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
755 q_vector->eims_value = msixbm;
756 break;
757 case e1000_82576:
758 /* 82576 uses a table-based method for assigning vectors.
759 Each queue has a single entry in the table to which we write
760 a vector number along with a "valid" bit. Sadly, the layout
761 of the table is somewhat counterintuitive. */
762 if (rx_queue > IGB_N0_QUEUE) {
763 index = (rx_queue & 0x7);
764 ivar = array_rd32(E1000_IVAR0, index);
765 if (rx_queue < 8) {
766 /* vector goes into low byte of register */
767 ivar = ivar & 0xFFFFFF00;
768 ivar |= msix_vector | E1000_IVAR_VALID;
769 } else {
770 /* vector goes into third byte of register */
771 ivar = ivar & 0xFF00FFFF;
772 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
774 array_wr32(E1000_IVAR0, index, ivar);
776 if (tx_queue > IGB_N0_QUEUE) {
777 index = (tx_queue & 0x7);
778 ivar = array_rd32(E1000_IVAR0, index);
779 if (tx_queue < 8) {
780 /* vector goes into second byte of register */
781 ivar = ivar & 0xFFFF00FF;
782 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
783 } else {
784 /* vector goes into high byte of register */
785 ivar = ivar & 0x00FFFFFF;
786 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
788 array_wr32(E1000_IVAR0, index, ivar);
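/* Unlike the 82575, each MSI-X vector gets its own EICR/EIMS bit here. */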
790 q_vector->eims_value = 1 << msix_vector;
791 break;
792 case e1000_82580:
793 case e1000_i350:
794 /* The 82580 uses the same table-based approach as the 82576 but has fewer
795 entries; as a result two queues share each IVAR entry (index = queue >> 1). */
796 if (rx_queue > IGB_N0_QUEUE) {
797 index = (rx_queue >> 1);
798 ivar = array_rd32(E1000_IVAR0, index);
799 if (rx_queue & 0x1) {
800 /* vector goes into third byte of register */
801 ivar = ivar & 0xFF00FFFF;
802 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
803 } else {
804 /* vector goes into low byte of register */
805 ivar = ivar & 0xFFFFFF00;
806 ivar |= msix_vector | E1000_IVAR_VALID;
808 array_wr32(E1000_IVAR0, index, ivar);
810 if (tx_queue > IGB_N0_QUEUE) {
811 index = (tx_queue >> 1);
812 ivar = array_rd32(E1000_IVAR0, index);
813 if (tx_queue & 0x1) {
814 /* vector goes into high byte of register */
815 ivar = ivar & 0x00FFFFFF;
816 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
817 } else {
818 /* vector goes into second byte of register */
819 ivar = ivar & 0xFFFF00FF;
820 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
822 array_wr32(E1000_IVAR0, index, ivar);
824 q_vector->eims_value = 1 << msix_vector;
825 break;
826 default:
827 BUG();
828 break;
831 /* add q_vector eims value to global eims_enable_mask */
832 adapter->eims_enable_mask |= q_vector->eims_value;
834 /* configure q_vector to set itr on first interrupt */
835 q_vector->set_itr = 1;
839 * igb_configure_msix - Configure MSI-X hardware
841 * igb_configure_msix sets up the hardware to properly
842 * generate MSI-X interrupts.
844 static void igb_configure_msix(struct igb_adapter *adapter)
846 u32 tmp;
847 int i, vector = 0;
848 struct e1000_hw *hw = &adapter->hw;
850 adapter->eims_enable_mask = 0;
852 /* set vector for other causes, i.e. link changes */
853 switch (hw->mac.type) {
854 case e1000_82575:
855 tmp = rd32(E1000_CTRL_EXT);
856 /* enable MSI-X PBA support*/
857 tmp |= E1000_CTRL_EXT_PBA_CLR;
859 /* Auto-Mask interrupts upon ICR read. */
860 tmp |= E1000_CTRL_EXT_EIAME;
861 tmp |= E1000_CTRL_EXT_IRCA;
863 wr32(E1000_CTRL_EXT, tmp);
865 /* enable msix_other interrupt */
866 array_wr32(E1000_MSIXBM(0), vector++,
867 E1000_EIMS_OTHER);
868 adapter->eims_other = E1000_EIMS_OTHER;
870 break;
872 case e1000_82576:
873 case e1000_82580:
874 case e1000_i350:
875 /* Turn on MSI-X capability first, or our settings
876 * won't stick. And it will take days to debug. */
877 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
878 E1000_GPIE_PBA | E1000_GPIE_EIAME |
879 E1000_GPIE_NSICR);
881 /* enable msix_other interrupt */
882 adapter->eims_other = 1 << vector;
883 tmp = (vector++ | E1000_IVAR_VALID) << 8;
885 wr32(E1000_IVAR_MISC, tmp);
886 break;
887 default:
888 /* do nothing, since nothing else supports MSI-X */
889 break;
890 } /* switch (hw->mac.type) */
892 adapter->eims_enable_mask |= adapter->eims_other;
894 for (i = 0; i < adapter->num_q_vectors; i++)
895 igb_assign_vector(adapter->q_vector[i], vector++);
897 wrfl();
901 * igb_request_msix - Initialize MSI-X interrupts
903 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
904 * kernel.
906 static int igb_request_msix(struct igb_adapter *adapter)
908 struct net_device *netdev = adapter->netdev;
909 struct e1000_hw *hw = &adapter->hw;
910 int i, err = 0, vector = 0;
912 err = request_irq(adapter->msix_entries[vector].vector,
913 igb_msix_other, 0, netdev->name, adapter);
914 if (err)
915 goto out;
916 vector++;
918 for (i = 0; i < adapter->num_q_vectors; i++) {
919 struct igb_q_vector *q_vector = adapter->q_vector[i];
921 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
923 if (q_vector->rx_ring && q_vector->tx_ring)
924 sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
925 q_vector->rx_ring->queue_index);
926 else if (q_vector->tx_ring)
927 sprintf(q_vector->name, "%s-tx-%u", netdev->name,
928 q_vector->tx_ring->queue_index);
929 else if (q_vector->rx_ring)
930 sprintf(q_vector->name, "%s-rx-%u", netdev->name,
931 q_vector->rx_ring->queue_index);
932 else
933 sprintf(q_vector->name, "%s-unused", netdev->name);
935 err = request_irq(adapter->msix_entries[vector].vector,
936 igb_msix_ring, 0, q_vector->name,
937 q_vector);
938 if (err)
939 goto out;
940 vector++;
943 igb_configure_msix(adapter);
944 return 0;
945 out:
946 return err;
949 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
951 if (adapter->msix_entries) {
952 pci_disable_msix(adapter->pdev);
953 kfree(adapter->msix_entries);
954 adapter->msix_entries = NULL;
955 } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
956 pci_disable_msi(adapter->pdev);
961 * igb_free_q_vectors - Free memory allocated for interrupt vectors
962 * @adapter: board private structure to initialize
964 * This function frees the memory allocated to the q_vectors. In addition if
965 * NAPI is enabled it will delete any references to the NAPI struct prior
966 * to freeing the q_vector.
968 static void igb_free_q_vectors(struct igb_adapter *adapter)
970 int v_idx;
972 for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
973 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
974 adapter->q_vector[v_idx] = NULL;
975 if (!q_vector)
976 continue;
977 netif_napi_del(&q_vector->napi);
978 kfree(q_vector);
980 adapter->num_q_vectors = 0;
984 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
986 * This function resets the device so that it has 0 rx queues, tx queues, and
987 * MSI-X interrupts allocated.
989 static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
991 igb_free_queues(adapter);
992 igb_free_q_vectors(adapter);
993 igb_reset_interrupt_capability(adapter);
997 * igb_set_interrupt_capability - set MSI or MSI-X if supported
999 * Attempt to configure interrupts using the best available
1000 * capabilities of the hardware and kernel.
1002 static int igb_set_interrupt_capability(struct igb_adapter *adapter)
1004 int err;
1005 int numvecs, i;
1007 /* Number of supported queues. */
1008 adapter->num_rx_queues = adapter->rss_queues;
1009 if (adapter->vfs_allocated_count)
1010 adapter->num_tx_queues = 1;
1011 else
1012 adapter->num_tx_queues = adapter->rss_queues;
1014 /* start with one vector for every rx queue */
1015 numvecs = adapter->num_rx_queues;
1017 /* if tx handler is separate add 1 for every tx queue */
1018 if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
1019 numvecs += adapter->num_tx_queues;
1021 /* store the number of vectors reserved for queues */
1022 adapter->num_q_vectors = numvecs;
1024 /* add 1 vector for link status interrupts */
1025 numvecs++;
1026 adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
1027 GFP_KERNEL);
1028 if (!adapter->msix_entries)
1029 goto msi_only;
1031 for (i = 0; i < numvecs; i++)
1032 adapter->msix_entries[i].entry = i;
1034 err = pci_enable_msix(adapter->pdev,
1035 adapter->msix_entries,
1036 numvecs);
1037 if (err == 0)
1038 goto out;
1040 igb_reset_interrupt_capability(adapter);
1042 /* If we can't do MSI-X, try MSI */
1043 msi_only:
1044 #ifdef CONFIG_PCI_IOV
1045 /* disable SR-IOV for non MSI-X configurations */
1046 if (adapter->vf_data) {
1047 struct e1000_hw *hw = &adapter->hw;
1048 /* disable iov and allow time for transactions to clear */
1049 pci_disable_sriov(adapter->pdev);
1050 msleep(500);
1052 kfree(adapter->vf_data);
1053 adapter->vf_data = NULL;
1054 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1055 msleep(100);
1056 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
1058 #endif
1059 adapter->vfs_allocated_count = 0;
1060 adapter->rss_queues = 1;
1061 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1062 adapter->num_rx_queues = 1;
1063 adapter->num_tx_queues = 1;
1064 adapter->num_q_vectors = 1;
1065 if (!pci_enable_msi(adapter->pdev))
1066 adapter->flags |= IGB_FLAG_HAS_MSI;
1067 out:
1068 /* Notify the stack of the (possibly) reduced queue counts. */
1069 netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
1070 return netif_set_real_num_rx_queues(adapter->netdev,
1071 adapter->num_rx_queues);
1075 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1076 * @adapter: board private structure to initialize
1078 * We allocate one q_vector per queue interrupt. If allocation fails we
1079 * return -ENOMEM.
1081 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1083 struct igb_q_vector *q_vector;
1084 struct e1000_hw *hw = &adapter->hw;
1085 int v_idx;
1087 for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1088 q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
1089 if (!q_vector)
1090 goto err_out;
1091 q_vector->adapter = adapter;
1092 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
1093 q_vector->itr_val = IGB_START_ITR;
1094 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
1095 adapter->q_vector[v_idx] = q_vector;
1097 return 0;
1099 err_out:
1100 igb_free_q_vectors(adapter);
1101 return -ENOMEM;
1104 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
1105 int ring_idx, int v_idx)
1107 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1109 q_vector->rx_ring = adapter->rx_ring[ring_idx];
1110 q_vector->rx_ring->q_vector = q_vector;
1111 q_vector->itr_val = adapter->rx_itr_setting;
1112 if (q_vector->itr_val && q_vector->itr_val <= 3)
1113 q_vector->itr_val = IGB_START_ITR;
1116 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
1117 int ring_idx, int v_idx)
1119 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1121 q_vector->tx_ring = adapter->tx_ring[ring_idx];
1122 q_vector->tx_ring->q_vector = q_vector;
1123 q_vector->itr_val = adapter->tx_itr_setting;
1124 if (q_vector->itr_val && q_vector->itr_val <= 3)
1125 q_vector->itr_val = IGB_START_ITR;
1129 * igb_map_ring_to_vector - maps allocated queues to vectors
1131 * This function maps the recently allocated queues to vectors.
1133 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
1135 int i;
1136 int v_idx = 0;
1138 if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
1139 (adapter->num_q_vectors < adapter->num_tx_queues))
1140 return -ENOMEM;
1142 if (adapter->num_q_vectors >=
1143 (adapter->num_rx_queues + adapter->num_tx_queues)) {
1144 for (i = 0; i < adapter->num_rx_queues; i++)
1145 igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1146 for (i = 0; i < adapter->num_tx_queues; i++)
1147 igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1148 } else {
1149 for (i = 0; i < adapter->num_rx_queues; i++) {
1150 if (i < adapter->num_tx_queues)
1151 igb_map_tx_ring_to_vector(adapter, i, v_idx);
1152 igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1154 for (; i < adapter->num_tx_queues; i++)
1155 igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1157 return 0;
1161 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1163 * This function initializes the interrupts and allocates all of the queues.
1165 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
1167 struct pci_dev *pdev = adapter->pdev;
1168 int err;
1170 err = igb_set_interrupt_capability(adapter);
1171 if (err)
1172 return err;
1174 err = igb_alloc_q_vectors(adapter);
1175 if (err) {
1176 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
1177 goto err_alloc_q_vectors;
1180 err = igb_alloc_queues(adapter);
1181 if (err) {
1182 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1183 goto err_alloc_queues;
1186 err = igb_map_ring_to_vector(adapter);
1187 if (err) {
1188 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
1189 goto err_map_queues;
1193 return 0;
1194 err_map_queues:
1195 igb_free_queues(adapter);
1196 err_alloc_queues:
1197 igb_free_q_vectors(adapter);
1198 err_alloc_q_vectors:
1199 igb_reset_interrupt_capability(adapter);
1200 return err;
1204 * igb_request_irq - initialize interrupts
1206 * Attempts to configure interrupts using the best available
1207 * capabilities of the hardware and kernel.
1209 static int igb_request_irq(struct igb_adapter *adapter)
1211 struct net_device *netdev = adapter->netdev;
1212 struct pci_dev *pdev = adapter->pdev;
1213 int err = 0;
1215 if (adapter->msix_entries) {
1216 err = igb_request_msix(adapter);
1217 if (!err)
1218 goto request_done;
1219 /* fall back to MSI */
1220 igb_clear_interrupt_scheme(adapter);
1221 if (!pci_enable_msi(adapter->pdev))
1222 adapter->flags |= IGB_FLAG_HAS_MSI;
1223 igb_free_all_tx_resources(adapter);
1224 igb_free_all_rx_resources(adapter);
1225 adapter->num_tx_queues = 1;
1226 adapter->num_rx_queues = 1;
1227 adapter->num_q_vectors = 1;
1228 err = igb_alloc_q_vectors(adapter);
1229 if (err) {
1230 dev_err(&pdev->dev,
1231 "Unable to allocate memory for vectors\n");
1232 goto request_done;
1234 err = igb_alloc_queues(adapter);
1235 if (err) {
1236 dev_err(&pdev->dev,
1237 "Unable to allocate memory for queues\n");
1238 igb_free_q_vectors(adapter);
1239 goto request_done;
1241 igb_setup_all_tx_resources(adapter);
1242 igb_setup_all_rx_resources(adapter);
1243 } else {
1244 igb_assign_vector(adapter->q_vector[0], 0);
1247 if (adapter->flags & IGB_FLAG_HAS_MSI) {
1248 err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
1249 netdev->name, adapter);
1250 if (!err)
1251 goto request_done;
1253 /* fall back to legacy interrupts */
1254 igb_reset_interrupt_capability(adapter);
1255 adapter->flags &= ~IGB_FLAG_HAS_MSI;
1258 err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
1259 netdev->name, adapter);
1261 if (err)
1262 dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
1263 err);
1265 request_done:
1266 return err;
1269 static void igb_free_irq(struct igb_adapter *adapter)
1271 if (adapter->msix_entries) {
1272 int vector = 0, i;
1274 free_irq(adapter->msix_entries[vector++].vector, adapter);
1276 for (i = 0; i < adapter->num_q_vectors; i++) {
1277 struct igb_q_vector *q_vector = adapter->q_vector[i];
1278 free_irq(adapter->msix_entries[vector++].vector,
1279 q_vector);
1281 } else {
1282 free_irq(adapter->pdev->irq, adapter);
1287 * igb_irq_disable - Mask off interrupt generation on the NIC
1288 * @adapter: board private structure
1290 static void igb_irq_disable(struct igb_adapter *adapter)
1292 struct e1000_hw *hw = &adapter->hw;
1295 * We need to be careful when disabling interrupts. The VFs are also
1296 * mapped into these registers, and clearing the bits can cause
1297 * issues for the VF drivers, so we only clear the bits we set.
1299 if (adapter->msix_entries) {
1300 u32 regval = rd32(E1000_EIAM);
1301 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1302 wr32(E1000_EIMC, adapter->eims_enable_mask);
1303 regval = rd32(E1000_EIAC);
1304 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1307 wr32(E1000_IAM, 0);
1308 wr32(E1000_IMC, ~0);
1309 wrfl();
1310 if (adapter->msix_entries) {
1311 int i;
1312 for (i = 0; i < adapter->num_q_vectors; i++)
1313 synchronize_irq(adapter->msix_entries[i].vector);
1314 } else {
1315 synchronize_irq(adapter->pdev->irq);
1320 * igb_irq_enable - Enable default interrupt generation settings
1321 * @adapter: board private structure
1323 static void igb_irq_enable(struct igb_adapter *adapter)
1325 struct e1000_hw *hw = &adapter->hw;
1327 if (adapter->msix_entries) {
1328 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
1329 u32 regval = rd32(E1000_EIAC);
1330 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1331 regval = rd32(E1000_EIAM);
1332 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1333 wr32(E1000_EIMS, adapter->eims_enable_mask);
1334 if (adapter->vfs_allocated_count) {
1335 wr32(E1000_MBVFIMR, 0xFF);
1336 ims |= E1000_IMS_VMMB;
1338 if (adapter->hw.mac.type == e1000_82580)
1339 ims |= E1000_IMS_DRSTA;
1341 wr32(E1000_IMS, ims);
1342 } else {
1343 wr32(E1000_IMS, IMS_ENABLE_MASK |
1344 E1000_IMS_DRSTA);
1345 wr32(E1000_IAM, IMS_ENABLE_MASK |
1346 E1000_IMS_DRSTA);
1350 static void igb_update_mng_vlan(struct igb_adapter *adapter)
1352 struct e1000_hw *hw = &adapter->hw;
1353 u16 vid = adapter->hw.mng_cookie.vlan_id;
1354 u16 old_vid = adapter->mng_vlan_id;
1356 if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1357 /* add VID to filter table */
1358 igb_vfta_set(hw, vid, true);
1359 adapter->mng_vlan_id = vid;
1360 } else {
1361 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1364 if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1365 (vid != old_vid) &&
1366 !vlan_group_get_device(adapter->vlgrp, old_vid)) {
1367 /* remove VID from filter table */
1368 igb_vfta_set(hw, old_vid, false);
1373 * igb_release_hw_control - release control of the h/w to f/w
1374 * @adapter: address of board private structure
1376 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1377 * For ASF and Pass Through versions of f/w this means that the
1378 * driver is no longer loaded.
1381 static void igb_release_hw_control(struct igb_adapter *adapter)
1383 struct e1000_hw *hw = &adapter->hw;
1384 u32 ctrl_ext;
1386 /* Let firmware take over control of h/w */
1387 ctrl_ext = rd32(E1000_CTRL_EXT);
1388 wr32(E1000_CTRL_EXT,
1389 ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1393 * igb_get_hw_control - get control of the h/w from f/w
1394 * @adapter: address of board private structure
1396 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1397 * For ASF and Pass Through versions of f/w this means that
1398 * the driver is loaded.
1401 static void igb_get_hw_control(struct igb_adapter *adapter)
1403 struct e1000_hw *hw = &adapter->hw;
1404 u32 ctrl_ext;
1406 /* Let firmware know the driver has taken over */
1407 ctrl_ext = rd32(E1000_CTRL_EXT);
1408 wr32(E1000_CTRL_EXT,
1409 ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1413 * igb_configure - configure the hardware for RX and TX
1414 * @adapter: private board structure
1416 static void igb_configure(struct igb_adapter *adapter)
1418 struct net_device *netdev = adapter->netdev;
1419 int i;
1421 igb_get_hw_control(adapter);
1422 igb_set_rx_mode(netdev);
1424 igb_restore_vlan(adapter);
1426 igb_setup_tctl(adapter);
1427 igb_setup_mrqc(adapter);
1428 igb_setup_rctl(adapter);
1430 igb_configure_tx(adapter);
1431 igb_configure_rx(adapter);
1433 igb_rx_fifo_flush_82575(&adapter->hw);
1435 /* call igb_desc_unused which always leaves
1436 * at least 1 descriptor unused to make sure
1437 * next_to_use != next_to_clean */
1438 for (i = 0; i < adapter->num_rx_queues; i++) {
1439 struct igb_ring *ring = adapter->rx_ring[i];
1440 igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
1445 * igb_power_up_link - Power up the phy/serdes link
1446 * @adapter: address of board private structure
1448 void igb_power_up_link(struct igb_adapter *adapter)
1450 if (adapter->hw.phy.media_type == e1000_media_type_copper)
1451 igb_power_up_phy_copper(&adapter->hw);
1452 else
1453 igb_power_up_serdes_link_82575(&adapter->hw);
1457 * igb_power_down_link - Power down the phy/serdes link
1458 * @adapter: address of board private structure
1460 static void igb_power_down_link(struct igb_adapter *adapter)
1462 if (adapter->hw.phy.media_type == e1000_media_type_copper)
1463 igb_power_down_phy_copper_82575(&adapter->hw);
1464 else
1465 igb_shutdown_serdes_link_82575(&adapter->hw);
1469 * igb_up - Open the interface and prepare it to handle traffic
1470 * @adapter: board private structure
1472 int igb_up(struct igb_adapter *adapter)
1474 struct e1000_hw *hw = &adapter->hw;
1475 int i;
1477 /* hardware has been reset, we need to reload some things */
1478 igb_configure(adapter);
1480 clear_bit(__IGB_DOWN, &adapter->state);
1482 for (i = 0; i < adapter->num_q_vectors; i++) {
1483 struct igb_q_vector *q_vector = adapter->q_vector[i];
1484 napi_enable(&q_vector->napi);
1486 if (adapter->msix_entries)
1487 igb_configure_msix(adapter);
1488 else
1489 igb_assign_vector(adapter->q_vector[0], 0);
1491 /* Clear any pending interrupts. */
1492 rd32(E1000_ICR);
1493 igb_irq_enable(adapter);
1495 /* notify VFs that reset has been completed */
1496 if (adapter->vfs_allocated_count) {
1497 u32 reg_data = rd32(E1000_CTRL_EXT);
1498 reg_data |= E1000_CTRL_EXT_PFRSTD;
1499 wr32(E1000_CTRL_EXT, reg_data);
1502 netif_tx_start_all_queues(adapter->netdev);
1504 /* start the watchdog. */
1505 hw->mac.get_link_status = 1;
1506 schedule_work(&adapter->watchdog_task);
1508 return 0;
1511 void igb_down(struct igb_adapter *adapter)
1513 struct net_device *netdev = adapter->netdev;
1514 struct e1000_hw *hw = &adapter->hw;
1515 u32 tctl, rctl;
1516 int i;
1518 /* signal that we're down so the interrupt handler does not
1519 * reschedule our watchdog timer */
1520 set_bit(__IGB_DOWN, &adapter->state);
1522 /* disable receives in the hardware */
1523 rctl = rd32(E1000_RCTL);
1524 wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1525 /* flush and sleep below */
1527 netif_tx_stop_all_queues(netdev);
1529 /* disable transmits in the hardware */
1530 tctl = rd32(E1000_TCTL);
1531 tctl &= ~E1000_TCTL_EN;
1532 wr32(E1000_TCTL, tctl);
1533 /* flush both disables and wait for them to finish */
1534 wrfl();
1535 msleep(10);
1537 for (i = 0; i < adapter->num_q_vectors; i++) {
1538 struct igb_q_vector *q_vector = adapter->q_vector[i];
1539 napi_disable(&q_vector->napi);
1542 igb_irq_disable(adapter);
1544 del_timer_sync(&adapter->watchdog_timer);
1545 del_timer_sync(&adapter->phy_info_timer);
1547 netif_carrier_off(netdev);
1549 /* record the stats before reset*/
1550 spin_lock(&adapter->stats64_lock);
1551 igb_update_stats(adapter, &adapter->stats64);
1552 spin_unlock(&adapter->stats64_lock);
1554 adapter->link_speed = 0;
1555 adapter->link_duplex = 0;
1557 if (!pci_channel_offline(adapter->pdev))
1558 igb_reset(adapter);
1559 igb_clean_all_tx_rings(adapter);
1560 igb_clean_all_rx_rings(adapter);
1561 #ifdef CONFIG_IGB_DCA
1563 /* since we reset the hardware DCA settings were cleared */
1564 igb_setup_dca(adapter);
1565 #endif
1568 void igb_reinit_locked(struct igb_adapter *adapter)
1570 WARN_ON(in_interrupt());
1571 while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1572 msleep(1);
1573 igb_down(adapter);
1574 igb_up(adapter);
1575 clear_bit(__IGB_RESETTING, &adapter->state);
1578 void igb_reset(struct igb_adapter *adapter)
1580 struct pci_dev *pdev = adapter->pdev;
1581 struct e1000_hw *hw = &adapter->hw;
1582 struct e1000_mac_info *mac = &hw->mac;
1583 struct e1000_fc_info *fc = &hw->fc;
1584 u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1585 u16 hwm;
1587 /* Repartition the PBA for MTUs greater than 9k.
1588 * CTRL.RST is required for the change to take effect.
1590 switch (mac->type) {
1591 case e1000_i350:
1592 case e1000_82580:
1593 pba = rd32(E1000_RXPBS);
1594 pba = igb_rxpbs_adjust_82580(pba);
1595 break;
1596 case e1000_82576:
1597 pba = rd32(E1000_RXPBS);
1598 pba &= E1000_RXPBS_SIZE_MASK_82576;
1599 break;
1600 case e1000_82575:
1601 default:
1602 pba = E1000_PBA_34K;
1603 break;
1606 if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1607 (mac->type < e1000_82576)) {
1608 /* adjust PBA for jumbo frames */
1609 wr32(E1000_PBA, pba);
1611 /* To maintain wire speed transmits, the Tx FIFO should be
1612 * large enough to accommodate two full transmit packets,
1613 * rounded up to the next 1KB and expressed in KB. Likewise,
1614 * the Rx FIFO should be large enough to accommodate at least
1615 * one full receive packet and is similarly rounded up and
1616 * expressed in KB. */
1617 pba = rd32(E1000_PBA);
1618 /* upper 16 bits has Tx packet buffer allocation size in KB */
1619 tx_space = pba >> 16;
1620 /* lower 16 bits has Rx packet buffer allocation size in KB */
1621 pba &= 0xffff;
1622 /* the Tx FIFO also stores 16 bytes of information about each Tx packet,
1623 * but don't count the Ethernet FCS because hardware appends it */
1624 min_tx_space = (adapter->max_frame_size +
1625 sizeof(union e1000_adv_tx_desc) -
1626 ETH_FCS_LEN) * 2;
1627 min_tx_space = ALIGN(min_tx_space, 1024);
1628 min_tx_space >>= 10;
1629 /* software strips receive CRC, so leave room for it */
1630 min_rx_space = adapter->max_frame_size;
1631 min_rx_space = ALIGN(min_rx_space, 1024);
1632 min_rx_space >>= 10;
1634 /* If current Tx allocation is less than the min Tx FIFO size,
1635 * and the min Tx FIFO size is less than the current Rx FIFO
1636 * allocation, take space away from current Rx allocation */
1637 if (tx_space < min_tx_space &&
1638 ((min_tx_space - tx_space) < pba)) {
1639 pba = pba - (min_tx_space - tx_space);
1641 /* if short on rx space, rx wins and must trump tx
1642 * adjustment */
1643 if (pba < min_rx_space)
1644 pba = min_rx_space;
1646 wr32(E1000_PBA, pba);
1649 /* flow control settings */
1650 /* The high water mark must be low enough to fit one full frame
1651 * (or the size used for early receive) above it in the Rx FIFO.
1652 * Set it to the lower of:
1653 * - 90% of the Rx FIFO size, or
1654 * - the full Rx FIFO size minus one full frame */
1655 hwm = min(((pba << 10) * 9 / 10),
1656 ((pba << 10) - 2 * adapter->max_frame_size));
1658 fc->high_water = hwm & 0xFFF0; /* 16-byte granularity */
1659 fc->low_water = fc->high_water - 16;
1660 fc->pause_time = 0xFFFF;
1661 fc->send_xon = 1;
1662 fc->current_mode = fc->requested_mode;
1664 /* disable receive for all VFs and wait one second */
1665 if (adapter->vfs_allocated_count) {
1666 int i;
1667 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1668 adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
1670 /* ping all the active vfs to let them know we are going down */
1671 igb_ping_all_vfs(adapter);
1673 /* disable transmits and receives */
1674 wr32(E1000_VFRE, 0);
1675 wr32(E1000_VFTE, 0);
1678 /* Allow time for pending master requests to run */
1679 hw->mac.ops.reset_hw(hw);
1680 wr32(E1000_WUC, 0);
1682 if (hw->mac.ops.init_hw(hw))
1683 dev_err(&pdev->dev, "Hardware Error\n");
1684 if (hw->mac.type > e1000_82580) {
1685 if (adapter->flags & IGB_FLAG_DMAC) {
1686 u32 reg;
1689 * DMA Coalescing high water mark needs to be higher
1690 * than the Rx threshold. The Rx threshold is
1691 * currently pba - 6, so we should use a high water
1692 * mark of pba - 4. */
1693 hwm = (pba - 4) << 10;
1695 reg = (((pba-6) << E1000_DMACR_DMACTHR_SHIFT)
1696 & E1000_DMACR_DMACTHR_MASK);
1698 /* transition to L0x or L1 if available..*/
1699 reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
1701 /* watchdog timer = ~1000 usec, expressed in 32-usec intervals */
1702 reg |= (1000 >> 5);
1703 wr32(E1000_DMACR, reg);
1705 /* no lower threshold to disable coalescing (smart FIFO):
1706 * UTRESH = 0 */
1707 wr32(E1000_DMCRTRH, 0);
1709 /* set hwm to PBA - 2 * max frame size */
1710 wr32(E1000_FCRTC, hwm);
1713 * This sets the time to wait before requesting transition to
1714 * a low power state to the number of usecs needed to receive
1715 * one 512-byte frame at gigabit line rate.
1717 reg = rd32(E1000_DMCTLX);
1718 reg |= IGB_DMCTLX_DCFLUSH_DIS;
1720 /* Delay 255 usec before entering Lx state. */
1721 reg |= 0xFF;
1722 wr32(E1000_DMCTLX, reg);
1724 /* free space in Tx packet buffer to wake from DMAC */
1725 wr32(E1000_DMCTXTH,
1726 (IGB_MIN_TXPBSIZE -
1727 (IGB_TX_BUF_4096 + adapter->max_frame_size))
1728 >> 6);
1730 /* make low power state decision controlled by DMAC */
1731 reg = rd32(E1000_PCIEMISC);
1732 reg |= E1000_PCIEMISC_LX_DECISION;
1733 wr32(E1000_PCIEMISC, reg);
1734 } /* end if IGB_FLAG_DMAC set */
1736 if (hw->mac.type == e1000_82580) {
1737 u32 reg = rd32(E1000_PCIEMISC);
1738 wr32(E1000_PCIEMISC,
1739 reg & ~E1000_PCIEMISC_LX_DECISION);
1741 if (!netif_running(adapter->netdev))
1742 igb_power_down_link(adapter);
1744 igb_update_mng_vlan(adapter);
1746 /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1747 wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1749 igb_get_phy_info(hw);
1752 static const struct net_device_ops igb_netdev_ops = {
1753 .ndo_open = igb_open,
1754 .ndo_stop = igb_close,
1755 .ndo_start_xmit = igb_xmit_frame_adv,
1756 .ndo_get_stats64 = igb_get_stats64,
1757 .ndo_set_rx_mode = igb_set_rx_mode,
1758 .ndo_set_multicast_list = igb_set_rx_mode,
1759 .ndo_set_mac_address = igb_set_mac,
1760 .ndo_change_mtu = igb_change_mtu,
1761 .ndo_do_ioctl = igb_ioctl,
1762 .ndo_tx_timeout = igb_tx_timeout,
1763 .ndo_validate_addr = eth_validate_addr,
1764 .ndo_vlan_rx_register = igb_vlan_rx_register,
1765 .ndo_vlan_rx_add_vid = igb_vlan_rx_add_vid,
1766 .ndo_vlan_rx_kill_vid = igb_vlan_rx_kill_vid,
1767 .ndo_set_vf_mac = igb_ndo_set_vf_mac,
1768 .ndo_set_vf_vlan = igb_ndo_set_vf_vlan,
1769 .ndo_set_vf_tx_rate = igb_ndo_set_vf_bw,
1770 .ndo_get_vf_config = igb_ndo_get_vf_config,
1771 #ifdef CONFIG_NET_POLL_CONTROLLER
1772 .ndo_poll_controller = igb_netpoll,
1773 #endif
1777 * igb_probe - Device Initialization Routine
1778 * @pdev: PCI device information struct
1779 * @ent: entry in igb_pci_tbl
1781 * Returns 0 on success, negative on failure
1783 * igb_probe initializes an adapter identified by a pci_dev structure.
1784 * The OS initialization, configuring of the adapter private structure,
1785 * and a hardware reset occur.
1787 static int __devinit igb_probe(struct pci_dev *pdev,
1788 const struct pci_device_id *ent)
1790 struct net_device *netdev;
1791 struct igb_adapter *adapter;
1792 struct e1000_hw *hw;
1793 u16 eeprom_data = 0;
1794 s32 ret_val;
1795 static int global_quad_port_a; /* global quad port a indication */
1796 const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1797 unsigned long mmio_start, mmio_len;
1798 int err, pci_using_dac;
1799 u16 eeprom_apme_mask = IGB_EEPROM_APME;
1800 u8 part_str[E1000_PBANUM_LENGTH];
1802 /* Catch broken hardware that put the wrong VF device ID in
1803 * the PCIe SR-IOV capability.
1805 if (pdev->is_virtfn) {
1806 WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
1807 pci_name(pdev), pdev->vendor, pdev->device);
1808 return -EINVAL;
1811 err = pci_enable_device_mem(pdev);
1812 if (err)
1813 return err;
1815 pci_using_dac = 0;
1816 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1817 if (!err) {
1818 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1819 if (!err)
1820 pci_using_dac = 1;
1821 } else {
1822 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1823 if (err) {
1824 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1825 if (err) {
1826 dev_err(&pdev->dev, "No usable DMA "
1827 "configuration, aborting\n");
1828 goto err_dma;
1833 err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1834 IORESOURCE_MEM),
1835 igb_driver_name);
1836 if (err)
1837 goto err_pci_reg;
1839 pci_enable_pcie_error_reporting(pdev);
1841 pci_set_master(pdev);
1842 pci_save_state(pdev);
1844 err = -ENOMEM;
1845 netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1846 IGB_ABS_MAX_TX_QUEUES);
1847 if (!netdev)
1848 goto err_alloc_etherdev;
1850 SET_NETDEV_DEV(netdev, &pdev->dev);
1852 pci_set_drvdata(pdev, netdev);
1853 adapter = netdev_priv(netdev);
1854 adapter->netdev = netdev;
1855 adapter->pdev = pdev;
1856 hw = &adapter->hw;
1857 hw->back = adapter;
1858 adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1860 mmio_start = pci_resource_start(pdev, 0);
1861 mmio_len = pci_resource_len(pdev, 0);
1863 err = -EIO;
1864 hw->hw_addr = ioremap(mmio_start, mmio_len);
1865 if (!hw->hw_addr)
1866 goto err_ioremap;
1868 netdev->netdev_ops = &igb_netdev_ops;
1869 igb_set_ethtool_ops(netdev);
1870 netdev->watchdog_timeo = 5 * HZ;
1872 strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1874 netdev->mem_start = mmio_start;
1875 netdev->mem_end = mmio_start + mmio_len;
1877 /* PCI config space info */
1878 hw->vendor_id = pdev->vendor;
1879 hw->device_id = pdev->device;
1880 hw->revision_id = pdev->revision;
1881 hw->subsystem_vendor_id = pdev->subsystem_vendor;
1882 hw->subsystem_device_id = pdev->subsystem_device;
1884 /* Copy the default MAC, PHY and NVM function pointers */
1885 memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1886 memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1887 memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1888 /* Initialize skew-specific constants */
1889 err = ei->get_invariants(hw);
1890 if (err)
1891 goto err_sw_init;
1893 /* setup the private structure */
1894 err = igb_sw_init(adapter);
1895 if (err)
1896 goto err_sw_init;
1898 igb_get_bus_info_pcie(hw);
1900 hw->phy.autoneg_wait_to_complete = false;
1902 /* Copper options */
1903 if (hw->phy.media_type == e1000_media_type_copper) {
1904 hw->phy.mdix = AUTO_ALL_MODES;
1905 hw->phy.disable_polarity_correction = false;
1906 hw->phy.ms_type = e1000_ms_hw_default;
1909 if (igb_check_reset_block(hw))
1910 dev_info(&pdev->dev,
1911 "PHY reset is blocked due to SOL/IDER session.\n");
1913 netdev->features = NETIF_F_SG |
1914 NETIF_F_IP_CSUM |
1915 NETIF_F_HW_VLAN_TX |
1916 NETIF_F_HW_VLAN_RX |
1917 NETIF_F_HW_VLAN_FILTER;
1919 netdev->features |= NETIF_F_IPV6_CSUM;
1920 netdev->features |= NETIF_F_TSO;
1921 netdev->features |= NETIF_F_TSO6;
1922 netdev->features |= NETIF_F_GRO;
1924 netdev->vlan_features |= NETIF_F_TSO;
1925 netdev->vlan_features |= NETIF_F_TSO6;
1926 netdev->vlan_features |= NETIF_F_IP_CSUM;
1927 netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1928 netdev->vlan_features |= NETIF_F_SG;
1930 if (pci_using_dac) {
1931 netdev->features |= NETIF_F_HIGHDMA;
1932 netdev->vlan_features |= NETIF_F_HIGHDMA;
1935 if (hw->mac.type >= e1000_82576)
1936 netdev->features |= NETIF_F_SCTP_CSUM;
1938 adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1940 /* before reading the NVM, reset the controller to put the device in a
1941 * known good starting state */
1942 hw->mac.ops.reset_hw(hw);
1944 /* make sure the NVM is good */
1945 if (hw->nvm.ops.validate(hw) < 0) {
1946 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1947 err = -EIO;
1948 goto err_eeprom;
1951 /* copy the MAC address out of the NVM */
1952 if (hw->mac.ops.read_mac_addr(hw))
1953 dev_err(&pdev->dev, "NVM Read Error\n");
1955 memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1956 memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1958 if (!is_valid_ether_addr(netdev->perm_addr)) {
1959 dev_err(&pdev->dev, "Invalid MAC Address\n");
1960 err = -EIO;
1961 goto err_eeprom;
1964 setup_timer(&adapter->watchdog_timer, igb_watchdog,
1965 (unsigned long) adapter);
1966 setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
1967 (unsigned long) adapter);
1969 INIT_WORK(&adapter->reset_task, igb_reset_task);
1970 INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
1972 /* Initialize link properties that are user-changeable */
1973 adapter->fc_autoneg = true;
1974 hw->mac.autoneg = true;
1975 hw->phy.autoneg_advertised = 0x2f;
1977 hw->fc.requested_mode = e1000_fc_default;
1978 hw->fc.current_mode = e1000_fc_default;
1980 igb_validate_mdi_setting(hw);
1982 /* Initial Wake on LAN setting. If APM wake is enabled in the EEPROM,
1983 * enable the ACPI Magic Packet filter
1986 if (hw->bus.func == 0)
1987 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
1988 else if (hw->mac.type == e1000_82580)
1989 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
1990 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
1991 &eeprom_data);
1992 else if (hw->bus.func == 1)
1993 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
1995 if (eeprom_data & eeprom_apme_mask)
1996 adapter->eeprom_wol |= E1000_WUFC_MAG;
1998 /* now that we have the eeprom settings, apply the special cases where
1999 * the eeprom may be wrong or the board simply won't support wake on
2000 * lan on a particular port */
2001 switch (pdev->device) {
2002 case E1000_DEV_ID_82575GB_QUAD_COPPER:
2003 adapter->eeprom_wol = 0;
2004 break;
2005 case E1000_DEV_ID_82575EB_FIBER_SERDES:
2006 case E1000_DEV_ID_82576_FIBER:
2007 case E1000_DEV_ID_82576_SERDES:
2008 /* Wake events only supported on port A for dual fiber
2009 * regardless of eeprom setting */
2010 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
2011 adapter->eeprom_wol = 0;
2012 break;
2013 case E1000_DEV_ID_82576_QUAD_COPPER:
2014 case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
2015 /* if quad port adapter, disable WoL on all but port A */
2016 if (global_quad_port_a != 0)
2017 adapter->eeprom_wol = 0;
2018 else
2019 adapter->flags |= IGB_FLAG_QUAD_PORT_A;
2020 /* Reset for multiple quad port adapters */
2021 if (++global_quad_port_a == 4)
2022 global_quad_port_a = 0;
2023 break;
2026 /* initialize the wol settings based on the eeprom settings */
2027 adapter->wol = adapter->eeprom_wol;
2028 device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
2030 /* reset the hardware with the new settings */
2031 igb_reset(adapter);
2033 /* let the f/w know that the h/w is now under the control of the
2034 * driver. */
2035 igb_get_hw_control(adapter);
2037 strcpy(netdev->name, "eth%d");
2038 err = register_netdev(netdev);
2039 if (err)
2040 goto err_register;
2042 /* carrier off reporting is important to ethtool even BEFORE open */
2043 netif_carrier_off(netdev);
2045 #ifdef CONFIG_IGB_DCA
2046 if (dca_add_requester(&pdev->dev) == 0) {
2047 adapter->flags |= IGB_FLAG_DCA_ENABLED;
2048 dev_info(&pdev->dev, "DCA enabled\n");
2049 igb_setup_dca(adapter);
2052 #endif
2053 /* do hw tstamp init after resetting */
2054 igb_init_hw_timer(adapter);
2056 dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
2057 /* print bus type/speed/width info */
2058 dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
2059 netdev->name,
2060 ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2061 (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2062 "unknown"),
2063 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2064 (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2065 (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2066 "unknown"),
2067 netdev->dev_addr);
2069 ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2070 if (ret_val)
2071 strcpy(part_str, "Unknown");
2072 dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2073 dev_info(&pdev->dev,
2074 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2075 adapter->msix_entries ? "MSI-X" :
2076 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2077 adapter->num_rx_queues, adapter->num_tx_queues);
2078 switch (hw->mac.type) {
2079 case e1000_i350:
2080 igb_set_eee_i350(hw);
2081 break;
2082 default:
2083 break;
2085 return 0;
2087 err_register:
2088 igb_release_hw_control(adapter);
2089 err_eeprom:
2090 if (!igb_check_reset_block(hw))
2091 igb_reset_phy(hw);
2093 if (hw->flash_address)
2094 iounmap(hw->flash_address);
2095 err_sw_init:
2096 igb_clear_interrupt_scheme(adapter);
2097 iounmap(hw->hw_addr);
2098 err_ioremap:
2099 free_netdev(netdev);
2100 err_alloc_etherdev:
2101 pci_release_selected_regions(pdev,
2102 pci_select_bars(pdev, IORESOURCE_MEM));
2103 err_pci_reg:
2104 err_dma:
2105 pci_disable_device(pdev);
2106 return err;
2110 * igb_remove - Device Removal Routine
2111 * @pdev: PCI device information struct
2113 * igb_remove is called by the PCI subsystem to alert the driver
2114 * that it should release a PCI device. This could be caused by a
2115 * Hot-Plug event, or because the driver is going to be removed from
2116 * memory.
2118 static void __devexit igb_remove(struct pci_dev *pdev)
2120 struct net_device *netdev = pci_get_drvdata(pdev);
2121 struct igb_adapter *adapter = netdev_priv(netdev);
2122 struct e1000_hw *hw = &adapter->hw;
2125 * The watchdog timer may be rescheduled, so explicitly
2126 * disable it from being rescheduled.
2128 set_bit(__IGB_DOWN, &adapter->state);
2129 del_timer_sync(&adapter->watchdog_timer);
2130 del_timer_sync(&adapter->phy_info_timer);
2132 cancel_work_sync(&adapter->reset_task);
2133 cancel_work_sync(&adapter->watchdog_task);
2135 #ifdef CONFIG_IGB_DCA
2136 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2137 dev_info(&pdev->dev, "DCA disabled\n");
2138 dca_remove_requester(&pdev->dev);
2139 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2140 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2142 #endif
2144 /* Release control of h/w to f/w. If f/w is AMT enabled, this
2145 * would have already happened in close and is redundant. */
2146 igb_release_hw_control(adapter);
2148 unregister_netdev(netdev);
2150 igb_clear_interrupt_scheme(adapter);
2152 #ifdef CONFIG_PCI_IOV
2153 /* reclaim resources allocated to VFs */
2154 if (adapter->vf_data) {
2155 /* disable iov and allow time for transactions to clear */
2156 pci_disable_sriov(pdev);
2157 msleep(500);
2159 kfree(adapter->vf_data);
2160 adapter->vf_data = NULL;
2161 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2162 msleep(100);
2163 dev_info(&pdev->dev, "IOV Disabled\n");
2165 #endif
2167 iounmap(hw->hw_addr);
2168 if (hw->flash_address)
2169 iounmap(hw->flash_address);
2170 pci_release_selected_regions(pdev,
2171 pci_select_bars(pdev, IORESOURCE_MEM));
2173 free_netdev(netdev);
2175 pci_disable_pcie_error_reporting(pdev);
2177 pci_disable_device(pdev);
2181 * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2182 * @adapter: board private structure to initialize
2184 * This function initializes the vf specific data storage and then attempts to
2185 * allocate the VFs. It is ordered this way because it is much
2186 * more expensive time-wise to disable SR-IOV than it is to allocate and free
2187 * the memory for the VFs.
2189 static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2191 #ifdef CONFIG_PCI_IOV
2192 struct pci_dev *pdev = adapter->pdev;
2194 if (adapter->vfs_allocated_count) {
2195 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2196 sizeof(struct vf_data_storage),
2197 GFP_KERNEL);
2198 /* if allocation failed then we do not support SR-IOV */
2199 if (!adapter->vf_data) {
2200 adapter->vfs_allocated_count = 0;
2201 dev_err(&pdev->dev, "Unable to allocate memory for VF "
2202 "Data Storage\n");
2206 if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
2207 kfree(adapter->vf_data);
2208 adapter->vf_data = NULL;
2209 #endif /* CONFIG_PCI_IOV */
2210 adapter->vfs_allocated_count = 0;
2211 #ifdef CONFIG_PCI_IOV
2212 } else {
2213 unsigned char mac_addr[ETH_ALEN];
2214 int i;
2215 dev_info(&pdev->dev, "%d vfs allocated\n",
2216 adapter->vfs_allocated_count);
2217 for (i = 0; i < adapter->vfs_allocated_count; i++) {
2218 random_ether_addr(mac_addr);
2219 igb_set_vf_mac(adapter, i, mac_addr);
2221 /* DMA Coalescing is not supported in IOV mode. */
2222 if (adapter->flags & IGB_FLAG_DMAC)
2223 adapter->flags &= ~IGB_FLAG_DMAC;
2225 #endif /* CONFIG_PCI_IOV */
2230 * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2231 * @adapter: board private structure to initialize
2233 * igb_init_hw_timer initializes the function pointer and values for the hw
2234 * timer found in hardware.
2236 static void igb_init_hw_timer(struct igb_adapter *adapter)
2238 struct e1000_hw *hw = &adapter->hw;
2240 switch (hw->mac.type) {
2241 case e1000_i350:
2242 case e1000_82580:
2243 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2244 adapter->cycles.read = igb_read_clock;
2245 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2246 adapter->cycles.mult = 1;
2248 * The 82580 timesync hardware advances the system timer by 8ns every
2249 * 8ns and the value cannot be shifted. Instead we need to shift
2250 * the registers to generate a 64bit timer value. As a result
2251 * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2252 * 24 in order to generate a larger value for synchronization.
2254 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
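/* With cycles.mult == 1 the generic clocksource conversion is simply
 * ns = (counter * mult) >> shift, so the shift chosen here has to match the
 * left-shift applied when the SYSTIM registers are read (see the comment
 * above); IGB_82580_TSYNC_SHIFT is assumed to carry that value.
 */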
2255 /* disable system timer temporarily by setting bit 31 */
2256 wr32(E1000_TSAUXC, 0x80000000);
2257 wrfl();
2259 /* Set registers so that rollover occurs soon to test this. */
2260 wr32(E1000_SYSTIMR, 0x00000000);
2261 wr32(E1000_SYSTIML, 0x80000000);
2262 wr32(E1000_SYSTIMH, 0x000000FF);
2263 wrfl();
2265 /* enable system timer by clearing bit 31 */
2266 wr32(E1000_TSAUXC, 0x0);
2267 wrfl();
2269 timecounter_init(&adapter->clock,
2270 &adapter->cycles,
2271 ktime_to_ns(ktime_get_real()));
2273 * Synchronize our NIC clock against the system wall clock. NIC
2274 * time stamp reading requires ~3us per sample, and each sample
2275 * was pretty stable even under load => only 10 samples are
2276 * required for each offset comparison.
2278 memset(&adapter->compare, 0, sizeof(adapter->compare));
2279 adapter->compare.source = &adapter->clock;
2280 adapter->compare.target = ktime_get_real;
2281 adapter->compare.num_samples = 10;
2282 timecompare_update(&adapter->compare, 0);
2283 break;
2284 case e1000_82576:
2286 * Initialize hardware timer: we keep it running just in case
2287 * that some program needs it later on.
2289 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2290 adapter->cycles.read = igb_read_clock;
2291 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2292 adapter->cycles.mult = 1;
2294 * Scale the NIC clock cycle by a large factor so that
2295 * relatively small clock corrections can be added or
2296 * subtracted at each clock tick. The drawbacks of a large
2297 * factor are a) that the clock register overflows more quickly
2298 * (not such a big deal) and b) that the increment per tick has
2299 * to fit into 24 bits. As a result we need to use a shift of
2300 * 19 so we can fit a value of 16 into the TIMINCA register.
2302 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2303 wr32(E1000_TIMINCA,
2304 (1 << E1000_TIMINCA_16NS_SHIFT) |
2305 (16 << IGB_82576_TSYNC_SHIFT));
2307 /* Set registers so that rollover occurs soon to test this. */
2308 wr32(E1000_SYSTIML, 0x00000000);
2309 wr32(E1000_SYSTIMH, 0xFF800000);
2310 wrfl();
2312 timecounter_init(&adapter->clock,
2313 &adapter->cycles,
2314 ktime_to_ns(ktime_get_real()));
2316 * Synchronize our NIC clock against the system wall clock. NIC
2317 * time stamp reading requires ~3us per sample, and each sample
2318 * was pretty stable even under load => only 10 samples are
2319 * required for each offset comparison.
2321 memset(&adapter->compare, 0, sizeof(adapter->compare));
2322 adapter->compare.source = &adapter->clock;
2323 adapter->compare.target = ktime_get_real;
2324 adapter->compare.num_samples = 10;
2325 timecompare_update(&adapter->compare, 0);
2326 break;
2327 case e1000_82575:
2328 /* 82575 does not support timesync */
2329 default:
2330 break;
2336 * igb_sw_init - Initialize general software structures (struct igb_adapter)
2337 * @adapter: board private structure to initialize
2339 * igb_sw_init initializes the Adapter private data structure.
2340 * Fields are initialized based on PCI device information and
2341 * OS network device settings (MTU size).
2343 static int __devinit igb_sw_init(struct igb_adapter *adapter)
2345 struct e1000_hw *hw = &adapter->hw;
2346 struct net_device *netdev = adapter->netdev;
2347 struct pci_dev *pdev = adapter->pdev;
2349 pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2351 adapter->tx_ring_count = IGB_DEFAULT_TXD;
2352 adapter->rx_ring_count = IGB_DEFAULT_RXD;
2353 adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2354 adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2356 adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
2357 adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
2359 spin_lock_init(&adapter->stats64_lock);
2360 #ifdef CONFIG_PCI_IOV
2361 switch (hw->mac.type) {
2362 case e1000_82576:
2363 case e1000_i350:
2364 if (max_vfs > 7) {
2365 dev_warn(&pdev->dev,
2366 "Maximum of 7 VFs per PF, using max\n");
2367 adapter->vfs_allocated_count = 7;
2368 } else
2369 adapter->vfs_allocated_count = max_vfs;
2370 break;
2371 default:
2372 break;
2374 #endif /* CONFIG_PCI_IOV */
2375 adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2378 * if rss_queues > 4, or if VFs are going to be allocated while more
2379 * than one rss queue is in use, then we should combine the queues into
2380 * a queue pair in order to conserve interrupts due to the limited supply
2382 if ((adapter->rss_queues > 4) ||
2383 ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2384 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
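/* Pairing puts one Tx and one Rx ring on the same interrupt vector. As a
 * rough illustration (not exact vector counts): eight unpaired rss queues
 * would want a vector per Tx ring, a vector per Rx ring and one more for
 * link/other events, which can exceed the number of MSI-X vectors available;
 * pairing a Tx and an Rx ring on one vector roughly halves the demand.
 */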
2386 /* This call may decrease the number of queues */
2387 if (igb_init_interrupt_scheme(adapter)) {
2388 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2389 return -ENOMEM;
2392 igb_probe_vfs(adapter);
2394 /* Explicitly disable IRQ since the NIC can be in any state. */
2395 igb_irq_disable(adapter);
2397 if (hw->mac.type == e1000_i350)
2398 adapter->flags &= ~IGB_FLAG_DMAC;
2400 set_bit(__IGB_DOWN, &adapter->state);
2401 return 0;
2405 * igb_open - Called when a network interface is made active
2406 * @netdev: network interface device structure
2408 * Returns 0 on success, negative value on failure
2410 * The open entry point is called when a network interface is made
2411 * active by the system (IFF_UP). At this point all resources needed
2412 * for transmit and receive operations are allocated, the interrupt
2413 * handler is registered with the OS, the watchdog timer is started,
2414 * and the stack is notified that the interface is ready.
2416 static int igb_open(struct net_device *netdev)
2418 struct igb_adapter *adapter = netdev_priv(netdev);
2419 struct e1000_hw *hw = &adapter->hw;
2420 int err;
2421 int i;
2423 /* disallow open during test */
2424 if (test_bit(__IGB_TESTING, &adapter->state))
2425 return -EBUSY;
2427 netif_carrier_off(netdev);
2429 /* allocate transmit descriptors */
2430 err = igb_setup_all_tx_resources(adapter);
2431 if (err)
2432 goto err_setup_tx;
2434 /* allocate receive descriptors */
2435 err = igb_setup_all_rx_resources(adapter);
2436 if (err)
2437 goto err_setup_rx;
2439 igb_power_up_link(adapter);
2441 /* before we allocate an interrupt, we must be ready to handle it.
2442 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2443 * as soon as we call pci_request_irq, so we have to setup our
2444 * clean_rx handler before we do so. */
2445 igb_configure(adapter);
2447 err = igb_request_irq(adapter);
2448 if (err)
2449 goto err_req_irq;
2451 /* From here on the code is the same as igb_up() */
2452 clear_bit(__IGB_DOWN, &adapter->state);
2454 for (i = 0; i < adapter->num_q_vectors; i++) {
2455 struct igb_q_vector *q_vector = adapter->q_vector[i];
2456 napi_enable(&q_vector->napi);
2459 /* Clear any pending interrupts. */
2460 rd32(E1000_ICR);
2462 igb_irq_enable(adapter);
2464 /* notify VFs that reset has been completed */
2465 if (adapter->vfs_allocated_count) {
2466 u32 reg_data = rd32(E1000_CTRL_EXT);
2467 reg_data |= E1000_CTRL_EXT_PFRSTD;
2468 wr32(E1000_CTRL_EXT, reg_data);
2471 netif_tx_start_all_queues(netdev);
2473 /* start the watchdog. */
2474 hw->mac.get_link_status = 1;
2475 schedule_work(&adapter->watchdog_task);
2477 return 0;
2479 err_req_irq:
2480 igb_release_hw_control(adapter);
2481 igb_power_down_link(adapter);
2482 igb_free_all_rx_resources(adapter);
2483 err_setup_rx:
2484 igb_free_all_tx_resources(adapter);
2485 err_setup_tx:
2486 igb_reset(adapter);
2488 return err;
2492 * igb_close - Disables a network interface
2493 * @netdev: network interface device structure
2495 * Returns 0, this is not allowed to fail
2497 * The close entry point is called when an interface is de-activated
2498 * by the OS. The hardware is still under the driver's control, but
2499 * needs to be disabled. A global MAC reset is issued to stop the
2500 * hardware, and all transmit and receive resources are freed.
2502 static int igb_close(struct net_device *netdev)
2504 struct igb_adapter *adapter = netdev_priv(netdev);
2506 WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2507 igb_down(adapter);
2509 igb_free_irq(adapter);
2511 igb_free_all_tx_resources(adapter);
2512 igb_free_all_rx_resources(adapter);
2514 return 0;
2518 * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2519 * @tx_ring: tx descriptor ring (for a specific queue) to setup
2521 * Return 0 on success, negative on failure
2523 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2525 struct device *dev = tx_ring->dev;
2526 int size;
2528 size = sizeof(struct igb_buffer) * tx_ring->count;
2529 tx_ring->buffer_info = vzalloc(size);
2530 if (!tx_ring->buffer_info)
2531 goto err;
2533 /* round up to nearest 4K */
2534 tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2535 tx_ring->size = ALIGN(tx_ring->size, 4096);
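/* Each advanced Tx descriptor is 16 bytes, so (for example, assuming the
 * default IGB_DEFAULT_TXD of 256 descriptors) the ring is exactly 4096 bytes
 * and the ALIGN is a no-op; other counts are rounded up to a whole page.
 */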
2537 tx_ring->desc = dma_alloc_coherent(dev,
2538 tx_ring->size,
2539 &tx_ring->dma,
2540 GFP_KERNEL);
2542 if (!tx_ring->desc)
2543 goto err;
2545 tx_ring->next_to_use = 0;
2546 tx_ring->next_to_clean = 0;
2547 return 0;
2549 err:
2550 vfree(tx_ring->buffer_info);
2551 dev_err(dev,
2552 "Unable to allocate memory for the transmit descriptor ring\n");
2553 return -ENOMEM;
2557 * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2558 * (Descriptors) for all queues
2559 * @adapter: board private structure
2561 * Return 0 on success, negative on failure
2563 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2565 struct pci_dev *pdev = adapter->pdev;
2566 int i, err = 0;
2568 for (i = 0; i < adapter->num_tx_queues; i++) {
2569 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2570 if (err) {
2571 dev_err(&pdev->dev,
2572 "Allocation for Tx Queue %u failed\n", i);
2573 for (i--; i >= 0; i--)
2574 igb_free_tx_resources(adapter->tx_ring[i]);
2575 break;
2579 for (i = 0; i < IGB_ABS_MAX_TX_QUEUES; i++) {
2580 int r_idx = i % adapter->num_tx_queues;
2581 adapter->multi_tx_table[i] = adapter->tx_ring[r_idx];
2583 return err;
2587 * igb_setup_tctl - configure the transmit control registers
2588 * @adapter: Board private structure
2590 void igb_setup_tctl(struct igb_adapter *adapter)
2592 struct e1000_hw *hw = &adapter->hw;
2593 u32 tctl;
2595 /* disable queue 0 which is enabled by default on 82575 and 82576 */
2596 wr32(E1000_TXDCTL(0), 0);
2598 /* Program the Transmit Control Register */
2599 tctl = rd32(E1000_TCTL);
2600 tctl &= ~E1000_TCTL_CT;
2601 tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2602 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2604 igb_config_collision_dist(hw);
2606 /* Enable transmits */
2607 tctl |= E1000_TCTL_EN;
2609 wr32(E1000_TCTL, tctl);
2613 * igb_configure_tx_ring - Configure transmit ring after Reset
2614 * @adapter: board private structure
2615 * @ring: tx ring to configure
2617 * Configure a transmit ring after a reset.
2619 void igb_configure_tx_ring(struct igb_adapter *adapter,
2620 struct igb_ring *ring)
2622 struct e1000_hw *hw = &adapter->hw;
2623 u32 txdctl;
2624 u64 tdba = ring->dma;
2625 int reg_idx = ring->reg_idx;
2627 /* disable the queue */
2628 txdctl = rd32(E1000_TXDCTL(reg_idx));
2629 wr32(E1000_TXDCTL(reg_idx),
2630 txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
2631 wrfl();
2632 mdelay(10);
2634 wr32(E1000_TDLEN(reg_idx),
2635 ring->count * sizeof(union e1000_adv_tx_desc));
2636 wr32(E1000_TDBAL(reg_idx),
2637 tdba & 0x00000000ffffffffULL);
2638 wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2640 ring->head = hw->hw_addr + E1000_TDH(reg_idx);
2641 ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2642 writel(0, ring->head);
2643 writel(0, ring->tail);
2645 txdctl |= IGB_TX_PTHRESH;
2646 txdctl |= IGB_TX_HTHRESH << 8;
2647 txdctl |= IGB_TX_WTHRESH << 16;
2649 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2650 wr32(E1000_TXDCTL(reg_idx), txdctl);
2654 * igb_configure_tx - Configure transmit Unit after Reset
2655 * @adapter: board private structure
2657 * Configure the Tx unit of the MAC after a reset.
2659 static void igb_configure_tx(struct igb_adapter *adapter)
2661 int i;
2663 for (i = 0; i < adapter->num_tx_queues; i++)
2664 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2668 * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2669 * @rx_ring: rx descriptor ring (for a specific queue) to setup
2671 * Returns 0 on success, negative on failure
2673 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2675 struct device *dev = rx_ring->dev;
2676 int size, desc_len;
2678 size = sizeof(struct igb_buffer) * rx_ring->count;
2679 rx_ring->buffer_info = vzalloc(size);
2680 if (!rx_ring->buffer_info)
2681 goto err;
2683 desc_len = sizeof(union e1000_adv_rx_desc);
2685 /* Round up to nearest 4K */
2686 rx_ring->size = rx_ring->count * desc_len;
2687 rx_ring->size = ALIGN(rx_ring->size, 4096);
2689 rx_ring->desc = dma_alloc_coherent(dev,
2690 rx_ring->size,
2691 &rx_ring->dma,
2692 GFP_KERNEL);
2694 if (!rx_ring->desc)
2695 goto err;
2697 rx_ring->next_to_clean = 0;
2698 rx_ring->next_to_use = 0;
2700 return 0;
2702 err:
2703 vfree(rx_ring->buffer_info);
2704 rx_ring->buffer_info = NULL;
2705 dev_err(dev, "Unable to allocate memory for the receive descriptor"
2706 " ring\n");
2707 return -ENOMEM;
2711 * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2712 * (Descriptors) for all queues
2713 * @adapter: board private structure
2715 * Return 0 on success, negative on failure
2717 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2719 struct pci_dev *pdev = adapter->pdev;
2720 int i, err = 0;
2722 for (i = 0; i < adapter->num_rx_queues; i++) {
2723 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2724 if (err) {
2725 dev_err(&pdev->dev,
2726 "Allocation for Rx Queue %u failed\n", i);
2727 for (i--; i >= 0; i--)
2728 igb_free_rx_resources(adapter->rx_ring[i]);
2729 break;
2733 return err;
2737 * igb_setup_mrqc - configure the multiple receive queue control registers
2738 * @adapter: Board private structure
2740 static void igb_setup_mrqc(struct igb_adapter *adapter)
2742 struct e1000_hw *hw = &adapter->hw;
2743 u32 mrqc, rxcsum;
2744 u32 j, num_rx_queues, shift = 0, shift2 = 0;
2745 union e1000_reta {
2746 u32 dword;
2747 u8 bytes[4];
2748 } reta;
2749 static const u8 rsshash[40] = {
2750 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2751 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2752 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2753 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2755 /* Fill out hash function seeds */
2756 for (j = 0; j < 10; j++) {
2757 u32 rsskey = rsshash[(j * 4)];
2758 rsskey |= rsshash[(j * 4) + 1] << 8;
2759 rsskey |= rsshash[(j * 4) + 2] << 16;
2760 rsskey |= rsshash[(j * 4) + 3] << 24;
2761 array_wr32(E1000_RSSRK(0), j, rsskey);
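/* The 40-byte rsshash seed above is packed little-endian into ten 32-bit
 * RSSRK registers; for example, the j == 0 iteration writes
 * 0x6d | 0x5a << 8 | 0x56 << 16 | 0xda << 24 == 0xda565a6d.
 */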
2764 num_rx_queues = adapter->rss_queues;
2766 if (adapter->vfs_allocated_count) {
2767 /* 82575 and 82576 support 2 RSS queues for VMDq */
2768 switch (hw->mac.type) {
2769 case e1000_i350:
2770 case e1000_82580:
2771 num_rx_queues = 1;
2772 shift = 0;
2773 break;
2774 case e1000_82576:
2775 shift = 3;
2776 num_rx_queues = 2;
2777 break;
2778 case e1000_82575:
2779 shift = 2;
2780 shift2 = 6;
2781 default:
2782 break;
2784 } else {
2785 if (hw->mac.type == e1000_82575)
2786 shift = 6;
2789 for (j = 0; j < (32 * 4); j++) {
2790 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2791 if (shift2)
2792 reta.bytes[j & 3] |= num_rx_queues << shift2;
2793 if ((j & 3) == 3)
2794 wr32(E1000_RETA(j >> 2), reta.dword);
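/* The loop above fills the 128-entry redirection table four entries at a
 * time: entries are collected in reta.bytes[] and one RETA register is
 * written on every fourth iteration. With num_rx_queues == 4 and shift == 0
 * the entries simply cycle 0, 1, 2, 3, ...
 */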
2798 * Disable raw packet checksumming so that RSS hash is placed in
2799 * descriptor on writeback. No need to enable TCP/UDP/IP checksum
2800 * offloads as they are enabled by default
2802 rxcsum = rd32(E1000_RXCSUM);
2803 rxcsum |= E1000_RXCSUM_PCSD;
2805 if (adapter->hw.mac.type >= e1000_82576)
2806 /* Enable Receive Checksum Offload for SCTP */
2807 rxcsum |= E1000_RXCSUM_CRCOFL;
2809 /* Don't need to set TUOFL or IPOFL, they default to 1 */
2810 wr32(E1000_RXCSUM, rxcsum);
2812 /* If VMDq is enabled then we set the appropriate mode for that, else
2813 * we default to RSS so that an RSS hash is calculated per packet even
2814 * if we are only using one queue */
2815 if (adapter->vfs_allocated_count) {
2816 if (hw->mac.type > e1000_82575) {
2817 /* Set the default pool for the PF's first queue */
2818 u32 vtctl = rd32(E1000_VT_CTL);
2819 vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2820 E1000_VT_CTL_DISABLE_DEF_POOL);
2821 vtctl |= adapter->vfs_allocated_count <<
2822 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2823 wr32(E1000_VT_CTL, vtctl);
2825 if (adapter->rss_queues > 1)
2826 mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2827 else
2828 mrqc = E1000_MRQC_ENABLE_VMDQ;
2829 } else {
2830 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2832 igb_vmm_control(adapter);
2835 * Generate RSS hash based on TCP port numbers and/or
2836 * IPv4/v6 src and dst addresses since UDP cannot be
2837 * hashed reliably due to IP fragmentation
2839 mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2840 E1000_MRQC_RSS_FIELD_IPV4_TCP |
2841 E1000_MRQC_RSS_FIELD_IPV6 |
2842 E1000_MRQC_RSS_FIELD_IPV6_TCP |
2843 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2845 wr32(E1000_MRQC, mrqc);
2849 * igb_setup_rctl - configure the receive control registers
2850 * @adapter: Board private structure
2852 void igb_setup_rctl(struct igb_adapter *adapter)
2854 struct e1000_hw *hw = &adapter->hw;
2855 u32 rctl;
2857 rctl = rd32(E1000_RCTL);
2859 rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2860 rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2862 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2863 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2866 * enable stripping of CRC. It's unlikely this will break BMC
2867 * redirection as it did with e1000. Newer features require
2868 * that the HW strips the CRC.
2870 rctl |= E1000_RCTL_SECRC;
2872 /* disable store bad packets and clear size bits. */
2873 rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2875 /* enable LPE to prevent packets larger than max_frame_size */
2876 rctl |= E1000_RCTL_LPE;
2878 /* disable queue 0 to prevent tail write w/o re-config */
2879 wr32(E1000_RXDCTL(0), 0);
2881 /* Attention!!! For SR-IOV PF driver operations you must enable
2882 * queue drop for all VF and PF queues to prevent head of line blocking
2883 * if an un-trusted VF does not provide descriptors to hardware.
2885 if (adapter->vfs_allocated_count) {
2886 /* set all queue drop enable bits */
2887 wr32(E1000_QDE, ALL_QUEUES);
2890 wr32(E1000_RCTL, rctl);
2893 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2894 int vfn)
2896 struct e1000_hw *hw = &adapter->hw;
2897 u32 vmolr;
2899 /* if it isn't the PF, check to see if VFs are enabled and
2900 * increase the size to support VLAN tags */
2901 if (vfn < adapter->vfs_allocated_count &&
2902 adapter->vf_data[vfn].vlans_enabled)
2903 size += VLAN_TAG_SIZE;
2905 vmolr = rd32(E1000_VMOLR(vfn));
2906 vmolr &= ~E1000_VMOLR_RLPML_MASK;
2907 vmolr |= size | E1000_VMOLR_LPE;
2908 wr32(E1000_VMOLR(vfn), vmolr);
2910 return 0;
2914 * igb_rlpml_set - set maximum receive packet size
2915 * @adapter: board private structure
2917 * Configure maximum receivable packet size.
2919 static void igb_rlpml_set(struct igb_adapter *adapter)
2921 u32 max_frame_size = adapter->max_frame_size;
2922 struct e1000_hw *hw = &adapter->hw;
2923 u16 pf_id = adapter->vfs_allocated_count;
2925 if (adapter->vlgrp)
2926 max_frame_size += VLAN_TAG_SIZE;
2928 /* if vfs are enabled we set RLPML to the largest possible request
2929 * size and set the VMOLR RLPML to the size we need */
2930 if (pf_id) {
2931 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2932 max_frame_size = MAX_JUMBO_FRAME_SIZE;
2935 wr32(E1000_RLPML, max_frame_size);
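/* When VFs are present, the global RLPML written here is opened up to the
 * maximum jumbo size and the per-pool limit is enforced instead through
 * VMOLR.RLPML via igb_set_vf_rlpml() above.
 */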
2938 static inline void igb_set_vmolr(struct igb_adapter *adapter,
2939 int vfn, bool aupe)
2941 struct e1000_hw *hw = &adapter->hw;
2942 u32 vmolr;
2945 * This register exists only on 82576 and newer, so on older
2946 * hardware we should exit and do nothing
2948 if (hw->mac.type < e1000_82576)
2949 return;
2951 vmolr = rd32(E1000_VMOLR(vfn));
2952 vmolr |= E1000_VMOLR_STRVLAN; /* Strip vlan tags */
2953 if (aupe)
2954 vmolr |= E1000_VMOLR_AUPE; /* Accept untagged packets */
2955 else
2956 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
2958 /* clear all bits that might not be set */
2959 vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
2961 if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
2962 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
2964 * for VMDq only allow the VFs and pool 0 to accept broadcast and
2965 * multicast packets
2967 if (vfn <= adapter->vfs_allocated_count)
2968 vmolr |= E1000_VMOLR_BAM; /* Accept broadcast */
2970 wr32(E1000_VMOLR(vfn), vmolr);
2974 * igb_configure_rx_ring - Configure a receive ring after Reset
2975 * @adapter: board private structure
2976 * @ring: receive ring to be configured
2978 * Configure the Rx unit of the MAC after a reset.
2980 void igb_configure_rx_ring(struct igb_adapter *adapter,
2981 struct igb_ring *ring)
2983 struct e1000_hw *hw = &adapter->hw;
2984 u64 rdba = ring->dma;
2985 int reg_idx = ring->reg_idx;
2986 u32 srrctl, rxdctl;
2988 /* disable the queue */
2989 rxdctl = rd32(E1000_RXDCTL(reg_idx));
2990 wr32(E1000_RXDCTL(reg_idx),
2991 rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
2993 /* Set DMA base address registers */
2994 wr32(E1000_RDBAL(reg_idx),
2995 rdba & 0x00000000ffffffffULL);
2996 wr32(E1000_RDBAH(reg_idx), rdba >> 32);
2997 wr32(E1000_RDLEN(reg_idx),
2998 ring->count * sizeof(union e1000_adv_rx_desc));
3000 /* initialize head and tail */
3001 ring->head = hw->hw_addr + E1000_RDH(reg_idx);
3002 ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
3003 writel(0, ring->head);
3004 writel(0, ring->tail);
3006 /* set descriptor configuration */
3007 if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
3008 srrctl = ALIGN(ring->rx_buffer_len, 64) <<
3009 E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3010 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
3011 srrctl |= IGB_RXBUFFER_16384 >>
3012 E1000_SRRCTL_BSIZEPKT_SHIFT;
3013 #else
3014 srrctl |= (PAGE_SIZE / 2) >>
3015 E1000_SRRCTL_BSIZEPKT_SHIFT;
3016 #endif
3017 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3018 } else {
3019 srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
3020 E1000_SRRCTL_BSIZEPKT_SHIFT;
3021 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
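/* Note on the sizing above (assuming the usual SRRCTL field layout): the
 * header buffer field is expressed in 64-byte units and the packet buffer
 * field in 1 KB units, which is why the buffer length is aligned to 64 for
 * header-split and to 1024 for one-buffer mode.
 */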
3023 if (hw->mac.type == e1000_82580)
3024 srrctl |= E1000_SRRCTL_TIMESTAMP;
3025 /* Only set Drop Enable if we are supporting multiple queues */
3026 if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
3027 srrctl |= E1000_SRRCTL_DROP_EN;
3029 wr32(E1000_SRRCTL(reg_idx), srrctl);
3031 /* set filtering for VMDQ pools */
3032 igb_set_vmolr(adapter, reg_idx & 0x7, true);
3034 /* enable receive descriptor fetching */
3035 rxdctl = rd32(E1000_RXDCTL(reg_idx));
3036 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3037 rxdctl &= 0xFFF00000;
3038 rxdctl |= IGB_RX_PTHRESH;
3039 rxdctl |= IGB_RX_HTHRESH << 8;
3040 rxdctl |= IGB_RX_WTHRESH << 16;
3041 wr32(E1000_RXDCTL(reg_idx), rxdctl);
3045 * igb_configure_rx - Configure receive Unit after Reset
3046 * @adapter: board private structure
3048 * Configure the Rx unit of the MAC after a reset.
3050 static void igb_configure_rx(struct igb_adapter *adapter)
3052 int i;
3054 /* set UTA to appropriate mode */
3055 igb_set_uta(adapter);
3057 /* set the correct pool for the PF default MAC address in entry 0 */
3058 igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
3059 adapter->vfs_allocated_count);
3061 /* Setup the HW Rx Head and Tail Descriptor Pointers and
3062 * the Base and Length of the Rx Descriptor Ring */
3063 for (i = 0; i < adapter->num_rx_queues; i++)
3064 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
3068 * igb_free_tx_resources - Free Tx Resources per Queue
3069 * @tx_ring: Tx descriptor ring for a specific queue
3071 * Free all transmit software resources
3073 void igb_free_tx_resources(struct igb_ring *tx_ring)
3075 igb_clean_tx_ring(tx_ring);
3077 vfree(tx_ring->buffer_info);
3078 tx_ring->buffer_info = NULL;
3080 /* if not set, then don't free */
3081 if (!tx_ring->desc)
3082 return;
3084 dma_free_coherent(tx_ring->dev, tx_ring->size,
3085 tx_ring->desc, tx_ring->dma);
3087 tx_ring->desc = NULL;
3091 * igb_free_all_tx_resources - Free Tx Resources for All Queues
3092 * @adapter: board private structure
3094 * Free all transmit software resources
3096 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3098 int i;
3100 for (i = 0; i < adapter->num_tx_queues; i++)
3101 igb_free_tx_resources(adapter->tx_ring[i]);
3104 void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
3105 struct igb_buffer *buffer_info)
3107 if (buffer_info->dma) {
3108 if (buffer_info->mapped_as_page)
3109 dma_unmap_page(tx_ring->dev,
3110 buffer_info->dma,
3111 buffer_info->length,
3112 DMA_TO_DEVICE);
3113 else
3114 dma_unmap_single(tx_ring->dev,
3115 buffer_info->dma,
3116 buffer_info->length,
3117 DMA_TO_DEVICE);
3118 buffer_info->dma = 0;
3120 if (buffer_info->skb) {
3121 dev_kfree_skb_any(buffer_info->skb);
3122 buffer_info->skb = NULL;
3124 buffer_info->time_stamp = 0;
3125 buffer_info->length = 0;
3126 buffer_info->next_to_watch = 0;
3127 buffer_info->mapped_as_page = false;
3131 * igb_clean_tx_ring - Free Tx Buffers
3132 * @tx_ring: ring to be cleaned
3134 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3136 struct igb_buffer *buffer_info;
3137 unsigned long size;
3138 unsigned int i;
3140 if (!tx_ring->buffer_info)
3141 return;
3142 /* Free all the Tx ring sk_buffs */
3144 for (i = 0; i < tx_ring->count; i++) {
3145 buffer_info = &tx_ring->buffer_info[i];
3146 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3149 size = sizeof(struct igb_buffer) * tx_ring->count;
3150 memset(tx_ring->buffer_info, 0, size);
3152 /* Zero out the descriptor ring */
3153 memset(tx_ring->desc, 0, tx_ring->size);
3155 tx_ring->next_to_use = 0;
3156 tx_ring->next_to_clean = 0;
3160 * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3161 * @adapter: board private structure
3163 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3165 int i;
3167 for (i = 0; i < adapter->num_tx_queues; i++)
3168 igb_clean_tx_ring(adapter->tx_ring[i]);
3172 * igb_free_rx_resources - Free Rx Resources
3173 * @rx_ring: ring to clean the resources from
3175 * Free all receive software resources
3177 void igb_free_rx_resources(struct igb_ring *rx_ring)
3179 igb_clean_rx_ring(rx_ring);
3181 vfree(rx_ring->buffer_info);
3182 rx_ring->buffer_info = NULL;
3184 /* if not set, then don't free */
3185 if (!rx_ring->desc)
3186 return;
3188 dma_free_coherent(rx_ring->dev, rx_ring->size,
3189 rx_ring->desc, rx_ring->dma);
3191 rx_ring->desc = NULL;
3195 * igb_free_all_rx_resources - Free Rx Resources for All Queues
3196 * @adapter: board private structure
3198 * Free all receive software resources
3200 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3202 int i;
3204 for (i = 0; i < adapter->num_rx_queues; i++)
3205 igb_free_rx_resources(adapter->rx_ring[i]);
3209 * igb_clean_rx_ring - Free Rx Buffers per Queue
3210 * @rx_ring: ring to free buffers from
3212 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3214 struct igb_buffer *buffer_info;
3215 unsigned long size;
3216 unsigned int i;
3218 if (!rx_ring->buffer_info)
3219 return;
3221 /* Free all the Rx ring sk_buffs */
3222 for (i = 0; i < rx_ring->count; i++) {
3223 buffer_info = &rx_ring->buffer_info[i];
3224 if (buffer_info->dma) {
3225 dma_unmap_single(rx_ring->dev,
3226 buffer_info->dma,
3227 rx_ring->rx_buffer_len,
3228 DMA_FROM_DEVICE);
3229 buffer_info->dma = 0;
3232 if (buffer_info->skb) {
3233 dev_kfree_skb(buffer_info->skb);
3234 buffer_info->skb = NULL;
3236 if (buffer_info->page_dma) {
3237 dma_unmap_page(rx_ring->dev,
3238 buffer_info->page_dma,
3239 PAGE_SIZE / 2,
3240 DMA_FROM_DEVICE);
3241 buffer_info->page_dma = 0;
3243 if (buffer_info->page) {
3244 put_page(buffer_info->page);
3245 buffer_info->page = NULL;
3246 buffer_info->page_offset = 0;
3250 size = sizeof(struct igb_buffer) * rx_ring->count;
3251 memset(rx_ring->buffer_info, 0, size);
3253 /* Zero out the descriptor ring */
3254 memset(rx_ring->desc, 0, rx_ring->size);
3256 rx_ring->next_to_clean = 0;
3257 rx_ring->next_to_use = 0;
3261 * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3262 * @adapter: board private structure
3264 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3266 int i;
3268 for (i = 0; i < adapter->num_rx_queues; i++)
3269 igb_clean_rx_ring(adapter->rx_ring[i]);
3273 * igb_set_mac - Change the Ethernet Address of the NIC
3274 * @netdev: network interface device structure
3275 * @p: pointer to an address structure
3277 * Returns 0 on success, negative on failure
3279 static int igb_set_mac(struct net_device *netdev, void *p)
3281 struct igb_adapter *adapter = netdev_priv(netdev);
3282 struct e1000_hw *hw = &adapter->hw;
3283 struct sockaddr *addr = p;
3285 if (!is_valid_ether_addr(addr->sa_data))
3286 return -EADDRNOTAVAIL;
3288 memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3289 memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3291 /* set the correct pool for the new PF MAC address in entry 0 */
3292 igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3293 adapter->vfs_allocated_count);
3295 return 0;
3299 * igb_write_mc_addr_list - write multicast addresses to MTA
3300 * @netdev: network interface device structure
3302 * Writes multicast address list to the MTA hash table.
3303 * Returns: -ENOMEM on failure
3304 * 0 on no addresses written
3305 * X on writing X addresses to MTA
3307 static int igb_write_mc_addr_list(struct net_device *netdev)
3309 struct igb_adapter *adapter = netdev_priv(netdev);
3310 struct e1000_hw *hw = &adapter->hw;
3311 struct netdev_hw_addr *ha;
3312 u8 *mta_list;
3313 int i;
3315 if (netdev_mc_empty(netdev)) {
3316 /* nothing to program, so clear mc list */
3317 igb_update_mc_addr_list(hw, NULL, 0);
3318 igb_restore_vf_multicasts(adapter);
3319 return 0;
3322 mta_list = kzalloc(netdev_mc_count(netdev) * ETH_ALEN, GFP_ATOMIC);
3323 if (!mta_list)
3324 return -ENOMEM;
3326 /* The shared function expects a packed array of only addresses. */
3327 i = 0;
3328 netdev_for_each_mc_addr(ha, netdev)
3329 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3331 igb_update_mc_addr_list(hw, mta_list, i);
3332 kfree(mta_list);
3334 return netdev_mc_count(netdev);
3338 * igb_write_uc_addr_list - write unicast addresses to RAR table
3339 * @netdev: network interface device structure
3341 * Writes unicast address list to the RAR table.
3342 * Returns: -ENOMEM on failure/insufficient address space
3343 * 0 on no addresses written
3344 * X on writing X addresses to the RAR table
3346 static int igb_write_uc_addr_list(struct net_device *netdev)
3348 struct igb_adapter *adapter = netdev_priv(netdev);
3349 struct e1000_hw *hw = &adapter->hw;
3350 unsigned int vfn = adapter->vfs_allocated_count;
3351 unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
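/* One RAR entry is reserved per allocated VF plus one for the PF's own MAC
 * address (entry 0), so only the remainder is available here for additional
 * unicast addresses.
 */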
3352 int count = 0;
3354 /* return ENOMEM indicating insufficient memory for addresses */
3355 if (netdev_uc_count(netdev) > rar_entries)
3356 return -ENOMEM;
3358 if (!netdev_uc_empty(netdev) && rar_entries) {
3359 struct netdev_hw_addr *ha;
3361 netdev_for_each_uc_addr(ha, netdev) {
3362 if (!rar_entries)
3363 break;
3364 igb_rar_set_qsel(adapter, ha->addr,
3365 rar_entries--,
3366 vfn);
3367 count++;
3370 /* write the addresses in reverse order to avoid write combining */
3371 for (; rar_entries > 0 ; rar_entries--) {
3372 wr32(E1000_RAH(rar_entries), 0);
3373 wr32(E1000_RAL(rar_entries), 0);
3375 wrfl();
3377 return count;
3381 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3382 * @netdev: network interface device structure
3384 * The set_rx_mode entry point is called whenever the unicast or multicast
3385 * address lists or the network interface flags are updated. This routine is
3386 * responsible for configuring the hardware for proper unicast, multicast,
3387 * promiscuous mode, and all-multi behavior.
3389 static void igb_set_rx_mode(struct net_device *netdev)
3391 struct igb_adapter *adapter = netdev_priv(netdev);
3392 struct e1000_hw *hw = &adapter->hw;
3393 unsigned int vfn = adapter->vfs_allocated_count;
3394 u32 rctl, vmolr = 0;
3395 int count;
3397 /* Check for Promiscuous and All Multicast modes */
3398 rctl = rd32(E1000_RCTL);
3400 /* clear the affected bits */
3401 rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3403 if (netdev->flags & IFF_PROMISC) {
3404 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3405 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3406 } else {
3407 if (netdev->flags & IFF_ALLMULTI) {
3408 rctl |= E1000_RCTL_MPE;
3409 vmolr |= E1000_VMOLR_MPME;
3410 } else {
3412 * Write addresses to the MTA; if the attempt fails
3413 * then we should just turn on multicast promiscuous mode so
3414 * that we can at least receive multicast traffic
3416 count = igb_write_mc_addr_list(netdev);
3417 if (count < 0) {
3418 rctl |= E1000_RCTL_MPE;
3419 vmolr |= E1000_VMOLR_MPME;
3420 } else if (count) {
3421 vmolr |= E1000_VMOLR_ROMPE;
3425 * Write addresses to available RAR registers; if there is not
3426 * sufficient space to store all the addresses then enable
3427 * unicast promiscuous mode
3429 count = igb_write_uc_addr_list(netdev);
3430 if (count < 0) {
3431 rctl |= E1000_RCTL_UPE;
3432 vmolr |= E1000_VMOLR_ROPE;
3434 rctl |= E1000_RCTL_VFE;
3436 wr32(E1000_RCTL, rctl);
3439 * In order to support SR-IOV and eventually VMDq it is necessary to set
3440 * the VMOLR to enable the appropriate modes. Without this workaround,
3441 * VLAN tag stripping would not be done for frames that arrive only
3442 * because we are the default pool
3444 if (hw->mac.type < e1000_82576)
3445 return;
3447 vmolr |= rd32(E1000_VMOLR(vfn)) &
3448 ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3449 wr32(E1000_VMOLR(vfn), vmolr);
3450 igb_restore_vf_multicasts(adapter);
3453 static void igb_check_wvbr(struct igb_adapter *adapter)
3455 struct e1000_hw *hw = &adapter->hw;
3456 u32 wvbr = 0;
3458 switch (hw->mac.type) {
3459 case e1000_82576:
3460 case e1000_i350:
3461 if (!(wvbr = rd32(E1000_WVBR)))
3462 return;
3463 break;
3464 default:
3465 break;
3468 adapter->wvbr |= wvbr;
3471 #define IGB_STAGGERED_QUEUE_OFFSET 8
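/* The WVBR bits checked below indicate spoof events per VF queue; the second
 * queue of each pool sits IGB_STAGGERED_QUEUE_OFFSET bits above the first,
 * which is why igb_spoof_check() tests both bit j and bit (j + 8) per VF.
 */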
3473 static void igb_spoof_check(struct igb_adapter *adapter)
3475 int j;
3477 if (!adapter->wvbr)
3478 return;
3480 for (j = 0; j < adapter->vfs_allocated_count; j++) {
3481 if (adapter->wvbr & (1 << j) ||
3482 adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3483 dev_warn(&adapter->pdev->dev,
3484 "Spoof event(s) detected on VF %d\n", j);
3485 adapter->wvbr &=
3486 ~((1 << j) |
3487 (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3492 /* Need to wait a few seconds after link up to get diagnostic information from
3493 * the phy */
3494 static void igb_update_phy_info(unsigned long data)
3496 struct igb_adapter *adapter = (struct igb_adapter *) data;
3497 igb_get_phy_info(&adapter->hw);
3501 * igb_has_link - check shared code for link and determine up/down
3502 * @adapter: pointer to driver private info
3504 bool igb_has_link(struct igb_adapter *adapter)
3506 struct e1000_hw *hw = &adapter->hw;
3507 bool link_active = false;
3508 s32 ret_val = 0;
3510 /* get_link_status is set on LSC (link status) interrupt or
3511 * rx sequence error interrupt, and stays set until
3512 * e1000_check_for_link establishes link; this applies to
3513 * copper adapters ONLY
3515 switch (hw->phy.media_type) {
3516 case e1000_media_type_copper:
3517 if (hw->mac.get_link_status) {
3518 ret_val = hw->mac.ops.check_for_link(hw);
3519 link_active = !hw->mac.get_link_status;
3520 } else {
3521 link_active = true;
3523 break;
3524 case e1000_media_type_internal_serdes:
3525 ret_val = hw->mac.ops.check_for_link(hw);
3526 link_active = hw->mac.serdes_has_link;
3527 break;
3528 default:
3529 case e1000_media_type_unknown:
3530 break;
3533 return link_active;
3536 static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event)
3538 bool ret = false;
3539 u32 ctrl_ext, thstat;
3541 /* check for thermal sensor event on i350, copper only */
3542 if (hw->mac.type == e1000_i350) {
3543 thstat = rd32(E1000_THSTAT);
3544 ctrl_ext = rd32(E1000_CTRL_EXT);
3546 if ((hw->phy.media_type == e1000_media_type_copper) &&
3547 !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) {
3548 ret = !!(thstat & event);
3552 return ret;
3556 * igb_watchdog - Timer Call-back
3557 * @data: pointer to adapter cast into an unsigned long
3559 static void igb_watchdog(unsigned long data)
3561 struct igb_adapter *adapter = (struct igb_adapter *)data;
3562 /* Do the rest outside of interrupt context */
3563 schedule_work(&adapter->watchdog_task);
3566 static void igb_watchdog_task(struct work_struct *work)
3568 struct igb_adapter *adapter = container_of(work,
3569 struct igb_adapter,
3570 watchdog_task);
3571 struct e1000_hw *hw = &adapter->hw;
3572 struct net_device *netdev = adapter->netdev;
3573 u32 link;
3574 int i;
3576 link = igb_has_link(adapter);
3577 if (link) {
3578 if (!netif_carrier_ok(netdev)) {
3579 u32 ctrl;
3580 hw->mac.ops.get_speed_and_duplex(hw,
3581 &adapter->link_speed,
3582 &adapter->link_duplex);
3584 ctrl = rd32(E1000_CTRL);
3585 /* Link status message must follow this format */
3586 printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3587 "Flow Control: %s\n",
3588 netdev->name,
3589 adapter->link_speed,
3590 adapter->link_duplex == FULL_DUPLEX ?
3591 "Full Duplex" : "Half Duplex",
3592 ((ctrl & E1000_CTRL_TFCE) &&
3593 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3594 ((ctrl & E1000_CTRL_RFCE) ? "RX" :
3595 ((ctrl & E1000_CTRL_TFCE) ? "TX" : "None")));
3597 /* check for thermal sensor event */
3598 if (igb_thermal_sensor_event(hw, E1000_THSTAT_LINK_THROTTLE)) {
3599 printk(KERN_INFO "igb: %s The network adapter "
3600 "link speed was downshifted "
3601 "because it overheated.\n",
3602 netdev->name);
3605 /* adjust timeout factor according to speed/duplex */
3606 adapter->tx_timeout_factor = 1;
3607 switch (adapter->link_speed) {
3608 case SPEED_10:
3609 adapter->tx_timeout_factor = 14;
3610 break;
3611 case SPEED_100:
3612 /* maybe add some timeout factor ? */
3613 break;
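/* At lower link speeds a full Tx ring drains far more slowly, so the
 * watchdog's hang-detection timeout is scaled up (14x at 10 Mb/s);
 * 100 Mb/s is left at the default factor of 1 set above.
 */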
3616 netif_carrier_on(netdev);
3618 igb_ping_all_vfs(adapter);
3619 igb_check_vf_rate_limit(adapter);
3621 /* link state has changed, schedule phy info update */
3622 if (!test_bit(__IGB_DOWN, &adapter->state))
3623 mod_timer(&adapter->phy_info_timer,
3624 round_jiffies(jiffies + 2 * HZ));
3626 } else {
3627 if (netif_carrier_ok(netdev)) {
3628 adapter->link_speed = 0;
3629 adapter->link_duplex = 0;
3631 /* check for thermal sensor event */
3632 if (igb_thermal_sensor_event(hw, E1000_THSTAT_PWR_DOWN)) {
3633 printk(KERN_ERR "igb: %s The network adapter "
3634 "was stopped because it "
3635 "overheated.\n",
3636 netdev->name);
3639 /* Link status message must follow this format */
3640 printk(KERN_INFO "igb: %s NIC Link is Down\n",
3641 netdev->name);
3642 netif_carrier_off(netdev);
3644 igb_ping_all_vfs(adapter);
3646 /* link state has changed, schedule phy info update */
3647 if (!test_bit(__IGB_DOWN, &adapter->state))
3648 mod_timer(&adapter->phy_info_timer,
3649 round_jiffies(jiffies + 2 * HZ));
3653 spin_lock(&adapter->stats64_lock);
3654 igb_update_stats(adapter, &adapter->stats64);
3655 spin_unlock(&adapter->stats64_lock);
3657 for (i = 0; i < adapter->num_tx_queues; i++) {
3658 struct igb_ring *tx_ring = adapter->tx_ring[i];
3659 if (!netif_carrier_ok(netdev)) {
3660 /* We've lost link, so the controller stops DMA,
3661 * but we've got queued Tx work that's never going
3662 * to get done, so reset controller to flush Tx.
3663 * (Do the reset outside of interrupt context). */
3664 if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3665 adapter->tx_timeout_count++;
3666 schedule_work(&adapter->reset_task);
3667 /* return immediately since reset is imminent */
3668 return;
3672 /* Force detection of hung controller every watchdog period */
3673 tx_ring->detect_tx_hung = true;
3676 /* Cause software interrupt to ensure rx ring is cleaned */
3677 if (adapter->msix_entries) {
3678 u32 eics = 0;
3679 for (i = 0; i < adapter->num_q_vectors; i++) {
3680 struct igb_q_vector *q_vector = adapter->q_vector[i];
3681 eics |= q_vector->eims_value;
3683 wr32(E1000_EICS, eics);
3684 } else {
3685 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3688 igb_spoof_check(adapter);
3690 /* Reset the timer */
3691 if (!test_bit(__IGB_DOWN, &adapter->state))
3692 mod_timer(&adapter->watchdog_timer,
3693 round_jiffies(jiffies + 2 * HZ));
3696 enum latency_range {
3697 lowest_latency = 0,
3698 low_latency = 1,
3699 bulk_latency = 2,
3700 latency_invalid = 255
3704 * igb_update_ring_itr - update the dynamic ITR value based on packet size
3706 * Stores a new ITR value based strictly on packet size. This
3707 * algorithm is less sophisticated than that used in igb_update_itr,
3708 * due to the difficulty of synchronizing statistics across multiple
3709 * receive rings. The divisors and thresholds used by this function
3710 * were determined based on theoretical maximum wire speed and testing
3711 * data, in order to minimize response time while increasing bulk
3712 * throughput.
3713 * This functionality is controlled by the InterruptThrottleRate module
3714 * parameter (see igb_param.c)
3715 * NOTE: This function is called only when operating in a multiqueue
3716 * receive environment.
3717 * @q_vector: pointer to q_vector
3719 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3721 int new_val = q_vector->itr_val;
3722 int avg_wire_size = 0;
3723 struct igb_adapter *adapter = q_vector->adapter;
3724 struct igb_ring *ring;
3725 unsigned int packets;
3727 /* For non-gigabit speeds, just fix the interrupt rate at 4000
3728 * ints/sec - an ITR value of 976.
3730 if (adapter->link_speed != SPEED_1000) {
3731 new_val = 976;
3732 goto set_itr_val;
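/* The itr_val numbers in this function appear to be in ~256 ns units:
 * 976 * 256 ns is roughly 250 us, i.e. about 4000 interrupts/sec, and 196
 * (the floor applied below for itr mode 3) works out to roughly 20000
 * interrupts/sec.
 */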
3735 ring = q_vector->rx_ring;
3736 if (ring) {
3737 packets = ACCESS_ONCE(ring->total_packets);
3739 if (packets)
3740 avg_wire_size = ring->total_bytes / packets;
3743 ring = q_vector->tx_ring;
3744 if (ring) {
3745 packets = ACCESS_ONCE(ring->total_packets);
3747 if (packets)
3748 avg_wire_size = max_t(u32, avg_wire_size,
3749 ring->total_bytes / packets);
3752 /* if avg_wire_size isn't set no work was done */
3753 if (!avg_wire_size)
3754 goto clear_counts;
3756 /* Add 24 bytes to size to account for CRC, preamble, and gap */
3757 avg_wire_size += 24;
3759 /* Don't starve jumbo frames */
3760 avg_wire_size = min(avg_wire_size, 3000);
3762 /* Give a little boost to mid-size frames */
3763 if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3764 new_val = avg_wire_size / 3;
3765 else
3766 new_val = avg_wire_size / 2;
3768 /* when in itr mode 3 do not exceed 20K ints/sec */
3769 if (adapter->rx_itr_setting == 3 && new_val < 196)
3770 new_val = 196;
3772 set_itr_val:
3773 if (new_val != q_vector->itr_val) {
3774 q_vector->itr_val = new_val;
3775 q_vector->set_itr = 1;
3777 clear_counts:
3778 if (q_vector->rx_ring) {
3779 q_vector->rx_ring->total_bytes = 0;
3780 q_vector->rx_ring->total_packets = 0;
3782 if (q_vector->tx_ring) {
3783 q_vector->tx_ring->total_bytes = 0;
3784 q_vector->tx_ring->total_packets = 0;
3789 * igb_update_itr - update the dynamic ITR value based on statistics
3790 * Stores a new ITR value based on packets and byte
3791 * counts during the last interrupt. The advantage of per interrupt
3792 * computation is faster updates and more accurate ITR for the current
3793 * traffic pattern. Constants in this function were computed
3794 * based on theoretical maximum wire speed and thresholds were set based
3795 * on testing data as well as attempting to minimize response time
3796 * while increasing bulk throughput.
3797 * this functionality is controlled by the InterruptThrottleRate module
3798 * parameter (see igb_param.c)
3799 * NOTE: These calculations are only valid when operating in a single-
3800 * queue environment.
3801 * @adapter: pointer to adapter
3802 * @itr_setting: current q_vector->itr_val
3803 * @packets: the number of packets during this measurement interval
3804 * @bytes: the number of bytes during this measurement interval
3806 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3807 int packets, int bytes)
3809 unsigned int retval = itr_setting;
3811 if (packets == 0)
3812 goto update_itr_done;
3814 switch (itr_setting) {
3815 case lowest_latency:
3816 /* handle TSO and jumbo frames */
3817 if (bytes/packets > 8000)
3818 retval = bulk_latency;
3819 else if ((packets < 5) && (bytes > 512))
3820 retval = low_latency;
3821 break;
3822 case low_latency: /* 50 usec aka 20000 ints/s */
3823 if (bytes > 10000) {
3824 /* this if handles the TSO accounting */
3825 if (bytes/packets > 8000) {
3826 retval = bulk_latency;
3827 } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3828 retval = bulk_latency;
3829 } else if ((packets > 35)) {
3830 retval = lowest_latency;
3832 } else if (bytes/packets > 2000) {
3833 retval = bulk_latency;
3834 } else if (packets <= 2 && bytes < 512) {
3835 retval = lowest_latency;
3837 break;
3838 case bulk_latency: /* 250 usec aka 4000 ints/s */
3839 if (bytes > 25000) {
3840 if (packets > 35)
3841 retval = low_latency;
3842 } else if (bytes < 1500) {
3843 retval = low_latency;
3845 break;
3848 update_itr_done:
3849 return retval;
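/* Worked example of the thresholds above: in the low_latency state a poll
 * that cleaned 40 small packets totalling ~12 kB moves to lowest_latency
 * (packets > 35), whereas 8 TSO frames of ~9 kB each exceed the
 * 8000 bytes/packet cutoff and fall back to bulk_latency. */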
3852 static void igb_set_itr(struct igb_adapter *adapter)
3854 struct igb_q_vector *q_vector = adapter->q_vector[0];
3855 u16 current_itr;
3856 u32 new_itr = q_vector->itr_val;
3858 /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3859 if (adapter->link_speed != SPEED_1000) {
3860 current_itr = 0;
3861 new_itr = 4000;
3862 goto set_itr_now;
3865 adapter->rx_itr = igb_update_itr(adapter,
3866 adapter->rx_itr,
3867 q_vector->rx_ring->total_packets,
3868 q_vector->rx_ring->total_bytes);
3870 adapter->tx_itr = igb_update_itr(adapter,
3871 adapter->tx_itr,
3872 q_vector->tx_ring->total_packets,
3873 q_vector->tx_ring->total_bytes);
3874 current_itr = max(adapter->rx_itr, adapter->tx_itr);
3876 /* conservative mode (itr 3) eliminates the lowest_latency setting */
3877 if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3878 current_itr = low_latency;
3880 switch (current_itr) {
3881 /* counts and packets in update_itr are dependent on these numbers */
3882 case lowest_latency:
3883 new_itr = 56; /* aka 70,000 ints/sec */
3884 break;
3885 case low_latency:
3886 new_itr = 196; /* aka 20,000 ints/sec */
3887 break;
3888 case bulk_latency:
3889 new_itr = 980; /* aka 4,000 ints/sec */
3890 break;
3891 default:
3892 break;
3895 set_itr_now:
3896 q_vector->rx_ring->total_bytes = 0;
3897 q_vector->rx_ring->total_packets = 0;
3898 q_vector->tx_ring->total_bytes = 0;
3899 q_vector->tx_ring->total_packets = 0;
3901 if (new_itr != q_vector->itr_val) {
3902 /* this attempts to bias the interrupt rate towards Bulk
3903 * by adding intermediate steps when interrupt rate is
3904 * increasing */
3905 new_itr = new_itr > q_vector->itr_val ?
3906 max((new_itr * q_vector->itr_val) /
3907 (new_itr + (q_vector->itr_val >> 2)),
3908 new_itr) :
3909 new_itr;
3910 /* Don't write the value here; it resets the adapter's
3911 * internal timer, and causes us to delay far longer than
3912 * we should between interrupts. Instead, we write the ITR
3913 * value at the beginning of the next interrupt so the timing
3914 * ends up being correct.
3916 q_vector->itr_val = new_itr;
3917 q_vector->set_itr = 1;
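/* Transmit path: the flag bits below are OR'ed into tx_flags by
 * igb_xmit_frame_ring_adv(), which then uses the TSO/checksum context
 * helpers, the DMA mapping routine and igb_tx_queue_adv() to post the
 * frame to the ring. */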
3921 #define IGB_TX_FLAGS_CSUM 0x00000001
3922 #define IGB_TX_FLAGS_VLAN 0x00000002
3923 #define IGB_TX_FLAGS_TSO 0x00000004
3924 #define IGB_TX_FLAGS_IPV4 0x00000008
3925 #define IGB_TX_FLAGS_TSTAMP 0x00000010
3926 #define IGB_TX_FLAGS_VLAN_MASK 0xffff0000
3927 #define IGB_TX_FLAGS_VLAN_SHIFT 16
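/* The low bits of tx_flags are per-frame feature flags; the upper 16 bits
 * (IGB_TX_FLAGS_VLAN_MASK) carry the 802.1Q tag taken from the skb. */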
3929 static inline int igb_tso_adv(struct igb_ring *tx_ring,
3930 struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3932 struct e1000_adv_tx_context_desc *context_desc;
3933 unsigned int i;
3934 int err;
3935 struct igb_buffer *buffer_info;
3936 u32 info = 0, tu_cmd = 0;
3937 u32 mss_l4len_idx;
3938 u8 l4len;
3940 if (skb_header_cloned(skb)) {
3941 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3942 if (err)
3943 return err;
3946 l4len = tcp_hdrlen(skb);
3947 *hdr_len += l4len;
3949 if (skb->protocol == htons(ETH_P_IP)) {
3950 struct iphdr *iph = ip_hdr(skb);
3951 iph->tot_len = 0;
3952 iph->check = 0;
3953 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3954 iph->daddr, 0,
3955 IPPROTO_TCP,
3957 } else if (skb_is_gso_v6(skb)) {
3958 ipv6_hdr(skb)->payload_len = 0;
3959 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3960 &ipv6_hdr(skb)->daddr,
3961 0, IPPROTO_TCP, 0);
3964 i = tx_ring->next_to_use;
3966 buffer_info = &tx_ring->buffer_info[i];
3967 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3968 /* VLAN MACLEN IPLEN */
3969 if (tx_flags & IGB_TX_FLAGS_VLAN)
3970 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3971 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3972 *hdr_len += skb_network_offset(skb);
3973 info |= skb_network_header_len(skb);
3974 *hdr_len += skb_network_header_len(skb);
3975 context_desc->vlan_macip_lens = cpu_to_le32(info);
3977 /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
3978 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3980 if (skb->protocol == htons(ETH_P_IP))
3981 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3982 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3984 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3986 /* MSS L4LEN IDX */
3987 mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
3988 mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
3990 /* For 82575, context index must be unique per ring. */
3991 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3992 mss_l4len_idx |= tx_ring->reg_idx << 4;
3994 context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
3995 context_desc->seqnum_seed = 0;
3997 buffer_info->time_stamp = jiffies;
3998 buffer_info->next_to_watch = i;
3999 buffer_info->dma = 0;
4000 i++;
4001 if (i == tx_ring->count)
4002 i = 0;
4004 tx_ring->next_to_use = i;
4006 return true;
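/* igb_tx_csum_adv - build a checksum offload context descriptor.
 * Returns true when a context descriptor was consumed (CHECKSUM_PARTIAL
 * or a VLAN tag is present), false when no offload context is needed. */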
4009 static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
4010 struct sk_buff *skb, u32 tx_flags)
4012 struct e1000_adv_tx_context_desc *context_desc;
4013 struct device *dev = tx_ring->dev;
4014 struct igb_buffer *buffer_info;
4015 u32 info = 0, tu_cmd = 0;
4016 unsigned int i;
4018 if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
4019 (tx_flags & IGB_TX_FLAGS_VLAN)) {
4020 i = tx_ring->next_to_use;
4021 buffer_info = &tx_ring->buffer_info[i];
4022 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
4024 if (tx_flags & IGB_TX_FLAGS_VLAN)
4025 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
4027 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
4028 if (skb->ip_summed == CHECKSUM_PARTIAL)
4029 info |= skb_network_header_len(skb);
4031 context_desc->vlan_macip_lens = cpu_to_le32(info);
4033 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
4035 if (skb->ip_summed == CHECKSUM_PARTIAL) {
4036 __be16 protocol;
4038 if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
4039 const struct vlan_ethhdr *vhdr =
4040 				    (const struct vlan_ethhdr *)skb->data;
4042 protocol = vhdr->h_vlan_encapsulated_proto;
4043 } else {
4044 protocol = skb->protocol;
4047 switch (protocol) {
4048 case cpu_to_be16(ETH_P_IP):
4049 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
4050 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
4051 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4052 else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
4053 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4054 break;
4055 case cpu_to_be16(ETH_P_IPV6):
4056 /* XXX what about other V6 headers?? */
4057 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
4058 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4059 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
4060 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4061 break;
4062 default:
4063 if (unlikely(net_ratelimit()))
4064 dev_warn(dev,
4065 "partial checksum but proto=%x!\n",
4066 skb->protocol);
4067 break;
4071 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
4072 context_desc->seqnum_seed = 0;
4073 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
4074 context_desc->mss_l4len_idx =
4075 cpu_to_le32(tx_ring->reg_idx << 4);
4077 buffer_info->time_stamp = jiffies;
4078 buffer_info->next_to_watch = i;
4079 buffer_info->dma = 0;
4081 i++;
4082 if (i == tx_ring->count)
4083 i = 0;
4084 tx_ring->next_to_use = i;
4086 return true;
4088 return false;
4091 #define IGB_MAX_TXD_PWR 16
4092 #define IGB_MAX_DATA_PER_TXD (1<<IGB_MAX_TXD_PWR)
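/* A single data descriptor can reference at most 2^16 bytes; the mapping
 * code below relies on every chunk staying under this limit (see the
 * BUG_ON checks in igb_tx_map_adv). */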
4094 static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
4095 unsigned int first)
4097 struct igb_buffer *buffer_info;
4098 struct device *dev = tx_ring->dev;
4099 unsigned int hlen = skb_headlen(skb);
4100 unsigned int count = 0, i;
4101 unsigned int f;
4102 u16 gso_segs = skb_shinfo(skb)->gso_segs ?: 1;
4104 i = tx_ring->next_to_use;
4106 buffer_info = &tx_ring->buffer_info[i];
4107 BUG_ON(hlen >= IGB_MAX_DATA_PER_TXD);
4108 buffer_info->length = hlen;
4109 /* set time_stamp *before* dma to help avoid a possible race */
4110 buffer_info->time_stamp = jiffies;
4111 buffer_info->next_to_watch = i;
4112 buffer_info->dma = dma_map_single(dev, skb->data, hlen,
4113 DMA_TO_DEVICE);
4114 if (dma_mapping_error(dev, buffer_info->dma))
4115 goto dma_error;
4117 for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
4118 struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[f];
4119 unsigned int len = frag->size;
4121 count++;
4122 i++;
4123 if (i == tx_ring->count)
4124 i = 0;
4126 buffer_info = &tx_ring->buffer_info[i];
4127 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
4128 buffer_info->length = len;
4129 buffer_info->time_stamp = jiffies;
4130 buffer_info->next_to_watch = i;
4131 buffer_info->mapped_as_page = true;
4132 buffer_info->dma = dma_map_page(dev,
4133 frag->page,
4134 frag->page_offset,
4135 len,
4136 DMA_TO_DEVICE);
4137 if (dma_mapping_error(dev, buffer_info->dma))
4138 goto dma_error;
4142 tx_ring->buffer_info[i].skb = skb;
4143 tx_ring->buffer_info[i].tx_flags = skb_shinfo(skb)->tx_flags;
4144 /* multiply data chunks by size of headers */
4145 tx_ring->buffer_info[i].bytecount = ((gso_segs - 1) * hlen) + skb->len;
4146 tx_ring->buffer_info[i].gso_segs = gso_segs;
4147 tx_ring->buffer_info[first].next_to_watch = i;
4149 return ++count;
4151 dma_error:
4152 dev_err(dev, "TX DMA map failed\n");
4154 /* clear timestamp and dma mappings for failed buffer_info mapping */
4155 buffer_info->dma = 0;
4156 buffer_info->time_stamp = 0;
4157 buffer_info->length = 0;
4158 buffer_info->next_to_watch = 0;
4159 buffer_info->mapped_as_page = false;
4161 /* clear timestamp and dma mappings for remaining portion of packet */
4162 while (count--) {
4163 if (i == 0)
4164 i = tx_ring->count;
4165 i--;
4166 buffer_info = &tx_ring->buffer_info[i];
4167 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
4170 return 0;
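/* igb_tx_queue_adv - write the advanced data descriptors for the buffers
 * mapped above and kick the hardware by updating the ring tail pointer. */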
4173 static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
4174 u32 tx_flags, int count, u32 paylen,
4175 u8 hdr_len)
4177 union e1000_adv_tx_desc *tx_desc;
4178 struct igb_buffer *buffer_info;
4179 u32 olinfo_status = 0, cmd_type_len;
4180 unsigned int i = tx_ring->next_to_use;
4182 cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
4183 E1000_ADVTXD_DCMD_DEXT);
4185 if (tx_flags & IGB_TX_FLAGS_VLAN)
4186 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
4188 if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4189 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
4191 if (tx_flags & IGB_TX_FLAGS_TSO) {
4192 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
4194 /* insert tcp checksum */
4195 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4197 /* insert ip checksum */
4198 if (tx_flags & IGB_TX_FLAGS_IPV4)
4199 olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4201 } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
4202 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4205 if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
4206 (tx_flags & (IGB_TX_FLAGS_CSUM |
4207 IGB_TX_FLAGS_TSO |
4208 IGB_TX_FLAGS_VLAN)))
4209 olinfo_status |= tx_ring->reg_idx << 4;
4211 olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
4213 do {
4214 buffer_info = &tx_ring->buffer_info[i];
4215 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
4216 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
4217 tx_desc->read.cmd_type_len =
4218 cpu_to_le32(cmd_type_len | buffer_info->length);
4219 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4220 count--;
4221 i++;
4222 if (i == tx_ring->count)
4223 i = 0;
4224 } while (count > 0);
4226 tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
4227 /* Force memory writes to complete before letting h/w
4228 * know there are new descriptors to fetch. (Only
4229 * applicable for weak-ordered memory model archs,
4230 * such as IA-64). */
4231 wmb();
4233 tx_ring->next_to_use = i;
4234 writel(i, tx_ring->tail);
4235 /* we need this if more than one processor can write to our tail
4236 	 * at a time, it synchronizes IO on IA64/Altix systems */
4237 mmiowb();
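/* Tx flow control: the transmit path stops the subqueue when free
 * descriptors run low and igb_clean_tx_irq() wakes it once enough have
 * been reclaimed; the memory barriers below close the race between the
 * stop and wake sides. */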
4240 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4242 struct net_device *netdev = tx_ring->netdev;
4244 netif_stop_subqueue(netdev, tx_ring->queue_index);
4246 /* Herbert's original patch had:
4247 * smp_mb__after_netif_stop_queue();
4248 * but since that doesn't exist yet, just open code it. */
4249 smp_mb();
4251 	/* We need to check again in the case another CPU has just
4252 * made room available. */
4253 if (igb_desc_unused(tx_ring) < size)
4254 return -EBUSY;
4256 /* A reprieve! */
4257 netif_wake_subqueue(netdev, tx_ring->queue_index);
4259 u64_stats_update_begin(&tx_ring->tx_syncp2);
4260 tx_ring->tx_stats.restart_queue2++;
4261 u64_stats_update_end(&tx_ring->tx_syncp2);
4263 return 0;
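/* Fast-path wrapper: only fall into __igb_maybe_stop_tx() above when the
 * ring is actually short of free descriptors. */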
4266 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4268 if (igb_desc_unused(tx_ring) >= size)
4269 return 0;
4270 return __igb_maybe_stop_tx(tx_ring, size);
4273 netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
4274 struct igb_ring *tx_ring)
4276 int tso = 0, count;
4277 u32 tx_flags = 0;
4278 u16 first;
4279 u8 hdr_len = 0;
4281 /* need: 1 descriptor per page,
4282 * + 2 desc gap to keep tail from touching head,
4283 * + 1 desc for skb->data,
4284 * + 1 desc for context descriptor,
4285 * otherwise try next time */
4286 if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4287 /* this is a hard error */
4288 return NETDEV_TX_BUSY;
4291 if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4292 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4293 tx_flags |= IGB_TX_FLAGS_TSTAMP;
4296 if (vlan_tx_tag_present(skb)) {
4297 tx_flags |= IGB_TX_FLAGS_VLAN;
4298 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4301 if (skb->protocol == htons(ETH_P_IP))
4302 tx_flags |= IGB_TX_FLAGS_IPV4;
4304 first = tx_ring->next_to_use;
4305 if (skb_is_gso(skb)) {
4306 tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
4308 if (tso < 0) {
4309 dev_kfree_skb_any(skb);
4310 return NETDEV_TX_OK;
4314 if (tso)
4315 tx_flags |= IGB_TX_FLAGS_TSO;
4316 else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
4317 (skb->ip_summed == CHECKSUM_PARTIAL))
4318 tx_flags |= IGB_TX_FLAGS_CSUM;
4321 	 * count reflects descriptors mapped; if 0 or less, then a mapping error
4322 * has occurred and we need to rewind the descriptor queue
4324 count = igb_tx_map_adv(tx_ring, skb, first);
4325 if (!count) {
4326 dev_kfree_skb_any(skb);
4327 tx_ring->buffer_info[first].time_stamp = 0;
4328 tx_ring->next_to_use = first;
4329 return NETDEV_TX_OK;
4332 igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
4334 /* Make sure there is space in the ring for the next send. */
4335 igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4337 return NETDEV_TX_OK;
4340 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
4341 struct net_device *netdev)
4343 struct igb_adapter *adapter = netdev_priv(netdev);
4344 struct igb_ring *tx_ring;
4345 int r_idx = 0;
4347 if (test_bit(__IGB_DOWN, &adapter->state)) {
4348 dev_kfree_skb_any(skb);
4349 return NETDEV_TX_OK;
4352 if (skb->len <= 0) {
4353 dev_kfree_skb_any(skb);
4354 return NETDEV_TX_OK;
4357 r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
4358 tx_ring = adapter->multi_tx_table[r_idx];
4360 /* This goes back to the question of how to logically map a tx queue
4361 * to a flow. Right now, performance is impacted slightly negatively
4362 * if using multiple tx queues. If the stack breaks away from a
4363 * single qdisc implementation, we can look at this again. */
4364 return igb_xmit_frame_ring_adv(skb, tx_ring);
4368 * igb_tx_timeout - Respond to a Tx Hang
4369 * @netdev: network interface device structure
4371 static void igb_tx_timeout(struct net_device *netdev)
4373 struct igb_adapter *adapter = netdev_priv(netdev);
4374 struct e1000_hw *hw = &adapter->hw;
4376 /* Do the reset outside of interrupt context */
4377 adapter->tx_timeout_count++;
4379 if (hw->mac.type == e1000_82580)
4380 hw->dev_spec._82575.global_device_reset = true;
4382 schedule_work(&adapter->reset_task);
4383 wr32(E1000_EICS,
4384 (adapter->eims_enable_mask & ~adapter->eims_other));
4387 static void igb_reset_task(struct work_struct *work)
4389 struct igb_adapter *adapter;
4390 adapter = container_of(work, struct igb_adapter, reset_task);
4392 igb_dump(adapter);
4393 netdev_err(adapter->netdev, "Reset adapter\n");
4394 igb_reinit_locked(adapter);
4398 * igb_get_stats64 - Get System Network Statistics
4399 * @netdev: network interface device structure
4400 * @stats: rtnl_link_stats64 pointer
4403 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4404 struct rtnl_link_stats64 *stats)
4406 struct igb_adapter *adapter = netdev_priv(netdev);
4408 spin_lock(&adapter->stats64_lock);
4409 igb_update_stats(adapter, &adapter->stats64);
4410 memcpy(stats, &adapter->stats64, sizeof(*stats));
4411 spin_unlock(&adapter->stats64_lock);
4413 return stats;
4417 * igb_change_mtu - Change the Maximum Transfer Unit
4418 * @netdev: network interface device structure
4419 * @new_mtu: new value for maximum frame size
4421 * Returns 0 on success, negative on failure
4423 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4425 struct igb_adapter *adapter = netdev_priv(netdev);
4426 struct pci_dev *pdev = adapter->pdev;
4427 int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
4428 u32 rx_buffer_len, i;
4430 if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4431 dev_err(&pdev->dev, "Invalid MTU setting\n");
4432 return -EINVAL;
4435 if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4436 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4437 return -EINVAL;
4440 while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4441 msleep(1);
4443 /* igb_down has a dependency on max_frame_size */
4444 adapter->max_frame_size = max_frame;
4446 /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
4447 	 * means we reserve 2 more; this pushes us to allocate from the next
4448 * larger slab size.
4449 * i.e. RXBUFFER_2048 --> size-4096 slab
4452 if (adapter->hw.mac.type == e1000_82580)
4453 max_frame += IGB_TS_HDR_LEN;
4455 if (max_frame <= IGB_RXBUFFER_1024)
4456 rx_buffer_len = IGB_RXBUFFER_1024;
4457 else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
4458 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
4459 else
4460 rx_buffer_len = IGB_RXBUFFER_128;
4462 if ((max_frame == ETH_FRAME_LEN + ETH_FCS_LEN + IGB_TS_HDR_LEN) ||
4463 (max_frame == MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN))
4464 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN;
4466 if ((adapter->hw.mac.type == e1000_82580) &&
4467 (rx_buffer_len == IGB_RXBUFFER_128))
4468 rx_buffer_len += IGB_RXBUFFER_64;
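	/* On 82580 the hardware prepends a timestamp header (IGB_TS_HDR_LEN
	 * bytes) to received frames, so the buffer sizes above are padded to
	 * leave room for it. */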
4470 if (netif_running(netdev))
4471 igb_down(adapter);
4473 dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4474 netdev->mtu, new_mtu);
4475 netdev->mtu = new_mtu;
4477 for (i = 0; i < adapter->num_rx_queues; i++)
4478 adapter->rx_ring[i]->rx_buffer_len = rx_buffer_len;
4480 if (netif_running(netdev))
4481 igb_up(adapter);
4482 else
4483 igb_reset(adapter);
4485 clear_bit(__IGB_RESETTING, &adapter->state);
4487 return 0;
4491 * igb_update_stats - Update the board statistics counters
4492 * @adapter: board private structure
4495 void igb_update_stats(struct igb_adapter *adapter,
4496 struct rtnl_link_stats64 *net_stats)
4498 struct e1000_hw *hw = &adapter->hw;
4499 struct pci_dev *pdev = adapter->pdev;
4500 u32 reg, mpc;
4501 u16 phy_tmp;
4502 int i;
4503 u64 bytes, packets;
4504 unsigned int start;
4505 u64 _bytes, _packets;
4507 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4510 * Prevent stats update while adapter is being reset, or if the pci
4511 * connection is down.
4513 if (adapter->link_speed == 0)
4514 return;
4515 if (pci_channel_offline(pdev))
4516 return;
4518 bytes = 0;
4519 packets = 0;
4520 for (i = 0; i < adapter->num_rx_queues; i++) {
4521 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4522 struct igb_ring *ring = adapter->rx_ring[i];
4524 ring->rx_stats.drops += rqdpc_tmp;
4525 net_stats->rx_fifo_errors += rqdpc_tmp;
4527 do {
4528 start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4529 _bytes = ring->rx_stats.bytes;
4530 _packets = ring->rx_stats.packets;
4531 } while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4532 bytes += _bytes;
4533 packets += _packets;
4536 net_stats->rx_bytes = bytes;
4537 net_stats->rx_packets = packets;
4539 bytes = 0;
4540 packets = 0;
4541 for (i = 0; i < adapter->num_tx_queues; i++) {
4542 struct igb_ring *ring = adapter->tx_ring[i];
4543 do {
4544 start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4545 _bytes = ring->tx_stats.bytes;
4546 _packets = ring->tx_stats.packets;
4547 } while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4548 bytes += _bytes;
4549 packets += _packets;
4551 net_stats->tx_bytes = bytes;
4552 net_stats->tx_packets = packets;
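	/* Everything below is accumulated from the hardware statistics
	 * registers, which are clear-on-read, hence the += updates. */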
4554 /* read stats registers */
4555 adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4556 adapter->stats.gprc += rd32(E1000_GPRC);
4557 adapter->stats.gorc += rd32(E1000_GORCL);
4558 rd32(E1000_GORCH); /* clear GORCL */
4559 adapter->stats.bprc += rd32(E1000_BPRC);
4560 adapter->stats.mprc += rd32(E1000_MPRC);
4561 adapter->stats.roc += rd32(E1000_ROC);
4563 adapter->stats.prc64 += rd32(E1000_PRC64);
4564 adapter->stats.prc127 += rd32(E1000_PRC127);
4565 adapter->stats.prc255 += rd32(E1000_PRC255);
4566 adapter->stats.prc511 += rd32(E1000_PRC511);
4567 adapter->stats.prc1023 += rd32(E1000_PRC1023);
4568 adapter->stats.prc1522 += rd32(E1000_PRC1522);
4569 adapter->stats.symerrs += rd32(E1000_SYMERRS);
4570 adapter->stats.sec += rd32(E1000_SEC);
4572 mpc = rd32(E1000_MPC);
4573 adapter->stats.mpc += mpc;
4574 net_stats->rx_fifo_errors += mpc;
4575 adapter->stats.scc += rd32(E1000_SCC);
4576 adapter->stats.ecol += rd32(E1000_ECOL);
4577 adapter->stats.mcc += rd32(E1000_MCC);
4578 adapter->stats.latecol += rd32(E1000_LATECOL);
4579 adapter->stats.dc += rd32(E1000_DC);
4580 adapter->stats.rlec += rd32(E1000_RLEC);
4581 adapter->stats.xonrxc += rd32(E1000_XONRXC);
4582 adapter->stats.xontxc += rd32(E1000_XONTXC);
4583 adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4584 adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4585 adapter->stats.fcruc += rd32(E1000_FCRUC);
4586 adapter->stats.gptc += rd32(E1000_GPTC);
4587 adapter->stats.gotc += rd32(E1000_GOTCL);
4588 rd32(E1000_GOTCH); /* clear GOTCL */
4589 adapter->stats.rnbc += rd32(E1000_RNBC);
4590 adapter->stats.ruc += rd32(E1000_RUC);
4591 adapter->stats.rfc += rd32(E1000_RFC);
4592 adapter->stats.rjc += rd32(E1000_RJC);
4593 adapter->stats.tor += rd32(E1000_TORH);
4594 adapter->stats.tot += rd32(E1000_TOTH);
4595 adapter->stats.tpr += rd32(E1000_TPR);
4597 adapter->stats.ptc64 += rd32(E1000_PTC64);
4598 adapter->stats.ptc127 += rd32(E1000_PTC127);
4599 adapter->stats.ptc255 += rd32(E1000_PTC255);
4600 adapter->stats.ptc511 += rd32(E1000_PTC511);
4601 adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4602 adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4604 adapter->stats.mptc += rd32(E1000_MPTC);
4605 adapter->stats.bptc += rd32(E1000_BPTC);
4607 adapter->stats.tpt += rd32(E1000_TPT);
4608 adapter->stats.colc += rd32(E1000_COLC);
4610 adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4611 /* read internal phy specific stats */
4612 reg = rd32(E1000_CTRL_EXT);
4613 if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4614 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4615 adapter->stats.tncrs += rd32(E1000_TNCRS);
4618 adapter->stats.tsctc += rd32(E1000_TSCTC);
4619 adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4621 adapter->stats.iac += rd32(E1000_IAC);
4622 adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4623 adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4624 adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4625 adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4626 adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4627 adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4628 adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4629 adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4631 /* Fill out the OS statistics structure */
4632 net_stats->multicast = adapter->stats.mprc;
4633 net_stats->collisions = adapter->stats.colc;
4635 /* Rx Errors */
4637 /* RLEC on some newer hardware can be incorrect so build
4638 * our own version based on RUC and ROC */
4639 net_stats->rx_errors = adapter->stats.rxerrc +
4640 adapter->stats.crcerrs + adapter->stats.algnerrc +
4641 adapter->stats.ruc + adapter->stats.roc +
4642 adapter->stats.cexterr;
4643 net_stats->rx_length_errors = adapter->stats.ruc +
4644 adapter->stats.roc;
4645 net_stats->rx_crc_errors = adapter->stats.crcerrs;
4646 net_stats->rx_frame_errors = adapter->stats.algnerrc;
4647 net_stats->rx_missed_errors = adapter->stats.mpc;
4649 /* Tx Errors */
4650 net_stats->tx_errors = adapter->stats.ecol +
4651 adapter->stats.latecol;
4652 net_stats->tx_aborted_errors = adapter->stats.ecol;
4653 net_stats->tx_window_errors = adapter->stats.latecol;
4654 net_stats->tx_carrier_errors = adapter->stats.tncrs;
4656 /* Tx Dropped needs to be maintained elsewhere */
4658 /* Phy Stats */
4659 if (hw->phy.media_type == e1000_media_type_copper) {
4660 if ((adapter->link_speed == SPEED_1000) &&
4661 (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4662 phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4663 adapter->phy_stats.idle_errors += phy_tmp;
4667 /* Management Stats */
4668 adapter->stats.mgptc += rd32(E1000_MGTPTC);
4669 adapter->stats.mgprc += rd32(E1000_MGTPRC);
4670 adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4672 /* OS2BMC Stats */
4673 reg = rd32(E1000_MANC);
4674 if (reg & E1000_MANC_EN_BMC2OS) {
4675 adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
4676 adapter->stats.o2bspc += rd32(E1000_O2BSPC);
4677 adapter->stats.b2ospc += rd32(E1000_B2OSPC);
4678 adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
4682 static irqreturn_t igb_msix_other(int irq, void *data)
4684 struct igb_adapter *adapter = data;
4685 struct e1000_hw *hw = &adapter->hw;
4686 u32 icr = rd32(E1000_ICR);
4687 /* reading ICR causes bit 31 of EICR to be cleared */
4689 if (icr & E1000_ICR_DRSTA)
4690 schedule_work(&adapter->reset_task);
4692 if (icr & E1000_ICR_DOUTSYNC) {
4693 /* HW is reporting DMA is out of sync */
4694 adapter->stats.doosync++;
4695 		/* The DMA Out of Sync is also an indication of a spoof event
4696 * in IOV mode. Check the Wrong VM Behavior register to
4697 * see if it is really a spoof event. */
4698 igb_check_wvbr(adapter);
4701 /* Check for a mailbox event */
4702 if (icr & E1000_ICR_VMMB)
4703 igb_msg_task(adapter);
4705 if (icr & E1000_ICR_LSC) {
4706 hw->mac.get_link_status = 1;
4707 /* guard against interrupt when we're going down */
4708 if (!test_bit(__IGB_DOWN, &adapter->state))
4709 mod_timer(&adapter->watchdog_timer, jiffies + 1);
4712 if (adapter->vfs_allocated_count)
4713 wr32(E1000_IMS, E1000_IMS_LSC |
4714 E1000_IMS_VMMB |
4715 E1000_IMS_DOUTSYNC);
4716 else
4717 wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
4718 wr32(E1000_EIMS, adapter->eims_other);
4720 return IRQ_HANDLED;
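/* igb_write_itr - push the ITR value computed during the previous
 * interrupt into the hardware interrupt throttle register; the write is
 * deferred to here so the hardware timer is not restarted mid-interval
 * (see the note in igb_set_itr). */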
4723 static void igb_write_itr(struct igb_q_vector *q_vector)
4725 struct igb_adapter *adapter = q_vector->adapter;
4726 u32 itr_val = q_vector->itr_val & 0x7FFC;
4728 if (!q_vector->set_itr)
4729 return;
4731 if (!itr_val)
4732 itr_val = 0x4;
4734 if (adapter->hw.mac.type == e1000_82575)
4735 itr_val |= itr_val << 16;
4736 else
4737 itr_val |= 0x8000000;
4739 writel(itr_val, q_vector->itr_register);
4740 q_vector->set_itr = 0;
4743 static irqreturn_t igb_msix_ring(int irq, void *data)
4745 struct igb_q_vector *q_vector = data;
4747 /* Write the ITR value calculated from the previous interrupt. */
4748 igb_write_itr(q_vector);
4750 napi_schedule(&q_vector->napi);
4752 return IRQ_HANDLED;
4755 #ifdef CONFIG_IGB_DCA
4756 static void igb_update_dca(struct igb_q_vector *q_vector)
4758 struct igb_adapter *adapter = q_vector->adapter;
4759 struct e1000_hw *hw = &adapter->hw;
4760 int cpu = get_cpu();
4762 if (q_vector->cpu == cpu)
4763 goto out_no_update;
4765 if (q_vector->tx_ring) {
4766 int q = q_vector->tx_ring->reg_idx;
4767 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4768 if (hw->mac.type == e1000_82575) {
4769 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4770 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4771 } else {
4772 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4773 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4774 E1000_DCA_TXCTRL_CPUID_SHIFT;
4776 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4777 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4779 if (q_vector->rx_ring) {
4780 int q = q_vector->rx_ring->reg_idx;
4781 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4782 if (hw->mac.type == e1000_82575) {
4783 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4784 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4785 } else {
4786 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4787 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4788 E1000_DCA_RXCTRL_CPUID_SHIFT;
4790 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4791 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4792 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4793 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4795 q_vector->cpu = cpu;
4796 out_no_update:
4797 put_cpu();
4800 static void igb_setup_dca(struct igb_adapter *adapter)
4802 struct e1000_hw *hw = &adapter->hw;
4803 int i;
4805 if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4806 return;
4808 /* Always use CB2 mode, difference is masked in the CB driver. */
4809 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4811 for (i = 0; i < adapter->num_q_vectors; i++) {
4812 adapter->q_vector[i]->cpu = -1;
4813 igb_update_dca(adapter->q_vector[i]);
4817 static int __igb_notify_dca(struct device *dev, void *data)
4819 struct net_device *netdev = dev_get_drvdata(dev);
4820 struct igb_adapter *adapter = netdev_priv(netdev);
4821 struct pci_dev *pdev = adapter->pdev;
4822 struct e1000_hw *hw = &adapter->hw;
4823 unsigned long event = *(unsigned long *)data;
4825 switch (event) {
4826 case DCA_PROVIDER_ADD:
4827 /* if already enabled, don't do it again */
4828 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4829 break;
4830 if (dca_add_requester(dev) == 0) {
4831 adapter->flags |= IGB_FLAG_DCA_ENABLED;
4832 dev_info(&pdev->dev, "DCA enabled\n");
4833 igb_setup_dca(adapter);
4834 break;
4836 /* Fall Through since DCA is disabled. */
4837 case DCA_PROVIDER_REMOVE:
4838 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4839 /* without this a class_device is left
4840 * hanging around in the sysfs model */
4841 dca_remove_requester(dev);
4842 dev_info(&pdev->dev, "DCA disabled\n");
4843 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4844 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4846 break;
4849 return 0;
4852 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4853 void *p)
4855 int ret_val;
4857 ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4858 __igb_notify_dca);
4860 return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4862 #endif /* CONFIG_IGB_DCA */
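/* SR-IOV support: the routines below service requests that VF drivers
 * send over the hardware mailbox (resets, MAC/VLAN/multicast setup) and
 * keep the per-VF filter state in adapter->vf_data in sync. */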
4864 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4866 struct e1000_hw *hw = &adapter->hw;
4867 u32 ping;
4868 int i;
4870 for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4871 ping = E1000_PF_CONTROL_MSG;
4872 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4873 ping |= E1000_VT_MSGTYPE_CTS;
4874 igb_write_mbx(hw, &ping, 1, i);
4878 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4880 struct e1000_hw *hw = &adapter->hw;
4881 u32 vmolr = rd32(E1000_VMOLR(vf));
4882 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4884 vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
4885 IGB_VF_FLAG_MULTI_PROMISC);
4886 vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4888 if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4889 vmolr |= E1000_VMOLR_MPME;
4890 vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
4891 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4892 } else {
4894 * if we have hashes and we are clearing a multicast promisc
4895 * flag we need to write the hashes to the MTA as this step
4896 * was previously skipped
4898 if (vf_data->num_vf_mc_hashes > 30) {
4899 vmolr |= E1000_VMOLR_MPME;
4900 } else if (vf_data->num_vf_mc_hashes) {
4901 int j;
4902 vmolr |= E1000_VMOLR_ROMPE;
4903 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4904 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4908 wr32(E1000_VMOLR(vf), vmolr);
4910 /* there are flags left unprocessed, likely not supported */
4911 if (*msgbuf & E1000_VT_MSGINFO_MASK)
4912 return -EINVAL;
4914 return 0;
4918 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4919 u32 *msgbuf, u32 vf)
4921 int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4922 u16 *hash_list = (u16 *)&msgbuf[1];
4923 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4924 int i;
4926 /* salt away the number of multicast addresses assigned
4927 	 * to this VF for later use to restore when the PF multicast
4928 * list changes
4930 vf_data->num_vf_mc_hashes = n;
4932 /* only up to 30 hash values supported */
4933 if (n > 30)
4934 n = 30;
4936 /* store the hashes for later use */
4937 for (i = 0; i < n; i++)
4938 vf_data->vf_mc_hashes[i] = hash_list[i];
4940 /* Flush and reset the mta with the new values */
4941 igb_set_rx_mode(adapter->netdev);
4943 return 0;
4946 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4948 struct e1000_hw *hw = &adapter->hw;
4949 struct vf_data_storage *vf_data;
4950 int i, j;
4952 for (i = 0; i < adapter->vfs_allocated_count; i++) {
4953 u32 vmolr = rd32(E1000_VMOLR(i));
4954 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4956 vf_data = &adapter->vf_data[i];
4958 if ((vf_data->num_vf_mc_hashes > 30) ||
4959 (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
4960 vmolr |= E1000_VMOLR_MPME;
4961 } else if (vf_data->num_vf_mc_hashes) {
4962 vmolr |= E1000_VMOLR_ROMPE;
4963 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4964 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4966 wr32(E1000_VMOLR(i), vmolr);
4970 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
4972 struct e1000_hw *hw = &adapter->hw;
4973 u32 pool_mask, reg, vid;
4974 int i;
4976 pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4978 /* Find the vlan filter for this id */
4979 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4980 reg = rd32(E1000_VLVF(i));
4982 /* remove the vf from the pool */
4983 reg &= ~pool_mask;
4985 /* if pool is empty then remove entry from vfta */
4986 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
4987 (reg & E1000_VLVF_VLANID_ENABLE)) {
4988 reg = 0;
4989 vid = reg & E1000_VLVF_VLANID_MASK;
4990 igb_vfta_set(hw, vid, false);
4993 wr32(E1000_VLVF(i), reg);
4996 adapter->vf_data[vf].vlans_enabled = 0;
4999 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
5001 struct e1000_hw *hw = &adapter->hw;
5002 u32 reg, i;
5004 /* The vlvf table only exists on 82576 hardware and newer */
5005 if (hw->mac.type < e1000_82576)
5006 return -1;
5008 /* we only need to do this if VMDq is enabled */
5009 if (!adapter->vfs_allocated_count)
5010 return -1;
5012 /* Find the vlan filter for this id */
5013 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5014 reg = rd32(E1000_VLVF(i));
5015 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
5016 vid == (reg & E1000_VLVF_VLANID_MASK))
5017 break;
5020 if (add) {
5021 if (i == E1000_VLVF_ARRAY_SIZE) {
5022 /* Did not find a matching VLAN ID entry that was
5023 * enabled. Search for a free filter entry, i.e.
5024 * one without the enable bit set
5026 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5027 reg = rd32(E1000_VLVF(i));
5028 if (!(reg & E1000_VLVF_VLANID_ENABLE))
5029 break;
5032 if (i < E1000_VLVF_ARRAY_SIZE) {
5033 /* Found an enabled/available entry */
5034 reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5036 /* if !enabled we need to set this up in vfta */
5037 if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
5038 /* add VID to filter table */
5039 igb_vfta_set(hw, vid, true);
5040 reg |= E1000_VLVF_VLANID_ENABLE;
5042 reg &= ~E1000_VLVF_VLANID_MASK;
5043 reg |= vid;
5044 wr32(E1000_VLVF(i), reg);
5046 /* do not modify RLPML for PF devices */
5047 if (vf >= adapter->vfs_allocated_count)
5048 return 0;
5050 if (!adapter->vf_data[vf].vlans_enabled) {
5051 u32 size;
5052 reg = rd32(E1000_VMOLR(vf));
5053 size = reg & E1000_VMOLR_RLPML_MASK;
5054 size += 4;
5055 reg &= ~E1000_VMOLR_RLPML_MASK;
5056 reg |= size;
5057 wr32(E1000_VMOLR(vf), reg);
5060 adapter->vf_data[vf].vlans_enabled++;
5061 return 0;
5063 } else {
5064 if (i < E1000_VLVF_ARRAY_SIZE) {
5065 /* remove vf from the pool */
5066 reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
5067 /* if pool is empty then remove entry from vfta */
5068 if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
5069 reg = 0;
5070 igb_vfta_set(hw, vid, false);
5072 wr32(E1000_VLVF(i), reg);
5074 /* do not modify RLPML for PF devices */
5075 if (vf >= adapter->vfs_allocated_count)
5076 return 0;
5078 adapter->vf_data[vf].vlans_enabled--;
5079 if (!adapter->vf_data[vf].vlans_enabled) {
5080 u32 size;
5081 reg = rd32(E1000_VMOLR(vf));
5082 size = reg & E1000_VMOLR_RLPML_MASK;
5083 size -= 4;
5084 reg &= ~E1000_VMOLR_RLPML_MASK;
5085 reg |= size;
5086 wr32(E1000_VMOLR(vf), reg);
5090 return 0;
5093 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
5095 struct e1000_hw *hw = &adapter->hw;
5097 if (vid)
5098 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
5099 else
5100 wr32(E1000_VMVIR(vf), 0);
5103 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
5104 int vf, u16 vlan, u8 qos)
5106 int err = 0;
5107 struct igb_adapter *adapter = netdev_priv(netdev);
5109 if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
5110 return -EINVAL;
5111 if (vlan || qos) {
5112 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
5113 if (err)
5114 goto out;
5115 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
5116 igb_set_vmolr(adapter, vf, !vlan);
5117 adapter->vf_data[vf].pf_vlan = vlan;
5118 adapter->vf_data[vf].pf_qos = qos;
5119 dev_info(&adapter->pdev->dev,
5120 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5121 if (test_bit(__IGB_DOWN, &adapter->state)) {
5122 dev_warn(&adapter->pdev->dev,
5123 "The VF VLAN has been set,"
5124 " but the PF device is not up.\n");
5125 dev_warn(&adapter->pdev->dev,
5126 "Bring the PF device up before"
5127 " attempting to use the VF device.\n");
5129 } else {
5130 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5131 false, vf);
5132 igb_set_vmvir(adapter, vlan, vf);
5133 igb_set_vmolr(adapter, vf, true);
5134 adapter->vf_data[vf].pf_vlan = 0;
5135 adapter->vf_data[vf].pf_qos = 0;
5137 out:
5138 return err;
5141 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5143 int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5144 int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5146 return igb_vlvf_set(adapter, vid, add, vf);
5149 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5151 /* clear flags - except flag that indicates PF has set the MAC */
5152 adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5153 adapter->vf_data[vf].last_nack = jiffies;
5155 /* reset offloads to defaults */
5156 igb_set_vmolr(adapter, vf, true);
5158 /* reset vlans for device */
5159 igb_clear_vf_vfta(adapter, vf);
5160 if (adapter->vf_data[vf].pf_vlan)
5161 igb_ndo_set_vf_vlan(adapter->netdev, vf,
5162 adapter->vf_data[vf].pf_vlan,
5163 adapter->vf_data[vf].pf_qos);
5164 else
5165 igb_clear_vf_vfta(adapter, vf);
5167 /* reset multicast table array for vf */
5168 adapter->vf_data[vf].num_vf_mc_hashes = 0;
5170 /* Flush and reset the mta with the new values */
5171 igb_set_rx_mode(adapter->netdev);
5174 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5176 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5178 /* generate a new mac address as we were hotplug removed/added */
5179 if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5180 random_ether_addr(vf_mac);
5182 /* process remaining reset events */
5183 igb_vf_reset(adapter, vf);
5186 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5188 struct e1000_hw *hw = &adapter->hw;
5189 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5190 int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5191 u32 reg, msgbuf[3];
5192 u8 *addr = (u8 *)(&msgbuf[1]);
5194 /* process all the same items cleared in a function level reset */
5195 igb_vf_reset(adapter, vf);
5197 /* set vf mac address */
5198 igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5200 /* enable transmit and receive for vf */
5201 reg = rd32(E1000_VFTE);
5202 wr32(E1000_VFTE, reg | (1 << vf));
5203 reg = rd32(E1000_VFRE);
5204 wr32(E1000_VFRE, reg | (1 << vf));
5206 adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5208 /* reply to reset with ack and vf mac address */
5209 msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5210 memcpy(addr, vf_mac, 6);
5211 igb_write_mbx(hw, msgbuf, 3, vf);
5214 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5217 * The VF MAC Address is stored in a packed array of bytes
5218 * starting at the second 32 bit word of the msg array
5220 unsigned char *addr = (char *)&msg[1];
5221 int err = -1;
5223 if (is_valid_ether_addr(addr))
5224 err = igb_set_vf_mac(adapter, vf, addr);
5226 return err;
5229 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5231 struct e1000_hw *hw = &adapter->hw;
5232 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5233 u32 msg = E1000_VT_MSGTYPE_NACK;
5235 /* if device isn't clear to send it shouldn't be reading either */
5236 if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5237 time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5238 igb_write_mbx(hw, &msg, 1, vf);
5239 vf_data->last_nack = jiffies;
5243 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5245 struct pci_dev *pdev = adapter->pdev;
5246 u32 msgbuf[E1000_VFMAILBOX_SIZE];
5247 struct e1000_hw *hw = &adapter->hw;
5248 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5249 s32 retval;
5251 retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5253 if (retval) {
5254 /* if receive failed revoke VF CTS stats and restart init */
5255 dev_err(&pdev->dev, "Error receiving message from VF\n");
5256 vf_data->flags &= ~IGB_VF_FLAG_CTS;
5257 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5258 return;
5259 goto out;
5262 /* this is a message we already processed, do nothing */
5263 if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5264 return;
5267 * until the vf completes a reset it should not be
5268 * allowed to start any configuration.
5271 if (msgbuf[0] == E1000_VF_RESET) {
5272 igb_vf_reset_msg(adapter, vf);
5273 return;
5276 if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5277 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5278 return;
5279 retval = -1;
5280 goto out;
5283 switch ((msgbuf[0] & 0xFFFF)) {
5284 case E1000_VF_SET_MAC_ADDR:
5285 retval = -EINVAL;
5286 if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5287 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5288 else
5289 dev_warn(&pdev->dev,
5290 "VF %d attempted to override administratively "
5291 "set MAC address\nReload the VF driver to "
5292 "resume operations\n", vf);
5293 break;
5294 case E1000_VF_SET_PROMISC:
5295 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5296 break;
5297 case E1000_VF_SET_MULTICAST:
5298 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5299 break;
5300 case E1000_VF_SET_LPE:
5301 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5302 break;
5303 case E1000_VF_SET_VLAN:
5304 retval = -1;
5305 if (vf_data->pf_vlan)
5306 dev_warn(&pdev->dev,
5307 "VF %d attempted to override administratively "
5308 "set VLAN tag\nReload the VF driver to "
5309 "resume operations\n", vf);
5310 else
5311 retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5312 break;
5313 default:
5314 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5315 retval = -1;
5316 break;
5319 msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5320 out:
5321 /* notify the VF of the results of what it sent us */
5322 if (retval)
5323 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5324 else
5325 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5327 igb_write_mbx(hw, msgbuf, 1, vf);
5330 static void igb_msg_task(struct igb_adapter *adapter)
5332 struct e1000_hw *hw = &adapter->hw;
5333 u32 vf;
5335 for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5336 /* process any reset requests */
5337 if (!igb_check_for_rst(hw, vf))
5338 igb_vf_reset_event(adapter, vf);
5340 /* process any messages pending */
5341 if (!igb_check_for_msg(hw, vf))
5342 igb_rcv_msg_from_vf(adapter, vf);
5344 /* process any acks */
5345 if (!igb_check_for_ack(hw, vf))
5346 igb_rcv_ack_from_vf(adapter, vf);
5351 * igb_set_uta - Set unicast filter table address
5352 * @adapter: board private structure
5354 * The unicast table address is a register array of 32-bit registers.
5355  * The table is meant to be used in a way similar to how the MTA is used;
5356  * however, due to certain limitations in the hardware it is necessary to
5357 * set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5358 * enable bit to allow vlan tag stripping when promiscuous mode is enabled
5360 static void igb_set_uta(struct igb_adapter *adapter)
5362 struct e1000_hw *hw = &adapter->hw;
5363 int i;
5365 /* The UTA table only exists on 82576 hardware and newer */
5366 if (hw->mac.type < e1000_82576)
5367 return;
5369 /* we only need to do this if VMDq is enabled */
5370 if (!adapter->vfs_allocated_count)
5371 return;
5373 for (i = 0; i < hw->mac.uta_reg_count; i++)
5374 array_wr32(E1000_UTA, i, ~0);
5378 * igb_intr_msi - Interrupt Handler
5379 * @irq: interrupt number
5380 * @data: pointer to a network interface device structure
5382 static irqreturn_t igb_intr_msi(int irq, void *data)
5384 struct igb_adapter *adapter = data;
5385 struct igb_q_vector *q_vector = adapter->q_vector[0];
5386 struct e1000_hw *hw = &adapter->hw;
5387 /* read ICR disables interrupts using IAM */
5388 u32 icr = rd32(E1000_ICR);
5390 igb_write_itr(q_vector);
5392 if (icr & E1000_ICR_DRSTA)
5393 schedule_work(&adapter->reset_task);
5395 if (icr & E1000_ICR_DOUTSYNC) {
5396 /* HW is reporting DMA is out of sync */
5397 adapter->stats.doosync++;
5400 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5401 hw->mac.get_link_status = 1;
5402 if (!test_bit(__IGB_DOWN, &adapter->state))
5403 mod_timer(&adapter->watchdog_timer, jiffies + 1);
5406 napi_schedule(&q_vector->napi);
5408 return IRQ_HANDLED;
5412 * igb_intr - Legacy Interrupt Handler
5413 * @irq: interrupt number
5414 * @data: pointer to a network interface device structure
5416 static irqreturn_t igb_intr(int irq, void *data)
5418 struct igb_adapter *adapter = data;
5419 struct igb_q_vector *q_vector = adapter->q_vector[0];
5420 struct e1000_hw *hw = &adapter->hw;
5421 /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked. No
5422 * need for the IMC write */
5423 u32 icr = rd32(E1000_ICR);
5424 if (!icr)
5425 return IRQ_NONE; /* Not our interrupt */
5427 igb_write_itr(q_vector);
5429 /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5430 * not set, then the adapter didn't send an interrupt */
5431 if (!(icr & E1000_ICR_INT_ASSERTED))
5432 return IRQ_NONE;
5434 if (icr & E1000_ICR_DRSTA)
5435 schedule_work(&adapter->reset_task);
5437 if (icr & E1000_ICR_DOUTSYNC) {
5438 /* HW is reporting DMA is out of sync */
5439 adapter->stats.doosync++;
5442 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5443 hw->mac.get_link_status = 1;
5444 /* guard against interrupt when we're going down */
5445 if (!test_bit(__IGB_DOWN, &adapter->state))
5446 mod_timer(&adapter->watchdog_timer, jiffies + 1);
5449 napi_schedule(&q_vector->napi);
5451 return IRQ_HANDLED;
5454 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5456 struct igb_adapter *adapter = q_vector->adapter;
5457 struct e1000_hw *hw = &adapter->hw;
5459 if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
5460 (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
5461 if (!adapter->msix_entries)
5462 igb_set_itr(adapter);
5463 else
5464 igb_update_ring_itr(q_vector);
5467 if (!test_bit(__IGB_DOWN, &adapter->state)) {
5468 if (adapter->msix_entries)
5469 wr32(E1000_EIMS, q_vector->eims_value);
5470 else
5471 igb_irq_enable(adapter);
5476 * igb_poll - NAPI Rx polling callback
5477 * @napi: napi polling structure
5478 * @budget: count of how many packets we should handle
5480 static int igb_poll(struct napi_struct *napi, int budget)
5482 struct igb_q_vector *q_vector = container_of(napi,
5483 struct igb_q_vector,
5484 napi);
5485 int tx_clean_complete = 1, work_done = 0;
5487 #ifdef CONFIG_IGB_DCA
5488 if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5489 igb_update_dca(q_vector);
5490 #endif
5491 if (q_vector->tx_ring)
5492 tx_clean_complete = igb_clean_tx_irq(q_vector);
5494 if (q_vector->rx_ring)
5495 igb_clean_rx_irq_adv(q_vector, &work_done, budget);
5497 if (!tx_clean_complete)
5498 work_done = budget;
5500 /* If not enough Rx work done, exit the polling mode */
5501 if (work_done < budget) {
5502 napi_complete(napi);
5503 igb_ring_irq_enable(q_vector);
5506 return work_done;
5510 * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5511 * @adapter: board private structure
5512 * @shhwtstamps: timestamp structure to update
5513 * @regval: unsigned 64bit system time value.
5515 * We need to convert the system time value stored in the RX/TXSTMP registers
5516 * into a hwtstamp which can be used by the upper level timestamping functions
5518 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
5519 struct skb_shared_hwtstamps *shhwtstamps,
5520 u64 regval)
5522 u64 ns;
5525 * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
5526 * 24 to match clock shift we setup earlier.
5528 if (adapter->hw.mac.type == e1000_82580)
5529 regval <<= IGB_82580_TSYNC_SHIFT;
5531 ns = timecounter_cyc2time(&adapter->clock, regval);
5532 timecompare_update(&adapter->compare, ns);
5533 memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
5534 shhwtstamps->hwtstamp = ns_to_ktime(ns);
5535 shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
5539 * igb_tx_hwtstamp - utility function which checks for TX time stamp
5540 * @q_vector: pointer to q_vector containing needed info
5541 * @buffer: pointer to igb_buffer structure
5543 * If we were asked to do hardware stamping and such a time stamp is
5544  * available, then it must have been for this skb here because we
5545  * allow only one such packet into the queue.
5547 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct igb_buffer *buffer_info)
5549 struct igb_adapter *adapter = q_vector->adapter;
5550 struct e1000_hw *hw = &adapter->hw;
5551 struct skb_shared_hwtstamps shhwtstamps;
5552 u64 regval;
5554 /* if skb does not support hw timestamp or TX stamp not valid exit */
5555 if (likely(!(buffer_info->tx_flags & SKBTX_HW_TSTAMP)) ||
5556 !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5557 return;
5559 regval = rd32(E1000_TXSTMPL);
5560 regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5562 igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5563 skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5567 * igb_clean_tx_irq - Reclaim resources after transmit completes
5568 * @q_vector: pointer to q_vector containing needed info
5569 * returns true if ring is completely cleaned
5571 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5573 struct igb_adapter *adapter = q_vector->adapter;
5574 struct igb_ring *tx_ring = q_vector->tx_ring;
5575 struct net_device *netdev = tx_ring->netdev;
5576 struct e1000_hw *hw = &adapter->hw;
5577 struct igb_buffer *buffer_info;
5578 union e1000_adv_tx_desc *tx_desc, *eop_desc;
5579 unsigned int total_bytes = 0, total_packets = 0;
5580 unsigned int i, eop, count = 0;
5581 bool cleaned = false;
5583 i = tx_ring->next_to_clean;
5584 eop = tx_ring->buffer_info[i].next_to_watch;
5585 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5587 while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
5588 (count < tx_ring->count)) {
5589 rmb(); /* read buffer_info after eop_desc status */
5590 for (cleaned = false; !cleaned; count++) {
5591 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
5592 buffer_info = &tx_ring->buffer_info[i];
5593 cleaned = (i == eop);
5595 if (buffer_info->skb) {
5596 total_bytes += buffer_info->bytecount;
5597 /* gso_segs is currently only valid for tcp */
5598 total_packets += buffer_info->gso_segs;
5599 igb_tx_hwtstamp(q_vector, buffer_info);
5602 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
5603 tx_desc->wb.status = 0;
5605 i++;
5606 if (i == tx_ring->count)
5607 i = 0;
5609 eop = tx_ring->buffer_info[i].next_to_watch;
5610 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5613 tx_ring->next_to_clean = i;
5615 if (unlikely(count &&
5616 netif_carrier_ok(netdev) &&
5617 igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5618 /* Make sure that anybody stopping the queue after this
5619 * sees the new next_to_clean.
5621 smp_mb();
5622 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
5623 !(test_bit(__IGB_DOWN, &adapter->state))) {
5624 netif_wake_subqueue(netdev, tx_ring->queue_index);
5626 u64_stats_update_begin(&tx_ring->tx_syncp);
5627 tx_ring->tx_stats.restart_queue++;
5628 u64_stats_update_end(&tx_ring->tx_syncp);
5632 if (tx_ring->detect_tx_hung) {
5633 		/* Detect a transmit hang in hardware; this serializes the
5634 * check with the clearing of time_stamp and movement of i */
5635 tx_ring->detect_tx_hung = false;
5636 if (tx_ring->buffer_info[i].time_stamp &&
5637 time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
5638 (adapter->tx_timeout_factor * HZ)) &&
5639 !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5641 /* detected Tx unit hang */
5642 dev_err(tx_ring->dev,
5643 "Detected Tx Unit Hang\n"
5644 " Tx Queue <%d>\n"
5645 " TDH <%x>\n"
5646 " TDT <%x>\n"
5647 " next_to_use <%x>\n"
5648 " next_to_clean <%x>\n"
5649 "buffer_info[next_to_clean]\n"
5650 " time_stamp <%lx>\n"
5651 " next_to_watch <%x>\n"
5652 " jiffies <%lx>\n"
5653 " desc.status <%x>\n",
5654 tx_ring->queue_index,
5655 readl(tx_ring->head),
5656 readl(tx_ring->tail),
5657 tx_ring->next_to_use,
5658 tx_ring->next_to_clean,
5659 tx_ring->buffer_info[eop].time_stamp,
5660 eop,
5661 jiffies,
5662 eop_desc->wb.status);
5663 netif_stop_subqueue(netdev, tx_ring->queue_index);
5666 tx_ring->total_bytes += total_bytes;
5667 tx_ring->total_packets += total_packets;
5668 u64_stats_update_begin(&tx_ring->tx_syncp);
5669 tx_ring->tx_stats.bytes += total_bytes;
5670 tx_ring->tx_stats.packets += total_packets;
5671 u64_stats_update_end(&tx_ring->tx_syncp);
5672 return count < tx_ring->count;
5676 * igb_receive_skb - helper function to handle rx indications
5677 * @q_vector: structure containing interrupt and ring information
5678 * @skb: packet to send up
5679 * @vlan_tag: vlan tag for packet
5681 static void igb_receive_skb(struct igb_q_vector *q_vector,
5682 struct sk_buff *skb,
5683 u16 vlan_tag)
5685 struct igb_adapter *adapter = q_vector->adapter;
5687 if (vlan_tag && adapter->vlgrp)
5688 vlan_gro_receive(&q_vector->napi, adapter->vlgrp,
5689 vlan_tag, skb);
5690 else
5691 napi_gro_receive(&q_vector->napi, skb);
5694 static inline void igb_rx_checksum_adv(struct igb_ring *ring,
5695 u32 status_err, struct sk_buff *skb)
5697 skb_checksum_none_assert(skb);
5699 /* Ignore Checksum bit is set or checksum is disabled through ethtool */
5700 if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
5701 (status_err & E1000_RXD_STAT_IXSM))
5702 return;
5704 /* TCP/UDP checksum error bit is set */
5705 if (status_err &
5706 (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
5708 * work around errata with sctp packets where the TCPE aka
5709 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5710 		 * packets (i.e. let the stack check the crc32c)
5712 if ((skb->len == 60) &&
5713 (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM)) {
5714 u64_stats_update_begin(&ring->rx_syncp);
5715 ring->rx_stats.csum_err++;
5716 u64_stats_update_end(&ring->rx_syncp);
5718 /* let the stack verify checksum errors */
5719 return;
5721 /* It must be a TCP or UDP packet with a valid checksum */
5722 if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
5723 skb->ip_summed = CHECKSUM_UNNECESSARY;
5725 dev_dbg(ring->dev, "cksum success: bits %08X\n", status_err);
5728 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
5729 struct sk_buff *skb)
5731 struct igb_adapter *adapter = q_vector->adapter;
5732 struct e1000_hw *hw = &adapter->hw;
5733 u64 regval;
5736 * If this bit is set, then the RX registers contain the time stamp. No
5737 * other packet will be time stamped until we read these registers, so
5738 * read the registers to make them available again. Because only one
5739 * packet can be time stamped at a time, we know that the register
5740 * values must belong to this one here and therefore we don't need to
5741 * compare any of the additional attributes stored for it.
5743 * If nothing went wrong, then it should have a shared tx_flags that we
5744 * can turn into a skb_shared_hwtstamps.
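/* with TSIP the timestamp is prepended to the packet data: its low and
 * high halves sit at 32-bit word offsets 2 and 3 of the IGB_TS_HDR_LEN
 * byte header that is stripped below */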
5746 if (staterr & E1000_RXDADV_STAT_TSIP) {
5747 u32 *stamp = (u32 *)skb->data;
5748 regval = le32_to_cpu(*(stamp + 2));
5749 regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5750 skb_pull(skb, IGB_TS_HDR_LEN);
5751 } else {
5752 if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5753 return;
5755 regval = rd32(E1000_RXSTMPL);
5756 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5759 igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5761 static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
5762 union e1000_adv_rx_desc *rx_desc)
5764 /* HW will not DMA in data larger than the given buffer, even if it
5765 * parses the (NFS, of course) header to be larger. In that case, it
5766 * fills the header buffer and spills the rest into the page.
5768 u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5769 E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5770 if (hlen > rx_ring->rx_buffer_len)
5771 hlen = rx_ring->rx_buffer_len;
5772 return hlen;
5775 static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
5776 int *work_done, int budget)
5778 struct igb_ring *rx_ring = q_vector->rx_ring;
5779 struct net_device *netdev = rx_ring->netdev;
5780 struct device *dev = rx_ring->dev;
5781 union e1000_adv_rx_desc *rx_desc, *next_rxd;
5782 struct igb_buffer *buffer_info, *next_buffer;
5783 struct sk_buff *skb;
5784 bool cleaned = false;
5785 int cleaned_count = 0;
5786 int current_node = numa_node_id();
5787 unsigned int total_bytes = 0, total_packets = 0;
5788 unsigned int i;
5789 u32 staterr;
5790 u16 length;
5791 u16 vlan_tag;
5793 i = rx_ring->next_to_clean;
5794 buffer_info = &rx_ring->buffer_info[i];
5795 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5796 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
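/* DD (descriptor done) is set by hardware once it has written the
 * descriptor back, so the fields read below are valid */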
5798 while (staterr & E1000_RXD_STAT_DD) {
5799 if (*work_done >= budget)
5800 break;
5801 (*work_done)++;
5802 rmb(); /* read descriptor and rx_buffer_info after status DD */
5804 skb = buffer_info->skb;
5805 prefetch(skb->data - NET_IP_ALIGN);
5806 buffer_info->skb = NULL;
5808 i++;
5809 if (i == rx_ring->count)
5810 i = 0;
5812 next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
5813 prefetch(next_rxd);
5814 next_buffer = &rx_ring->buffer_info[i];
5816 length = le16_to_cpu(rx_desc->wb.upper.length);
5817 cleaned = true;
5818 cleaned_count++;
5820 if (buffer_info->dma) {
5821 dma_unmap_single(dev, buffer_info->dma,
5822 rx_ring->rx_buffer_len,
5823 DMA_FROM_DEVICE);
5824 buffer_info->dma = 0;
5825 if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
5826 skb_put(skb, length);
5827 goto send_up;
5829 skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
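/* packet split: only the parsed header was DMAed into the skb; any
 * remaining payload lands in the half-page buffer handled below */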
5832 if (length) {
5833 dma_unmap_page(dev, buffer_info->page_dma,
5834 PAGE_SIZE / 2, DMA_FROM_DEVICE);
5835 buffer_info->page_dma = 0;
5837 skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
5838 buffer_info->page,
5839 buffer_info->page_offset,
5840 length);
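/* keep the page for reuse only when we are its sole user and it is
 * local to this NUMA node; otherwise the skb now owns the only
 * reference and the next refill allocates a fresh page */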
5842 if ((page_count(buffer_info->page) != 1) ||
5843 (page_to_nid(buffer_info->page) != current_node))
5844 buffer_info->page = NULL;
5845 else
5846 get_page(buffer_info->page);
5848 skb->len += length;
5849 skb->data_len += length;
5850 skb->truesize += length;
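/* not end-of-packet: the frame continues in the next descriptor, so
 * stash the partially built skb there and keep cleaning */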
5853 if (!(staterr & E1000_RXD_STAT_EOP)) {
5854 buffer_info->skb = next_buffer->skb;
5855 buffer_info->dma = next_buffer->dma;
5856 next_buffer->skb = skb;
5857 next_buffer->dma = 0;
5858 goto next_desc;
5860 send_up:
5861 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
5862 dev_kfree_skb_irq(skb);
5863 goto next_desc;
5866 if (staterr & (E1000_RXDADV_STAT_TSIP | E1000_RXDADV_STAT_TS))
5867 igb_rx_hwtstamp(q_vector, staterr, skb);
5868 total_bytes += skb->len;
5869 total_packets++;
5871 igb_rx_checksum_adv(rx_ring, staterr, skb);
5873 skb->protocol = eth_type_trans(skb, netdev);
5874 skb_record_rx_queue(skb, rx_ring->queue_index);
5876 vlan_tag = ((staterr & E1000_RXD_STAT_VP) ?
5877 le16_to_cpu(rx_desc->wb.upper.vlan) : 0);
5879 igb_receive_skb(q_vector, skb, vlan_tag);
5881 next_desc:
5882 rx_desc->wb.upper.status_error = 0;
5884 /* return some buffers to hardware; refilling one at a time is too slow */
5885 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5886 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5887 cleaned_count = 0;
5890 /* use prefetched values */
5891 rx_desc = next_rxd;
5892 buffer_info = next_buffer;
5893 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5896 rx_ring->next_to_clean = i;
5897 cleaned_count = igb_desc_unused(rx_ring);
5899 if (cleaned_count)
5900 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5902 rx_ring->total_packets += total_packets;
5903 rx_ring->total_bytes += total_bytes;
5904 u64_stats_update_begin(&rx_ring->rx_syncp);
5905 rx_ring->rx_stats.packets += total_packets;
5906 rx_ring->rx_stats.bytes += total_bytes;
5907 u64_stats_update_end(&rx_ring->rx_syncp);
5908 return cleaned;
5912 * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
5913 * @rx_ring: address of the ring structure to place buffers on
* @cleaned_count: number of buffers to replace
5915 void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
5917 struct net_device *netdev = rx_ring->netdev;
5918 union e1000_adv_rx_desc *rx_desc;
5919 struct igb_buffer *buffer_info;
5920 struct sk_buff *skb;
5921 unsigned int i;
5922 int bufsz;
5924 i = rx_ring->next_to_use;
5925 buffer_info = &rx_ring->buffer_info[i];
5927 bufsz = rx_ring->rx_buffer_len;
5929 while (cleaned_count--) {
5930 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5932 if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
5933 if (!buffer_info->page) {
5934 buffer_info->page = netdev_alloc_page(netdev);
5935 if (unlikely(!buffer_info->page)) {
5936 u64_stats_update_begin(&rx_ring->rx_syncp);
5937 rx_ring->rx_stats.alloc_failed++;
5938 u64_stats_update_end(&rx_ring->rx_syncp);
5939 goto no_buffers;
5941 buffer_info->page_offset = 0;
5942 } else {
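/* reuse the page: alternate between its two halves on each refill */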
5943 buffer_info->page_offset ^= PAGE_SIZE / 2;
5945 buffer_info->page_dma =
5946 dma_map_page(rx_ring->dev, buffer_info->page,
5947 buffer_info->page_offset,
5948 PAGE_SIZE / 2,
5949 DMA_FROM_DEVICE);
5950 if (dma_mapping_error(rx_ring->dev,
5951 buffer_info->page_dma)) {
5952 buffer_info->page_dma = 0;
5953 u64_stats_update_begin(&rx_ring->rx_syncp);
5954 rx_ring->rx_stats.alloc_failed++;
5955 u64_stats_update_end(&rx_ring->rx_syncp);
5956 goto no_buffers;
5960 skb = buffer_info->skb;
5961 if (!skb) {
5962 skb = netdev_alloc_skb_ip_align(netdev, bufsz);
5963 if (unlikely(!skb)) {
5964 u64_stats_update_begin(&rx_ring->rx_syncp);
5965 rx_ring->rx_stats.alloc_failed++;
5966 u64_stats_update_end(&rx_ring->rx_syncp);
5967 goto no_buffers;
5970 buffer_info->skb = skb;
5972 if (!buffer_info->dma) {
5973 buffer_info->dma = dma_map_single(rx_ring->dev,
5974 skb->data,
5975 bufsz,
5976 DMA_FROM_DEVICE);
5977 if (dma_mapping_error(rx_ring->dev,
5978 buffer_info->dma)) {
5979 buffer_info->dma = 0;
5980 u64_stats_update_begin(&rx_ring->rx_syncp);
5981 rx_ring->rx_stats.alloc_failed++;
5982 u64_stats_update_end(&rx_ring->rx_syncp);
5983 goto no_buffers;
5986 /* Refresh the desc even if buffer_addrs didn't change because
5987 * each write-back erases this info. */
5988 if (bufsz < IGB_RXBUFFER_1024) {
5989 rx_desc->read.pkt_addr =
5990 cpu_to_le64(buffer_info->page_dma);
5991 rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
5992 } else {
5993 rx_desc->read.pkt_addr = cpu_to_le64(buffer_info->dma);
5994 rx_desc->read.hdr_addr = 0;
5997 i++;
5998 if (i == rx_ring->count)
5999 i = 0;
6000 buffer_info = &rx_ring->buffer_info[i];
6003 no_buffers:
6004 if (rx_ring->next_to_use != i) {
6005 rx_ring->next_to_use = i;
6006 if (i == 0)
6007 i = (rx_ring->count - 1);
6008 else
6009 i--;
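/* tail is written one entry behind next_to_use, i.e. at the last
 * descriptor that was actually initialized above */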
6011 /* Force memory writes to complete before letting h/w
6012 * know there are new descriptors to fetch. (Only
6013 * applicable for weak-ordered memory model archs,
6014 * such as IA-64). */
6015 wmb();
6016 writel(i, rx_ring->tail);
6021 * igb_mii_ioctl - MII register access ioctl handler
6022 * @netdev: network interface device structure
6023 * @ifr: interface request structure holding the mii_ioctl_data
6024 * @cmd: ioctl command (SIOCGMIIPHY, SIOCGMIIREG or SIOCSMIIREG)
6026 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6028 struct igb_adapter *adapter = netdev_priv(netdev);
6029 struct mii_ioctl_data *data = if_mii(ifr);
6031 if (adapter->hw.phy.media_type != e1000_media_type_copper)
6032 return -EOPNOTSUPP;
6034 switch (cmd) {
6035 case SIOCGMIIPHY:
6036 data->phy_id = adapter->hw.phy.addr;
6037 break;
6038 case SIOCGMIIREG:
6039 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
6040 &data->val_out))
6041 return -EIO;
6042 break;
6043 case SIOCSMIIREG:
6044 default:
6045 return -EOPNOTSUPP;
6047 return 0;
6051 * igb_hwtstamp_ioctl - control hardware time stamping
6052 * @netdev: network interface device structure
6053 * @ifr: interface request structure holding the hwtstamp_config
6054 * @cmd: ioctl command (SIOCSHWTSTAMP)
6056 * Outgoing time stamping can be enabled and disabled. Play nice and
6057 * disable it when requested, although it shouldn't cause any overhead
6058 * when no packet needs it. At most one packet in the queue may be
6059 * marked for time stamping, otherwise it would be impossible to tell
6060 * for sure to which packet the hardware time stamp belongs.
6062 * Incoming time stamping has to be configured via the hardware
6063 * filters. Not all combinations are supported, in particular event
6064 * type has to be specified. Matching the kind of event packet is
6065 * not supported, with the exception of "all V2 events regardless of
6066 * level 2 or 4".
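*
* For reference, a minimal userspace sketch (the socket descriptor,
* interface name and filter choice below are illustrative only, not
* mandated by the driver):
*
*	struct hwtstamp_config cfg = {
*		.tx_type   = HWTSTAMP_TX_ON,
*		.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT,
*	};
*	struct ifreq ifr;
*
*	memset(&ifr, 0, sizeof(ifr));
*	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ);
*	ifr.ifr_data = (void *)&cfg;
*	ioctl(sock_fd, SIOCSHWTSTAMP, &ifr);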
6069 static int igb_hwtstamp_ioctl(struct net_device *netdev,
6070 struct ifreq *ifr, int cmd)
6072 struct igb_adapter *adapter = netdev_priv(netdev);
6073 struct e1000_hw *hw = &adapter->hw;
6074 struct hwtstamp_config config;
6075 u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
6076 u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6077 u32 tsync_rx_cfg = 0;
6078 bool is_l4 = false;
6079 bool is_l2 = false;
6080 u32 regval;
6082 if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
6083 return -EFAULT;
6085 /* reserved for future extensions */
6086 if (config.flags)
6087 return -EINVAL;
6089 switch (config.tx_type) {
6090 case HWTSTAMP_TX_OFF:
6091 tsync_tx_ctl = 0;
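/* fall through - OFF simply leaves the Tx timestamp enable bit cleared */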
6092 case HWTSTAMP_TX_ON:
6093 break;
6094 default:
6095 return -ERANGE;
6098 switch (config.rx_filter) {
6099 case HWTSTAMP_FILTER_NONE:
6100 tsync_rx_ctl = 0;
6101 break;
6102 case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
6103 case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
6104 case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
6105 case HWTSTAMP_FILTER_ALL:
6107 * register TSYNCRXCFG must be set, therefore it is not
6108 * possible to time stamp both Sync and Delay_Req messages
6109 * => fall back to time stamping all packets
6111 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6112 config.rx_filter = HWTSTAMP_FILTER_ALL;
6113 break;
6114 case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
6115 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6116 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
6117 is_l4 = true;
6118 break;
6119 case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
6120 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6121 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
6122 is_l4 = true;
6123 break;
6124 case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
6125 case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
6126 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6127 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
6128 is_l2 = true;
6129 is_l4 = true;
6130 config.rx_filter = HWTSTAMP_FILTER_SOME;
6131 break;
6132 case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
6133 case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
6134 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6135 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
6136 is_l2 = true;
6137 is_l4 = true;
6138 config.rx_filter = HWTSTAMP_FILTER_SOME;
6139 break;
6140 case HWTSTAMP_FILTER_PTP_V2_EVENT:
6141 case HWTSTAMP_FILTER_PTP_V2_SYNC:
6142 case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
6143 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
6144 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
6145 is_l2 = true;
6146 break;
6147 default:
6148 return -ERANGE;
6151 if (hw->mac.type == e1000_82575) {
6152 if (tsync_rx_ctl | tsync_tx_ctl)
6153 return -EINVAL;
6154 return 0;
6158 * Per-packet timestamping only works if all packets are
6159 * timestamped, so enable timestamping in all packets as
6160 * long as one rx filter was configured.
6162 if ((hw->mac.type == e1000_82580) && tsync_rx_ctl) {
6163 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6164 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6167 /* enable/disable TX */
6168 regval = rd32(E1000_TSYNCTXCTL);
6169 regval &= ~E1000_TSYNCTXCTL_ENABLED;
6170 regval |= tsync_tx_ctl;
6171 wr32(E1000_TSYNCTXCTL, regval);
6173 /* enable/disable RX */
6174 regval = rd32(E1000_TSYNCRXCTL);
6175 regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
6176 regval |= tsync_rx_ctl;
6177 wr32(E1000_TSYNCRXCTL, regval);
6179 /* define which PTP packets are time stamped */
6180 wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
6182 /* define ethertype filter for timestamped packets */
6183 if (is_l2)
6184 wr32(E1000_ETQF(3),
6185 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
6186 E1000_ETQF_1588 | /* enable timestamping */
6187 ETH_P_1588)); /* 1588 eth protocol type */
6188 else
6189 wr32(E1000_ETQF(3), 0);
6191 #define PTP_PORT 319
6192 /* L4 Queue Filter[3]: filter by destination port and protocol */
6193 if (is_l4) {
6194 u32 ftqf = (IPPROTO_UDP /* UDP */
6195 | E1000_FTQF_VF_BP /* VF not compared */
6196 | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6197 | E1000_FTQF_MASK); /* mask all inputs */
6198 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6200 wr32(E1000_IMIR(3), htons(PTP_PORT));
6201 wr32(E1000_IMIREXT(3),
6202 (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6203 if (hw->mac.type == e1000_82576) {
6204 /* enable source port check */
6205 wr32(E1000_SPQF(3), htons(PTP_PORT));
6206 ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6208 wr32(E1000_FTQF(3), ftqf);
6209 } else {
6210 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
6212 wrfl();
6214 adapter->hwtstamp_config = config;
6216 /* clear TX/RX time stamp registers, just to be sure */
6217 regval = rd32(E1000_TXSTMPH);
6218 regval = rd32(E1000_RXSTMPH);
6220 return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
6221 -EFAULT : 0;
6225 * igb_ioctl - dispatch device-specific ioctl requests
6226 * @netdev: network interface device structure
6227 * @ifr: interface request structure
6228 * @cmd: ioctl command
6230 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6232 switch (cmd) {
6233 case SIOCGMIIPHY:
6234 case SIOCGMIIREG:
6235 case SIOCSMIIREG:
6236 return igb_mii_ioctl(netdev, ifr, cmd);
6237 case SIOCSHWTSTAMP:
6238 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
6239 default:
6240 return -EOPNOTSUPP;
6244 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6246 struct igb_adapter *adapter = hw->back;
6247 u16 cap_offset;
6249 cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
6250 if (!cap_offset)
6251 return -E1000_ERR_CONFIG;
6253 pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6255 return 0;
6258 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6260 struct igb_adapter *adapter = hw->back;
6261 u16 cap_offset;
6263 cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
6264 if (!cap_offset)
6265 return -E1000_ERR_CONFIG;
6267 pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6269 return 0;
6272 static void igb_vlan_rx_register(struct net_device *netdev,
6273 struct vlan_group *grp)
6275 struct igb_adapter *adapter = netdev_priv(netdev);
6276 struct e1000_hw *hw = &adapter->hw;
6277 u32 ctrl, rctl;
6279 igb_irq_disable(adapter);
6280 adapter->vlgrp = grp;
6282 if (grp) {
6283 /* enable VLAN tag insert/strip */
6284 ctrl = rd32(E1000_CTRL);
6285 ctrl |= E1000_CTRL_VME;
6286 wr32(E1000_CTRL, ctrl);
6288 /* Disable CFI check */
6289 rctl = rd32(E1000_RCTL);
6290 rctl &= ~E1000_RCTL_CFIEN;
6291 wr32(E1000_RCTL, rctl);
6292 } else {
6293 /* disable VLAN tag insert/strip */
6294 ctrl = rd32(E1000_CTRL);
6295 ctrl &= ~E1000_CTRL_VME;
6296 wr32(E1000_CTRL, ctrl);
6299 igb_rlpml_set(adapter);
6301 if (!test_bit(__IGB_DOWN, &adapter->state))
6302 igb_irq_enable(adapter);
6305 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6307 struct igb_adapter *adapter = netdev_priv(netdev);
6308 struct e1000_hw *hw = &adapter->hw;
6309 int pf_id = adapter->vfs_allocated_count;
6311 /* attempt to add filter to vlvf array */
6312 igb_vlvf_set(adapter, vid, true, pf_id);
6314 /* add the filter since PF can receive vlans w/o entry in vlvf */
6315 igb_vfta_set(hw, vid, true);
6318 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6320 struct igb_adapter *adapter = netdev_priv(netdev);
6321 struct e1000_hw *hw = &adapter->hw;
6322 int pf_id = adapter->vfs_allocated_count;
6323 s32 err;
6325 igb_irq_disable(adapter);
6326 vlan_group_set_device(adapter->vlgrp, vid, NULL);
6328 if (!test_bit(__IGB_DOWN, &adapter->state))
6329 igb_irq_enable(adapter);
6331 /* remove vlan from VLVF table array */
6332 err = igb_vlvf_set(adapter, vid, false, pf_id);
6334 /* if vid was not present in VLVF just remove it from table */
6335 if (err)
6336 igb_vfta_set(hw, vid, false);
6339 static void igb_restore_vlan(struct igb_adapter *adapter)
6341 igb_vlan_rx_register(adapter->netdev, adapter->vlgrp);
6343 if (adapter->vlgrp) {
6344 u16 vid;
6345 for (vid = 0; vid < VLAN_N_VID; vid++) {
6346 if (!vlan_group_get_device(adapter->vlgrp, vid))
6347 continue;
6348 igb_vlan_rx_add_vid(adapter->netdev, vid);
6353 int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx)
6355 struct pci_dev *pdev = adapter->pdev;
6356 struct e1000_mac_info *mac = &adapter->hw.mac;
6358 mac->autoneg = 0;
6360 /* Make sure dplx is at most 1 bit and lsb of speed is not set
6361 * for the switch() below to work */
6362 if ((spd & 1) || (dplx & ~1))
6363 goto err_inval;
6365 /* Fiber NICs only allow 1000 Mbps full duplex */
6366 if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6367 (spd != SPEED_1000 ||
6368 dplx != DUPLEX_FULL))
6369 goto err_inval;
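/* with dplx in {0, 1} and spd a multiple of 10, spd + dplx uniquely
 * identifies each pair below, e.g. SPEED_100 + DUPLEX_FULL == 101 */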
6371 switch (spd + dplx) {
6372 case SPEED_10 + DUPLEX_HALF:
6373 mac->forced_speed_duplex = ADVERTISE_10_HALF;
6374 break;
6375 case SPEED_10 + DUPLEX_FULL:
6376 mac->forced_speed_duplex = ADVERTISE_10_FULL;
6377 break;
6378 case SPEED_100 + DUPLEX_HALF:
6379 mac->forced_speed_duplex = ADVERTISE_100_HALF;
6380 break;
6381 case SPEED_100 + DUPLEX_FULL:
6382 mac->forced_speed_duplex = ADVERTISE_100_FULL;
6383 break;
6384 case SPEED_1000 + DUPLEX_FULL:
6385 mac->autoneg = 1;
6386 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6387 break;
6388 case SPEED_1000 + DUPLEX_HALF: /* not supported */
6389 default:
6390 goto err_inval;
6392 return 0;
6394 err_inval:
6395 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6396 return -EINVAL;
6399 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
6401 struct net_device *netdev = pci_get_drvdata(pdev);
6402 struct igb_adapter *adapter = netdev_priv(netdev);
6403 struct e1000_hw *hw = &adapter->hw;
6404 u32 ctrl, rctl, status;
6405 u32 wufc = adapter->wol;
6406 #ifdef CONFIG_PM
6407 int retval = 0;
6408 #endif
6410 netif_device_detach(netdev);
6412 if (netif_running(netdev))
6413 igb_close(netdev);
6415 igb_clear_interrupt_scheme(adapter);
6417 #ifdef CONFIG_PM
6418 retval = pci_save_state(pdev);
6419 if (retval)
6420 return retval;
6421 #endif
6423 status = rd32(E1000_STATUS);
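/* don't arm wake on link status change if the link is already up */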
6424 if (status & E1000_STATUS_LU)
6425 wufc &= ~E1000_WUFC_LNKC;
6427 if (wufc) {
6428 igb_setup_rctl(adapter);
6429 igb_set_rx_mode(netdev);
6431 /* turn on all-multi mode if wake on multicast is enabled */
6432 if (wufc & E1000_WUFC_MC) {
6433 rctl = rd32(E1000_RCTL);
6434 rctl |= E1000_RCTL_MPE;
6435 wr32(E1000_RCTL, rctl);
6438 ctrl = rd32(E1000_CTRL);
6439 /* advertise wake from D3Cold */
6440 #define E1000_CTRL_ADVD3WUC 0x00100000
6441 /* phy power management enable */
6442 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6443 ctrl |= E1000_CTRL_ADVD3WUC;
6444 wr32(E1000_CTRL, ctrl);
6446 /* Allow time for pending master requests to run */
6447 igb_disable_pcie_master(hw);
6449 wr32(E1000_WUC, E1000_WUC_PME_EN);
6450 wr32(E1000_WUFC, wufc);
6451 } else {
6452 wr32(E1000_WUC, 0);
6453 wr32(E1000_WUFC, 0);
6456 *enable_wake = wufc || adapter->en_mng_pt;
6457 if (!*enable_wake)
6458 igb_power_down_link(adapter);
6459 else
6460 igb_power_up_link(adapter);
6462 /* Release control of h/w to f/w. If f/w is AMT enabled, this
6463 * would have already happened in close and is redundant. */
6464 igb_release_hw_control(adapter);
6466 pci_disable_device(pdev);
6468 return 0;
6471 #ifdef CONFIG_PM
6472 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
6474 int retval;
6475 bool wake;
6477 retval = __igb_shutdown(pdev, &wake);
6478 if (retval)
6479 return retval;
6481 if (wake) {
6482 pci_prepare_to_sleep(pdev);
6483 } else {
6484 pci_wake_from_d3(pdev, false);
6485 pci_set_power_state(pdev, PCI_D3hot);
6488 return 0;
6491 static int igb_resume(struct pci_dev *pdev)
6493 struct net_device *netdev = pci_get_drvdata(pdev);
6494 struct igb_adapter *adapter = netdev_priv(netdev);
6495 struct e1000_hw *hw = &adapter->hw;
6496 u32 err;
6498 pci_set_power_state(pdev, PCI_D0);
6499 pci_restore_state(pdev);
6500 pci_save_state(pdev);
6502 err = pci_enable_device_mem(pdev);
6503 if (err) {
6504 dev_err(&pdev->dev,
6505 "igb: Cannot enable PCI device from suspend\n");
6506 return err;
6508 pci_set_master(pdev);
6510 pci_enable_wake(pdev, PCI_D3hot, 0);
6511 pci_enable_wake(pdev, PCI_D3cold, 0);
6513 if (igb_init_interrupt_scheme(adapter)) {
6514 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6515 return -ENOMEM;
6518 igb_reset(adapter);
6520 /* let the f/w know that the h/w is now under the control of the
6521 * driver. */
6522 igb_get_hw_control(adapter);
6524 wr32(E1000_WUS, ~0);
6526 if (netif_running(netdev)) {
6527 err = igb_open(netdev);
6528 if (err)
6529 return err;
6532 netif_device_attach(netdev);
6534 return 0;
6536 #endif
6538 static void igb_shutdown(struct pci_dev *pdev)
6540 bool wake;
6542 __igb_shutdown(pdev, &wake);
6544 if (system_state == SYSTEM_POWER_OFF) {
6545 pci_wake_from_d3(pdev, wake);
6546 pci_set_power_state(pdev, PCI_D3hot);
6550 #ifdef CONFIG_NET_POLL_CONTROLLER
6552 * Polling 'interrupt' - used by things like netconsole to send skbs
6553 * without having to re-enable interrupts. It's not called while
6554 * the interrupt routine is executing.
6556 static void igb_netpoll(struct net_device *netdev)
6558 struct igb_adapter *adapter = netdev_priv(netdev);
6559 struct e1000_hw *hw = &adapter->hw;
6560 int i;
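/* legacy/MSI mode: one vector services every ring, so mask the
 * interrupt and schedule that single napi context */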
6562 if (!adapter->msix_entries) {
6563 struct igb_q_vector *q_vector = adapter->q_vector[0];
6564 igb_irq_disable(adapter);
6565 napi_schedule(&q_vector->napi);
6566 return;
6569 for (i = 0; i < adapter->num_q_vectors; i++) {
6570 struct igb_q_vector *q_vector = adapter->q_vector[i];
6571 wr32(E1000_EIMC, q_vector->eims_value);
6572 napi_schedule(&q_vector->napi);
6575 #endif /* CONFIG_NET_POLL_CONTROLLER */
6578 * igb_io_error_detected - called when PCI error is detected
6579 * @pdev: Pointer to PCI device
6580 * @state: The current pci connection state
6582 * This function is called after a PCI bus error affecting
6583 * this device has been detected.
6585 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6586 pci_channel_state_t state)
6588 struct net_device *netdev = pci_get_drvdata(pdev);
6589 struct igb_adapter *adapter = netdev_priv(netdev);
6591 netif_device_detach(netdev);
6593 if (state == pci_channel_io_perm_failure)
6594 return PCI_ERS_RESULT_DISCONNECT;
6596 if (netif_running(netdev))
6597 igb_down(adapter);
6598 pci_disable_device(pdev);
6600 /* Request a slot reset. */
6601 return PCI_ERS_RESULT_NEED_RESET;
6605 * igb_io_slot_reset - called after the pci bus has been reset.
6606 * @pdev: Pointer to PCI device
6608 * Restart the card from scratch, as if from a cold-boot. Implementation
6609 * resembles the first-half of the igb_resume routine.
6611 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6613 struct net_device *netdev = pci_get_drvdata(pdev);
6614 struct igb_adapter *adapter = netdev_priv(netdev);
6615 struct e1000_hw *hw = &adapter->hw;
6616 pci_ers_result_t result;
6617 int err;
6619 if (pci_enable_device_mem(pdev)) {
6620 dev_err(&pdev->dev,
6621 "Cannot re-enable PCI device after reset.\n");
6622 result = PCI_ERS_RESULT_DISCONNECT;
6623 } else {
6624 pci_set_master(pdev);
6625 pci_restore_state(pdev);
6626 pci_save_state(pdev);
6628 pci_enable_wake(pdev, PCI_D3hot, 0);
6629 pci_enable_wake(pdev, PCI_D3cold, 0);
6631 igb_reset(adapter);
6632 wr32(E1000_WUS, ~0);
6633 result = PCI_ERS_RESULT_RECOVERED;
6636 err = pci_cleanup_aer_uncorrect_error_status(pdev);
6637 if (err) {
6638 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6639 "failed 0x%0x\n", err);
6640 /* non-fatal, continue */
6643 return result;
6647 * igb_io_resume - called when traffic can start flowing again.
6648 * @pdev: Pointer to PCI device
6650 * This callback is called when the error recovery driver tells us that
6651 * it's OK to resume normal operation. Implementation resembles the
6652 * second-half of the igb_resume routine.
6654 static void igb_io_resume(struct pci_dev *pdev)
6656 struct net_device *netdev = pci_get_drvdata(pdev);
6657 struct igb_adapter *adapter = netdev_priv(netdev);
6659 if (netif_running(netdev)) {
6660 if (igb_up(adapter)) {
6661 dev_err(&pdev->dev, "igb_up failed after reset\n");
6662 return;
6666 netif_device_attach(netdev);
6668 /* let the f/w know that the h/w is now under the control of the
6669 * driver. */
6670 igb_get_hw_control(adapter);
6673 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6674 u8 qsel)
6676 u32 rar_low, rar_high;
6677 struct e1000_hw *hw = &adapter->hw;
6679 /* HW expects these in little endian so we reverse the byte order
6680 * from network order (big endian) to little endian
6682 rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6683 ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6684 rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6686 /* Indicate to hardware the Address is Valid. */
6687 rar_high |= E1000_RAH_AV;
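/* record the owning pool/queue in RAH: 82575 stores the pool number in
 * the field itself, later parts set one bit per pool */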
6689 if (hw->mac.type == e1000_82575)
6690 rar_high |= E1000_RAH_POOL_1 * qsel;
6691 else
6692 rar_high |= E1000_RAH_POOL_1 << qsel;
6694 wr32(E1000_RAL(index), rar_low);
6695 wrfl();
6696 wr32(E1000_RAH(index), rar_high);
6697 wrfl();
6700 static int igb_set_vf_mac(struct igb_adapter *adapter,
6701 int vf, unsigned char *mac_addr)
6703 struct e1000_hw *hw = &adapter->hw;
6704 /* VF MAC addresses start at the end of the receive addresses and move
6705 * towards the first; as a result a collision should not be possible */
6706 int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6708 memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6710 igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6712 return 0;
6715 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6717 struct igb_adapter *adapter = netdev_priv(netdev);
6718 if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6719 return -EINVAL;
6720 adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6721 dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
6722 dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
6723 " change effective.\n");
6724 if (test_bit(__IGB_DOWN, &adapter->state)) {
6725 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6726 " but the PF device is not up.\n");
6727 dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6728 " attempting to use the VF device.\n");
6730 return igb_set_vf_mac(adapter, vf, mac);
6733 static int igb_link_mbps(int internal_link_speed)
6735 switch (internal_link_speed) {
6736 case SPEED_100:
6737 return 100;
6738 case SPEED_1000:
6739 return 1000;
6740 default:
6741 return 0;
6745 static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
6746 int link_speed)
6748 int rf_dec, rf_int;
6749 u32 bcnrc_val;
6751 if (tx_rate != 0) {
6752 /* Calculate the rate factor values to set */
6753 rf_int = link_speed / tx_rate;
6754 rf_dec = (link_speed - (rf_int * tx_rate));
6755 rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;
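/* rf_int.rf_dec is link_speed / tx_rate in fixed point with
 * E1000_RTTBCNRC_RF_INT_SHIFT fractional bits, e.g. a 1000 Mbps link
 * capped at 300 Mbps gives a factor of roughly 3.33 */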
6757 bcnrc_val = E1000_RTTBCNRC_RS_ENA;
6758 bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
6759 E1000_RTTBCNRC_RF_INT_MASK);
6760 bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
6761 } else {
6762 bcnrc_val = 0;
6765 wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
6766 wr32(E1000_RTTBCNRC, bcnrc_val);
6769 static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
6771 int actual_link_speed, i;
6772 bool reset_rate = false;
6774 /* VF TX rate limit was not set or not supported */
6775 if ((adapter->vf_rate_link_speed == 0) ||
6776 (adapter->hw.mac.type != e1000_82576))
6777 return;
6779 actual_link_speed = igb_link_mbps(adapter->link_speed);
6780 if (actual_link_speed != adapter->vf_rate_link_speed) {
6781 reset_rate = true;
6782 adapter->vf_rate_link_speed = 0;
6783 dev_info(&adapter->pdev->dev,
6784 "Link speed has been changed. VF Transmit "
6785 "rate is disabled\n");
6788 for (i = 0; i < adapter->vfs_allocated_count; i++) {
6789 if (reset_rate)
6790 adapter->vf_data[i].tx_rate = 0;
6792 igb_set_vf_rate_limit(&adapter->hw, i,
6793 adapter->vf_data[i].tx_rate,
6794 actual_link_speed);
6798 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
6800 struct igb_adapter *adapter = netdev_priv(netdev);
6801 struct e1000_hw *hw = &adapter->hw;
6802 int actual_link_speed;
6804 if (hw->mac.type != e1000_82576)
6805 return -EOPNOTSUPP;
6807 actual_link_speed = igb_link_mbps(adapter->link_speed);
6808 if ((vf >= adapter->vfs_allocated_count) ||
6809 (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
6810 (tx_rate < 0) || (tx_rate > actual_link_speed))
6811 return -EINVAL;
6813 adapter->vf_rate_link_speed = actual_link_speed;
6814 adapter->vf_data[vf].tx_rate = (u16)tx_rate;
6815 igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
6817 return 0;
6820 static int igb_ndo_get_vf_config(struct net_device *netdev,
6821 int vf, struct ifla_vf_info *ivi)
6823 struct igb_adapter *adapter = netdev_priv(netdev);
6824 if (vf >= adapter->vfs_allocated_count)
6825 return -EINVAL;
6826 ivi->vf = vf;
6827 memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
6828 ivi->tx_rate = adapter->vf_data[vf].tx_rate;
6829 ivi->vlan = adapter->vf_data[vf].pf_vlan;
6830 ivi->qos = adapter->vf_data[vf].pf_qos;
6831 return 0;
6834 static void igb_vmm_control(struct igb_adapter *adapter)
6836 struct e1000_hw *hw = &adapter->hw;
6837 u32 reg;
6839 switch (hw->mac.type) {
6840 case e1000_82575:
6841 default:
6842 /* replication is not supported for 82575 */
6843 return;
6844 case e1000_82576:
6845 /* notify HW that the MAC is adding vlan tags */
6846 reg = rd32(E1000_DTXCTL);
6847 reg |= E1000_DTXCTL_VLAN_ADDED;
6848 wr32(E1000_DTXCTL, reg);
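/* fall through - 82576 also needs the vlan replication setup below */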
6849 case e1000_82580:
6850 /* enable replication vlan tag stripping */
6851 reg = rd32(E1000_RPLOLR);
6852 reg |= E1000_RPLOLR_STRVLAN;
6853 wr32(E1000_RPLOLR, reg);
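/* fall through */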
6854 case e1000_i350:
6855 /* none of the above registers are supported by i350 */
6856 break;
6859 if (adapter->vfs_allocated_count) {
6860 igb_vmdq_set_loopback_pf(hw, true);
6861 igb_vmdq_set_replication_pf(hw, true);
6862 igb_vmdq_set_anti_spoofing_pf(hw, true,
6863 adapter->vfs_allocated_count);
6864 } else {
6865 igb_vmdq_set_loopback_pf(hw, false);
6866 igb_vmdq_set_replication_pf(hw, false);
6870 /* igb_main.c */