1 /*******************************************************************************
3 Intel(R) Gigabit Ethernet Linux driver
4 Copyright(c) 2007-2011 Intel Corporation.
6 This program is free software; you can redistribute it and/or modify it
7 under the terms and conditions of the GNU General Public License,
8 version 2, as published by the Free Software Foundation.
10 This program is distributed in the hope it will be useful, but WITHOUT
11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 You should have received a copy of the GNU General Public License along with
16 this program; if not, write to the Free Software Foundation, Inc.,
17 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
19 The full GNU General Public License is included in this distribution in
20 the file called "COPYING".
23 e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24 Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
26 *******************************************************************************/
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/init.h>
31 #include <linux/bitops.h>
32 #include <linux/vmalloc.h>
33 #include <linux/pagemap.h>
34 #include <linux/netdevice.h>
35 #include <linux/ipv6.h>
36 #include <linux/slab.h>
37 #include <net/checksum.h>
38 #include <net/ip6_checksum.h>
39 #include <linux/net_tstamp.h>
40 #include <linux/mii.h>
41 #include <linux/ethtool.h>
43 #include <linux/if_vlan.h>
44 #include <linux/pci.h>
45 #include <linux/pci-aspm.h>
46 #include <linux/delay.h>
47 #include <linux/interrupt.h>
49 #include <linux/tcp.h>
50 #include <linux/sctp.h>
51 #include <linux/if_ether.h>
52 #include <linux/aer.h>
53 #include <linux/prefetch.h>
55 #include <linux/dca.h>
62 #define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
63 __stringify(BUILD) "-k"
64 char igb_driver_name
[] = "igb";
65 char igb_driver_version
[] = DRV_VERSION
;
66 static const char igb_driver_string
[] =
67 "Intel(R) Gigabit Ethernet Network Driver";
68 static const char igb_copyright
[] = "Copyright (c) 2007-2011 Intel Corporation.";
70 static const struct e1000_info
*igb_info_tbl
[] = {
71 [board_82575
] = &e1000_82575_info
,
74 static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl
) = {
75 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_I350_COPPER
), board_82575
},
76 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_I350_FIBER
), board_82575
},
77 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_I350_SERDES
), board_82575
},
78 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_I350_SGMII
), board_82575
},
79 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_82580_COPPER
), board_82575
},
80 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_82580_FIBER
), board_82575
},
81 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_82580_QUAD_FIBER
), board_82575
},
82 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_82580_SERDES
), board_82575
},
83 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_82580_SGMII
), board_82575
},
84 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_82580_COPPER_DUAL
), board_82575
},
85 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_DH89XXCC_SGMII
), board_82575
},
86 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_DH89XXCC_SERDES
), board_82575
},
87 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_DH89XXCC_BACKPLANE
), board_82575
},
88 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_DH89XXCC_SFP
), board_82575
},
89 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_82576
), board_82575
},
90 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_82576_NS
), board_82575
},
91 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_82576_NS_SERDES
), board_82575
},
92 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_82576_FIBER
), board_82575
},
93 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_82576_SERDES
), board_82575
},
94 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_82576_SERDES_QUAD
), board_82575
},
95 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_82576_QUAD_COPPER_ET2
), board_82575
},
96 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_82576_QUAD_COPPER
), board_82575
},
97 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_82575EB_COPPER
), board_82575
},
98 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_82575EB_FIBER_SERDES
), board_82575
},
99 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_82575GB_QUAD_COPPER
), board_82575
},
100 /* required last entry */
104 MODULE_DEVICE_TABLE(pci
, igb_pci_tbl
);
106 void igb_reset(struct igb_adapter
*);
107 static int igb_setup_all_tx_resources(struct igb_adapter
*);
108 static int igb_setup_all_rx_resources(struct igb_adapter
*);
109 static void igb_free_all_tx_resources(struct igb_adapter
*);
110 static void igb_free_all_rx_resources(struct igb_adapter
*);
111 static void igb_setup_mrqc(struct igb_adapter
*);
112 static int igb_probe(struct pci_dev
*, const struct pci_device_id
*);
113 static void __devexit
igb_remove(struct pci_dev
*pdev
);
114 static void igb_init_hw_timer(struct igb_adapter
*adapter
);
115 static int igb_sw_init(struct igb_adapter
*);
116 static int igb_open(struct net_device
*);
117 static int igb_close(struct net_device
*);
118 static void igb_configure_tx(struct igb_adapter
*);
119 static void igb_configure_rx(struct igb_adapter
*);
120 static void igb_clean_all_tx_rings(struct igb_adapter
*);
121 static void igb_clean_all_rx_rings(struct igb_adapter
*);
122 static void igb_clean_tx_ring(struct igb_ring
*);
123 static void igb_clean_rx_ring(struct igb_ring
*);
124 static void igb_set_rx_mode(struct net_device
*);
125 static void igb_update_phy_info(unsigned long);
126 static void igb_watchdog(unsigned long);
127 static void igb_watchdog_task(struct work_struct
*);
128 static netdev_tx_t
igb_xmit_frame(struct sk_buff
*skb
, struct net_device
*);
129 static struct rtnl_link_stats64
*igb_get_stats64(struct net_device
*dev
,
130 struct rtnl_link_stats64
*stats
);
131 static int igb_change_mtu(struct net_device
*, int);
132 static int igb_set_mac(struct net_device
*, void *);
133 static void igb_set_uta(struct igb_adapter
*adapter
);
134 static irqreturn_t
igb_intr(int irq
, void *);
135 static irqreturn_t
igb_intr_msi(int irq
, void *);
136 static irqreturn_t
igb_msix_other(int irq
, void *);
137 static irqreturn_t
igb_msix_ring(int irq
, void *);
138 #ifdef CONFIG_IGB_DCA
139 static void igb_update_dca(struct igb_q_vector
*);
140 static void igb_setup_dca(struct igb_adapter
*);
141 #endif /* CONFIG_IGB_DCA */
142 static int igb_poll(struct napi_struct
*, int);
143 static bool igb_clean_tx_irq(struct igb_q_vector
*);
144 static bool igb_clean_rx_irq(struct igb_q_vector
*, int);
145 static int igb_ioctl(struct net_device
*, struct ifreq
*, int cmd
);
146 static void igb_tx_timeout(struct net_device
*);
147 static void igb_reset_task(struct work_struct
*);
148 static void igb_vlan_mode(struct net_device
*netdev
, u32 features
);
149 static void igb_vlan_rx_add_vid(struct net_device
*, u16
);
150 static void igb_vlan_rx_kill_vid(struct net_device
*, u16
);
151 static void igb_restore_vlan(struct igb_adapter
*);
152 static void igb_rar_set_qsel(struct igb_adapter
*, u8
*, u32
, u8
);
153 static void igb_ping_all_vfs(struct igb_adapter
*);
154 static void igb_msg_task(struct igb_adapter
*);
155 static void igb_vmm_control(struct igb_adapter
*);
156 static int igb_set_vf_mac(struct igb_adapter
*, int, unsigned char *);
157 static void igb_restore_vf_multicasts(struct igb_adapter
*adapter
);
158 static int igb_ndo_set_vf_mac(struct net_device
*netdev
, int vf
, u8
*mac
);
159 static int igb_ndo_set_vf_vlan(struct net_device
*netdev
,
160 int vf
, u16 vlan
, u8 qos
);
161 static int igb_ndo_set_vf_bw(struct net_device
*netdev
, int vf
, int tx_rate
);
162 static int igb_ndo_get_vf_config(struct net_device
*netdev
, int vf
,
163 struct ifla_vf_info
*ivi
);
164 static void igb_check_vf_rate_limit(struct igb_adapter
*);
166 #ifdef CONFIG_PCI_IOV
167 static int igb_vf_configure(struct igb_adapter
*adapter
, int vf
);
168 static int igb_find_enabled_vfs(struct igb_adapter
*adapter
);
169 static int igb_check_vf_assignment(struct igb_adapter
*adapter
);
173 static int igb_suspend(struct pci_dev
*, pm_message_t
);
174 static int igb_resume(struct pci_dev
*);
176 static void igb_shutdown(struct pci_dev
*);
177 #ifdef CONFIG_IGB_DCA
178 static int igb_notify_dca(struct notifier_block
*, unsigned long, void *);
179 static struct notifier_block dca_notifier
= {
180 .notifier_call
= igb_notify_dca
,
185 #ifdef CONFIG_NET_POLL_CONTROLLER
186 /* for netdump / net console */
187 static void igb_netpoll(struct net_device
*);
189 #ifdef CONFIG_PCI_IOV
190 static unsigned int max_vfs
= 0;
191 module_param(max_vfs
, uint
, 0);
192 MODULE_PARM_DESC(max_vfs
, "Maximum number of virtual functions to allocate "
193 "per physical function");
194 #endif /* CONFIG_PCI_IOV */
196 static pci_ers_result_t
igb_io_error_detected(struct pci_dev
*,
197 pci_channel_state_t
);
198 static pci_ers_result_t
igb_io_slot_reset(struct pci_dev
*);
199 static void igb_io_resume(struct pci_dev
*);
201 static struct pci_error_handlers igb_err_handler
= {
202 .error_detected
= igb_io_error_detected
,
203 .slot_reset
= igb_io_slot_reset
,
204 .resume
= igb_io_resume
,
207 static void igb_init_dmac(struct igb_adapter
*adapter
, u32 pba
);
209 static struct pci_driver igb_driver
= {
210 .name
= igb_driver_name
,
211 .id_table
= igb_pci_tbl
,
213 .remove
= __devexit_p(igb_remove
),
215 /* Power Management Hooks */
216 .suspend
= igb_suspend
,
217 .resume
= igb_resume
,
219 .shutdown
= igb_shutdown
,
220 .err_handler
= &igb_err_handler
223 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
224 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
225 MODULE_LICENSE("GPL");
226 MODULE_VERSION(DRV_VERSION
);
228 struct igb_reg_info
{
233 static const struct igb_reg_info igb_reg_info_tbl
[] = {
235 /* General Registers */
236 {E1000_CTRL
, "CTRL"},
237 {E1000_STATUS
, "STATUS"},
238 {E1000_CTRL_EXT
, "CTRL_EXT"},
240 /* Interrupt Registers */
244 {E1000_RCTL
, "RCTL"},
245 {E1000_RDLEN(0), "RDLEN"},
246 {E1000_RDH(0), "RDH"},
247 {E1000_RDT(0), "RDT"},
248 {E1000_RXDCTL(0), "RXDCTL"},
249 {E1000_RDBAL(0), "RDBAL"},
250 {E1000_RDBAH(0), "RDBAH"},
253 {E1000_TCTL
, "TCTL"},
254 {E1000_TDBAL(0), "TDBAL"},
255 {E1000_TDBAH(0), "TDBAH"},
256 {E1000_TDLEN(0), "TDLEN"},
257 {E1000_TDH(0), "TDH"},
258 {E1000_TDT(0), "TDT"},
259 {E1000_TXDCTL(0), "TXDCTL"},
260 {E1000_TDFH
, "TDFH"},
261 {E1000_TDFT
, "TDFT"},
262 {E1000_TDFHS
, "TDFHS"},
263 {E1000_TDFPC
, "TDFPC"},
265 /* List Terminator */
270 * igb_regdump - register printout routine
272 static void igb_regdump(struct e1000_hw
*hw
, struct igb_reg_info
*reginfo
)
278 switch (reginfo
->ofs
) {
280 for (n
= 0; n
< 4; n
++)
281 regs
[n
] = rd32(E1000_RDLEN(n
));
284 for (n
= 0; n
< 4; n
++)
285 regs
[n
] = rd32(E1000_RDH(n
));
288 for (n
= 0; n
< 4; n
++)
289 regs
[n
] = rd32(E1000_RDT(n
));
291 case E1000_RXDCTL(0):
292 for (n
= 0; n
< 4; n
++)
293 regs
[n
] = rd32(E1000_RXDCTL(n
));
296 for (n
= 0; n
< 4; n
++)
297 regs
[n
] = rd32(E1000_RDBAL(n
));
300 for (n
= 0; n
< 4; n
++)
301 regs
[n
] = rd32(E1000_RDBAH(n
));
304 for (n
= 0; n
< 4; n
++)
305 regs
[n
] = rd32(E1000_RDBAL(n
));
308 for (n
= 0; n
< 4; n
++)
309 regs
[n
] = rd32(E1000_TDBAH(n
));
312 for (n
= 0; n
< 4; n
++)
313 regs
[n
] = rd32(E1000_TDLEN(n
));
316 for (n
= 0; n
< 4; n
++)
317 regs
[n
] = rd32(E1000_TDH(n
));
320 for (n
= 0; n
< 4; n
++)
321 regs
[n
] = rd32(E1000_TDT(n
));
323 case E1000_TXDCTL(0):
324 for (n
= 0; n
< 4; n
++)
325 regs
[n
] = rd32(E1000_TXDCTL(n
));
328 printk(KERN_INFO
"%-15s %08x\n",
329 reginfo
->name
, rd32(reginfo
->ofs
));
333 snprintf(rname
, 16, "%s%s", reginfo
->name
, "[0-3]");
334 printk(KERN_INFO
"%-15s ", rname
);
335 for (n
= 0; n
< 4; n
++)
336 printk(KERN_CONT
"%08x ", regs
[n
]);
337 printk(KERN_CONT
"\n");
341 * igb_dump - Print registers, tx-rings and rx-rings
343 static void igb_dump(struct igb_adapter
*adapter
)
345 struct net_device
*netdev
= adapter
->netdev
;
346 struct e1000_hw
*hw
= &adapter
->hw
;
347 struct igb_reg_info
*reginfo
;
348 struct igb_ring
*tx_ring
;
349 union e1000_adv_tx_desc
*tx_desc
;
350 struct my_u0
{ u64 a
; u64 b
; } *u0
;
351 struct igb_ring
*rx_ring
;
352 union e1000_adv_rx_desc
*rx_desc
;
356 if (!netif_msg_hw(adapter
))
359 /* Print netdevice Info */
361 dev_info(&adapter
->pdev
->dev
, "Net device Info\n");
362 printk(KERN_INFO
"Device Name state "
363 "trans_start last_rx\n");
364 printk(KERN_INFO
"%-15s %016lX %016lX %016lX\n",
371 /* Print Registers */
372 dev_info(&adapter
->pdev
->dev
, "Register Dump\n");
373 printk(KERN_INFO
" Register Name Value\n");
374 for (reginfo
= (struct igb_reg_info
*)igb_reg_info_tbl
;
375 reginfo
->name
; reginfo
++) {
376 igb_regdump(hw
, reginfo
);
379 /* Print TX Ring Summary */
380 if (!netdev
|| !netif_running(netdev
))
383 dev_info(&adapter
->pdev
->dev
, "TX Rings Summary\n");
384 printk(KERN_INFO
"Queue [NTU] [NTC] [bi(ntc)->dma ]"
385 " leng ntw timestamp\n");
386 for (n
= 0; n
< adapter
->num_tx_queues
; n
++) {
387 struct igb_tx_buffer
*buffer_info
;
388 tx_ring
= adapter
->tx_ring
[n
];
389 buffer_info
= &tx_ring
->tx_buffer_info
[tx_ring
->next_to_clean
];
390 printk(KERN_INFO
" %5d %5X %5X %016llX %04X %p %016llX\n",
391 n
, tx_ring
->next_to_use
, tx_ring
->next_to_clean
,
392 (u64
)buffer_info
->dma
,
394 buffer_info
->next_to_watch
,
395 (u64
)buffer_info
->time_stamp
);
399 if (!netif_msg_tx_done(adapter
))
400 goto rx_ring_summary
;
402 dev_info(&adapter
->pdev
->dev
, "TX Rings Dump\n");
404 /* Transmit Descriptor Formats
406 * Advanced Transmit Descriptor
407 * +--------------------------------------------------------------+
408 * 0 | Buffer Address [63:0] |
409 * +--------------------------------------------------------------+
410 * 8 | PAYLEN | PORTS |CC|IDX | STA | DCMD |DTYP|MAC|RSV| DTALEN |
411 * +--------------------------------------------------------------+
412 * 63 46 45 40 39 38 36 35 32 31 24 15 0
415 for (n
= 0; n
< adapter
->num_tx_queues
; n
++) {
416 tx_ring
= adapter
->tx_ring
[n
];
417 printk(KERN_INFO
"------------------------------------\n");
418 printk(KERN_INFO
"TX QUEUE INDEX = %d\n", tx_ring
->queue_index
);
419 printk(KERN_INFO
"------------------------------------\n");
420 printk(KERN_INFO
"T [desc] [address 63:0 ] "
421 "[PlPOCIStDDM Ln] [bi->dma ] "
422 "leng ntw timestamp bi->skb\n");
424 for (i
= 0; tx_ring
->desc
&& (i
< tx_ring
->count
); i
++) {
425 struct igb_tx_buffer
*buffer_info
;
426 tx_desc
= IGB_TX_DESC(tx_ring
, i
);
427 buffer_info
= &tx_ring
->tx_buffer_info
[i
];
428 u0
= (struct my_u0
*)tx_desc
;
429 printk(KERN_INFO
"T [0x%03X] %016llX %016llX %016llX"
430 " %04X %p %016llX %p", i
,
433 (u64
)buffer_info
->dma
,
435 buffer_info
->next_to_watch
,
436 (u64
)buffer_info
->time_stamp
,
438 if (i
== tx_ring
->next_to_use
&&
439 i
== tx_ring
->next_to_clean
)
440 printk(KERN_CONT
" NTC/U\n");
441 else if (i
== tx_ring
->next_to_use
)
442 printk(KERN_CONT
" NTU\n");
443 else if (i
== tx_ring
->next_to_clean
)
444 printk(KERN_CONT
" NTC\n");
446 printk(KERN_CONT
"\n");
448 if (netif_msg_pktdata(adapter
) && buffer_info
->dma
!= 0)
449 print_hex_dump(KERN_INFO
, "",
451 16, 1, phys_to_virt(buffer_info
->dma
),
452 buffer_info
->length
, true);
456 /* Print RX Rings Summary */
458 dev_info(&adapter
->pdev
->dev
, "RX Rings Summary\n");
459 printk(KERN_INFO
"Queue [NTU] [NTC]\n");
460 for (n
= 0; n
< adapter
->num_rx_queues
; n
++) {
461 rx_ring
= adapter
->rx_ring
[n
];
462 printk(KERN_INFO
" %5d %5X %5X\n", n
,
463 rx_ring
->next_to_use
, rx_ring
->next_to_clean
);
467 if (!netif_msg_rx_status(adapter
))
470 dev_info(&adapter
->pdev
->dev
, "RX Rings Dump\n");
472 /* Advanced Receive Descriptor (Read) Format
474 * +-----------------------------------------------------+
475 * 0 | Packet Buffer Address [63:1] |A0/NSE|
476 * +----------------------------------------------+------+
477 * 8 | Header Buffer Address [63:1] | DD |
478 * +-----------------------------------------------------+
481 * Advanced Receive Descriptor (Write-Back) Format
483 * 63 48 47 32 31 30 21 20 17 16 4 3 0
484 * +------------------------------------------------------+
485 * 0 | Packet IP |SPH| HDR_LEN | RSV|Packet| RSS |
486 * | Checksum Ident | | | | Type | Type |
487 * +------------------------------------------------------+
488 * 8 | VLAN Tag | Length | Extended Error | Extended Status |
489 * +------------------------------------------------------+
490 * 63 48 47 32 31 20 19 0
493 for (n
= 0; n
< adapter
->num_rx_queues
; n
++) {
494 rx_ring
= adapter
->rx_ring
[n
];
495 printk(KERN_INFO
"------------------------------------\n");
496 printk(KERN_INFO
"RX QUEUE INDEX = %d\n", rx_ring
->queue_index
);
497 printk(KERN_INFO
"------------------------------------\n");
498 printk(KERN_INFO
"R [desc] [ PktBuf A0] "
499 "[ HeadBuf DD] [bi->dma ] [bi->skb] "
500 "<-- Adv Rx Read format\n");
501 printk(KERN_INFO
"RWB[desc] [PcsmIpSHl PtRs] "
502 "[vl er S cks ln] ---------------- [bi->skb] "
503 "<-- Adv Rx Write-Back format\n");
505 for (i
= 0; i
< rx_ring
->count
; i
++) {
506 struct igb_rx_buffer
*buffer_info
;
507 buffer_info
= &rx_ring
->rx_buffer_info
[i
];
508 rx_desc
= IGB_RX_DESC(rx_ring
, i
);
509 u0
= (struct my_u0
*)rx_desc
;
510 staterr
= le32_to_cpu(rx_desc
->wb
.upper
.status_error
);
511 if (staterr
& E1000_RXD_STAT_DD
) {
512 /* Descriptor Done */
513 printk(KERN_INFO
"RWB[0x%03X] %016llX "
514 "%016llX ---------------- %p", i
,
519 printk(KERN_INFO
"R [0x%03X] %016llX "
520 "%016llX %016llX %p", i
,
523 (u64
)buffer_info
->dma
,
526 if (netif_msg_pktdata(adapter
)) {
527 print_hex_dump(KERN_INFO
, "",
530 phys_to_virt(buffer_info
->dma
),
531 IGB_RX_HDR_LEN
, true);
532 print_hex_dump(KERN_INFO
, "",
536 buffer_info
->page_dma
+
537 buffer_info
->page_offset
),
542 if (i
== rx_ring
->next_to_use
)
543 printk(KERN_CONT
" NTU\n");
544 else if (i
== rx_ring
->next_to_clean
)
545 printk(KERN_CONT
" NTC\n");
547 printk(KERN_CONT
"\n");
558 * igb_read_clock - read raw cycle counter (to be used by time counter)
560 static cycle_t
igb_read_clock(const struct cyclecounter
*tc
)
562 struct igb_adapter
*adapter
=
563 container_of(tc
, struct igb_adapter
, cycles
);
564 struct e1000_hw
*hw
= &adapter
->hw
;
569 * The timestamp latches on lowest register read. For the 82580
570 * the lowest register is SYSTIMR instead of SYSTIML. However we never
571 * adjusted TIMINCA so SYSTIMR will just read as all 0s so ignore it.
573 if (hw
->mac
.type
>= e1000_82580
) {
574 stamp
= rd32(E1000_SYSTIMR
) >> 8;
575 shift
= IGB_82580_TSYNC_SHIFT
;
578 stamp
|= (u64
)rd32(E1000_SYSTIML
) << shift
;
579 stamp
|= (u64
)rd32(E1000_SYSTIMH
) << (shift
+ 32);
584 * igb_get_hw_dev - return device
585 * used by hardware layer to print debugging information
587 struct net_device
*igb_get_hw_dev(struct e1000_hw
*hw
)
589 struct igb_adapter
*adapter
= hw
->back
;
590 return adapter
->netdev
;
594 * igb_init_module - Driver Registration Routine
596 * igb_init_module is the first routine called when the driver is
597 * loaded. All it does is register with the PCI subsystem.
599 static int __init
igb_init_module(void)
602 printk(KERN_INFO
"%s - version %s\n",
603 igb_driver_string
, igb_driver_version
);
605 printk(KERN_INFO
"%s\n", igb_copyright
);
607 #ifdef CONFIG_IGB_DCA
608 dca_register_notify(&dca_notifier
);
610 ret
= pci_register_driver(&igb_driver
);
614 module_init(igb_init_module
);
617 * igb_exit_module - Driver Exit Cleanup Routine
619 * igb_exit_module is called just before the driver is removed
622 static void __exit
igb_exit_module(void)
624 #ifdef CONFIG_IGB_DCA
625 dca_unregister_notify(&dca_notifier
);
627 pci_unregister_driver(&igb_driver
);
630 module_exit(igb_exit_module
);
/*
 * Map ring index i to its 82576 queue register index: bit 0 of i selects
 * the upper group of eight (adds 8), the remaining bits give the position
 * inside the group.  The argument is parenthesized so that expressions
 * with low-precedence operators (e.g. "a | b") expand correctly.
 */
#define Q_IDX_82576(i) ((((i) & 0x1) << 3) + ((i) >> 1))
634 * igb_cache_ring_register - Descriptor ring to register mapping
635 * @adapter: board private structure to initialize
637 * Once we know the feature-set enabled for the device, we'll cache
638 * the register offset the descriptor ring is assigned to.
640 static void igb_cache_ring_register(struct igb_adapter
*adapter
)
643 u32 rbase_offset
= adapter
->vfs_allocated_count
;
645 switch (adapter
->hw
.mac
.type
) {
647 /* The queues are allocated for virtualization such that VF 0
648 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
649 * In order to avoid collision we start at the first free queue
650 * and continue consuming queues in the same sequence
652 if (adapter
->vfs_allocated_count
) {
653 for (; i
< adapter
->rss_queues
; i
++)
654 adapter
->rx_ring
[i
]->reg_idx
= rbase_offset
+
661 for (; i
< adapter
->num_rx_queues
; i
++)
662 adapter
->rx_ring
[i
]->reg_idx
= rbase_offset
+ i
;
663 for (; j
< adapter
->num_tx_queues
; j
++)
664 adapter
->tx_ring
[j
]->reg_idx
= rbase_offset
+ j
;
669 static void igb_free_queues(struct igb_adapter
*adapter
)
673 for (i
= 0; i
< adapter
->num_tx_queues
; i
++) {
674 kfree(adapter
->tx_ring
[i
]);
675 adapter
->tx_ring
[i
] = NULL
;
677 for (i
= 0; i
< adapter
->num_rx_queues
; i
++) {
678 kfree(adapter
->rx_ring
[i
]);
679 adapter
->rx_ring
[i
] = NULL
;
681 adapter
->num_rx_queues
= 0;
682 adapter
->num_tx_queues
= 0;
686 * igb_alloc_queues - Allocate memory for all rings
687 * @adapter: board private structure to initialize
689 * We allocate one ring per queue at run-time since we don't know the
690 * number of queues at compile-time.
692 static int igb_alloc_queues(struct igb_adapter
*adapter
)
694 struct igb_ring
*ring
;
696 int orig_node
= adapter
->node
;
698 for (i
= 0; i
< adapter
->num_tx_queues
; i
++) {
699 if (orig_node
== -1) {
700 int cur_node
= next_online_node(adapter
->node
);
701 if (cur_node
== MAX_NUMNODES
)
702 cur_node
= first_online_node
;
703 adapter
->node
= cur_node
;
705 ring
= kzalloc_node(sizeof(struct igb_ring
), GFP_KERNEL
,
708 ring
= kzalloc(sizeof(struct igb_ring
), GFP_KERNEL
);
711 ring
->count
= adapter
->tx_ring_count
;
712 ring
->queue_index
= i
;
713 ring
->dev
= &adapter
->pdev
->dev
;
714 ring
->netdev
= adapter
->netdev
;
715 ring
->numa_node
= adapter
->node
;
716 /* For 82575, context index must be unique per ring. */
717 if (adapter
->hw
.mac
.type
== e1000_82575
)
718 set_bit(IGB_RING_FLAG_TX_CTX_IDX
, &ring
->flags
);
719 adapter
->tx_ring
[i
] = ring
;
721 /* Restore the adapter's original node */
722 adapter
->node
= orig_node
;
724 for (i
= 0; i
< adapter
->num_rx_queues
; i
++) {
725 if (orig_node
== -1) {
726 int cur_node
= next_online_node(adapter
->node
);
727 if (cur_node
== MAX_NUMNODES
)
728 cur_node
= first_online_node
;
729 adapter
->node
= cur_node
;
731 ring
= kzalloc_node(sizeof(struct igb_ring
), GFP_KERNEL
,
734 ring
= kzalloc(sizeof(struct igb_ring
), GFP_KERNEL
);
737 ring
->count
= adapter
->rx_ring_count
;
738 ring
->queue_index
= i
;
739 ring
->dev
= &adapter
->pdev
->dev
;
740 ring
->netdev
= adapter
->netdev
;
741 ring
->numa_node
= adapter
->node
;
742 /* set flag indicating ring supports SCTP checksum offload */
743 if (adapter
->hw
.mac
.type
>= e1000_82576
)
744 set_bit(IGB_RING_FLAG_RX_SCTP_CSUM
, &ring
->flags
);
746 /* On i350, loopback VLAN packets have the tag byte-swapped. */
747 if (adapter
->hw
.mac
.type
== e1000_i350
)
748 set_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP
, &ring
->flags
);
750 adapter
->rx_ring
[i
] = ring
;
752 /* Restore the adapter's original node */
753 adapter
->node
= orig_node
;
755 igb_cache_ring_register(adapter
);
760 /* Restore the adapter's original node */
761 adapter
->node
= orig_node
;
762 igb_free_queues(adapter
);
768 * igb_write_ivar - configure ivar for given MSI-X vector
769 * @hw: pointer to the HW structure
770 * @msix_vector: vector number we are allocating to a given ring
771 * @index: row index of IVAR register to write within IVAR table
772 * @offset: column offset of in IVAR, should be multiple of 8
774 * This function is intended to handle the writing of the IVAR register
775 * for adapters 82576 and newer. The IVAR table consists of 2 columns,
776 * each containing an cause allocation for an Rx and Tx ring, and a
777 * variable number of rows depending on the number of queues supported.
779 static void igb_write_ivar(struct e1000_hw
*hw
, int msix_vector
,
780 int index
, int offset
)
782 u32 ivar
= array_rd32(E1000_IVAR0
, index
);
784 /* clear any bits that are currently set */
785 ivar
&= ~((u32
)0xFF << offset
);
787 /* write vector and valid bit */
788 ivar
|= (msix_vector
| E1000_IVAR_VALID
) << offset
;
790 array_wr32(E1000_IVAR0
, index
, ivar
);
793 #define IGB_N0_QUEUE -1
794 static void igb_assign_vector(struct igb_q_vector
*q_vector
, int msix_vector
)
796 struct igb_adapter
*adapter
= q_vector
->adapter
;
797 struct e1000_hw
*hw
= &adapter
->hw
;
798 int rx_queue
= IGB_N0_QUEUE
;
799 int tx_queue
= IGB_N0_QUEUE
;
802 if (q_vector
->rx
.ring
)
803 rx_queue
= q_vector
->rx
.ring
->reg_idx
;
804 if (q_vector
->tx
.ring
)
805 tx_queue
= q_vector
->tx
.ring
->reg_idx
;
807 switch (hw
->mac
.type
) {
809 /* The 82575 assigns vectors using a bitmask, which matches the
810 bitmask for the EICR/EIMS/EIMC registers. To assign one
811 or more queues to a vector, we write the appropriate bits
812 into the MSIXBM register for that vector. */
813 if (rx_queue
> IGB_N0_QUEUE
)
814 msixbm
= E1000_EICR_RX_QUEUE0
<< rx_queue
;
815 if (tx_queue
> IGB_N0_QUEUE
)
816 msixbm
|= E1000_EICR_TX_QUEUE0
<< tx_queue
;
817 if (!adapter
->msix_entries
&& msix_vector
== 0)
818 msixbm
|= E1000_EIMS_OTHER
;
819 array_wr32(E1000_MSIXBM(0), msix_vector
, msixbm
);
820 q_vector
->eims_value
= msixbm
;
824 * 82576 uses a table that essentially consists of 2 columns
825 * with 8 rows. The ordering is column-major so we use the
826 * lower 3 bits as the row index, and the 4th bit as the
829 if (rx_queue
> IGB_N0_QUEUE
)
830 igb_write_ivar(hw
, msix_vector
,
832 (rx_queue
& 0x8) << 1);
833 if (tx_queue
> IGB_N0_QUEUE
)
834 igb_write_ivar(hw
, msix_vector
,
836 ((tx_queue
& 0x8) << 1) + 8);
837 q_vector
->eims_value
= 1 << msix_vector
;
842 * On 82580 and newer adapters the scheme is similar to 82576
843 * however instead of ordering column-major we have things
844 * ordered row-major. So we traverse the table by using
845 * bit 0 as the column offset, and the remaining bits as the
848 if (rx_queue
> IGB_N0_QUEUE
)
849 igb_write_ivar(hw
, msix_vector
,
851 (rx_queue
& 0x1) << 4);
852 if (tx_queue
> IGB_N0_QUEUE
)
853 igb_write_ivar(hw
, msix_vector
,
855 ((tx_queue
& 0x1) << 4) + 8);
856 q_vector
->eims_value
= 1 << msix_vector
;
863 /* add q_vector eims value to global eims_enable_mask */
864 adapter
->eims_enable_mask
|= q_vector
->eims_value
;
866 /* configure q_vector to set itr on first interrupt */
867 q_vector
->set_itr
= 1;
871 * igb_configure_msix - Configure MSI-X hardware
873 * igb_configure_msix sets up the hardware to properly
874 * generate MSI-X interrupts.
876 static void igb_configure_msix(struct igb_adapter
*adapter
)
880 struct e1000_hw
*hw
= &adapter
->hw
;
882 adapter
->eims_enable_mask
= 0;
884 /* set vector for other causes, i.e. link changes */
885 switch (hw
->mac
.type
) {
887 tmp
= rd32(E1000_CTRL_EXT
);
888 /* enable MSI-X PBA support*/
889 tmp
|= E1000_CTRL_EXT_PBA_CLR
;
891 /* Auto-Mask interrupts upon ICR read. */
892 tmp
|= E1000_CTRL_EXT_EIAME
;
893 tmp
|= E1000_CTRL_EXT_IRCA
;
895 wr32(E1000_CTRL_EXT
, tmp
);
897 /* enable msix_other interrupt */
898 array_wr32(E1000_MSIXBM(0), vector
++,
900 adapter
->eims_other
= E1000_EIMS_OTHER
;
907 /* Turn on MSI-X capability first, or our settings
908 * won't stick. And it will take days to debug. */
909 wr32(E1000_GPIE
, E1000_GPIE_MSIX_MODE
|
910 E1000_GPIE_PBA
| E1000_GPIE_EIAME
|
913 /* enable msix_other interrupt */
914 adapter
->eims_other
= 1 << vector
;
915 tmp
= (vector
++ | E1000_IVAR_VALID
) << 8;
917 wr32(E1000_IVAR_MISC
, tmp
);
920 /* do nothing, since nothing else supports MSI-X */
922 } /* switch (hw->mac.type) */
924 adapter
->eims_enable_mask
|= adapter
->eims_other
;
926 for (i
= 0; i
< adapter
->num_q_vectors
; i
++)
927 igb_assign_vector(adapter
->q_vector
[i
], vector
++);
933 * igb_request_msix - Initialize MSI-X interrupts
935 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
938 static int igb_request_msix(struct igb_adapter
*adapter
)
940 struct net_device
*netdev
= adapter
->netdev
;
941 struct e1000_hw
*hw
= &adapter
->hw
;
942 int i
, err
= 0, vector
= 0;
944 err
= request_irq(adapter
->msix_entries
[vector
].vector
,
945 igb_msix_other
, 0, netdev
->name
, adapter
);
950 for (i
= 0; i
< adapter
->num_q_vectors
; i
++) {
951 struct igb_q_vector
*q_vector
= adapter
->q_vector
[i
];
953 q_vector
->itr_register
= hw
->hw_addr
+ E1000_EITR(vector
);
955 if (q_vector
->rx
.ring
&& q_vector
->tx
.ring
)
956 sprintf(q_vector
->name
, "%s-TxRx-%u", netdev
->name
,
957 q_vector
->rx
.ring
->queue_index
);
958 else if (q_vector
->tx
.ring
)
959 sprintf(q_vector
->name
, "%s-tx-%u", netdev
->name
,
960 q_vector
->tx
.ring
->queue_index
);
961 else if (q_vector
->rx
.ring
)
962 sprintf(q_vector
->name
, "%s-rx-%u", netdev
->name
,
963 q_vector
->rx
.ring
->queue_index
);
965 sprintf(q_vector
->name
, "%s-unused", netdev
->name
);
967 err
= request_irq(adapter
->msix_entries
[vector
].vector
,
968 igb_msix_ring
, 0, q_vector
->name
,
975 igb_configure_msix(adapter
);
981 static void igb_reset_interrupt_capability(struct igb_adapter
*adapter
)
983 if (adapter
->msix_entries
) {
984 pci_disable_msix(adapter
->pdev
);
985 kfree(adapter
->msix_entries
);
986 adapter
->msix_entries
= NULL
;
987 } else if (adapter
->flags
& IGB_FLAG_HAS_MSI
) {
988 pci_disable_msi(adapter
->pdev
);
993 * igb_free_q_vectors - Free memory allocated for interrupt vectors
994 * @adapter: board private structure to initialize
996 * This function frees the memory allocated to the q_vectors. In addition if
997 * NAPI is enabled it will delete any references to the NAPI struct prior
998 * to freeing the q_vector.
1000 static void igb_free_q_vectors(struct igb_adapter
*adapter
)
1004 for (v_idx
= 0; v_idx
< adapter
->num_q_vectors
; v_idx
++) {
1005 struct igb_q_vector
*q_vector
= adapter
->q_vector
[v_idx
];
1006 adapter
->q_vector
[v_idx
] = NULL
;
1009 netif_napi_del(&q_vector
->napi
);
1012 adapter
->num_q_vectors
= 0;
/**
 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
 * @adapter: board private structure
 *
 * Leaves the adapter with 0 rx queues, 0 tx queues and no MSI-X interrupts
 * allocated.  The teardown order is: ring structures, then the q_vectors,
 * then the MSI-X/MSI capability itself.
 */
static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
{
	/* rings first ... */
	igb_free_queues(adapter);

	/* ... then the interrupt vectors that served them ... */
	igb_free_q_vectors(adapter);

	/* ... and finally the PCI interrupt capability */
	igb_reset_interrupt_capability(adapter);
}
1029 * igb_set_interrupt_capability - set MSI or MSI-X if supported
1031 * Attempt to configure interrupts using the best available
1032 * capabilities of the hardware and kernel.
1034 static int igb_set_interrupt_capability(struct igb_adapter
*adapter
)
1039 /* Number of supported queues. */
1040 adapter
->num_rx_queues
= adapter
->rss_queues
;
1041 if (adapter
->vfs_allocated_count
)
1042 adapter
->num_tx_queues
= 1;
1044 adapter
->num_tx_queues
= adapter
->rss_queues
;
1046 /* start with one vector for every rx queue */
1047 numvecs
= adapter
->num_rx_queues
;
1049 /* if tx handler is separate add 1 for every tx queue */
1050 if (!(adapter
->flags
& IGB_FLAG_QUEUE_PAIRS
))
1051 numvecs
+= adapter
->num_tx_queues
;
1053 /* store the number of vectors reserved for queues */
1054 adapter
->num_q_vectors
= numvecs
;
1056 /* add 1 vector for link status interrupts */
1058 adapter
->msix_entries
= kcalloc(numvecs
, sizeof(struct msix_entry
),
1060 if (!adapter
->msix_entries
)
1063 for (i
= 0; i
< numvecs
; i
++)
1064 adapter
->msix_entries
[i
].entry
= i
;
1066 err
= pci_enable_msix(adapter
->pdev
,
1067 adapter
->msix_entries
,
1072 igb_reset_interrupt_capability(adapter
);
1074 /* If we can't do MSI-X, try MSI */
1076 #ifdef CONFIG_PCI_IOV
1077 /* disable SR-IOV for non MSI-X configurations */
1078 if (adapter
->vf_data
) {
1079 struct e1000_hw
*hw
= &adapter
->hw
;
1080 /* disable iov and allow time for transactions to clear */
1081 pci_disable_sriov(adapter
->pdev
);
1084 kfree(adapter
->vf_data
);
1085 adapter
->vf_data
= NULL
;
1086 wr32(E1000_IOVCTL
, E1000_IOVCTL_REUSE_VFQ
);
1089 dev_info(&adapter
->pdev
->dev
, "IOV Disabled\n");
1092 adapter
->vfs_allocated_count
= 0;
1093 adapter
->rss_queues
= 1;
1094 adapter
->flags
|= IGB_FLAG_QUEUE_PAIRS
;
1095 adapter
->num_rx_queues
= 1;
1096 adapter
->num_tx_queues
= 1;
1097 adapter
->num_q_vectors
= 1;
1098 if (!pci_enable_msi(adapter
->pdev
))
1099 adapter
->flags
|= IGB_FLAG_HAS_MSI
;
1101 /* Notify the stack of the (possibly) reduced queue counts. */
1102 netif_set_real_num_tx_queues(adapter
->netdev
, adapter
->num_tx_queues
);
1103 return netif_set_real_num_rx_queues(adapter
->netdev
,
1104 adapter
->num_rx_queues
);
1108 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1109 * @adapter: board private structure to initialize
1111 * We allocate one q_vector per queue interrupt. If allocation fails we
1114 static int igb_alloc_q_vectors(struct igb_adapter
*adapter
)
1116 struct igb_q_vector
*q_vector
;
1117 struct e1000_hw
*hw
= &adapter
->hw
;
1119 int orig_node
= adapter
->node
;
1121 for (v_idx
= 0; v_idx
< adapter
->num_q_vectors
; v_idx
++) {
1122 if ((adapter
->num_q_vectors
== (adapter
->num_rx_queues
+
1123 adapter
->num_tx_queues
)) &&
1124 (adapter
->num_rx_queues
== v_idx
))
1125 adapter
->node
= orig_node
;
1126 if (orig_node
== -1) {
1127 int cur_node
= next_online_node(adapter
->node
);
1128 if (cur_node
== MAX_NUMNODES
)
1129 cur_node
= first_online_node
;
1130 adapter
->node
= cur_node
;
1132 q_vector
= kzalloc_node(sizeof(struct igb_q_vector
), GFP_KERNEL
,
1135 q_vector
= kzalloc(sizeof(struct igb_q_vector
),
1139 q_vector
->adapter
= adapter
;
1140 q_vector
->itr_register
= hw
->hw_addr
+ E1000_EITR(0);
1141 q_vector
->itr_val
= IGB_START_ITR
;
1142 netif_napi_add(adapter
->netdev
, &q_vector
->napi
, igb_poll
, 64);
1143 adapter
->q_vector
[v_idx
] = q_vector
;
1145 /* Restore the adapter's original node */
1146 adapter
->node
= orig_node
;
1151 /* Restore the adapter's original node */
1152 adapter
->node
= orig_node
;
1153 igb_free_q_vectors(adapter
);
1157 static void igb_map_rx_ring_to_vector(struct igb_adapter
*adapter
,
1158 int ring_idx
, int v_idx
)
1160 struct igb_q_vector
*q_vector
= adapter
->q_vector
[v_idx
];
1162 q_vector
->rx
.ring
= adapter
->rx_ring
[ring_idx
];
1163 q_vector
->rx
.ring
->q_vector
= q_vector
;
1164 q_vector
->rx
.count
++;
1165 q_vector
->itr_val
= adapter
->rx_itr_setting
;
1166 if (q_vector
->itr_val
&& q_vector
->itr_val
<= 3)
1167 q_vector
->itr_val
= IGB_START_ITR
;
1170 static void igb_map_tx_ring_to_vector(struct igb_adapter
*adapter
,
1171 int ring_idx
, int v_idx
)
1173 struct igb_q_vector
*q_vector
= adapter
->q_vector
[v_idx
];
1175 q_vector
->tx
.ring
= adapter
->tx_ring
[ring_idx
];
1176 q_vector
->tx
.ring
->q_vector
= q_vector
;
1177 q_vector
->tx
.count
++;
1178 q_vector
->itr_val
= adapter
->tx_itr_setting
;
1179 q_vector
->tx
.work_limit
= adapter
->tx_work_limit
;
1180 if (q_vector
->itr_val
&& q_vector
->itr_val
<= 3)
1181 q_vector
->itr_val
= IGB_START_ITR
;
1185 * igb_map_ring_to_vector - maps allocated queues to vectors
1187 * This function maps the recently allocated queues to vectors.
1189 static int igb_map_ring_to_vector(struct igb_adapter
*adapter
)
1194 if ((adapter
->num_q_vectors
< adapter
->num_rx_queues
) ||
1195 (adapter
->num_q_vectors
< adapter
->num_tx_queues
))
1198 if (adapter
->num_q_vectors
>=
1199 (adapter
->num_rx_queues
+ adapter
->num_tx_queues
)) {
1200 for (i
= 0; i
< adapter
->num_rx_queues
; i
++)
1201 igb_map_rx_ring_to_vector(adapter
, i
, v_idx
++);
1202 for (i
= 0; i
< adapter
->num_tx_queues
; i
++)
1203 igb_map_tx_ring_to_vector(adapter
, i
, v_idx
++);
1205 for (i
= 0; i
< adapter
->num_rx_queues
; i
++) {
1206 if (i
< adapter
->num_tx_queues
)
1207 igb_map_tx_ring_to_vector(adapter
, i
, v_idx
);
1208 igb_map_rx_ring_to_vector(adapter
, i
, v_idx
++);
1210 for (; i
< adapter
->num_tx_queues
; i
++)
1211 igb_map_tx_ring_to_vector(adapter
, i
, v_idx
++);
1217 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1219 * This function initializes the interrupts and allocates all of the queues.
1221 static int igb_init_interrupt_scheme(struct igb_adapter
*adapter
)
1223 struct pci_dev
*pdev
= adapter
->pdev
;
1226 err
= igb_set_interrupt_capability(adapter
);
1230 err
= igb_alloc_q_vectors(adapter
);
1232 dev_err(&pdev
->dev
, "Unable to allocate memory for vectors\n");
1233 goto err_alloc_q_vectors
;
1236 err
= igb_alloc_queues(adapter
);
1238 dev_err(&pdev
->dev
, "Unable to allocate memory for queues\n");
1239 goto err_alloc_queues
;
1242 err
= igb_map_ring_to_vector(adapter
);
1244 dev_err(&pdev
->dev
, "Invalid q_vector to ring mapping\n");
1245 goto err_map_queues
;
1251 igb_free_queues(adapter
);
1253 igb_free_q_vectors(adapter
);
1254 err_alloc_q_vectors
:
1255 igb_reset_interrupt_capability(adapter
);
1260 * igb_request_irq - initialize interrupts
1262 * Attempts to configure interrupts using the best available
1263 * capabilities of the hardware and kernel.
1265 static int igb_request_irq(struct igb_adapter
*adapter
)
1267 struct net_device
*netdev
= adapter
->netdev
;
1268 struct pci_dev
*pdev
= adapter
->pdev
;
1271 if (adapter
->msix_entries
) {
1272 err
= igb_request_msix(adapter
);
1275 /* fall back to MSI */
1276 igb_clear_interrupt_scheme(adapter
);
1277 if (!pci_enable_msi(pdev
))
1278 adapter
->flags
|= IGB_FLAG_HAS_MSI
;
1279 igb_free_all_tx_resources(adapter
);
1280 igb_free_all_rx_resources(adapter
);
1281 adapter
->num_tx_queues
= 1;
1282 adapter
->num_rx_queues
= 1;
1283 adapter
->num_q_vectors
= 1;
1284 err
= igb_alloc_q_vectors(adapter
);
1287 "Unable to allocate memory for vectors\n");
1290 err
= igb_alloc_queues(adapter
);
1293 "Unable to allocate memory for queues\n");
1294 igb_free_q_vectors(adapter
);
1297 igb_setup_all_tx_resources(adapter
);
1298 igb_setup_all_rx_resources(adapter
);
1301 igb_assign_vector(adapter
->q_vector
[0], 0);
1303 if (adapter
->flags
& IGB_FLAG_HAS_MSI
) {
1304 err
= request_irq(pdev
->irq
, igb_intr_msi
, 0,
1305 netdev
->name
, adapter
);
1309 /* fall back to legacy interrupts */
1310 igb_reset_interrupt_capability(adapter
);
1311 adapter
->flags
&= ~IGB_FLAG_HAS_MSI
;
1314 err
= request_irq(pdev
->irq
, igb_intr
, IRQF_SHARED
,
1315 netdev
->name
, adapter
);
1318 dev_err(&pdev
->dev
, "Error %d getting interrupt\n",
1325 static void igb_free_irq(struct igb_adapter
*adapter
)
1327 if (adapter
->msix_entries
) {
1330 free_irq(adapter
->msix_entries
[vector
++].vector
, adapter
);
1332 for (i
= 0; i
< adapter
->num_q_vectors
; i
++)
1333 free_irq(adapter
->msix_entries
[vector
++].vector
,
1334 adapter
->q_vector
[i
]);
1336 free_irq(adapter
->pdev
->irq
, adapter
);
1341 * igb_irq_disable - Mask off interrupt generation on the NIC
1342 * @adapter: board private structure
1344 static void igb_irq_disable(struct igb_adapter
*adapter
)
1346 struct e1000_hw
*hw
= &adapter
->hw
;
1349 * we need to be careful when disabling interrupts. The VFs are also
1350 * mapped into these registers and so clearing the bits can cause
1351 * issues on the VF drivers so we only need to clear what we set
1353 if (adapter
->msix_entries
) {
1354 u32 regval
= rd32(E1000_EIAM
);
1355 wr32(E1000_EIAM
, regval
& ~adapter
->eims_enable_mask
);
1356 wr32(E1000_EIMC
, adapter
->eims_enable_mask
);
1357 regval
= rd32(E1000_EIAC
);
1358 wr32(E1000_EIAC
, regval
& ~adapter
->eims_enable_mask
);
1362 wr32(E1000_IMC
, ~0);
1364 if (adapter
->msix_entries
) {
1366 for (i
= 0; i
< adapter
->num_q_vectors
; i
++)
1367 synchronize_irq(adapter
->msix_entries
[i
].vector
);
1369 synchronize_irq(adapter
->pdev
->irq
);
1374 * igb_irq_enable - Enable default interrupt generation settings
1375 * @adapter: board private structure
1377 static void igb_irq_enable(struct igb_adapter
*adapter
)
1379 struct e1000_hw
*hw
= &adapter
->hw
;
1381 if (adapter
->msix_entries
) {
1382 u32 ims
= E1000_IMS_LSC
| E1000_IMS_DOUTSYNC
| E1000_IMS_DRSTA
;
1383 u32 regval
= rd32(E1000_EIAC
);
1384 wr32(E1000_EIAC
, regval
| adapter
->eims_enable_mask
);
1385 regval
= rd32(E1000_EIAM
);
1386 wr32(E1000_EIAM
, regval
| adapter
->eims_enable_mask
);
1387 wr32(E1000_EIMS
, adapter
->eims_enable_mask
);
1388 if (adapter
->vfs_allocated_count
) {
1389 wr32(E1000_MBVFIMR
, 0xFF);
1390 ims
|= E1000_IMS_VMMB
;
1392 wr32(E1000_IMS
, ims
);
1394 wr32(E1000_IMS
, IMS_ENABLE_MASK
|
1396 wr32(E1000_IAM
, IMS_ENABLE_MASK
|
1401 static void igb_update_mng_vlan(struct igb_adapter
*adapter
)
1403 struct e1000_hw
*hw
= &adapter
->hw
;
1404 u16 vid
= adapter
->hw
.mng_cookie
.vlan_id
;
1405 u16 old_vid
= adapter
->mng_vlan_id
;
1407 if (hw
->mng_cookie
.status
& E1000_MNG_DHCP_COOKIE_STATUS_VLAN
) {
1408 /* add VID to filter table */
1409 igb_vfta_set(hw
, vid
, true);
1410 adapter
->mng_vlan_id
= vid
;
1412 adapter
->mng_vlan_id
= IGB_MNG_VLAN_NONE
;
1415 if ((old_vid
!= (u16
)IGB_MNG_VLAN_NONE
) &&
1417 !test_bit(old_vid
, adapter
->active_vlans
)) {
1418 /* remove VID from filter table */
1419 igb_vfta_set(hw
, old_vid
, false);
1424 * igb_release_hw_control - release control of the h/w to f/w
1425 * @adapter: address of board private structure
1427 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1428 * For ASF and Pass Through versions of f/w this means that the
1429 * driver is no longer loaded.
1432 static void igb_release_hw_control(struct igb_adapter
*adapter
)
1434 struct e1000_hw
*hw
= &adapter
->hw
;
1437 /* Let firmware take over control of h/w */
1438 ctrl_ext
= rd32(E1000_CTRL_EXT
);
1439 wr32(E1000_CTRL_EXT
,
1440 ctrl_ext
& ~E1000_CTRL_EXT_DRV_LOAD
);
1444 * igb_get_hw_control - get control of the h/w from f/w
1445 * @adapter: address of board private structure
1447 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1448 * For ASF and Pass Through versions of f/w this means that
1449 * the driver is loaded.
1452 static void igb_get_hw_control(struct igb_adapter
*adapter
)
1454 struct e1000_hw
*hw
= &adapter
->hw
;
1457 /* Let firmware know the driver has taken over */
1458 ctrl_ext
= rd32(E1000_CTRL_EXT
);
1459 wr32(E1000_CTRL_EXT
,
1460 ctrl_ext
| E1000_CTRL_EXT_DRV_LOAD
);
1464 * igb_configure - configure the hardware for RX and TX
1465 * @adapter: private board structure
1467 static void igb_configure(struct igb_adapter
*adapter
)
1469 struct net_device
*netdev
= adapter
->netdev
;
1472 igb_get_hw_control(adapter
);
1473 igb_set_rx_mode(netdev
);
1475 igb_restore_vlan(adapter
);
1477 igb_setup_tctl(adapter
);
1478 igb_setup_mrqc(adapter
);
1479 igb_setup_rctl(adapter
);
1481 igb_configure_tx(adapter
);
1482 igb_configure_rx(adapter
);
1484 igb_rx_fifo_flush_82575(&adapter
->hw
);
1486 /* call igb_desc_unused which always leaves
1487 * at least 1 descriptor unused to make sure
1488 * next_to_use != next_to_clean */
1489 for (i
= 0; i
< adapter
->num_rx_queues
; i
++) {
1490 struct igb_ring
*ring
= adapter
->rx_ring
[i
];
1491 igb_alloc_rx_buffers(ring
, igb_desc_unused(ring
));
1496 * igb_power_up_link - Power up the phy/serdes link
1497 * @adapter: address of board private structure
1499 void igb_power_up_link(struct igb_adapter
*adapter
)
1501 if (adapter
->hw
.phy
.media_type
== e1000_media_type_copper
)
1502 igb_power_up_phy_copper(&adapter
->hw
);
1504 igb_power_up_serdes_link_82575(&adapter
->hw
);
1508 * igb_power_down_link - Power down the phy/serdes link
1509 * @adapter: address of board private structure
1511 static void igb_power_down_link(struct igb_adapter
*adapter
)
1513 if (adapter
->hw
.phy
.media_type
== e1000_media_type_copper
)
1514 igb_power_down_phy_copper_82575(&adapter
->hw
);
1516 igb_shutdown_serdes_link_82575(&adapter
->hw
);
1520 * igb_up - Open the interface and prepare it to handle traffic
1521 * @adapter: board private structure
1523 int igb_up(struct igb_adapter
*adapter
)
1525 struct e1000_hw
*hw
= &adapter
->hw
;
1528 /* hardware has been reset, we need to reload some things */
1529 igb_configure(adapter
);
1531 clear_bit(__IGB_DOWN
, &adapter
->state
);
1533 for (i
= 0; i
< adapter
->num_q_vectors
; i
++)
1534 napi_enable(&(adapter
->q_vector
[i
]->napi
));
1536 if (adapter
->msix_entries
)
1537 igb_configure_msix(adapter
);
1539 igb_assign_vector(adapter
->q_vector
[0], 0);
1541 /* Clear any pending interrupts. */
1543 igb_irq_enable(adapter
);
1545 /* notify VFs that reset has been completed */
1546 if (adapter
->vfs_allocated_count
) {
1547 u32 reg_data
= rd32(E1000_CTRL_EXT
);
1548 reg_data
|= E1000_CTRL_EXT_PFRSTD
;
1549 wr32(E1000_CTRL_EXT
, reg_data
);
1552 netif_tx_start_all_queues(adapter
->netdev
);
1554 /* start the watchdog. */
1555 hw
->mac
.get_link_status
= 1;
1556 schedule_work(&adapter
->watchdog_task
);
1561 void igb_down(struct igb_adapter
*adapter
)
1563 struct net_device
*netdev
= adapter
->netdev
;
1564 struct e1000_hw
*hw
= &adapter
->hw
;
1568 /* signal that we're down so the interrupt handler does not
1569 * reschedule our watchdog timer */
1570 set_bit(__IGB_DOWN
, &adapter
->state
);
1572 /* disable receives in the hardware */
1573 rctl
= rd32(E1000_RCTL
);
1574 wr32(E1000_RCTL
, rctl
& ~E1000_RCTL_EN
);
1575 /* flush and sleep below */
1577 netif_tx_stop_all_queues(netdev
);
1579 /* disable transmits in the hardware */
1580 tctl
= rd32(E1000_TCTL
);
1581 tctl
&= ~E1000_TCTL_EN
;
1582 wr32(E1000_TCTL
, tctl
);
1583 /* flush both disables and wait for them to finish */
1587 for (i
= 0; i
< adapter
->num_q_vectors
; i
++)
1588 napi_disable(&(adapter
->q_vector
[i
]->napi
));
1590 igb_irq_disable(adapter
);
1592 del_timer_sync(&adapter
->watchdog_timer
);
1593 del_timer_sync(&adapter
->phy_info_timer
);
1595 netif_carrier_off(netdev
);
1597 /* record the stats before reset*/
1598 spin_lock(&adapter
->stats64_lock
);
1599 igb_update_stats(adapter
, &adapter
->stats64
);
1600 spin_unlock(&adapter
->stats64_lock
);
1602 adapter
->link_speed
= 0;
1603 adapter
->link_duplex
= 0;
1605 if (!pci_channel_offline(adapter
->pdev
))
1607 igb_clean_all_tx_rings(adapter
);
1608 igb_clean_all_rx_rings(adapter
);
1609 #ifdef CONFIG_IGB_DCA
1611 /* since we reset the hardware DCA settings were cleared */
1612 igb_setup_dca(adapter
);
1616 void igb_reinit_locked(struct igb_adapter
*adapter
)
1618 WARN_ON(in_interrupt());
1619 while (test_and_set_bit(__IGB_RESETTING
, &adapter
->state
))
1623 clear_bit(__IGB_RESETTING
, &adapter
->state
);
1626 void igb_reset(struct igb_adapter
*adapter
)
1628 struct pci_dev
*pdev
= adapter
->pdev
;
1629 struct e1000_hw
*hw
= &adapter
->hw
;
1630 struct e1000_mac_info
*mac
= &hw
->mac
;
1631 struct e1000_fc_info
*fc
= &hw
->fc
;
1632 u32 pba
= 0, tx_space
, min_tx_space
, min_rx_space
;
1635 /* Repartition Pba for greater than 9k mtu
1636 * To take effect CTRL.RST is required.
1638 switch (mac
->type
) {
1641 pba
= rd32(E1000_RXPBS
);
1642 pba
= igb_rxpbs_adjust_82580(pba
);
1645 pba
= rd32(E1000_RXPBS
);
1646 pba
&= E1000_RXPBS_SIZE_MASK_82576
;
1650 pba
= E1000_PBA_34K
;
1654 if ((adapter
->max_frame_size
> ETH_FRAME_LEN
+ ETH_FCS_LEN
) &&
1655 (mac
->type
< e1000_82576
)) {
1656 /* adjust PBA for jumbo frames */
1657 wr32(E1000_PBA
, pba
);
1659 /* To maintain wire speed transmits, the Tx FIFO should be
1660 * large enough to accommodate two full transmit packets,
1661 * rounded up to the next 1KB and expressed in KB. Likewise,
1662 * the Rx FIFO should be large enough to accommodate at least
1663 * one full receive packet and is similarly rounded up and
1664 * expressed in KB. */
1665 pba
= rd32(E1000_PBA
);
1666 /* upper 16 bits has Tx packet buffer allocation size in KB */
1667 tx_space
= pba
>> 16;
1668 /* lower 16 bits has Rx packet buffer allocation size in KB */
1670 /* the tx fifo also stores 16 bytes of information about the tx
1671 * but don't include ethernet FCS because hardware appends it */
1672 min_tx_space
= (adapter
->max_frame_size
+
1673 sizeof(union e1000_adv_tx_desc
) -
1675 min_tx_space
= ALIGN(min_tx_space
, 1024);
1676 min_tx_space
>>= 10;
1677 /* software strips receive CRC, so leave room for it */
1678 min_rx_space
= adapter
->max_frame_size
;
1679 min_rx_space
= ALIGN(min_rx_space
, 1024);
1680 min_rx_space
>>= 10;
1682 /* If current Tx allocation is less than the min Tx FIFO size,
1683 * and the min Tx FIFO size is less than the current Rx FIFO
1684 * allocation, take space away from current Rx allocation */
1685 if (tx_space
< min_tx_space
&&
1686 ((min_tx_space
- tx_space
) < pba
)) {
1687 pba
= pba
- (min_tx_space
- tx_space
);
1689 /* if short on rx space, rx wins and must trump tx
1691 if (pba
< min_rx_space
)
1694 wr32(E1000_PBA
, pba
);
1697 /* flow control settings */
1698 /* The high water mark must be low enough to fit one full frame
1699 * (or the size used for early receive) above it in the Rx FIFO.
1700 * Set it to the lower of:
1701 * - 90% of the Rx FIFO size, or
1702 * - the full Rx FIFO size minus one full frame */
1703 hwm
= min(((pba
<< 10) * 9 / 10),
1704 ((pba
<< 10) - 2 * adapter
->max_frame_size
));
1706 fc
->high_water
= hwm
& 0xFFF0; /* 16-byte granularity */
1707 fc
->low_water
= fc
->high_water
- 16;
1708 fc
->pause_time
= 0xFFFF;
1710 fc
->current_mode
= fc
->requested_mode
;
1712 /* disable receive for all VFs and wait one second */
1713 if (adapter
->vfs_allocated_count
) {
1715 for (i
= 0 ; i
< adapter
->vfs_allocated_count
; i
++)
1716 adapter
->vf_data
[i
].flags
&= IGB_VF_FLAG_PF_SET_MAC
;
1718 /* ping all the active vfs to let them know we are going down */
1719 igb_ping_all_vfs(adapter
);
1721 /* disable transmits and receives */
1722 wr32(E1000_VFRE
, 0);
1723 wr32(E1000_VFTE
, 0);
1726 /* Allow time for pending master requests to run */
1727 hw
->mac
.ops
.reset_hw(hw
);
1730 if (hw
->mac
.ops
.init_hw(hw
))
1731 dev_err(&pdev
->dev
, "Hardware Error\n");
1733 igb_init_dmac(adapter
, pba
);
1734 if (!netif_running(adapter
->netdev
))
1735 igb_power_down_link(adapter
);
1737 igb_update_mng_vlan(adapter
);
1739 /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1740 wr32(E1000_VET
, ETHERNET_IEEE_VLAN_TYPE
);
1742 igb_get_phy_info(hw
);
1745 static u32
igb_fix_features(struct net_device
*netdev
, u32 features
)
1748 * Since there is no support for separate rx/tx vlan accel
1749 * enable/disable make sure tx flag is always in same state as rx.
1751 if (features
& NETIF_F_HW_VLAN_RX
)
1752 features
|= NETIF_F_HW_VLAN_TX
;
1754 features
&= ~NETIF_F_HW_VLAN_TX
;
1759 static int igb_set_features(struct net_device
*netdev
, u32 features
)
1761 u32 changed
= netdev
->features
^ features
;
1763 if (changed
& NETIF_F_HW_VLAN_RX
)
1764 igb_vlan_mode(netdev
, features
);
1769 static const struct net_device_ops igb_netdev_ops
= {
1770 .ndo_open
= igb_open
,
1771 .ndo_stop
= igb_close
,
1772 .ndo_start_xmit
= igb_xmit_frame
,
1773 .ndo_get_stats64
= igb_get_stats64
,
1774 .ndo_set_rx_mode
= igb_set_rx_mode
,
1775 .ndo_set_mac_address
= igb_set_mac
,
1776 .ndo_change_mtu
= igb_change_mtu
,
1777 .ndo_do_ioctl
= igb_ioctl
,
1778 .ndo_tx_timeout
= igb_tx_timeout
,
1779 .ndo_validate_addr
= eth_validate_addr
,
1780 .ndo_vlan_rx_add_vid
= igb_vlan_rx_add_vid
,
1781 .ndo_vlan_rx_kill_vid
= igb_vlan_rx_kill_vid
,
1782 .ndo_set_vf_mac
= igb_ndo_set_vf_mac
,
1783 .ndo_set_vf_vlan
= igb_ndo_set_vf_vlan
,
1784 .ndo_set_vf_tx_rate
= igb_ndo_set_vf_bw
,
1785 .ndo_get_vf_config
= igb_ndo_get_vf_config
,
1786 #ifdef CONFIG_NET_POLL_CONTROLLER
1787 .ndo_poll_controller
= igb_netpoll
,
1789 .ndo_fix_features
= igb_fix_features
,
1790 .ndo_set_features
= igb_set_features
,
1794 * igb_probe - Device Initialization Routine
1795 * @pdev: PCI device information struct
1796 * @ent: entry in igb_pci_tbl
1798 * Returns 0 on success, negative on failure
1800 * igb_probe initializes an adapter identified by a pci_dev structure.
1801 * The OS initialization, configuring of the adapter private structure,
1802 * and a hardware reset occur.
1804 static int __devinit
igb_probe(struct pci_dev
*pdev
,
1805 const struct pci_device_id
*ent
)
1807 struct net_device
*netdev
;
1808 struct igb_adapter
*adapter
;
1809 struct e1000_hw
*hw
;
1810 u16 eeprom_data
= 0;
1812 static int global_quad_port_a
; /* global quad port a indication */
1813 const struct e1000_info
*ei
= igb_info_tbl
[ent
->driver_data
];
1814 unsigned long mmio_start
, mmio_len
;
1815 int err
, pci_using_dac
;
1816 u16 eeprom_apme_mask
= IGB_EEPROM_APME
;
1817 u8 part_str
[E1000_PBANUM_LENGTH
];
1819 /* Catch broken hardware that put the wrong VF device ID in
1820 * the PCIe SR-IOV capability.
1822 if (pdev
->is_virtfn
) {
1823 WARN(1, KERN_ERR
"%s (%hx:%hx) should not be a VF!\n",
1824 pci_name(pdev
), pdev
->vendor
, pdev
->device
);
1828 err
= pci_enable_device_mem(pdev
);
1833 err
= dma_set_mask(&pdev
->dev
, DMA_BIT_MASK(64));
1835 err
= dma_set_coherent_mask(&pdev
->dev
, DMA_BIT_MASK(64));
1839 err
= dma_set_mask(&pdev
->dev
, DMA_BIT_MASK(32));
1841 err
= dma_set_coherent_mask(&pdev
->dev
, DMA_BIT_MASK(32));
1843 dev_err(&pdev
->dev
, "No usable DMA "
1844 "configuration, aborting\n");
1850 err
= pci_request_selected_regions(pdev
, pci_select_bars(pdev
,
1856 pci_enable_pcie_error_reporting(pdev
);
1858 pci_set_master(pdev
);
1859 pci_save_state(pdev
);
1862 netdev
= alloc_etherdev_mq(sizeof(struct igb_adapter
),
1865 goto err_alloc_etherdev
;
1867 SET_NETDEV_DEV(netdev
, &pdev
->dev
);
1869 pci_set_drvdata(pdev
, netdev
);
1870 adapter
= netdev_priv(netdev
);
1871 adapter
->netdev
= netdev
;
1872 adapter
->pdev
= pdev
;
1875 adapter
->msg_enable
= NETIF_MSG_DRV
| NETIF_MSG_PROBE
;
1877 mmio_start
= pci_resource_start(pdev
, 0);
1878 mmio_len
= pci_resource_len(pdev
, 0);
1881 hw
->hw_addr
= ioremap(mmio_start
, mmio_len
);
1885 netdev
->netdev_ops
= &igb_netdev_ops
;
1886 igb_set_ethtool_ops(netdev
);
1887 netdev
->watchdog_timeo
= 5 * HZ
;
1889 strncpy(netdev
->name
, pci_name(pdev
), sizeof(netdev
->name
) - 1);
1891 netdev
->mem_start
= mmio_start
;
1892 netdev
->mem_end
= mmio_start
+ mmio_len
;
1894 /* PCI config space info */
1895 hw
->vendor_id
= pdev
->vendor
;
1896 hw
->device_id
= pdev
->device
;
1897 hw
->revision_id
= pdev
->revision
;
1898 hw
->subsystem_vendor_id
= pdev
->subsystem_vendor
;
1899 hw
->subsystem_device_id
= pdev
->subsystem_device
;
1901 /* Copy the default MAC, PHY and NVM function pointers */
1902 memcpy(&hw
->mac
.ops
, ei
->mac_ops
, sizeof(hw
->mac
.ops
));
1903 memcpy(&hw
->phy
.ops
, ei
->phy_ops
, sizeof(hw
->phy
.ops
));
1904 memcpy(&hw
->nvm
.ops
, ei
->nvm_ops
, sizeof(hw
->nvm
.ops
));
1905 /* Initialize skew-specific constants */
1906 err
= ei
->get_invariants(hw
);
1910 /* setup the private structure */
1911 err
= igb_sw_init(adapter
);
1915 igb_get_bus_info_pcie(hw
);
1917 hw
->phy
.autoneg_wait_to_complete
= false;
1919 /* Copper options */
1920 if (hw
->phy
.media_type
== e1000_media_type_copper
) {
1921 hw
->phy
.mdix
= AUTO_ALL_MODES
;
1922 hw
->phy
.disable_polarity_correction
= false;
1923 hw
->phy
.ms_type
= e1000_ms_hw_default
;
1926 if (igb_check_reset_block(hw
))
1927 dev_info(&pdev
->dev
,
1928 "PHY reset is blocked due to SOL/IDER session.\n");
1931 * features is initialized to 0 in allocation, it might have bits
1932 * set by igb_sw_init so we should use an or instead of an
1935 netdev
->features
|= NETIF_F_SG
|
1942 NETIF_F_HW_VLAN_RX
|
1945 /* copy netdev features into list of user selectable features */
1946 netdev
->hw_features
|= netdev
->features
;
1948 /* set this bit last since it cannot be part of hw_features */
1949 netdev
->features
|= NETIF_F_HW_VLAN_FILTER
;
1951 netdev
->vlan_features
|= NETIF_F_TSO
|
1957 if (pci_using_dac
) {
1958 netdev
->features
|= NETIF_F_HIGHDMA
;
1959 netdev
->vlan_features
|= NETIF_F_HIGHDMA
;
1962 if (hw
->mac
.type
>= e1000_82576
) {
1963 netdev
->hw_features
|= NETIF_F_SCTP_CSUM
;
1964 netdev
->features
|= NETIF_F_SCTP_CSUM
;
1967 netdev
->priv_flags
|= IFF_UNICAST_FLT
;
1969 adapter
->en_mng_pt
= igb_enable_mng_pass_thru(hw
);
1971 /* before reading the NVM, reset the controller to put the device in a
1972 * known good starting state */
1973 hw
->mac
.ops
.reset_hw(hw
);
1975 /* make sure the NVM is good */
1976 if (hw
->nvm
.ops
.validate(hw
) < 0) {
1977 dev_err(&pdev
->dev
, "The NVM Checksum Is Not Valid\n");
1982 /* copy the MAC address out of the NVM */
1983 if (hw
->mac
.ops
.read_mac_addr(hw
))
1984 dev_err(&pdev
->dev
, "NVM Read Error\n");
1986 memcpy(netdev
->dev_addr
, hw
->mac
.addr
, netdev
->addr_len
);
1987 memcpy(netdev
->perm_addr
, hw
->mac
.addr
, netdev
->addr_len
);
1989 if (!is_valid_ether_addr(netdev
->perm_addr
)) {
1990 dev_err(&pdev
->dev
, "Invalid MAC Address\n");
1995 setup_timer(&adapter
->watchdog_timer
, igb_watchdog
,
1996 (unsigned long) adapter
);
1997 setup_timer(&adapter
->phy_info_timer
, igb_update_phy_info
,
1998 (unsigned long) adapter
);
2000 INIT_WORK(&adapter
->reset_task
, igb_reset_task
);
2001 INIT_WORK(&adapter
->watchdog_task
, igb_watchdog_task
);
2003 /* Initialize link properties that are user-changeable */
2004 adapter
->fc_autoneg
= true;
2005 hw
->mac
.autoneg
= true;
2006 hw
->phy
.autoneg_advertised
= 0x2f;
2008 hw
->fc
.requested_mode
= e1000_fc_default
;
2009 hw
->fc
.current_mode
= e1000_fc_default
;
2011 igb_validate_mdi_setting(hw
);
2013 /* Initial Wake on LAN setting If APM wake is enabled in the EEPROM,
2014 * enable the ACPI Magic Packet filter
2017 if (hw
->bus
.func
== 0)
2018 hw
->nvm
.ops
.read(hw
, NVM_INIT_CONTROL3_PORT_A
, 1, &eeprom_data
);
2019 else if (hw
->mac
.type
>= e1000_82580
)
2020 hw
->nvm
.ops
.read(hw
, NVM_INIT_CONTROL3_PORT_A
+
2021 NVM_82580_LAN_FUNC_OFFSET(hw
->bus
.func
), 1,
2023 else if (hw
->bus
.func
== 1)
2024 hw
->nvm
.ops
.read(hw
, NVM_INIT_CONTROL3_PORT_B
, 1, &eeprom_data
);
2026 if (eeprom_data
& eeprom_apme_mask
)
2027 adapter
->eeprom_wol
|= E1000_WUFC_MAG
;
2029 /* now that we have the eeprom settings, apply the special cases where
2030 * the eeprom may be wrong or the board simply won't support wake on
2031 * lan on a particular port */
2032 switch (pdev
->device
) {
2033 case E1000_DEV_ID_82575GB_QUAD_COPPER
:
2034 adapter
->eeprom_wol
= 0;
2036 case E1000_DEV_ID_82575EB_FIBER_SERDES
:
2037 case E1000_DEV_ID_82576_FIBER
:
2038 case E1000_DEV_ID_82576_SERDES
:
2039 /* Wake events only supported on port A for dual fiber
2040 * regardless of eeprom setting */
2041 if (rd32(E1000_STATUS
) & E1000_STATUS_FUNC_1
)
2042 adapter
->eeprom_wol
= 0;
2044 case E1000_DEV_ID_82576_QUAD_COPPER
:
2045 case E1000_DEV_ID_82576_QUAD_COPPER_ET2
:
2046 /* if quad port adapter, disable WoL on all but port A */
2047 if (global_quad_port_a
!= 0)
2048 adapter
->eeprom_wol
= 0;
2050 adapter
->flags
|= IGB_FLAG_QUAD_PORT_A
;
2051 /* Reset for multiple quad port adapters */
2052 if (++global_quad_port_a
== 4)
2053 global_quad_port_a
= 0;
2057 /* initialize the wol settings based on the eeprom settings */
2058 adapter
->wol
= adapter
->eeprom_wol
;
2059 device_set_wakeup_enable(&adapter
->pdev
->dev
, adapter
->wol
);
2061 /* reset the hardware with the new settings */
2064 /* let the f/w know that the h/w is now under the control of the
2066 igb_get_hw_control(adapter
);
2068 strcpy(netdev
->name
, "eth%d");
2069 err
= register_netdev(netdev
);
2073 /* carrier off reporting is important to ethtool even BEFORE open */
2074 netif_carrier_off(netdev
);
2076 #ifdef CONFIG_IGB_DCA
2077 if (dca_add_requester(&pdev
->dev
) == 0) {
2078 adapter
->flags
|= IGB_FLAG_DCA_ENABLED
;
2079 dev_info(&pdev
->dev
, "DCA enabled\n");
2080 igb_setup_dca(adapter
);
2084 /* do hw tstamp init after resetting */
2085 igb_init_hw_timer(adapter
);
2087 dev_info(&pdev
->dev
, "Intel(R) Gigabit Ethernet Network Connection\n");
2088 /* print bus type/speed/width info */
2089 dev_info(&pdev
->dev
, "%s: (PCIe:%s:%s) %pM\n",
2091 ((hw
->bus
.speed
== e1000_bus_speed_2500
) ? "2.5Gb/s" :
2092 (hw
->bus
.speed
== e1000_bus_speed_5000
) ? "5.0Gb/s" :
2094 ((hw
->bus
.width
== e1000_bus_width_pcie_x4
) ? "Width x4" :
2095 (hw
->bus
.width
== e1000_bus_width_pcie_x2
) ? "Width x2" :
2096 (hw
->bus
.width
== e1000_bus_width_pcie_x1
) ? "Width x1" :
2100 ret_val
= igb_read_part_string(hw
, part_str
, E1000_PBANUM_LENGTH
);
2102 strcpy(part_str
, "Unknown");
2103 dev_info(&pdev
->dev
, "%s: PBA No: %s\n", netdev
->name
, part_str
);
2104 dev_info(&pdev
->dev
,
2105 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2106 adapter
->msix_entries
? "MSI-X" :
2107 (adapter
->flags
& IGB_FLAG_HAS_MSI
) ? "MSI" : "legacy",
2108 adapter
->num_rx_queues
, adapter
->num_tx_queues
);
2109 switch (hw
->mac
.type
) {
2111 igb_set_eee_i350(hw
);
2119 igb_release_hw_control(adapter
);
2121 if (!igb_check_reset_block(hw
))
2124 if (hw
->flash_address
)
2125 iounmap(hw
->flash_address
);
2127 igb_clear_interrupt_scheme(adapter
);
2128 iounmap(hw
->hw_addr
);
2130 free_netdev(netdev
);
2132 pci_release_selected_regions(pdev
,
2133 pci_select_bars(pdev
, IORESOURCE_MEM
));
2136 pci_disable_device(pdev
);
2141 * igb_remove - Device Removal Routine
2142 * @pdev: PCI device information struct
2144 * igb_remove is called by the PCI subsystem to alert the driver
2145 * that it should release a PCI device. The could be caused by a
2146 * Hot-Plug event, or because the driver is going to be removed from
2149 static void __devexit
igb_remove(struct pci_dev
*pdev
)
2151 struct net_device
*netdev
= pci_get_drvdata(pdev
);
2152 struct igb_adapter
*adapter
= netdev_priv(netdev
);
2153 struct e1000_hw
*hw
= &adapter
->hw
;
2156 * The watchdog timer may be rescheduled, so explicitly
2157 * disable watchdog from being rescheduled.
2159 set_bit(__IGB_DOWN
, &adapter
->state
);
2160 del_timer_sync(&adapter
->watchdog_timer
);
2161 del_timer_sync(&adapter
->phy_info_timer
);
2163 cancel_work_sync(&adapter
->reset_task
);
2164 cancel_work_sync(&adapter
->watchdog_task
);
2166 #ifdef CONFIG_IGB_DCA
2167 if (adapter
->flags
& IGB_FLAG_DCA_ENABLED
) {
2168 dev_info(&pdev
->dev
, "DCA disabled\n");
2169 dca_remove_requester(&pdev
->dev
);
2170 adapter
->flags
&= ~IGB_FLAG_DCA_ENABLED
;
2171 wr32(E1000_DCA_CTRL
, E1000_DCA_CTRL_DCA_MODE_DISABLE
);
2175 /* Release control of h/w to f/w. If f/w is AMT enabled, this
2176 * would have already happened in close and is redundant. */
2177 igb_release_hw_control(adapter
);
2179 unregister_netdev(netdev
);
2181 igb_clear_interrupt_scheme(adapter
);
2183 #ifdef CONFIG_PCI_IOV
2184 /* reclaim resources allocated to VFs */
2185 if (adapter
->vf_data
) {
2186 /* disable iov and allow time for transactions to clear */
2187 if (!igb_check_vf_assignment(adapter
)) {
2188 pci_disable_sriov(pdev
);
2191 dev_info(&pdev
->dev
, "VF(s) assigned to guests!\n");
2194 kfree(adapter
->vf_data
);
2195 adapter
->vf_data
= NULL
;
2196 wr32(E1000_IOVCTL
, E1000_IOVCTL_REUSE_VFQ
);
2199 dev_info(&pdev
->dev
, "IOV Disabled\n");
2203 iounmap(hw
->hw_addr
);
2204 if (hw
->flash_address
)
2205 iounmap(hw
->flash_address
);
2206 pci_release_selected_regions(pdev
,
2207 pci_select_bars(pdev
, IORESOURCE_MEM
));
2209 kfree(adapter
->shadow_vfta
);
2210 free_netdev(netdev
);
2212 pci_disable_pcie_error_reporting(pdev
);
2214 pci_disable_device(pdev
);
2218 * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2219 * @adapter: board private structure to initialize
2221 * This function initializes the vf specific data storage and then attempts to
2222 * allocate the VFs. The reason for ordering it this way is because it is much
2223 * mor expensive time wise to disable SR-IOV than it is to allocate and free
2224 * the memory for the VFs.
2226 static void __devinit
igb_probe_vfs(struct igb_adapter
* adapter
)
2228 #ifdef CONFIG_PCI_IOV
2229 struct pci_dev
*pdev
= adapter
->pdev
;
2230 int old_vfs
= igb_find_enabled_vfs(adapter
);
2234 dev_info(&pdev
->dev
, "%d pre-allocated VFs found - override "
2235 "max_vfs setting of %d\n", old_vfs
, max_vfs
);
2236 adapter
->vfs_allocated_count
= old_vfs
;
2239 if (!adapter
->vfs_allocated_count
)
2242 adapter
->vf_data
= kcalloc(adapter
->vfs_allocated_count
,
2243 sizeof(struct vf_data_storage
), GFP_KERNEL
);
2244 /* if allocation failed then we do not support SR-IOV */
2245 if (!adapter
->vf_data
) {
2246 adapter
->vfs_allocated_count
= 0;
2247 dev_err(&pdev
->dev
, "Unable to allocate memory for VF "
2253 if (pci_enable_sriov(pdev
, adapter
->vfs_allocated_count
))
2256 dev_info(&pdev
->dev
, "%d VFs allocated\n",
2257 adapter
->vfs_allocated_count
);
2258 for (i
= 0; i
< adapter
->vfs_allocated_count
; i
++)
2259 igb_vf_configure(adapter
, i
);
2261 /* DMA Coalescing is not supported in IOV mode. */
2262 adapter
->flags
&= ~IGB_FLAG_DMAC
;
2265 kfree(adapter
->vf_data
);
2266 adapter
->vf_data
= NULL
;
2267 adapter
->vfs_allocated_count
= 0;
2270 #endif /* CONFIG_PCI_IOV */
2274 * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2275 * @adapter: board private structure to initialize
2277 * igb_init_hw_timer initializes the function pointer and values for the hw
2278 * timer found in hardware.
2280 static void igb_init_hw_timer(struct igb_adapter
*adapter
)
2282 struct e1000_hw
*hw
= &adapter
->hw
;
2284 switch (hw
->mac
.type
) {
2287 memset(&adapter
->cycles
, 0, sizeof(adapter
->cycles
));
2288 adapter
->cycles
.read
= igb_read_clock
;
2289 adapter
->cycles
.mask
= CLOCKSOURCE_MASK(64);
2290 adapter
->cycles
.mult
= 1;
2292 * The 82580 timesync updates the system timer every 8ns by 8ns
2293 * and the value cannot be shifted. Instead we need to shift
2294 * the registers to generate a 64bit timer value. As a result
2295 * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2296 * 24 in order to generate a larger value for synchronization.
2298 adapter
->cycles
.shift
= IGB_82580_TSYNC_SHIFT
;
2299 /* disable system timer temporarily by setting bit 31 */
2300 wr32(E1000_TSAUXC
, 0x80000000);
2303 /* Set registers so that rollover occurs soon to test this. */
2304 wr32(E1000_SYSTIMR
, 0x00000000);
2305 wr32(E1000_SYSTIML
, 0x80000000);
2306 wr32(E1000_SYSTIMH
, 0x000000FF);
2309 /* enable system timer by clearing bit 31 */
2310 wr32(E1000_TSAUXC
, 0x0);
2313 timecounter_init(&adapter
->clock
,
2315 ktime_to_ns(ktime_get_real()));
2317 * Synchronize our NIC clock against system wall clock. NIC
2318 * time stamp reading requires ~3us per sample, each sample
2319 * was pretty stable even under load => only require 10
2320 * samples for each offset comparison.
2322 memset(&adapter
->compare
, 0, sizeof(adapter
->compare
));
2323 adapter
->compare
.source
= &adapter
->clock
;
2324 adapter
->compare
.target
= ktime_get_real
;
2325 adapter
->compare
.num_samples
= 10;
2326 timecompare_update(&adapter
->compare
, 0);
2330 * Initialize hardware timer: we keep it running just in case
2331 * that some program needs it later on.
2333 memset(&adapter
->cycles
, 0, sizeof(adapter
->cycles
));
2334 adapter
->cycles
.read
= igb_read_clock
;
2335 adapter
->cycles
.mask
= CLOCKSOURCE_MASK(64);
2336 adapter
->cycles
.mult
= 1;
2338 * Scale the NIC clock cycle by a large factor so that
2339 * relatively small clock corrections can be added or
2340 * subtracted at each clock tick. The drawbacks of a large
2341 * factor are a) that the clock register overflows more quickly
2342 * (not such a big deal) and b) that the increment per tick has
2343 * to fit into 24 bits. As a result we need to use a shift of
2344 * 19 so we can fit a value of 16 into the TIMINCA register.
2346 adapter
->cycles
.shift
= IGB_82576_TSYNC_SHIFT
;
2348 (1 << E1000_TIMINCA_16NS_SHIFT
) |
2349 (16 << IGB_82576_TSYNC_SHIFT
));
2351 /* Set registers so that rollover occurs soon to test this. */
2352 wr32(E1000_SYSTIML
, 0x00000000);
2353 wr32(E1000_SYSTIMH
, 0xFF800000);
2356 timecounter_init(&adapter
->clock
,
2358 ktime_to_ns(ktime_get_real()));
2360 * Synchronize our NIC clock against system wall clock. NIC
2361 * time stamp reading requires ~3us per sample, each sample
2362 * was pretty stable even under load => only require 10
2363 * samples for each offset comparison.
2365 memset(&adapter
->compare
, 0, sizeof(adapter
->compare
));
2366 adapter
->compare
.source
= &adapter
->clock
;
2367 adapter
->compare
.target
= ktime_get_real
;
2368 adapter
->compare
.num_samples
= 10;
2369 timecompare_update(&adapter
->compare
, 0);
2372 /* 82575 does not support timesync */
2380 * igb_sw_init - Initialize general software structures (struct igb_adapter)
2381 * @adapter: board private structure to initialize
2383 * igb_sw_init initializes the Adapter private data structure.
2384 * Fields are initialized based on PCI device information and
2385 * OS network device settings (MTU size).
2387 static int __devinit
igb_sw_init(struct igb_adapter
*adapter
)
2389 struct e1000_hw
*hw
= &adapter
->hw
;
2390 struct net_device
*netdev
= adapter
->netdev
;
2391 struct pci_dev
*pdev
= adapter
->pdev
;
2393 pci_read_config_word(pdev
, PCI_COMMAND
, &hw
->bus
.pci_cmd_word
);
2395 /* set default ring sizes */
2396 adapter
->tx_ring_count
= IGB_DEFAULT_TXD
;
2397 adapter
->rx_ring_count
= IGB_DEFAULT_RXD
;
2399 /* set default ITR values */
2400 adapter
->rx_itr_setting
= IGB_DEFAULT_ITR
;
2401 adapter
->tx_itr_setting
= IGB_DEFAULT_ITR
;
2403 /* set default work limits */
2404 adapter
->tx_work_limit
= IGB_DEFAULT_TX_WORK
;
2406 adapter
->max_frame_size
= netdev
->mtu
+ ETH_HLEN
+ ETH_FCS_LEN
+
2408 adapter
->min_frame_size
= ETH_ZLEN
+ ETH_FCS_LEN
;
2412 spin_lock_init(&adapter
->stats64_lock
);
2413 #ifdef CONFIG_PCI_IOV
2414 switch (hw
->mac
.type
) {
2418 dev_warn(&pdev
->dev
,
2419 "Maximum of 7 VFs per PF, using max\n");
2420 adapter
->vfs_allocated_count
= 7;
2422 adapter
->vfs_allocated_count
= max_vfs
;
2427 #endif /* CONFIG_PCI_IOV */
2428 adapter
->rss_queues
= min_t(u32
, IGB_MAX_RX_QUEUES
, num_online_cpus());
2429 /* i350 cannot do RSS and SR-IOV at the same time */
2430 if (hw
->mac
.type
== e1000_i350
&& adapter
->vfs_allocated_count
)
2431 adapter
->rss_queues
= 1;
2434 * if rss_queues > 4 or vfs are going to be allocated with rss_queues
2435 * then we should combine the queues into a queue pair in order to
2436 * conserve interrupts due to limited supply
2438 if ((adapter
->rss_queues
> 4) ||
2439 ((adapter
->rss_queues
> 1) && (adapter
->vfs_allocated_count
> 6)))
2440 adapter
->flags
|= IGB_FLAG_QUEUE_PAIRS
;
2442 /* Setup and initialize a copy of the hw vlan table array */
2443 adapter
->shadow_vfta
= kzalloc(sizeof(u32
) *
2444 E1000_VLAN_FILTER_TBL_SIZE
,
2447 /* This call may decrease the number of queues */
2448 if (igb_init_interrupt_scheme(adapter
)) {
2449 dev_err(&pdev
->dev
, "Unable to allocate memory for queues\n");
2453 igb_probe_vfs(adapter
);
2455 /* Explicitly disable IRQ since the NIC can be in any state. */
2456 igb_irq_disable(adapter
);
2458 if (hw
->mac
.type
== e1000_i350
)
2459 adapter
->flags
&= ~IGB_FLAG_DMAC
;
2461 set_bit(__IGB_DOWN
, &adapter
->state
);
2466 * igb_open - Called when a network interface is made active
2467 * @netdev: network interface device structure
2469 * Returns 0 on success, negative value on failure
2471 * The open entry point is called when a network interface is made
2472 * active by the system (IFF_UP). At this point all resources needed
2473 * for transmit and receive operations are allocated, the interrupt
2474 * handler is registered with the OS, the watchdog timer is started,
2475 * and the stack is notified that the interface is ready.
2477 static int igb_open(struct net_device
*netdev
)
2479 struct igb_adapter
*adapter
= netdev_priv(netdev
);
2480 struct e1000_hw
*hw
= &adapter
->hw
;
2484 /* disallow open during test */
2485 if (test_bit(__IGB_TESTING
, &adapter
->state
))
2488 netif_carrier_off(netdev
);
2490 /* allocate transmit descriptors */
2491 err
= igb_setup_all_tx_resources(adapter
);
2495 /* allocate receive descriptors */
2496 err
= igb_setup_all_rx_resources(adapter
);
2500 igb_power_up_link(adapter
);
2502 /* before we allocate an interrupt, we must be ready to handle it.
2503 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2504 * as soon as we call pci_request_irq, so we have to setup our
2505 * clean_rx handler before we do so. */
2506 igb_configure(adapter
);
2508 err
= igb_request_irq(adapter
);
2512 /* From here on the code is the same as igb_up() */
2513 clear_bit(__IGB_DOWN
, &adapter
->state
);
2515 for (i
= 0; i
< adapter
->num_q_vectors
; i
++)
2516 napi_enable(&(adapter
->q_vector
[i
]->napi
));
2518 /* Clear any pending interrupts. */
2521 igb_irq_enable(adapter
);
2523 /* notify VFs that reset has been completed */
2524 if (adapter
->vfs_allocated_count
) {
2525 u32 reg_data
= rd32(E1000_CTRL_EXT
);
2526 reg_data
|= E1000_CTRL_EXT_PFRSTD
;
2527 wr32(E1000_CTRL_EXT
, reg_data
);
2530 netif_tx_start_all_queues(netdev
);
2532 /* start the watchdog. */
2533 hw
->mac
.get_link_status
= 1;
2534 schedule_work(&adapter
->watchdog_task
);
2539 igb_release_hw_control(adapter
);
2540 igb_power_down_link(adapter
);
2541 igb_free_all_rx_resources(adapter
);
2543 igb_free_all_tx_resources(adapter
);
2551 * igb_close - Disables a network interface
2552 * @netdev: network interface device structure
2554 * Returns 0, this is not allowed to fail
2556 * The close entry point is called when an interface is de-activated
2557 * by the OS. The hardware is still under the driver's control, but
2558 * needs to be disabled. A global MAC reset is issued to stop the
2559 * hardware, and all transmit and receive resources are freed.
2561 static int igb_close(struct net_device
*netdev
)
2563 struct igb_adapter
*adapter
= netdev_priv(netdev
);
2565 WARN_ON(test_bit(__IGB_RESETTING
, &adapter
->state
));
2568 igb_free_irq(adapter
);
2570 igb_free_all_tx_resources(adapter
);
2571 igb_free_all_rx_resources(adapter
);
2577 * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2578 * @tx_ring: tx descriptor ring (for a specific queue) to setup
2580 * Return 0 on success, negative on failure
2582 int igb_setup_tx_resources(struct igb_ring
*tx_ring
)
2584 struct device
*dev
= tx_ring
->dev
;
2585 int orig_node
= dev_to_node(dev
);
2588 size
= sizeof(struct igb_tx_buffer
) * tx_ring
->count
;
2589 tx_ring
->tx_buffer_info
= vzalloc_node(size
, tx_ring
->numa_node
);
2590 if (!tx_ring
->tx_buffer_info
)
2591 tx_ring
->tx_buffer_info
= vzalloc(size
);
2592 if (!tx_ring
->tx_buffer_info
)
2595 /* round up to nearest 4K */
2596 tx_ring
->size
= tx_ring
->count
* sizeof(union e1000_adv_tx_desc
);
2597 tx_ring
->size
= ALIGN(tx_ring
->size
, 4096);
2599 set_dev_node(dev
, tx_ring
->numa_node
);
2600 tx_ring
->desc
= dma_alloc_coherent(dev
,
2604 set_dev_node(dev
, orig_node
);
2606 tx_ring
->desc
= dma_alloc_coherent(dev
,
2614 tx_ring
->next_to_use
= 0;
2615 tx_ring
->next_to_clean
= 0;
2620 vfree(tx_ring
->tx_buffer_info
);
2622 "Unable to allocate memory for the transmit descriptor ring\n");
2627 * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2628 * (Descriptors) for all queues
2629 * @adapter: board private structure
2631 * Return 0 on success, negative on failure
2633 static int igb_setup_all_tx_resources(struct igb_adapter
*adapter
)
2635 struct pci_dev
*pdev
= adapter
->pdev
;
2638 for (i
= 0; i
< adapter
->num_tx_queues
; i
++) {
2639 err
= igb_setup_tx_resources(adapter
->tx_ring
[i
]);
2642 "Allocation for Tx Queue %u failed\n", i
);
2643 for (i
--; i
>= 0; i
--)
2644 igb_free_tx_resources(adapter
->tx_ring
[i
]);
2653 * igb_setup_tctl - configure the transmit control registers
2654 * @adapter: Board private structure
2656 void igb_setup_tctl(struct igb_adapter
*adapter
)
2658 struct e1000_hw
*hw
= &adapter
->hw
;
2661 /* disable queue 0 which is enabled by default on 82575 and 82576 */
2662 wr32(E1000_TXDCTL(0), 0);
2664 /* Program the Transmit Control Register */
2665 tctl
= rd32(E1000_TCTL
);
2666 tctl
&= ~E1000_TCTL_CT
;
2667 tctl
|= E1000_TCTL_PSP
| E1000_TCTL_RTLC
|
2668 (E1000_COLLISION_THRESHOLD
<< E1000_CT_SHIFT
);
2670 igb_config_collision_dist(hw
);
2672 /* Enable transmits */
2673 tctl
|= E1000_TCTL_EN
;
2675 wr32(E1000_TCTL
, tctl
);
2679 * igb_configure_tx_ring - Configure transmit ring after Reset
2680 * @adapter: board private structure
2681 * @ring: tx ring to configure
2683 * Configure a transmit ring after a reset.
2685 void igb_configure_tx_ring(struct igb_adapter
*adapter
,
2686 struct igb_ring
*ring
)
2688 struct e1000_hw
*hw
= &adapter
->hw
;
2690 u64 tdba
= ring
->dma
;
2691 int reg_idx
= ring
->reg_idx
;
2693 /* disable the queue */
2694 wr32(E1000_TXDCTL(reg_idx
), 0);
2698 wr32(E1000_TDLEN(reg_idx
),
2699 ring
->count
* sizeof(union e1000_adv_tx_desc
));
2700 wr32(E1000_TDBAL(reg_idx
),
2701 tdba
& 0x00000000ffffffffULL
);
2702 wr32(E1000_TDBAH(reg_idx
), tdba
>> 32);
2704 ring
->tail
= hw
->hw_addr
+ E1000_TDT(reg_idx
);
2705 wr32(E1000_TDH(reg_idx
), 0);
2706 writel(0, ring
->tail
);
2708 txdctl
|= IGB_TX_PTHRESH
;
2709 txdctl
|= IGB_TX_HTHRESH
<< 8;
2710 txdctl
|= IGB_TX_WTHRESH
<< 16;
2712 txdctl
|= E1000_TXDCTL_QUEUE_ENABLE
;
2713 wr32(E1000_TXDCTL(reg_idx
), txdctl
);
2717 * igb_configure_tx - Configure transmit Unit after Reset
2718 * @adapter: board private structure
2720 * Configure the Tx unit of the MAC after a reset.
2722 static void igb_configure_tx(struct igb_adapter
*adapter
)
2726 for (i
= 0; i
< adapter
->num_tx_queues
; i
++)
2727 igb_configure_tx_ring(adapter
, adapter
->tx_ring
[i
]);
2731 * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2732 * @rx_ring: rx descriptor ring (for a specific queue) to setup
2734 * Returns 0 on success, negative on failure
2736 int igb_setup_rx_resources(struct igb_ring
*rx_ring
)
2738 struct device
*dev
= rx_ring
->dev
;
2739 int orig_node
= dev_to_node(dev
);
2742 size
= sizeof(struct igb_rx_buffer
) * rx_ring
->count
;
2743 rx_ring
->rx_buffer_info
= vzalloc_node(size
, rx_ring
->numa_node
);
2744 if (!rx_ring
->rx_buffer_info
)
2745 rx_ring
->rx_buffer_info
= vzalloc(size
);
2746 if (!rx_ring
->rx_buffer_info
)
2749 desc_len
= sizeof(union e1000_adv_rx_desc
);
2751 /* Round up to nearest 4K */
2752 rx_ring
->size
= rx_ring
->count
* desc_len
;
2753 rx_ring
->size
= ALIGN(rx_ring
->size
, 4096);
2755 set_dev_node(dev
, rx_ring
->numa_node
);
2756 rx_ring
->desc
= dma_alloc_coherent(dev
,
2760 set_dev_node(dev
, orig_node
);
2762 rx_ring
->desc
= dma_alloc_coherent(dev
,
2770 rx_ring
->next_to_clean
= 0;
2771 rx_ring
->next_to_use
= 0;
2776 vfree(rx_ring
->rx_buffer_info
);
2777 rx_ring
->rx_buffer_info
= NULL
;
2778 dev_err(dev
, "Unable to allocate memory for the receive descriptor"
2784 * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2785 * (Descriptors) for all queues
2786 * @adapter: board private structure
2788 * Return 0 on success, negative on failure
2790 static int igb_setup_all_rx_resources(struct igb_adapter
*adapter
)
2792 struct pci_dev
*pdev
= adapter
->pdev
;
2795 for (i
= 0; i
< adapter
->num_rx_queues
; i
++) {
2796 err
= igb_setup_rx_resources(adapter
->rx_ring
[i
]);
2799 "Allocation for Rx Queue %u failed\n", i
);
2800 for (i
--; i
>= 0; i
--)
2801 igb_free_rx_resources(adapter
->rx_ring
[i
]);
2810 * igb_setup_mrqc - configure the multiple receive queue control registers
2811 * @adapter: Board private structure
2813 static void igb_setup_mrqc(struct igb_adapter
*adapter
)
2815 struct e1000_hw
*hw
= &adapter
->hw
;
2817 u32 j
, num_rx_queues
, shift
= 0, shift2
= 0;
2822 static const u8 rsshash
[40] = {
2823 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2824 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2825 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2826 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2828 /* Fill out hash function seeds */
2829 for (j
= 0; j
< 10; j
++) {
2830 u32 rsskey
= rsshash
[(j
* 4)];
2831 rsskey
|= rsshash
[(j
* 4) + 1] << 8;
2832 rsskey
|= rsshash
[(j
* 4) + 2] << 16;
2833 rsskey
|= rsshash
[(j
* 4) + 3] << 24;
2834 array_wr32(E1000_RSSRK(0), j
, rsskey
);
2837 num_rx_queues
= adapter
->rss_queues
;
2839 if (adapter
->vfs_allocated_count
) {
2840 /* 82575 and 82576 support 2 RSS queues for VMDq */
2841 switch (hw
->mac
.type
) {
2858 if (hw
->mac
.type
== e1000_82575
)
2862 for (j
= 0; j
< (32 * 4); j
++) {
2863 reta
.bytes
[j
& 3] = (j
% num_rx_queues
) << shift
;
2865 reta
.bytes
[j
& 3] |= num_rx_queues
<< shift2
;
2867 wr32(E1000_RETA(j
>> 2), reta
.dword
);
2871 * Disable raw packet checksumming so that RSS hash is placed in
2872 * descriptor on writeback. No need to enable TCP/UDP/IP checksum
2873 * offloads as they are enabled by default
2875 rxcsum
= rd32(E1000_RXCSUM
);
2876 rxcsum
|= E1000_RXCSUM_PCSD
;
2878 if (adapter
->hw
.mac
.type
>= e1000_82576
)
2879 /* Enable Receive Checksum Offload for SCTP */
2880 rxcsum
|= E1000_RXCSUM_CRCOFL
;
2882 /* Don't need to set TUOFL or IPOFL, they default to 1 */
2883 wr32(E1000_RXCSUM
, rxcsum
);
2885 /* If VMDq is enabled then we set the appropriate mode for that, else
2886 * we default to RSS so that an RSS hash is calculated per packet even
2887 * if we are only using one queue */
2888 if (adapter
->vfs_allocated_count
) {
2889 if (hw
->mac
.type
> e1000_82575
) {
2890 /* Set the default pool for the PF's first queue */
2891 u32 vtctl
= rd32(E1000_VT_CTL
);
2892 vtctl
&= ~(E1000_VT_CTL_DEFAULT_POOL_MASK
|
2893 E1000_VT_CTL_DISABLE_DEF_POOL
);
2894 vtctl
|= adapter
->vfs_allocated_count
<<
2895 E1000_VT_CTL_DEFAULT_POOL_SHIFT
;
2896 wr32(E1000_VT_CTL
, vtctl
);
2898 if (adapter
->rss_queues
> 1)
2899 mrqc
= E1000_MRQC_ENABLE_VMDQ_RSS_2Q
;
2901 mrqc
= E1000_MRQC_ENABLE_VMDQ
;
2903 mrqc
= E1000_MRQC_ENABLE_RSS_4Q
;
2905 igb_vmm_control(adapter
);
2908 * Generate RSS hash based on TCP port numbers and/or
2909 * IPv4/v6 src and dst addresses since UDP cannot be
2910 * hashed reliably due to IP fragmentation
2912 mrqc
|= E1000_MRQC_RSS_FIELD_IPV4
|
2913 E1000_MRQC_RSS_FIELD_IPV4_TCP
|
2914 E1000_MRQC_RSS_FIELD_IPV6
|
2915 E1000_MRQC_RSS_FIELD_IPV6_TCP
|
2916 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX
;
2918 wr32(E1000_MRQC
, mrqc
);
2922 * igb_setup_rctl - configure the receive control registers
2923 * @adapter: Board private structure
2925 void igb_setup_rctl(struct igb_adapter
*adapter
)
2927 struct e1000_hw
*hw
= &adapter
->hw
;
2930 rctl
= rd32(E1000_RCTL
);
2932 rctl
&= ~(3 << E1000_RCTL_MO_SHIFT
);
2933 rctl
&= ~(E1000_RCTL_LBM_TCVR
| E1000_RCTL_LBM_MAC
);
2935 rctl
|= E1000_RCTL_EN
| E1000_RCTL_BAM
| E1000_RCTL_RDMTS_HALF
|
2936 (hw
->mac
.mc_filter_type
<< E1000_RCTL_MO_SHIFT
);
2939 * enable stripping of CRC. It's unlikely this will break BMC
2940 * redirection as it did with e1000. Newer features require
2941 * that the HW strips the CRC.
2943 rctl
|= E1000_RCTL_SECRC
;
2945 /* disable store bad packets and clear size bits. */
2946 rctl
&= ~(E1000_RCTL_SBP
| E1000_RCTL_SZ_256
);
2948 /* enable LPE to prevent packets larger than max_frame_size */
2949 rctl
|= E1000_RCTL_LPE
;
2951 /* disable queue 0 to prevent tail write w/o re-config */
2952 wr32(E1000_RXDCTL(0), 0);
2954 /* Attention!!! For SR-IOV PF driver operations you must enable
2955 * queue drop for all VF and PF queues to prevent head of line blocking
2956 * if an un-trusted VF does not provide descriptors to hardware.
2958 if (adapter
->vfs_allocated_count
) {
2959 /* set all queue drop enable bits */
2960 wr32(E1000_QDE
, ALL_QUEUES
);
2963 wr32(E1000_RCTL
, rctl
);
2966 static inline int igb_set_vf_rlpml(struct igb_adapter
*adapter
, int size
,
2969 struct e1000_hw
*hw
= &adapter
->hw
;
2972 /* if it isn't the PF check to see if VFs are enabled and
2973 * increase the size to support vlan tags */
2974 if (vfn
< adapter
->vfs_allocated_count
&&
2975 adapter
->vf_data
[vfn
].vlans_enabled
)
2976 size
+= VLAN_TAG_SIZE
;
2978 vmolr
= rd32(E1000_VMOLR(vfn
));
2979 vmolr
&= ~E1000_VMOLR_RLPML_MASK
;
2980 vmolr
|= size
| E1000_VMOLR_LPE
;
2981 wr32(E1000_VMOLR(vfn
), vmolr
);
2987 * igb_rlpml_set - set maximum receive packet size
2988 * @adapter: board private structure
2990 * Configure maximum receivable packet size.
2992 static void igb_rlpml_set(struct igb_adapter
*adapter
)
2994 u32 max_frame_size
= adapter
->max_frame_size
;
2995 struct e1000_hw
*hw
= &adapter
->hw
;
2996 u16 pf_id
= adapter
->vfs_allocated_count
;
2999 igb_set_vf_rlpml(adapter
, max_frame_size
, pf_id
);
3001 * If we're in VMDQ or SR-IOV mode, then set global RLPML
3002 * to our max jumbo frame size, in case we need to enable
3003 * jumbo frames on one of the rings later.
3004 * This will not pass over-length frames into the default
3005 * queue because it's gated by the VMOLR.RLPML.
3007 max_frame_size
= MAX_JUMBO_FRAME_SIZE
;
3010 wr32(E1000_RLPML
, max_frame_size
);
3013 static inline void igb_set_vmolr(struct igb_adapter
*adapter
,
3016 struct e1000_hw
*hw
= &adapter
->hw
;
3020 * This register exists only on 82576 and newer so if we are older then
3021 * we should exit and do nothing
3023 if (hw
->mac
.type
< e1000_82576
)
3026 vmolr
= rd32(E1000_VMOLR(vfn
));
3027 vmolr
|= E1000_VMOLR_STRVLAN
; /* Strip vlan tags */
3029 vmolr
|= E1000_VMOLR_AUPE
; /* Accept untagged packets */
3031 vmolr
&= ~(E1000_VMOLR_AUPE
); /* Tagged packets ONLY */
3033 /* clear all bits that might not be set */
3034 vmolr
&= ~(E1000_VMOLR_BAM
| E1000_VMOLR_RSSE
);
3036 if (adapter
->rss_queues
> 1 && vfn
== adapter
->vfs_allocated_count
)
3037 vmolr
|= E1000_VMOLR_RSSE
; /* enable RSS */
3039 * for VMDq only allow the VFs and pool 0 to accept broadcast and
3042 if (vfn
<= adapter
->vfs_allocated_count
)
3043 vmolr
|= E1000_VMOLR_BAM
; /* Accept broadcast */
3045 wr32(E1000_VMOLR(vfn
), vmolr
);
3049 * igb_configure_rx_ring - Configure a receive ring after Reset
3050 * @adapter: board private structure
3051 * @ring: receive ring to be configured
3053 * Configure the Rx unit of the MAC after a reset.
3055 void igb_configure_rx_ring(struct igb_adapter
*adapter
,
3056 struct igb_ring
*ring
)
3058 struct e1000_hw
*hw
= &adapter
->hw
;
3059 u64 rdba
= ring
->dma
;
3060 int reg_idx
= ring
->reg_idx
;
3061 u32 srrctl
= 0, rxdctl
= 0;
3063 /* disable the queue */
3064 wr32(E1000_RXDCTL(reg_idx
), 0);
3066 /* Set DMA base address registers */
3067 wr32(E1000_RDBAL(reg_idx
),
3068 rdba
& 0x00000000ffffffffULL
);
3069 wr32(E1000_RDBAH(reg_idx
), rdba
>> 32);
3070 wr32(E1000_RDLEN(reg_idx
),
3071 ring
->count
* sizeof(union e1000_adv_rx_desc
));
3073 /* initialize head and tail */
3074 ring
->tail
= hw
->hw_addr
+ E1000_RDT(reg_idx
);
3075 wr32(E1000_RDH(reg_idx
), 0);
3076 writel(0, ring
->tail
);
3078 /* set descriptor configuration */
3079 srrctl
= IGB_RX_HDR_LEN
<< E1000_SRRCTL_BSIZEHDRSIZE_SHIFT
;
3080 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
3081 srrctl
|= IGB_RXBUFFER_16384
>> E1000_SRRCTL_BSIZEPKT_SHIFT
;
3083 srrctl
|= (PAGE_SIZE
/ 2) >> E1000_SRRCTL_BSIZEPKT_SHIFT
;
3085 srrctl
|= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS
;
3086 if (hw
->mac
.type
>= e1000_82580
)
3087 srrctl
|= E1000_SRRCTL_TIMESTAMP
;
3088 /* Only set Drop Enable if we are supporting multiple queues */
3089 if (adapter
->vfs_allocated_count
|| adapter
->num_rx_queues
> 1)
3090 srrctl
|= E1000_SRRCTL_DROP_EN
;
3092 wr32(E1000_SRRCTL(reg_idx
), srrctl
);
3094 /* set filtering for VMDQ pools */
3095 igb_set_vmolr(adapter
, reg_idx
& 0x7, true);
3097 rxdctl
|= IGB_RX_PTHRESH
;
3098 rxdctl
|= IGB_RX_HTHRESH
<< 8;
3099 rxdctl
|= IGB_RX_WTHRESH
<< 16;
3101 /* enable receive descriptor fetching */
3102 rxdctl
|= E1000_RXDCTL_QUEUE_ENABLE
;
3103 wr32(E1000_RXDCTL(reg_idx
), rxdctl
);
3107 * igb_configure_rx - Configure receive Unit after Reset
3108 * @adapter: board private structure
3110 * Configure the Rx unit of the MAC after a reset.
3112 static void igb_configure_rx(struct igb_adapter
*adapter
)
3116 /* set UTA to appropriate mode */
3117 igb_set_uta(adapter
);
3119 /* set the correct pool for the PF default MAC address in entry 0 */
3120 igb_rar_set_qsel(adapter
, adapter
->hw
.mac
.addr
, 0,
3121 adapter
->vfs_allocated_count
);
3123 /* Setup the HW Rx Head and Tail Descriptor Pointers and
3124 * the Base and Length of the Rx Descriptor Ring */
3125 for (i
= 0; i
< adapter
->num_rx_queues
; i
++)
3126 igb_configure_rx_ring(adapter
, adapter
->rx_ring
[i
]);
3130 * igb_free_tx_resources - Free Tx Resources per Queue
3131 * @tx_ring: Tx descriptor ring for a specific queue
3133 * Free all transmit software resources
3135 void igb_free_tx_resources(struct igb_ring
*tx_ring
)
3137 igb_clean_tx_ring(tx_ring
);
3139 vfree(tx_ring
->tx_buffer_info
);
3140 tx_ring
->tx_buffer_info
= NULL
;
3142 /* if not set, then don't free */
3146 dma_free_coherent(tx_ring
->dev
, tx_ring
->size
,
3147 tx_ring
->desc
, tx_ring
->dma
);
3149 tx_ring
->desc
= NULL
;
3153 * igb_free_all_tx_resources - Free Tx Resources for All Queues
3154 * @adapter: board private structure
3156 * Free all transmit software resources
3158 static void igb_free_all_tx_resources(struct igb_adapter
*adapter
)
3162 for (i
= 0; i
< adapter
->num_tx_queues
; i
++)
3163 igb_free_tx_resources(adapter
->tx_ring
[i
]);
3166 void igb_unmap_and_free_tx_resource(struct igb_ring
*ring
,
3167 struct igb_tx_buffer
*tx_buffer
)
3169 if (tx_buffer
->skb
) {
3170 dev_kfree_skb_any(tx_buffer
->skb
);
3172 dma_unmap_single(ring
->dev
,
3176 } else if (tx_buffer
->dma
) {
3177 dma_unmap_page(ring
->dev
,
3182 tx_buffer
->next_to_watch
= NULL
;
3183 tx_buffer
->skb
= NULL
;
3185 /* buffer_info must be completely set up in the transmit path */
3189 * igb_clean_tx_ring - Free Tx Buffers
3190 * @tx_ring: ring to be cleaned
3192 static void igb_clean_tx_ring(struct igb_ring
*tx_ring
)
3194 struct igb_tx_buffer
*buffer_info
;
3198 if (!tx_ring
->tx_buffer_info
)
3200 /* Free all the Tx ring sk_buffs */
3202 for (i
= 0; i
< tx_ring
->count
; i
++) {
3203 buffer_info
= &tx_ring
->tx_buffer_info
[i
];
3204 igb_unmap_and_free_tx_resource(tx_ring
, buffer_info
);
3207 size
= sizeof(struct igb_tx_buffer
) * tx_ring
->count
;
3208 memset(tx_ring
->tx_buffer_info
, 0, size
);
3210 /* Zero out the descriptor ring */
3211 memset(tx_ring
->desc
, 0, tx_ring
->size
);
3213 tx_ring
->next_to_use
= 0;
3214 tx_ring
->next_to_clean
= 0;
3218 * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3219 * @adapter: board private structure
3221 static void igb_clean_all_tx_rings(struct igb_adapter
*adapter
)
3225 for (i
= 0; i
< adapter
->num_tx_queues
; i
++)
3226 igb_clean_tx_ring(adapter
->tx_ring
[i
]);
3230 * igb_free_rx_resources - Free Rx Resources
3231 * @rx_ring: ring to clean the resources from
3233 * Free all receive software resources
3235 void igb_free_rx_resources(struct igb_ring
*rx_ring
)
3237 igb_clean_rx_ring(rx_ring
);
3239 vfree(rx_ring
->rx_buffer_info
);
3240 rx_ring
->rx_buffer_info
= NULL
;
3242 /* if not set, then don't free */
3246 dma_free_coherent(rx_ring
->dev
, rx_ring
->size
,
3247 rx_ring
->desc
, rx_ring
->dma
);
3249 rx_ring
->desc
= NULL
;
3253 * igb_free_all_rx_resources - Free Rx Resources for All Queues
3254 * @adapter: board private structure
3256 * Free all receive software resources
3258 static void igb_free_all_rx_resources(struct igb_adapter
*adapter
)
3262 for (i
= 0; i
< adapter
->num_rx_queues
; i
++)
3263 igb_free_rx_resources(adapter
->rx_ring
[i
]);
3267 * igb_clean_rx_ring - Free Rx Buffers per Queue
3268 * @rx_ring: ring to free buffers from
3270 static void igb_clean_rx_ring(struct igb_ring
*rx_ring
)
3275 if (!rx_ring
->rx_buffer_info
)
3278 /* Free all the Rx ring sk_buffs */
3279 for (i
= 0; i
< rx_ring
->count
; i
++) {
3280 struct igb_rx_buffer
*buffer_info
= &rx_ring
->rx_buffer_info
[i
];
3281 if (buffer_info
->dma
) {
3282 dma_unmap_single(rx_ring
->dev
,
3286 buffer_info
->dma
= 0;
3289 if (buffer_info
->skb
) {
3290 dev_kfree_skb(buffer_info
->skb
);
3291 buffer_info
->skb
= NULL
;
3293 if (buffer_info
->page_dma
) {
3294 dma_unmap_page(rx_ring
->dev
,
3295 buffer_info
->page_dma
,
3298 buffer_info
->page_dma
= 0;
3300 if (buffer_info
->page
) {
3301 put_page(buffer_info
->page
);
3302 buffer_info
->page
= NULL
;
3303 buffer_info
->page_offset
= 0;
3307 size
= sizeof(struct igb_rx_buffer
) * rx_ring
->count
;
3308 memset(rx_ring
->rx_buffer_info
, 0, size
);
3310 /* Zero out the descriptor ring */
3311 memset(rx_ring
->desc
, 0, rx_ring
->size
);
3313 rx_ring
->next_to_clean
= 0;
3314 rx_ring
->next_to_use
= 0;
3318 * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3319 * @adapter: board private structure
3321 static void igb_clean_all_rx_rings(struct igb_adapter
*adapter
)
3325 for (i
= 0; i
< adapter
->num_rx_queues
; i
++)
3326 igb_clean_rx_ring(adapter
->rx_ring
[i
]);
3330 * igb_set_mac - Change the Ethernet Address of the NIC
3331 * @netdev: network interface device structure
3332 * @p: pointer to an address structure
3334 * Returns 0 on success, negative on failure
3336 static int igb_set_mac(struct net_device
*netdev
, void *p
)
3338 struct igb_adapter
*adapter
= netdev_priv(netdev
);
3339 struct e1000_hw
*hw
= &adapter
->hw
;
3340 struct sockaddr
*addr
= p
;
3342 if (!is_valid_ether_addr(addr
->sa_data
))
3343 return -EADDRNOTAVAIL
;
3345 memcpy(netdev
->dev_addr
, addr
->sa_data
, netdev
->addr_len
);
3346 memcpy(hw
->mac
.addr
, addr
->sa_data
, netdev
->addr_len
);
3348 /* set the correct pool for the new PF MAC address in entry 0 */
3349 igb_rar_set_qsel(adapter
, hw
->mac
.addr
, 0,
3350 adapter
->vfs_allocated_count
);
3356 * igb_write_mc_addr_list - write multicast addresses to MTA
3357 * @netdev: network interface device structure
3359 * Writes multicast address list to the MTA hash table.
3360 * Returns: -ENOMEM on failure
3361 * 0 on no addresses written
3362 * X on writing X addresses to MTA
3364 static int igb_write_mc_addr_list(struct net_device
*netdev
)
3366 struct igb_adapter
*adapter
= netdev_priv(netdev
);
3367 struct e1000_hw
*hw
= &adapter
->hw
;
3368 struct netdev_hw_addr
*ha
;
3372 if (netdev_mc_empty(netdev
)) {
3373 /* nothing to program, so clear mc list */
3374 igb_update_mc_addr_list(hw
, NULL
, 0);
3375 igb_restore_vf_multicasts(adapter
);
3379 mta_list
= kzalloc(netdev_mc_count(netdev
) * 6, GFP_ATOMIC
);
3383 /* The shared function expects a packed array of only addresses. */
3385 netdev_for_each_mc_addr(ha
, netdev
)
3386 memcpy(mta_list
+ (i
++ * ETH_ALEN
), ha
->addr
, ETH_ALEN
);
3388 igb_update_mc_addr_list(hw
, mta_list
, i
);
3391 return netdev_mc_count(netdev
);
3395 * igb_write_uc_addr_list - write unicast addresses to RAR table
3396 * @netdev: network interface device structure
3398 * Writes unicast address list to the RAR table.
3399 * Returns: -ENOMEM on failure/insufficient address space
3400 * 0 on no addresses written
3401 * X on writing X addresses to the RAR table
3403 static int igb_write_uc_addr_list(struct net_device
*netdev
)
3405 struct igb_adapter
*adapter
= netdev_priv(netdev
);
3406 struct e1000_hw
*hw
= &adapter
->hw
;
3407 unsigned int vfn
= adapter
->vfs_allocated_count
;
3408 unsigned int rar_entries
= hw
->mac
.rar_entry_count
- (vfn
+ 1);
3411 /* return ENOMEM indicating insufficient memory for addresses */
3412 if (netdev_uc_count(netdev
) > rar_entries
)
3415 if (!netdev_uc_empty(netdev
) && rar_entries
) {
3416 struct netdev_hw_addr
*ha
;
3418 netdev_for_each_uc_addr(ha
, netdev
) {
3421 igb_rar_set_qsel(adapter
, ha
->addr
,
3427 /* write the addresses in reverse order to avoid write combining */
3428 for (; rar_entries
> 0 ; rar_entries
--) {
3429 wr32(E1000_RAH(rar_entries
), 0);
3430 wr32(E1000_RAL(rar_entries
), 0);
3438 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3439 * @netdev: network interface device structure
3441 * The set_rx_mode entry point is called whenever the unicast or multicast
3442 * address lists or the network interface flags are updated. This routine is
3443 * responsible for configuring the hardware for proper unicast, multicast,
3444 * promiscuous mode, and all-multi behavior.
3446 static void igb_set_rx_mode(struct net_device
*netdev
)
3448 struct igb_adapter
*adapter
= netdev_priv(netdev
);
3449 struct e1000_hw
*hw
= &adapter
->hw
;
3450 unsigned int vfn
= adapter
->vfs_allocated_count
;
3451 u32 rctl
, vmolr
= 0;
3454 /* Check for Promiscuous and All Multicast modes */
3455 rctl
= rd32(E1000_RCTL
);
3457 /* clear the affected bits */
3458 rctl
&= ~(E1000_RCTL_UPE
| E1000_RCTL_MPE
| E1000_RCTL_VFE
);
3460 if (netdev
->flags
& IFF_PROMISC
) {
3461 rctl
|= (E1000_RCTL_UPE
| E1000_RCTL_MPE
);
3462 vmolr
|= (E1000_VMOLR_ROPE
| E1000_VMOLR_MPME
);
3464 if (netdev
->flags
& IFF_ALLMULTI
) {
3465 rctl
|= E1000_RCTL_MPE
;
3466 vmolr
|= E1000_VMOLR_MPME
;
3469 * Write addresses to the MTA, if the attempt fails
3470 * then we should just turn on promiscuous mode so
3471 * that we can at least receive multicast traffic
3473 count
= igb_write_mc_addr_list(netdev
);
3475 rctl
|= E1000_RCTL_MPE
;
3476 vmolr
|= E1000_VMOLR_MPME
;
3478 vmolr
|= E1000_VMOLR_ROMPE
;
3482 * Write addresses to available RAR registers, if there is not
3483 * sufficient space to store all the addresses then enable
3484 * unicast promiscuous mode
3486 count
= igb_write_uc_addr_list(netdev
);
3488 rctl
|= E1000_RCTL_UPE
;
3489 vmolr
|= E1000_VMOLR_ROPE
;
3491 rctl
|= E1000_RCTL_VFE
;
3493 wr32(E1000_RCTL
, rctl
);
3496 * In order to support SR-IOV and eventually VMDq it is necessary to set
3497 * the VMOLR to enable the appropriate modes. Without this workaround
3498 * we will have issues with VLAN tag stripping not being done for frames
3499 * that are only arriving because we are the default pool
3501 if (hw
->mac
.type
< e1000_82576
)
3504 vmolr
|= rd32(E1000_VMOLR(vfn
)) &
3505 ~(E1000_VMOLR_ROPE
| E1000_VMOLR_MPME
| E1000_VMOLR_ROMPE
);
3506 wr32(E1000_VMOLR(vfn
), vmolr
);
3507 igb_restore_vf_multicasts(adapter
);
3510 static void igb_check_wvbr(struct igb_adapter
*adapter
)
3512 struct e1000_hw
*hw
= &adapter
->hw
;
3515 switch (hw
->mac
.type
) {
3518 if (!(wvbr
= rd32(E1000_WVBR
)))
3525 adapter
->wvbr
|= wvbr
;
3528 #define IGB_STAGGERED_QUEUE_OFFSET 8
3530 static void igb_spoof_check(struct igb_adapter
*adapter
)
3537 for(j
= 0; j
< adapter
->vfs_allocated_count
; j
++) {
3538 if (adapter
->wvbr
& (1 << j
) ||
3539 adapter
->wvbr
& (1 << (j
+ IGB_STAGGERED_QUEUE_OFFSET
))) {
3540 dev_warn(&adapter
->pdev
->dev
,
3541 "Spoof event(s) detected on VF %d\n", j
);
3544 (1 << (j
+ IGB_STAGGERED_QUEUE_OFFSET
)));
3549 /* Need to wait a few seconds after link up to get diagnostic information from
3551 static void igb_update_phy_info(unsigned long data
)
3553 struct igb_adapter
*adapter
= (struct igb_adapter
*) data
;
3554 igb_get_phy_info(&adapter
->hw
);
3558 * igb_has_link - check shared code for link and determine up/down
3559 * @adapter: pointer to driver private info
3561 bool igb_has_link(struct igb_adapter
*adapter
)
3563 struct e1000_hw
*hw
= &adapter
->hw
;
3564 bool link_active
= false;
3567 /* get_link_status is set on LSC (link status) interrupt or
3568 * rx sequence error interrupt. get_link_status will stay
3569 * false until the e1000_check_for_link establishes link
3570 * for copper adapters ONLY
3572 switch (hw
->phy
.media_type
) {
3573 case e1000_media_type_copper
:
3574 if (hw
->mac
.get_link_status
) {
3575 ret_val
= hw
->mac
.ops
.check_for_link(hw
);
3576 link_active
= !hw
->mac
.get_link_status
;
3581 case e1000_media_type_internal_serdes
:
3582 ret_val
= hw
->mac
.ops
.check_for_link(hw
);
3583 link_active
= hw
->mac
.serdes_has_link
;
3586 case e1000_media_type_unknown
:
3593 static bool igb_thermal_sensor_event(struct e1000_hw
*hw
, u32 event
)
3596 u32 ctrl_ext
, thstat
;
3598 /* check for thermal sensor event on i350, copper only */
3599 if (hw
->mac
.type
== e1000_i350
) {
3600 thstat
= rd32(E1000_THSTAT
);
3601 ctrl_ext
= rd32(E1000_CTRL_EXT
);
3603 if ((hw
->phy
.media_type
== e1000_media_type_copper
) &&
3604 !(ctrl_ext
& E1000_CTRL_EXT_LINK_MODE_SGMII
)) {
3605 ret
= !!(thstat
& event
);
3613 * igb_watchdog - Timer Call-back
3614 * @data: pointer to adapter cast into an unsigned long
3616 static void igb_watchdog(unsigned long data
)
3618 struct igb_adapter
*adapter
= (struct igb_adapter
*)data
;
3619 /* Do the rest outside of interrupt context */
3620 schedule_work(&adapter
->watchdog_task
);
3623 static void igb_watchdog_task(struct work_struct
*work
)
3625 struct igb_adapter
*adapter
= container_of(work
,
3628 struct e1000_hw
*hw
= &adapter
->hw
;
3629 struct net_device
*netdev
= adapter
->netdev
;
3633 link
= igb_has_link(adapter
);
3635 if (!netif_carrier_ok(netdev
)) {
3637 hw
->mac
.ops
.get_speed_and_duplex(hw
,
3638 &adapter
->link_speed
,
3639 &adapter
->link_duplex
);
3641 ctrl
= rd32(E1000_CTRL
);
3642 /* Links status message must follow this format */
3643 printk(KERN_INFO
"igb: %s NIC Link is Up %d Mbps %s, "
3644 "Flow Control: %s\n",
3646 adapter
->link_speed
,
3647 adapter
->link_duplex
== FULL_DUPLEX
?
3648 "Full Duplex" : "Half Duplex",
3649 ((ctrl
& E1000_CTRL_TFCE
) &&
3650 (ctrl
& E1000_CTRL_RFCE
)) ? "RX/TX" :
3651 ((ctrl
& E1000_CTRL_RFCE
) ? "RX" :
3652 ((ctrl
& E1000_CTRL_TFCE
) ? "TX" : "None")));
3654 /* check for thermal sensor event */
3655 if (igb_thermal_sensor_event(hw
, E1000_THSTAT_LINK_THROTTLE
)) {
3656 printk(KERN_INFO
"igb: %s The network adapter "
3657 "link speed was downshifted "
3658 "because it overheated.\n",
3662 /* adjust timeout factor according to speed/duplex */
3663 adapter
->tx_timeout_factor
= 1;
3664 switch (adapter
->link_speed
) {
3666 adapter
->tx_timeout_factor
= 14;
3669 /* maybe add some timeout factor ? */
3673 netif_carrier_on(netdev
);
3675 igb_ping_all_vfs(adapter
);
3676 igb_check_vf_rate_limit(adapter
);
3678 /* link state has changed, schedule phy info update */
3679 if (!test_bit(__IGB_DOWN
, &adapter
->state
))
3680 mod_timer(&adapter
->phy_info_timer
,
3681 round_jiffies(jiffies
+ 2 * HZ
));
3684 if (netif_carrier_ok(netdev
)) {
3685 adapter
->link_speed
= 0;
3686 adapter
->link_duplex
= 0;
3688 /* check for thermal sensor event */
3689 if (igb_thermal_sensor_event(hw
, E1000_THSTAT_PWR_DOWN
)) {
3690 printk(KERN_ERR
"igb: %s The network adapter "
3691 "was stopped because it "
3696 /* Links status message must follow this format */
3697 printk(KERN_INFO
"igb: %s NIC Link is Down\n",
3699 netif_carrier_off(netdev
);
3701 igb_ping_all_vfs(adapter
);
3703 /* link state has changed, schedule phy info update */
3704 if (!test_bit(__IGB_DOWN
, &adapter
->state
))
3705 mod_timer(&adapter
->phy_info_timer
,
3706 round_jiffies(jiffies
+ 2 * HZ
));
3710 spin_lock(&adapter
->stats64_lock
);
3711 igb_update_stats(adapter
, &adapter
->stats64
);
3712 spin_unlock(&adapter
->stats64_lock
);
3714 for (i
= 0; i
< adapter
->num_tx_queues
; i
++) {
3715 struct igb_ring
*tx_ring
= adapter
->tx_ring
[i
];
3716 if (!netif_carrier_ok(netdev
)) {
3717 /* We've lost link, so the controller stops DMA,
3718 * but we've got queued Tx work that's never going
3719 * to get done, so reset controller to flush Tx.
3720 * (Do the reset outside of interrupt context). */
3721 if (igb_desc_unused(tx_ring
) + 1 < tx_ring
->count
) {
3722 adapter
->tx_timeout_count
++;
3723 schedule_work(&adapter
->reset_task
);
3724 /* return immediately since reset is imminent */
3729 /* Force detection of hung controller every watchdog period */
3730 set_bit(IGB_RING_FLAG_TX_DETECT_HANG
, &tx_ring
->flags
);
3733 /* Cause software interrupt to ensure rx ring is cleaned */
3734 if (adapter
->msix_entries
) {
3736 for (i
= 0; i
< adapter
->num_q_vectors
; i
++)
3737 eics
|= adapter
->q_vector
[i
]->eims_value
;
3738 wr32(E1000_EICS
, eics
);
3740 wr32(E1000_ICS
, E1000_ICS_RXDMT0
);
3743 igb_spoof_check(adapter
);
3745 /* Reset the timer */
3746 if (!test_bit(__IGB_DOWN
, &adapter
->state
))
3747 mod_timer(&adapter
->watchdog_timer
,
3748 round_jiffies(jiffies
+ 2 * HZ
));
3751 enum latency_range
{
3755 latency_invalid
= 255
3759 * igb_update_ring_itr - update the dynamic ITR value based on packet size
3761 * Stores a new ITR value based on strictly on packet size. This
3762 * algorithm is less sophisticated than that used in igb_update_itr,
3763 * due to the difficulty of synchronizing statistics across multiple
3764 * receive rings. The divisors and thresholds used by this function
3765 * were determined based on theoretical maximum wire speed and testing
3766 * data, in order to minimize response time while increasing bulk
3768 * This functionality is controlled by the InterruptThrottleRate module
3769 * parameter (see igb_param.c)
3770 * NOTE: This function is called only when operating in a multiqueue
3771 * receive environment.
3772 * @q_vector: pointer to q_vector
3774 static void igb_update_ring_itr(struct igb_q_vector
*q_vector
)
3776 int new_val
= q_vector
->itr_val
;
3777 int avg_wire_size
= 0;
3778 struct igb_adapter
*adapter
= q_vector
->adapter
;
3779 unsigned int packets
;
3781 /* For non-gigabit speeds, just fix the interrupt rate at 4000
3782 * ints/sec - ITR timer value of 120 ticks.
3784 if (adapter
->link_speed
!= SPEED_1000
) {
3785 new_val
= IGB_4K_ITR
;
3789 packets
= q_vector
->rx
.total_packets
;
3791 avg_wire_size
= q_vector
->rx
.total_bytes
/ packets
;
3793 packets
= q_vector
->tx
.total_packets
;
3795 avg_wire_size
= max_t(u32
, avg_wire_size
,
3796 q_vector
->tx
.total_bytes
/ packets
);
3798 /* if avg_wire_size isn't set no work was done */
3802 /* Add 24 bytes to size to account for CRC, preamble, and gap */
3803 avg_wire_size
+= 24;
3805 /* Don't starve jumbo frames */
3806 avg_wire_size
= min(avg_wire_size
, 3000);
3808 /* Give a little boost to mid-size frames */
3809 if ((avg_wire_size
> 300) && (avg_wire_size
< 1200))
3810 new_val
= avg_wire_size
/ 3;
3812 new_val
= avg_wire_size
/ 2;
3814 /* conservative mode (itr 3) eliminates the lowest_latency setting */
3815 if (new_val
< IGB_20K_ITR
&&
3816 ((q_vector
->rx
.ring
&& adapter
->rx_itr_setting
== 3) ||
3817 (!q_vector
->rx
.ring
&& adapter
->tx_itr_setting
== 3)))
3818 new_val
= IGB_20K_ITR
;
3821 if (new_val
!= q_vector
->itr_val
) {
3822 q_vector
->itr_val
= new_val
;
3823 q_vector
->set_itr
= 1;
3826 q_vector
->rx
.total_bytes
= 0;
3827 q_vector
->rx
.total_packets
= 0;
3828 q_vector
->tx
.total_bytes
= 0;
3829 q_vector
->tx
.total_packets
= 0;
3833 * igb_update_itr - update the dynamic ITR value based on statistics
3834 * Stores a new ITR value based on packets and byte
3835 * counts during the last interrupt. The advantage of per interrupt
3836 * computation is faster updates and more accurate ITR for the current
3837 * traffic pattern. Constants in this function were computed
3838 * based on theoretical maximum wire speed and thresholds were set based
3839 * on testing data as well as attempting to minimize response time
3840 * while increasing bulk throughput.
3841 * this functionality is controlled by the InterruptThrottleRate module
3842 * parameter (see igb_param.c)
3843 * NOTE: These calculations are only valid when operating in a single-
3844 * queue environment.
3845 * @q_vector: pointer to q_vector
3846 * @ring_container: ring info to update the itr for
3848 static void igb_update_itr(struct igb_q_vector
*q_vector
,
3849 struct igb_ring_container
*ring_container
)
3851 unsigned int packets
= ring_container
->total_packets
;
3852 unsigned int bytes
= ring_container
->total_bytes
;
3853 u8 itrval
= ring_container
->itr
;
3855 /* no packets, exit with status unchanged */
3860 case lowest_latency
:
3861 /* handle TSO and jumbo frames */
3862 if (bytes
/packets
> 8000)
3863 itrval
= bulk_latency
;
3864 else if ((packets
< 5) && (bytes
> 512))
3865 itrval
= low_latency
;
3867 case low_latency
: /* 50 usec aka 20000 ints/s */
3868 if (bytes
> 10000) {
3869 /* this if handles the TSO accounting */
3870 if (bytes
/packets
> 8000) {
3871 itrval
= bulk_latency
;
3872 } else if ((packets
< 10) || ((bytes
/packets
) > 1200)) {
3873 itrval
= bulk_latency
;
3874 } else if ((packets
> 35)) {
3875 itrval
= lowest_latency
;
3877 } else if (bytes
/packets
> 2000) {
3878 itrval
= bulk_latency
;
3879 } else if (packets
<= 2 && bytes
< 512) {
3880 itrval
= lowest_latency
;
3883 case bulk_latency
: /* 250 usec aka 4000 ints/s */
3884 if (bytes
> 25000) {
3886 itrval
= low_latency
;
3887 } else if (bytes
< 1500) {
3888 itrval
= low_latency
;
3893 /* clear work counters since we have the values we need */
3894 ring_container
->total_bytes
= 0;
3895 ring_container
->total_packets
= 0;
3897 /* write updated itr to ring container */
3898 ring_container
->itr
= itrval
;
3901 static void igb_set_itr(struct igb_q_vector
*q_vector
)
3903 struct igb_adapter
*adapter
= q_vector
->adapter
;
3904 u32 new_itr
= q_vector
->itr_val
;
3907 /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3908 if (adapter
->link_speed
!= SPEED_1000
) {
3910 new_itr
= IGB_4K_ITR
;
3914 igb_update_itr(q_vector
, &q_vector
->tx
);
3915 igb_update_itr(q_vector
, &q_vector
->rx
);
3917 current_itr
= max(q_vector
->rx
.itr
, q_vector
->tx
.itr
);
3919 /* conservative mode (itr 3) eliminates the lowest_latency setting */
3920 if (current_itr
== lowest_latency
&&
3921 ((q_vector
->rx
.ring
&& adapter
->rx_itr_setting
== 3) ||
3922 (!q_vector
->rx
.ring
&& adapter
->tx_itr_setting
== 3)))
3923 current_itr
= low_latency
;
3925 switch (current_itr
) {
3926 /* counts and packets in update_itr are dependent on these numbers */
3927 case lowest_latency
:
3928 new_itr
= IGB_70K_ITR
; /* 70,000 ints/sec */
3931 new_itr
= IGB_20K_ITR
; /* 20,000 ints/sec */
3934 new_itr
= IGB_4K_ITR
; /* 4,000 ints/sec */
3941 if (new_itr
!= q_vector
->itr_val
) {
3942 /* this attempts to bias the interrupt rate towards Bulk
3943 * by adding intermediate steps when interrupt rate is
3945 new_itr
= new_itr
> q_vector
->itr_val
?
3946 max((new_itr
* q_vector
->itr_val
) /
3947 (new_itr
+ (q_vector
->itr_val
>> 2)),
3950 /* Don't write the value here; it resets the adapter's
3951 * internal timer, and causes us to delay far longer than
3952 * we should between interrupts. Instead, we write the ITR
3953 * value at the beginning of the next interrupt so the timing
3954 * ends up being correct.
3956 q_vector
->itr_val
= new_itr
;
3957 q_vector
->set_itr
= 1;
3961 void igb_tx_ctxtdesc(struct igb_ring
*tx_ring
, u32 vlan_macip_lens
,
3962 u32 type_tucmd
, u32 mss_l4len_idx
)
3964 struct e1000_adv_tx_context_desc
*context_desc
;
3965 u16 i
= tx_ring
->next_to_use
;
3967 context_desc
= IGB_TX_CTXTDESC(tx_ring
, i
);
3970 tx_ring
->next_to_use
= (i
< tx_ring
->count
) ? i
: 0;
3972 /* set bits to identify this as an advanced context descriptor */
3973 type_tucmd
|= E1000_TXD_CMD_DEXT
| E1000_ADVTXD_DTYP_CTXT
;
3975 /* For 82575, context index must be unique per ring. */
3976 if (test_bit(IGB_RING_FLAG_TX_CTX_IDX
, &tx_ring
->flags
))
3977 mss_l4len_idx
|= tx_ring
->reg_idx
<< 4;
3979 context_desc
->vlan_macip_lens
= cpu_to_le32(vlan_macip_lens
);
3980 context_desc
->seqnum_seed
= 0;
3981 context_desc
->type_tucmd_mlhl
= cpu_to_le32(type_tucmd
);
3982 context_desc
->mss_l4len_idx
= cpu_to_le32(mss_l4len_idx
);
3985 static int igb_tso(struct igb_ring
*tx_ring
,
3986 struct igb_tx_buffer
*first
,
3989 struct sk_buff
*skb
= first
->skb
;
3990 u32 vlan_macip_lens
, type_tucmd
;
3991 u32 mss_l4len_idx
, l4len
;
3993 if (!skb_is_gso(skb
))
3996 if (skb_header_cloned(skb
)) {
3997 int err
= pskb_expand_head(skb
, 0, 0, GFP_ATOMIC
);
4002 /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
4003 type_tucmd
= E1000_ADVTXD_TUCMD_L4T_TCP
;
4005 if (first
->protocol
== __constant_htons(ETH_P_IP
)) {
4006 struct iphdr
*iph
= ip_hdr(skb
);
4009 tcp_hdr(skb
)->check
= ~csum_tcpudp_magic(iph
->saddr
,
4013 type_tucmd
|= E1000_ADVTXD_TUCMD_IPV4
;
4014 first
->tx_flags
|= IGB_TX_FLAGS_TSO
|
4017 } else if (skb_is_gso_v6(skb
)) {
4018 ipv6_hdr(skb
)->payload_len
= 0;
4019 tcp_hdr(skb
)->check
= ~csum_ipv6_magic(&ipv6_hdr(skb
)->saddr
,
4020 &ipv6_hdr(skb
)->daddr
,
4022 first
->tx_flags
|= IGB_TX_FLAGS_TSO
|
4026 /* compute header lengths */
4027 l4len
= tcp_hdrlen(skb
);
4028 *hdr_len
= skb_transport_offset(skb
) + l4len
;
4030 /* update gso size and bytecount with header size */
4031 first
->gso_segs
= skb_shinfo(skb
)->gso_segs
;
4032 first
->bytecount
+= (first
->gso_segs
- 1) * *hdr_len
;
4035 mss_l4len_idx
= l4len
<< E1000_ADVTXD_L4LEN_SHIFT
;
4036 mss_l4len_idx
|= skb_shinfo(skb
)->gso_size
<< E1000_ADVTXD_MSS_SHIFT
;
4038 /* VLAN MACLEN IPLEN */
4039 vlan_macip_lens
= skb_network_header_len(skb
);
4040 vlan_macip_lens
|= skb_network_offset(skb
) << E1000_ADVTXD_MACLEN_SHIFT
;
4041 vlan_macip_lens
|= first
->tx_flags
& IGB_TX_FLAGS_VLAN_MASK
;
4043 igb_tx_ctxtdesc(tx_ring
, vlan_macip_lens
, type_tucmd
, mss_l4len_idx
);
4048 static void igb_tx_csum(struct igb_ring
*tx_ring
, struct igb_tx_buffer
*first
)
4050 struct sk_buff
*skb
= first
->skb
;
4051 u32 vlan_macip_lens
= 0;
4052 u32 mss_l4len_idx
= 0;
4055 if (skb
->ip_summed
!= CHECKSUM_PARTIAL
) {
4056 if (!(first
->tx_flags
& IGB_TX_FLAGS_VLAN
))
4060 switch (first
->protocol
) {
4061 case __constant_htons(ETH_P_IP
):
4062 vlan_macip_lens
|= skb_network_header_len(skb
);
4063 type_tucmd
|= E1000_ADVTXD_TUCMD_IPV4
;
4064 l4_hdr
= ip_hdr(skb
)->protocol
;
4066 case __constant_htons(ETH_P_IPV6
):
4067 vlan_macip_lens
|= skb_network_header_len(skb
);
4068 l4_hdr
= ipv6_hdr(skb
)->nexthdr
;
4071 if (unlikely(net_ratelimit())) {
4072 dev_warn(tx_ring
->dev
,
4073 "partial checksum but proto=%x!\n",
4081 type_tucmd
|= E1000_ADVTXD_TUCMD_L4T_TCP
;
4082 mss_l4len_idx
= tcp_hdrlen(skb
) <<
4083 E1000_ADVTXD_L4LEN_SHIFT
;
4086 type_tucmd
|= E1000_ADVTXD_TUCMD_L4T_SCTP
;
4087 mss_l4len_idx
= sizeof(struct sctphdr
) <<
4088 E1000_ADVTXD_L4LEN_SHIFT
;
4091 mss_l4len_idx
= sizeof(struct udphdr
) <<
4092 E1000_ADVTXD_L4LEN_SHIFT
;
4095 if (unlikely(net_ratelimit())) {
4096 dev_warn(tx_ring
->dev
,
4097 "partial checksum but l4 proto=%x!\n",
4103 /* update TX checksum flag */
4104 first
->tx_flags
|= IGB_TX_FLAGS_CSUM
;
4107 vlan_macip_lens
|= skb_network_offset(skb
) << E1000_ADVTXD_MACLEN_SHIFT
;
4108 vlan_macip_lens
|= first
->tx_flags
& IGB_TX_FLAGS_VLAN_MASK
;
4110 igb_tx_ctxtdesc(tx_ring
, vlan_macip_lens
, type_tucmd
, mss_l4len_idx
);
4113 static __le32
igb_tx_cmd_type(u32 tx_flags
)
4115 /* set type for advanced descriptor with frame checksum insertion */
4116 __le32 cmd_type
= cpu_to_le32(E1000_ADVTXD_DTYP_DATA
|
4117 E1000_ADVTXD_DCMD_IFCS
|
4118 E1000_ADVTXD_DCMD_DEXT
);
4120 /* set HW vlan bit if vlan is present */
4121 if (tx_flags
& IGB_TX_FLAGS_VLAN
)
4122 cmd_type
|= cpu_to_le32(E1000_ADVTXD_DCMD_VLE
);
4124 /* set timestamp bit if present */
4125 if (tx_flags
& IGB_TX_FLAGS_TSTAMP
)
4126 cmd_type
|= cpu_to_le32(E1000_ADVTXD_MAC_TSTAMP
);
4128 /* set segmentation bits for TSO */
4129 if (tx_flags
& IGB_TX_FLAGS_TSO
)
4130 cmd_type
|= cpu_to_le32(E1000_ADVTXD_DCMD_TSE
);
4135 static void igb_tx_olinfo_status(struct igb_ring
*tx_ring
,
4136 union e1000_adv_tx_desc
*tx_desc
,
4137 u32 tx_flags
, unsigned int paylen
)
4139 u32 olinfo_status
= paylen
<< E1000_ADVTXD_PAYLEN_SHIFT
;
4141 /* 82575 requires a unique index per ring if any offload is enabled */
4142 if ((tx_flags
& (IGB_TX_FLAGS_CSUM
| IGB_TX_FLAGS_VLAN
)) &&
4143 test_bit(IGB_RING_FLAG_TX_CTX_IDX
, &tx_ring
->flags
))
4144 olinfo_status
|= tx_ring
->reg_idx
<< 4;
4146 /* insert L4 checksum */
4147 if (tx_flags
& IGB_TX_FLAGS_CSUM
) {
4148 olinfo_status
|= E1000_TXD_POPTS_TXSM
<< 8;
4150 /* insert IPv4 checksum */
4151 if (tx_flags
& IGB_TX_FLAGS_IPV4
)
4152 olinfo_status
|= E1000_TXD_POPTS_IXSM
<< 8;
4155 tx_desc
->read
.olinfo_status
= cpu_to_le32(olinfo_status
);
4159 * The largest size we can write to the descriptor is 65535. In order to
4160 * maintain a power of two alignment we have to limit ourselves to 32K.
4162 #define IGB_MAX_TXD_PWR 15
4163 #define IGB_MAX_DATA_PER_TXD (1<<IGB_MAX_TXD_PWR)
4165 static void igb_tx_map(struct igb_ring
*tx_ring
,
4166 struct igb_tx_buffer
*first
,
4169 struct sk_buff
*skb
= first
->skb
;
4170 struct igb_tx_buffer
*tx_buffer_info
;
4171 union e1000_adv_tx_desc
*tx_desc
;
4173 struct skb_frag_struct
*frag
= &skb_shinfo(skb
)->frags
[0];
4174 unsigned int data_len
= skb
->data_len
;
4175 unsigned int size
= skb_headlen(skb
);
4176 unsigned int paylen
= skb
->len
- hdr_len
;
4178 u32 tx_flags
= first
->tx_flags
;
4179 u16 i
= tx_ring
->next_to_use
;
4181 tx_desc
= IGB_TX_DESC(tx_ring
, i
);
4183 igb_tx_olinfo_status(tx_ring
, tx_desc
, tx_flags
, paylen
);
4184 cmd_type
= igb_tx_cmd_type(tx_flags
);
4186 dma
= dma_map_single(tx_ring
->dev
, skb
->data
, size
, DMA_TO_DEVICE
);
4187 if (dma_mapping_error(tx_ring
->dev
, dma
))
4190 /* record length, and DMA address */
4191 first
->length
= size
;
4193 tx_desc
->read
.buffer_addr
= cpu_to_le64(dma
);
4196 while (unlikely(size
> IGB_MAX_DATA_PER_TXD
)) {
4197 tx_desc
->read
.cmd_type_len
=
4198 cmd_type
| cpu_to_le32(IGB_MAX_DATA_PER_TXD
);
4202 if (i
== tx_ring
->count
) {
4203 tx_desc
= IGB_TX_DESC(tx_ring
, 0);
4207 dma
+= IGB_MAX_DATA_PER_TXD
;
4208 size
-= IGB_MAX_DATA_PER_TXD
;
4210 tx_desc
->read
.olinfo_status
= 0;
4211 tx_desc
->read
.buffer_addr
= cpu_to_le64(dma
);
4214 if (likely(!data_len
))
4217 tx_desc
->read
.cmd_type_len
= cmd_type
| cpu_to_le32(size
);
4221 if (i
== tx_ring
->count
) {
4222 tx_desc
= IGB_TX_DESC(tx_ring
, 0);
4226 size
= skb_frag_size(frag
);
4229 dma
= skb_frag_dma_map(tx_ring
->dev
, frag
, 0,
4230 size
, DMA_TO_DEVICE
);
4231 if (dma_mapping_error(tx_ring
->dev
, dma
))
4234 tx_buffer_info
= &tx_ring
->tx_buffer_info
[i
];
4235 tx_buffer_info
->length
= size
;
4236 tx_buffer_info
->dma
= dma
;
4238 tx_desc
->read
.olinfo_status
= 0;
4239 tx_desc
->read
.buffer_addr
= cpu_to_le64(dma
);
4244 /* write last descriptor with RS and EOP bits */
4245 cmd_type
|= cpu_to_le32(size
) | cpu_to_le32(IGB_TXD_DCMD
);
4246 tx_desc
->read
.cmd_type_len
= cmd_type
;
4248 /* set the timestamp */
4249 first
->time_stamp
= jiffies
;
4252 * Force memory writes to complete before letting h/w know there
4253 * are new descriptors to fetch. (Only applicable for weak-ordered
4254 * memory model archs, such as IA-64).
4256 * We also need this memory barrier to make certain all of the
4257 * status bits have been updated before next_to_watch is written.
4261 /* set next_to_watch value indicating a packet is present */
4262 first
->next_to_watch
= tx_desc
;
4265 if (i
== tx_ring
->count
)
4268 tx_ring
->next_to_use
= i
;
4270 writel(i
, tx_ring
->tail
);
4272 /* we need this if more than one processor can write to our tail
4273 * at a time, it syncronizes IO on IA64/Altix systems */
4279 dev_err(tx_ring
->dev
, "TX DMA map failed\n");
4281 /* clear dma mappings for failed tx_buffer_info map */
4283 tx_buffer_info
= &tx_ring
->tx_buffer_info
[i
];
4284 igb_unmap_and_free_tx_resource(tx_ring
, tx_buffer_info
);
4285 if (tx_buffer_info
== first
)
4292 tx_ring
->next_to_use
= i
;
4295 static int __igb_maybe_stop_tx(struct igb_ring
*tx_ring
, const u16 size
)
4297 struct net_device
*netdev
= tx_ring
->netdev
;
4299 netif_stop_subqueue(netdev
, tx_ring
->queue_index
);
4301 /* Herbert's original patch had:
4302 * smp_mb__after_netif_stop_queue();
4303 * but since that doesn't exist yet, just open code it. */
4306 /* We need to check again in a case another CPU has just
4307 * made room available. */
4308 if (igb_desc_unused(tx_ring
) < size
)
4312 netif_wake_subqueue(netdev
, tx_ring
->queue_index
);
4314 u64_stats_update_begin(&tx_ring
->tx_syncp2
);
4315 tx_ring
->tx_stats
.restart_queue2
++;
4316 u64_stats_update_end(&tx_ring
->tx_syncp2
);
4321 static inline int igb_maybe_stop_tx(struct igb_ring
*tx_ring
, const u16 size
)
4323 if (igb_desc_unused(tx_ring
) >= size
)
4325 return __igb_maybe_stop_tx(tx_ring
, size
);
4328 netdev_tx_t
igb_xmit_frame_ring(struct sk_buff
*skb
,
4329 struct igb_ring
*tx_ring
)
4331 struct igb_tx_buffer
*first
;
4334 __be16 protocol
= vlan_get_protocol(skb
);
4337 /* need: 1 descriptor per page,
4338 * + 2 desc gap to keep tail from touching head,
4339 * + 1 desc for skb->data,
4340 * + 1 desc for context descriptor,
4341 * otherwise try next time */
4342 if (igb_maybe_stop_tx(tx_ring
, skb_shinfo(skb
)->nr_frags
+ 4)) {
4343 /* this is a hard error */
4344 return NETDEV_TX_BUSY
;
4347 /* record the location of the first descriptor for this packet */
4348 first
= &tx_ring
->tx_buffer_info
[tx_ring
->next_to_use
];
4350 first
->bytecount
= skb
->len
;
4351 first
->gso_segs
= 1;
4353 if (unlikely(skb_shinfo(skb
)->tx_flags
& SKBTX_HW_TSTAMP
)) {
4354 skb_shinfo(skb
)->tx_flags
|= SKBTX_IN_PROGRESS
;
4355 tx_flags
|= IGB_TX_FLAGS_TSTAMP
;
4358 if (vlan_tx_tag_present(skb
)) {
4359 tx_flags
|= IGB_TX_FLAGS_VLAN
;
4360 tx_flags
|= (vlan_tx_tag_get(skb
) << IGB_TX_FLAGS_VLAN_SHIFT
);
4363 /* record initial flags and protocol */
4364 first
->tx_flags
= tx_flags
;
4365 first
->protocol
= protocol
;
4367 tso
= igb_tso(tx_ring
, first
, &hdr_len
);
4371 igb_tx_csum(tx_ring
, first
);
4373 igb_tx_map(tx_ring
, first
, hdr_len
);
4375 /* Make sure there is space in the ring for the next send. */
4376 igb_maybe_stop_tx(tx_ring
, MAX_SKB_FRAGS
+ 4);
4378 return NETDEV_TX_OK
;
4381 igb_unmap_and_free_tx_resource(tx_ring
, first
);
4383 return NETDEV_TX_OK
;
4386 static inline struct igb_ring
*igb_tx_queue_mapping(struct igb_adapter
*adapter
,
4387 struct sk_buff
*skb
)
4389 unsigned int r_idx
= skb
->queue_mapping
;
4391 if (r_idx
>= adapter
->num_tx_queues
)
4392 r_idx
= r_idx
% adapter
->num_tx_queues
;
4394 return adapter
->tx_ring
[r_idx
];
4397 static netdev_tx_t
igb_xmit_frame(struct sk_buff
*skb
,
4398 struct net_device
*netdev
)
4400 struct igb_adapter
*adapter
= netdev_priv(netdev
);
4402 if (test_bit(__IGB_DOWN
, &adapter
->state
)) {
4403 dev_kfree_skb_any(skb
);
4404 return NETDEV_TX_OK
;
4407 if (skb
->len
<= 0) {
4408 dev_kfree_skb_any(skb
);
4409 return NETDEV_TX_OK
;
4413 * The minimum packet size with TCTL.PSP set is 17 so pad the skb
4414 * in order to meet this minimum size requirement.
4416 if (skb
->len
< 17) {
4417 if (skb_padto(skb
, 17))
4418 return NETDEV_TX_OK
;
4422 return igb_xmit_frame_ring(skb
, igb_tx_queue_mapping(adapter
, skb
));
4426 * igb_tx_timeout - Respond to a Tx Hang
4427 * @netdev: network interface device structure
4429 static void igb_tx_timeout(struct net_device
*netdev
)
4431 struct igb_adapter
*adapter
= netdev_priv(netdev
);
4432 struct e1000_hw
*hw
= &adapter
->hw
;
4434 /* Do the reset outside of interrupt context */
4435 adapter
->tx_timeout_count
++;
4437 if (hw
->mac
.type
>= e1000_82580
)
4438 hw
->dev_spec
._82575
.global_device_reset
= true;
4440 schedule_work(&adapter
->reset_task
);
4442 (adapter
->eims_enable_mask
& ~adapter
->eims_other
));
4445 static void igb_reset_task(struct work_struct
*work
)
4447 struct igb_adapter
*adapter
;
4448 adapter
= container_of(work
, struct igb_adapter
, reset_task
);
4451 netdev_err(adapter
->netdev
, "Reset adapter\n");
4452 igb_reinit_locked(adapter
);
4456 * igb_get_stats64 - Get System Network Statistics
4457 * @netdev: network interface device structure
4458 * @stats: rtnl_link_stats64 pointer
4461 static struct rtnl_link_stats64
*igb_get_stats64(struct net_device
*netdev
,
4462 struct rtnl_link_stats64
*stats
)
4464 struct igb_adapter
*adapter
= netdev_priv(netdev
);
4466 spin_lock(&adapter
->stats64_lock
);
4467 igb_update_stats(adapter
, &adapter
->stats64
);
4468 memcpy(stats
, &adapter
->stats64
, sizeof(*stats
));
4469 spin_unlock(&adapter
->stats64_lock
);
4475 * igb_change_mtu - Change the Maximum Transfer Unit
4476 * @netdev: network interface device structure
4477 * @new_mtu: new value for maximum frame size
4479 * Returns 0 on success, negative on failure
4481 static int igb_change_mtu(struct net_device
*netdev
, int new_mtu
)
4483 struct igb_adapter
*adapter
= netdev_priv(netdev
);
4484 struct pci_dev
*pdev
= adapter
->pdev
;
4485 int max_frame
= new_mtu
+ ETH_HLEN
+ ETH_FCS_LEN
+ VLAN_HLEN
;
4487 if ((new_mtu
< 68) || (max_frame
> MAX_JUMBO_FRAME_SIZE
)) {
4488 dev_err(&pdev
->dev
, "Invalid MTU setting\n");
4492 #define MAX_STD_JUMBO_FRAME_SIZE 9238
4493 if (max_frame
> MAX_STD_JUMBO_FRAME_SIZE
) {
4494 dev_err(&pdev
->dev
, "MTU > 9216 not supported.\n");
4498 while (test_and_set_bit(__IGB_RESETTING
, &adapter
->state
))
4501 /* igb_down has a dependency on max_frame_size */
4502 adapter
->max_frame_size
= max_frame
;
4504 if (netif_running(netdev
))
4507 dev_info(&pdev
->dev
, "changing MTU from %d to %d\n",
4508 netdev
->mtu
, new_mtu
);
4509 netdev
->mtu
= new_mtu
;
4511 if (netif_running(netdev
))
4516 clear_bit(__IGB_RESETTING
, &adapter
->state
);
4522 * igb_update_stats - Update the board statistics counters
4523 * @adapter: board private structure
4526 void igb_update_stats(struct igb_adapter
*adapter
,
4527 struct rtnl_link_stats64
*net_stats
)
4529 struct e1000_hw
*hw
= &adapter
->hw
;
4530 struct pci_dev
*pdev
= adapter
->pdev
;
4536 u64 _bytes
, _packets
;
4538 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4541 * Prevent stats update while adapter is being reset, or if the pci
4542 * connection is down.
4544 if (adapter
->link_speed
== 0)
4546 if (pci_channel_offline(pdev
))
4551 for (i
= 0; i
< adapter
->num_rx_queues
; i
++) {
4552 u32 rqdpc_tmp
= rd32(E1000_RQDPC(i
)) & 0x0FFF;
4553 struct igb_ring
*ring
= adapter
->rx_ring
[i
];
4555 ring
->rx_stats
.drops
+= rqdpc_tmp
;
4556 net_stats
->rx_fifo_errors
+= rqdpc_tmp
;
4559 start
= u64_stats_fetch_begin_bh(&ring
->rx_syncp
);
4560 _bytes
= ring
->rx_stats
.bytes
;
4561 _packets
= ring
->rx_stats
.packets
;
4562 } while (u64_stats_fetch_retry_bh(&ring
->rx_syncp
, start
));
4564 packets
+= _packets
;
4567 net_stats
->rx_bytes
= bytes
;
4568 net_stats
->rx_packets
= packets
;
4572 for (i
= 0; i
< adapter
->num_tx_queues
; i
++) {
4573 struct igb_ring
*ring
= adapter
->tx_ring
[i
];
4575 start
= u64_stats_fetch_begin_bh(&ring
->tx_syncp
);
4576 _bytes
= ring
->tx_stats
.bytes
;
4577 _packets
= ring
->tx_stats
.packets
;
4578 } while (u64_stats_fetch_retry_bh(&ring
->tx_syncp
, start
));
4580 packets
+= _packets
;
4582 net_stats
->tx_bytes
= bytes
;
4583 net_stats
->tx_packets
= packets
;
4585 /* read stats registers */
4586 adapter
->stats
.crcerrs
+= rd32(E1000_CRCERRS
);
4587 adapter
->stats
.gprc
+= rd32(E1000_GPRC
);
4588 adapter
->stats
.gorc
+= rd32(E1000_GORCL
);
4589 rd32(E1000_GORCH
); /* clear GORCL */
4590 adapter
->stats
.bprc
+= rd32(E1000_BPRC
);
4591 adapter
->stats
.mprc
+= rd32(E1000_MPRC
);
4592 adapter
->stats
.roc
+= rd32(E1000_ROC
);
4594 adapter
->stats
.prc64
+= rd32(E1000_PRC64
);
4595 adapter
->stats
.prc127
+= rd32(E1000_PRC127
);
4596 adapter
->stats
.prc255
+= rd32(E1000_PRC255
);
4597 adapter
->stats
.prc511
+= rd32(E1000_PRC511
);
4598 adapter
->stats
.prc1023
+= rd32(E1000_PRC1023
);
4599 adapter
->stats
.prc1522
+= rd32(E1000_PRC1522
);
4600 adapter
->stats
.symerrs
+= rd32(E1000_SYMERRS
);
4601 adapter
->stats
.sec
+= rd32(E1000_SEC
);
4603 mpc
= rd32(E1000_MPC
);
4604 adapter
->stats
.mpc
+= mpc
;
4605 net_stats
->rx_fifo_errors
+= mpc
;
4606 adapter
->stats
.scc
+= rd32(E1000_SCC
);
4607 adapter
->stats
.ecol
+= rd32(E1000_ECOL
);
4608 adapter
->stats
.mcc
+= rd32(E1000_MCC
);
4609 adapter
->stats
.latecol
+= rd32(E1000_LATECOL
);
4610 adapter
->stats
.dc
+= rd32(E1000_DC
);
4611 adapter
->stats
.rlec
+= rd32(E1000_RLEC
);
4612 adapter
->stats
.xonrxc
+= rd32(E1000_XONRXC
);
4613 adapter
->stats
.xontxc
+= rd32(E1000_XONTXC
);
4614 adapter
->stats
.xoffrxc
+= rd32(E1000_XOFFRXC
);
4615 adapter
->stats
.xofftxc
+= rd32(E1000_XOFFTXC
);
4616 adapter
->stats
.fcruc
+= rd32(E1000_FCRUC
);
4617 adapter
->stats
.gptc
+= rd32(E1000_GPTC
);
4618 adapter
->stats
.gotc
+= rd32(E1000_GOTCL
);
4619 rd32(E1000_GOTCH
); /* clear GOTCL */
4620 adapter
->stats
.rnbc
+= rd32(E1000_RNBC
);
4621 adapter
->stats
.ruc
+= rd32(E1000_RUC
);
4622 adapter
->stats
.rfc
+= rd32(E1000_RFC
);
4623 adapter
->stats
.rjc
+= rd32(E1000_RJC
);
4624 adapter
->stats
.tor
+= rd32(E1000_TORH
);
4625 adapter
->stats
.tot
+= rd32(E1000_TOTH
);
4626 adapter
->stats
.tpr
+= rd32(E1000_TPR
);
4628 adapter
->stats
.ptc64
+= rd32(E1000_PTC64
);
4629 adapter
->stats
.ptc127
+= rd32(E1000_PTC127
);
4630 adapter
->stats
.ptc255
+= rd32(E1000_PTC255
);
4631 adapter
->stats
.ptc511
+= rd32(E1000_PTC511
);
4632 adapter
->stats
.ptc1023
+= rd32(E1000_PTC1023
);
4633 adapter
->stats
.ptc1522
+= rd32(E1000_PTC1522
);
4635 adapter
->stats
.mptc
+= rd32(E1000_MPTC
);
4636 adapter
->stats
.bptc
+= rd32(E1000_BPTC
);
4638 adapter
->stats
.tpt
+= rd32(E1000_TPT
);
4639 adapter
->stats
.colc
+= rd32(E1000_COLC
);
4641 adapter
->stats
.algnerrc
+= rd32(E1000_ALGNERRC
);
4642 /* read internal phy specific stats */
4643 reg
= rd32(E1000_CTRL_EXT
);
4644 if (!(reg
& E1000_CTRL_EXT_LINK_MODE_MASK
)) {
4645 adapter
->stats
.rxerrc
+= rd32(E1000_RXERRC
);
4646 adapter
->stats
.tncrs
+= rd32(E1000_TNCRS
);
4649 adapter
->stats
.tsctc
+= rd32(E1000_TSCTC
);
4650 adapter
->stats
.tsctfc
+= rd32(E1000_TSCTFC
);
4652 adapter
->stats
.iac
+= rd32(E1000_IAC
);
4653 adapter
->stats
.icrxoc
+= rd32(E1000_ICRXOC
);
4654 adapter
->stats
.icrxptc
+= rd32(E1000_ICRXPTC
);
4655 adapter
->stats
.icrxatc
+= rd32(E1000_ICRXATC
);
4656 adapter
->stats
.ictxptc
+= rd32(E1000_ICTXPTC
);
4657 adapter
->stats
.ictxatc
+= rd32(E1000_ICTXATC
);
4658 adapter
->stats
.ictxqec
+= rd32(E1000_ICTXQEC
);
4659 adapter
->stats
.ictxqmtc
+= rd32(E1000_ICTXQMTC
);
4660 adapter
->stats
.icrxdmtc
+= rd32(E1000_ICRXDMTC
);
4662 /* Fill out the OS statistics structure */
4663 net_stats
->multicast
= adapter
->stats
.mprc
;
4664 net_stats
->collisions
= adapter
->stats
.colc
;
4668 /* RLEC on some newer hardware can be incorrect so build
4669 * our own version based on RUC and ROC */
4670 net_stats
->rx_errors
= adapter
->stats
.rxerrc
+
4671 adapter
->stats
.crcerrs
+ adapter
->stats
.algnerrc
+
4672 adapter
->stats
.ruc
+ adapter
->stats
.roc
+
4673 adapter
->stats
.cexterr
;
4674 net_stats
->rx_length_errors
= adapter
->stats
.ruc
+
4676 net_stats
->rx_crc_errors
= adapter
->stats
.crcerrs
;
4677 net_stats
->rx_frame_errors
= adapter
->stats
.algnerrc
;
4678 net_stats
->rx_missed_errors
= adapter
->stats
.mpc
;
4681 net_stats
->tx_errors
= adapter
->stats
.ecol
+
4682 adapter
->stats
.latecol
;
4683 net_stats
->tx_aborted_errors
= adapter
->stats
.ecol
;
4684 net_stats
->tx_window_errors
= adapter
->stats
.latecol
;
4685 net_stats
->tx_carrier_errors
= adapter
->stats
.tncrs
;
4687 /* Tx Dropped needs to be maintained elsewhere */
4690 if (hw
->phy
.media_type
== e1000_media_type_copper
) {
4691 if ((adapter
->link_speed
== SPEED_1000
) &&
4692 (!igb_read_phy_reg(hw
, PHY_1000T_STATUS
, &phy_tmp
))) {
4693 phy_tmp
&= PHY_IDLE_ERROR_COUNT_MASK
;
4694 adapter
->phy_stats
.idle_errors
+= phy_tmp
;
4698 /* Management Stats */
4699 adapter
->stats
.mgptc
+= rd32(E1000_MGTPTC
);
4700 adapter
->stats
.mgprc
+= rd32(E1000_MGTPRC
);
4701 adapter
->stats
.mgpdc
+= rd32(E1000_MGTPDC
);
4704 reg
= rd32(E1000_MANC
);
4705 if (reg
& E1000_MANC_EN_BMC2OS
) {
4706 adapter
->stats
.o2bgptc
+= rd32(E1000_O2BGPTC
);
4707 adapter
->stats
.o2bspc
+= rd32(E1000_O2BSPC
);
4708 adapter
->stats
.b2ospc
+= rd32(E1000_B2OSPC
);
4709 adapter
->stats
.b2ogprc
+= rd32(E1000_B2OGPRC
);
4713 static irqreturn_t
igb_msix_other(int irq
, void *data
)
4715 struct igb_adapter
*adapter
= data
;
4716 struct e1000_hw
*hw
= &adapter
->hw
;
4717 u32 icr
= rd32(E1000_ICR
);
4718 /* reading ICR causes bit 31 of EICR to be cleared */
4720 if (icr
& E1000_ICR_DRSTA
)
4721 schedule_work(&adapter
->reset_task
);
4723 if (icr
& E1000_ICR_DOUTSYNC
) {
4724 /* HW is reporting DMA is out of sync */
4725 adapter
->stats
.doosync
++;
4726 /* The DMA Out of Sync is also indication of a spoof event
4727 * in IOV mode. Check the Wrong VM Behavior register to
4728 * see if it is really a spoof event. */
4729 igb_check_wvbr(adapter
);
4732 /* Check for a mailbox event */
4733 if (icr
& E1000_ICR_VMMB
)
4734 igb_msg_task(adapter
);
4736 if (icr
& E1000_ICR_LSC
) {
4737 hw
->mac
.get_link_status
= 1;
4738 /* guard against interrupt when we're going down */
4739 if (!test_bit(__IGB_DOWN
, &adapter
->state
))
4740 mod_timer(&adapter
->watchdog_timer
, jiffies
+ 1);
4743 wr32(E1000_EIMS
, adapter
->eims_other
);
4748 static void igb_write_itr(struct igb_q_vector
*q_vector
)
4750 struct igb_adapter
*adapter
= q_vector
->adapter
;
4751 u32 itr_val
= q_vector
->itr_val
& 0x7FFC;
4753 if (!q_vector
->set_itr
)
4759 if (adapter
->hw
.mac
.type
== e1000_82575
)
4760 itr_val
|= itr_val
<< 16;
4762 itr_val
|= E1000_EITR_CNT_IGNR
;
4764 writel(itr_val
, q_vector
->itr_register
);
4765 q_vector
->set_itr
= 0;
4768 static irqreturn_t
igb_msix_ring(int irq
, void *data
)
4770 struct igb_q_vector
*q_vector
= data
;
4772 /* Write the ITR value calculated from the previous interrupt. */
4773 igb_write_itr(q_vector
);
4775 napi_schedule(&q_vector
->napi
);
4780 #ifdef CONFIG_IGB_DCA
4781 static void igb_update_dca(struct igb_q_vector
*q_vector
)
4783 struct igb_adapter
*adapter
= q_vector
->adapter
;
4784 struct e1000_hw
*hw
= &adapter
->hw
;
4785 int cpu
= get_cpu();
4787 if (q_vector
->cpu
== cpu
)
4790 if (q_vector
->tx
.ring
) {
4791 int q
= q_vector
->tx
.ring
->reg_idx
;
4792 u32 dca_txctrl
= rd32(E1000_DCA_TXCTRL(q
));
4793 if (hw
->mac
.type
== e1000_82575
) {
4794 dca_txctrl
&= ~E1000_DCA_TXCTRL_CPUID_MASK
;
4795 dca_txctrl
|= dca3_get_tag(&adapter
->pdev
->dev
, cpu
);
4797 dca_txctrl
&= ~E1000_DCA_TXCTRL_CPUID_MASK_82576
;
4798 dca_txctrl
|= dca3_get_tag(&adapter
->pdev
->dev
, cpu
) <<
4799 E1000_DCA_TXCTRL_CPUID_SHIFT
;
4801 dca_txctrl
|= E1000_DCA_TXCTRL_DESC_DCA_EN
;
4802 wr32(E1000_DCA_TXCTRL(q
), dca_txctrl
);
4804 if (q_vector
->rx
.ring
) {
4805 int q
= q_vector
->rx
.ring
->reg_idx
;
4806 u32 dca_rxctrl
= rd32(E1000_DCA_RXCTRL(q
));
4807 if (hw
->mac
.type
== e1000_82575
) {
4808 dca_rxctrl
&= ~E1000_DCA_RXCTRL_CPUID_MASK
;
4809 dca_rxctrl
|= dca3_get_tag(&adapter
->pdev
->dev
, cpu
);
4811 dca_rxctrl
&= ~E1000_DCA_RXCTRL_CPUID_MASK_82576
;
4812 dca_rxctrl
|= dca3_get_tag(&adapter
->pdev
->dev
, cpu
) <<
4813 E1000_DCA_RXCTRL_CPUID_SHIFT
;
4815 dca_rxctrl
|= E1000_DCA_RXCTRL_DESC_DCA_EN
;
4816 dca_rxctrl
|= E1000_DCA_RXCTRL_HEAD_DCA_EN
;
4817 dca_rxctrl
|= E1000_DCA_RXCTRL_DATA_DCA_EN
;
4818 wr32(E1000_DCA_RXCTRL(q
), dca_rxctrl
);
4820 q_vector
->cpu
= cpu
;
4825 static void igb_setup_dca(struct igb_adapter
*adapter
)
4827 struct e1000_hw
*hw
= &adapter
->hw
;
4830 if (!(adapter
->flags
& IGB_FLAG_DCA_ENABLED
))
4833 /* Always use CB2 mode, difference is masked in the CB driver. */
4834 wr32(E1000_DCA_CTRL
, E1000_DCA_CTRL_DCA_MODE_CB2
);
4836 for (i
= 0; i
< adapter
->num_q_vectors
; i
++) {
4837 adapter
->q_vector
[i
]->cpu
= -1;
4838 igb_update_dca(adapter
->q_vector
[i
]);
4842 static int __igb_notify_dca(struct device
*dev
, void *data
)
4844 struct net_device
*netdev
= dev_get_drvdata(dev
);
4845 struct igb_adapter
*adapter
= netdev_priv(netdev
);
4846 struct pci_dev
*pdev
= adapter
->pdev
;
4847 struct e1000_hw
*hw
= &adapter
->hw
;
4848 unsigned long event
= *(unsigned long *)data
;
4851 case DCA_PROVIDER_ADD
:
4852 /* if already enabled, don't do it again */
4853 if (adapter
->flags
& IGB_FLAG_DCA_ENABLED
)
4855 if (dca_add_requester(dev
) == 0) {
4856 adapter
->flags
|= IGB_FLAG_DCA_ENABLED
;
4857 dev_info(&pdev
->dev
, "DCA enabled\n");
4858 igb_setup_dca(adapter
);
4861 /* Fall Through since DCA is disabled. */
4862 case DCA_PROVIDER_REMOVE
:
4863 if (adapter
->flags
& IGB_FLAG_DCA_ENABLED
) {
4864 /* without this a class_device is left
4865 * hanging around in the sysfs model */
4866 dca_remove_requester(dev
);
4867 dev_info(&pdev
->dev
, "DCA disabled\n");
4868 adapter
->flags
&= ~IGB_FLAG_DCA_ENABLED
;
4869 wr32(E1000_DCA_CTRL
, E1000_DCA_CTRL_DCA_MODE_DISABLE
);
4877 static int igb_notify_dca(struct notifier_block
*nb
, unsigned long event
,
4882 ret_val
= driver_for_each_device(&igb_driver
.driver
, NULL
, &event
,
4885 return ret_val
? NOTIFY_BAD
: NOTIFY_DONE
;
4887 #endif /* CONFIG_IGB_DCA */
#ifdef CONFIG_PCI_IOV
/*
 * igb_vf_configure - give a VF a random MAC and locate its pci_dev
 * @adapter: board private structure
 * @vf: index of the VF being configured
 *
 * Returns non-zero when the VF's pci_dev was found and stored in
 * vf_data[vf].vfdev, zero otherwise.
 */
static int igb_vf_configure(struct igb_adapter *adapter, int vf)
{
	unsigned char mac_addr[ETH_ALEN];
	struct pci_dev *pdev = adapter->pdev;
	struct e1000_hw *hw = &adapter->hw;
	struct pci_dev *pvfdev;
	unsigned int device_id;
	u16 thisvf_devfn;

	random_ether_addr(mac_addr);
	igb_set_vf_mac(adapter, vf, mac_addr);

	switch (adapter->hw.mac.type) {
	case e1000_82576:
		device_id = IGB_82576_VF_DEV_ID;
		/* VF Stride for 82576 is 2 */
		thisvf_devfn = (pdev->devfn + 0x80 + (vf << 1)) |
			       (pdev->devfn & 1);
		break;
	case e1000_i350:
		device_id = IGB_I350_VF_DEV_ID;
		/* VF Stride for I350 is 4 */
		thisvf_devfn = (pdev->devfn + 0x80 + (vf << 2)) |
			       (pdev->devfn & 3);
		break;
	default:
		device_id = 0;
		thisvf_devfn = 0;
		break;
	}

	/* walk every device with the VF id until the devfn matches */
	pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
	while (pvfdev) {
		if (pvfdev->devfn == thisvf_devfn)
			break;
		pvfdev = pci_get_device(hw->vendor_id,
					device_id, pvfdev);
	}

	if (pvfdev)
		adapter->vf_data[vf].vfdev = pvfdev;
	else
		dev_err(&pdev->dev,
			"Couldn't find pci dev ptr for VF %4.4x\n",
			thisvf_devfn);
	return pvfdev != NULL;
}

/*
 * igb_find_enabled_vfs - count VF pci devices that belong to this PF
 * @adapter: board private structure
 *
 * Returns the number of devices with the VF device id whose devfn
 * matched the expected devfn at that step of the walk.
 */
static int igb_find_enabled_vfs(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	struct pci_dev *pdev = adapter->pdev;
	struct pci_dev *pvfdev;
	u16 vf_devfn = 0;
	u16 vf_stride;
	unsigned int device_id;
	int vfs_found = 0;

	switch (adapter->hw.mac.type) {
	case e1000_82576:
		device_id = IGB_82576_VF_DEV_ID;
		/* VF Stride for 82576 is 2 */
		vf_stride = 2;
		break;
	case e1000_i350:
		device_id = IGB_I350_VF_DEV_ID;
		/* VF Stride for I350 is 4 */
		vf_stride = 4;
		break;
	default:
		device_id = 0;
		vf_stride = 0;
		break;
	}

	vf_devfn = pdev->devfn + 0x80;
	pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
	while (pvfdev) {
		if (pvfdev->devfn == vf_devfn)
			vfs_found++;
		vf_devfn += vf_stride;
		pvfdev = pci_get_device(hw->vendor_id,
					device_id, pvfdev);
	}

	return vfs_found;
}

/*
 * igb_check_vf_assignment - report whether any VF is assigned to a guest
 * @adapter: board private structure
 *
 * Returns true if any recorded VF pci_dev carries
 * PCI_DEV_FLAGS_ASSIGNED, false otherwise.
 */
static int igb_check_vf_assignment(struct igb_adapter *adapter)
{
	int i;

	for (i = 0; i < adapter->vfs_allocated_count; i++) {
		if (adapter->vf_data[i].vfdev) {
			if (adapter->vf_data[i].vfdev->dev_flags &
			    PCI_DEV_FLAGS_ASSIGNED)
				return true;
		}
	}
	return false;
}

#endif /* CONFIG_PCI_IOV */
4992 static void igb_ping_all_vfs(struct igb_adapter
*adapter
)
4994 struct e1000_hw
*hw
= &adapter
->hw
;
4998 for (i
= 0 ; i
< adapter
->vfs_allocated_count
; i
++) {
4999 ping
= E1000_PF_CONTROL_MSG
;
5000 if (adapter
->vf_data
[i
].flags
& IGB_VF_FLAG_CTS
)
5001 ping
|= E1000_VT_MSGTYPE_CTS
;
5002 igb_write_mbx(hw
, &ping
, 1, i
);
5006 static int igb_set_vf_promisc(struct igb_adapter
*adapter
, u32
*msgbuf
, u32 vf
)
5008 struct e1000_hw
*hw
= &adapter
->hw
;
5009 u32 vmolr
= rd32(E1000_VMOLR(vf
));
5010 struct vf_data_storage
*vf_data
= &adapter
->vf_data
[vf
];
5012 vf_data
->flags
&= ~(IGB_VF_FLAG_UNI_PROMISC
|
5013 IGB_VF_FLAG_MULTI_PROMISC
);
5014 vmolr
&= ~(E1000_VMOLR_ROPE
| E1000_VMOLR_ROMPE
| E1000_VMOLR_MPME
);
5016 if (*msgbuf
& E1000_VF_SET_PROMISC_MULTICAST
) {
5017 vmolr
|= E1000_VMOLR_MPME
;
5018 vf_data
->flags
|= IGB_VF_FLAG_MULTI_PROMISC
;
5019 *msgbuf
&= ~E1000_VF_SET_PROMISC_MULTICAST
;
5022 * if we have hashes and we are clearing a multicast promisc
5023 * flag we need to write the hashes to the MTA as this step
5024 * was previously skipped
5026 if (vf_data
->num_vf_mc_hashes
> 30) {
5027 vmolr
|= E1000_VMOLR_MPME
;
5028 } else if (vf_data
->num_vf_mc_hashes
) {
5030 vmolr
|= E1000_VMOLR_ROMPE
;
5031 for (j
= 0; j
< vf_data
->num_vf_mc_hashes
; j
++)
5032 igb_mta_set(hw
, vf_data
->vf_mc_hashes
[j
]);
5036 wr32(E1000_VMOLR(vf
), vmolr
);
5038 /* there are flags left unprocessed, likely not supported */
5039 if (*msgbuf
& E1000_VT_MSGINFO_MASK
)
5046 static int igb_set_vf_multicasts(struct igb_adapter
*adapter
,
5047 u32
*msgbuf
, u32 vf
)
5049 int n
= (msgbuf
[0] & E1000_VT_MSGINFO_MASK
) >> E1000_VT_MSGINFO_SHIFT
;
5050 u16
*hash_list
= (u16
*)&msgbuf
[1];
5051 struct vf_data_storage
*vf_data
= &adapter
->vf_data
[vf
];
5054 /* salt away the number of multicast addresses assigned
5055 * to this VF for later use to restore when the PF multi cast
5058 vf_data
->num_vf_mc_hashes
= n
;
5060 /* only up to 30 hash values supported */
5064 /* store the hashes for later use */
5065 for (i
= 0; i
< n
; i
++)
5066 vf_data
->vf_mc_hashes
[i
] = hash_list
[i
];
5068 /* Flush and reset the mta with the new values */
5069 igb_set_rx_mode(adapter
->netdev
);
5074 static void igb_restore_vf_multicasts(struct igb_adapter
*adapter
)
5076 struct e1000_hw
*hw
= &adapter
->hw
;
5077 struct vf_data_storage
*vf_data
;
5080 for (i
= 0; i
< adapter
->vfs_allocated_count
; i
++) {
5081 u32 vmolr
= rd32(E1000_VMOLR(i
));
5082 vmolr
&= ~(E1000_VMOLR_ROMPE
| E1000_VMOLR_MPME
);
5084 vf_data
= &adapter
->vf_data
[i
];
5086 if ((vf_data
->num_vf_mc_hashes
> 30) ||
5087 (vf_data
->flags
& IGB_VF_FLAG_MULTI_PROMISC
)) {
5088 vmolr
|= E1000_VMOLR_MPME
;
5089 } else if (vf_data
->num_vf_mc_hashes
) {
5090 vmolr
|= E1000_VMOLR_ROMPE
;
5091 for (j
= 0; j
< vf_data
->num_vf_mc_hashes
; j
++)
5092 igb_mta_set(hw
, vf_data
->vf_mc_hashes
[j
]);
5094 wr32(E1000_VMOLR(i
), vmolr
);
5098 static void igb_clear_vf_vfta(struct igb_adapter
*adapter
, u32 vf
)
5100 struct e1000_hw
*hw
= &adapter
->hw
;
5101 u32 pool_mask
, reg
, vid
;
5104 pool_mask
= 1 << (E1000_VLVF_POOLSEL_SHIFT
+ vf
);
5106 /* Find the vlan filter for this id */
5107 for (i
= 0; i
< E1000_VLVF_ARRAY_SIZE
; i
++) {
5108 reg
= rd32(E1000_VLVF(i
));
5110 /* remove the vf from the pool */
5113 /* if pool is empty then remove entry from vfta */
5114 if (!(reg
& E1000_VLVF_POOLSEL_MASK
) &&
5115 (reg
& E1000_VLVF_VLANID_ENABLE
)) {
5117 vid
= reg
& E1000_VLVF_VLANID_MASK
;
5118 igb_vfta_set(hw
, vid
, false);
5121 wr32(E1000_VLVF(i
), reg
);
5124 adapter
->vf_data
[vf
].vlans_enabled
= 0;
5127 static s32
igb_vlvf_set(struct igb_adapter
*adapter
, u32 vid
, bool add
, u32 vf
)
5129 struct e1000_hw
*hw
= &adapter
->hw
;
5132 /* The vlvf table only exists on 82576 hardware and newer */
5133 if (hw
->mac
.type
< e1000_82576
)
5136 /* we only need to do this if VMDq is enabled */
5137 if (!adapter
->vfs_allocated_count
)
5140 /* Find the vlan filter for this id */
5141 for (i
= 0; i
< E1000_VLVF_ARRAY_SIZE
; i
++) {
5142 reg
= rd32(E1000_VLVF(i
));
5143 if ((reg
& E1000_VLVF_VLANID_ENABLE
) &&
5144 vid
== (reg
& E1000_VLVF_VLANID_MASK
))
5149 if (i
== E1000_VLVF_ARRAY_SIZE
) {
5150 /* Did not find a matching VLAN ID entry that was
5151 * enabled. Search for a free filter entry, i.e.
5152 * one without the enable bit set
5154 for (i
= 0; i
< E1000_VLVF_ARRAY_SIZE
; i
++) {
5155 reg
= rd32(E1000_VLVF(i
));
5156 if (!(reg
& E1000_VLVF_VLANID_ENABLE
))
5160 if (i
< E1000_VLVF_ARRAY_SIZE
) {
5161 /* Found an enabled/available entry */
5162 reg
|= 1 << (E1000_VLVF_POOLSEL_SHIFT
+ vf
);
5164 /* if !enabled we need to set this up in vfta */
5165 if (!(reg
& E1000_VLVF_VLANID_ENABLE
)) {
5166 /* add VID to filter table */
5167 igb_vfta_set(hw
, vid
, true);
5168 reg
|= E1000_VLVF_VLANID_ENABLE
;
5170 reg
&= ~E1000_VLVF_VLANID_MASK
;
5172 wr32(E1000_VLVF(i
), reg
);
5174 /* do not modify RLPML for PF devices */
5175 if (vf
>= adapter
->vfs_allocated_count
)
5178 if (!adapter
->vf_data
[vf
].vlans_enabled
) {
5180 reg
= rd32(E1000_VMOLR(vf
));
5181 size
= reg
& E1000_VMOLR_RLPML_MASK
;
5183 reg
&= ~E1000_VMOLR_RLPML_MASK
;
5185 wr32(E1000_VMOLR(vf
), reg
);
5188 adapter
->vf_data
[vf
].vlans_enabled
++;
5191 if (i
< E1000_VLVF_ARRAY_SIZE
) {
5192 /* remove vf from the pool */
5193 reg
&= ~(1 << (E1000_VLVF_POOLSEL_SHIFT
+ vf
));
5194 /* if pool is empty then remove entry from vfta */
5195 if (!(reg
& E1000_VLVF_POOLSEL_MASK
)) {
5197 igb_vfta_set(hw
, vid
, false);
5199 wr32(E1000_VLVF(i
), reg
);
5201 /* do not modify RLPML for PF devices */
5202 if (vf
>= adapter
->vfs_allocated_count
)
5205 adapter
->vf_data
[vf
].vlans_enabled
--;
5206 if (!adapter
->vf_data
[vf
].vlans_enabled
) {
5208 reg
= rd32(E1000_VMOLR(vf
));
5209 size
= reg
& E1000_VMOLR_RLPML_MASK
;
5211 reg
&= ~E1000_VMOLR_RLPML_MASK
;
5213 wr32(E1000_VMOLR(vf
), reg
);
5220 static void igb_set_vmvir(struct igb_adapter
*adapter
, u32 vid
, u32 vf
)
5222 struct e1000_hw
*hw
= &adapter
->hw
;
5225 wr32(E1000_VMVIR(vf
), (vid
| E1000_VMVIR_VLANA_DEFAULT
));
5227 wr32(E1000_VMVIR(vf
), 0);
5230 static int igb_ndo_set_vf_vlan(struct net_device
*netdev
,
5231 int vf
, u16 vlan
, u8 qos
)
5234 struct igb_adapter
*adapter
= netdev_priv(netdev
);
5236 if ((vf
>= adapter
->vfs_allocated_count
) || (vlan
> 4095) || (qos
> 7))
5239 err
= igb_vlvf_set(adapter
, vlan
, !!vlan
, vf
);
5242 igb_set_vmvir(adapter
, vlan
| (qos
<< VLAN_PRIO_SHIFT
), vf
);
5243 igb_set_vmolr(adapter
, vf
, !vlan
);
5244 adapter
->vf_data
[vf
].pf_vlan
= vlan
;
5245 adapter
->vf_data
[vf
].pf_qos
= qos
;
5246 dev_info(&adapter
->pdev
->dev
,
5247 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan
, qos
, vf
);
5248 if (test_bit(__IGB_DOWN
, &adapter
->state
)) {
5249 dev_warn(&adapter
->pdev
->dev
,
5250 "The VF VLAN has been set,"
5251 " but the PF device is not up.\n");
5252 dev_warn(&adapter
->pdev
->dev
,
5253 "Bring the PF device up before"
5254 " attempting to use the VF device.\n");
5257 igb_vlvf_set(adapter
, adapter
->vf_data
[vf
].pf_vlan
,
5259 igb_set_vmvir(adapter
, vlan
, vf
);
5260 igb_set_vmolr(adapter
, vf
, true);
5261 adapter
->vf_data
[vf
].pf_vlan
= 0;
5262 adapter
->vf_data
[vf
].pf_qos
= 0;
5268 static int igb_set_vf_vlan(struct igb_adapter
*adapter
, u32
*msgbuf
, u32 vf
)
5270 int add
= (msgbuf
[0] & E1000_VT_MSGINFO_MASK
) >> E1000_VT_MSGINFO_SHIFT
;
5271 int vid
= (msgbuf
[1] & E1000_VLVF_VLANID_MASK
);
5273 return igb_vlvf_set(adapter
, vid
, add
, vf
);
5276 static inline void igb_vf_reset(struct igb_adapter
*adapter
, u32 vf
)
5278 /* clear flags - except flag that indicates PF has set the MAC */
5279 adapter
->vf_data
[vf
].flags
&= IGB_VF_FLAG_PF_SET_MAC
;
5280 adapter
->vf_data
[vf
].last_nack
= jiffies
;
5282 /* reset offloads to defaults */
5283 igb_set_vmolr(adapter
, vf
, true);
5285 /* reset vlans for device */
5286 igb_clear_vf_vfta(adapter
, vf
);
5287 if (adapter
->vf_data
[vf
].pf_vlan
)
5288 igb_ndo_set_vf_vlan(adapter
->netdev
, vf
,
5289 adapter
->vf_data
[vf
].pf_vlan
,
5290 adapter
->vf_data
[vf
].pf_qos
);
5292 igb_clear_vf_vfta(adapter
, vf
);
5294 /* reset multicast table array for vf */
5295 adapter
->vf_data
[vf
].num_vf_mc_hashes
= 0;
5297 /* Flush and reset the mta with the new values */
5298 igb_set_rx_mode(adapter
->netdev
);
5301 static void igb_vf_reset_event(struct igb_adapter
*adapter
, u32 vf
)
5303 unsigned char *vf_mac
= adapter
->vf_data
[vf
].vf_mac_addresses
;
5305 /* generate a new mac address as we were hotplug removed/added */
5306 if (!(adapter
->vf_data
[vf
].flags
& IGB_VF_FLAG_PF_SET_MAC
))
5307 random_ether_addr(vf_mac
);
5309 /* process remaining reset events */
5310 igb_vf_reset(adapter
, vf
);
5313 static void igb_vf_reset_msg(struct igb_adapter
*adapter
, u32 vf
)
5315 struct e1000_hw
*hw
= &adapter
->hw
;
5316 unsigned char *vf_mac
= adapter
->vf_data
[vf
].vf_mac_addresses
;
5317 int rar_entry
= hw
->mac
.rar_entry_count
- (vf
+ 1);
5319 u8
*addr
= (u8
*)(&msgbuf
[1]);
5321 /* process all the same items cleared in a function level reset */
5322 igb_vf_reset(adapter
, vf
);
5324 /* set vf mac address */
5325 igb_rar_set_qsel(adapter
, vf_mac
, rar_entry
, vf
);
5327 /* enable transmit and receive for vf */
5328 reg
= rd32(E1000_VFTE
);
5329 wr32(E1000_VFTE
, reg
| (1 << vf
));
5330 reg
= rd32(E1000_VFRE
);
5331 wr32(E1000_VFRE
, reg
| (1 << vf
));
5333 adapter
->vf_data
[vf
].flags
|= IGB_VF_FLAG_CTS
;
5335 /* reply to reset with ack and vf mac address */
5336 msgbuf
[0] = E1000_VF_RESET
| E1000_VT_MSGTYPE_ACK
;
5337 memcpy(addr
, vf_mac
, 6);
5338 igb_write_mbx(hw
, msgbuf
, 3, vf
);
5341 static int igb_set_vf_mac_addr(struct igb_adapter
*adapter
, u32
*msg
, int vf
)
5344 * The VF MAC Address is stored in a packed array of bytes
5345 * starting at the second 32 bit word of the msg array
5347 unsigned char *addr
= (char *)&msg
[1];
5350 if (is_valid_ether_addr(addr
))
5351 err
= igb_set_vf_mac(adapter
, vf
, addr
);
5356 static void igb_rcv_ack_from_vf(struct igb_adapter
*adapter
, u32 vf
)
5358 struct e1000_hw
*hw
= &adapter
->hw
;
5359 struct vf_data_storage
*vf_data
= &adapter
->vf_data
[vf
];
5360 u32 msg
= E1000_VT_MSGTYPE_NACK
;
5362 /* if device isn't clear to send it shouldn't be reading either */
5363 if (!(vf_data
->flags
& IGB_VF_FLAG_CTS
) &&
5364 time_after(jiffies
, vf_data
->last_nack
+ (2 * HZ
))) {
5365 igb_write_mbx(hw
, &msg
, 1, vf
);
5366 vf_data
->last_nack
= jiffies
;
5370 static void igb_rcv_msg_from_vf(struct igb_adapter
*adapter
, u32 vf
)
5372 struct pci_dev
*pdev
= adapter
->pdev
;
5373 u32 msgbuf
[E1000_VFMAILBOX_SIZE
];
5374 struct e1000_hw
*hw
= &adapter
->hw
;
5375 struct vf_data_storage
*vf_data
= &adapter
->vf_data
[vf
];
5378 retval
= igb_read_mbx(hw
, msgbuf
, E1000_VFMAILBOX_SIZE
, vf
);
5381 /* if receive failed revoke VF CTS stats and restart init */
5382 dev_err(&pdev
->dev
, "Error receiving message from VF\n");
5383 vf_data
->flags
&= ~IGB_VF_FLAG_CTS
;
5384 if (!time_after(jiffies
, vf_data
->last_nack
+ (2 * HZ
)))
5389 /* this is a message we already processed, do nothing */
5390 if (msgbuf
[0] & (E1000_VT_MSGTYPE_ACK
| E1000_VT_MSGTYPE_NACK
))
5394 * until the vf completes a reset it should not be
5395 * allowed to start any configuration.
5398 if (msgbuf
[0] == E1000_VF_RESET
) {
5399 igb_vf_reset_msg(adapter
, vf
);
5403 if (!(vf_data
->flags
& IGB_VF_FLAG_CTS
)) {
5404 if (!time_after(jiffies
, vf_data
->last_nack
+ (2 * HZ
)))
5410 switch ((msgbuf
[0] & 0xFFFF)) {
5411 case E1000_VF_SET_MAC_ADDR
:
5413 if (!(vf_data
->flags
& IGB_VF_FLAG_PF_SET_MAC
))
5414 retval
= igb_set_vf_mac_addr(adapter
, msgbuf
, vf
);
5416 dev_warn(&pdev
->dev
,
5417 "VF %d attempted to override administratively "
5418 "set MAC address\nReload the VF driver to "
5419 "resume operations\n", vf
);
5421 case E1000_VF_SET_PROMISC
:
5422 retval
= igb_set_vf_promisc(adapter
, msgbuf
, vf
);
5424 case E1000_VF_SET_MULTICAST
:
5425 retval
= igb_set_vf_multicasts(adapter
, msgbuf
, vf
);
5427 case E1000_VF_SET_LPE
:
5428 retval
= igb_set_vf_rlpml(adapter
, msgbuf
[1], vf
);
5430 case E1000_VF_SET_VLAN
:
5432 if (vf_data
->pf_vlan
)
5433 dev_warn(&pdev
->dev
,
5434 "VF %d attempted to override administratively "
5435 "set VLAN tag\nReload the VF driver to "
5436 "resume operations\n", vf
);
5438 retval
= igb_set_vf_vlan(adapter
, msgbuf
, vf
);
5441 dev_err(&pdev
->dev
, "Unhandled Msg %08x\n", msgbuf
[0]);
5446 msgbuf
[0] |= E1000_VT_MSGTYPE_CTS
;
5448 /* notify the VF of the results of what it sent us */
5450 msgbuf
[0] |= E1000_VT_MSGTYPE_NACK
;
5452 msgbuf
[0] |= E1000_VT_MSGTYPE_ACK
;
5454 igb_write_mbx(hw
, msgbuf
, 1, vf
);
5457 static void igb_msg_task(struct igb_adapter
*adapter
)
5459 struct e1000_hw
*hw
= &adapter
->hw
;
5462 for (vf
= 0; vf
< adapter
->vfs_allocated_count
; vf
++) {
5463 /* process any reset requests */
5464 if (!igb_check_for_rst(hw
, vf
))
5465 igb_vf_reset_event(adapter
, vf
);
5467 /* process any messages pending */
5468 if (!igb_check_for_msg(hw
, vf
))
5469 igb_rcv_msg_from_vf(adapter
, vf
);
5471 /* process any acks */
5472 if (!igb_check_for_ack(hw
, vf
))
5473 igb_rcv_ack_from_vf(adapter
, vf
);
5478 * igb_set_uta - Set unicast filter table address
5479 * @adapter: board private structure
5481 * The unicast table address is a register array of 32-bit registers.
5482 * The table is meant to be used in a way similar to how the MTA is used
5483 * however due to certain limitations in the hardware it is necessary to
5484 * set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5485 * enable bit to allow vlan tag stripping when promiscuous mode is enabled
5487 static void igb_set_uta(struct igb_adapter
*adapter
)
5489 struct e1000_hw
*hw
= &adapter
->hw
;
5492 /* The UTA table only exists on 82576 hardware and newer */
5493 if (hw
->mac
.type
< e1000_82576
)
5496 /* we only need to do this if VMDq is enabled */
5497 if (!adapter
->vfs_allocated_count
)
5500 for (i
= 0; i
< hw
->mac
.uta_reg_count
; i
++)
5501 array_wr32(E1000_UTA
, i
, ~0);
5505 * igb_intr_msi - Interrupt Handler
5506 * @irq: interrupt number
5507 * @data: pointer to a network interface device structure
5509 static irqreturn_t
igb_intr_msi(int irq
, void *data
)
5511 struct igb_adapter
*adapter
= data
;
5512 struct igb_q_vector
*q_vector
= adapter
->q_vector
[0];
5513 struct e1000_hw
*hw
= &adapter
->hw
;
5514 /* read ICR disables interrupts using IAM */
5515 u32 icr
= rd32(E1000_ICR
);
5517 igb_write_itr(q_vector
);
5519 if (icr
& E1000_ICR_DRSTA
)
5520 schedule_work(&adapter
->reset_task
);
5522 if (icr
& E1000_ICR_DOUTSYNC
) {
5523 /* HW is reporting DMA is out of sync */
5524 adapter
->stats
.doosync
++;
5527 if (icr
& (E1000_ICR_RXSEQ
| E1000_ICR_LSC
)) {
5528 hw
->mac
.get_link_status
= 1;
5529 if (!test_bit(__IGB_DOWN
, &adapter
->state
))
5530 mod_timer(&adapter
->watchdog_timer
, jiffies
+ 1);
5533 napi_schedule(&q_vector
->napi
);
5539 * igb_intr - Legacy Interrupt Handler
5540 * @irq: interrupt number
5541 * @data: pointer to a network interface device structure
5543 static irqreturn_t
igb_intr(int irq
, void *data
)
5545 struct igb_adapter
*adapter
= data
;
5546 struct igb_q_vector
*q_vector
= adapter
->q_vector
[0];
5547 struct e1000_hw
*hw
= &adapter
->hw
;
5548 /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked. No
5549 * need for the IMC write */
5550 u32 icr
= rd32(E1000_ICR
);
5552 /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5553 * not set, then the adapter didn't send an interrupt */
5554 if (!(icr
& E1000_ICR_INT_ASSERTED
))
5557 igb_write_itr(q_vector
);
5559 if (icr
& E1000_ICR_DRSTA
)
5560 schedule_work(&adapter
->reset_task
);
5562 if (icr
& E1000_ICR_DOUTSYNC
) {
5563 /* HW is reporting DMA is out of sync */
5564 adapter
->stats
.doosync
++;
5567 if (icr
& (E1000_ICR_RXSEQ
| E1000_ICR_LSC
)) {
5568 hw
->mac
.get_link_status
= 1;
5569 /* guard against interrupt when we're going down */
5570 if (!test_bit(__IGB_DOWN
, &adapter
->state
))
5571 mod_timer(&adapter
->watchdog_timer
, jiffies
+ 1);
5574 napi_schedule(&q_vector
->napi
);
5579 void igb_ring_irq_enable(struct igb_q_vector
*q_vector
)
5581 struct igb_adapter
*adapter
= q_vector
->adapter
;
5582 struct e1000_hw
*hw
= &adapter
->hw
;
5584 if ((q_vector
->rx
.ring
&& (adapter
->rx_itr_setting
& 3)) ||
5585 (!q_vector
->rx
.ring
&& (adapter
->tx_itr_setting
& 3))) {
5586 if ((adapter
->num_q_vectors
== 1) && !adapter
->vf_data
)
5587 igb_set_itr(q_vector
);
5589 igb_update_ring_itr(q_vector
);
5592 if (!test_bit(__IGB_DOWN
, &adapter
->state
)) {
5593 if (adapter
->msix_entries
)
5594 wr32(E1000_EIMS
, q_vector
->eims_value
);
5596 igb_irq_enable(adapter
);
5601 * igb_poll - NAPI Rx polling callback
5602 * @napi: napi polling structure
5603 * @budget: count of how many packets we should handle
5605 static int igb_poll(struct napi_struct
*napi
, int budget
)
5607 struct igb_q_vector
*q_vector
= container_of(napi
,
5608 struct igb_q_vector
,
5610 bool clean_complete
= true;
5612 #ifdef CONFIG_IGB_DCA
5613 if (q_vector
->adapter
->flags
& IGB_FLAG_DCA_ENABLED
)
5614 igb_update_dca(q_vector
);
5616 if (q_vector
->tx
.ring
)
5617 clean_complete
= igb_clean_tx_irq(q_vector
);
5619 if (q_vector
->rx
.ring
)
5620 clean_complete
&= igb_clean_rx_irq(q_vector
, budget
);
5622 /* If all work not completed, return budget and keep polling */
5623 if (!clean_complete
)
5626 /* If not enough Rx work done, exit the polling mode */
5627 napi_complete(napi
);
5628 igb_ring_irq_enable(q_vector
);
5634 * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5635 * @adapter: board private structure
5636 * @shhwtstamps: timestamp structure to update
5637 * @regval: unsigned 64bit system time value.
5639 * We need to convert the system time value stored in the RX/TXSTMP registers
5640 * into a hwtstamp which can be used by the upper level timestamping functions
5642 static void igb_systim_to_hwtstamp(struct igb_adapter
*adapter
,
5643 struct skb_shared_hwtstamps
*shhwtstamps
,
5649 * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
5650 * 24 to match clock shift we setup earlier.
5652 if (adapter
->hw
.mac
.type
>= e1000_82580
)
5653 regval
<<= IGB_82580_TSYNC_SHIFT
;
5655 ns
= timecounter_cyc2time(&adapter
->clock
, regval
);
5656 timecompare_update(&adapter
->compare
, ns
);
5657 memset(shhwtstamps
, 0, sizeof(struct skb_shared_hwtstamps
));
5658 shhwtstamps
->hwtstamp
= ns_to_ktime(ns
);
5659 shhwtstamps
->syststamp
= timecompare_transform(&adapter
->compare
, ns
);
5663 * igb_tx_hwtstamp - utility function which checks for TX time stamp
5664 * @q_vector: pointer to q_vector containing needed info
5665 * @buffer: pointer to igb_tx_buffer structure
5667 * If we were asked to do hardware stamping and such a time stamp is
5668 * available, then it must have been for this skb here because we only
5669 * allow only one such packet into the queue.
5671 static void igb_tx_hwtstamp(struct igb_q_vector
*q_vector
,
5672 struct igb_tx_buffer
*buffer_info
)
5674 struct igb_adapter
*adapter
= q_vector
->adapter
;
5675 struct e1000_hw
*hw
= &adapter
->hw
;
5676 struct skb_shared_hwtstamps shhwtstamps
;
5679 /* if skb does not support hw timestamp or TX stamp not valid exit */
5680 if (likely(!(buffer_info
->tx_flags
& IGB_TX_FLAGS_TSTAMP
)) ||
5681 !(rd32(E1000_TSYNCTXCTL
) & E1000_TSYNCTXCTL_VALID
))
5684 regval
= rd32(E1000_TXSTMPL
);
5685 regval
|= (u64
)rd32(E1000_TXSTMPH
) << 32;
5687 igb_systim_to_hwtstamp(adapter
, &shhwtstamps
, regval
);
5688 skb_tstamp_tx(buffer_info
->skb
, &shhwtstamps
);
5692 * igb_clean_tx_irq - Reclaim resources after transmit completes
5693 * @q_vector: pointer to q_vector containing needed info
5694 * returns true if ring is completely cleaned
5696 static bool igb_clean_tx_irq(struct igb_q_vector
*q_vector
)
5698 struct igb_adapter
*adapter
= q_vector
->adapter
;
5699 struct igb_ring
*tx_ring
= q_vector
->tx
.ring
;
5700 struct igb_tx_buffer
*tx_buffer
;
5701 union e1000_adv_tx_desc
*tx_desc
, *eop_desc
;
5702 unsigned int total_bytes
= 0, total_packets
= 0;
5703 unsigned int budget
= q_vector
->tx
.work_limit
;
5704 unsigned int i
= tx_ring
->next_to_clean
;
5706 if (test_bit(__IGB_DOWN
, &adapter
->state
))
5709 tx_buffer
= &tx_ring
->tx_buffer_info
[i
];
5710 tx_desc
= IGB_TX_DESC(tx_ring
, i
);
5711 i
-= tx_ring
->count
;
5713 for (; budget
; budget
--) {
5714 eop_desc
= tx_buffer
->next_to_watch
;
5716 /* prevent any other reads prior to eop_desc */
5719 /* if next_to_watch is not set then there is no work pending */
5723 /* if DD is not set pending work has not been completed */
5724 if (!(eop_desc
->wb
.status
& cpu_to_le32(E1000_TXD_STAT_DD
)))
5727 /* clear next_to_watch to prevent false hangs */
5728 tx_buffer
->next_to_watch
= NULL
;
5730 /* update the statistics for this packet */
5731 total_bytes
+= tx_buffer
->bytecount
;
5732 total_packets
+= tx_buffer
->gso_segs
;
5734 /* retrieve hardware timestamp */
5735 igb_tx_hwtstamp(q_vector
, tx_buffer
);
5738 dev_kfree_skb_any(tx_buffer
->skb
);
5739 tx_buffer
->skb
= NULL
;
5741 /* unmap skb header data */
5742 dma_unmap_single(tx_ring
->dev
,
5747 /* clear last DMA location and unmap remaining buffers */
5748 while (tx_desc
!= eop_desc
) {
5755 i
-= tx_ring
->count
;
5756 tx_buffer
= tx_ring
->tx_buffer_info
;
5757 tx_desc
= IGB_TX_DESC(tx_ring
, 0);
5760 /* unmap any remaining paged data */
5761 if (tx_buffer
->dma
) {
5762 dma_unmap_page(tx_ring
->dev
,
5769 /* clear last DMA location */
5772 /* move us one more past the eop_desc for start of next pkt */
5777 i
-= tx_ring
->count
;
5778 tx_buffer
= tx_ring
->tx_buffer_info
;
5779 tx_desc
= IGB_TX_DESC(tx_ring
, 0);
5783 i
+= tx_ring
->count
;
5784 tx_ring
->next_to_clean
= i
;
5785 u64_stats_update_begin(&tx_ring
->tx_syncp
);
5786 tx_ring
->tx_stats
.bytes
+= total_bytes
;
5787 tx_ring
->tx_stats
.packets
+= total_packets
;
5788 u64_stats_update_end(&tx_ring
->tx_syncp
);
5789 q_vector
->tx
.total_bytes
+= total_bytes
;
5790 q_vector
->tx
.total_packets
+= total_packets
;
5792 if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG
, &tx_ring
->flags
)) {
5793 struct e1000_hw
*hw
= &adapter
->hw
;
5795 eop_desc
= tx_buffer
->next_to_watch
;
5797 /* Detect a transmit hang in hardware, this serializes the
5798 * check with the clearing of time_stamp and movement of i */
5799 clear_bit(IGB_RING_FLAG_TX_DETECT_HANG
, &tx_ring
->flags
);
5801 time_after(jiffies
, tx_buffer
->time_stamp
+
5802 (adapter
->tx_timeout_factor
* HZ
)) &&
5803 !(rd32(E1000_STATUS
) & E1000_STATUS_TXOFF
)) {
5805 /* detected Tx unit hang */
5806 dev_err(tx_ring
->dev
,
5807 "Detected Tx Unit Hang\n"
5811 " next_to_use <%x>\n"
5812 " next_to_clean <%x>\n"
5813 "buffer_info[next_to_clean]\n"
5814 " time_stamp <%lx>\n"
5815 " next_to_watch <%p>\n"
5817 " desc.status <%x>\n",
5818 tx_ring
->queue_index
,
5819 rd32(E1000_TDH(tx_ring
->reg_idx
)),
5820 readl(tx_ring
->tail
),
5821 tx_ring
->next_to_use
,
5822 tx_ring
->next_to_clean
,
5823 tx_buffer
->time_stamp
,
5826 eop_desc
->wb
.status
);
5827 netif_stop_subqueue(tx_ring
->netdev
,
5828 tx_ring
->queue_index
);
5830 /* we are about to reset, no point in enabling stuff */
5835 if (unlikely(total_packets
&&
5836 netif_carrier_ok(tx_ring
->netdev
) &&
5837 igb_desc_unused(tx_ring
) >= IGB_TX_QUEUE_WAKE
)) {
5838 /* Make sure that anybody stopping the queue after this
5839 * sees the new next_to_clean.
5842 if (__netif_subqueue_stopped(tx_ring
->netdev
,
5843 tx_ring
->queue_index
) &&
5844 !(test_bit(__IGB_DOWN
, &adapter
->state
))) {
5845 netif_wake_subqueue(tx_ring
->netdev
,
5846 tx_ring
->queue_index
);
5848 u64_stats_update_begin(&tx_ring
->tx_syncp
);
5849 tx_ring
->tx_stats
.restart_queue
++;
5850 u64_stats_update_end(&tx_ring
->tx_syncp
);
5857 static inline void igb_rx_checksum(struct igb_ring
*ring
,
5858 union e1000_adv_rx_desc
*rx_desc
,
5859 struct sk_buff
*skb
)
5861 skb_checksum_none_assert(skb
);
5863 /* Ignore Checksum bit is set */
5864 if (igb_test_staterr(rx_desc
, E1000_RXD_STAT_IXSM
))
5867 /* Rx checksum disabled via ethtool */
5868 if (!(ring
->netdev
->features
& NETIF_F_RXCSUM
))
5871 /* TCP/UDP checksum error bit is set */
5872 if (igb_test_staterr(rx_desc
,
5873 E1000_RXDEXT_STATERR_TCPE
|
5874 E1000_RXDEXT_STATERR_IPE
)) {
5876 * work around errata with sctp packets where the TCPE aka
5877 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5878 * packets, (aka let the stack check the crc32c)
5880 if (!((skb
->len
== 60) &&
5881 test_bit(IGB_RING_FLAG_RX_SCTP_CSUM
, &ring
->flags
))) {
5882 u64_stats_update_begin(&ring
->rx_syncp
);
5883 ring
->rx_stats
.csum_err
++;
5884 u64_stats_update_end(&ring
->rx_syncp
);
5886 /* let the stack verify checksum errors */
5889 /* It must be a TCP or UDP packet with a valid checksum */
5890 if (igb_test_staterr(rx_desc
, E1000_RXD_STAT_TCPCS
|
5891 E1000_RXD_STAT_UDPCS
))
5892 skb
->ip_summed
= CHECKSUM_UNNECESSARY
;
5894 dev_dbg(ring
->dev
, "cksum success: bits %08X\n",
5895 le32_to_cpu(rx_desc
->wb
.upper
.status_error
));
5898 static inline void igb_rx_hash(struct igb_ring
*ring
,
5899 union e1000_adv_rx_desc
*rx_desc
,
5900 struct sk_buff
*skb
)
5902 if (ring
->netdev
->features
& NETIF_F_RXHASH
)
5903 skb
->rxhash
= le32_to_cpu(rx_desc
->wb
.lower
.hi_dword
.rss
);
5906 static void igb_rx_hwtstamp(struct igb_q_vector
*q_vector
,
5907 union e1000_adv_rx_desc
*rx_desc
,
5908 struct sk_buff
*skb
)
5910 struct igb_adapter
*adapter
= q_vector
->adapter
;
5911 struct e1000_hw
*hw
= &adapter
->hw
;
5914 if (!igb_test_staterr(rx_desc
, E1000_RXDADV_STAT_TSIP
|
5915 E1000_RXDADV_STAT_TS
))
5919 * If this bit is set, then the RX registers contain the time stamp. No
5920 * other packet will be time stamped until we read these registers, so
5921 * read the registers to make them available again. Because only one
5922 * packet can be time stamped at a time, we know that the register
5923 * values must belong to this one here and therefore we don't need to
5924 * compare any of the additional attributes stored for it.
5926 * If nothing went wrong, then it should have a shared tx_flags that we
5927 * can turn into a skb_shared_hwtstamps.
5929 if (igb_test_staterr(rx_desc
, E1000_RXDADV_STAT_TSIP
)) {
5930 u32
*stamp
= (u32
*)skb
->data
;
5931 regval
= le32_to_cpu(*(stamp
+ 2));
5932 regval
|= (u64
)le32_to_cpu(*(stamp
+ 3)) << 32;
5933 skb_pull(skb
, IGB_TS_HDR_LEN
);
5935 if(!(rd32(E1000_TSYNCRXCTL
) & E1000_TSYNCRXCTL_VALID
))
5938 regval
= rd32(E1000_RXSTMPL
);
5939 regval
|= (u64
)rd32(E1000_RXSTMPH
) << 32;
5942 igb_systim_to_hwtstamp(adapter
, skb_hwtstamps(skb
), regval
);
5945 static void igb_rx_vlan(struct igb_ring
*ring
,
5946 union e1000_adv_rx_desc
*rx_desc
,
5947 struct sk_buff
*skb
)
5949 if (igb_test_staterr(rx_desc
, E1000_RXD_STAT_VP
)) {
5951 if (igb_test_staterr(rx_desc
, E1000_RXDEXT_STATERR_LB
) &&
5952 test_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP
, &ring
->flags
))
5953 vid
= be16_to_cpu(rx_desc
->wb
.upper
.vlan
);
5955 vid
= le16_to_cpu(rx_desc
->wb
.upper
.vlan
);
5957 __vlan_hwaccel_put_tag(skb
, vid
);
5961 static inline u16
igb_get_hlen(union e1000_adv_rx_desc
*rx_desc
)
5963 /* HW will not DMA in data larger than the given buffer, even if it
5964 * parses the (NFS, of course) header to be larger. In that case, it
5965 * fills the header buffer and spills the rest into the page.
5967 u16 hlen
= (le16_to_cpu(rx_desc
->wb
.lower
.lo_dword
.hdr_info
) &
5968 E1000_RXDADV_HDRBUFLEN_MASK
) >> E1000_RXDADV_HDRBUFLEN_SHIFT
;
5969 if (hlen
> IGB_RX_HDR_LEN
)
5970 hlen
= IGB_RX_HDR_LEN
;
5974 static bool igb_clean_rx_irq(struct igb_q_vector
*q_vector
, int budget
)
5976 struct igb_ring
*rx_ring
= q_vector
->rx
.ring
;
5977 union e1000_adv_rx_desc
*rx_desc
;
5978 const int current_node
= numa_node_id();
5979 unsigned int total_bytes
= 0, total_packets
= 0;
5980 u16 cleaned_count
= igb_desc_unused(rx_ring
);
5981 u16 i
= rx_ring
->next_to_clean
;
5983 rx_desc
= IGB_RX_DESC(rx_ring
, i
);
5985 while (igb_test_staterr(rx_desc
, E1000_RXD_STAT_DD
)) {
5986 struct igb_rx_buffer
*buffer_info
= &rx_ring
->rx_buffer_info
[i
];
5987 struct sk_buff
*skb
= buffer_info
->skb
;
5988 union e1000_adv_rx_desc
*next_rxd
;
5990 buffer_info
->skb
= NULL
;
5991 prefetch(skb
->data
);
5994 if (i
== rx_ring
->count
)
5997 next_rxd
= IGB_RX_DESC(rx_ring
, i
);
6001 * This memory barrier is needed to keep us from reading
6002 * any other fields out of the rx_desc until we know the
6003 * RXD_STAT_DD bit is set
6007 if (!skb_is_nonlinear(skb
)) {
6008 __skb_put(skb
, igb_get_hlen(rx_desc
));
6009 dma_unmap_single(rx_ring
->dev
, buffer_info
->dma
,
6012 buffer_info
->dma
= 0;
6015 if (rx_desc
->wb
.upper
.length
) {
6016 u16 length
= le16_to_cpu(rx_desc
->wb
.upper
.length
);
6018 skb_fill_page_desc(skb
, skb_shinfo(skb
)->nr_frags
,
6020 buffer_info
->page_offset
,
6024 skb
->data_len
+= length
;
6025 skb
->truesize
+= PAGE_SIZE
/ 2;
6027 if ((page_count(buffer_info
->page
) != 1) ||
6028 (page_to_nid(buffer_info
->page
) != current_node
))
6029 buffer_info
->page
= NULL
;
6031 get_page(buffer_info
->page
);
6033 dma_unmap_page(rx_ring
->dev
, buffer_info
->page_dma
,
6034 PAGE_SIZE
/ 2, DMA_FROM_DEVICE
);
6035 buffer_info
->page_dma
= 0;
6038 if (!igb_test_staterr(rx_desc
, E1000_RXD_STAT_EOP
)) {
6039 struct igb_rx_buffer
*next_buffer
;
6040 next_buffer
= &rx_ring
->rx_buffer_info
[i
];
6041 buffer_info
->skb
= next_buffer
->skb
;
6042 buffer_info
->dma
= next_buffer
->dma
;
6043 next_buffer
->skb
= skb
;
6044 next_buffer
->dma
= 0;
6048 if (igb_test_staterr(rx_desc
,
6049 E1000_RXDEXT_ERR_FRAME_ERR_MASK
)) {
6050 dev_kfree_skb_any(skb
);
6054 igb_rx_hwtstamp(q_vector
, rx_desc
, skb
);
6055 igb_rx_hash(rx_ring
, rx_desc
, skb
);
6056 igb_rx_checksum(rx_ring
, rx_desc
, skb
);
6057 igb_rx_vlan(rx_ring
, rx_desc
, skb
);
6059 total_bytes
+= skb
->len
;
6062 skb
->protocol
= eth_type_trans(skb
, rx_ring
->netdev
);
6064 napi_gro_receive(&q_vector
->napi
, skb
);
6072 /* return some buffers to hardware, one at a time is too slow */
6073 if (cleaned_count
>= IGB_RX_BUFFER_WRITE
) {
6074 igb_alloc_rx_buffers(rx_ring
, cleaned_count
);
6078 /* use prefetched values */
6082 rx_ring
->next_to_clean
= i
;
6083 u64_stats_update_begin(&rx_ring
->rx_syncp
);
6084 rx_ring
->rx_stats
.packets
+= total_packets
;
6085 rx_ring
->rx_stats
.bytes
+= total_bytes
;
6086 u64_stats_update_end(&rx_ring
->rx_syncp
);
6087 q_vector
->rx
.total_packets
+= total_packets
;
6088 q_vector
->rx
.total_bytes
+= total_bytes
;
6091 igb_alloc_rx_buffers(rx_ring
, cleaned_count
);
6096 static bool igb_alloc_mapped_skb(struct igb_ring
*rx_ring
,
6097 struct igb_rx_buffer
*bi
)
6099 struct sk_buff
*skb
= bi
->skb
;
6100 dma_addr_t dma
= bi
->dma
;
6106 skb
= netdev_alloc_skb_ip_align(rx_ring
->netdev
,
6110 rx_ring
->rx_stats
.alloc_failed
++;
6114 /* initialize skb for ring */
6115 skb_record_rx_queue(skb
, rx_ring
->queue_index
);
6118 dma
= dma_map_single(rx_ring
->dev
, skb
->data
,
6119 IGB_RX_HDR_LEN
, DMA_FROM_DEVICE
);
6121 if (dma_mapping_error(rx_ring
->dev
, dma
)) {
6122 rx_ring
->rx_stats
.alloc_failed
++;
6130 static bool igb_alloc_mapped_page(struct igb_ring
*rx_ring
,
6131 struct igb_rx_buffer
*bi
)
6133 struct page
*page
= bi
->page
;
6134 dma_addr_t page_dma
= bi
->page_dma
;
6135 unsigned int page_offset
= bi
->page_offset
^ (PAGE_SIZE
/ 2);
6141 page
= netdev_alloc_page(rx_ring
->netdev
);
6143 if (unlikely(!page
)) {
6144 rx_ring
->rx_stats
.alloc_failed
++;
6149 page_dma
= dma_map_page(rx_ring
->dev
, page
,
6150 page_offset
, PAGE_SIZE
/ 2,
6153 if (dma_mapping_error(rx_ring
->dev
, page_dma
)) {
6154 rx_ring
->rx_stats
.alloc_failed
++;
6158 bi
->page_dma
= page_dma
;
6159 bi
->page_offset
= page_offset
;
6164 * igb_alloc_rx_buffers - Replace used receive buffers; packet split
6165 * @adapter: address of board private structure
6167 void igb_alloc_rx_buffers(struct igb_ring
*rx_ring
, u16 cleaned_count
)
6169 union e1000_adv_rx_desc
*rx_desc
;
6170 struct igb_rx_buffer
*bi
;
6171 u16 i
= rx_ring
->next_to_use
;
6173 rx_desc
= IGB_RX_DESC(rx_ring
, i
);
6174 bi
= &rx_ring
->rx_buffer_info
[i
];
6175 i
-= rx_ring
->count
;
6177 while (cleaned_count
--) {
6178 if (!igb_alloc_mapped_skb(rx_ring
, bi
))
6181 /* Refresh the desc even if buffer_addrs didn't change
6182 * because each write-back erases this info. */
6183 rx_desc
->read
.hdr_addr
= cpu_to_le64(bi
->dma
);
6185 if (!igb_alloc_mapped_page(rx_ring
, bi
))
6188 rx_desc
->read
.pkt_addr
= cpu_to_le64(bi
->page_dma
);
6194 rx_desc
= IGB_RX_DESC(rx_ring
, 0);
6195 bi
= rx_ring
->rx_buffer_info
;
6196 i
-= rx_ring
->count
;
6199 /* clear the hdr_addr for the next_to_use descriptor */
6200 rx_desc
->read
.hdr_addr
= 0;
6203 i
+= rx_ring
->count
;
6205 if (rx_ring
->next_to_use
!= i
) {
6206 rx_ring
->next_to_use
= i
;
6208 /* Force memory writes to complete before letting h/w
6209 * know there are new descriptors to fetch. (Only
6210 * applicable for weak-ordered memory model archs,
6211 * such as IA-64). */
6213 writel(i
, rx_ring
->tail
);
6223 static int igb_mii_ioctl(struct net_device
*netdev
, struct ifreq
*ifr
, int cmd
)
6225 struct igb_adapter
*adapter
= netdev_priv(netdev
);
6226 struct mii_ioctl_data
*data
= if_mii(ifr
);
6228 if (adapter
->hw
.phy
.media_type
!= e1000_media_type_copper
)
6233 data
->phy_id
= adapter
->hw
.phy
.addr
;
6236 if (igb_read_phy_reg(&adapter
->hw
, data
->reg_num
& 0x1F,
6248 * igb_hwtstamp_ioctl - control hardware time stamping
6253 * Outgoing time stamping can be enabled and disabled. Play nice and
6254 * disable it when requested, although it shouldn't case any overhead
6255 * when no packet needs it. At most one packet in the queue may be
6256 * marked for time stamping, otherwise it would be impossible to tell
6257 * for sure to which packet the hardware time stamp belongs.
6259 * Incoming time stamping has to be configured via the hardware
6260 * filters. Not all combinations are supported, in particular event
6261 * type has to be specified. Matching the kind of event packet is
6262 * not supported, with the exception of "all V2 events regardless of
6266 static int igb_hwtstamp_ioctl(struct net_device
*netdev
,
6267 struct ifreq
*ifr
, int cmd
)
6269 struct igb_adapter
*adapter
= netdev_priv(netdev
);
6270 struct e1000_hw
*hw
= &adapter
->hw
;
6271 struct hwtstamp_config config
;
6272 u32 tsync_tx_ctl
= E1000_TSYNCTXCTL_ENABLED
;
6273 u32 tsync_rx_ctl
= E1000_TSYNCRXCTL_ENABLED
;
6274 u32 tsync_rx_cfg
= 0;
6279 if (copy_from_user(&config
, ifr
->ifr_data
, sizeof(config
)))
6282 /* reserved for future extensions */
6286 switch (config
.tx_type
) {
6287 case HWTSTAMP_TX_OFF
:
6289 case HWTSTAMP_TX_ON
:
6295 switch (config
.rx_filter
) {
6296 case HWTSTAMP_FILTER_NONE
:
6299 case HWTSTAMP_FILTER_PTP_V1_L4_EVENT
:
6300 case HWTSTAMP_FILTER_PTP_V2_L4_EVENT
:
6301 case HWTSTAMP_FILTER_PTP_V2_L2_EVENT
:
6302 case HWTSTAMP_FILTER_ALL
:
6304 * register TSYNCRXCFG must be set, therefore it is not
6305 * possible to time stamp both Sync and Delay_Req messages
6306 * => fall back to time stamping all packets
6308 tsync_rx_ctl
|= E1000_TSYNCRXCTL_TYPE_ALL
;
6309 config
.rx_filter
= HWTSTAMP_FILTER_ALL
;
6311 case HWTSTAMP_FILTER_PTP_V1_L4_SYNC
:
6312 tsync_rx_ctl
|= E1000_TSYNCRXCTL_TYPE_L4_V1
;
6313 tsync_rx_cfg
= E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE
;
6316 case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ
:
6317 tsync_rx_ctl
|= E1000_TSYNCRXCTL_TYPE_L4_V1
;
6318 tsync_rx_cfg
= E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE
;
6321 case HWTSTAMP_FILTER_PTP_V2_L2_SYNC
:
6322 case HWTSTAMP_FILTER_PTP_V2_L4_SYNC
:
6323 tsync_rx_ctl
|= E1000_TSYNCRXCTL_TYPE_L2_L4_V2
;
6324 tsync_rx_cfg
= E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE
;
6327 config
.rx_filter
= HWTSTAMP_FILTER_SOME
;
6329 case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ
:
6330 case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ
:
6331 tsync_rx_ctl
|= E1000_TSYNCRXCTL_TYPE_L2_L4_V2
;
6332 tsync_rx_cfg
= E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE
;
6335 config
.rx_filter
= HWTSTAMP_FILTER_SOME
;
6337 case HWTSTAMP_FILTER_PTP_V2_EVENT
:
6338 case HWTSTAMP_FILTER_PTP_V2_SYNC
:
6339 case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ
:
6340 tsync_rx_ctl
|= E1000_TSYNCRXCTL_TYPE_EVENT_V2
;
6341 config
.rx_filter
= HWTSTAMP_FILTER_PTP_V2_EVENT
;
6349 if (hw
->mac
.type
== e1000_82575
) {
6350 if (tsync_rx_ctl
| tsync_tx_ctl
)
6356 * Per-packet timestamping only works if all packets are
6357 * timestamped, so enable timestamping in all packets as
6358 * long as one rx filter was configured.
6360 if ((hw
->mac
.type
>= e1000_82580
) && tsync_rx_ctl
) {
6361 tsync_rx_ctl
= E1000_TSYNCRXCTL_ENABLED
;
6362 tsync_rx_ctl
|= E1000_TSYNCRXCTL_TYPE_ALL
;
6365 /* enable/disable TX */
6366 regval
= rd32(E1000_TSYNCTXCTL
);
6367 regval
&= ~E1000_TSYNCTXCTL_ENABLED
;
6368 regval
|= tsync_tx_ctl
;
6369 wr32(E1000_TSYNCTXCTL
, regval
);
6371 /* enable/disable RX */
6372 regval
= rd32(E1000_TSYNCRXCTL
);
6373 regval
&= ~(E1000_TSYNCRXCTL_ENABLED
| E1000_TSYNCRXCTL_TYPE_MASK
);
6374 regval
|= tsync_rx_ctl
;
6375 wr32(E1000_TSYNCRXCTL
, regval
);
6377 /* define which PTP packets are time stamped */
6378 wr32(E1000_TSYNCRXCFG
, tsync_rx_cfg
);
6380 /* define ethertype filter for timestamped packets */
6383 (E1000_ETQF_FILTER_ENABLE
| /* enable filter */
6384 E1000_ETQF_1588
| /* enable timestamping */
6385 ETH_P_1588
)); /* 1588 eth protocol type */
6387 wr32(E1000_ETQF(3), 0);
6389 #define PTP_PORT 319
6390 /* L4 Queue Filter[3]: filter by destination port and protocol */
6392 u32 ftqf
= (IPPROTO_UDP
/* UDP */
6393 | E1000_FTQF_VF_BP
/* VF not compared */
6394 | E1000_FTQF_1588_TIME_STAMP
/* Enable Timestamping */
6395 | E1000_FTQF_MASK
); /* mask all inputs */
6396 ftqf
&= ~E1000_FTQF_MASK_PROTO_BP
; /* enable protocol check */
6398 wr32(E1000_IMIR(3), htons(PTP_PORT
));
6399 wr32(E1000_IMIREXT(3),
6400 (E1000_IMIREXT_SIZE_BP
| E1000_IMIREXT_CTRL_BP
));
6401 if (hw
->mac
.type
== e1000_82576
) {
6402 /* enable source port check */
6403 wr32(E1000_SPQF(3), htons(PTP_PORT
));
6404 ftqf
&= ~E1000_FTQF_MASK_SOURCE_PORT_BP
;
6406 wr32(E1000_FTQF(3), ftqf
);
6408 wr32(E1000_FTQF(3), E1000_FTQF_MASK
);
6412 adapter
->hwtstamp_config
= config
;
6414 /* clear TX/RX time stamp registers, just to be sure */
6415 regval
= rd32(E1000_TXSTMPH
);
6416 regval
= rd32(E1000_RXSTMPH
);
6418 return copy_to_user(ifr
->ifr_data
, &config
, sizeof(config
)) ?
6428 static int igb_ioctl(struct net_device
*netdev
, struct ifreq
*ifr
, int cmd
)
6434 return igb_mii_ioctl(netdev
, ifr
, cmd
);
6436 return igb_hwtstamp_ioctl(netdev
, ifr
, cmd
);
6442 s32
igb_read_pcie_cap_reg(struct e1000_hw
*hw
, u32 reg
, u16
*value
)
6444 struct igb_adapter
*adapter
= hw
->back
;
6447 cap_offset
= adapter
->pdev
->pcie_cap
;
6449 return -E1000_ERR_CONFIG
;
6451 pci_read_config_word(adapter
->pdev
, cap_offset
+ reg
, value
);
6456 s32
igb_write_pcie_cap_reg(struct e1000_hw
*hw
, u32 reg
, u16
*value
)
6458 struct igb_adapter
*adapter
= hw
->back
;
6461 cap_offset
= adapter
->pdev
->pcie_cap
;
6463 return -E1000_ERR_CONFIG
;
6465 pci_write_config_word(adapter
->pdev
, cap_offset
+ reg
, *value
);
6470 static void igb_vlan_mode(struct net_device
*netdev
, u32 features
)
6472 struct igb_adapter
*adapter
= netdev_priv(netdev
);
6473 struct e1000_hw
*hw
= &adapter
->hw
;
6475 bool enable
= !!(features
& NETIF_F_HW_VLAN_RX
);
6478 /* enable VLAN tag insert/strip */
6479 ctrl
= rd32(E1000_CTRL
);
6480 ctrl
|= E1000_CTRL_VME
;
6481 wr32(E1000_CTRL
, ctrl
);
6483 /* Disable CFI check */
6484 rctl
= rd32(E1000_RCTL
);
6485 rctl
&= ~E1000_RCTL_CFIEN
;
6486 wr32(E1000_RCTL
, rctl
);
6488 /* disable VLAN tag insert/strip */
6489 ctrl
= rd32(E1000_CTRL
);
6490 ctrl
&= ~E1000_CTRL_VME
;
6491 wr32(E1000_CTRL
, ctrl
);
6494 igb_rlpml_set(adapter
);
6497 static void igb_vlan_rx_add_vid(struct net_device
*netdev
, u16 vid
)
6499 struct igb_adapter
*adapter
= netdev_priv(netdev
);
6500 struct e1000_hw
*hw
= &adapter
->hw
;
6501 int pf_id
= adapter
->vfs_allocated_count
;
6503 /* attempt to add filter to vlvf array */
6504 igb_vlvf_set(adapter
, vid
, true, pf_id
);
6506 /* add the filter since PF can receive vlans w/o entry in vlvf */
6507 igb_vfta_set(hw
, vid
, true);
6509 set_bit(vid
, adapter
->active_vlans
);
6512 static void igb_vlan_rx_kill_vid(struct net_device
*netdev
, u16 vid
)
6514 struct igb_adapter
*adapter
= netdev_priv(netdev
);
6515 struct e1000_hw
*hw
= &adapter
->hw
;
6516 int pf_id
= adapter
->vfs_allocated_count
;
6519 /* remove vlan from VLVF table array */
6520 err
= igb_vlvf_set(adapter
, vid
, false, pf_id
);
6522 /* if vid was not present in VLVF just remove it from table */
6524 igb_vfta_set(hw
, vid
, false);
6526 clear_bit(vid
, adapter
->active_vlans
);
6529 static void igb_restore_vlan(struct igb_adapter
*adapter
)
6533 igb_vlan_mode(adapter
->netdev
, adapter
->netdev
->features
);
6535 for_each_set_bit(vid
, adapter
->active_vlans
, VLAN_N_VID
)
6536 igb_vlan_rx_add_vid(adapter
->netdev
, vid
);
6539 int igb_set_spd_dplx(struct igb_adapter
*adapter
, u32 spd
, u8 dplx
)
6541 struct pci_dev
*pdev
= adapter
->pdev
;
6542 struct e1000_mac_info
*mac
= &adapter
->hw
.mac
;
6546 /* Make sure dplx is at most 1 bit and lsb of speed is not set
6547 * for the switch() below to work */
6548 if ((spd
& 1) || (dplx
& ~1))
6551 /* Fiber NIC's only allow 1000 Gbps Full duplex */
6552 if ((adapter
->hw
.phy
.media_type
== e1000_media_type_internal_serdes
) &&
6553 spd
!= SPEED_1000
&&
6554 dplx
!= DUPLEX_FULL
)
6557 switch (spd
+ dplx
) {
6558 case SPEED_10
+ DUPLEX_HALF
:
6559 mac
->forced_speed_duplex
= ADVERTISE_10_HALF
;
6561 case SPEED_10
+ DUPLEX_FULL
:
6562 mac
->forced_speed_duplex
= ADVERTISE_10_FULL
;
6564 case SPEED_100
+ DUPLEX_HALF
:
6565 mac
->forced_speed_duplex
= ADVERTISE_100_HALF
;
6567 case SPEED_100
+ DUPLEX_FULL
:
6568 mac
->forced_speed_duplex
= ADVERTISE_100_FULL
;
6570 case SPEED_1000
+ DUPLEX_FULL
:
6572 adapter
->hw
.phy
.autoneg_advertised
= ADVERTISE_1000_FULL
;
6574 case SPEED_1000
+ DUPLEX_HALF
: /* not supported */
6581 dev_err(&pdev
->dev
, "Unsupported Speed/Duplex configuration\n");
6585 static int __igb_shutdown(struct pci_dev
*pdev
, bool *enable_wake
)
6587 struct net_device
*netdev
= pci_get_drvdata(pdev
);
6588 struct igb_adapter
*adapter
= netdev_priv(netdev
);
6589 struct e1000_hw
*hw
= &adapter
->hw
;
6590 u32 ctrl
, rctl
, status
;
6591 u32 wufc
= adapter
->wol
;
6596 netif_device_detach(netdev
);
6598 if (netif_running(netdev
))
6601 igb_clear_interrupt_scheme(adapter
);
6604 retval
= pci_save_state(pdev
);
6609 status
= rd32(E1000_STATUS
);
6610 if (status
& E1000_STATUS_LU
)
6611 wufc
&= ~E1000_WUFC_LNKC
;
6614 igb_setup_rctl(adapter
);
6615 igb_set_rx_mode(netdev
);
6617 /* turn on all-multi mode if wake on multicast is enabled */
6618 if (wufc
& E1000_WUFC_MC
) {
6619 rctl
= rd32(E1000_RCTL
);
6620 rctl
|= E1000_RCTL_MPE
;
6621 wr32(E1000_RCTL
, rctl
);
6624 ctrl
= rd32(E1000_CTRL
);
6625 /* advertise wake from D3Cold */
6626 #define E1000_CTRL_ADVD3WUC 0x00100000
6627 /* phy power management enable */
6628 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6629 ctrl
|= E1000_CTRL_ADVD3WUC
;
6630 wr32(E1000_CTRL
, ctrl
);
6632 /* Allow time for pending master requests to run */
6633 igb_disable_pcie_master(hw
);
6635 wr32(E1000_WUC
, E1000_WUC_PME_EN
);
6636 wr32(E1000_WUFC
, wufc
);
6639 wr32(E1000_WUFC
, 0);
6642 *enable_wake
= wufc
|| adapter
->en_mng_pt
;
6644 igb_power_down_link(adapter
);
6646 igb_power_up_link(adapter
);
6648 /* Release control of h/w to f/w. If f/w is AMT enabled, this
6649 * would have already happened in close and is redundant. */
6650 igb_release_hw_control(adapter
);
6652 pci_disable_device(pdev
);
6658 static int igb_suspend(struct pci_dev
*pdev
, pm_message_t state
)
6663 retval
= __igb_shutdown(pdev
, &wake
);
6668 pci_prepare_to_sleep(pdev
);
6670 pci_wake_from_d3(pdev
, false);
6671 pci_set_power_state(pdev
, PCI_D3hot
);
6677 static int igb_resume(struct pci_dev
*pdev
)
6679 struct net_device
*netdev
= pci_get_drvdata(pdev
);
6680 struct igb_adapter
*adapter
= netdev_priv(netdev
);
6681 struct e1000_hw
*hw
= &adapter
->hw
;
6684 pci_set_power_state(pdev
, PCI_D0
);
6685 pci_restore_state(pdev
);
6686 pci_save_state(pdev
);
6688 err
= pci_enable_device_mem(pdev
);
6691 "igb: Cannot enable PCI device from suspend\n");
6694 pci_set_master(pdev
);
6696 pci_enable_wake(pdev
, PCI_D3hot
, 0);
6697 pci_enable_wake(pdev
, PCI_D3cold
, 0);
6699 if (igb_init_interrupt_scheme(adapter
)) {
6700 dev_err(&pdev
->dev
, "Unable to allocate memory for queues\n");
6706 /* let the f/w know that the h/w is now under the control of the
6708 igb_get_hw_control(adapter
);
6710 wr32(E1000_WUS
, ~0);
6712 if (netif_running(netdev
)) {
6713 err
= igb_open(netdev
);
6718 netif_device_attach(netdev
);
6724 static void igb_shutdown(struct pci_dev
*pdev
)
6728 __igb_shutdown(pdev
, &wake
);
6730 if (system_state
== SYSTEM_POWER_OFF
) {
6731 pci_wake_from_d3(pdev
, wake
);
6732 pci_set_power_state(pdev
, PCI_D3hot
);
#ifdef CONFIG_NET_POLL_CONTROLLER
/*
 * Polling 'interrupt' - used by things like netconsole to send skbs
 * without having to re-enable interrupts. It's not called while
 * the interrupt routine is executing.
 *
 * For every queue vector the matching interrupt is masked (per-vector via
 * EIMC when MSI-X entries exist, otherwise all interrupts) and its NAPI
 * context is scheduled.
 */
static void igb_netpoll(struct net_device *netdev)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	struct igb_q_vector *q_vector;
	int i;

	for (i = 0; i < adapter->num_q_vectors; i++) {
		q_vector = adapter->q_vector[i];

		if (adapter->msix_entries)
			wr32(E1000_EIMC, q_vector->eims_value);
		else
			igb_irq_disable(adapter);

		napi_schedule(&q_vector->napi);
	}
}
#endif /* CONFIG_NET_POLL_CONTROLLER */
6761 * igb_io_error_detected - called when PCI error is detected
6762 * @pdev: Pointer to PCI device
6763 * @state: The current pci connection state
6765 * This function is called after a PCI bus error affecting
6766 * this device has been detected.
6768 static pci_ers_result_t
igb_io_error_detected(struct pci_dev
*pdev
,
6769 pci_channel_state_t state
)
6771 struct net_device
*netdev
= pci_get_drvdata(pdev
);
6772 struct igb_adapter
*adapter
= netdev_priv(netdev
);
6774 netif_device_detach(netdev
);
6776 if (state
== pci_channel_io_perm_failure
)
6777 return PCI_ERS_RESULT_DISCONNECT
;
6779 if (netif_running(netdev
))
6781 pci_disable_device(pdev
);
6783 /* Request a slot slot reset. */
6784 return PCI_ERS_RESULT_NEED_RESET
;
6788 * igb_io_slot_reset - called after the pci bus has been reset.
6789 * @pdev: Pointer to PCI device
6791 * Restart the card from scratch, as if from a cold-boot. Implementation
6792 * resembles the first-half of the igb_resume routine.
6794 static pci_ers_result_t
igb_io_slot_reset(struct pci_dev
*pdev
)
6796 struct net_device
*netdev
= pci_get_drvdata(pdev
);
6797 struct igb_adapter
*adapter
= netdev_priv(netdev
);
6798 struct e1000_hw
*hw
= &adapter
->hw
;
6799 pci_ers_result_t result
;
6802 if (pci_enable_device_mem(pdev
)) {
6804 "Cannot re-enable PCI device after reset.\n");
6805 result
= PCI_ERS_RESULT_DISCONNECT
;
6807 pci_set_master(pdev
);
6808 pci_restore_state(pdev
);
6809 pci_save_state(pdev
);
6811 pci_enable_wake(pdev
, PCI_D3hot
, 0);
6812 pci_enable_wake(pdev
, PCI_D3cold
, 0);
6815 wr32(E1000_WUS
, ~0);
6816 result
= PCI_ERS_RESULT_RECOVERED
;
6819 err
= pci_cleanup_aer_uncorrect_error_status(pdev
);
6821 dev_err(&pdev
->dev
, "pci_cleanup_aer_uncorrect_error_status "
6822 "failed 0x%0x\n", err
);
6823 /* non-fatal, continue */
6830 * igb_io_resume - called when traffic can start flowing again.
6831 * @pdev: Pointer to PCI device
6833 * This callback is called when the error recovery driver tells us that
6834 * its OK to resume normal operation. Implementation resembles the
6835 * second-half of the igb_resume routine.
6837 static void igb_io_resume(struct pci_dev
*pdev
)
6839 struct net_device
*netdev
= pci_get_drvdata(pdev
);
6840 struct igb_adapter
*adapter
= netdev_priv(netdev
);
6842 if (netif_running(netdev
)) {
6843 if (igb_up(adapter
)) {
6844 dev_err(&pdev
->dev
, "igb_up failed after reset\n");
6849 netif_device_attach(netdev
);
6851 /* let the f/w know that the h/w is now under the control of the
6853 igb_get_hw_control(adapter
);
6856 static void igb_rar_set_qsel(struct igb_adapter
*adapter
, u8
*addr
, u32 index
,
6859 u32 rar_low
, rar_high
;
6860 struct e1000_hw
*hw
= &adapter
->hw
;
6862 /* HW expects these in little endian so we reverse the byte order
6863 * from network order (big endian) to little endian
6865 rar_low
= ((u32
) addr
[0] | ((u32
) addr
[1] << 8) |
6866 ((u32
) addr
[2] << 16) | ((u32
) addr
[3] << 24));
6867 rar_high
= ((u32
) addr
[4] | ((u32
) addr
[5] << 8));
6869 /* Indicate to hardware the Address is Valid. */
6870 rar_high
|= E1000_RAH_AV
;
6872 if (hw
->mac
.type
== e1000_82575
)
6873 rar_high
|= E1000_RAH_POOL_1
* qsel
;
6875 rar_high
|= E1000_RAH_POOL_1
<< qsel
;
6877 wr32(E1000_RAL(index
), rar_low
);
6879 wr32(E1000_RAH(index
), rar_high
);
6883 static int igb_set_vf_mac(struct igb_adapter
*adapter
,
6884 int vf
, unsigned char *mac_addr
)
6886 struct e1000_hw
*hw
= &adapter
->hw
;
6887 /* VF MAC addresses start at end of receive addresses and moves
6888 * torwards the first, as a result a collision should not be possible */
6889 int rar_entry
= hw
->mac
.rar_entry_count
- (vf
+ 1);
6891 memcpy(adapter
->vf_data
[vf
].vf_mac_addresses
, mac_addr
, ETH_ALEN
);
6893 igb_rar_set_qsel(adapter
, mac_addr
, rar_entry
, vf
);
6898 static int igb_ndo_set_vf_mac(struct net_device
*netdev
, int vf
, u8
*mac
)
6900 struct igb_adapter
*adapter
= netdev_priv(netdev
);
6901 if (!is_valid_ether_addr(mac
) || (vf
>= adapter
->vfs_allocated_count
))
6903 adapter
->vf_data
[vf
].flags
|= IGB_VF_FLAG_PF_SET_MAC
;
6904 dev_info(&adapter
->pdev
->dev
, "setting MAC %pM on VF %d\n", mac
, vf
);
6905 dev_info(&adapter
->pdev
->dev
, "Reload the VF driver to make this"
6906 " change effective.");
6907 if (test_bit(__IGB_DOWN
, &adapter
->state
)) {
6908 dev_warn(&adapter
->pdev
->dev
, "The VF MAC address has been set,"
6909 " but the PF device is not up.\n");
6910 dev_warn(&adapter
->pdev
->dev
, "Bring the PF device up before"
6911 " attempting to use the VF device.\n");
6913 return igb_set_vf_mac(adapter
, vf
, mac
);
6916 static int igb_link_mbps(int internal_link_speed
)
6918 switch (internal_link_speed
) {
6928 static void igb_set_vf_rate_limit(struct e1000_hw
*hw
, int vf
, int tx_rate
,
6935 /* Calculate the rate factor values to set */
6936 rf_int
= link_speed
/ tx_rate
;
6937 rf_dec
= (link_speed
- (rf_int
* tx_rate
));
6938 rf_dec
= (rf_dec
* (1<<E1000_RTTBCNRC_RF_INT_SHIFT
)) / tx_rate
;
6940 bcnrc_val
= E1000_RTTBCNRC_RS_ENA
;
6941 bcnrc_val
|= ((rf_int
<<E1000_RTTBCNRC_RF_INT_SHIFT
) &
6942 E1000_RTTBCNRC_RF_INT_MASK
);
6943 bcnrc_val
|= (rf_dec
& E1000_RTTBCNRC_RF_DEC_MASK
);
6948 wr32(E1000_RTTDQSEL
, vf
); /* vf X uses queue X */
6949 wr32(E1000_RTTBCNRC
, bcnrc_val
);
6952 static void igb_check_vf_rate_limit(struct igb_adapter
*adapter
)
6954 int actual_link_speed
, i
;
6955 bool reset_rate
= false;
6957 /* VF TX rate limit was not set or not supported */
6958 if ((adapter
->vf_rate_link_speed
== 0) ||
6959 (adapter
->hw
.mac
.type
!= e1000_82576
))
6962 actual_link_speed
= igb_link_mbps(adapter
->link_speed
);
6963 if (actual_link_speed
!= adapter
->vf_rate_link_speed
) {
6965 adapter
->vf_rate_link_speed
= 0;
6966 dev_info(&adapter
->pdev
->dev
,
6967 "Link speed has been changed. VF Transmit "
6968 "rate is disabled\n");
6971 for (i
= 0; i
< adapter
->vfs_allocated_count
; i
++) {
6973 adapter
->vf_data
[i
].tx_rate
= 0;
6975 igb_set_vf_rate_limit(&adapter
->hw
, i
,
6976 adapter
->vf_data
[i
].tx_rate
,
6981 static int igb_ndo_set_vf_bw(struct net_device
*netdev
, int vf
, int tx_rate
)
6983 struct igb_adapter
*adapter
= netdev_priv(netdev
);
6984 struct e1000_hw
*hw
= &adapter
->hw
;
6985 int actual_link_speed
;
6987 if (hw
->mac
.type
!= e1000_82576
)
6990 actual_link_speed
= igb_link_mbps(adapter
->link_speed
);
6991 if ((vf
>= adapter
->vfs_allocated_count
) ||
6992 (!(rd32(E1000_STATUS
) & E1000_STATUS_LU
)) ||
6993 (tx_rate
< 0) || (tx_rate
> actual_link_speed
))
6996 adapter
->vf_rate_link_speed
= actual_link_speed
;
6997 adapter
->vf_data
[vf
].tx_rate
= (u16
)tx_rate
;
6998 igb_set_vf_rate_limit(hw
, vf
, tx_rate
, actual_link_speed
);
7003 static int igb_ndo_get_vf_config(struct net_device
*netdev
,
7004 int vf
, struct ifla_vf_info
*ivi
)
7006 struct igb_adapter
*adapter
= netdev_priv(netdev
);
7007 if (vf
>= adapter
->vfs_allocated_count
)
7010 memcpy(&ivi
->mac
, adapter
->vf_data
[vf
].vf_mac_addresses
, ETH_ALEN
);
7011 ivi
->tx_rate
= adapter
->vf_data
[vf
].tx_rate
;
7012 ivi
->vlan
= adapter
->vf_data
[vf
].pf_vlan
;
7013 ivi
->qos
= adapter
->vf_data
[vf
].pf_qos
;
7017 static void igb_vmm_control(struct igb_adapter
*adapter
)
7019 struct e1000_hw
*hw
= &adapter
->hw
;
7022 switch (hw
->mac
.type
) {
7025 /* replication is not supported for 82575 */
7028 /* notify HW that the MAC is adding vlan tags */
7029 reg
= rd32(E1000_DTXCTL
);
7030 reg
|= E1000_DTXCTL_VLAN_ADDED
;
7031 wr32(E1000_DTXCTL
, reg
);
7033 /* enable replication vlan tag stripping */
7034 reg
= rd32(E1000_RPLOLR
);
7035 reg
|= E1000_RPLOLR_STRVLAN
;
7036 wr32(E1000_RPLOLR
, reg
);
7038 /* none of the above registers are supported by i350 */
7042 if (adapter
->vfs_allocated_count
) {
7043 igb_vmdq_set_loopback_pf(hw
, true);
7044 igb_vmdq_set_replication_pf(hw
, true);
7045 igb_vmdq_set_anti_spoofing_pf(hw
, true,
7046 adapter
->vfs_allocated_count
);
7048 igb_vmdq_set_loopback_pf(hw
, false);
7049 igb_vmdq_set_replication_pf(hw
, false);
7053 static void igb_init_dmac(struct igb_adapter
*adapter
, u32 pba
)
7055 struct e1000_hw
*hw
= &adapter
->hw
;
7059 if (hw
->mac
.type
> e1000_82580
) {
7060 if (adapter
->flags
& IGB_FLAG_DMAC
) {
7063 /* force threshold to 0. */
7064 wr32(E1000_DMCTXTH
, 0);
7067 * DMA Coalescing high water mark needs to be higher
7068 * than the RX threshold. set hwm to PBA - 2 * max
7071 hwm
= pba
- (2 * adapter
->max_frame_size
);
7072 reg
= rd32(E1000_DMACR
);
7073 reg
&= ~E1000_DMACR_DMACTHR_MASK
;
7076 reg
|= ((dmac_thr
<< E1000_DMACR_DMACTHR_SHIFT
)
7077 & E1000_DMACR_DMACTHR_MASK
);
7079 /* transition to L0x or L1 if available..*/
7080 reg
|= (E1000_DMACR_DMAC_EN
| E1000_DMACR_DMAC_LX_MASK
);
7082 /* watchdog timer= +-1000 usec in 32usec intervals */
7084 wr32(E1000_DMACR
, reg
);
7087 * no lower threshold to disable
7088 * coalescing(smart fifb)-UTRESH=0
7090 wr32(E1000_DMCRTRH
, 0);
7091 wr32(E1000_FCRTC
, hwm
);
7093 reg
= (IGB_DMCTLX_DCFLUSH_DIS
| 0x4);
7095 wr32(E1000_DMCTLX
, reg
);
7098 * free space in tx packet buffer to wake from
7101 wr32(E1000_DMCTXTH
, (IGB_MIN_TXPBSIZE
-
7102 (IGB_TX_BUF_4096
+ adapter
->max_frame_size
)) >> 6);
7105 * make low power state decision controlled
7108 reg
= rd32(E1000_PCIEMISC
);
7109 reg
&= ~E1000_PCIEMISC_LX_DECISION
;
7110 wr32(E1000_PCIEMISC
, reg
);
7111 } /* endif adapter->dmac is not disabled */
7112 } else if (hw
->mac
.type
== e1000_82580
) {
7113 u32 reg
= rd32(E1000_PCIEMISC
);
7114 wr32(E1000_PCIEMISC
, reg
& ~E1000_PCIEMISC_LX_DECISION
);
7115 wr32(E1000_DMACR
, 0);