Revert "gdbstub: Do not kill target in system emulation mode"
[qemu/qmp-unstable.git] / hw / net / e1000.c
blob091d61acc3b2e713a789f0d68a255675dc0278f2
1 /*
2 * QEMU e1000 emulation
4 * Software developer's manual:
5 * http://download.intel.com/design/network/manuals/8254x_GBe_SDM.pdf
7 * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
8 * Copyright (c) 2008 Qumranet
9 * Based on work done by:
10 * Copyright (c) 2007 Dan Aloni
11 * Copyright (c) 2004 Antony T Curtis
13 * This library is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU Lesser General Public
15 * License as published by the Free Software Foundation; either
16 * version 2 of the License, or (at your option) any later version.
18 * This library is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 * Lesser General Public License for more details.
23 * You should have received a copy of the GNU Lesser General Public
24 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
28 #include "hw/hw.h"
29 #include "hw/pci/pci.h"
30 #include "net/net.h"
31 #include "net/checksum.h"
32 #include "hw/loader.h"
33 #include "sysemu/sysemu.h"
34 #include "sysemu/dma.h"
35 #include "qemu/iov.h"
36 #include "qemu/range.h"
38 #include "e1000_regs.h"
#define E1000_DEBUG

#ifdef E1000_DEBUG
/* Debug categories; each enumerator is a bit position within debugflags. */
enum {
    DEBUG_GENERAL,
    DEBUG_IO,
    DEBUG_MMIO,
    DEBUG_INTERRUPT,
    DEBUG_RX,
    DEBUG_TX,
    DEBUG_MDIC,
    DEBUG_EEPROM,
    DEBUG_UNKNOWN,
    DEBUG_TXSUM,
    DEBUG_TXERR,
    DEBUG_RXERR,
    DEBUG_RXFILTER,
    DEBUG_PHY,
    DEBUG_NOTYET,
};

/* Mask for category x, where x is the enumerator name without "DEBUG_". */
#define DBGBIT(x)    (1<<DEBUG_##x)

/* Set of categories whose messages are actually printed. */
static int debugflags = DBGBIT(TXERR) | DBGBIT(GENERAL);

/* Print a printf-style message to stderr if category `what` is enabled. */
#define DBGOUT(what, fmt, ...) do { \
    if (debugflags & DBGBIT(what)) { \
        fprintf(stderr, "e1000: " fmt, ## __VA_ARGS__); \
    } \
} while (0)
#else
/* Debugging compiled out: DBGOUT expands to an empty statement. */
#define DBGOUT(what, fmt, ...) do {} while (0)
#endif
60 #define IOPORT_SIZE 0x40
61 #define PNPMMIO_SIZE 0x20000
62 #define MIN_BUF_SIZE 60 /* Min. octets in an ethernet frame sans FCS */
64 /* this is the size past which hardware will drop packets when setting LPE=0 */
65 #define MAXIMUM_ETHERNET_VLAN_SIZE 1522
66 /* this is the size past which hardware will drop packets when setting LPE=1 */
67 #define MAXIMUM_ETHERNET_LPE_SIZE 16384
69 #define MAXIMUM_ETHERNET_HDR_LEN (14+4)
/*
 * HW models:
 *  E1000_DEV_ID_82540EM works with Windows, Linux, and OS X <= 10.8
 *  E1000_DEV_ID_82544GC_COPPER appears to work; not well tested
 *  E1000_DEV_ID_82545EM_COPPER works with Linux and OS X >= 10.6
 *  Others never tested
 */
79 typedef struct E1000State_st {
80 /*< private >*/
81 PCIDevice parent_obj;
82 /*< public >*/
84 NICState *nic;
85 NICConf conf;
86 MemoryRegion mmio;
87 MemoryRegion io;
89 uint32_t mac_reg[0x8000];
90 uint16_t phy_reg[0x20];
91 uint16_t eeprom_data[64];
93 uint32_t rxbuf_size;
94 uint32_t rxbuf_min_shift;
95 struct e1000_tx {
96 unsigned char header[256];
97 unsigned char vlan_header[4];
98 /* Fields vlan and data must not be reordered or separated. */
99 unsigned char vlan[4];
100 unsigned char data[0x10000];
101 uint16_t size;
102 unsigned char sum_needed;
103 unsigned char vlan_needed;
104 uint8_t ipcss;
105 uint8_t ipcso;
106 uint16_t ipcse;
107 uint8_t tucss;
108 uint8_t tucso;
109 uint16_t tucse;
110 uint8_t hdr_len;
111 uint16_t mss;
112 uint32_t paylen;
113 uint16_t tso_frames;
114 char tse;
115 int8_t ip;
116 int8_t tcp;
117 char cptse; // current packet tse bit
118 } tx;
120 struct {
121 uint32_t val_in; // shifted in from guest driver
122 uint16_t bitnum_in;
123 uint16_t bitnum_out;
124 uint16_t reading;
125 uint32_t old_eecd;
126 } eecd_state;
128 QEMUTimer *autoneg_timer;
130 QEMUTimer *mit_timer; /* Mitigation timer. */
131 bool mit_timer_on; /* Mitigation timer is running. */
132 bool mit_irq_level; /* Tracks interrupt pin level. */
133 uint32_t mit_ide; /* Tracks E1000_TXD_CMD_IDE bit. */
135 /* Compatibility flags for migration to/from qemu 1.3.0 and older */
136 #define E1000_FLAG_AUTONEG_BIT 0
137 #define E1000_FLAG_MIT_BIT 1
138 #define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT)
139 #define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT)
140 uint32_t compat_flags;
141 } E1000State;
143 typedef struct E1000BaseClass {
144 PCIDeviceClass parent_class;
145 uint16_t phy_id2;
146 } E1000BaseClass;
148 #define TYPE_E1000_BASE "e1000-base"
150 #define E1000(obj) \
151 OBJECT_CHECK(E1000State, (obj), TYPE_E1000_BASE)
153 #define E1000_DEVICE_CLASS(klass) \
154 OBJECT_CLASS_CHECK(E1000BaseClass, (klass), TYPE_E1000_BASE)
155 #define E1000_DEVICE_GET_CLASS(obj) \
156 OBJECT_GET_CLASS(E1000BaseClass, (obj), TYPE_E1000_BASE)
158 #define defreg(x) x = (E1000_##x>>2)
159 enum {
160 defreg(CTRL), defreg(EECD), defreg(EERD), defreg(GPRC),
161 defreg(GPTC), defreg(ICR), defreg(ICS), defreg(IMC),
162 defreg(IMS), defreg(LEDCTL), defreg(MANC), defreg(MDIC),
163 defreg(MPC), defreg(PBA), defreg(RCTL), defreg(RDBAH),
164 defreg(RDBAL), defreg(RDH), defreg(RDLEN), defreg(RDT),
165 defreg(STATUS), defreg(SWSM), defreg(TCTL), defreg(TDBAH),
166 defreg(TDBAL), defreg(TDH), defreg(TDLEN), defreg(TDT),
167 defreg(TORH), defreg(TORL), defreg(TOTH), defreg(TOTL),
168 defreg(TPR), defreg(TPT), defreg(TXDCTL), defreg(WUFC),
169 defreg(RA), defreg(MTA), defreg(CRCERRS),defreg(VFTA),
170 defreg(VET), defreg(RDTR), defreg(RADV), defreg(TADV),
171 defreg(ITR),
174 static void
175 e1000_link_down(E1000State *s)
177 s->mac_reg[STATUS] &= ~E1000_STATUS_LU;
178 s->phy_reg[PHY_STATUS] &= ~MII_SR_LINK_STATUS;
179 s->phy_reg[PHY_STATUS] &= ~MII_SR_AUTONEG_COMPLETE;
180 s->phy_reg[PHY_LP_ABILITY] &= ~MII_LPAR_LPACK;
183 static void
184 e1000_link_up(E1000State *s)
186 s->mac_reg[STATUS] |= E1000_STATUS_LU;
187 s->phy_reg[PHY_STATUS] |= MII_SR_LINK_STATUS;
190 static bool
191 have_autoneg(E1000State *s)
193 return (s->compat_flags & E1000_FLAG_AUTONEG) &&
194 (s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN);
197 static void
198 set_phy_ctrl(E1000State *s, int index, uint16_t val)
200 /* bits 0-5 reserved; MII_CR_[RESTART_AUTO_NEG,RESET] are self clearing */
201 s->phy_reg[PHY_CTRL] = val & ~(0x3f |
202 MII_CR_RESET |
203 MII_CR_RESTART_AUTO_NEG);
206 * QEMU 1.3 does not support link auto-negotiation emulation, so if we
207 * migrate during auto negotiation, after migration the link will be
208 * down.
210 if (have_autoneg(s) && (val & MII_CR_RESTART_AUTO_NEG)) {
211 e1000_link_down(s);
212 DBGOUT(PHY, "Start link auto negotiation\n");
213 timer_mod(s->autoneg_timer,
214 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
218 static void (*phyreg_writeops[])(E1000State *, int, uint16_t) = {
219 [PHY_CTRL] = set_phy_ctrl,
222 enum { NPHYWRITEOPS = ARRAY_SIZE(phyreg_writeops) };
224 enum { PHY_R = 1, PHY_W = 2, PHY_RW = PHY_R | PHY_W };
225 static const char phy_regcap[0x20] = {
226 [PHY_STATUS] = PHY_R, [M88E1000_EXT_PHY_SPEC_CTRL] = PHY_RW,
227 [PHY_ID1] = PHY_R, [M88E1000_PHY_SPEC_CTRL] = PHY_RW,
228 [PHY_CTRL] = PHY_RW, [PHY_1000T_CTRL] = PHY_RW,
229 [PHY_LP_ABILITY] = PHY_R, [PHY_1000T_STATUS] = PHY_R,
230 [PHY_AUTONEG_ADV] = PHY_RW, [M88E1000_RX_ERR_CNTR] = PHY_R,
231 [PHY_ID2] = PHY_R, [M88E1000_PHY_SPEC_STATUS] = PHY_R,
232 [PHY_AUTONEG_EXP] = PHY_R,
235 /* PHY_ID2 documented in 8254x_GBe_SDM.pdf, pp. 250 */
236 static const uint16_t phy_reg_init[] = {
237 [PHY_CTRL] = MII_CR_SPEED_SELECT_MSB |
238 MII_CR_FULL_DUPLEX |
239 MII_CR_AUTO_NEG_EN,
241 [PHY_STATUS] = MII_SR_EXTENDED_CAPS |
242 MII_SR_LINK_STATUS | /* link initially up */
243 MII_SR_AUTONEG_CAPS |
244 /* MII_SR_AUTONEG_COMPLETE: initially NOT completed */
245 MII_SR_PREAMBLE_SUPPRESS |
246 MII_SR_EXTENDED_STATUS |
247 MII_SR_10T_HD_CAPS |
248 MII_SR_10T_FD_CAPS |
249 MII_SR_100X_HD_CAPS |
250 MII_SR_100X_FD_CAPS,
252 [PHY_ID1] = 0x141,
253 /* [PHY_ID2] configured per DevId, from e1000_reset() */
254 [PHY_AUTONEG_ADV] = 0xde1,
255 [PHY_LP_ABILITY] = 0x1e0,
256 [PHY_1000T_CTRL] = 0x0e00,
257 [PHY_1000T_STATUS] = 0x3c00,
258 [M88E1000_PHY_SPEC_CTRL] = 0x360,
259 [M88E1000_PHY_SPEC_STATUS] = 0xac00,
260 [M88E1000_EXT_PHY_SPEC_CTRL] = 0x0d60,
263 static const uint32_t mac_reg_init[] = {
264 [PBA] = 0x00100030,
265 [LEDCTL] = 0x602,
266 [CTRL] = E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 |
267 E1000_CTRL_SPD_1000 | E1000_CTRL_SLU,
268 [STATUS] = 0x80000000 | E1000_STATUS_GIO_MASTER_ENABLE |
269 E1000_STATUS_ASDV | E1000_STATUS_MTXCKOK |
270 E1000_STATUS_SPEED_1000 | E1000_STATUS_FD |
271 E1000_STATUS_LU,
272 [MANC] = E1000_MANC_EN_MNG2HOST | E1000_MANC_RCV_TCO_EN |
273 E1000_MANC_ARP_EN | E1000_MANC_0298_EN |
274 E1000_MANC_RMCP_EN,
/*
 * Keep the smallest non-zero delay seen so far in *curr.
 *
 * *curr == 0 means "no delay recorded yet"; a value of 0 is ignored.
 */
static inline void
mit_update_delay(uint32_t *curr, uint32_t value)
{
    if (value == 0) {
        return;                 /* nothing to contribute */
    }
    if (*curr != 0 && *curr <= value) {
        return;                 /* already have an equal or shorter delay */
    }
    *curr = value;
}
286 static void
287 set_interrupt_cause(E1000State *s, int index, uint32_t val)
289 PCIDevice *d = PCI_DEVICE(s);
290 uint32_t pending_ints;
291 uint32_t mit_delay;
293 s->mac_reg[ICR] = val;
296 * Make sure ICR and ICS registers have the same value.
297 * The spec says that the ICS register is write-only. However in practice,
298 * on real hardware ICS is readable, and for reads it has the same value as
299 * ICR (except that ICS does not have the clear on read behaviour of ICR).
301 * The VxWorks PRO/1000 driver uses this behaviour.
303 s->mac_reg[ICS] = val;
305 pending_ints = (s->mac_reg[IMS] & s->mac_reg[ICR]);
306 if (!s->mit_irq_level && pending_ints) {
308 * Here we detect a potential raising edge. We postpone raising the
309 * interrupt line if we are inside the mitigation delay window
310 * (s->mit_timer_on == 1).
311 * We provide a partial implementation of interrupt mitigation,
312 * emulating only RADV, TADV and ITR (lower 16 bits, 1024ns units for
313 * RADV and TADV, 256ns units for ITR). RDTR is only used to enable
314 * RADV; relative timers based on TIDV and RDTR are not implemented.
316 if (s->mit_timer_on) {
317 return;
319 if (s->compat_flags & E1000_FLAG_MIT) {
320 /* Compute the next mitigation delay according to pending
321 * interrupts and the current values of RADV (provided
322 * RDTR!=0), TADV and ITR.
323 * Then rearm the timer.
325 mit_delay = 0;
326 if (s->mit_ide &&
327 (pending_ints & (E1000_ICR_TXQE | E1000_ICR_TXDW))) {
328 mit_update_delay(&mit_delay, s->mac_reg[TADV] * 4);
330 if (s->mac_reg[RDTR] && (pending_ints & E1000_ICS_RXT0)) {
331 mit_update_delay(&mit_delay, s->mac_reg[RADV] * 4);
333 mit_update_delay(&mit_delay, s->mac_reg[ITR]);
335 if (mit_delay) {
336 s->mit_timer_on = 1;
337 timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
338 mit_delay * 256);
340 s->mit_ide = 0;
344 s->mit_irq_level = (pending_ints != 0);
345 pci_set_irq(d, s->mit_irq_level);
348 static void
349 e1000_mit_timer(void *opaque)
351 E1000State *s = opaque;
353 s->mit_timer_on = 0;
354 /* Call set_interrupt_cause to update the irq level (if necessary). */
355 set_interrupt_cause(s, 0, s->mac_reg[ICR]);
358 static void
359 set_ics(E1000State *s, int index, uint32_t val)
361 DBGOUT(INTERRUPT, "set_ics %x, ICR %x, IMR %x\n", val, s->mac_reg[ICR],
362 s->mac_reg[IMS]);
363 set_interrupt_cause(s, 0, val | s->mac_reg[ICR]);
366 static void
367 e1000_autoneg_timer(void *opaque)
369 E1000State *s = opaque;
370 if (!qemu_get_queue(s->nic)->link_down) {
371 e1000_link_up(s);
372 s->phy_reg[PHY_LP_ABILITY] |= MII_LPAR_LPACK;
373 s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
374 DBGOUT(PHY, "Auto negotiation is completed\n");
375 set_ics(s, 0, E1000_ICS_LSC); /* signal link status change to guest */
379 static int
380 rxbufsize(uint32_t v)
382 v &= E1000_RCTL_BSEX | E1000_RCTL_SZ_16384 | E1000_RCTL_SZ_8192 |
383 E1000_RCTL_SZ_4096 | E1000_RCTL_SZ_2048 | E1000_RCTL_SZ_1024 |
384 E1000_RCTL_SZ_512 | E1000_RCTL_SZ_256;
385 switch (v) {
386 case E1000_RCTL_BSEX | E1000_RCTL_SZ_16384:
387 return 16384;
388 case E1000_RCTL_BSEX | E1000_RCTL_SZ_8192:
389 return 8192;
390 case E1000_RCTL_BSEX | E1000_RCTL_SZ_4096:
391 return 4096;
392 case E1000_RCTL_SZ_1024:
393 return 1024;
394 case E1000_RCTL_SZ_512:
395 return 512;
396 case E1000_RCTL_SZ_256:
397 return 256;
399 return 2048;
402 static void e1000_reset(void *opaque)
404 E1000State *d = opaque;
405 E1000BaseClass *edc = E1000_DEVICE_GET_CLASS(d);
406 uint8_t *macaddr = d->conf.macaddr.a;
407 int i;
409 timer_del(d->autoneg_timer);
410 timer_del(d->mit_timer);
411 d->mit_timer_on = 0;
412 d->mit_irq_level = 0;
413 d->mit_ide = 0;
414 memset(d->phy_reg, 0, sizeof d->phy_reg);
415 memmove(d->phy_reg, phy_reg_init, sizeof phy_reg_init);
416 d->phy_reg[PHY_ID2] = edc->phy_id2;
417 memset(d->mac_reg, 0, sizeof d->mac_reg);
418 memmove(d->mac_reg, mac_reg_init, sizeof mac_reg_init);
419 d->rxbuf_min_shift = 1;
420 memset(&d->tx, 0, sizeof d->tx);
422 if (qemu_get_queue(d->nic)->link_down) {
423 e1000_link_down(d);
426 /* Some guests expect pre-initialized RAH/RAL (AddrValid flag + MACaddr) */
427 d->mac_reg[RA] = 0;
428 d->mac_reg[RA + 1] = E1000_RAH_AV;
429 for (i = 0; i < 4; i++) {
430 d->mac_reg[RA] |= macaddr[i] << (8 * i);
431 d->mac_reg[RA + 1] |= (i < 2) ? macaddr[i + 4] << (8 * i) : 0;
433 qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
436 static void
437 set_ctrl(E1000State *s, int index, uint32_t val)
439 /* RST is self clearing */
440 s->mac_reg[CTRL] = val & ~E1000_CTRL_RST;
443 static void
444 set_rx_control(E1000State *s, int index, uint32_t val)
446 s->mac_reg[RCTL] = val;
447 s->rxbuf_size = rxbufsize(val);
448 s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1;
449 DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT],
450 s->mac_reg[RCTL]);
451 qemu_flush_queued_packets(qemu_get_queue(s->nic));
454 static void
455 set_mdic(E1000State *s, int index, uint32_t val)
457 uint32_t data = val & E1000_MDIC_DATA_MASK;
458 uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);
460 if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) // phy #
461 val = s->mac_reg[MDIC] | E1000_MDIC_ERROR;
462 else if (val & E1000_MDIC_OP_READ) {
463 DBGOUT(MDIC, "MDIC read reg 0x%x\n", addr);
464 if (!(phy_regcap[addr] & PHY_R)) {
465 DBGOUT(MDIC, "MDIC read reg %x unhandled\n", addr);
466 val |= E1000_MDIC_ERROR;
467 } else
468 val = (val ^ data) | s->phy_reg[addr];
469 } else if (val & E1000_MDIC_OP_WRITE) {
470 DBGOUT(MDIC, "MDIC write reg 0x%x, value 0x%x\n", addr, data);
471 if (!(phy_regcap[addr] & PHY_W)) {
472 DBGOUT(MDIC, "MDIC write reg %x unhandled\n", addr);
473 val |= E1000_MDIC_ERROR;
474 } else {
475 if (addr < NPHYWRITEOPS && phyreg_writeops[addr]) {
476 phyreg_writeops[addr](s, index, data);
477 } else {
478 s->phy_reg[addr] = data;
482 s->mac_reg[MDIC] = val | E1000_MDIC_READY;
484 if (val & E1000_MDIC_INT_EN) {
485 set_ics(s, 0, E1000_ICR_MDAC);
489 static uint32_t
490 get_eecd(E1000State *s, int index)
492 uint32_t ret = E1000_EECD_PRES|E1000_EECD_GNT | s->eecd_state.old_eecd;
494 DBGOUT(EEPROM, "reading eeprom bit %d (reading %d)\n",
495 s->eecd_state.bitnum_out, s->eecd_state.reading);
496 if (!s->eecd_state.reading ||
497 ((s->eeprom_data[(s->eecd_state.bitnum_out >> 4) & 0x3f] >>
498 ((s->eecd_state.bitnum_out & 0xf) ^ 0xf))) & 1)
499 ret |= E1000_EECD_DO;
500 return ret;
503 static void
504 set_eecd(E1000State *s, int index, uint32_t val)
506 uint32_t oldval = s->eecd_state.old_eecd;
508 s->eecd_state.old_eecd = val & (E1000_EECD_SK | E1000_EECD_CS |
509 E1000_EECD_DI|E1000_EECD_FWE_MASK|E1000_EECD_REQ);
510 if (!(E1000_EECD_CS & val)) // CS inactive; nothing to do
511 return;
512 if (E1000_EECD_CS & (val ^ oldval)) { // CS rise edge; reset state
513 s->eecd_state.val_in = 0;
514 s->eecd_state.bitnum_in = 0;
515 s->eecd_state.bitnum_out = 0;
516 s->eecd_state.reading = 0;
518 if (!(E1000_EECD_SK & (val ^ oldval))) // no clock edge
519 return;
520 if (!(E1000_EECD_SK & val)) { // falling edge
521 s->eecd_state.bitnum_out++;
522 return;
524 s->eecd_state.val_in <<= 1;
525 if (val & E1000_EECD_DI)
526 s->eecd_state.val_in |= 1;
527 if (++s->eecd_state.bitnum_in == 9 && !s->eecd_state.reading) {
528 s->eecd_state.bitnum_out = ((s->eecd_state.val_in & 0x3f)<<4)-1;
529 s->eecd_state.reading = (((s->eecd_state.val_in >> 6) & 7) ==
530 EEPROM_READ_OPCODE_MICROWIRE);
532 DBGOUT(EEPROM, "eeprom bitnum in %d out %d, reading %d\n",
533 s->eecd_state.bitnum_in, s->eecd_state.bitnum_out,
534 s->eecd_state.reading);
537 static uint32_t
538 flash_eerd_read(E1000State *s, int x)
540 unsigned int index, r = s->mac_reg[EERD] & ~E1000_EEPROM_RW_REG_START;
542 if ((s->mac_reg[EERD] & E1000_EEPROM_RW_REG_START) == 0)
543 return (s->mac_reg[EERD]);
545 if ((index = r >> E1000_EEPROM_RW_ADDR_SHIFT) > EEPROM_CHECKSUM_REG)
546 return (E1000_EEPROM_RW_REG_DONE | r);
548 return ((s->eeprom_data[index] << E1000_EEPROM_RW_REG_DATA) |
549 E1000_EEPROM_RW_REG_DONE | r);
/*
 * Compute an Internet checksum over data[css .. end) and store it big-endian
 * at data[sloc].
 *
 * n is the packet length.  A non-zero cse smaller than n names the last
 * byte to cover, so the region then ends at cse + 1.  Nothing is written
 * unless sloc lies before the last covered byte.
 */
static void
putsum(uint8_t *data, uint32_t n, uint32_t sloc, uint32_t css, uint32_t cse)
{
    uint32_t end = n;
    uint32_t sum;

    if (cse && cse < n) {
        end = cse + 1;
    }
    if (!(sloc < end - 1)) {
        return;
    }

    sum = net_checksum_add(end - css, data + css);
    stw_be_p(data + sloc, net_checksum_finish(sum));
}
565 static inline int
566 vlan_enabled(E1000State *s)
568 return ((s->mac_reg[CTRL] & E1000_CTRL_VME) != 0);
571 static inline int
572 vlan_rx_filter_enabled(E1000State *s)
574 return ((s->mac_reg[RCTL] & E1000_RCTL_VFE) != 0);
577 static inline int
578 is_vlan_packet(E1000State *s, const uint8_t *buf)
580 return (be16_to_cpup((uint16_t *)(buf + 12)) ==
581 le16_to_cpu(s->mac_reg[VET]));
584 static inline int
585 is_vlan_txd(uint32_t txd_lower)
587 return ((txd_lower & E1000_TXD_CMD_VLE) != 0);
590 /* FCS aka Ethernet CRC-32. We don't get it from backends and can't
591 * fill it in, just pad descriptor length by 4 bytes unless guest
592 * told us to strip it off the packet. */
593 static inline int
594 fcs_len(E1000State *s)
596 return (s->mac_reg[RCTL] & E1000_RCTL_SECRC) ? 0 : 4;
599 static void
600 e1000_send_packet(E1000State *s, const uint8_t *buf, int size)
602 NetClientState *nc = qemu_get_queue(s->nic);
603 if (s->phy_reg[PHY_CTRL] & MII_CR_LOOPBACK) {
604 nc->info->receive(nc, buf, size);
605 } else {
606 qemu_send_packet(nc, buf, size);
610 static void
611 xmit_seg(E1000State *s)
613 uint16_t len, *sp;
614 unsigned int frames = s->tx.tso_frames, css, sofar, n;
615 struct e1000_tx *tp = &s->tx;
617 if (tp->tse && tp->cptse) {
618 css = tp->ipcss;
619 DBGOUT(TXSUM, "frames %d size %d ipcss %d\n",
620 frames, tp->size, css);
621 if (tp->ip) { // IPv4
622 stw_be_p(tp->data+css+2, tp->size - css);
623 stw_be_p(tp->data+css+4,
624 be16_to_cpup((uint16_t *)(tp->data+css+4))+frames);
625 } else // IPv6
626 stw_be_p(tp->data+css+4, tp->size - css);
627 css = tp->tucss;
628 len = tp->size - css;
629 DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", tp->tcp, css, len);
630 if (tp->tcp) {
631 sofar = frames * tp->mss;
632 stl_be_p(tp->data+css+4, ldl_be_p(tp->data+css+4)+sofar); /* seq */
633 if (tp->paylen - sofar > tp->mss)
634 tp->data[css + 13] &= ~9; // PSH, FIN
635 } else // UDP
636 stw_be_p(tp->data+css+4, len);
637 if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
638 unsigned int phsum;
639 // add pseudo-header length before checksum calculation
640 sp = (uint16_t *)(tp->data + tp->tucso);
641 phsum = be16_to_cpup(sp) + len;
642 phsum = (phsum >> 16) + (phsum & 0xffff);
643 stw_be_p(sp, phsum);
645 tp->tso_frames++;
648 if (tp->sum_needed & E1000_TXD_POPTS_TXSM)
649 putsum(tp->data, tp->size, tp->tucso, tp->tucss, tp->tucse);
650 if (tp->sum_needed & E1000_TXD_POPTS_IXSM)
651 putsum(tp->data, tp->size, tp->ipcso, tp->ipcss, tp->ipcse);
652 if (tp->vlan_needed) {
653 memmove(tp->vlan, tp->data, 4);
654 memmove(tp->data, tp->data + 4, 8);
655 memcpy(tp->data + 8, tp->vlan_header, 4);
656 e1000_send_packet(s, tp->vlan, tp->size + 4);
657 } else
658 e1000_send_packet(s, tp->data, tp->size);
659 s->mac_reg[TPT]++;
660 s->mac_reg[GPTC]++;
661 n = s->mac_reg[TOTL];
662 if ((s->mac_reg[TOTL] += s->tx.size) < n)
663 s->mac_reg[TOTH]++;
666 static void
667 process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
669 PCIDevice *d = PCI_DEVICE(s);
670 uint32_t txd_lower = le32_to_cpu(dp->lower.data);
671 uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D);
672 unsigned int split_size = txd_lower & 0xffff, bytes, sz, op;
673 unsigned int msh = 0xfffff;
674 uint64_t addr;
675 struct e1000_context_desc *xp = (struct e1000_context_desc *)dp;
676 struct e1000_tx *tp = &s->tx;
678 s->mit_ide |= (txd_lower & E1000_TXD_CMD_IDE);
679 if (dtype == E1000_TXD_CMD_DEXT) { // context descriptor
680 op = le32_to_cpu(xp->cmd_and_length);
681 tp->ipcss = xp->lower_setup.ip_fields.ipcss;
682 tp->ipcso = xp->lower_setup.ip_fields.ipcso;
683 tp->ipcse = le16_to_cpu(xp->lower_setup.ip_fields.ipcse);
684 tp->tucss = xp->upper_setup.tcp_fields.tucss;
685 tp->tucso = xp->upper_setup.tcp_fields.tucso;
686 tp->tucse = le16_to_cpu(xp->upper_setup.tcp_fields.tucse);
687 tp->paylen = op & 0xfffff;
688 tp->hdr_len = xp->tcp_seg_setup.fields.hdr_len;
689 tp->mss = le16_to_cpu(xp->tcp_seg_setup.fields.mss);
690 tp->ip = (op & E1000_TXD_CMD_IP) ? 1 : 0;
691 tp->tcp = (op & E1000_TXD_CMD_TCP) ? 1 : 0;
692 tp->tse = (op & E1000_TXD_CMD_TSE) ? 1 : 0;
693 tp->tso_frames = 0;
694 if (tp->tucso == 0) { // this is probably wrong
695 DBGOUT(TXSUM, "TCP/UDP: cso 0!\n");
696 tp->tucso = tp->tucss + (tp->tcp ? 16 : 6);
698 return;
699 } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
700 // data descriptor
701 if (tp->size == 0) {
702 tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8;
704 tp->cptse = ( txd_lower & E1000_TXD_CMD_TSE ) ? 1 : 0;
705 } else {
706 // legacy descriptor
707 tp->cptse = 0;
710 if (vlan_enabled(s) && is_vlan_txd(txd_lower) &&
711 (tp->cptse || txd_lower & E1000_TXD_CMD_EOP)) {
712 tp->vlan_needed = 1;
713 stw_be_p(tp->vlan_header,
714 le16_to_cpu(s->mac_reg[VET]));
715 stw_be_p(tp->vlan_header + 2,
716 le16_to_cpu(dp->upper.fields.special));
719 addr = le64_to_cpu(dp->buffer_addr);
720 if (tp->tse && tp->cptse) {
721 msh = tp->hdr_len + tp->mss;
722 do {
723 bytes = split_size;
724 if (tp->size + bytes > msh)
725 bytes = msh - tp->size;
727 bytes = MIN(sizeof(tp->data) - tp->size, bytes);
728 pci_dma_read(d, addr, tp->data + tp->size, bytes);
729 sz = tp->size + bytes;
730 if (sz >= tp->hdr_len && tp->size < tp->hdr_len) {
731 memmove(tp->header, tp->data, tp->hdr_len);
733 tp->size = sz;
734 addr += bytes;
735 if (sz == msh) {
736 xmit_seg(s);
737 memmove(tp->data, tp->header, tp->hdr_len);
738 tp->size = tp->hdr_len;
740 } while (split_size -= bytes);
741 } else if (!tp->tse && tp->cptse) {
742 // context descriptor TSE is not set, while data descriptor TSE is set
743 DBGOUT(TXERR, "TCP segmentation error\n");
744 } else {
745 split_size = MIN(sizeof(tp->data) - tp->size, split_size);
746 pci_dma_read(d, addr, tp->data + tp->size, split_size);
747 tp->size += split_size;
750 if (!(txd_lower & E1000_TXD_CMD_EOP))
751 return;
752 if (!(tp->tse && tp->cptse && tp->size < tp->hdr_len)) {
753 xmit_seg(s);
755 tp->tso_frames = 0;
756 tp->sum_needed = 0;
757 tp->vlan_needed = 0;
758 tp->size = 0;
759 tp->cptse = 0;
762 static uint32_t
763 txdesc_writeback(E1000State *s, dma_addr_t base, struct e1000_tx_desc *dp)
765 PCIDevice *d = PCI_DEVICE(s);
766 uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data);
768 if (!(txd_lower & (E1000_TXD_CMD_RS|E1000_TXD_CMD_RPS)))
769 return 0;
770 txd_upper = (le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD) &
771 ~(E1000_TXD_STAT_EC | E1000_TXD_STAT_LC | E1000_TXD_STAT_TU);
772 dp->upper.data = cpu_to_le32(txd_upper);
773 pci_dma_write(d, base + ((char *)&dp->upper - (char *)dp),
774 &dp->upper, sizeof(dp->upper));
775 return E1000_ICR_TXDW;
778 static uint64_t tx_desc_base(E1000State *s)
780 uint64_t bah = s->mac_reg[TDBAH];
781 uint64_t bal = s->mac_reg[TDBAL] & ~0xf;
783 return (bah << 32) + bal;
786 static void
787 start_xmit(E1000State *s)
789 PCIDevice *d = PCI_DEVICE(s);
790 dma_addr_t base;
791 struct e1000_tx_desc desc;
792 uint32_t tdh_start = s->mac_reg[TDH], cause = E1000_ICS_TXQE;
794 if (!(s->mac_reg[TCTL] & E1000_TCTL_EN)) {
795 DBGOUT(TX, "tx disabled\n");
796 return;
799 while (s->mac_reg[TDH] != s->mac_reg[TDT]) {
800 base = tx_desc_base(s) +
801 sizeof(struct e1000_tx_desc) * s->mac_reg[TDH];
802 pci_dma_read(d, base, &desc, sizeof(desc));
804 DBGOUT(TX, "index %d: %p : %x %x\n", s->mac_reg[TDH],
805 (void *)(intptr_t)desc.buffer_addr, desc.lower.data,
806 desc.upper.data);
808 process_tx_desc(s, &desc);
809 cause |= txdesc_writeback(s, base, &desc);
811 if (++s->mac_reg[TDH] * sizeof(desc) >= s->mac_reg[TDLEN])
812 s->mac_reg[TDH] = 0;
814 * the following could happen only if guest sw assigns
815 * bogus values to TDT/TDLEN.
816 * there's nothing too intelligent we could do about this.
818 if (s->mac_reg[TDH] == tdh_start) {
819 DBGOUT(TXERR, "TDH wraparound @%x, TDT %x, TDLEN %x\n",
820 tdh_start, s->mac_reg[TDT], s->mac_reg[TDLEN]);
821 break;
824 set_ics(s, 0, cause);
827 static int
828 receive_filter(E1000State *s, const uint8_t *buf, int size)
830 static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
831 static const int mta_shift[] = {4, 3, 2, 0};
832 uint32_t f, rctl = s->mac_reg[RCTL], ra[2], *rp;
834 if (is_vlan_packet(s, buf) && vlan_rx_filter_enabled(s)) {
835 uint16_t vid = be16_to_cpup((uint16_t *)(buf + 14));
836 uint32_t vfta = le32_to_cpup((uint32_t *)(s->mac_reg + VFTA) +
837 ((vid >> 5) & 0x7f));
838 if ((vfta & (1 << (vid & 0x1f))) == 0)
839 return 0;
842 if (rctl & E1000_RCTL_UPE) // promiscuous
843 return 1;
845 if ((buf[0] & 1) && (rctl & E1000_RCTL_MPE)) // promiscuous mcast
846 return 1;
848 if ((rctl & E1000_RCTL_BAM) && !memcmp(buf, bcast, sizeof bcast))
849 return 1;
851 for (rp = s->mac_reg + RA; rp < s->mac_reg + RA + 32; rp += 2) {
852 if (!(rp[1] & E1000_RAH_AV))
853 continue;
854 ra[0] = cpu_to_le32(rp[0]);
855 ra[1] = cpu_to_le32(rp[1]);
856 if (!memcmp(buf, (uint8_t *)ra, 6)) {
857 DBGOUT(RXFILTER,
858 "unicast match[%d]: %02x:%02x:%02x:%02x:%02x:%02x\n",
859 (int)(rp - s->mac_reg - RA)/2,
860 buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);
861 return 1;
864 DBGOUT(RXFILTER, "unicast mismatch: %02x:%02x:%02x:%02x:%02x:%02x\n",
865 buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);
867 f = mta_shift[(rctl >> E1000_RCTL_MO_SHIFT) & 3];
868 f = (((buf[5] << 8) | buf[4]) >> f) & 0xfff;
869 if (s->mac_reg[MTA + (f >> 5)] & (1 << (f & 0x1f)))
870 return 1;
871 DBGOUT(RXFILTER,
872 "dropping, inexact filter mismatch: %02x:%02x:%02x:%02x:%02x:%02x MO %d MTA[%d] %x\n",
873 buf[0], buf[1], buf[2], buf[3], buf[4], buf[5],
874 (rctl >> E1000_RCTL_MO_SHIFT) & 3, f >> 5,
875 s->mac_reg[MTA + (f >> 5)]);
877 return 0;
880 static void
881 e1000_set_link_status(NetClientState *nc)
883 E1000State *s = qemu_get_nic_opaque(nc);
884 uint32_t old_status = s->mac_reg[STATUS];
886 if (nc->link_down) {
887 e1000_link_down(s);
888 } else {
889 if (have_autoneg(s) &&
890 !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
891 /* emulate auto-negotiation if supported */
892 timer_mod(s->autoneg_timer,
893 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
894 } else {
895 e1000_link_up(s);
899 if (s->mac_reg[STATUS] != old_status)
900 set_ics(s, 0, E1000_ICR_LSC);
903 static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
905 int bufs;
906 /* Fast-path short packets */
907 if (total_size <= s->rxbuf_size) {
908 return s->mac_reg[RDH] != s->mac_reg[RDT];
910 if (s->mac_reg[RDH] < s->mac_reg[RDT]) {
911 bufs = s->mac_reg[RDT] - s->mac_reg[RDH];
912 } else if (s->mac_reg[RDH] > s->mac_reg[RDT]) {
913 bufs = s->mac_reg[RDLEN] / sizeof(struct e1000_rx_desc) +
914 s->mac_reg[RDT] - s->mac_reg[RDH];
915 } else {
916 return false;
918 return total_size <= bufs * s->rxbuf_size;
921 static int
922 e1000_can_receive(NetClientState *nc)
924 E1000State *s = qemu_get_nic_opaque(nc);
926 return (s->mac_reg[STATUS] & E1000_STATUS_LU) &&
927 (s->mac_reg[RCTL] & E1000_RCTL_EN) &&
928 (s->parent_obj.config[PCI_COMMAND] & PCI_COMMAND_MASTER) &&
929 e1000_has_rxbufs(s, 1);
932 static uint64_t rx_desc_base(E1000State *s)
934 uint64_t bah = s->mac_reg[RDBAH];
935 uint64_t bal = s->mac_reg[RDBAL] & ~0xf;
937 return (bah << 32) + bal;
940 static ssize_t
941 e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
943 E1000State *s = qemu_get_nic_opaque(nc);
944 PCIDevice *d = PCI_DEVICE(s);
945 struct e1000_rx_desc desc;
946 dma_addr_t base;
947 unsigned int n, rdt;
948 uint32_t rdh_start;
949 uint16_t vlan_special = 0;
950 uint8_t vlan_status = 0;
951 uint8_t min_buf[MIN_BUF_SIZE];
952 struct iovec min_iov;
953 uint8_t *filter_buf = iov->iov_base;
954 size_t size = iov_size(iov, iovcnt);
955 size_t iov_ofs = 0;
956 size_t desc_offset;
957 size_t desc_size;
958 size_t total_size;
960 if (!(s->mac_reg[STATUS] & E1000_STATUS_LU)) {
961 return -1;
964 if (!(s->mac_reg[RCTL] & E1000_RCTL_EN)) {
965 return -1;
968 /* Pad to minimum Ethernet frame length */
969 if (size < sizeof(min_buf)) {
970 iov_to_buf(iov, iovcnt, 0, min_buf, size);
971 memset(&min_buf[size], 0, sizeof(min_buf) - size);
972 min_iov.iov_base = filter_buf = min_buf;
973 min_iov.iov_len = size = sizeof(min_buf);
974 iovcnt = 1;
975 iov = &min_iov;
976 } else if (iov->iov_len < MAXIMUM_ETHERNET_HDR_LEN) {
977 /* This is very unlikely, but may happen. */
978 iov_to_buf(iov, iovcnt, 0, min_buf, MAXIMUM_ETHERNET_HDR_LEN);
979 filter_buf = min_buf;
982 /* Discard oversized packets if !LPE and !SBP. */
983 if ((size > MAXIMUM_ETHERNET_LPE_SIZE ||
984 (size > MAXIMUM_ETHERNET_VLAN_SIZE
985 && !(s->mac_reg[RCTL] & E1000_RCTL_LPE)))
986 && !(s->mac_reg[RCTL] & E1000_RCTL_SBP)) {
987 return size;
990 if (!receive_filter(s, filter_buf, size)) {
991 return size;
994 if (vlan_enabled(s) && is_vlan_packet(s, filter_buf)) {
995 vlan_special = cpu_to_le16(be16_to_cpup((uint16_t *)(filter_buf
996 + 14)));
997 iov_ofs = 4;
998 if (filter_buf == iov->iov_base) {
999 memmove(filter_buf + 4, filter_buf, 12);
1000 } else {
1001 iov_from_buf(iov, iovcnt, 4, filter_buf, 12);
1002 while (iov->iov_len <= iov_ofs) {
1003 iov_ofs -= iov->iov_len;
1004 iov++;
1007 vlan_status = E1000_RXD_STAT_VP;
1008 size -= 4;
1011 rdh_start = s->mac_reg[RDH];
1012 desc_offset = 0;
1013 total_size = size + fcs_len(s);
1014 if (!e1000_has_rxbufs(s, total_size)) {
1015 set_ics(s, 0, E1000_ICS_RXO);
1016 return -1;
1018 do {
1019 desc_size = total_size - desc_offset;
1020 if (desc_size > s->rxbuf_size) {
1021 desc_size = s->rxbuf_size;
1023 base = rx_desc_base(s) + sizeof(desc) * s->mac_reg[RDH];
1024 pci_dma_read(d, base, &desc, sizeof(desc));
1025 desc.special = vlan_special;
1026 desc.status |= (vlan_status | E1000_RXD_STAT_DD);
1027 if (desc.buffer_addr) {
1028 if (desc_offset < size) {
1029 size_t iov_copy;
1030 hwaddr ba = le64_to_cpu(desc.buffer_addr);
1031 size_t copy_size = size - desc_offset;
1032 if (copy_size > s->rxbuf_size) {
1033 copy_size = s->rxbuf_size;
1035 do {
1036 iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
1037 pci_dma_write(d, ba, iov->iov_base + iov_ofs, iov_copy);
1038 copy_size -= iov_copy;
1039 ba += iov_copy;
1040 iov_ofs += iov_copy;
1041 if (iov_ofs == iov->iov_len) {
1042 iov++;
1043 iov_ofs = 0;
1045 } while (copy_size);
1047 desc_offset += desc_size;
1048 desc.length = cpu_to_le16(desc_size);
1049 if (desc_offset >= total_size) {
1050 desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
1051 } else {
1052 /* Guest zeroing out status is not a hardware requirement.
1053 Clear EOP in case guest didn't do it. */
1054 desc.status &= ~E1000_RXD_STAT_EOP;
1056 } else { // as per intel docs; skip descriptors with null buf addr
1057 DBGOUT(RX, "Null RX descriptor!!\n");
1059 pci_dma_write(d, base, &desc, sizeof(desc));
1061 if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN])
1062 s->mac_reg[RDH] = 0;
1063 /* see comment in start_xmit; same here */
1064 if (s->mac_reg[RDH] == rdh_start) {
1065 DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n",
1066 rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]);
1067 set_ics(s, 0, E1000_ICS_RXO);
1068 return -1;
1070 } while (desc_offset < total_size);
1072 s->mac_reg[GPRC]++;
1073 s->mac_reg[TPR]++;
1074 /* TOR - Total Octets Received:
1075 * This register includes bytes received in a packet from the <Destination
1076 * Address> field through the <CRC> field, inclusively.
1078 n = s->mac_reg[TORL] + size + /* Always include FCS length. */ 4;
1079 if (n < s->mac_reg[TORL])
1080 s->mac_reg[TORH]++;
1081 s->mac_reg[TORL] = n;
1083 n = E1000_ICS_RXT0;
1084 if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH])
1085 rdt += s->mac_reg[RDLEN] / sizeof(desc);
1086 if (((rdt - s->mac_reg[RDH]) * sizeof(desc)) <= s->mac_reg[RDLEN] >>
1087 s->rxbuf_min_shift)
1088 n |= E1000_ICS_RXDMT0;
1090 set_ics(s, 0, n);
1092 return size;
1095 static ssize_t
1096 e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
1098 const struct iovec iov = {
1099 .iov_base = (uint8_t *)buf,
1100 .iov_len = size
1103 return e1000_receive_iov(nc, &iov, 1);
1106 static uint32_t
1107 mac_readreg(E1000State *s, int index)
1109 return s->mac_reg[index];
1112 static uint32_t
1113 mac_icr_read(E1000State *s, int index)
1115 uint32_t ret = s->mac_reg[ICR];
1117 DBGOUT(INTERRUPT, "ICR read: %x\n", ret);
1118 set_interrupt_cause(s, 0, 0);
1119 return ret;
1122 static uint32_t
1123 mac_read_clr4(E1000State *s, int index)
1125 uint32_t ret = s->mac_reg[index];
1127 s->mac_reg[index] = 0;
1128 return ret;
1131 static uint32_t
1132 mac_read_clr8(E1000State *s, int index)
1134 uint32_t ret = s->mac_reg[index];
1136 s->mac_reg[index] = 0;
1137 s->mac_reg[index-1] = 0;
1138 return ret;
1141 static void
1142 mac_writereg(E1000State *s, int index, uint32_t val)
1144 uint32_t macaddr[2];
1146 s->mac_reg[index] = val;
1148 if (index == RA + 1) {
1149 macaddr[0] = cpu_to_le32(s->mac_reg[RA]);
1150 macaddr[1] = cpu_to_le32(s->mac_reg[RA + 1]);
1151 qemu_format_nic_info_str(qemu_get_queue(s->nic), (uint8_t *)macaddr);
1155 static void
1156 set_rdt(E1000State *s, int index, uint32_t val)
1158 s->mac_reg[index] = val & 0xffff;
1159 if (e1000_has_rxbufs(s, 1)) {
1160 qemu_flush_queued_packets(qemu_get_queue(s->nic));
1164 static void
1165 set_16bit(E1000State *s, int index, uint32_t val)
1167 s->mac_reg[index] = val & 0xffff;
1170 static void
1171 set_dlen(E1000State *s, int index, uint32_t val)
1173 s->mac_reg[index] = val & 0xfff80;
1176 static void
1177 set_tctl(E1000State *s, int index, uint32_t val)
1179 s->mac_reg[index] = val;
1180 s->mac_reg[TDT] &= 0xffff;
1181 start_xmit(s);
1184 static void
1185 set_icr(E1000State *s, int index, uint32_t val)
1187 DBGOUT(INTERRUPT, "set_icr %x\n", val);
1188 set_interrupt_cause(s, 0, s->mac_reg[ICR] & ~val);
1191 static void
1192 set_imc(E1000State *s, int index, uint32_t val)
1194 s->mac_reg[IMS] &= ~val;
1195 set_ics(s, 0, 0);
1198 static void
1199 set_ims(E1000State *s, int index, uint32_t val)
1201 s->mac_reg[IMS] |= val;
1202 set_ics(s, 0, 0);
1205 #define getreg(x) [x] = mac_readreg
1206 static uint32_t (*macreg_readops[])(E1000State *, int) = {
1207 getreg(PBA), getreg(RCTL), getreg(TDH), getreg(TXDCTL),
1208 getreg(WUFC), getreg(TDT), getreg(CTRL), getreg(LEDCTL),
1209 getreg(MANC), getreg(MDIC), getreg(SWSM), getreg(STATUS),
1210 getreg(TORL), getreg(TOTL), getreg(IMS), getreg(TCTL),
1211 getreg(RDH), getreg(RDT), getreg(VET), getreg(ICS),
1212 getreg(TDBAL), getreg(TDBAH), getreg(RDBAH), getreg(RDBAL),
1213 getreg(TDLEN), getreg(RDLEN), getreg(RDTR), getreg(RADV),
1214 getreg(TADV), getreg(ITR),
1216 [TOTH] = mac_read_clr8, [TORH] = mac_read_clr8, [GPRC] = mac_read_clr4,
1217 [GPTC] = mac_read_clr4, [TPR] = mac_read_clr4, [TPT] = mac_read_clr4,
1218 [ICR] = mac_icr_read, [EECD] = get_eecd, [EERD] = flash_eerd_read,
1219 [CRCERRS ... MPC] = &mac_readreg,
1220 [RA ... RA+31] = &mac_readreg,
1221 [MTA ... MTA+127] = &mac_readreg,
1222 [VFTA ... VFTA+127] = &mac_readreg,
1224 enum { NREADOPS = ARRAY_SIZE(macreg_readops) };
1226 #define putreg(x) [x] = mac_writereg
1227 static void (*macreg_writeops[])(E1000State *, int, uint32_t) = {
1228 putreg(PBA), putreg(EERD), putreg(SWSM), putreg(WUFC),
1229 putreg(TDBAL), putreg(TDBAH), putreg(TXDCTL), putreg(RDBAH),
1230 putreg(RDBAL), putreg(LEDCTL), putreg(VET),
1231 [TDLEN] = set_dlen, [RDLEN] = set_dlen, [TCTL] = set_tctl,
1232 [TDT] = set_tctl, [MDIC] = set_mdic, [ICS] = set_ics,
1233 [TDH] = set_16bit, [RDH] = set_16bit, [RDT] = set_rdt,
1234 [IMC] = set_imc, [IMS] = set_ims, [ICR] = set_icr,
1235 [EECD] = set_eecd, [RCTL] = set_rx_control, [CTRL] = set_ctrl,
1236 [RDTR] = set_16bit, [RADV] = set_16bit, [TADV] = set_16bit,
1237 [ITR] = set_16bit,
1238 [RA ... RA+31] = &mac_writereg,
1239 [MTA ... MTA+127] = &mac_writereg,
1240 [VFTA ... VFTA+127] = &mac_writereg,
1243 enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };
1245 static void
1246 e1000_mmio_write(void *opaque, hwaddr addr, uint64_t val,
1247 unsigned size)
1249 E1000State *s = opaque;
1250 unsigned int index = (addr & 0x1ffff) >> 2;
1252 if (index < NWRITEOPS && macreg_writeops[index]) {
1253 macreg_writeops[index](s, index, val);
1254 } else if (index < NREADOPS && macreg_readops[index]) {
1255 DBGOUT(MMIO, "e1000_mmio_writel RO %x: 0x%04"PRIx64"\n", index<<2, val);
1256 } else {
1257 DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08"PRIx64"\n",
1258 index<<2, val);
1262 static uint64_t
1263 e1000_mmio_read(void *opaque, hwaddr addr, unsigned size)
1265 E1000State *s = opaque;
1266 unsigned int index = (addr & 0x1ffff) >> 2;
1268 if (index < NREADOPS && macreg_readops[index])
1270 return macreg_readops[index](s, index);
1272 DBGOUT(UNKNOWN, "MMIO unknown read addr=0x%08x\n", index<<2);
1273 return 0;
1276 static const MemoryRegionOps e1000_mmio_ops = {
1277 .read = e1000_mmio_read,
1278 .write = e1000_mmio_write,
1279 .endianness = DEVICE_LITTLE_ENDIAN,
1280 .impl = {
1281 .min_access_size = 4,
1282 .max_access_size = 4,
1286 static uint64_t e1000_io_read(void *opaque, hwaddr addr,
1287 unsigned size)
1289 E1000State *s = opaque;
1291 (void)s;
1292 return 0;
1295 static void e1000_io_write(void *opaque, hwaddr addr,
1296 uint64_t val, unsigned size)
1298 E1000State *s = opaque;
1300 (void)s;
1303 static const MemoryRegionOps e1000_io_ops = {
1304 .read = e1000_io_read,
1305 .write = e1000_io_write,
1306 .endianness = DEVICE_LITTLE_ENDIAN,
/*
 * VMState field test: true only when the incoming stream's version_id is 1.
 * opaque is unused.
 */
static bool is_version_1(void *opaque, int version_id)
{
    (void)opaque;

    if (version_id != 1) {
        return false;
    }
    return true;
}
1314 static void e1000_pre_save(void *opaque)
1316 E1000State *s = opaque;
1317 NetClientState *nc = qemu_get_queue(s->nic);
1319 /* If the mitigation timer is active, emulate a timeout now. */
1320 if (s->mit_timer_on) {
1321 e1000_mit_timer(s);
1325 * If link is down and auto-negotiation is supported and ongoing,
1326 * complete auto-negotiation immediately. This allows us to look
1327 * at MII_SR_AUTONEG_COMPLETE to infer link status on load.
1329 if (nc->link_down && have_autoneg(s)) {
1330 s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
1334 static int e1000_post_load(void *opaque, int version_id)
1336 E1000State *s = opaque;
1337 NetClientState *nc = qemu_get_queue(s->nic);
1339 if (!(s->compat_flags & E1000_FLAG_MIT)) {
1340 s->mac_reg[ITR] = s->mac_reg[RDTR] = s->mac_reg[RADV] =
1341 s->mac_reg[TADV] = 0;
1342 s->mit_irq_level = false;
1344 s->mit_ide = 0;
1345 s->mit_timer_on = false;
1347 /* nc.link_down can't be migrated, so infer link_down according
1348 * to link status bit in mac_reg[STATUS].
1349 * Alternatively, restart link negotiation if it was in progress. */
1350 nc->link_down = (s->mac_reg[STATUS] & E1000_STATUS_LU) == 0;
1352 if (have_autoneg(s) &&
1353 !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
1354 nc->link_down = false;
1355 timer_mod(s->autoneg_timer,
1356 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
1359 return 0;
1362 static bool e1000_mit_state_needed(void *opaque)
1364 E1000State *s = opaque;
1366 return s->compat_flags & E1000_FLAG_MIT;
1369 static const VMStateDescription vmstate_e1000_mit_state = {
1370 .name = "e1000/mit_state",
1371 .version_id = 1,
1372 .minimum_version_id = 1,
1373 .fields = (VMStateField[]) {
1374 VMSTATE_UINT32(mac_reg[RDTR], E1000State),
1375 VMSTATE_UINT32(mac_reg[RADV], E1000State),
1376 VMSTATE_UINT32(mac_reg[TADV], E1000State),
1377 VMSTATE_UINT32(mac_reg[ITR], E1000State),
1378 VMSTATE_BOOL(mit_irq_level, E1000State),
1379 VMSTATE_END_OF_LIST()
1383 static const VMStateDescription vmstate_e1000 = {
1384 .name = "e1000",
1385 .version_id = 2,
1386 .minimum_version_id = 1,
1387 .pre_save = e1000_pre_save,
1388 .post_load = e1000_post_load,
1389 .fields = (VMStateField[]) {
1390 VMSTATE_PCI_DEVICE(parent_obj, E1000State),
1391 VMSTATE_UNUSED_TEST(is_version_1, 4), /* was instance id */
1392 VMSTATE_UNUSED(4), /* Was mmio_base. */
1393 VMSTATE_UINT32(rxbuf_size, E1000State),
1394 VMSTATE_UINT32(rxbuf_min_shift, E1000State),
1395 VMSTATE_UINT32(eecd_state.val_in, E1000State),
1396 VMSTATE_UINT16(eecd_state.bitnum_in, E1000State),
1397 VMSTATE_UINT16(eecd_state.bitnum_out, E1000State),
1398 VMSTATE_UINT16(eecd_state.reading, E1000State),
1399 VMSTATE_UINT32(eecd_state.old_eecd, E1000State),
1400 VMSTATE_UINT8(tx.ipcss, E1000State),
1401 VMSTATE_UINT8(tx.ipcso, E1000State),
1402 VMSTATE_UINT16(tx.ipcse, E1000State),
1403 VMSTATE_UINT8(tx.tucss, E1000State),
1404 VMSTATE_UINT8(tx.tucso, E1000State),
1405 VMSTATE_UINT16(tx.tucse, E1000State),
1406 VMSTATE_UINT32(tx.paylen, E1000State),
1407 VMSTATE_UINT8(tx.hdr_len, E1000State),
1408 VMSTATE_UINT16(tx.mss, E1000State),
1409 VMSTATE_UINT16(tx.size, E1000State),
1410 VMSTATE_UINT16(tx.tso_frames, E1000State),
1411 VMSTATE_UINT8(tx.sum_needed, E1000State),
1412 VMSTATE_INT8(tx.ip, E1000State),
1413 VMSTATE_INT8(tx.tcp, E1000State),
1414 VMSTATE_BUFFER(tx.header, E1000State),
1415 VMSTATE_BUFFER(tx.data, E1000State),
1416 VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64),
1417 VMSTATE_UINT16_ARRAY(phy_reg, E1000State, 0x20),
1418 VMSTATE_UINT32(mac_reg[CTRL], E1000State),
1419 VMSTATE_UINT32(mac_reg[EECD], E1000State),
1420 VMSTATE_UINT32(mac_reg[EERD], E1000State),
1421 VMSTATE_UINT32(mac_reg[GPRC], E1000State),
1422 VMSTATE_UINT32(mac_reg[GPTC], E1000State),
1423 VMSTATE_UINT32(mac_reg[ICR], E1000State),
1424 VMSTATE_UINT32(mac_reg[ICS], E1000State),
1425 VMSTATE_UINT32(mac_reg[IMC], E1000State),
1426 VMSTATE_UINT32(mac_reg[IMS], E1000State),
1427 VMSTATE_UINT32(mac_reg[LEDCTL], E1000State),
1428 VMSTATE_UINT32(mac_reg[MANC], E1000State),
1429 VMSTATE_UINT32(mac_reg[MDIC], E1000State),
1430 VMSTATE_UINT32(mac_reg[MPC], E1000State),
1431 VMSTATE_UINT32(mac_reg[PBA], E1000State),
1432 VMSTATE_UINT32(mac_reg[RCTL], E1000State),
1433 VMSTATE_UINT32(mac_reg[RDBAH], E1000State),
1434 VMSTATE_UINT32(mac_reg[RDBAL], E1000State),
1435 VMSTATE_UINT32(mac_reg[RDH], E1000State),
1436 VMSTATE_UINT32(mac_reg[RDLEN], E1000State),
1437 VMSTATE_UINT32(mac_reg[RDT], E1000State),
1438 VMSTATE_UINT32(mac_reg[STATUS], E1000State),
1439 VMSTATE_UINT32(mac_reg[SWSM], E1000State),
1440 VMSTATE_UINT32(mac_reg[TCTL], E1000State),
1441 VMSTATE_UINT32(mac_reg[TDBAH], E1000State),
1442 VMSTATE_UINT32(mac_reg[TDBAL], E1000State),
1443 VMSTATE_UINT32(mac_reg[TDH], E1000State),
1444 VMSTATE_UINT32(mac_reg[TDLEN], E1000State),
1445 VMSTATE_UINT32(mac_reg[TDT], E1000State),
1446 VMSTATE_UINT32(mac_reg[TORH], E1000State),
1447 VMSTATE_UINT32(mac_reg[TORL], E1000State),
1448 VMSTATE_UINT32(mac_reg[TOTH], E1000State),
1449 VMSTATE_UINT32(mac_reg[TOTL], E1000State),
1450 VMSTATE_UINT32(mac_reg[TPR], E1000State),
1451 VMSTATE_UINT32(mac_reg[TPT], E1000State),
1452 VMSTATE_UINT32(mac_reg[TXDCTL], E1000State),
1453 VMSTATE_UINT32(mac_reg[WUFC], E1000State),
1454 VMSTATE_UINT32(mac_reg[VET], E1000State),
1455 VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, RA, 32),
1456 VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, 128),
1457 VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA, 128),
1458 VMSTATE_END_OF_LIST()
1460 .subsections = (VMStateSubsection[]) {
1462 .vmsd = &vmstate_e1000_mit_state,
1463 .needed = e1000_mit_state_needed,
1464 }, {
1465 /* empty */
/*
 * Default EEPROM image (64 16-bit words); the contents are documented in
 * Tables 5-2 and 5-3, pp. 98-102, of the software developer's manual.
 *
 * pci_e1000_realize() copies this template and then overwrites words 0-2
 * (MAC address), words 11 and 13 (device ID, left as 0 here) and the
 * checksum word.
 */
static const uint16_t e1000_eeprom_template[64] = {
    /* 0x00 */ 0x0000, 0x0000, 0x0000, 0x0000,
    /* 0x04 */ 0xffff, 0x0000, 0x0000, 0x0000,
    /* 0x08 */ 0x3000, 0x1000, 0x6403, 0 /*DevId*/,
    /* 0x0c */ 0x8086, 0 /*DevId*/, 0x8086, 0x3040,
    /* 0x10 */ 0x0008, 0x2000, 0x7e14, 0x0048,
    /* 0x14 */ 0x1000, 0x00d8, 0x0000, 0x2700,
    /* 0x18 */ 0x6cc9, 0x3150, 0x0722, 0x040b,
    /* 0x1c */ 0x0984, 0x0000, 0xc000, 0x0706,
    /* 0x20 */ 0x1008, 0x0000, 0x0f04, 0x7fff,
    /* 0x24 */ 0x4d01, 0xffff, 0xffff, 0xffff,
    /* 0x28 */ 0xffff, 0xffff, 0xffff, 0xffff,
    /* 0x2c */ 0xffff, 0xffff, 0xffff, 0xffff,
    /* 0x30 */ 0x0100, 0x4000, 0x121c, 0xffff,
    /* 0x34 */ 0xffff, 0xffff, 0xffff, 0xffff,
    /* 0x38 */ 0xffff, 0xffff, 0xffff, 0xffff,
    /* 0x3c */ 0xffff, 0xffff, 0xffff, 0x0000,
};
1485 /* PCI interface */
1487 static void
1488 e1000_mmio_setup(E1000State *d)
1490 int i;
1491 const uint32_t excluded_regs[] = {
1492 E1000_MDIC, E1000_ICR, E1000_ICS, E1000_IMS,
1493 E1000_IMC, E1000_TCTL, E1000_TDT, PNPMMIO_SIZE
1496 memory_region_init_io(&d->mmio, OBJECT(d), &e1000_mmio_ops, d,
1497 "e1000-mmio", PNPMMIO_SIZE);
1498 memory_region_add_coalescing(&d->mmio, 0, excluded_regs[0]);
1499 for (i = 0; excluded_regs[i] != PNPMMIO_SIZE; i++)
1500 memory_region_add_coalescing(&d->mmio, excluded_regs[i] + 4,
1501 excluded_regs[i+1] - excluded_regs[i] - 4);
1502 memory_region_init_io(&d->io, OBJECT(d), &e1000_io_ops, d, "e1000-io", IOPORT_SIZE);
1505 static void
1506 pci_e1000_uninit(PCIDevice *dev)
1508 E1000State *d = E1000(dev);
1510 timer_del(d->autoneg_timer);
1511 timer_free(d->autoneg_timer);
1512 timer_del(d->mit_timer);
1513 timer_free(d->mit_timer);
1514 qemu_del_nic(d->nic);
1517 static NetClientInfo net_e1000_info = {
1518 .type = NET_CLIENT_OPTIONS_KIND_NIC,
1519 .size = sizeof(NICState),
1520 .can_receive = e1000_can_receive,
1521 .receive = e1000_receive,
1522 .receive_iov = e1000_receive_iov,
1523 .link_status_changed = e1000_set_link_status,
1526 static void e1000_write_config(PCIDevice *pci_dev, uint32_t address,
1527 uint32_t val, int len)
1529 E1000State *s = E1000(pci_dev);
1531 pci_default_write_config(pci_dev, address, val, len);
1533 if (range_covers_byte(address, len, PCI_COMMAND) &&
1534 (pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
1535 qemu_flush_queued_packets(qemu_get_queue(s->nic));
1540 static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp)
1542 DeviceState *dev = DEVICE(pci_dev);
1543 E1000State *d = E1000(pci_dev);
1544 PCIDeviceClass *pdc = PCI_DEVICE_GET_CLASS(pci_dev);
1545 uint8_t *pci_conf;
1546 uint16_t checksum = 0;
1547 int i;
1548 uint8_t *macaddr;
1550 pci_dev->config_write = e1000_write_config;
1552 pci_conf = pci_dev->config;
1554 /* TODO: RST# value should be 0, PCI spec 6.2.4 */
1555 pci_conf[PCI_CACHE_LINE_SIZE] = 0x10;
1557 pci_conf[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */
1559 e1000_mmio_setup(d);
1561 pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);
1563 pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->io);
1565 memmove(d->eeprom_data, e1000_eeprom_template,
1566 sizeof e1000_eeprom_template);
1567 qemu_macaddr_default_if_unset(&d->conf.macaddr);
1568 macaddr = d->conf.macaddr.a;
1569 for (i = 0; i < 3; i++)
1570 d->eeprom_data[i] = (macaddr[2*i+1]<<8) | macaddr[2*i];
1571 d->eeprom_data[11] = d->eeprom_data[13] = pdc->device_id;
1572 for (i = 0; i < EEPROM_CHECKSUM_REG; i++)
1573 checksum += d->eeprom_data[i];
1574 checksum = (uint16_t) EEPROM_SUM - checksum;
1575 d->eeprom_data[EEPROM_CHECKSUM_REG] = checksum;
1577 d->nic = qemu_new_nic(&net_e1000_info, &d->conf,
1578 object_get_typename(OBJECT(d)), dev->id, d);
1580 qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
1582 d->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, e1000_autoneg_timer, d);
1583 d->mit_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000_mit_timer, d);
1586 static void qdev_e1000_reset(DeviceState *dev)
1588 E1000State *d = E1000(dev);
1589 e1000_reset(d);
1592 static Property e1000_properties[] = {
1593 DEFINE_NIC_PROPERTIES(E1000State, conf),
1594 DEFINE_PROP_BIT("autonegotiation", E1000State,
1595 compat_flags, E1000_FLAG_AUTONEG_BIT, true),
1596 DEFINE_PROP_BIT("mitigation", E1000State,
1597 compat_flags, E1000_FLAG_MIT_BIT, true),
1598 DEFINE_PROP_END_OF_LIST(),
/* Per-model parameters, consumed by e1000_class_init() as class data. */
typedef struct E1000Info {
    const char *name;       /* QOM type name of the model */
    uint16_t    device_id;  /* copied to PCIDeviceClass.device_id */
    uint8_t     revision;   /* copied to PCIDeviceClass.revision */
    uint16_t    phy_id2;    /* copied to E1000BaseClass.phy_id2 */
} E1000Info;
1608 static void e1000_class_init(ObjectClass *klass, void *data)
1610 DeviceClass *dc = DEVICE_CLASS(klass);
1611 PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1612 E1000BaseClass *e = E1000_DEVICE_CLASS(klass);
1613 const E1000Info *info = data;
1615 k->realize = pci_e1000_realize;
1616 k->exit = pci_e1000_uninit;
1617 k->romfile = "efi-e1000.rom";
1618 k->vendor_id = PCI_VENDOR_ID_INTEL;
1619 k->device_id = info->device_id;
1620 k->revision = info->revision;
1621 e->phy_id2 = info->phy_id2;
1622 k->class_id = PCI_CLASS_NETWORK_ETHERNET;
1623 set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1624 dc->desc = "Intel Gigabit Ethernet";
1625 dc->reset = qdev_e1000_reset;
1626 dc->vmsd = &vmstate_e1000;
1627 dc->props = e1000_properties;
1630 static void e1000_instance_init(Object *obj)
1632 E1000State *n = E1000(obj);
1633 device_add_bootindex_property(obj, &n->conf.bootindex,
1634 "bootindex", "/ethernet-phy@0",
1635 DEVICE(n), NULL);
1638 static const TypeInfo e1000_base_info = {
1639 .name = TYPE_E1000_BASE,
1640 .parent = TYPE_PCI_DEVICE,
1641 .instance_size = sizeof(E1000State),
1642 .instance_init = e1000_instance_init,
1643 .class_size = sizeof(E1000BaseClass),
1644 .abstract = true,
1647 static const E1000Info e1000_devices[] = {
1649 .name = "e1000-82540em",
1650 .device_id = E1000_DEV_ID_82540EM,
1651 .revision = 0x03,
1652 .phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT,
1655 .name = "e1000-82544gc",
1656 .device_id = E1000_DEV_ID_82544GC_COPPER,
1657 .revision = 0x03,
1658 .phy_id2 = E1000_PHY_ID2_82544x,
1661 .name = "e1000-82545em",
1662 .device_id = E1000_DEV_ID_82545EM_COPPER,
1663 .revision = 0x03,
1664 .phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT,
1668 static const TypeInfo e1000_default_info = {
1669 .name = "e1000",
1670 .parent = "e1000-82540em",
1673 static void e1000_register_types(void)
1675 int i;
1677 type_register_static(&e1000_base_info);
1678 for (i = 0; i < ARRAY_SIZE(e1000_devices); i++) {
1679 const E1000Info *info = &e1000_devices[i];
1680 TypeInfo type_info = {};
1682 type_info.name = info->name;
1683 type_info.parent = TYPE_E1000_BASE;
1684 type_info.class_data = (void *)info;
1685 type_info.class_init = e1000_class_init;
1686 type_info.instance_init = e1000_instance_init;
1688 type_register(&type_info);
1690 type_register_static(&e1000_default_info);
1693 type_init(e1000_register_types)