[tcp] Merge boolean flags into a single "flags" field
[gpxe.git] / src / drivers / net / myri10ge.c
blob218d56f1e7cc639ce616e4f7eb245fe8997d9097
1 /************************************************* -*- linux-c -*-
2 * Myricom 10Gb Network Interface Card Software
3 * Copyright 2009, Myricom, Inc.
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 ****************************************************************/
19 FILE_LICENCE ( GPL2_ONLY );
22 * Author: Glenn Brown <glenn@myri.com>
26 * General Theory of Operation
28 * This is a minimal Myricom 10 gigabit Ethernet driver for network
29 * boot.
31 * Initialization
33 * myri10ge_pci_probe() is called by gPXE during initialization.
34 * Minimal NIC initialization is performed to minimize resources
35 * consumed when the driver is resident but unused.
37 * Network Boot
39 * myri10ge_net_open() is called by gPXE before attempting to network
40 * boot from the card. Packet buffers are allocated and the NIC
41 * interface is initialized.
43 * Transmit
45 * myri10ge_net_transmit() enqueues frames for transmission by writing
46 * descriptors to the NIC's tx ring. For simplicity and to avoid
47 * copies, we always have the NIC DMA up the packet. The sent I/O
48 * buffer is released once the NIC signals myri10ge_interrupt_handler()
49 * that the send has completed.
51 * Receive
53 * Receives are posted to the NIC's receive ring. The NIC fills a
54 * DMAable receive_completion ring with completion notifications.
55 * myri10ge_net_poll() polls for these receive notifications, posts
56 * replacement receive buffers to the NIC, and passes received frames
57 * to netdev_rx().
59 * NonVolatile Storage
61 * This driver supports NonVolatile Storage (nvs) in the NIC EEPROM.
62 * If the last EEPROM block is not otherwise filled, we tell
63 * gPXE it may store NonVolatile Options (nvo) there.
67 * Debugging levels:
68 * - DBG() is for any errors, i.e. failed alloc_iob(), malloc_dma(),
69 * TX overflow, corrupted packets, ...
70 * - DBG2() is for successful events, like packet received,
71 * packet transmitted, and other general notifications.
72 * - DBGP() prints the name of each called function on entry
#include <stddef.h>
#include <stdint.h>

#include <byteswap.h>
#include <errno.h>
#include <gpxe/ethernet.h>
#include <gpxe/if_ether.h>
#include <gpxe/iobuf.h>
#include <gpxe/malloc.h>
#include <gpxe/netdevice.h>
#include <gpxe/nvo.h>
#include <gpxe/nvs.h>
#include <gpxe/pci.h>
#include <gpxe/timer.h>

#include "myri10ge_mcp.h"
91 /****************************************************************
92 * Forward declarations
93 ****************************************************************/
/* PCI driver entry points. */

static int myri10ge_pci_probe ( struct pci_device *pci,
				const struct pci_device_id *id );
static void myri10ge_pci_remove ( struct pci_device *pci );

/* Network device operations. */

static int myri10ge_net_open ( struct net_device *netdev );
static void myri10ge_net_close ( struct net_device *netdev );
static int myri10ge_net_transmit ( struct net_device *netdev,
				   struct io_buffer *iobuf );
static void myri10ge_net_poll ( struct net_device *netdev );
static void myri10ge_net_irq ( struct net_device *netdev, int enable );
109 /****************************************************************
110 * Constants
111 ****************************************************************/
/*
 * Largest valid index of each driver-side ring.  Ring positions are
 * wrapped by ANDing them with these values, so every one of them must
 * be of the form 2**N-1.
 */

#define MYRI10GE_TRANSMIT_WRAP			1U
#define MYRI10GE_RECEIVE_WRAP			7U
#define MYRI10GE_RECEIVE_COMPLETION_WRAP	31U
119 /****************************************************************
120 * Driver internal data types.
121 ****************************************************************/
123 /* Structure holding all DMA buffers for a NIC, which we will
124 allocated as contiguous read/write DMAable memory when the NIC is
125 initialized. */
127 struct myri10ge_dma_buffers
129 /* The NIC DMAs receive completion notifications into this ring */
131 mcp_slot_t receive_completion[1+MYRI10GE_RECEIVE_COMPLETION_WRAP];
133 /* Interrupt details are DMAd here before interrupting. */
135 mcp_irq_data_t irq_data; /* 64B */
137 /* NIC command completion status is DMAd here. */
139 mcp_cmd_response_t command_response; /* 8B */
142 struct myri10ge_private
144 /* Interrupt support */
146 uint32 *irq_claim; /* in NIC SRAM */
147 uint32 *irq_deassert; /* in NIC SRAM */
149 /* DMA buffers. */
151 struct myri10ge_dma_buffers *dma;
154 * Transmit state.
156 * The counts here are uint32 for easy comparison with
157 * priv->dma->irq_data.send_done_count and with each other.
160 mcp_kreq_ether_send_t *transmit_ring; /* in NIC SRAM */
161 uint32 transmit_ring_wrap;
162 uint32 transmits_posted;
163 uint32 transmits_done;
164 struct io_buffer *transmit_iob[1 + MYRI10GE_TRANSMIT_WRAP];
167 * Receive state.
170 mcp_kreq_ether_recv_t *receive_post_ring; /* in NIC SRAM */
171 unsigned int receive_post_ring_wrap;
172 unsigned int receives_posted;
173 unsigned int receives_done;
174 struct io_buffer *receive_iob[1 + MYRI10GE_RECEIVE_WRAP];
176 /* Address for writing commands to the firmware.
177 BEWARE: the value must be written 32 bits at a time. */
179 mcp_cmd_t *command;
182 * Nonvolatile Storage for configuration options.
185 struct nvs_device nvs;
186 struct nvo_fragment nvo_fragment[2];
187 struct nvo_block nvo;
189 /* Cached PCI capability locations. */
191 uint8 pci_cap_vs;
194 /****************************************************************
195 * Driver internal functions.
196 ****************************************************************/
/* Dump the transmit and receive ring counters at DBG2 level.  Only
   call this right after one of the printed values has changed. */

#define DBG2_RINGS( priv )						\
	DBG2 ( "tx %x/%x rx %x/%x in %s() \n",				\
	       ( priv )->transmits_done,				\
	       ( priv )->transmits_posted,				\
	       ( priv )->receives_done,					\
	       ( priv )->receives_posted,				\
	       __FUNCTION__ )
208 * Return a pointer to the driver private data for a network device.
210 * @v netdev Network device created by this driver.
211 * @ret priv The corresponding driver private data.
213 static inline struct myri10ge_private *myri10ge_priv ( struct net_device *nd )
215 /* Our private data always follows the network device in memory,
216 since we use alloc_netdev() to allocate the storage. */
218 return ( struct myri10ge_private * ) ( nd + 1 );
222 * Convert a Myri10ge driver private data pointer to a netdev pointer.
224 * @v p Myri10ge device private data.
225 * @ret r The corresponding network device.
227 static inline struct net_device *myri10ge_netdev ( struct myri10ge_private *p )
229 return ( ( struct net_device * ) p ) - 1;
233 * Convert a network device pointer to a PCI device pointer.
235 * @v netdev A Network Device.
236 * @ret r The corresponding PCI device.
238 static inline struct pci_device *myri10ge_pcidev ( struct net_device *netdev )
240 return container_of (netdev->dev, struct pci_device, dev);
244 * Pass a receive buffer to the NIC to be filled.
246 * @v priv The network device to receive the buffer.
247 * @v iob The I/O buffer to fill.
249 * Receive buffers are filled in FIFO order.
251 static void myri10ge_post_receive ( struct myri10ge_private *priv,
252 struct io_buffer *iob )
254 unsigned int receives_posted;
255 mcp_kreq_ether_recv_t *request;
257 /* Record the posted I/O buffer, to be passed to netdev_rx() on
258 receive. */
260 receives_posted = priv->receives_posted;
261 priv->receive_iob[receives_posted & MYRI10GE_RECEIVE_WRAP] = iob;
263 /* Post the receive. */
265 request = &priv->receive_post_ring[receives_posted
266 & priv->receive_post_ring_wrap];
267 request->addr_high = 0;
268 wmb();
269 request->addr_low = htonl ( virt_to_bus ( iob->data ) );
270 priv->receives_posted = ++receives_posted;
274 * Execute a command on the NIC.
276 * @v priv NIC to perform the command.
277 * @v cmd The command to perform.
278 * @v data I/O copy buffer for parameters/results
279 * @ret rc 0 on success, else an error code.
281 static int myri10ge_command ( struct myri10ge_private *priv,
282 uint32 cmd,
283 uint32 data[3] )
285 int i;
286 mcp_cmd_t *command;
287 uint32 result;
288 unsigned int slept_ms;
289 volatile mcp_cmd_response_t *response;
291 DBGP ( "myri10ge_command ( ,%d, ) \n", cmd );
292 command = priv->command;
293 response = &priv->dma->command_response;
295 /* Mark the command as incomplete. */
297 response->result = 0xFFFFFFFF;
299 /* Pass the command to the NIC. */
301 command->cmd = htonl ( cmd );
302 command->data0 = htonl ( data[0] );
303 command->data1 = htonl ( data[1] );
304 command->data2 = htonl ( data[2] );
305 command->response_addr.high = 0;
306 command->response_addr.low
307 = htonl ( virt_to_bus ( &priv->dma->command_response ) );
308 for ( i=0; i<36; i+=4 )
309 * ( uint32 * ) &command->pad[i] = 0;
310 wmb();
311 * ( uint32 * ) &command->pad[36] = 0;
313 /* Wait up to 2 seconds for a response. */
315 for ( slept_ms=0; slept_ms<2000; slept_ms++ ) {
316 result = response->result;
317 if ( result == 0 ) {
318 data[0] = ntohl ( response->data );
319 return 0;
320 } else if ( result != 0xFFFFFFFF ) {
321 DBG ( "cmd%d:0x%x\n",
322 cmd,
323 ntohl ( response->result ) );
324 return -EIO;
326 udelay ( 1000 );
327 rmb();
329 DBG ( "cmd%d:timed out\n", cmd );
330 return -ETIMEDOUT;
334 * Handle any pending interrupt.
336 * @v netdev Device being polled for interrupts.
338 * This is called periodically to let the driver check for interrupts.
340 static void myri10ge_interrupt_handler ( struct net_device *netdev )
342 struct myri10ge_private *priv;
343 mcp_irq_data_t *irq_data;
344 uint8 valid;
346 priv = myri10ge_priv ( netdev );
347 irq_data = &priv->dma->irq_data;
349 /* Return if there was no interrupt. */
351 rmb();
352 valid = irq_data->valid;
353 if ( !valid )
354 return;
355 DBG2 ( "irq " );
357 /* Tell the NIC to deassert the interrupt and clear
358 irq_data->valid.*/
360 *priv->irq_deassert = 0; /* any value is OK. */
361 mb();
363 /* Handle any new receives. */
365 if ( valid & 1 ) {
367 /* Pass the receive interrupt token back to the NIC. */
369 DBG2 ( "rx " );
370 *priv->irq_claim = htonl ( 3 );
371 wmb();
374 /* Handle any sent packet by freeing its I/O buffer, now that
375 we know it has been DMAd. */
377 if ( valid & 2 ) {
378 unsigned int nic_done_count;
380 DBG2 ( "snt " );
381 nic_done_count = ntohl ( priv->dma->irq_data.send_done_count );
382 while ( priv->transmits_done != nic_done_count ) {
383 struct io_buffer *iob;
385 iob = priv->transmit_iob [priv->transmits_done
386 & MYRI10GE_TRANSMIT_WRAP];
387 DBG2 ( "%p ", iob );
388 netdev_tx_complete ( netdev, iob );
389 ++priv->transmits_done;
393 /* Record any statistics update. */
395 if ( irq_data->stats_updated ) {
397 /* Update the link status. */
399 DBG2 ( "stats " );
400 if ( ntohl ( irq_data->link_up ) == MXGEFW_LINK_UP )
401 netdev_link_up ( netdev );
402 else
403 netdev_link_down ( netdev );
405 /* Ignore all error counters from the NIC. */
408 /* Wait for the interrupt to be deasserted, as indicated by
409 irq_data->valid, which is set by the NIC after the deassert. */
411 DBG2 ( "wait " );
412 do {
413 mb();
414 } while ( irq_data->valid );
416 /* Claim the interrupt to enable future interrupt generation. */
418 DBG2 ( "claim\n" );
419 * ( priv->irq_claim + 1 ) = htonl ( 3 );
420 mb();
/* Register offsets and mode values of the Myricom Vendor Specific
   PCI configuration space capability, used to read the STRING_SPECS
   and to access the EEPROM.  The register macros expect a variable
   named "vs", holding the capability offset, to be in scope. */

/* Registers, ordered by offset within the capability. */

#define VS_EEPROM_READ_ADDR ( vs + 0x04 )
#define VS_EEPROM_READ_DATA ( vs + 0x08 )
#define VS_EEPROM_WRITE     ( vs + 0x0C )
#define VS_MODE             ( vs + 0x10 )
#define VS_DATA             ( vs + 0x14 )
#define VS_ADDR             ( vs + 0x18 )

/* Values written to VS_MODE. */

#define VS_MODE_READ32              0x3
#define VS_MODE_LOCATE              0x8
#define VS_MODE_EEPROM_STREAM_WRITE 0xB

/* Value written to VS_ADDR in locate mode. */

#define VS_LOCATE_STRING_SPECS      0x3
438 * Read MAC address from its 'string specs' via the vendor-specific
439 * capability. (This capability allows NIC SRAM and ROM to be read
440 * before it is mapped.)
442 * @v pci The device.
443 * @v vs Offset of the PCI Vendor-Specific Capability.
444 * @v mac Buffer to store the MAC address.
445 * @ret rc Returns 0 on success, else an error code.
447 static int mac_address_from_string_specs ( struct pci_device *pci,
448 unsigned int vs,
449 uint8 mac[ETH_ALEN] )
451 char string_specs[256];
452 char *ptr, *limit;
453 char *to = string_specs;
454 uint32 addr;
455 uint32 len;
456 int mac_set = 0;
458 /* Locate the String specs in LANai SRAM. */
460 pci_write_config_byte ( pci, VS_MODE, VS_MODE_LOCATE );
461 pci_write_config_dword ( pci, VS_ADDR, VS_LOCATE_STRING_SPECS );
462 pci_read_config_dword ( pci, VS_ADDR, &addr );
463 pci_read_config_dword ( pci, VS_DATA, &len );
464 DBG2 ( "ss@%x,%x\n", addr, len );
466 /* Copy in the string specs. Use 32-bit reads for performance. */
468 if ( len > sizeof ( string_specs ) || ( len & 3 ) ) {
469 pci_write_config_byte ( pci, VS_MODE, 0 );
470 DBG ( "SS too big\n" );
471 return -ENOTSUP;
474 pci_write_config_byte ( pci, VS_MODE, VS_MODE_READ32 );
475 while ( len >= 4 ) {
476 uint32 tmp;
478 pci_write_config_byte ( pci, VS_ADDR, addr );
479 pci_read_config_dword ( pci, VS_DATA, &tmp );
480 tmp = ntohl ( tmp );
481 memcpy ( to, &tmp, 4 );
482 to += 4;
483 addr += 4;
484 len -= 4;
486 pci_write_config_byte ( pci, VS_MODE, 0 );
488 /* Parse the string specs. */
490 DBG2 ( "STRING_SPECS:\n" );
491 ptr = string_specs;
492 limit = string_specs + sizeof ( string_specs );
493 while ( *ptr != '\0' && ptr < limit ) {
494 DBG2 ( "%s\n", ptr );
495 if ( memcmp ( ptr, "MAC=", 4 ) == 0 ) {
496 unsigned int i;
498 ptr += 4;
499 for ( i=0; i<6; i++ ) {
500 if ( ( ptr + 2 ) > limit ) {
501 DBG ( "bad MAC addr\n" );
502 return -ENOTSUP;
504 mac[i] = strtoul ( ptr, &ptr, 16 );
505 ptr += 1;
507 mac_set = 1;
509 else
510 while ( ptr < limit && *ptr++ );
513 /* Verify we parsed all we need. */
515 if ( !mac_set ) {
516 DBG ( "no MAC addr\n" );
517 return -ENOTSUP;
520 DBG2 ( "MAC %02x:%02x:%02x:%02x:%02x:%02x\n",
521 mac[0], mac[1], mac[2], mac[3], mac[4], mac[5] );
523 return 0;
526 /****************************************************************
527 * NonVolatile Storage support
528 ****************************************************************/
531 * Fill a buffer with data read from nonvolatile storage.
533 * @v nvs The NonVolatile Storage device to be read.
534 * @v addr The first NonVolatile Storage address to be read.
535 * @v _buf Pointer to the data buffer to be filled.
536 * @v len The number of bytes to copy.
537 * @ret rc 0 on success, else nonzero.
539 static int myri10ge_nvs_read ( struct nvs_device *nvs,
540 unsigned int addr,
541 void *_buf,
542 size_t len )
544 struct myri10ge_private *priv =
545 container_of (nvs, struct myri10ge_private, nvs);
546 struct pci_device *pci = myri10ge_pcidev ( myri10ge_netdev ( priv ) );
547 unsigned int vs = priv->pci_cap_vs;
548 unsigned char *buf = (unsigned char *) _buf;
549 unsigned int data;
550 unsigned int i, j;
552 DBGP ( "myri10ge_nvs_read\n" );
554 /* Issue the first read address. */
556 pci_write_config_byte ( pci, VS_EEPROM_READ_ADDR + 3, addr>>16 );
557 pci_write_config_byte ( pci, VS_EEPROM_READ_ADDR + 2, addr>>8 );
558 pci_write_config_byte ( pci, VS_EEPROM_READ_ADDR + 1, addr );
559 addr++;
561 /* Issue all the reads, and harvest the results every 4th issue. */
563 for ( i=0; i<len; ++i,addr++ ) {
565 /* Issue the next read address, updating only the
566 bytes that need updating. We always update the
567 LSB, which triggers the read. */
569 if ( ( addr & 0xff ) == 0 ) {
570 if ( ( addr & 0xffff ) == 0 ) {
571 pci_write_config_byte ( pci,
572 VS_EEPROM_READ_ADDR + 3,
573 addr >> 16 );
575 pci_write_config_byte ( pci,
576 VS_EEPROM_READ_ADDR + 2,
577 addr >> 8 );
579 pci_write_config_byte ( pci, VS_EEPROM_READ_ADDR + 1, addr );
581 /* If 4 data bytes are available, read them with a single read. */
583 if ( ( i & 3 ) == 3 ) {
584 pci_read_config_dword ( pci,
585 VS_EEPROM_READ_DATA,
586 &data );
587 for ( j=0; j<4; j++ ) {
588 buf[i-j] = data;
589 data >>= 8;
594 /* Harvest any remaining results. */
596 if ( ( i & 3 ) != 0 ) {
597 pci_read_config_dword ( pci, VS_EEPROM_READ_DATA, &data );
598 for ( j=1; j<=(i&3); j++ ) {
599 buf[i-j] = data;
600 data >>= 8;
604 DBGP_HDA ( addr - len, _buf, len );
605 return 0;
609 * Write a buffer into nonvolatile storage.
611 * @v nvs The NonVolatile Storage device to be written.
612 * @v address The NonVolatile Storage address to be written.
613 * @v _buf Pointer to the data to be written.
614 * @v len Length of the buffer to be written.
615 * @ret rc 0 on success, else nonzero.
617 static int myri10ge_nvs_write ( struct nvs_device *nvs,
618 unsigned int addr,
619 const void *_buf,
620 size_t len )
622 struct myri10ge_private *priv =
623 container_of (nvs, struct myri10ge_private, nvs);
624 struct pci_device *pci = myri10ge_pcidev ( myri10ge_netdev ( priv ) );
625 unsigned int vs = priv->pci_cap_vs;
626 const unsigned char *buf = (const unsigned char *)_buf;
627 unsigned int i;
628 uint8 verify;
630 DBGP ( "nvs_write " );
631 DBGP_HDA ( addr, _buf, len );
633 /* Start erase of the NonVolatile Options block. */
635 DBGP ( "erasing " );
636 pci_write_config_dword ( pci, VS_EEPROM_WRITE, ( addr << 8 ) | 0xff );
638 /* Wait for erase to complete. */
640 DBGP ( "waiting " );
641 pci_read_config_byte ( pci, VS_EEPROM_READ_DATA, &verify );
642 while ( verify != 0xff ) {
643 pci_write_config_byte ( pci, VS_EEPROM_READ_ADDR + 1, addr );
644 pci_read_config_byte ( pci, VS_EEPROM_READ_DATA, &verify );
647 /* Write the data one byte at a time. */
649 DBGP ( "writing " );
650 pci_write_config_byte ( pci, VS_MODE, VS_MODE_EEPROM_STREAM_WRITE );
651 pci_write_config_dword ( pci, VS_ADDR, addr );
652 for (i=0; i<len; i++, addr++)
653 pci_write_config_byte ( pci, VS_DATA, buf[i] );
654 pci_write_config_dword ( pci, VS_ADDR, 0xffffffff );
655 pci_write_config_byte ( pci, VS_MODE, 0 );
657 DBGP ( "done\n" );
658 return 0;
662 * Initialize NonVolatile storage support for a device.
664 * @v priv Device private data for the device.
665 * @ret rc 0 on success, else an error code.
668 static int myri10ge_nv_init ( struct myri10ge_private *priv )
670 int rc;
671 struct myri10ge_eeprom_header
673 uint8 __jump[8];
674 uint32 eeprom_len;
675 uint32 eeprom_segment_len;
676 uint32 mcp1_offset;
677 uint32 mcp2_offset;
678 uint32 version;
679 } hdr;
680 uint32 mcp2_len;
681 unsigned int nvo_fragment_pos;
683 DBGP ( "myri10ge_nv_init\n" );
685 /* Read the EEPROM header, and byteswap the fields we will use.
686 This is safe even though priv->nvs is not yet initialized. */
688 rc = myri10ge_nvs_read ( &priv->nvs, 0, &hdr, sizeof ( hdr ) );
689 if ( rc ) {
690 DBG ( "EEPROM header unreadable\n" );
691 return rc;
693 hdr.eeprom_len = ntohl ( hdr.eeprom_len );
694 hdr.eeprom_segment_len = ntohl ( hdr.eeprom_segment_len );
695 hdr.mcp2_offset = ntohl ( hdr.mcp2_offset );
696 hdr.version = ntohl ( hdr.version );
697 DBG2 ( "eelen:%xh seglen:%xh mcp2@%xh ver%d\n", hdr.eeprom_len,
698 hdr.eeprom_segment_len, hdr.mcp2_offset, hdr.version );
700 /* If the firmware does not support EEPROM writes, simply return. */
702 if ( hdr.version < 1 ) {
703 DBG ( "No EEPROM write support\n" );
704 return 0;
707 /* Read the length of MCP2. */
709 rc = myri10ge_nvs_read ( &priv->nvs, hdr.mcp2_offset, &mcp2_len, 4 );
710 mcp2_len = ntohl ( mcp2_len );
711 DBG2 ( "mcp2len:%xh\n", mcp2_len );
713 /* Determine the position of the NonVolatile Options fragment and
714 simply return if it overlaps other data. */
716 nvo_fragment_pos = hdr.eeprom_len - hdr.eeprom_segment_len;
717 if ( hdr.mcp2_offset + mcp2_len > nvo_fragment_pos ) {
718 DBG ( "EEPROM full\n" );
719 return 0;
722 /* Initilize NonVolatile Storage state. */
724 priv->nvs.word_len_log2 = 0;
725 priv->nvs.size = hdr.eeprom_len;
726 priv->nvs.block_size = hdr.eeprom_segment_len;
727 priv->nvs.read = myri10ge_nvs_read;
728 priv->nvs.write = myri10ge_nvs_write;
730 /* Build the NonVolatile storage fragment list. We would like
731 to use the whole last EEPROM block for this, but we must
732 reduce the block size lest malloc fail in
733 src/core/nvo.o. */
735 priv->nvo_fragment[0].address = nvo_fragment_pos;
736 priv->nvo_fragment[0].len = 0x200;
738 /* Register the NonVolatile Options storage. */
740 nvo_init ( &priv->nvo,
741 &priv->nvs,
742 priv->nvo_fragment,
743 & myri10ge_netdev (priv) -> refcnt );
744 rc = register_nvo ( &priv->nvo,
745 netdev_settings ( myri10ge_netdev ( priv ) ) );
746 if ( rc ) {
747 DBG ("register_nvo failed");
748 priv->nvo_fragment[0].len = 0;
749 return rc;
752 DBG2 ( "NVO supported\n" );
753 return 0;
756 void
757 myri10ge_nv_fini ( struct myri10ge_private *priv )
759 /* Simply return if nonvolatile access is not supported. */
761 if ( 0 == priv->nvo_fragment[0].len )
762 return;
764 unregister_nvo ( &priv->nvo );
767 /****************************************************************
768 * gPXE PCI Device Driver API functions
769 ****************************************************************/
772 * Initialize the PCI device.
774 * @v pci The device's associated pci_device structure.
775 * @v id The PCI device + vendor id.
776 * @ret rc Returns zero if successfully initialized.
778 * This function is called very early on, while gPXE is initializing.
779 * This is a gPXE PCI Device Driver API function.
781 static int myri10ge_pci_probe ( struct pci_device *pci,
782 const struct pci_device_id *id __unused )
784 static struct net_device_operations myri10ge_operations = {
785 .open = myri10ge_net_open,
786 .close = myri10ge_net_close,
787 .transmit = myri10ge_net_transmit,
788 .poll = myri10ge_net_poll,
789 .irq = myri10ge_net_irq
792 const char *dbg;
793 int rc;
794 struct net_device *netdev;
795 struct myri10ge_private *priv;
797 DBGP ( "myri10ge_pci_probe: " );
799 netdev = alloc_etherdev ( sizeof ( *priv ) );
800 if ( !netdev ) {
801 rc = -ENOMEM;
802 dbg = "alloc_etherdev";
803 goto abort_with_nothing;
806 netdev_init ( netdev, &myri10ge_operations );
807 priv = myri10ge_priv ( netdev );
809 pci_set_drvdata ( pci, netdev );
810 netdev->dev = &pci->dev;
812 /* Make sure interrupts are disabled. */
814 myri10ge_net_irq ( netdev, 0 );
816 /* Find the PCI Vendor-Specific capability. */
818 priv->pci_cap_vs = pci_find_capability ( pci , PCI_CAP_ID_VNDR );
819 if ( 0 == priv->pci_cap_vs ) {
820 rc = -ENOTSUP;
821 dbg = "no_vs";
822 goto abort_with_netdev_init;
825 /* Read the NIC HW address. */
827 rc = mac_address_from_string_specs ( pci,
828 priv->pci_cap_vs,
829 netdev->hw_addr );
830 if ( rc ) {
831 dbg = "mac_from_ss";
832 goto abort_with_netdev_init;
834 DBGP ( "mac " );
836 /* Enable bus master, etc. */
838 adjust_pci_device ( pci );
839 DBGP ( "pci " );
841 /* Register the initialized network device. */
843 rc = register_netdev ( netdev );
844 if ( rc ) {
845 dbg = "register_netdev";
846 goto abort_with_netdev_init;
849 /* Initialize NonVolatile Storage support. */
851 rc = myri10ge_nv_init ( priv );
852 if ( rc ) {
853 dbg = "myri10ge_nv_init";
854 goto abort_with_registered_netdev;
857 DBGP ( "done\n" );
859 return 0;
861 abort_with_registered_netdev:
862 unregister_netdev ( netdev );
863 abort_with_netdev_init:
864 netdev_nullify ( netdev );
865 netdev_put ( netdev );
866 abort_with_nothing:
867 DBG ( "%s:%s\n", dbg, strerror ( rc ) );
868 return rc;
/**
 * Remove a device from the PCI device list.
 *
 * @v pci		PCI device to remove.
 *
 * This is a PCI Device Driver API function.
 */
static void myri10ge_pci_remove ( struct pci_device *pci )
{
	struct net_device *netdev = pci_get_drvdata ( pci );
	struct myri10ge_private *priv;

	DBGP ( "myri10ge_pci_remove\n" );

	/* Unregister the nonvolatile options first, then the network
	   device, and finally drop the network device reference. */

	priv = myri10ge_priv ( netdev );
	myri10ge_nv_fini ( priv );
	unregister_netdev ( netdev );
	netdev_nullify ( netdev );
	netdev_put ( netdev );
}
891 /****************************************************************
892 * gPXE Network Device Driver Operations
893 ****************************************************************/
896 * Close a network device.
898 * @v netdev Device to close.
900 * This is a gPXE Network Device Driver API function.
902 static void myri10ge_net_close ( struct net_device *netdev )
904 struct myri10ge_private *priv;
905 uint32 data[3];
907 DBGP ( "myri10ge_net_close\n" );
908 priv = myri10ge_priv ( netdev );
910 /* disable interrupts */
912 myri10ge_net_irq ( netdev, 0 );
914 /* Reset the NIC interface, so we won't get any more events from
915 the NIC. */
917 myri10ge_command ( priv, MXGEFW_CMD_RESET, data );
919 /* Free receive buffers that were never filled. */
921 while ( priv->receives_done != priv->receives_posted ) {
922 free_iob ( priv->receive_iob[priv->receives_done
923 & MYRI10GE_RECEIVE_WRAP] );
924 ++priv->receives_done;
927 /* Release DMAable memory. */
929 free_dma ( priv->dma, sizeof ( *priv->dma ) );
931 /* Erase all state from the open. */
933 memset ( priv, 0, sizeof ( *priv ) );
935 DBG2_RINGS ( priv );
939 * Enable or disable IRQ masking.
941 * @v netdev Device to control.
942 * @v enable Zero to mask off IRQ, non-zero to enable IRQ.
944 * This is a gPXE Network Driver API function.
946 static void myri10ge_net_irq ( struct net_device *netdev, int enable )
948 struct pci_device *pci_dev;
949 uint16 val;
951 DBGP ( "myri10ge_net_irq\n" );
952 pci_dev = ( struct pci_device * ) netdev->dev;
954 /* Adjust the Interrupt Disable bit in the Command register of the
955 PCI Device. */
957 pci_read_config_word ( pci_dev, PCI_COMMAND, &val );
958 if ( enable )
959 val &= ~PCI_COMMAND_INTX_DISABLE;
960 else
961 val |= PCI_COMMAND_INTX_DISABLE;
962 pci_write_config_word ( pci_dev, PCI_COMMAND, val );
966 * Opens a network device.
968 * @v netdev Device to be opened.
969 * @ret rc Non-zero if failed to open.
971 * This enables tx and rx on the device.
972 * This is a gPXE Network Device Driver API function.
974 static int myri10ge_net_open ( struct net_device *netdev )
976 const char *dbg; /* printed upon error return */
977 int rc;
978 struct io_buffer *iob;
979 struct myri10ge_private *priv;
980 uint32 data[3];
981 struct pci_device *pci_dev;
982 void *membase;
984 DBGP ( "myri10ge_net_open\n" );
985 priv = myri10ge_priv ( netdev );
986 pci_dev = ( struct pci_device * ) netdev->dev;
987 membase = phys_to_virt ( pci_dev->membase );
989 /* Compute address for passing commands to the firmware. */
991 priv->command = membase + MXGEFW_ETH_CMD;
993 /* Ensure interrupts are disabled. */
995 myri10ge_net_irq ( netdev, 0 );
997 /* Allocate cleared DMAable buffers. */
999 priv->dma = malloc_dma ( sizeof ( *priv->dma ) , 128 );
1000 if ( !priv->dma ) {
1001 rc = -ENOMEM;
1002 dbg = "DMA";
1003 goto abort_with_nothing;
1005 memset ( priv->dma, 0, sizeof ( *priv->dma ) );
1007 /* Simplify following code. */
1009 #define TRY( prefix, base, suffix ) do { \
1010 rc = myri10ge_command ( priv, \
1011 MXGEFW_ \
1012 ## prefix \
1013 ## base \
1014 ## suffix, \
1015 data ); \
1016 if ( rc ) { \
1017 dbg = #base; \
1018 goto abort_with_dma; \
1020 } while ( 0 )
1022 /* Send a reset command to the card to see if it is alive,
1023 and to reset its queue state. */
1025 TRY ( CMD_, RESET , );
1027 /* Set the interrupt queue size. */
1029 data[0] = ( (uint32_t)( sizeof ( priv->dma->receive_completion ) )
1030 | MXGEFW_CMD_SET_INTRQ_SIZE_FLAG_NO_STRICT_SIZE_CHECK );
1031 TRY ( CMD_SET_ , INTRQ_SIZE , );
1033 /* Set the interrupt queue DMA address. */
1035 data[0] = virt_to_bus ( &priv->dma->receive_completion );
1036 data[1] = 0;
1037 TRY ( CMD_SET_, INTRQ_DMA, );
1039 /* Get the NIC interrupt claim address. */
1041 TRY ( CMD_GET_, IRQ_ACK, _OFFSET );
1042 priv->irq_claim = membase + data[0];
1044 /* Get the NIC interrupt assert address. */
1046 TRY ( CMD_GET_, IRQ_DEASSERT, _OFFSET );
1047 priv->irq_deassert = membase + data[0];
1049 /* Disable interrupt coalescing, which is inappropriate for the
1050 minimal buffering we provide. */
1052 TRY ( CMD_GET_, INTR_COAL, _DELAY_OFFSET );
1053 * ( ( uint32 * ) ( membase + data[0] ) ) = 0;
1055 /* Set the NIC mac address. */
1057 data[0] = ( netdev->ll_addr[0] << 24
1058 | netdev->ll_addr[1] << 16
1059 | netdev->ll_addr[2] << 8
1060 | netdev->ll_addr[3] );
1061 data[1] = ( ( netdev->ll_addr[4] << 8 )
1062 | netdev->ll_addr[5] );
1063 TRY ( SET_ , MAC_ADDRESS , );
1065 /* Enable multicast receives, because some gPXE clients don't work
1066 without multicast. . */
1068 TRY ( ENABLE_ , ALLMULTI , );
1070 /* Disable Ethernet flow control, so the NIC cannot deadlock the
1071 network under any circumstances. */
1073 TRY ( DISABLE_ , FLOW , _CONTROL );
1075 /* Compute transmit ring sizes. */
1077 data[0] = 0; /* slice 0 */
1078 TRY ( CMD_GET_, SEND_RING, _SIZE );
1079 priv->transmit_ring_wrap
1080 = data[0] / sizeof ( mcp_kreq_ether_send_t ) - 1;
1081 if ( priv->transmit_ring_wrap
1082 & ( priv->transmit_ring_wrap + 1 ) ) {
1083 rc = -EPROTO;
1084 dbg = "TX_RING";
1085 goto abort_with_dma;
1088 /* Compute receive ring sizes. */
1090 data[0] = 0; /* slice 0 */
1091 TRY ( CMD_GET_ , RX_RING , _SIZE );
1092 priv->receive_post_ring_wrap = data[0] / sizeof ( mcp_dma_addr_t ) - 1;
1093 if ( priv->receive_post_ring_wrap
1094 & ( priv->receive_post_ring_wrap + 1 ) ) {
1095 rc = -EPROTO;
1096 dbg = "RX_RING";
1097 goto abort_with_dma;
1100 /* Get NIC transmit ring address. */
1102 data[0] = 0; /* slice 0. */
1103 TRY ( CMD_GET_, SEND, _OFFSET );
1104 priv->transmit_ring = membase + data[0];
1106 /* Get the NIC receive ring address. */
1108 data[0] = 0; /* slice 0. */
1109 TRY ( CMD_GET_, SMALL_RX, _OFFSET );
1110 priv->receive_post_ring = membase + data[0];
1112 /* Set the Nic MTU. */
1114 data[0] = ETH_FRAME_LEN;
1115 TRY ( CMD_SET_, MTU, );
1117 /* Tell the NIC our buffer sizes. ( We use only small buffers, so we
1118 set both buffer sizes to the same value, which will force all
1119 received frames to use small buffers. ) */
1121 data[0] = MXGEFW_PAD + ETH_FRAME_LEN;
1122 TRY ( CMD_SET_, SMALL_BUFFER, _SIZE );
1123 data[0] = MXGEFW_PAD + ETH_FRAME_LEN;
1124 TRY ( CMD_SET_, BIG_BUFFER, _SIZE );
1126 /* Tell firmware where to DMA IRQ data */
1128 data[0] = virt_to_bus ( &priv->dma->irq_data );
1129 data[1] = 0;
1130 data[2] = sizeof ( priv->dma->irq_data );
1131 TRY ( CMD_SET_, STATS_DMA_V2, );
1133 /* Post receives. */
1135 while ( priv->receives_posted <= MYRI10GE_RECEIVE_WRAP ) {
1137 /* Reserve 2 extra bytes at the start of packets, since
1138 the firmware always skips the first 2 bytes of the buffer
1139 so TCP headers will be aligned. */
1141 iob = alloc_iob ( MXGEFW_PAD + ETH_FRAME_LEN );
1142 if ( !iob ) {
1143 rc = -ENOMEM;
1144 dbg = "alloc_iob";
1145 goto abort_with_receives_posted;
1147 iob_reserve ( iob, MXGEFW_PAD );
1148 myri10ge_post_receive ( priv, iob );
1151 /* Bring up the link. */
1153 TRY ( CMD_, ETHERNET_UP, );
1155 DBG2_RINGS ( priv );
1156 return 0;
1158 abort_with_receives_posted:
1159 while ( priv->receives_posted-- )
1160 free_iob ( priv->receive_iob[priv->receives_posted] );
1161 abort_with_dma:
1162 /* Because the link is not up, we don't have to reset the NIC here. */
1163 free_dma ( priv->dma, sizeof ( *priv->dma ) );
1164 abort_with_nothing:
1165 /* Erase all signs of the failed open. */
1166 memset ( priv, 0, sizeof ( *priv ) );
1167 DBG ( "%s: %s\n", dbg, strerror ( rc ) );
1168 return ( rc );
1172 * This function allows a driver to process events during operation.
1174 * @v netdev Device being polled.
1176 * This is called periodically by gPXE to let the driver check the status of
1177 * transmitted packets and to allow the driver to check for received packets.
1178 * This is a gPXE Network Device Driver API function.
1180 static void myri10ge_net_poll ( struct net_device *netdev )
1182 struct io_buffer *iob;
1183 struct io_buffer *replacement;
1184 struct myri10ge_dma_buffers *dma;
1185 struct myri10ge_private *priv;
1186 unsigned int length;
1187 unsigned int orig_receives_posted;
1189 DBGP ( "myri10ge_net_poll\n" );
1190 priv = myri10ge_priv ( netdev );
1191 dma = priv->dma;
1193 /* Process any pending interrupt. */
1195 myri10ge_interrupt_handler ( netdev );
1197 /* Pass up received frames, but limit ourselves to receives posted
1198 before this function was called, so we cannot livelock if
1199 receives are arriving faster than we process them. */
1201 orig_receives_posted = priv->receives_posted;
1202 while ( priv->receives_done != orig_receives_posted ) {
1204 /* Stop if there is no pending receive. */
1206 length = ntohs ( dma->receive_completion
1207 [priv->receives_done
1208 & MYRI10GE_RECEIVE_COMPLETION_WRAP]
1209 .length );
1210 if ( length == 0 )
1211 break;
1213 /* Allocate a replacement buffer. If none is available,
1214 stop passing up packets until a buffer is available.
1216 Reserve 2 extra bytes at the start of packets, since
1217 the firmware always skips the first 2 bytes of the buffer
1218 so TCP headers will be aligned. */
1220 replacement = alloc_iob ( MXGEFW_PAD + ETH_FRAME_LEN );
1221 if ( !replacement ) {
1222 DBG ( "NO RX BUF\n" );
1223 break;
1225 iob_reserve ( replacement, MXGEFW_PAD );
1227 /* Pass up the received frame. */
1229 iob = priv->receive_iob[priv->receives_done
1230 & MYRI10GE_RECEIVE_WRAP];
1231 iob_put ( iob, length );
1232 netdev_rx ( netdev, iob );
1234 /* We have consumed the packet, so clear the receive
1235 notification. */
1237 dma->receive_completion [priv->receives_done
1238 & MYRI10GE_RECEIVE_COMPLETION_WRAP]
1239 .length = 0;
1240 wmb();
1242 /* Replace the passed-up I/O buffer. */
1244 myri10ge_post_receive ( priv, replacement );
1245 ++priv->receives_done;
1246 DBG2_RINGS ( priv );
1251 * This transmits a packet.
1253 * @v netdev Device to transmit from.
1254 * @v iobuf Data to transmit.
1255 * @ret rc Non-zero if failed to transmit.
1257 * This is a gPXE Network Driver API function.
1259 static int myri10ge_net_transmit ( struct net_device *netdev,
1260 struct io_buffer *iobuf )
1262 mcp_kreq_ether_send_t *kreq;
1263 size_t len;
1264 struct myri10ge_private *priv;
1265 uint32 transmits_posted;
1267 DBGP ( "myri10ge_net_transmit\n" );
1268 priv = myri10ge_priv ( netdev );
1270 /* Confirm space in the send ring. */
1272 transmits_posted = priv->transmits_posted;
1273 if ( transmits_posted - priv->transmits_done
1274 > MYRI10GE_TRANSMIT_WRAP ) {
1275 DBG ( "TX ring full\n" );
1276 return -ENOBUFS;
1279 DBG2 ( "TX %p+%zd ", iobuf->data, iob_len ( iobuf ) );
1280 DBG2_HD ( iobuf->data, 14 );
1282 /* Record the packet being transmitted, so we can later report
1283 send completion. */
1285 priv->transmit_iob[transmits_posted & MYRI10GE_TRANSMIT_WRAP] = iobuf;
1287 /* Copy and pad undersized frames, because the NIC does not pad,
1288 and we would rather copy small frames than do a gather. */
1290 len = iob_len ( iobuf );
1291 if ( len < ETH_ZLEN ) {
1292 iob_pad ( iobuf, ETH_ZLEN );
1293 len = ETH_ZLEN;
1296 /* Enqueue the packet by writing a descriptor to the NIC.
1297 This is a bit tricky because the HW requires 32-bit writes,
1298 but the structure has smaller fields. */
1300 kreq = &priv->transmit_ring[transmits_posted
1301 & priv->transmit_ring_wrap];
1302 kreq->addr_high = 0;
1303 kreq->addr_low = htonl ( virt_to_bus ( iobuf->data ) );
1304 ( ( uint32 * ) kreq ) [2] = htonl (
1305 0x0000 << 16 /* pseudo_header_offset */
1306 | ( len & 0xFFFF ) /* length */
1308 wmb();
1309 ( ( uint32 * ) kreq ) [3] = htonl (
1310 0x00 << 24 /* pad */
1311 | 0x01 << 16 /* rdma_count */
1312 | 0x00 << 8 /* cksum_offset */
1313 | ( MXGEFW_FLAGS_SMALL
1314 | MXGEFW_FLAGS_FIRST
1315 | MXGEFW_FLAGS_NO_TSO ) /* flags */
1317 wmb();
1319 /* Mark the slot as consumed and return. */
1321 priv->transmits_posted = ++transmits_posted;
1322 DBG2_RINGS ( priv );
1323 return 0;
1326 static struct pci_device_id myri10ge_nics[] = {
1327 /* Each of these macros must be a single line to satisfy a script. */
1328 PCI_ROM ( 0x14c1, 0x0008, "myri10ge", "Myricom 10Gb Ethernet Adapter", 0 ) ,
1331 struct pci_driver myri10ge_driver __pci_driver = {
1332 .ids = myri10ge_nics,
1333 .id_count = ( sizeof ( myri10ge_nics ) / sizeof ( myri10ge_nics[0] ) ) ,
1334 .probe = myri10ge_pci_probe,
1335 .remove = myri10ge_pci_remove
/*
 * Local variables:
 *  c-basic-offset: 8
 *  c-indent-level: 8
 *  tab-width: 8
 * End:
 */