/*
 * IBM Power Virtual Ethernet Device Driver
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 *
 * Copyright (C) IBM Corporation, 2003, 2010
 *
 * Authors: Dave Larson <larson1@us.ibm.com>
 *	    Santiago Leon <santil@linux.vnet.ibm.com>
 *	    Brian King <brking@linux.vnet.ibm.com>
 *	    Robert Jennings <rcj@linux.vnet.ibm.com>
 *	    Anton Blanchard <anton@au.ibm.com>
 */
#include <linux/module.h>
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/dma-mapping.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/mm.h>
#include <linux/pm.h>
#include <linux/ethtool.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/slab.h>
#include <asm/hvcall.h>
#include <linux/atomic.h>
#include <asm/vio.h>
#include <asm/iommu.h>
#include <asm/firmware.h>
#include <net/tcp.h>
#include <net/ip6_checksum.h>

#include "ibmveth.h"
static irqreturn_t ibmveth_interrupt(int irq, void *dev_instance);
static void ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter);
static unsigned long ibmveth_get_desired_dma(struct vio_dev *vdev);

static struct kobj_type ktype_veth_pool;
static const char ibmveth_driver_name[] = "ibmveth";
static const char ibmveth_driver_string[] = "IBM Power Virtual Ethernet Driver";
#define ibmveth_driver_version "1.06"

MODULE_AUTHOR("Santiago Leon <santil@linux.vnet.ibm.com>");
MODULE_DESCRIPTION("IBM Power Virtual Ethernet Driver");
MODULE_LICENSE("GPL");
MODULE_VERSION(ibmveth_driver_version);
static unsigned int tx_copybreak __read_mostly = 128;
module_param(tx_copybreak, uint, 0644);
MODULE_PARM_DESC(tx_copybreak,
	"Maximum size of packet that is copied to a new buffer on transmit");

static unsigned int rx_copybreak __read_mostly = 128;
module_param(rx_copybreak, uint, 0644);
MODULE_PARM_DESC(rx_copybreak,
	"Maximum size of packet that is copied to a new buffer on receive");

static unsigned int rx_flush __read_mostly = 0;
module_param(rx_flush, uint, 0644);
MODULE_PARM_DESC(rx_flush, "Flush receive buffers before use");

static bool old_large_send __read_mostly;
module_param(old_large_send, bool, 0444);
MODULE_PARM_DESC(old_large_send,
	"Use old large send method on firmware that supports the new method");
struct ibmveth_stat {
	char name[ETH_GSTRING_LEN];
	int offset;
};

#define IBMVETH_STAT_OFF(stat) offsetof(struct ibmveth_adapter, stat)
#define IBMVETH_GET_STAT(a, off) *((u64 *)(((unsigned long)(a)) + off))
static struct ibmveth_stat ibmveth_stats[] = {
	{ "replenish_task_cycles", IBMVETH_STAT_OFF(replenish_task_cycles) },
	{ "replenish_no_mem", IBMVETH_STAT_OFF(replenish_no_mem) },
	{ "replenish_add_buff_failure",
			IBMVETH_STAT_OFF(replenish_add_buff_failure) },
	{ "replenish_add_buff_success",
			IBMVETH_STAT_OFF(replenish_add_buff_success) },
	{ "rx_invalid_buffer", IBMVETH_STAT_OFF(rx_invalid_buffer) },
	{ "rx_no_buffer", IBMVETH_STAT_OFF(rx_no_buffer) },
	{ "tx_map_failed", IBMVETH_STAT_OFF(tx_map_failed) },
	{ "tx_send_failed", IBMVETH_STAT_OFF(tx_send_failed) },
	{ "fw_enabled_ipv4_csum", IBMVETH_STAT_OFF(fw_ipv4_csum_support) },
	{ "fw_enabled_ipv6_csum", IBMVETH_STAT_OFF(fw_ipv6_csum_support) },
	{ "tx_large_packets", IBMVETH_STAT_OFF(tx_large_packets) },
	{ "rx_large_packets", IBMVETH_STAT_OFF(rx_large_packets) },
	{ "fw_enabled_large_send", IBMVETH_STAT_OFF(fw_large_send_support) }
};
/* simple methods of getting data from the current rxq entry */
static inline u32 ibmveth_rxq_flags(struct ibmveth_adapter *adapter)
{
	return be32_to_cpu(adapter->rx_queue.queue_addr[adapter->rx_queue.index].flags_off);
}

static inline int ibmveth_rxq_toggle(struct ibmveth_adapter *adapter)
{
	return (ibmveth_rxq_flags(adapter) & IBMVETH_RXQ_TOGGLE) >>
			IBMVETH_RXQ_TOGGLE_SHIFT;
}

static inline int ibmveth_rxq_pending_buffer(struct ibmveth_adapter *adapter)
{
	return ibmveth_rxq_toggle(adapter) == adapter->rx_queue.toggle;
}

static inline int ibmveth_rxq_buffer_valid(struct ibmveth_adapter *adapter)
{
	return ibmveth_rxq_flags(adapter) & IBMVETH_RXQ_VALID;
}

static inline int ibmveth_rxq_frame_offset(struct ibmveth_adapter *adapter)
{
	return ibmveth_rxq_flags(adapter) & IBMVETH_RXQ_OFF_MASK;
}

static inline int ibmveth_rxq_large_packet(struct ibmveth_adapter *adapter)
{
	return ibmveth_rxq_flags(adapter) & IBMVETH_RXQ_LRG_PKT;
}

static inline int ibmveth_rxq_frame_length(struct ibmveth_adapter *adapter)
{
	return be32_to_cpu(adapter->rx_queue.queue_addr[adapter->rx_queue.index].length);
}

static inline int ibmveth_rxq_csum_good(struct ibmveth_adapter *adapter)
{
	return ibmveth_rxq_flags(adapter) & IBMVETH_RXQ_CSUM_GOOD;
}
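/*
 * The receive queue is a ring shared with the hypervisor.  Each time the
 * driver wraps around the ring it flips adapter->rx_queue.toggle, and an
 * entry is treated as pending only while its toggle bit matches the
 * driver's current toggle, so stale entries from the previous pass are
 * never mistaken for new frames.
 */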
/* setup the initial settings for a buffer pool */
static void ibmveth_init_buffer_pool(struct ibmveth_buff_pool *pool,
				     u32 pool_index, u32 pool_size,
				     u32 buff_size, u32 pool_active)
{
	pool->size = pool_size;
	pool->index = pool_index;
	pool->buff_size = buff_size;
	pool->threshold = pool_size * 7 / 8;
	pool->active = pool_active;
}
/* allocate and setup a buffer pool - called during open */
static int ibmveth_alloc_buffer_pool(struct ibmveth_buff_pool *pool)
{
	int i;

	pool->free_map = kmalloc_array(pool->size, sizeof(u16), GFP_KERNEL);
	if (!pool->free_map)
		return -1;

	pool->dma_addr = kcalloc(pool->size, sizeof(dma_addr_t), GFP_KERNEL);
	if (!pool->dma_addr) {
		kfree(pool->free_map);
		pool->free_map = NULL;
		return -1;
	}

	pool->skbuff = kcalloc(pool->size, sizeof(void *), GFP_KERNEL);
	if (!pool->skbuff) {
		kfree(pool->dma_addr);
		pool->dma_addr = NULL;

		kfree(pool->free_map);
		pool->free_map = NULL;
		return -1;
	}

	for (i = 0; i < pool->size; ++i)
		pool->free_map[i] = i;

	atomic_set(&pool->available, 0);
	pool->producer_index = 0;
	pool->consumer_index = 0;

	return 0;
}
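/*
 * free_map is a ring of free buffer indices: the replenish path consumes
 * entries at consumer_index (marking the slot IBM_VETH_INVALID_MAP), while
 * buffers returned by the hypervisor are pushed back at producer_index.
 * The "available" counter tracks how many buffers are currently posted to
 * firmware.
 */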
static inline void ibmveth_flush_buffer(void *addr, unsigned long length)
{
	unsigned long offset;

	for (offset = 0; offset < length; offset += SMP_CACHE_BYTES)
		asm("dcbfl %0,%1" :: "b" (addr), "r" (offset));
}
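/*
 * The dcbfl above flushes one cache line per iteration, pushing the buffer
 * contents out of the data cache.  It is only invoked when the rx_flush
 * module parameter is set, before a buffer is posted to firmware and before
 * received data is copied out of it.
 */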
/* replenish the buffers for a pool. note that we don't need to
 * skb_reserve these since they are used for incoming...
 */
static void ibmveth_replenish_buffer_pool(struct ibmveth_adapter *adapter,
					  struct ibmveth_buff_pool *pool)
{
	u32 i;
	u32 count = pool->size - atomic_read(&pool->available);
	u32 buffers_added = 0;
	struct sk_buff *skb;
	unsigned int free_index, index;
	u64 correlator;
	unsigned long lpar_rc;
	dma_addr_t dma_addr;

	mb();

	for (i = 0; i < count; ++i) {
		union ibmveth_buf_desc desc;

		skb = netdev_alloc_skb(adapter->netdev, pool->buff_size);

		if (!skb) {
			netdev_dbg(adapter->netdev,
				   "replenish: unable to allocate skb\n");
			adapter->replenish_no_mem++;
			break;
		}

		free_index = pool->consumer_index;
		pool->consumer_index++;
		if (pool->consumer_index >= pool->size)
			pool->consumer_index = 0;
		index = pool->free_map[free_index];

		BUG_ON(index == IBM_VETH_INVALID_MAP);
		BUG_ON(pool->skbuff[index] != NULL);

		dma_addr = dma_map_single(&adapter->vdev->dev, skb->data,
				pool->buff_size, DMA_FROM_DEVICE);

		if (dma_mapping_error(&adapter->vdev->dev, dma_addr))
			goto failure;

		pool->free_map[free_index] = IBM_VETH_INVALID_MAP;
		pool->dma_addr[index] = dma_addr;
		pool->skbuff[index] = skb;

		correlator = ((u64)pool->index << 32) | index;
		*(u64 *)skb->data = correlator;

		desc.fields.flags_len = IBMVETH_BUF_VALID | pool->buff_size;
		desc.fields.address = dma_addr;

		if (rx_flush) {
			unsigned int len = min(pool->buff_size,
						adapter->netdev->mtu +
						IBMVETH_BUFF_OH);
			ibmveth_flush_buffer(skb->data, len);
		}
		lpar_rc = h_add_logical_lan_buffer(adapter->vdev->unit_address,
						   desc.desc);

		if (lpar_rc != H_SUCCESS) {
			goto failure;
		} else {
			buffers_added++;
			adapter->replenish_add_buff_success++;
		}
	}

	mb();
	atomic_add(buffers_added, &(pool->available));
	return;

failure:
	pool->free_map[free_index] = index;
	pool->skbuff[index] = NULL;
	if (pool->consumer_index == 0)
		pool->consumer_index = pool->size - 1;
	else
		pool->consumer_index--;
	if (!dma_mapping_error(&adapter->vdev->dev, dma_addr))
		dma_unmap_single(&adapter->vdev->dev,
				 pool->dma_addr[index], pool->buff_size,
				 DMA_FROM_DEVICE);
	dev_kfree_skb_any(skb);
	adapter->replenish_add_buff_failure++;

	mb();
	atomic_add(buffers_added, &(pool->available));
}
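/*
 * Each posted buffer carries a 64-bit correlator in its first 8 bytes: the
 * pool index in the upper 32 bits and the buffer index within that pool in
 * the lower 32 bits.  The hypervisor echoes it back in the rx queue entry,
 * which is how ibmveth_rxq_get_buffer() and
 * ibmveth_remove_buffer_from_pool() find the matching skb and DMA mapping.
 */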
/*
 * The final 8 bytes of the buffer list is a counter of frames dropped
 * because there was not a buffer in the buffer list capable of holding
 * the frame.
 */
static void ibmveth_update_rx_no_buffer(struct ibmveth_adapter *adapter)
{
	__be64 *p = adapter->buffer_list_addr + 4096 - 8;

	adapter->rx_no_buffer = be64_to_cpup(p);
}
/* replenish routine */
static void ibmveth_replenish_task(struct ibmveth_adapter *adapter)
{
	int i;

	adapter->replenish_task_cycles++;

	for (i = (IBMVETH_NUM_BUFF_POOLS - 1); i >= 0; i--) {
		struct ibmveth_buff_pool *pool = &adapter->rx_buff_pool[i];

		if (pool->active &&
		    (atomic_read(&pool->available) < pool->threshold))
			ibmveth_replenish_buffer_pool(adapter, pool);
	}

	ibmveth_update_rx_no_buffer(adapter);
}
/* empty and free a buffer pool - also used to do cleanup in error paths */
static void ibmveth_free_buffer_pool(struct ibmveth_adapter *adapter,
				     struct ibmveth_buff_pool *pool)
{
	int i;

	kfree(pool->free_map);
	pool->free_map = NULL;

	if (pool->skbuff && pool->dma_addr) {
		for (i = 0; i < pool->size; ++i) {
			struct sk_buff *skb = pool->skbuff[i];
			if (skb) {
				dma_unmap_single(&adapter->vdev->dev,
						 pool->dma_addr[i],
						 pool->buff_size,
						 DMA_FROM_DEVICE);
				dev_kfree_skb_any(skb);
				pool->skbuff[i] = NULL;
			}
		}
	}

	if (pool->dma_addr) {
		kfree(pool->dma_addr);
		pool->dma_addr = NULL;
	}

	if (pool->skbuff) {
		kfree(pool->skbuff);
		pool->skbuff = NULL;
	}
}
/* remove a buffer from a pool */
static void ibmveth_remove_buffer_from_pool(struct ibmveth_adapter *adapter,
					    u64 correlator)
{
	unsigned int pool  = correlator >> 32;
	unsigned int index = correlator & 0xffffffffUL;
	unsigned int free_index;
	struct sk_buff *skb;

	BUG_ON(pool >= IBMVETH_NUM_BUFF_POOLS);
	BUG_ON(index >= adapter->rx_buff_pool[pool].size);

	skb = adapter->rx_buff_pool[pool].skbuff[index];

	BUG_ON(skb == NULL);

	adapter->rx_buff_pool[pool].skbuff[index] = NULL;

	dma_unmap_single(&adapter->vdev->dev,
			 adapter->rx_buff_pool[pool].dma_addr[index],
			 adapter->rx_buff_pool[pool].buff_size,
			 DMA_FROM_DEVICE);

	free_index = adapter->rx_buff_pool[pool].producer_index;
	adapter->rx_buff_pool[pool].producer_index++;
	if (adapter->rx_buff_pool[pool].producer_index >=
	    adapter->rx_buff_pool[pool].size)
		adapter->rx_buff_pool[pool].producer_index = 0;
	adapter->rx_buff_pool[pool].free_map[free_index] = index;

	mb();

	atomic_dec(&(adapter->rx_buff_pool[pool].available));
}
/* get the current buffer on the rx queue */
static inline struct sk_buff *ibmveth_rxq_get_buffer(struct ibmveth_adapter *adapter)
{
	u64 correlator = adapter->rx_queue.queue_addr[adapter->rx_queue.index].correlator;
	unsigned int pool = correlator >> 32;
	unsigned int index = correlator & 0xffffffffUL;

	BUG_ON(pool >= IBMVETH_NUM_BUFF_POOLS);
	BUG_ON(index >= adapter->rx_buff_pool[pool].size);

	return adapter->rx_buff_pool[pool].skbuff[index];
}
/* recycle the current buffer on the rx queue */
static int ibmveth_rxq_recycle_buffer(struct ibmveth_adapter *adapter)
{
	u32 q_index = adapter->rx_queue.index;
	u64 correlator = adapter->rx_queue.queue_addr[q_index].correlator;
	unsigned int pool = correlator >> 32;
	unsigned int index = correlator & 0xffffffffUL;
	union ibmveth_buf_desc desc;
	unsigned long lpar_rc;
	int ret = 1;

	BUG_ON(pool >= IBMVETH_NUM_BUFF_POOLS);
	BUG_ON(index >= adapter->rx_buff_pool[pool].size);

	if (!adapter->rx_buff_pool[pool].active) {
		ibmveth_rxq_harvest_buffer(adapter);
		ibmveth_free_buffer_pool(adapter, &adapter->rx_buff_pool[pool]);
		goto out;
	}

	desc.fields.flags_len = IBMVETH_BUF_VALID |
		adapter->rx_buff_pool[pool].buff_size;
	desc.fields.address = adapter->rx_buff_pool[pool].dma_addr[index];

	lpar_rc = h_add_logical_lan_buffer(adapter->vdev->unit_address, desc.desc);

	if (lpar_rc != H_SUCCESS) {
		netdev_dbg(adapter->netdev, "h_add_logical_lan_buffer failed "
			   "during recycle rc=%ld", lpar_rc);
		ibmveth_remove_buffer_from_pool(adapter, adapter->rx_queue.queue_addr[adapter->rx_queue.index].correlator);
		ret = 0;
	}

	if (++adapter->rx_queue.index == adapter->rx_queue.num_slots) {
		adapter->rx_queue.index = 0;
		adapter->rx_queue.toggle = !adapter->rx_queue.toggle;
	}

out:
	return ret;
}
static void ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter)
{
	ibmveth_remove_buffer_from_pool(adapter, adapter->rx_queue.queue_addr[adapter->rx_queue.index].correlator);

	if (++adapter->rx_queue.index == adapter->rx_queue.num_slots) {
		adapter->rx_queue.index = 0;
		adapter->rx_queue.toggle = !adapter->rx_queue.toggle;
	}
}
static int ibmveth_register_logical_lan(struct ibmveth_adapter *adapter,
		union ibmveth_buf_desc rxq_desc, u64 mac_address)
{
	int rc, try_again = 1;

	/*
	 * After a kexec the adapter will still be open, so our attempt to
	 * open it will fail. So if we get a failure we free the adapter and
	 * try again, but only once.
	 */
retry:
	rc = h_register_logical_lan(adapter->vdev->unit_address,
				    adapter->buffer_list_dma, rxq_desc.desc,
				    adapter->filter_list_dma, mac_address);

	if (rc != H_SUCCESS && try_again) {
		do {
			rc = h_free_logical_lan(adapter->vdev->unit_address);
		} while (H_IS_LONG_BUSY(rc) || (rc == H_BUSY));

		try_again = 0;
		goto retry;
	}

	return rc;
}
static u64 ibmveth_encode_mac_addr(u8 *mac)
{
	int i;
	u64 encoded = 0;

	for (i = 0; i < ETH_ALEN; i++)
		encoded = (encoded << 8) | mac[i];

	return encoded;
}
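/*
 * The firmware calls take the MAC address as a scalar, so the six bytes are
 * packed big-endian into the low 48 bits of a u64 before being handed to
 * h_register_logical_lan(), h_multicast_ctrl() and
 * h_change_logical_lan_mac().
 */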
static int ibmveth_open(struct net_device *netdev)
{
	struct ibmveth_adapter *adapter = netdev_priv(netdev);
	u64 mac_address;
	int rxq_entries = 1;
	unsigned long lpar_rc;
	int rc;
	union ibmveth_buf_desc rxq_desc;
	int i;
	struct device *dev;

	netdev_dbg(netdev, "open starting\n");

	napi_enable(&adapter->napi);

	for(i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++)
		rxq_entries += adapter->rx_buff_pool[i].size;

	rc = -ENOMEM;
	adapter->buffer_list_addr = (void*) get_zeroed_page(GFP_KERNEL);
	if (!adapter->buffer_list_addr) {
		netdev_err(netdev, "unable to allocate list pages\n");
		goto out;
	}

	adapter->filter_list_addr = (void*) get_zeroed_page(GFP_KERNEL);
	if (!adapter->filter_list_addr) {
		netdev_err(netdev, "unable to allocate filter pages\n");
		goto out_free_buffer_list;
	}

	dev = &adapter->vdev->dev;

	adapter->rx_queue.queue_len = sizeof(struct ibmveth_rx_q_entry) *
						rxq_entries;
	adapter->rx_queue.queue_addr =
		dma_alloc_coherent(dev, adapter->rx_queue.queue_len,
				   &adapter->rx_queue.queue_dma, GFP_KERNEL);
	if (!adapter->rx_queue.queue_addr)
		goto out_free_filter_list;

	adapter->buffer_list_dma = dma_map_single(dev,
			adapter->buffer_list_addr, 4096, DMA_BIDIRECTIONAL);
	if (dma_mapping_error(dev, adapter->buffer_list_dma)) {
		netdev_err(netdev, "unable to map buffer list pages\n");
		goto out_free_queue_mem;
	}

	adapter->filter_list_dma = dma_map_single(dev,
			adapter->filter_list_addr, 4096, DMA_BIDIRECTIONAL);
	if (dma_mapping_error(dev, adapter->filter_list_dma)) {
		netdev_err(netdev, "unable to map filter list pages\n");
		goto out_unmap_buffer_list;
	}

	adapter->rx_queue.index = 0;
	adapter->rx_queue.num_slots = rxq_entries;
	adapter->rx_queue.toggle = 1;

	mac_address = ibmveth_encode_mac_addr(netdev->dev_addr);

	rxq_desc.fields.flags_len = IBMVETH_BUF_VALID |
					adapter->rx_queue.queue_len;
	rxq_desc.fields.address = adapter->rx_queue.queue_dma;

	netdev_dbg(netdev, "buffer list @ 0x%p\n", adapter->buffer_list_addr);
	netdev_dbg(netdev, "filter list @ 0x%p\n", adapter->filter_list_addr);
	netdev_dbg(netdev, "receive q @ 0x%p\n", adapter->rx_queue.queue_addr);

	h_vio_signal(adapter->vdev->unit_address, VIO_IRQ_DISABLE);

	lpar_rc = ibmveth_register_logical_lan(adapter, rxq_desc, mac_address);

	if (lpar_rc != H_SUCCESS) {
		netdev_err(netdev, "h_register_logical_lan failed with %ld\n",
			   lpar_rc);
		netdev_err(netdev, "buffer TCE:0x%llx filter TCE:0x%llx rxq "
			   "desc:0x%llx MAC:0x%llx\n",
				     adapter->buffer_list_dma,
				     adapter->filter_list_dma,
				     rxq_desc.desc,
				     mac_address);
		rc = -ENONET;
		goto out_unmap_filter_list;
	}

	for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) {
		if (!adapter->rx_buff_pool[i].active)
			continue;
		if (ibmveth_alloc_buffer_pool(&adapter->rx_buff_pool[i])) {
			netdev_err(netdev, "unable to alloc pool\n");
			adapter->rx_buff_pool[i].active = 0;
			rc = -ENOMEM;
			goto out_free_buffer_pools;
		}
	}

	netdev_dbg(netdev, "registering irq 0x%x\n", netdev->irq);
	rc = request_irq(netdev->irq, ibmveth_interrupt, 0, netdev->name,
			 netdev);
	if (rc != 0) {
		netdev_err(netdev, "unable to request irq 0x%x, rc %d\n",
			   netdev->irq, rc);
		do {
			lpar_rc = h_free_logical_lan(adapter->vdev->unit_address);
		} while (H_IS_LONG_BUSY(lpar_rc) || (lpar_rc == H_BUSY));

		goto out_free_buffer_pools;
	}

	rc = -ENOMEM;
	adapter->bounce_buffer =
	    kmalloc(netdev->mtu + IBMVETH_BUFF_OH, GFP_KERNEL);
	if (!adapter->bounce_buffer)
		goto out_free_irq;

	adapter->bounce_buffer_dma =
	    dma_map_single(&adapter->vdev->dev, adapter->bounce_buffer,
			   netdev->mtu + IBMVETH_BUFF_OH, DMA_BIDIRECTIONAL);
	if (dma_mapping_error(dev, adapter->bounce_buffer_dma)) {
		netdev_err(netdev, "unable to map bounce buffer\n");
		goto out_free_bounce_buffer;
	}

	netdev_dbg(netdev, "initial replenish cycle\n");
	ibmveth_interrupt(netdev->irq, netdev);

	netif_start_queue(netdev);

	netdev_dbg(netdev, "open complete\n");

	return 0;

out_free_bounce_buffer:
	kfree(adapter->bounce_buffer);
out_free_irq:
	free_irq(netdev->irq, netdev);
out_free_buffer_pools:
	while (--i >= 0) {
		if (adapter->rx_buff_pool[i].active)
			ibmveth_free_buffer_pool(adapter,
						 &adapter->rx_buff_pool[i]);
	}
out_unmap_filter_list:
	dma_unmap_single(dev, adapter->filter_list_dma, 4096,
			 DMA_BIDIRECTIONAL);
out_unmap_buffer_list:
	dma_unmap_single(dev, adapter->buffer_list_dma, 4096,
			 DMA_BIDIRECTIONAL);
out_free_queue_mem:
	dma_free_coherent(dev, adapter->rx_queue.queue_len,
			  adapter->rx_queue.queue_addr,
			  adapter->rx_queue.queue_dma);
out_free_filter_list:
	free_page((unsigned long)adapter->filter_list_addr);
out_free_buffer_list:
	free_page((unsigned long)adapter->buffer_list_addr);
out:
	napi_disable(&adapter->napi);
	return rc;
}
static int ibmveth_close(struct net_device *netdev)
{
	struct ibmveth_adapter *adapter = netdev_priv(netdev);
	struct device *dev = &adapter->vdev->dev;
	long lpar_rc;
	int i;

	netdev_dbg(netdev, "close starting\n");

	napi_disable(&adapter->napi);

	if (!adapter->pool_config)
		netif_stop_queue(netdev);

	h_vio_signal(adapter->vdev->unit_address, VIO_IRQ_DISABLE);

	do {
		lpar_rc = h_free_logical_lan(adapter->vdev->unit_address);
	} while (H_IS_LONG_BUSY(lpar_rc) || (lpar_rc == H_BUSY));

	if (lpar_rc != H_SUCCESS) {
		netdev_err(netdev, "h_free_logical_lan failed with %lx, "
			   "continuing with close\n", lpar_rc);
	}

	free_irq(netdev->irq, netdev);

	ibmveth_update_rx_no_buffer(adapter);

	dma_unmap_single(dev, adapter->buffer_list_dma, 4096,
			 DMA_BIDIRECTIONAL);
	free_page((unsigned long)adapter->buffer_list_addr);

	dma_unmap_single(dev, adapter->filter_list_dma, 4096,
			 DMA_BIDIRECTIONAL);
	free_page((unsigned long)adapter->filter_list_addr);

	dma_free_coherent(dev, adapter->rx_queue.queue_len,
			  adapter->rx_queue.queue_addr,
			  adapter->rx_queue.queue_dma);

	for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++)
		if (adapter->rx_buff_pool[i].active)
			ibmveth_free_buffer_pool(adapter,
						 &adapter->rx_buff_pool[i]);

	dma_unmap_single(&adapter->vdev->dev, adapter->bounce_buffer_dma,
			 adapter->netdev->mtu + IBMVETH_BUFF_OH,
			 DMA_BIDIRECTIONAL);
	kfree(adapter->bounce_buffer);

	netdev_dbg(netdev, "close complete\n");

	return 0;
}
static int netdev_get_link_ksettings(struct net_device *dev,
				     struct ethtool_link_ksettings *cmd)
{
	u32 supported, advertising;

	supported = (SUPPORTED_1000baseT_Full | SUPPORTED_Autoneg |
		     SUPPORTED_FIBRE);
	advertising = (ADVERTISED_1000baseT_Full | ADVERTISED_Autoneg |
		       ADVERTISED_FIBRE);
	cmd->base.speed = SPEED_1000;
	cmd->base.duplex = DUPLEX_FULL;
	cmd->base.port = PORT_FIBRE;
	cmd->base.phy_address = 0;
	cmd->base.autoneg = AUTONEG_ENABLE;

	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
						supported);
	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
						advertising);

	return 0;
}
static void netdev_get_drvinfo(struct net_device *dev,
			       struct ethtool_drvinfo *info)
{
	strlcpy(info->driver, ibmveth_driver_name, sizeof(info->driver));
	strlcpy(info->version, ibmveth_driver_version, sizeof(info->version));
}
static netdev_features_t ibmveth_fix_features(struct net_device *dev,
	netdev_features_t features)
{
	/*
	 * Since the ibmveth firmware interface does not have the
	 * concept of separate tx/rx checksum offload enable, if rx
	 * checksum is disabled we also have to disable tx checksum
	 * offload. Once we disable rx checksum offload, we are no
	 * longer allowed to send tx buffers that are not properly
	 * checksummed.
	 */

	if (!(features & NETIF_F_RXCSUM))
		features &= ~NETIF_F_CSUM_MASK;

	return features;
}
static int ibmveth_set_csum_offload(struct net_device *dev, u32 data)
{
	struct ibmveth_adapter *adapter = netdev_priv(dev);
	unsigned long set_attr, clr_attr, ret_attr;
	unsigned long set_attr6, clr_attr6;
	long ret, ret4, ret6;
	int rc1 = 0, rc2 = 0;
	int restart = 0;

	if (netif_running(dev)) {
		restart = 1;
		adapter->pool_config = 1;
		ibmveth_close(dev);
		adapter->pool_config = 0;
	}

	set_attr = 0;
	clr_attr = 0;
	set_attr6 = 0;
	clr_attr6 = 0;

	if (data) {
		set_attr = IBMVETH_ILLAN_IPV4_TCP_CSUM;
		set_attr6 = IBMVETH_ILLAN_IPV6_TCP_CSUM;
	} else {
		clr_attr = IBMVETH_ILLAN_IPV4_TCP_CSUM;
		clr_attr6 = IBMVETH_ILLAN_IPV6_TCP_CSUM;
	}

	ret = h_illan_attributes(adapter->vdev->unit_address, 0, 0, &ret_attr);

	if (ret == H_SUCCESS &&
	    (ret_attr & IBMVETH_ILLAN_PADDED_PKT_CSUM)) {
		ret4 = h_illan_attributes(adapter->vdev->unit_address, clr_attr,
					  set_attr, &ret_attr);

		if (ret4 != H_SUCCESS) {
			netdev_err(dev, "unable to change IPv4 checksum "
					"offload settings. %d rc=%ld\n",
					data, ret4);

			h_illan_attributes(adapter->vdev->unit_address,
					   set_attr, clr_attr, &ret_attr);

			if (data == 1)
				dev->features &= ~NETIF_F_IP_CSUM;

		} else {
			adapter->fw_ipv4_csum_support = data;
		}

		ret6 = h_illan_attributes(adapter->vdev->unit_address,
					  clr_attr6, set_attr6, &ret_attr);

		if (ret6 != H_SUCCESS) {
			netdev_err(dev, "unable to change IPv6 checksum "
					"offload settings. %d rc=%ld\n",
					data, ret6);

			h_illan_attributes(adapter->vdev->unit_address,
					   set_attr6, clr_attr6, &ret_attr);

			if (data == 1)
				dev->features &= ~NETIF_F_IPV6_CSUM;

		} else
			adapter->fw_ipv6_csum_support = data;

		if (ret4 == H_SUCCESS || ret6 == H_SUCCESS)
			adapter->rx_csum = data;
		else
			rc1 = -EIO;
	} else {
		rc1 = -EIO;
		netdev_err(dev, "unable to change checksum offload settings."
				" %d rc=%ld ret_attr=%lx\n", data, ret,
				ret_attr);
	}

	if (restart)
		rc2 = ibmveth_open(dev);

	return rc1 ? rc1 : rc2;
}
static int ibmveth_set_tso(struct net_device *dev, u32 data)
{
	struct ibmveth_adapter *adapter = netdev_priv(dev);
	unsigned long set_attr, clr_attr, ret_attr;
	long ret1, ret2;
	int rc1 = 0, rc2 = 0;
	int restart = 0;

	if (netif_running(dev)) {
		restart = 1;
		adapter->pool_config = 1;
		ibmveth_close(dev);
		adapter->pool_config = 0;
	}

	set_attr = 0;
	clr_attr = 0;

	if (data)
		set_attr = IBMVETH_ILLAN_LRG_SR_ENABLED;
	else
		clr_attr = IBMVETH_ILLAN_LRG_SR_ENABLED;

	ret1 = h_illan_attributes(adapter->vdev->unit_address, 0, 0, &ret_attr);

	if (ret1 == H_SUCCESS && (ret_attr & IBMVETH_ILLAN_LRG_SND_SUPPORT) &&
	    !old_large_send) {
		ret2 = h_illan_attributes(adapter->vdev->unit_address, clr_attr,
					  set_attr, &ret_attr);

		if (ret2 != H_SUCCESS) {
			netdev_err(dev, "unable to change tso settings. %d rc=%ld\n",
				   data, ret2);

			h_illan_attributes(adapter->vdev->unit_address,
					   set_attr, clr_attr, &ret_attr);

			if (data == 1)
				dev->features &= ~(NETIF_F_TSO | NETIF_F_TSO6);
			rc1 = -EIO;

		} else {
			adapter->fw_large_send_support = data;
			adapter->large_send = data;
		}
	} else {
		/* Older firmware version of large send offload does not
		 * support tcp6/ipv6
		 */
		if (data == 1) {
			dev->features &= ~NETIF_F_TSO6;
			netdev_info(dev, "TSO feature requires all partitions to have updated driver");
		}
		adapter->large_send = data;
	}

	if (restart)
		rc2 = ibmveth_open(dev);

	return rc1 ? rc1 : rc2;
}
static int ibmveth_set_features(struct net_device *dev,
	netdev_features_t features)
{
	struct ibmveth_adapter *adapter = netdev_priv(dev);
	int rx_csum = !!(features & NETIF_F_RXCSUM);
	int large_send = !!(features & (NETIF_F_TSO | NETIF_F_TSO6));
	int rc1 = 0, rc2 = 0;

	if (rx_csum != adapter->rx_csum) {
		rc1 = ibmveth_set_csum_offload(dev, rx_csum);
		if (rc1 && !adapter->rx_csum)
			dev->features =
				features & ~(NETIF_F_CSUM_MASK |
					     NETIF_F_RXCSUM);
	}

	if (large_send != adapter->large_send) {
		rc2 = ibmveth_set_tso(dev, large_send);
		if (rc2 && !adapter->large_send)
			dev->features =
				features & ~(NETIF_F_TSO | NETIF_F_TSO6);
	}

	return rc1 ? rc1 : rc2;
}
static void ibmveth_get_strings(struct net_device *dev, u32 stringset, u8 *data)
{
	int i;

	if (stringset != ETH_SS_STATS)
		return;

	for (i = 0; i < ARRAY_SIZE(ibmveth_stats); i++, data += ETH_GSTRING_LEN)
		memcpy(data, ibmveth_stats[i].name, ETH_GSTRING_LEN);
}
static int ibmveth_get_sset_count(struct net_device *dev, int sset)
{
	switch (sset) {
	case ETH_SS_STATS:
		return ARRAY_SIZE(ibmveth_stats);
	default:
		return -EOPNOTSUPP;
	}
}
static void ibmveth_get_ethtool_stats(struct net_device *dev,
				      struct ethtool_stats *stats, u64 *data)
{
	int i;
	struct ibmveth_adapter *adapter = netdev_priv(dev);

	for (i = 0; i < ARRAY_SIZE(ibmveth_stats); i++)
		data[i] = IBMVETH_GET_STAT(adapter, ibmveth_stats[i].offset);
}
static const struct ethtool_ops netdev_ethtool_ops = {
	.get_drvinfo		= netdev_get_drvinfo,
	.get_link		= ethtool_op_get_link,
	.get_strings		= ibmveth_get_strings,
	.get_sset_count		= ibmveth_get_sset_count,
	.get_ethtool_stats	= ibmveth_get_ethtool_stats,
	.get_link_ksettings	= netdev_get_link_ksettings,
};
static int ibmveth_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
	return -EOPNOTSUPP;
}

#define page_offset(v) ((unsigned long)(v) & ((1 << 12) - 1))
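/*
 * page_offset() above masks off everything but the low 12 bits, i.e. the
 * offset of a virtual address within its 4K page.
 */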
static int ibmveth_send(struct ibmveth_adapter *adapter,
			union ibmveth_buf_desc *descs, unsigned long mss)
{
	unsigned long correlator;
	unsigned int retry_count;
	unsigned long ret;

	/*
	 * The retry count sets a maximum for the number of broadcast and
	 * multicast destinations within the system.
	 */
	retry_count = 1024;
	correlator = 0;
	do {
		ret = h_send_logical_lan(adapter->vdev->unit_address,
					 descs[0].desc, descs[1].desc,
					 descs[2].desc, descs[3].desc,
					 descs[4].desc, descs[5].desc,
					 correlator, &correlator, mss,
					 adapter->fw_large_send_support);
	} while ((ret == H_BUSY) && (retry_count--));

	if (ret != H_SUCCESS && ret != H_DROPPED) {
		netdev_err(adapter->netdev, "tx: h_send_logical_lan failed "
			   "with rc=%ld\n", ret);
		return 1;
	}

	return 0;
}
static netdev_tx_t ibmveth_start_xmit(struct sk_buff *skb,
				      struct net_device *netdev)
{
	struct ibmveth_adapter *adapter = netdev_priv(netdev);
	unsigned int desc_flags;
	union ibmveth_buf_desc descs[6];
	int last, i;
	int force_bounce = 0;
	dma_addr_t dma_addr;
	unsigned long mss = 0;

	/* veth doesn't handle frag_list, so linearize the skb.
	 * When GRO is enabled SKB's can have frag_list.
	 */
	if (adapter->is_active_trunk &&
	    skb_has_frag_list(skb) && __skb_linearize(skb)) {
		netdev->stats.tx_dropped++;
		goto out;
	}

	/*
	 * veth handles a maximum of 6 segments including the header, so
	 * we have to linearize the skb if there are more than this.
	 */
	if (skb_shinfo(skb)->nr_frags > 5 && __skb_linearize(skb)) {
		netdev->stats.tx_dropped++;
		goto out;
	}

	/* veth can't checksum offload UDP */
	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    ((skb->protocol == htons(ETH_P_IP) &&
	      ip_hdr(skb)->protocol != IPPROTO_TCP) ||
	     (skb->protocol == htons(ETH_P_IPV6) &&
	      ipv6_hdr(skb)->nexthdr != IPPROTO_TCP)) &&
	    skb_checksum_help(skb)) {

		netdev_err(netdev, "tx: failed to checksum packet\n");
		netdev->stats.tx_dropped++;
		goto out;
	}

	desc_flags = IBMVETH_BUF_VALID;

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		unsigned char *buf = skb_transport_header(skb) +
						skb->csum_offset;

		desc_flags |= (IBMVETH_BUF_NO_CSUM | IBMVETH_BUF_CSUM_GOOD);

		/* Need to zero out the checksum */
		buf[0] = 0;
		buf[1] = 0;

		if (skb_is_gso(skb) && adapter->fw_large_send_support)
			desc_flags |= IBMVETH_BUF_LRG_SND;
	}

retry_bounce:
	memset(descs, 0, sizeof(descs));

	/*
	 * If a linear packet is below the rx threshold then
	 * copy it into the static bounce buffer. This avoids the
	 * cost of a TCE insert and remove.
	 */
	if (force_bounce || (!skb_is_nonlinear(skb) &&
				(skb->len < tx_copybreak))) {
		skb_copy_from_linear_data(skb, adapter->bounce_buffer,
					  skb->len);

		descs[0].fields.flags_len = desc_flags | skb->len;
		descs[0].fields.address = adapter->bounce_buffer_dma;

		if (ibmveth_send(adapter, descs, 0)) {
			adapter->tx_send_failed++;
			netdev->stats.tx_dropped++;
		} else {
			netdev->stats.tx_packets++;
			netdev->stats.tx_bytes += skb->len;
		}

		goto out;
	}

	/* Map the header */
	dma_addr = dma_map_single(&adapter->vdev->dev, skb->data,
				  skb_headlen(skb), DMA_TO_DEVICE);
	if (dma_mapping_error(&adapter->vdev->dev, dma_addr))
		goto map_failed;

	descs[0].fields.flags_len = desc_flags | skb_headlen(skb);
	descs[0].fields.address = dma_addr;

	/* Map the frags */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		dma_addr = skb_frag_dma_map(&adapter->vdev->dev, frag, 0,
					    skb_frag_size(frag), DMA_TO_DEVICE);

		if (dma_mapping_error(&adapter->vdev->dev, dma_addr))
			goto map_failed_frags;

		descs[i+1].fields.flags_len = desc_flags | skb_frag_size(frag);
		descs[i+1].fields.address = dma_addr;
	}

	if (skb->ip_summed == CHECKSUM_PARTIAL && skb_is_gso(skb)) {
		if (adapter->fw_large_send_support) {
			mss = (unsigned long)skb_shinfo(skb)->gso_size;
			adapter->tx_large_packets++;
		} else if (!skb_is_gso_v6(skb)) {
			/* Put -1 in the IP checksum to tell phyp it
			 * is a largesend packet. Put the mss in
			 * the TCP checksum.
			 */
			ip_hdr(skb)->check = 0xffff;
			tcp_hdr(skb)->check =
				cpu_to_be16(skb_shinfo(skb)->gso_size);
			adapter->tx_large_packets++;
		}
	}

	if (ibmveth_send(adapter, descs, mss)) {
		adapter->tx_send_failed++;
		netdev->stats.tx_dropped++;
	} else {
		netdev->stats.tx_packets++;
		netdev->stats.tx_bytes += skb->len;
	}

	dma_unmap_single(&adapter->vdev->dev,
			 descs[0].fields.address,
			 descs[0].fields.flags_len & IBMVETH_BUF_LEN_MASK,
			 DMA_TO_DEVICE);

	for (i = 1; i < skb_shinfo(skb)->nr_frags + 1; i++)
		dma_unmap_page(&adapter->vdev->dev, descs[i].fields.address,
			       descs[i].fields.flags_len & IBMVETH_BUF_LEN_MASK,
			       DMA_TO_DEVICE);

out:
	dev_consume_skb_any(skb);
	return NETDEV_TX_OK;

map_failed_frags:
	last = i+1;
	for (i = 0; i < last; i++)
		dma_unmap_page(&adapter->vdev->dev, descs[i].fields.address,
			       descs[i].fields.flags_len & IBMVETH_BUF_LEN_MASK,
			       DMA_TO_DEVICE);

map_failed:
	if (!firmware_has_feature(FW_FEATURE_CMO))
		netdev_err(netdev, "tx: unable to map xmit buffer\n");
	adapter->tx_map_failed++;
	if (skb_linearize(skb)) {
		netdev->stats.tx_dropped++;
		goto out;
	}
	force_bounce = 1;
	goto retry_bounce;
}
static void ibmveth_rx_mss_helper(struct sk_buff *skb, u16 mss, int lrg_pkt)
{
	struct tcphdr *tcph;
	int offset = 0;
	int hdr_len;

	/* only TCP packets will be aggregated */
	if (skb->protocol == htons(ETH_P_IP)) {
		struct iphdr *iph = (struct iphdr *)skb->data;

		if (iph->protocol == IPPROTO_TCP) {
			offset = iph->ihl * 4;
			skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
		} else {
			return;
		}
	} else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct ipv6hdr *iph6 = (struct ipv6hdr *)skb->data;

		if (iph6->nexthdr == IPPROTO_TCP) {
			offset = sizeof(struct ipv6hdr);
			skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
		} else {
			return;
		}
	} else {
		return;
	}
	/* if mss is not set through Large Packet bit/mss in rx buffer,
	 * expect that the mss will be written to the tcp header checksum.
	 */
	tcph = (struct tcphdr *)(skb->data + offset);
	if (lrg_pkt) {
		skb_shinfo(skb)->gso_size = mss;
	} else if (offset) {
		skb_shinfo(skb)->gso_size = ntohs(tcph->check);
		tcph->check = 0;
	}

	if (skb_shinfo(skb)->gso_size) {
		hdr_len = offset + tcph->doff * 4;
		skb_shinfo(skb)->gso_segs =
				DIV_ROUND_UP(skb->len - hdr_len,
					     skb_shinfo(skb)->gso_size);
	}
}
static void ibmveth_rx_csum_helper(struct sk_buff *skb,
				   struct ibmveth_adapter *adapter)
{
	struct iphdr *iph = NULL;
	struct ipv6hdr *iph6 = NULL;
	__be16 skb_proto = 0;
	u16 iphlen = 0;
	u16 iph_proto = 0;
	u16 tcphdrlen = 0;

	skb_proto = be16_to_cpu(skb->protocol);

	if (skb_proto == ETH_P_IP) {
		iph = (struct iphdr *)skb->data;

		/* If the IP checksum is not offloaded and if the packet
		 *  is large send, the checksum must be rebuilt.
		 */
		if (iph->check == 0xffff) {
			iph->check = 0;
			iph->check = ip_fast_csum((unsigned char *)iph,
						  iph->ihl);
		}

		iphlen = iph->ihl * 4;
		iph_proto = iph->protocol;
	} else if (skb_proto == ETH_P_IPV6) {
		iph6 = (struct ipv6hdr *)skb->data;
		iphlen = sizeof(struct ipv6hdr);
		iph_proto = iph6->nexthdr;
	}

	/* In OVS environment, when a flow is not cached, specifically for a
	 * new TCP connection, the first packet information is passed up
	 * the user space for finding a flow. During this process, OVS computes
	 * checksum on the first packet when CHECKSUM_PARTIAL flag is set.
	 *
	 * Given that we zeroed out TCP checksum field in transmit path
	 * (refer ibmveth_start_xmit routine) as we set "no checksum bit",
	 * OVS computed checksum will be incorrect w/o TCP pseudo checksum
	 * in the packet. This leads to OVS dropping the packet and hence
	 * TCP retransmissions are seen.
	 *
	 * So, re-compute TCP pseudo header checksum.
	 */
	if (iph_proto == IPPROTO_TCP && adapter->is_active_trunk) {
		struct tcphdr *tcph = (struct tcphdr *)(skb->data + iphlen);

		tcphdrlen = skb->len - iphlen;

		/* Recompute TCP pseudo header checksum */
		if (skb_proto == ETH_P_IP)
			tcph->check = ~csum_tcpudp_magic(iph->saddr,
					iph->daddr, tcphdrlen, iph_proto, 0);
		else if (skb_proto == ETH_P_IPV6)
			tcph->check = ~csum_ipv6_magic(&iph6->saddr,
					&iph6->daddr, tcphdrlen, iph_proto, 0);

		/* Setup SKB fields for checksum offload */
		skb_partial_csum_set(skb, iphlen,
				     offsetof(struct tcphdr, check));
		skb_reset_network_header(skb);
	}
}
static int ibmveth_poll(struct napi_struct *napi, int budget)
{
	struct ibmveth_adapter *adapter =
			container_of(napi, struct ibmveth_adapter, napi);
	struct net_device *netdev = adapter->netdev;
	int frames_processed = 0;
	unsigned long lpar_rc;
	u16 mss = 0;

restart_poll:
	while (frames_processed < budget) {
		if (!ibmveth_rxq_pending_buffer(adapter))
			break;

		smp_rmb();
		if (!ibmveth_rxq_buffer_valid(adapter)) {
			wmb(); /* suggested by larson1 */
			adapter->rx_invalid_buffer++;
			netdev_dbg(netdev, "recycling invalid buffer\n");
			ibmveth_rxq_recycle_buffer(adapter);
		} else {
			struct sk_buff *skb, *new_skb;
			int length = ibmveth_rxq_frame_length(adapter);
			int offset = ibmveth_rxq_frame_offset(adapter);
			int csum_good = ibmveth_rxq_csum_good(adapter);
			int lrg_pkt = ibmveth_rxq_large_packet(adapter);

			skb = ibmveth_rxq_get_buffer(adapter);

			/* if the large packet bit is set in the rx queue
			 * descriptor, the mss will be written by PHYP eight
			 * bytes from the start of the rx buffer, which is
			 * skb->data at this stage
			 */
			if (lrg_pkt) {
				__be64 *rxmss = (__be64 *)(skb->data + 8);

				mss = (u16)be64_to_cpu(*rxmss);
			}

			new_skb = NULL;
			if (length < rx_copybreak)
				new_skb = netdev_alloc_skb(netdev, length);

			if (new_skb) {
				skb_copy_to_linear_data(new_skb,
							skb->data + offset,
							length);
				if (rx_flush)
					ibmveth_flush_buffer(skb->data,
						length + offset);
				if (!ibmveth_rxq_recycle_buffer(adapter))
					kfree_skb(skb);
				skb = new_skb;
			} else {
				ibmveth_rxq_harvest_buffer(adapter);
				skb_reserve(skb, offset);
			}

			skb_put(skb, length);
			skb->protocol = eth_type_trans(skb, netdev);

			if (csum_good) {
				skb->ip_summed = CHECKSUM_UNNECESSARY;
				ibmveth_rx_csum_helper(skb, adapter);
			}

			if (length > netdev->mtu + ETH_HLEN) {
				ibmveth_rx_mss_helper(skb, mss, lrg_pkt);
				adapter->rx_large_packets++;
			}

			napi_gro_receive(napi, skb);	/* send it up */

			netdev->stats.rx_packets++;
			netdev->stats.rx_bytes += length;
			frames_processed++;
		}
	}

	ibmveth_replenish_task(adapter);

	if (frames_processed < budget) {
		napi_complete_done(napi, frames_processed);

		/* We think we are done - reenable interrupts,
		 * then check once more to make sure we are done.
		 */
		lpar_rc = h_vio_signal(adapter->vdev->unit_address,
				       VIO_IRQ_ENABLE);

		BUG_ON(lpar_rc != H_SUCCESS);

		if (ibmveth_rxq_pending_buffer(adapter) &&
		    napi_reschedule(napi)) {
			lpar_rc = h_vio_signal(adapter->vdev->unit_address,
					       VIO_IRQ_DISABLE);
			goto restart_poll;
		}
	}

	return frames_processed;
}
static irqreturn_t ibmveth_interrupt(int irq, void *dev_instance)
{
	struct net_device *netdev = dev_instance;
	struct ibmveth_adapter *adapter = netdev_priv(netdev);
	unsigned long lpar_rc;

	if (napi_schedule_prep(&adapter->napi)) {
		lpar_rc = h_vio_signal(adapter->vdev->unit_address,
				       VIO_IRQ_DISABLE);
		BUG_ON(lpar_rc != H_SUCCESS);
		__napi_schedule(&adapter->napi);
	}
	return IRQ_HANDLED;
}
static void ibmveth_set_multicast_list(struct net_device *netdev)
{
	struct ibmveth_adapter *adapter = netdev_priv(netdev);
	unsigned long lpar_rc;

	if ((netdev->flags & IFF_PROMISC) ||
	    (netdev_mc_count(netdev) > adapter->mcastFilterSize)) {
		lpar_rc = h_multicast_ctrl(adapter->vdev->unit_address,
					   IbmVethMcastEnableRecv |
					   IbmVethMcastDisableFiltering,
					   0);
		if (lpar_rc != H_SUCCESS) {
			netdev_err(netdev, "h_multicast_ctrl rc=%ld when "
				   "entering promisc mode\n", lpar_rc);
		}
	} else {
		struct netdev_hw_addr *ha;
		/* clear the filter table & disable filtering */
		lpar_rc = h_multicast_ctrl(adapter->vdev->unit_address,
					   IbmVethMcastEnableRecv |
					   IbmVethMcastDisableFiltering |
					   IbmVethMcastClearFilterTable,
					   0);
		if (lpar_rc != H_SUCCESS) {
			netdev_err(netdev, "h_multicast_ctrl rc=%ld when "
				   "attempting to clear filter table\n",
				   lpar_rc);
		}
		/* add the addresses to the filter table */
		netdev_for_each_mc_addr(ha, netdev) {
			/* add the multicast address to the filter table */
			u64 mcast_addr;
			mcast_addr = ibmveth_encode_mac_addr(ha->addr);
			lpar_rc = h_multicast_ctrl(adapter->vdev->unit_address,
						   IbmVethMcastAddFilter,
						   mcast_addr);
			if (lpar_rc != H_SUCCESS) {
				netdev_err(netdev, "h_multicast_ctrl rc=%ld "
					   "when adding an entry to the filter "
					   "table\n", lpar_rc);
			}
		}
		/* re-enable filtering */
		lpar_rc = h_multicast_ctrl(adapter->vdev->unit_address,
					   IbmVethMcastEnableFiltering,
					   0);
		if (lpar_rc != H_SUCCESS) {
			netdev_err(netdev, "h_multicast_ctrl rc=%ld when "
				   "enabling filtering\n", lpar_rc);
		}
	}
}
static int ibmveth_change_mtu(struct net_device *dev, int new_mtu)
{
	struct ibmveth_adapter *adapter = netdev_priv(dev);
	struct vio_dev *viodev = adapter->vdev;
	int new_mtu_oh = new_mtu + IBMVETH_BUFF_OH;
	int i, rc;
	int need_restart = 0;

	for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++)
		if (new_mtu_oh <= adapter->rx_buff_pool[i].buff_size)
			break;

	if (i == IBMVETH_NUM_BUFF_POOLS)
		return -EINVAL;

	/* Deactivate all the buffer pools so that the next loop can activate
	   only the buffer pools necessary to hold the new MTU */
	if (netif_running(adapter->netdev)) {
		need_restart = 1;
		adapter->pool_config = 1;
		ibmveth_close(adapter->netdev);
		adapter->pool_config = 0;
	}

	/* Look for an active buffer pool that can hold the new MTU */
	for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) {
		adapter->rx_buff_pool[i].active = 1;

		if (new_mtu_oh <= adapter->rx_buff_pool[i].buff_size) {
			dev->mtu = new_mtu;
			vio_cmo_set_dev_desired(viodev,
						ibmveth_get_desired_dma
						(viodev));
			if (need_restart) {
				return ibmveth_open(adapter->netdev);
			}
			return 0;
		}
	}

	if (need_restart && (rc = ibmveth_open(adapter->netdev)))
		return rc;

	return -EINVAL;
}
#ifdef CONFIG_NET_POLL_CONTROLLER
static void ibmveth_poll_controller(struct net_device *dev)
{
	ibmveth_replenish_task(netdev_priv(dev));
	ibmveth_interrupt(dev->irq, dev);
}
#endif
/**
 * ibmveth_get_desired_dma - Calculate IO memory desired by the driver
 *
 * @vdev: struct vio_dev for the device whose desired IO mem is to be returned
 *
 * Return value:
 *	Number of bytes of IO data the driver will need to perform well.
 */
static unsigned long ibmveth_get_desired_dma(struct vio_dev *vdev)
{
	struct net_device *netdev = dev_get_drvdata(&vdev->dev);
	struct ibmveth_adapter *adapter;
	struct iommu_table *tbl;
	unsigned long ret;
	int i;
	int rxqentries = 1;

	tbl = get_iommu_table_base(&vdev->dev);

	/* netdev inits at probe time along with the structures we need below*/
	if (netdev == NULL)
		return IOMMU_PAGE_ALIGN(IBMVETH_IO_ENTITLEMENT_DEFAULT, tbl);

	adapter = netdev_priv(netdev);

	ret = IBMVETH_BUFF_LIST_SIZE + IBMVETH_FILT_LIST_SIZE;
	ret += IOMMU_PAGE_ALIGN(netdev->mtu, tbl);

	for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) {
		/* add the size of the active receive buffers */
		if (adapter->rx_buff_pool[i].active)
			ret +=
			    adapter->rx_buff_pool[i].size *
			    IOMMU_PAGE_ALIGN(adapter->rx_buff_pool[i].
					     buff_size, tbl);
		rxqentries += adapter->rx_buff_pool[i].size;
	}
	/* add the size of the receive queue entries */
	ret += IOMMU_PAGE_ALIGN(
		rxqentries * sizeof(struct ibmveth_rx_q_entry), tbl);

	return ret;
}
static int ibmveth_set_mac_addr(struct net_device *dev, void *p)
{
	struct ibmveth_adapter *adapter = netdev_priv(dev);
	struct sockaddr *addr = p;
	u64 mac_address;
	int rc;

	if (!is_valid_ether_addr(addr->sa_data))
		return -EADDRNOTAVAIL;

	mac_address = ibmveth_encode_mac_addr(addr->sa_data);
	rc = h_change_logical_lan_mac(adapter->vdev->unit_address, mac_address);
	if (rc) {
		netdev_err(adapter->netdev, "h_change_logical_lan_mac failed with rc=%d\n", rc);
		return rc;
	}

	ether_addr_copy(dev->dev_addr, addr->sa_data);

	return 0;
}
static const struct net_device_ops ibmveth_netdev_ops = {
	.ndo_open		= ibmveth_open,
	.ndo_stop		= ibmveth_close,
	.ndo_start_xmit		= ibmveth_start_xmit,
	.ndo_set_rx_mode	= ibmveth_set_multicast_list,
	.ndo_do_ioctl		= ibmveth_ioctl,
	.ndo_change_mtu		= ibmveth_change_mtu,
	.ndo_fix_features	= ibmveth_fix_features,
	.ndo_set_features	= ibmveth_set_features,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_set_mac_address	= ibmveth_set_mac_addr,
#ifdef CONFIG_NET_POLL_CONTROLLER
	.ndo_poll_controller	= ibmveth_poll_controller,
#endif
};
static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id)
{
	int rc, i, mac_len;
	struct net_device *netdev;
	struct ibmveth_adapter *adapter;
	unsigned char *mac_addr_p;
	unsigned int *mcastFilterSize_p;
	long ret;
	unsigned long ret_attr;

	dev_dbg(&dev->dev, "entering ibmveth_probe for UA 0x%x\n",
		dev->unit_address);

	mac_addr_p = (unsigned char *)vio_get_attribute(dev, VETH_MAC_ADDR,
							&mac_len);
	if (!mac_addr_p) {
		dev_err(&dev->dev, "Can't find VETH_MAC_ADDR attribute\n");
		return -EINVAL;
	}
	/* Workaround for old/broken pHyp */
	if (mac_len == 8)
		mac_addr_p += 2;
	else if (mac_len != 6) {
		dev_err(&dev->dev, "VETH_MAC_ADDR attribute wrong len %d\n",
			mac_len);
		return -EINVAL;
	}

	mcastFilterSize_p = (unsigned int *)vio_get_attribute(dev,
						VETH_MCAST_FILTER_SIZE, NULL);
	if (!mcastFilterSize_p) {
		dev_err(&dev->dev, "Can't find VETH_MCAST_FILTER_SIZE "
			"attribute\n");
		return -EINVAL;
	}

	netdev = alloc_etherdev(sizeof(struct ibmveth_adapter));

	if (!netdev)
		return -ENOMEM;

	adapter = netdev_priv(netdev);
	dev_set_drvdata(&dev->dev, netdev);

	adapter->vdev = dev;
	adapter->netdev = netdev;
	adapter->mcastFilterSize = *mcastFilterSize_p;
	adapter->pool_config = 0;

	netif_napi_add(netdev, &adapter->napi, ibmveth_poll, 16);

	netdev->irq = dev->irq;
	netdev->netdev_ops = &ibmveth_netdev_ops;
	netdev->ethtool_ops = &netdev_ethtool_ops;
	SET_NETDEV_DEV(netdev, &dev->dev);
	netdev->hw_features = NETIF_F_SG;
	if (vio_get_attribute(dev, "ibm,illan-options", NULL) != NULL) {
		netdev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
				       NETIF_F_RXCSUM;
	}

	netdev->features |= netdev->hw_features;

	ret = h_illan_attributes(adapter->vdev->unit_address, 0, 0, &ret_attr);

	/* If running older firmware, TSO should not be enabled by default */
	if (ret == H_SUCCESS && (ret_attr & IBMVETH_ILLAN_LRG_SND_SUPPORT) &&
	    !old_large_send) {
		netdev->hw_features |= NETIF_F_TSO | NETIF_F_TSO6;
		netdev->features |= netdev->hw_features;
	} else {
		netdev->hw_features |= NETIF_F_TSO;
	}

	adapter->is_active_trunk = false;
	if (ret == H_SUCCESS && (ret_attr & IBMVETH_ILLAN_ACTIVE_TRUNK)) {
		adapter->is_active_trunk = true;
		netdev->hw_features |= NETIF_F_FRAGLIST;
		netdev->features |= NETIF_F_FRAGLIST;
	}

	netdev->min_mtu = IBMVETH_MIN_MTU;
	netdev->max_mtu = ETH_MAX_MTU;

	memcpy(netdev->dev_addr, mac_addr_p, ETH_ALEN);

	if (firmware_has_feature(FW_FEATURE_CMO))
		memcpy(pool_count, pool_count_cmo, sizeof(pool_count));

	for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) {
		struct kobject *kobj = &adapter->rx_buff_pool[i].kobj;
		int error;

		ibmveth_init_buffer_pool(&adapter->rx_buff_pool[i], i,
					 pool_count[i], pool_size[i],
					 pool_active[i]);
		error = kobject_init_and_add(kobj, &ktype_veth_pool,
					     &dev->dev.kobj, "pool%d", i);
		if (!error)
			kobject_uevent(kobj, KOBJ_ADD);
	}

	netdev_dbg(netdev, "adapter @ 0x%p\n", adapter);
	netdev_dbg(netdev, "registering netdev...\n");

	ibmveth_set_features(netdev, netdev->features);

	rc = register_netdev(netdev);

	if (rc) {
		netdev_dbg(netdev, "failed to register netdev rc=%d\n", rc);
		free_netdev(netdev);
		return rc;
	}

	netdev_dbg(netdev, "registered\n");

	return 0;
}
static int ibmveth_remove(struct vio_dev *dev)
{
	struct net_device *netdev = dev_get_drvdata(&dev->dev);
	struct ibmveth_adapter *adapter = netdev_priv(netdev);
	int i;

	for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++)
		kobject_put(&adapter->rx_buff_pool[i].kobj);

	unregister_netdev(netdev);

	free_netdev(netdev);
	dev_set_drvdata(&dev->dev, NULL);

	return 0;
}
static struct attribute veth_active_attr;
static struct attribute veth_num_attr;
static struct attribute veth_size_attr;

static ssize_t veth_pool_show(struct kobject *kobj,
			      struct attribute *attr, char *buf)
{
	struct ibmveth_buff_pool *pool = container_of(kobj,
						      struct ibmveth_buff_pool,
						      kobj);

	if (attr == &veth_active_attr)
		return sprintf(buf, "%d\n", pool->active);
	else if (attr == &veth_num_attr)
		return sprintf(buf, "%d\n", pool->size);
	else if (attr == &veth_size_attr)
		return sprintf(buf, "%d\n", pool->buff_size);
	return 0;
}

static ssize_t veth_pool_store(struct kobject *kobj, struct attribute *attr,
			       const char *buf, size_t count)
{
	struct ibmveth_buff_pool *pool = container_of(kobj,
						      struct ibmveth_buff_pool,
						      kobj);
	struct net_device *netdev = dev_get_drvdata(
	    container_of(kobj->parent, struct device, kobj));
	struct ibmveth_adapter *adapter = netdev_priv(netdev);
	long value = simple_strtol(buf, NULL, 10);
	long rc;

	if (attr == &veth_active_attr) {
		if (value && !pool->active) {
			if (netif_running(netdev)) {
				if (ibmveth_alloc_buffer_pool(pool)) {
					netdev_err(netdev,
						   "unable to alloc pool\n");
					return -ENOMEM;
				}
				pool->active = 1;
				adapter->pool_config = 1;
				ibmveth_close(netdev);
				adapter->pool_config = 0;
				if ((rc = ibmveth_open(netdev)))
					return rc;
			} else {
				pool->active = 1;
			}
		} else if (!value && pool->active) {
			int mtu = netdev->mtu + IBMVETH_BUFF_OH;
			int i;
			/* Make sure there is a buffer pool with buffers that
			   can hold a packet of the size of the MTU */
			for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) {
				if (pool == &adapter->rx_buff_pool[i])
					continue;
				if (!adapter->rx_buff_pool[i].active)
					continue;
				if (mtu <= adapter->rx_buff_pool[i].buff_size)
					break;
			}

			if (i == IBMVETH_NUM_BUFF_POOLS) {
				netdev_err(netdev, "no active pool >= MTU\n");
				return -EPERM;
			}

			if (netif_running(netdev)) {
				adapter->pool_config = 1;
				ibmveth_close(netdev);
				pool->active = 0;
				adapter->pool_config = 0;
				if ((rc = ibmveth_open(netdev)))
					return rc;
			}
			pool->active = 0;
		}
	} else if (attr == &veth_num_attr) {
		if (value <= 0 || value > IBMVETH_MAX_POOL_COUNT) {
			return -EINVAL;
		} else {
			if (netif_running(netdev)) {
				adapter->pool_config = 1;
				ibmveth_close(netdev);
				adapter->pool_config = 0;
				pool->size = value;
				if ((rc = ibmveth_open(netdev)))
					return rc;
			} else {
				pool->size = value;
			}
		}
	} else if (attr == &veth_size_attr) {
		if (value <= IBMVETH_BUFF_OH || value > IBMVETH_MAX_BUF_SIZE) {
			return -EINVAL;
		} else {
			if (netif_running(netdev)) {
				adapter->pool_config = 1;
				ibmveth_close(netdev);
				adapter->pool_config = 0;
				pool->buff_size = value;
				if ((rc = ibmveth_open(netdev)))
					return rc;
			} else {
				pool->buff_size = value;
			}
		}
	}

	/* kick the interrupt handler to allocate/deallocate pools */
	ibmveth_interrupt(netdev->irq, netdev);
	return count;
}
#define ATTR(_name, _mode)				\
	struct attribute veth_##_name##_attr = {	\
	.name = __stringify(_name), .mode = _mode,	\
	}

static ATTR(active, 0644);
static ATTR(num, 0644);
static ATTR(size, 0644);

static struct attribute *veth_pool_attrs[] = {
	&veth_active_attr,
	&veth_num_attr,
	&veth_size_attr,
	NULL,
};

static const struct sysfs_ops veth_pool_ops = {
	.show   = veth_pool_show,
	.store  = veth_pool_store,
};

static struct kobj_type ktype_veth_pool = {
	.release        = NULL,
	.sysfs_ops      = &veth_pool_ops,
	.default_attrs  = veth_pool_attrs,
};
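/*
 * Illustrative usage: each receive buffer pool is registered in
 * ibmveth_probe() as a "pool<N>" kobject under the vio device's sysfs
 * directory, so its active/num/size attributes can be tuned with something
 * like "echo 768 > /sys/devices/vio/30000002/pool0/num" (the unit address
 * here is only an example).
 */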
static int ibmveth_resume(struct device *dev)
{
	struct net_device *netdev = dev_get_drvdata(dev);
	ibmveth_interrupt(netdev->irq, netdev);
	return 0;
}

static const struct vio_device_id ibmveth_device_table[] = {
	{ "network", "IBM,l-lan"},
	{ "", "" }
};
MODULE_DEVICE_TABLE(vio, ibmveth_device_table);
static const struct dev_pm_ops ibmveth_pm_ops = {
	.resume = ibmveth_resume
};

static struct vio_driver ibmveth_driver = {
	.id_table	= ibmveth_device_table,
	.probe		= ibmveth_probe,
	.remove		= ibmveth_remove,
	.get_desired_dma = ibmveth_get_desired_dma,
	.name		= ibmveth_driver_name,
	.pm		= &ibmveth_pm_ops,
};
static int __init ibmveth_module_init(void)
{
	printk(KERN_DEBUG "%s: %s %s\n", ibmveth_driver_name,
	       ibmveth_driver_string, ibmveth_driver_version);

	return vio_register_driver(&ibmveth_driver);
}

static void __exit ibmveth_module_exit(void)
{
	vio_unregister_driver(&ibmveth_driver);
}

module_init(ibmveth_module_init);
module_exit(ibmveth_module_exit);