/*
 * IBM Power Virtual Ethernet Device Driver
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 *
 * Copyright (C) IBM Corporation, 2003, 2010
 *
 * Authors: Dave Larson <larson1@us.ibm.com>
 *	    Santiago Leon <santil@linux.vnet.ibm.com>
 *	    Brian King <brking@linux.vnet.ibm.com>
 *	    Robert Jennings <rcj@linux.vnet.ibm.com>
 *	    Anton Blanchard <anton@au.ibm.com>
 */
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/dma-mapping.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/mm.h>
#include <linux/pm.h>
#include <linux/ethtool.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/slab.h>
#include <asm/hvcall.h>
#include <linux/atomic.h>
#include <asm/vio.h>
#include <asm/iommu.h>
#include <asm/firmware.h>
#include <net/tcp.h>
#include <net/ip6_checksum.h>

#include "ibmveth.h"
static irqreturn_t ibmveth_interrupt(int irq, void *dev_instance);
static void ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter);
static unsigned long ibmveth_get_desired_dma(struct vio_dev *vdev);

static struct kobj_type ktype_veth_pool;


static const char ibmveth_driver_name[] = "ibmveth";
static const char ibmveth_driver_string[] = "IBM Power Virtual Ethernet Driver";
#define ibmveth_driver_version "1.06"

MODULE_AUTHOR("Santiago Leon <santil@linux.vnet.ibm.com>");
MODULE_DESCRIPTION("IBM Power Virtual Ethernet Driver");
MODULE_LICENSE("GPL");
MODULE_VERSION(ibmveth_driver_version);
static unsigned int tx_copybreak __read_mostly = 128;
module_param(tx_copybreak, uint, 0644);
MODULE_PARM_DESC(tx_copybreak,
	"Maximum size of packet that is copied to a new buffer on transmit");

static unsigned int rx_copybreak __read_mostly = 128;
module_param(rx_copybreak, uint, 0644);
MODULE_PARM_DESC(rx_copybreak,
	"Maximum size of packet that is copied to a new buffer on receive");

static unsigned int rx_flush __read_mostly = 0;
module_param(rx_flush, uint, 0644);
MODULE_PARM_DESC(rx_flush, "Flush receive buffers before use");

static bool old_large_send __read_mostly;
module_param(old_large_send, bool, S_IRUGO);
MODULE_PARM_DESC(old_large_send,
	"Use old large send method on firmware that supports the new method");
struct ibmveth_stat {
	char name[ETH_GSTRING_LEN];
	int offset;
};

#define IBMVETH_STAT_OFF(stat) offsetof(struct ibmveth_adapter, stat)
#define IBMVETH_GET_STAT(a, off) *((u64 *)(((unsigned long)(a)) + off))

struct ibmveth_stat ibmveth_stats[] = {
	{ "replenish_task_cycles", IBMVETH_STAT_OFF(replenish_task_cycles) },
	{ "replenish_no_mem", IBMVETH_STAT_OFF(replenish_no_mem) },
	{ "replenish_add_buff_failure",
			IBMVETH_STAT_OFF(replenish_add_buff_failure) },
	{ "replenish_add_buff_success",
			IBMVETH_STAT_OFF(replenish_add_buff_success) },
	{ "rx_invalid_buffer", IBMVETH_STAT_OFF(rx_invalid_buffer) },
	{ "rx_no_buffer", IBMVETH_STAT_OFF(rx_no_buffer) },
	{ "tx_map_failed", IBMVETH_STAT_OFF(tx_map_failed) },
	{ "tx_send_failed", IBMVETH_STAT_OFF(tx_send_failed) },
	{ "fw_enabled_ipv4_csum", IBMVETH_STAT_OFF(fw_ipv4_csum_support) },
	{ "fw_enabled_ipv6_csum", IBMVETH_STAT_OFF(fw_ipv6_csum_support) },
	{ "tx_large_packets", IBMVETH_STAT_OFF(tx_large_packets) },
	{ "rx_large_packets", IBMVETH_STAT_OFF(rx_large_packets) },
	{ "fw_enabled_large_send", IBMVETH_STAT_OFF(fw_large_send_support) }
};
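/* Each entry above names a u64 counter embedded in struct ibmveth_adapter;
 * the ethtool callbacks further down walk this table and read each counter
 * through IBMVETH_GET_STAT() using the byte offset recorded here.
 */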
/* simple methods of getting data from the current rxq entry */
static inline u32 ibmveth_rxq_flags(struct ibmveth_adapter *adapter)
{
	return be32_to_cpu(adapter->rx_queue.queue_addr[adapter->rx_queue.index].flags_off);
}

static inline int ibmveth_rxq_toggle(struct ibmveth_adapter *adapter)
{
	return (ibmveth_rxq_flags(adapter) & IBMVETH_RXQ_TOGGLE) >>
			IBMVETH_RXQ_TOGGLE_SHIFT;
}

static inline int ibmveth_rxq_pending_buffer(struct ibmveth_adapter *adapter)
{
	return ibmveth_rxq_toggle(adapter) == adapter->rx_queue.toggle;
}

static inline int ibmveth_rxq_buffer_valid(struct ibmveth_adapter *adapter)
{
	return ibmveth_rxq_flags(adapter) & IBMVETH_RXQ_VALID;
}

static inline int ibmveth_rxq_frame_offset(struct ibmveth_adapter *adapter)
{
	return ibmveth_rxq_flags(adapter) & IBMVETH_RXQ_OFF_MASK;
}

static inline int ibmveth_rxq_large_packet(struct ibmveth_adapter *adapter)
{
	return ibmveth_rxq_flags(adapter) & IBMVETH_RXQ_LRG_PKT;
}

static inline int ibmveth_rxq_frame_length(struct ibmveth_adapter *adapter)
{
	return be32_to_cpu(adapter->rx_queue.queue_addr[adapter->rx_queue.index].length);
}

static inline int ibmveth_rxq_csum_good(struct ibmveth_adapter *adapter)
{
	return ibmveth_rxq_flags(adapter) & IBMVETH_RXQ_CSUM_GOOD;
}
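/* The receive queue is a ring shared with the hypervisor.  Each entry
 * carries a toggle bit that the driver compares against its own expected
 * toggle (see ibmveth_rxq_pending_buffer()); the expected value is flipped
 * every time the ring index wraps, which is how the driver tells freshly
 * written entries from stale ones without a separate producer pointer.
 */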
/* setup the initial settings for a buffer pool */
static void ibmveth_init_buffer_pool(struct ibmveth_buff_pool *pool,
				     u32 pool_index, u32 pool_size,
				     u32 buff_size, u32 pool_active)
{
	pool->size = pool_size;
	pool->index = pool_index;
	pool->buff_size = buff_size;
	pool->threshold = pool_size * 7 / 8;
	pool->active = pool_active;
}
/* allocate and setup a buffer pool - called during open */
static int ibmveth_alloc_buffer_pool(struct ibmveth_buff_pool *pool)
{
	int i;

	pool->free_map = kmalloc(sizeof(u16) * pool->size, GFP_KERNEL);
	if (!pool->free_map)
		return -1;

	pool->dma_addr = kcalloc(pool->size, sizeof(dma_addr_t), GFP_KERNEL);
	if (!pool->dma_addr) {
		kfree(pool->free_map);
		pool->free_map = NULL;
		return -1;
	}

	pool->skbuff = kcalloc(pool->size, sizeof(void *), GFP_KERNEL);
	if (!pool->skbuff) {
		kfree(pool->dma_addr);
		pool->dma_addr = NULL;

		kfree(pool->free_map);
		pool->free_map = NULL;
		return -1;
	}

	for (i = 0; i < pool->size; ++i)
		pool->free_map[i] = i;

	atomic_set(&pool->available, 0);
	pool->producer_index = 0;
	pool->consumer_index = 0;

	return 0;
}
static inline void ibmveth_flush_buffer(void *addr, unsigned long length)
{
	unsigned long offset;

	for (offset = 0; offset < length; offset += SMP_CACHE_BYTES)
		asm("dcbfl %0,%1" :: "b" (addr), "r" (offset));
}
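/* Each buffer handed to the hypervisor below is tagged with a 64-bit
 * correlator (pool index in the upper 32 bits, buffer index in the lower
 * 32) written into its first 8 bytes, so the completion path can find the
 * owning pool and skb again.
 */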
/* replenish the buffers for a pool. note that we don't need to
 * skb_reserve these since they are used for incoming...
 */
static void ibmveth_replenish_buffer_pool(struct ibmveth_adapter *adapter,
					  struct ibmveth_buff_pool *pool)
{
	u32 i;
	u32 count = pool->size - atomic_read(&pool->available);
	u32 buffers_added = 0;
	struct sk_buff *skb;
	unsigned int free_index, index;
	u64 correlator;
	unsigned long lpar_rc;
	dma_addr_t dma_addr;

	mb();

	for (i = 0; i < count; ++i) {
		union ibmveth_buf_desc desc;

		skb = netdev_alloc_skb(adapter->netdev, pool->buff_size);

		if (!skb) {
			netdev_dbg(adapter->netdev,
				   "replenish: unable to allocate skb\n");
			adapter->replenish_no_mem++;
			break;
		}

		free_index = pool->consumer_index;
		pool->consumer_index++;
		if (pool->consumer_index >= pool->size)
			pool->consumer_index = 0;
		index = pool->free_map[free_index];

		BUG_ON(index == IBM_VETH_INVALID_MAP);
		BUG_ON(pool->skbuff[index] != NULL);

		dma_addr = dma_map_single(&adapter->vdev->dev, skb->data,
				pool->buff_size, DMA_FROM_DEVICE);

		if (dma_mapping_error(&adapter->vdev->dev, dma_addr))
			goto failure;

		pool->free_map[free_index] = IBM_VETH_INVALID_MAP;
		pool->dma_addr[index] = dma_addr;
		pool->skbuff[index] = skb;

		correlator = ((u64)pool->index << 32) | index;
		*(u64 *)skb->data = correlator;

		desc.fields.flags_len = IBMVETH_BUF_VALID | pool->buff_size;
		desc.fields.address = dma_addr;

		if (rx_flush) {
			unsigned int len = min(pool->buff_size,
						adapter->netdev->mtu +
						IBMVETH_BUFF_OH);
			ibmveth_flush_buffer(skb->data, len);
		}
		lpar_rc = h_add_logical_lan_buffer(adapter->vdev->unit_address,
						   desc.desc);

		if (lpar_rc != H_SUCCESS) {
			goto failure;
		} else {
			buffers_added++;
			adapter->replenish_add_buff_success++;
		}
	}

	mb();
	atomic_add(buffers_added, &(pool->available));
	return;

failure:
	pool->free_map[free_index] = index;
	pool->skbuff[index] = NULL;
	if (pool->consumer_index == 0)
		pool->consumer_index = pool->size - 1;
	else
		pool->consumer_index--;
	if (!dma_mapping_error(&adapter->vdev->dev, dma_addr))
		dma_unmap_single(&adapter->vdev->dev,
				 pool->dma_addr[index], pool->buff_size,
				 DMA_FROM_DEVICE);
	dev_kfree_skb_any(skb);
	adapter->replenish_add_buff_failure++;

	mb();
	atomic_add(buffers_added, &(pool->available));
}
/*
 * The final 8 bytes of the buffer list is a counter of frames dropped
 * because there was not a buffer in the buffer list capable of holding
 * the frame.
 */
static void ibmveth_update_rx_no_buffer(struct ibmveth_adapter *adapter)
{
	__be64 *p = adapter->buffer_list_addr + 4096 - 8;

	adapter->rx_no_buffer = be64_to_cpup(p);
}
/* replenish routine */
static void ibmveth_replenish_task(struct ibmveth_adapter *adapter)
{
	int i;

	adapter->replenish_task_cycles++;

	for (i = (IBMVETH_NUM_BUFF_POOLS - 1); i >= 0; i--) {
		struct ibmveth_buff_pool *pool = &adapter->rx_buff_pool[i];

		if (pool->active &&
		    (atomic_read(&pool->available) < pool->threshold))
			ibmveth_replenish_buffer_pool(adapter, pool);
	}

	ibmveth_update_rx_no_buffer(adapter);
}
/* empty and free a buffer pool - also used to do cleanup in error paths */
static void ibmveth_free_buffer_pool(struct ibmveth_adapter *adapter,
				     struct ibmveth_buff_pool *pool)
{
	int i;

	kfree(pool->free_map);
	pool->free_map = NULL;

	if (pool->skbuff && pool->dma_addr) {
		for (i = 0; i < pool->size; ++i) {
			struct sk_buff *skb = pool->skbuff[i];
			if (skb) {
				dma_unmap_single(&adapter->vdev->dev,
						 pool->dma_addr[i],
						 pool->buff_size,
						 DMA_FROM_DEVICE);
				dev_kfree_skb_any(skb);
				pool->skbuff[i] = NULL;
			}
		}
	}

	if (pool->dma_addr) {
		kfree(pool->dma_addr);
		pool->dma_addr = NULL;
	}

	if (pool->skbuff) {
		kfree(pool->skbuff);
		pool->skbuff = NULL;
	}
}
/* remove a buffer from a pool */
static void ibmveth_remove_buffer_from_pool(struct ibmveth_adapter *adapter,
					    u64 correlator)
{
	unsigned int pool  = correlator >> 32;
	unsigned int index = correlator & 0xffffffffUL;
	unsigned int free_index;
	struct sk_buff *skb;

	BUG_ON(pool >= IBMVETH_NUM_BUFF_POOLS);
	BUG_ON(index >= adapter->rx_buff_pool[pool].size);

	skb = adapter->rx_buff_pool[pool].skbuff[index];

	BUG_ON(skb == NULL);

	adapter->rx_buff_pool[pool].skbuff[index] = NULL;

	dma_unmap_single(&adapter->vdev->dev,
			 adapter->rx_buff_pool[pool].dma_addr[index],
			 adapter->rx_buff_pool[pool].buff_size,
			 DMA_FROM_DEVICE);

	free_index = adapter->rx_buff_pool[pool].producer_index;
	adapter->rx_buff_pool[pool].producer_index++;
	if (adapter->rx_buff_pool[pool].producer_index >=
	    adapter->rx_buff_pool[pool].size)
		adapter->rx_buff_pool[pool].producer_index = 0;
	adapter->rx_buff_pool[pool].free_map[free_index] = index;

	mb();

	atomic_dec(&(adapter->rx_buff_pool[pool].available));
}
/* get the current buffer on the rx queue */
static inline struct sk_buff *ibmveth_rxq_get_buffer(struct ibmveth_adapter *adapter)
{
	u64 correlator = adapter->rx_queue.queue_addr[adapter->rx_queue.index].correlator;
	unsigned int pool = correlator >> 32;
	unsigned int index = correlator & 0xffffffffUL;

	BUG_ON(pool >= IBMVETH_NUM_BUFF_POOLS);
	BUG_ON(index >= adapter->rx_buff_pool[pool].size);

	return adapter->rx_buff_pool[pool].skbuff[index];
}
/* recycle the current buffer on the rx queue */
static int ibmveth_rxq_recycle_buffer(struct ibmveth_adapter *adapter)
{
	u32 q_index = adapter->rx_queue.index;
	u64 correlator = adapter->rx_queue.queue_addr[q_index].correlator;
	unsigned int pool = correlator >> 32;
	unsigned int index = correlator & 0xffffffffUL;
	union ibmveth_buf_desc desc;
	unsigned long lpar_rc;
	int ret = 1;

	BUG_ON(pool >= IBMVETH_NUM_BUFF_POOLS);
	BUG_ON(index >= adapter->rx_buff_pool[pool].size);

	if (!adapter->rx_buff_pool[pool].active) {
		ibmveth_rxq_harvest_buffer(adapter);
		ibmveth_free_buffer_pool(adapter, &adapter->rx_buff_pool[pool]);
		goto out;
	}

	desc.fields.flags_len = IBMVETH_BUF_VALID |
		adapter->rx_buff_pool[pool].buff_size;
	desc.fields.address = adapter->rx_buff_pool[pool].dma_addr[index];

	lpar_rc = h_add_logical_lan_buffer(adapter->vdev->unit_address, desc.desc);

	if (lpar_rc != H_SUCCESS) {
		netdev_dbg(adapter->netdev, "h_add_logical_lan_buffer failed "
			   "during recycle rc=%ld", lpar_rc);
		ibmveth_remove_buffer_from_pool(adapter, adapter->rx_queue.queue_addr[adapter->rx_queue.index].correlator);
		ret = 0;
	}

	if (++adapter->rx_queue.index == adapter->rx_queue.num_slots) {
		adapter->rx_queue.index = 0;
		adapter->rx_queue.toggle = !adapter->rx_queue.toggle;
	}

out:
	return ret;
}
static void ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter)
{
	ibmveth_remove_buffer_from_pool(adapter, adapter->rx_queue.queue_addr[adapter->rx_queue.index].correlator);

	if (++adapter->rx_queue.index == adapter->rx_queue.num_slots) {
		adapter->rx_queue.index = 0;
		adapter->rx_queue.toggle = !adapter->rx_queue.toggle;
	}
}
static int ibmveth_register_logical_lan(struct ibmveth_adapter *adapter,
		union ibmveth_buf_desc rxq_desc, u64 mac_address)
{
	int rc, try_again = 1;

	/*
	 * After a kexec the adapter will still be open, so our attempt to
	 * open it will fail. So if we get a failure we free the adapter and
	 * try again, but only once.
	 */
retry:
	rc = h_register_logical_lan(adapter->vdev->unit_address,
				    adapter->buffer_list_dma, rxq_desc.desc,
				    adapter->filter_list_dma, mac_address);

	if (rc != H_SUCCESS && try_again) {
		do {
			rc = h_free_logical_lan(adapter->vdev->unit_address);
		} while (H_IS_LONG_BUSY(rc) || (rc == H_BUSY));

		try_again = 0;
		goto retry;
	}

	return rc;
}

static u64 ibmveth_encode_mac_addr(u8 *mac)
{
	int i;
	u64 encoded = 0;

	for (i = 0; i < ETH_ALEN; i++)
		encoded = (encoded << 8) | mac[i];

	return encoded;
}
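/* open allocates the long-lived resources the firmware interface needs:
 * one page each for the buffer list and filter list, a DMA-coherent
 * receive queue sized for every configured pool entry, the bounce buffer
 * used for small copies on transmit, and finally the irq.  Failures unwind
 * through the out_* labels in reverse order of allocation.
 */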
static int ibmveth_open(struct net_device *netdev)
{
	struct ibmveth_adapter *adapter = netdev_priv(netdev);
	u64 mac_address;
	int rxq_entries = 1;
	unsigned long lpar_rc;
	int rc;
	union ibmveth_buf_desc rxq_desc;
	int i;
	struct device *dev;

	netdev_dbg(netdev, "open starting\n");

	napi_enable(&adapter->napi);

	for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++)
		rxq_entries += adapter->rx_buff_pool[i].size;

	rc = -ENOMEM;
	adapter->buffer_list_addr = (void *) get_zeroed_page(GFP_KERNEL);
	if (!adapter->buffer_list_addr) {
		netdev_err(netdev, "unable to allocate list pages\n");
		goto out;
	}

	adapter->filter_list_addr = (void *) get_zeroed_page(GFP_KERNEL);
	if (!adapter->filter_list_addr) {
		netdev_err(netdev, "unable to allocate filter pages\n");
		goto out_free_buffer_list;
	}

	dev = &adapter->vdev->dev;

	adapter->rx_queue.queue_len = sizeof(struct ibmveth_rx_q_entry) *
						rxq_entries;
	adapter->rx_queue.queue_addr =
		dma_alloc_coherent(dev, adapter->rx_queue.queue_len,
				   &adapter->rx_queue.queue_dma, GFP_KERNEL);
	if (!adapter->rx_queue.queue_addr)
		goto out_free_filter_list;

	adapter->buffer_list_dma = dma_map_single(dev,
			adapter->buffer_list_addr, 4096, DMA_BIDIRECTIONAL);
	if (dma_mapping_error(dev, adapter->buffer_list_dma)) {
		netdev_err(netdev, "unable to map buffer list pages\n");
		goto out_free_queue_mem;
	}

	adapter->filter_list_dma = dma_map_single(dev,
			adapter->filter_list_addr, 4096, DMA_BIDIRECTIONAL);
	if (dma_mapping_error(dev, adapter->filter_list_dma)) {
		netdev_err(netdev, "unable to map filter list pages\n");
		goto out_unmap_buffer_list;
	}

	adapter->rx_queue.index = 0;
	adapter->rx_queue.num_slots = rxq_entries;
	adapter->rx_queue.toggle = 1;

	mac_address = ibmveth_encode_mac_addr(netdev->dev_addr);

	rxq_desc.fields.flags_len = IBMVETH_BUF_VALID |
					adapter->rx_queue.queue_len;
	rxq_desc.fields.address = adapter->rx_queue.queue_dma;

	netdev_dbg(netdev, "buffer list @ 0x%p\n", adapter->buffer_list_addr);
	netdev_dbg(netdev, "filter list @ 0x%p\n", adapter->filter_list_addr);
	netdev_dbg(netdev, "receive q   @ 0x%p\n", adapter->rx_queue.queue_addr);

	h_vio_signal(adapter->vdev->unit_address, VIO_IRQ_DISABLE);

	lpar_rc = ibmveth_register_logical_lan(adapter, rxq_desc, mac_address);

	if (lpar_rc != H_SUCCESS) {
		netdev_err(netdev, "h_register_logical_lan failed with %ld\n",
			   lpar_rc);
		netdev_err(netdev, "buffer TCE:0x%llx filter TCE:0x%llx rxq "
			   "desc:0x%llx MAC:0x%llx\n",
				     adapter->buffer_list_dma,
				     adapter->filter_list_dma,
				     rxq_desc.desc,
				     mac_address);
		rc = -ENONET;
		goto out_unmap_filter_list;
	}

	for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) {
		if (!adapter->rx_buff_pool[i].active)
			continue;
		if (ibmveth_alloc_buffer_pool(&adapter->rx_buff_pool[i])) {
			netdev_err(netdev, "unable to alloc pool\n");
			adapter->rx_buff_pool[i].active = 0;
			rc = -ENOMEM;
			goto out_free_buffer_pools;
		}
	}

	netdev_dbg(netdev, "registering irq 0x%x\n", netdev->irq);
	rc = request_irq(netdev->irq, ibmveth_interrupt, 0, netdev->name,
			 netdev);
	if (rc != 0) {
		netdev_err(netdev, "unable to request irq 0x%x, rc %d\n",
			   netdev->irq, rc);
		do {
			lpar_rc = h_free_logical_lan(adapter->vdev->unit_address);
		} while (H_IS_LONG_BUSY(lpar_rc) || (lpar_rc == H_BUSY));

		goto out_free_buffer_pools;
	}

	rc = -ENOMEM;
	adapter->bounce_buffer =
	    kmalloc(netdev->mtu + IBMVETH_BUFF_OH, GFP_KERNEL);
	if (!adapter->bounce_buffer)
		goto out_free_irq;

	adapter->bounce_buffer_dma =
	    dma_map_single(&adapter->vdev->dev, adapter->bounce_buffer,
			   netdev->mtu + IBMVETH_BUFF_OH, DMA_BIDIRECTIONAL);
	if (dma_mapping_error(dev, adapter->bounce_buffer_dma)) {
		netdev_err(netdev, "unable to map bounce buffer\n");
		goto out_free_bounce_buffer;
	}

	netdev_dbg(netdev, "initial replenish cycle\n");
	ibmveth_interrupt(netdev->irq, netdev);

	netif_start_queue(netdev);

	netdev_dbg(netdev, "open complete\n");

	return 0;

out_free_bounce_buffer:
	kfree(adapter->bounce_buffer);
out_free_irq:
	free_irq(netdev->irq, netdev);
out_free_buffer_pools:
	while (--i >= 0) {
		if (adapter->rx_buff_pool[i].active)
			ibmveth_free_buffer_pool(adapter,
						 &adapter->rx_buff_pool[i]);
	}
out_unmap_filter_list:
	dma_unmap_single(dev, adapter->filter_list_dma, 4096,
			 DMA_BIDIRECTIONAL);
out_unmap_buffer_list:
	dma_unmap_single(dev, adapter->buffer_list_dma, 4096,
			 DMA_BIDIRECTIONAL);
out_free_queue_mem:
	dma_free_coherent(dev, adapter->rx_queue.queue_len,
			  adapter->rx_queue.queue_addr,
			  adapter->rx_queue.queue_dma);
out_free_filter_list:
	free_page((unsigned long)adapter->filter_list_addr);
out_free_buffer_list:
	free_page((unsigned long)adapter->buffer_list_addr);
out:
	napi_disable(&adapter->napi);
	return rc;
}
static int ibmveth_close(struct net_device *netdev)
{
	struct ibmveth_adapter *adapter = netdev_priv(netdev);
	struct device *dev = &adapter->vdev->dev;
	long lpar_rc;
	int i;

	netdev_dbg(netdev, "close starting\n");

	napi_disable(&adapter->napi);

	if (!adapter->pool_config)
		netif_stop_queue(netdev);

	h_vio_signal(adapter->vdev->unit_address, VIO_IRQ_DISABLE);

	do {
		lpar_rc = h_free_logical_lan(adapter->vdev->unit_address);
	} while (H_IS_LONG_BUSY(lpar_rc) || (lpar_rc == H_BUSY));

	if (lpar_rc != H_SUCCESS) {
		netdev_err(netdev, "h_free_logical_lan failed with %lx, "
			   "continuing with close\n", lpar_rc);
	}

	free_irq(netdev->irq, netdev);

	ibmveth_update_rx_no_buffer(adapter);

	dma_unmap_single(dev, adapter->buffer_list_dma, 4096,
			 DMA_BIDIRECTIONAL);
	free_page((unsigned long)adapter->buffer_list_addr);

	dma_unmap_single(dev, adapter->filter_list_dma, 4096,
			 DMA_BIDIRECTIONAL);
	free_page((unsigned long)adapter->filter_list_addr);

	dma_free_coherent(dev, adapter->rx_queue.queue_len,
			  adapter->rx_queue.queue_addr,
			  adapter->rx_queue.queue_dma);

	for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++)
		if (adapter->rx_buff_pool[i].active)
			ibmveth_free_buffer_pool(adapter,
						 &adapter->rx_buff_pool[i]);

	dma_unmap_single(&adapter->vdev->dev, adapter->bounce_buffer_dma,
			 adapter->netdev->mtu + IBMVETH_BUFF_OH,
			 DMA_BIDIRECTIONAL);
	kfree(adapter->bounce_buffer);

	netdev_dbg(netdev, "close complete\n");

	return 0;
}
static int netdev_get_link_ksettings(struct net_device *dev,
				     struct ethtool_link_ksettings *cmd)
{
	u32 supported, advertising;

	supported = (SUPPORTED_1000baseT_Full | SUPPORTED_Autoneg |
				SUPPORTED_FIBRE);
	advertising = (ADVERTISED_1000baseT_Full | ADVERTISED_Autoneg |
				ADVERTISED_FIBRE);
	cmd->base.speed = SPEED_1000;
	cmd->base.duplex = DUPLEX_FULL;
	cmd->base.port = PORT_FIBRE;
	cmd->base.phy_address = 0;
	cmd->base.autoneg = AUTONEG_ENABLE;

	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
						supported);
	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
						advertising);

	return 0;
}
static void netdev_get_drvinfo(struct net_device *dev,
			       struct ethtool_drvinfo *info)
{
	strlcpy(info->driver, ibmveth_driver_name, sizeof(info->driver));
	strlcpy(info->version, ibmveth_driver_version, sizeof(info->version));
}
static netdev_features_t ibmveth_fix_features(struct net_device *dev,
	netdev_features_t features)
{
	/*
	 * Since the ibmveth firmware interface does not have the
	 * concept of separate tx/rx checksum offload enable, if rx
	 * checksum is disabled we also have to disable tx checksum
	 * offload. Once we disable rx checksum offload, we are no
	 * longer allowed to send tx buffers that are not properly
	 * checksummed.
	 */

	if (!(features & NETIF_F_RXCSUM))
		features &= ~NETIF_F_CSUM_MASK;

	return features;
}
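/* Checksum offload is negotiated with the hypervisor through
 * h_illan_attributes().  The device has to be closed while the attribute
 * is flipped, so the helpers below close and reopen the interface around
 * the change when it is running.
 */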
static int ibmveth_set_csum_offload(struct net_device *dev, u32 data)
{
	struct ibmveth_adapter *adapter = netdev_priv(dev);
	unsigned long set_attr, clr_attr, ret_attr;
	unsigned long set_attr6, clr_attr6;
	long ret, ret4, ret6;
	int rc1 = 0, rc2 = 0;
	int restart = 0;

	if (netif_running(dev)) {
		restart = 1;
		adapter->pool_config = 1;
		ibmveth_close(dev);
		adapter->pool_config = 0;
	}

	set_attr = 0;
	clr_attr = 0;
	set_attr6 = 0;
	clr_attr6 = 0;

	if (data) {
		set_attr = IBMVETH_ILLAN_IPV4_TCP_CSUM;
		set_attr6 = IBMVETH_ILLAN_IPV6_TCP_CSUM;
	} else {
		clr_attr = IBMVETH_ILLAN_IPV4_TCP_CSUM;
		clr_attr6 = IBMVETH_ILLAN_IPV6_TCP_CSUM;
	}

	ret = h_illan_attributes(adapter->vdev->unit_address, 0, 0, &ret_attr);

	if (ret == H_SUCCESS &&
	    (ret_attr & IBMVETH_ILLAN_PADDED_PKT_CSUM)) {
		ret4 = h_illan_attributes(adapter->vdev->unit_address, clr_attr,
					 set_attr, &ret_attr);

		if (ret4 != H_SUCCESS) {
			netdev_err(dev, "unable to change IPv4 checksum "
					"offload settings. %d rc=%ld\n",
					data, ret4);

			h_illan_attributes(adapter->vdev->unit_address,
					   set_attr, clr_attr, &ret_attr);

			if (data == 1)
				dev->features &= ~NETIF_F_IP_CSUM;

		} else {
			adapter->fw_ipv4_csum_support = data;
		}

		ret6 = h_illan_attributes(adapter->vdev->unit_address,
					 clr_attr6, set_attr6, &ret_attr);

		if (ret6 != H_SUCCESS) {
			netdev_err(dev, "unable to change IPv6 checksum "
					"offload settings. %d rc=%ld\n",
					data, ret6);

			h_illan_attributes(adapter->vdev->unit_address,
					   set_attr6, clr_attr6, &ret_attr);

			if (data == 1)
				dev->features &= ~NETIF_F_IPV6_CSUM;

		} else
			adapter->fw_ipv6_csum_support = data;

		if (ret4 == H_SUCCESS || ret6 == H_SUCCESS)
			adapter->rx_csum = data;
		else
			rc1 = -EIO;
	} else {
		rc1 = -EIO;
		netdev_err(dev, "unable to change checksum offload settings."
				     " %d rc=%ld ret_attr=%lx\n", data, ret,
				     ret_attr);
	}

	if (restart)
		rc2 = ibmveth_open(dev);

	return rc1 ? rc1 : rc2;
}
static int ibmveth_set_tso(struct net_device *dev, u32 data)
{
	struct ibmveth_adapter *adapter = netdev_priv(dev);
	unsigned long set_attr, clr_attr, ret_attr;
	long ret1, ret2;
	int rc1 = 0, rc2 = 0;
	int restart = 0;

	if (netif_running(dev)) {
		restart = 1;
		adapter->pool_config = 1;
		ibmveth_close(dev);
		adapter->pool_config = 0;
	}

	set_attr = 0;
	clr_attr = 0;

	if (data)
		set_attr = IBMVETH_ILLAN_LRG_SR_ENABLED;
	else
		clr_attr = IBMVETH_ILLAN_LRG_SR_ENABLED;

	ret1 = h_illan_attributes(adapter->vdev->unit_address, 0, 0, &ret_attr);

	if (ret1 == H_SUCCESS && (ret_attr & IBMVETH_ILLAN_LRG_SND_SUPPORT) &&
	    !old_large_send) {
		ret2 = h_illan_attributes(adapter->vdev->unit_address, clr_attr,
					  set_attr, &ret_attr);

		if (ret2 != H_SUCCESS) {
			netdev_err(dev, "unable to change tso settings. %d rc=%ld\n",
				   data, ret2);

			h_illan_attributes(adapter->vdev->unit_address,
					   set_attr, clr_attr, &ret_attr);

			if (data == 1)
				dev->features &= ~(NETIF_F_TSO | NETIF_F_TSO6);
			rc1 = -EIO;

		} else {
			adapter->fw_large_send_support = data;
			adapter->large_send = data;
		}
	} else {
		/* Older firmware version of large send offload does not
		 * support tcp6/ipv6
		 */
		if (data == 1) {
			dev->features &= ~NETIF_F_TSO6;
			netdev_info(dev, "TSO feature requires all partitions to have updated driver");
		}
		adapter->large_send = data;
	}

	if (restart)
		rc2 = ibmveth_open(dev);

	return rc1 ? rc1 : rc2;
}
static int ibmveth_set_features(struct net_device *dev,
	netdev_features_t features)
{
	struct ibmveth_adapter *adapter = netdev_priv(dev);
	int rx_csum = !!(features & NETIF_F_RXCSUM);
	int large_send = !!(features & (NETIF_F_TSO | NETIF_F_TSO6));
	int rc1 = 0, rc2 = 0;

	if (rx_csum != adapter->rx_csum) {
		rc1 = ibmveth_set_csum_offload(dev, rx_csum);
		if (rc1 && !adapter->rx_csum)
			dev->features =
				features & ~(NETIF_F_CSUM_MASK |
					     NETIF_F_RXCSUM);
	}

	if (large_send != adapter->large_send) {
		rc2 = ibmveth_set_tso(dev, large_send);
		if (rc2 && !adapter->large_send)
			dev->features =
				features & ~(NETIF_F_TSO | NETIF_F_TSO6);
	}

	return rc1 ? rc1 : rc2;
}
static void ibmveth_get_strings(struct net_device *dev, u32 stringset, u8 *data)
{
	int i;

	if (stringset != ETH_SS_STATS)
		return;

	for (i = 0; i < ARRAY_SIZE(ibmveth_stats); i++, data += ETH_GSTRING_LEN)
		memcpy(data, ibmveth_stats[i].name, ETH_GSTRING_LEN);
}

static int ibmveth_get_sset_count(struct net_device *dev, int sset)
{
	switch (sset) {
	case ETH_SS_STATS:
		return ARRAY_SIZE(ibmveth_stats);
	default:
		return -EOPNOTSUPP;
	}
}

static void ibmveth_get_ethtool_stats(struct net_device *dev,
				      struct ethtool_stats *stats, u64 *data)
{
	int i;
	struct ibmveth_adapter *adapter = netdev_priv(dev);

	for (i = 0; i < ARRAY_SIZE(ibmveth_stats); i++)
		data[i] = IBMVETH_GET_STAT(adapter, ibmveth_stats[i].offset);
}
static const struct ethtool_ops netdev_ethtool_ops = {
	.get_drvinfo		= netdev_get_drvinfo,
	.get_link		= ethtool_op_get_link,
	.get_strings		= ibmveth_get_strings,
	.get_sset_count		= ibmveth_get_sset_count,
	.get_ethtool_stats	= ibmveth_get_ethtool_stats,
	.get_link_ksettings	= netdev_get_link_ksettings,
};
static int ibmveth_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
	return -EOPNOTSUPP;
}

#define page_offset(v) ((unsigned long)(v) & ((1 << 12) - 1))
static int ibmveth_send(struct ibmveth_adapter *adapter,
			union ibmveth_buf_desc *descs, unsigned long mss)
{
	unsigned long correlator;
	unsigned int retry_count;
	unsigned long ret;

	/*
	 * The retry count sets a maximum for the number of broadcast and
	 * multicast destinations within the system.
	 */
	retry_count = 1024;
	correlator = 0;
	do {
		ret = h_send_logical_lan(adapter->vdev->unit_address,
					 descs[0].desc, descs[1].desc,
					 descs[2].desc, descs[3].desc,
					 descs[4].desc, descs[5].desc,
					 correlator, &correlator, mss,
					 adapter->fw_large_send_support);
	} while ((ret == H_BUSY) && (retry_count--));

	if (ret != H_SUCCESS && ret != H_DROPPED) {
		netdev_err(adapter->netdev, "tx: h_send_logical_lan failed "
			   "with rc=%ld\n", ret);
		return 1;
	}

	return 0;
}
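/* Transmit strategy: frames shorter than tx_copybreak are copied into the
 * pre-mapped bounce buffer to avoid a TCE map/unmap, everything else is
 * mapped in place as up to six descriptors (header plus at most five
 * frags).  For TSO the mss is either passed to firmware directly or, on
 * older firmware, carried in the IP/TCP checksum fields.
 */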
static netdev_tx_t ibmveth_start_xmit(struct sk_buff *skb,
				      struct net_device *netdev)
{
	struct ibmveth_adapter *adapter = netdev_priv(netdev);
	unsigned int desc_flags;
	union ibmveth_buf_desc descs[6];
	int last, i;
	int force_bounce = 0;
	dma_addr_t dma_addr;
	unsigned long mss = 0;

	/* veth doesn't handle frag_list, so linearize the skb.
	 * When GRO is enabled SKB's can have frag_list.
	 */
	if (adapter->is_active_trunk &&
	    skb_has_frag_list(skb) && __skb_linearize(skb)) {
		netdev->stats.tx_dropped++;
		goto out;
	}

	/*
	 * veth handles a maximum of 6 segments including the header, so
	 * we have to linearize the skb if there are more than this.
	 */
	if (skb_shinfo(skb)->nr_frags > 5 && __skb_linearize(skb)) {
		netdev->stats.tx_dropped++;
		goto out;
	}

	/* veth can't checksum offload UDP */
	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    ((skb->protocol == htons(ETH_P_IP) &&
	      ip_hdr(skb)->protocol != IPPROTO_TCP) ||
	     (skb->protocol == htons(ETH_P_IPV6) &&
	      ipv6_hdr(skb)->nexthdr != IPPROTO_TCP)) &&
	    skb_checksum_help(skb)) {

		netdev_err(netdev, "tx: failed to checksum packet\n");
		netdev->stats.tx_dropped++;
		goto out;
	}

	desc_flags = IBMVETH_BUF_VALID;

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		unsigned char *buf = skb_transport_header(skb) +
						skb->csum_offset;

		desc_flags |= (IBMVETH_BUF_NO_CSUM | IBMVETH_BUF_CSUM_GOOD);

		/* Need to zero out the checksum */
		buf[0] = 0;
		buf[1] = 0;

		if (skb_is_gso(skb) && adapter->fw_large_send_support)
			desc_flags |= IBMVETH_BUF_LRG_SND;
	}

retry_bounce:
	memset(descs, 0, sizeof(descs));

	/*
	 * If a linear packet is below the rx threshold then
	 * copy it into the static bounce buffer. This avoids the
	 * cost of a TCE insert and remove.
	 */
	if (force_bounce || (!skb_is_nonlinear(skb) &&
				(skb->len < tx_copybreak))) {
		skb_copy_from_linear_data(skb, adapter->bounce_buffer,
					  skb->len);

		descs[0].fields.flags_len = desc_flags | skb->len;
		descs[0].fields.address = adapter->bounce_buffer_dma;

		if (ibmveth_send(adapter, descs, 0)) {
			adapter->tx_send_failed++;
			netdev->stats.tx_dropped++;
		} else {
			netdev->stats.tx_packets++;
			netdev->stats.tx_bytes += skb->len;
		}

		goto out;
	}

	/* Map the header */
	dma_addr = dma_map_single(&adapter->vdev->dev, skb->data,
				  skb_headlen(skb), DMA_TO_DEVICE);
	if (dma_mapping_error(&adapter->vdev->dev, dma_addr))
		goto map_failed;

	descs[0].fields.flags_len = desc_flags | skb_headlen(skb);
	descs[0].fields.address = dma_addr;

	/* Map the frags */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		dma_addr = skb_frag_dma_map(&adapter->vdev->dev, frag, 0,
					    skb_frag_size(frag), DMA_TO_DEVICE);

		if (dma_mapping_error(&adapter->vdev->dev, dma_addr))
			goto map_failed_frags;

		descs[i+1].fields.flags_len = desc_flags | skb_frag_size(frag);
		descs[i+1].fields.address = dma_addr;
	}

	if (skb->ip_summed == CHECKSUM_PARTIAL && skb_is_gso(skb)) {
		if (adapter->fw_large_send_support) {
			mss = (unsigned long)skb_shinfo(skb)->gso_size;
			adapter->tx_large_packets++;
		} else if (!skb_is_gso_v6(skb)) {
			/* Put -1 in the IP checksum to tell phyp it
			 * is a largesend packet. Put the mss in
			 * the TCP checksum.
			 */
			ip_hdr(skb)->check = 0xffff;
			tcp_hdr(skb)->check =
				cpu_to_be16(skb_shinfo(skb)->gso_size);
			adapter->tx_large_packets++;
		}
	}

	if (ibmveth_send(adapter, descs, mss)) {
		adapter->tx_send_failed++;
		netdev->stats.tx_dropped++;
	} else {
		netdev->stats.tx_packets++;
		netdev->stats.tx_bytes += skb->len;
	}

	dma_unmap_single(&adapter->vdev->dev,
			 descs[0].fields.address,
			 descs[0].fields.flags_len & IBMVETH_BUF_LEN_MASK,
			 DMA_TO_DEVICE);

	for (i = 1; i < skb_shinfo(skb)->nr_frags + 1; i++)
		dma_unmap_page(&adapter->vdev->dev, descs[i].fields.address,
			       descs[i].fields.flags_len & IBMVETH_BUF_LEN_MASK,
			       DMA_TO_DEVICE);

out:
	dev_consume_skb_any(skb);
	return NETDEV_TX_OK;

map_failed_frags:
	last = i+1;
	for (i = 0; i < last; i++)
		dma_unmap_page(&adapter->vdev->dev, descs[i].fields.address,
			       descs[i].fields.flags_len & IBMVETH_BUF_LEN_MASK,
			       DMA_TO_DEVICE);

map_failed:
	if (!firmware_has_feature(FW_FEATURE_CMO))
		netdev_err(netdev, "tx: unable to map xmit buffer\n");
	adapter->tx_map_failed++;
	if (skb_linearize(skb)) {
		netdev->stats.tx_dropped++;
		goto out;
	}
	force_bounce = 1;
	goto retry_bounce;
}
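/* On receive the gso_size has to be reconstructed: either the hypervisor
 * set the large-packet bit and wrote the mss into the rx buffer, or the
 * sending partition stored it in the TCP checksum field (the old method
 * handled in ibmveth_rx_mss_helper() below).
 */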
static void ibmveth_rx_mss_helper(struct sk_buff *skb, u16 mss, int lrg_pkt)
{
	struct tcphdr *tcph;
	int offset = 0;
	int hdr_len;

	/* only TCP packets will be aggregated */
	if (skb->protocol == htons(ETH_P_IP)) {
		struct iphdr *iph = (struct iphdr *)skb->data;

		if (iph->protocol == IPPROTO_TCP) {
			offset = iph->ihl * 4;
			skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
		} else {
			return;
		}
	} else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct ipv6hdr *iph6 = (struct ipv6hdr *)skb->data;

		if (iph6->nexthdr == IPPROTO_TCP) {
			offset = sizeof(struct ipv6hdr);
			skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
		} else {
			return;
		}
	} else {
		return;
	}
	/* if mss is not set through Large Packet bit/mss in rx buffer,
	 * expect that the mss will be written to the tcp header checksum.
	 */
	tcph = (struct tcphdr *)(skb->data + offset);
	if (lrg_pkt) {
		skb_shinfo(skb)->gso_size = mss;
	} else if (offset) {
		skb_shinfo(skb)->gso_size = ntohs(tcph->check);
		tcph->check = 0;
	}

	if (skb_shinfo(skb)->gso_size) {
		hdr_len = offset + tcph->doff * 4;
		skb_shinfo(skb)->gso_segs =
				DIV_ROUND_UP(skb->len - hdr_len,
					     skb_shinfo(skb)->gso_size);
	}
}
static void ibmveth_rx_csum_helper(struct sk_buff *skb,
				   struct ibmveth_adapter *adapter)
{
	struct iphdr *iph = NULL;
	struct ipv6hdr *iph6 = NULL;
	__be16 skb_proto = 0;
	u16 iphlen = 0;
	u16 iph_proto = 0;
	u16 tcphdrlen = 0;

	skb_proto = be16_to_cpu(skb->protocol);

	if (skb_proto == ETH_P_IP) {
		iph = (struct iphdr *)skb->data;

		/* If the IP checksum is not offloaded and if the packet
		 *  is large send, the checksum must be rebuilt.
		 */
		if (iph->check == 0xffff) {
			iph->check = 0;
			iph->check = ip_fast_csum((unsigned char *)iph,
						  iph->ihl);
		}

		iphlen = iph->ihl * 4;
		iph_proto = iph->protocol;
	} else if (skb_proto == ETH_P_IPV6) {
		iph6 = (struct ipv6hdr *)skb->data;
		iphlen = sizeof(struct ipv6hdr);
		iph_proto = iph6->nexthdr;
	}

	/* In OVS environment, when a flow is not cached, specifically for a
	 * new TCP connection, the first packet information is passed up
	 * the user space for finding a flow. During this process, OVS computes
	 * checksum on the first packet when CHECKSUM_PARTIAL flag is set.
	 *
	 * Given that we zeroed out TCP checksum field in transmit path
	 * (refer ibmveth_start_xmit routine) as we set "no checksum bit",
	 * OVS computed checksum will be incorrect w/o TCP pseudo checksum
	 * in the packet. This leads to OVS dropping the packet and hence
	 * TCP retransmissions are seen.
	 *
	 * So, re-compute TCP pseudo header checksum.
	 */
	if (iph_proto == IPPROTO_TCP && adapter->is_active_trunk) {
		struct tcphdr *tcph = (struct tcphdr *)(skb->data + iphlen);

		tcphdrlen = skb->len - iphlen;

		/* Recompute TCP pseudo header checksum */
		if (skb_proto == ETH_P_IP)
			tcph->check = ~csum_tcpudp_magic(iph->saddr,
					iph->daddr, tcphdrlen, iph_proto, 0);
		else if (skb_proto == ETH_P_IPV6)
			tcph->check = ~csum_ipv6_magic(&iph6->saddr,
					&iph6->daddr, tcphdrlen, iph_proto, 0);

		/* Setup SKB fields for checksum offload */
		skb_partial_csum_set(skb, iphlen,
				     offsetof(struct tcphdr, check));
		skb_reset_network_header(skb);
	}
}
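/* NAPI poll loop: harvest valid rx queue entries up to the budget, copying
 * frames shorter than rx_copybreak into a fresh skb so the large rx buffer
 * can be recycled in place.  When the budget is not exhausted, interrupts
 * are re-enabled and the queue is checked once more to close the race with
 * the hypervisor.
 */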
static int ibmveth_poll(struct napi_struct *napi, int budget)
{
	struct ibmveth_adapter *adapter =
			container_of(napi, struct ibmveth_adapter, napi);
	struct net_device *netdev = adapter->netdev;
	int frames_processed = 0;
	unsigned long lpar_rc;
	u16 mss = 0;

restart_poll:
	while (frames_processed < budget) {
		if (!ibmveth_rxq_pending_buffer(adapter))
			break;

		smp_rmb();
		if (!ibmveth_rxq_buffer_valid(adapter)) {
			wmb(); /* suggested by larson1 */
			adapter->rx_invalid_buffer++;
			netdev_dbg(netdev, "recycling invalid buffer\n");
			ibmveth_rxq_recycle_buffer(adapter);
		} else {
			struct sk_buff *skb, *new_skb;
			int length = ibmveth_rxq_frame_length(adapter);
			int offset = ibmveth_rxq_frame_offset(adapter);
			int csum_good = ibmveth_rxq_csum_good(adapter);
			int lrg_pkt = ibmveth_rxq_large_packet(adapter);

			skb = ibmveth_rxq_get_buffer(adapter);

			/* if the large packet bit is set in the rx queue
			 * descriptor, the mss will be written by PHYP eight
			 * bytes from the start of the rx buffer, which is
			 * skb->data at this stage
			 */
			if (lrg_pkt) {
				__be64 *rxmss = (__be64 *)(skb->data + 8);

				mss = (u16)be64_to_cpu(*rxmss);
			}

			new_skb = NULL;
			if (length < rx_copybreak)
				new_skb = netdev_alloc_skb(netdev, length);

			if (new_skb) {
				skb_copy_to_linear_data(new_skb,
							skb->data + offset,
							length);
				if (rx_flush)
					ibmveth_flush_buffer(skb->data,
						length + offset);
				if (!ibmveth_rxq_recycle_buffer(adapter))
					kfree_skb(skb);
				skb = new_skb;
			} else {
				ibmveth_rxq_harvest_buffer(adapter);
				skb_reserve(skb, offset);
			}

			skb_put(skb, length);
			skb->protocol = eth_type_trans(skb, netdev);

			if (csum_good) {
				skb->ip_summed = CHECKSUM_UNNECESSARY;
				ibmveth_rx_csum_helper(skb, adapter);
			}

			if (length > netdev->mtu + ETH_HLEN) {
				ibmveth_rx_mss_helper(skb, mss, lrg_pkt);
				adapter->rx_large_packets++;
			}

			napi_gro_receive(napi, skb);	/* send it up */

			netdev->stats.rx_packets++;
			netdev->stats.rx_bytes += length;
			frames_processed++;
		}
	}

	ibmveth_replenish_task(adapter);

	if (frames_processed < budget) {
		napi_complete_done(napi, frames_processed);

		/* We think we are done - reenable interrupts,
		 * then check once more to make sure we are done.
		 */
		lpar_rc = h_vio_signal(adapter->vdev->unit_address,
				       VIO_IRQ_ENABLE);

		BUG_ON(lpar_rc != H_SUCCESS);

		if (ibmveth_rxq_pending_buffer(adapter) &&
		    napi_reschedule(napi)) {
			lpar_rc = h_vio_signal(adapter->vdev->unit_address,
					       VIO_IRQ_DISABLE);
			goto restart_poll;
		}
	}

	return frames_processed;
}
static irqreturn_t ibmveth_interrupt(int irq, void *dev_instance)
{
	struct net_device *netdev = dev_instance;
	struct ibmveth_adapter *adapter = netdev_priv(netdev);
	unsigned long lpar_rc;

	if (napi_schedule_prep(&adapter->napi)) {
		lpar_rc = h_vio_signal(adapter->vdev->unit_address,
				       VIO_IRQ_DISABLE);
		BUG_ON(lpar_rc != H_SUCCESS);
		__napi_schedule(&adapter->napi);
	}
	return IRQ_HANDLED;
}
static void ibmveth_set_multicast_list(struct net_device *netdev)
{
	struct ibmveth_adapter *adapter = netdev_priv(netdev);
	unsigned long lpar_rc;

	if ((netdev->flags & IFF_PROMISC) ||
	    (netdev_mc_count(netdev) > adapter->mcastFilterSize)) {
		lpar_rc = h_multicast_ctrl(adapter->vdev->unit_address,
					   IbmVethMcastEnableRecv |
					   IbmVethMcastDisableFiltering,
					   0);
		if (lpar_rc != H_SUCCESS) {
			netdev_err(netdev, "h_multicast_ctrl rc=%ld when "
				   "entering promisc mode\n", lpar_rc);
		}
	} else {
		struct netdev_hw_addr *ha;
		/* clear the filter table & disable filtering */
		lpar_rc = h_multicast_ctrl(adapter->vdev->unit_address,
					   IbmVethMcastEnableRecv |
					   IbmVethMcastDisableFiltering |
					   IbmVethMcastClearFilterTable,
					   0);
		if (lpar_rc != H_SUCCESS) {
			netdev_err(netdev, "h_multicast_ctrl rc=%ld when "
				   "attempting to clear filter table\n",
				   lpar_rc);
		}
		/* add the addresses to the filter table */
		netdev_for_each_mc_addr(ha, netdev) {
			/* add the multicast address to the filter table */
			u64 mcast_addr;
			mcast_addr = ibmveth_encode_mac_addr(ha->addr);
			lpar_rc = h_multicast_ctrl(adapter->vdev->unit_address,
						   IbmVethMcastAddFilter,
						   mcast_addr);
			if (lpar_rc != H_SUCCESS) {
				netdev_err(netdev, "h_multicast_ctrl rc=%ld "
					   "when adding an entry to the filter "
					   "table\n", lpar_rc);
			}
		}

		/* re-enable filtering */
		lpar_rc = h_multicast_ctrl(adapter->vdev->unit_address,
					   IbmVethMcastEnableFiltering,
					   0);
		if (lpar_rc != H_SUCCESS) {
			netdev_err(netdev, "h_multicast_ctrl rc=%ld when "
				   "enabling filtering\n", lpar_rc);
		}
	}
}
static int ibmveth_change_mtu(struct net_device *dev, int new_mtu)
{
	struct ibmveth_adapter *adapter = netdev_priv(dev);
	struct vio_dev *viodev = adapter->vdev;
	int new_mtu_oh = new_mtu + IBMVETH_BUFF_OH;
	int i, rc;
	int need_restart = 0;

	for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++)
		if (new_mtu_oh <= adapter->rx_buff_pool[i].buff_size)
			break;

	if (i == IBMVETH_NUM_BUFF_POOLS)
		return -EINVAL;

	/* Deactivate all the buffer pools so that the next loop can activate
	   only the buffer pools necessary to hold the new MTU */
	if (netif_running(adapter->netdev)) {
		need_restart = 1;
		adapter->pool_config = 1;
		ibmveth_close(adapter->netdev);
		adapter->pool_config = 0;
	}

	/* Look for an active buffer pool that can hold the new MTU */
	for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) {
		adapter->rx_buff_pool[i].active = 1;

		if (new_mtu_oh <= adapter->rx_buff_pool[i].buff_size) {
			dev->mtu = new_mtu;
			vio_cmo_set_dev_desired(viodev,
						ibmveth_get_desired_dma
						(viodev));
			if (need_restart) {
				return ibmveth_open(adapter->netdev);
			}
			return 0;
		}
	}

	if (need_restart && (rc = ibmveth_open(adapter->netdev)))
		return rc;

	return -EINVAL;
}
#ifdef CONFIG_NET_POLL_CONTROLLER
static void ibmveth_poll_controller(struct net_device *dev)
{
	ibmveth_replenish_task(netdev_priv(dev));
	ibmveth_interrupt(dev->irq, dev);
}
#endif
/**
 * ibmveth_get_desired_dma - Calculate IO memory desired by the driver
 *
 * @vdev: struct vio_dev for the device whose desired IO mem is to be returned
 *
 * Return value:
 *	Number of bytes of IO data the driver will need to perform well.
 */
static unsigned long ibmveth_get_desired_dma(struct vio_dev *vdev)
{
	struct net_device *netdev = dev_get_drvdata(&vdev->dev);
	struct ibmveth_adapter *adapter;
	struct iommu_table *tbl;
	unsigned long ret;
	int i;
	int rxqentries = 1;

	tbl = get_iommu_table_base(&vdev->dev);

	/* netdev inits at probe time along with the structures we need below*/
	if (netdev == NULL)
		return IOMMU_PAGE_ALIGN(IBMVETH_IO_ENTITLEMENT_DEFAULT, tbl);

	adapter = netdev_priv(netdev);

	ret = IBMVETH_BUFF_LIST_SIZE + IBMVETH_FILT_LIST_SIZE;
	ret += IOMMU_PAGE_ALIGN(netdev->mtu, tbl);

	for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) {
		/* add the size of the active receive buffers */
		if (adapter->rx_buff_pool[i].active)
			ret +=
			    adapter->rx_buff_pool[i].size *
			    IOMMU_PAGE_ALIGN(adapter->rx_buff_pool[i].
					     buff_size, tbl);
		rxqentries += adapter->rx_buff_pool[i].size;
	}
	/* add the size of the receive queue entries */
	ret += IOMMU_PAGE_ALIGN(
		rxqentries * sizeof(struct ibmveth_rx_q_entry), tbl);

	return ret;
}
static int ibmveth_set_mac_addr(struct net_device *dev, void *p)
{
	struct ibmveth_adapter *adapter = netdev_priv(dev);
	struct sockaddr *addr = p;
	u64 mac_address;
	int rc;

	if (!is_valid_ether_addr(addr->sa_data))
		return -EADDRNOTAVAIL;

	mac_address = ibmveth_encode_mac_addr(addr->sa_data);
	rc = h_change_logical_lan_mac(adapter->vdev->unit_address, mac_address);
	if (rc) {
		netdev_err(adapter->netdev, "h_change_logical_lan_mac failed with rc=%d\n", rc);
		return rc;
	}

	ether_addr_copy(dev->dev_addr, addr->sa_data);

	return 0;
}
static const struct net_device_ops ibmveth_netdev_ops = {
	.ndo_open		= ibmveth_open,
	.ndo_stop		= ibmveth_close,
	.ndo_start_xmit		= ibmveth_start_xmit,
	.ndo_set_rx_mode	= ibmveth_set_multicast_list,
	.ndo_do_ioctl		= ibmveth_ioctl,
	.ndo_change_mtu		= ibmveth_change_mtu,
	.ndo_fix_features	= ibmveth_fix_features,
	.ndo_set_features	= ibmveth_set_features,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_set_mac_address	= ibmveth_set_mac_addr,
#ifdef CONFIG_NET_POLL_CONTROLLER
	.ndo_poll_controller	= ibmveth_poll_controller,
#endif
};
static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id)
{
	int rc, i, mac_len;
	struct net_device *netdev;
	struct ibmveth_adapter *adapter;
	unsigned char *mac_addr_p;
	unsigned int *mcastFilterSize_p;
	long ret;
	unsigned long ret_attr;

	dev_dbg(&dev->dev, "entering ibmveth_probe for UA 0x%x\n",
		dev->unit_address);

	mac_addr_p = (unsigned char *)vio_get_attribute(dev, VETH_MAC_ADDR,
							&mac_len);
	if (!mac_addr_p) {
		dev_err(&dev->dev, "Can't find VETH_MAC_ADDR attribute\n");
		return -EINVAL;
	}
	/* Workaround for old/broken pHyp */
	if (mac_len == 8)
		mac_addr_p += 2;
	else if (mac_len != 6) {
		dev_err(&dev->dev, "VETH_MAC_ADDR attribute wrong len %d\n",
			mac_len);
		return -EINVAL;
	}

	mcastFilterSize_p = (unsigned int *)vio_get_attribute(dev,
						VETH_MCAST_FILTER_SIZE, NULL);
	if (!mcastFilterSize_p) {
		dev_err(&dev->dev, "Can't find VETH_MCAST_FILTER_SIZE "
			"attribute\n");
		return -EINVAL;
	}

	netdev = alloc_etherdev(sizeof(struct ibmveth_adapter));

	if (!netdev)
		return -ENOMEM;

	adapter = netdev_priv(netdev);
	dev_set_drvdata(&dev->dev, netdev);

	adapter->vdev = dev;
	adapter->netdev = netdev;
	adapter->mcastFilterSize = *mcastFilterSize_p;
	adapter->pool_config = 0;

	netif_napi_add(netdev, &adapter->napi, ibmveth_poll, 16);

	netdev->irq = dev->irq;
	netdev->netdev_ops = &ibmveth_netdev_ops;
	netdev->ethtool_ops = &netdev_ethtool_ops;
	SET_NETDEV_DEV(netdev, &dev->dev);
	netdev->hw_features = NETIF_F_SG;
	if (vio_get_attribute(dev, "ibm,illan-options", NULL) != NULL) {
		netdev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
				       NETIF_F_RXCSUM;
	}

	netdev->features |= netdev->hw_features;

	ret = h_illan_attributes(adapter->vdev->unit_address, 0, 0, &ret_attr);

	/* If running older firmware, TSO should not be enabled by default */
	if (ret == H_SUCCESS && (ret_attr & IBMVETH_ILLAN_LRG_SND_SUPPORT) &&
	    !old_large_send) {
		netdev->hw_features |= NETIF_F_TSO | NETIF_F_TSO6;
		netdev->features |= netdev->hw_features;
	} else {
		netdev->hw_features |= NETIF_F_TSO;
	}

	adapter->is_active_trunk = false;
	if (ret == H_SUCCESS && (ret_attr & IBMVETH_ILLAN_ACTIVE_TRUNK)) {
		adapter->is_active_trunk = true;
		netdev->hw_features |= NETIF_F_FRAGLIST;
		netdev->features |= NETIF_F_FRAGLIST;
	}

	netdev->min_mtu = IBMVETH_MIN_MTU;
	netdev->max_mtu = ETH_MAX_MTU;

	memcpy(netdev->dev_addr, mac_addr_p, ETH_ALEN);

	if (firmware_has_feature(FW_FEATURE_CMO))
		memcpy(pool_count, pool_count_cmo, sizeof(pool_count));

	for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) {
		struct kobject *kobj = &adapter->rx_buff_pool[i].kobj;
		int error;

		ibmveth_init_buffer_pool(&adapter->rx_buff_pool[i], i,
					 pool_count[i], pool_size[i],
					 pool_active[i]);
		error = kobject_init_and_add(kobj, &ktype_veth_pool,
					     &dev->dev.kobj, "pool%d", i);
		if (!error)
			kobject_uevent(kobj, KOBJ_ADD);
	}

	netdev_dbg(netdev, "adapter @ 0x%p\n", adapter);
	netdev_dbg(netdev, "registering netdev...\n");

	ibmveth_set_features(netdev, netdev->features);

	rc = register_netdev(netdev);

	if (rc) {
		netdev_dbg(netdev, "failed to register netdev rc=%d\n", rc);
		free_netdev(netdev);
		return rc;
	}

	netdev_dbg(netdev, "registered\n");

	return 0;
}
static int ibmveth_remove(struct vio_dev *dev)
{
	struct net_device *netdev = dev_get_drvdata(&dev->dev);
	struct ibmveth_adapter *adapter = netdev_priv(netdev);
	int i;

	for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++)
		kobject_put(&adapter->rx_buff_pool[i].kobj);

	unregister_netdev(netdev);

	free_netdev(netdev);
	dev_set_drvdata(&dev->dev, NULL);

	return 0;
}
static struct attribute veth_active_attr;
static struct attribute veth_num_attr;
static struct attribute veth_size_attr;

static ssize_t veth_pool_show(struct kobject *kobj,
			      struct attribute *attr, char *buf)
{
	struct ibmveth_buff_pool *pool = container_of(kobj,
						      struct ibmveth_buff_pool,
						      kobj);

	if (attr == &veth_active_attr)
		return sprintf(buf, "%d\n", pool->active);
	else if (attr == &veth_num_attr)
		return sprintf(buf, "%d\n", pool->size);
	else if (attr == &veth_size_attr)
		return sprintf(buf, "%d\n", pool->buff_size);
	return 0;
}
static ssize_t veth_pool_store(struct kobject *kobj, struct attribute *attr,
			       const char *buf, size_t count)
{
	struct ibmveth_buff_pool *pool = container_of(kobj,
						      struct ibmveth_buff_pool,
						      kobj);
	struct net_device *netdev = dev_get_drvdata(
	    container_of(kobj->parent, struct device, kobj));
	struct ibmveth_adapter *adapter = netdev_priv(netdev);
	long value = simple_strtol(buf, NULL, 10);
	long rc;

	if (attr == &veth_active_attr) {
		if (value && !pool->active) {
			if (netif_running(netdev)) {
				if (ibmveth_alloc_buffer_pool(pool)) {
					netdev_err(netdev,
						   "unable to alloc pool\n");
					return -ENOMEM;
				}
				pool->active = 1;
				adapter->pool_config = 1;
				ibmveth_close(netdev);
				adapter->pool_config = 0;
				if ((rc = ibmveth_open(netdev)))
					return rc;
			} else {
				pool->active = 1;
			}
		} else if (!value && pool->active) {
			int mtu = netdev->mtu + IBMVETH_BUFF_OH;
			int i;
			/* Make sure there is a buffer pool with buffers that
			   can hold a packet of the size of the MTU */
			for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) {
				if (pool == &adapter->rx_buff_pool[i])
					continue;
				if (!adapter->rx_buff_pool[i].active)
					continue;
				if (mtu <= adapter->rx_buff_pool[i].buff_size)
					break;
			}

			if (i == IBMVETH_NUM_BUFF_POOLS) {
				netdev_err(netdev, "no active pool >= MTU\n");
				return -EPERM;
			}

			if (netif_running(netdev)) {
				adapter->pool_config = 1;
				ibmveth_close(netdev);
				pool->active = 0;
				adapter->pool_config = 0;
				if ((rc = ibmveth_open(netdev)))
					return rc;
			}
			pool->active = 0;
		}
	} else if (attr == &veth_num_attr) {
		if (value <= 0 || value > IBMVETH_MAX_POOL_COUNT) {
			return -EINVAL;
		} else {
			if (netif_running(netdev)) {
				adapter->pool_config = 1;
				ibmveth_close(netdev);
				adapter->pool_config = 0;
				pool->size = value;
				if ((rc = ibmveth_open(netdev)))
					return rc;
			} else {
				pool->size = value;
			}
		}
	} else if (attr == &veth_size_attr) {
		if (value <= IBMVETH_BUFF_OH || value > IBMVETH_MAX_BUF_SIZE) {
			return -EINVAL;
		} else {
			if (netif_running(netdev)) {
				adapter->pool_config = 1;
				ibmveth_close(netdev);
				adapter->pool_config = 0;
				pool->buff_size = value;
				if ((rc = ibmveth_open(netdev)))
					return rc;
			} else {
				pool->buff_size = value;
			}
		}
	}

	/* kick the interrupt handler to allocate/deallocate pools */
	ibmveth_interrupt(netdev->irq, netdev);
	return count;
}
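/* Each receive buffer pool is exposed as a sysfs kobject (pool0..poolN,
 * registered in ibmveth_probe) with three attributes handled above:
 * "active", "num" and "size".  Writes go through veth_pool_store(), which
 * may close and reopen the device to resize or (de)activate a pool.
 */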
#define ATTR(_name, _mode)				\
	struct attribute veth_##_name##_attr = {	\
	.name = __stringify(_name), .mode = _mode,	\
	};

static ATTR(active, 0644);
static ATTR(num, 0644);
static ATTR(size, 0644);

static struct attribute *veth_pool_attrs[] = {
	&veth_active_attr,
	&veth_num_attr,
	&veth_size_attr,
	NULL,
};

static const struct sysfs_ops veth_pool_ops = {
	.show   = veth_pool_show,
	.store  = veth_pool_store,
};

static struct kobj_type ktype_veth_pool = {
	.release        = NULL,
	.sysfs_ops      = &veth_pool_ops,
	.default_attrs  = veth_pool_attrs,
};
static int ibmveth_resume(struct device *dev)
{
	struct net_device *netdev = dev_get_drvdata(dev);
	ibmveth_interrupt(netdev->irq, netdev);
	return 0;
}

static const struct vio_device_id ibmveth_device_table[] = {
	{ "network", "IBM,l-lan"},
	{ "", "" }
};
MODULE_DEVICE_TABLE(vio, ibmveth_device_table);

static const struct dev_pm_ops ibmveth_pm_ops = {
	.resume = ibmveth_resume
};

static struct vio_driver ibmveth_driver = {
	.id_table	= ibmveth_device_table,
	.probe		= ibmveth_probe,
	.remove		= ibmveth_remove,
	.get_desired_dma = ibmveth_get_desired_dma,
	.name		= ibmveth_driver_name,
	.pm		= &ibmveth_pm_ops,
};

static int __init ibmveth_module_init(void)
{
	printk(KERN_DEBUG "%s: %s %s\n", ibmveth_driver_name,
	       ibmveth_driver_string, ibmveth_driver_version);

	return vio_register_driver(&ibmveth_driver);
}

static void __exit ibmveth_module_exit(void)
{
	vio_unregister_driver(&ibmveth_driver);
}

module_init(ibmveth_module_init);
module_exit(ibmveth_module_exit);