// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * IBM Power Virtual Ethernet Device Driver
 *
 * Copyright (C) IBM Corporation, 2003, 2010
 *
 * Authors: Dave Larson <larson1@us.ibm.com>
 *	    Santiago Leon <santil@linux.vnet.ibm.com>
 *	    Brian King <brking@linux.vnet.ibm.com>
 *	    Robert Jennings <rcj@linux.vnet.ibm.com>
 *	    Anton Blanchard <anton@au.ibm.com>
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/dma-mapping.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/ethtool.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/slab.h>
#include <asm/hvcall.h>
#include <linux/atomic.h>
#include <asm/vio.h>
#include <asm/iommu.h>
#include <asm/firmware.h>
#include <net/tcp.h>
#include <net/ip6_checksum.h>

#include "ibmveth.h"
static irqreturn_t ibmveth_interrupt(int irq, void *dev_instance);
static void ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter);
static unsigned long ibmveth_get_desired_dma(struct vio_dev *vdev);

static struct kobj_type ktype_veth_pool;

static const char ibmveth_driver_name[] = "ibmveth";
static const char ibmveth_driver_string[] = "IBM Power Virtual Ethernet Driver";
#define ibmveth_driver_version "1.06"

MODULE_AUTHOR("Santiago Leon <santil@linux.vnet.ibm.com>");
MODULE_DESCRIPTION("IBM Power Virtual Ethernet Driver");
MODULE_LICENSE("GPL");
MODULE_VERSION(ibmveth_driver_version);
static unsigned int tx_copybreak __read_mostly = 128;
module_param(tx_copybreak, uint, 0644);
MODULE_PARM_DESC(tx_copybreak,
	"Maximum size of packet that is copied to a new buffer on transmit");

static unsigned int rx_copybreak __read_mostly = 128;
module_param(rx_copybreak, uint, 0644);
MODULE_PARM_DESC(rx_copybreak,
	"Maximum size of packet that is copied to a new buffer on receive");

static unsigned int rx_flush __read_mostly = 0;
module_param(rx_flush, uint, 0644);
MODULE_PARM_DESC(rx_flush, "Flush receive buffers before use");

static bool old_large_send __read_mostly;
module_param(old_large_send, bool, 0444);
MODULE_PARM_DESC(old_large_send,
	"Use old large send method on firmware that supports the new method");
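/*
 * Note: the copybreak parameters above trade a memcpy against the cost of
 * mapping and unmapping a TCE entry for small packets.  They can be set at
 * module load time, e.g. (illustrative only):
 *	modprobe ibmveth rx_flush=1 tx_copybreak=256
 */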
struct ibmveth_stat {
	char name[ETH_GSTRING_LEN];
	int offset;
};

#define IBMVETH_STAT_OFF(stat) offsetof(struct ibmveth_adapter, stat)
#define IBMVETH_GET_STAT(a, off) *((u64 *)(((unsigned long)(a)) + off))
static struct ibmveth_stat ibmveth_stats[] = {
	{ "replenish_task_cycles", IBMVETH_STAT_OFF(replenish_task_cycles) },
	{ "replenish_no_mem", IBMVETH_STAT_OFF(replenish_no_mem) },
	{ "replenish_add_buff_failure",
			IBMVETH_STAT_OFF(replenish_add_buff_failure) },
	{ "replenish_add_buff_success",
			IBMVETH_STAT_OFF(replenish_add_buff_success) },
	{ "rx_invalid_buffer", IBMVETH_STAT_OFF(rx_invalid_buffer) },
	{ "rx_no_buffer", IBMVETH_STAT_OFF(rx_no_buffer) },
	{ "tx_map_failed", IBMVETH_STAT_OFF(tx_map_failed) },
	{ "tx_send_failed", IBMVETH_STAT_OFF(tx_send_failed) },
	{ "fw_enabled_ipv4_csum", IBMVETH_STAT_OFF(fw_ipv4_csum_support) },
	{ "fw_enabled_ipv6_csum", IBMVETH_STAT_OFF(fw_ipv6_csum_support) },
	{ "tx_large_packets", IBMVETH_STAT_OFF(tx_large_packets) },
	{ "rx_large_packets", IBMVETH_STAT_OFF(rx_large_packets) },
	{ "fw_enabled_large_send", IBMVETH_STAT_OFF(fw_large_send_support) }
};
/* simple methods of getting data from the current rxq entry */
static inline u32 ibmveth_rxq_flags(struct ibmveth_adapter *adapter)
{
	return be32_to_cpu(adapter->rx_queue.queue_addr[adapter->rx_queue.index].flags_off);
}

static inline int ibmveth_rxq_toggle(struct ibmveth_adapter *adapter)
{
	return (ibmveth_rxq_flags(adapter) & IBMVETH_RXQ_TOGGLE) >>
			IBMVETH_RXQ_TOGGLE_SHIFT;
}

static inline int ibmveth_rxq_pending_buffer(struct ibmveth_adapter *adapter)
{
	return ibmveth_rxq_toggle(adapter) == adapter->rx_queue.toggle;
}

static inline int ibmveth_rxq_buffer_valid(struct ibmveth_adapter *adapter)
{
	return ibmveth_rxq_flags(adapter) & IBMVETH_RXQ_VALID;
}

static inline int ibmveth_rxq_frame_offset(struct ibmveth_adapter *adapter)
{
	return ibmveth_rxq_flags(adapter) & IBMVETH_RXQ_OFF_MASK;
}

static inline int ibmveth_rxq_large_packet(struct ibmveth_adapter *adapter)
{
	return ibmveth_rxq_flags(adapter) & IBMVETH_RXQ_LRG_PKT;
}

static inline int ibmveth_rxq_frame_length(struct ibmveth_adapter *adapter)
{
	return be32_to_cpu(adapter->rx_queue.queue_addr[adapter->rx_queue.index].length);
}

static inline int ibmveth_rxq_csum_good(struct ibmveth_adapter *adapter)
{
	return ibmveth_rxq_flags(adapter) & IBMVETH_RXQ_CSUM_GOOD;
}
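/*
 * Note on the receive queue protocol implemented by the helpers above: each
 * rx queue entry carries a toggle bit in flags_off.  The driver keeps the
 * value it expects next in rx_queue.toggle and inverts it whenever
 * rx_queue.index wraps, so an entry is treated as "pending" exactly when the
 * entry's toggle bit matches the driver's current toggle value.
 */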
/* setup the initial settings for a buffer pool */
static void ibmveth_init_buffer_pool(struct ibmveth_buff_pool *pool,
				     u32 pool_index, u32 pool_size,
				     u32 buff_size, u32 pool_active)
{
	pool->size = pool_size;
	pool->index = pool_index;
	pool->buff_size = buff_size;
	pool->threshold = pool_size * 7 / 8;
	pool->active = pool_active;
}
/* allocate and setup a buffer pool - called during open */
static int ibmveth_alloc_buffer_pool(struct ibmveth_buff_pool *pool)
{
	int i;

	pool->free_map = kmalloc_array(pool->size, sizeof(u16), GFP_KERNEL);
	if (!pool->free_map)
		return -1;

	pool->dma_addr = kcalloc(pool->size, sizeof(dma_addr_t), GFP_KERNEL);
	if (!pool->dma_addr) {
		kfree(pool->free_map);
		pool->free_map = NULL;
		return -1;
	}

	pool->skbuff = kcalloc(pool->size, sizeof(void *), GFP_KERNEL);
	if (!pool->skbuff) {
		kfree(pool->dma_addr);
		pool->dma_addr = NULL;

		kfree(pool->free_map);
		pool->free_map = NULL;
		return -1;
	}

	for (i = 0; i < pool->size; ++i)
		pool->free_map[i] = i;

	atomic_set(&pool->available, 0);
	pool->producer_index = 0;
	pool->consumer_index = 0;

	return 0;
}
static inline void ibmveth_flush_buffer(void *addr, unsigned long length)
{
	unsigned long offset;

	for (offset = 0; offset < length; offset += SMP_CACHE_BYTES)
		asm("dcbfl %0,%1" :: "b" (addr), "r" (offset));
}
/* replenish the buffers for a pool. note that we don't need to
 * skb_reserve these since they are used for incoming...
 */
static void ibmveth_replenish_buffer_pool(struct ibmveth_adapter *adapter,
					  struct ibmveth_buff_pool *pool)
{
	u32 i;
	u32 count = pool->size - atomic_read(&pool->available);
	u32 buffers_added = 0;
	struct sk_buff *skb;
	unsigned int free_index, index;
	u64 correlator;
	unsigned long lpar_rc;
	dma_addr_t dma_addr;

	mb();

	for (i = 0; i < count; ++i) {
		union ibmveth_buf_desc desc;

		skb = netdev_alloc_skb(adapter->netdev, pool->buff_size);
		if (!skb) {
			netdev_dbg(adapter->netdev,
				   "replenish: unable to allocate skb\n");
			adapter->replenish_no_mem++;
			break;
		}

		free_index = pool->consumer_index;
		pool->consumer_index++;
		if (pool->consumer_index >= pool->size)
			pool->consumer_index = 0;
		index = pool->free_map[free_index];

		BUG_ON(index == IBM_VETH_INVALID_MAP);
		BUG_ON(pool->skbuff[index] != NULL);

		dma_addr = dma_map_single(&adapter->vdev->dev, skb->data,
					  pool->buff_size, DMA_FROM_DEVICE);

		if (dma_mapping_error(&adapter->vdev->dev, dma_addr))
			goto failure;

		pool->free_map[free_index] = IBM_VETH_INVALID_MAP;
		pool->dma_addr[index] = dma_addr;
		pool->skbuff[index] = skb;

		correlator = ((u64)pool->index << 32) | index;
		*(u64 *)skb->data = correlator;

		desc.fields.flags_len = IBMVETH_BUF_VALID | pool->buff_size;
		desc.fields.address = dma_addr;

		if (rx_flush) {
			unsigned int len = min(pool->buff_size,
					       adapter->netdev->mtu +
					       IBMVETH_BUFF_OH);
			ibmveth_flush_buffer(skb->data, len);
		}
		lpar_rc = h_add_logical_lan_buffer(adapter->vdev->unit_address,
						   desc.desc);

		if (lpar_rc != H_SUCCESS) {
			goto failure;
		} else {
			buffers_added++;
			adapter->replenish_add_buff_success++;
		}
	}

	mb();
	atomic_add(buffers_added, &(pool->available));
	return;

failure:
	pool->free_map[free_index] = index;
	pool->skbuff[index] = NULL;
	if (pool->consumer_index == 0)
		pool->consumer_index = pool->size - 1;
	else
		pool->consumer_index--;
	if (!dma_mapping_error(&adapter->vdev->dev, dma_addr))
		dma_unmap_single(&adapter->vdev->dev,
				 pool->dma_addr[index], pool->buff_size,
				 DMA_FROM_DEVICE);
	dev_kfree_skb_any(skb);
	adapter->replenish_add_buff_failure++;

	mb();
	atomic_add(buffers_added, &(pool->available));
}
/*
 * The final 8 bytes of the buffer list is a counter of frames dropped
 * because there was not a buffer in the buffer list capable of holding
 * the frame.
 */
static void ibmveth_update_rx_no_buffer(struct ibmveth_adapter *adapter)
{
	__be64 *p = adapter->buffer_list_addr + 4096 - 8;

	adapter->rx_no_buffer = be64_to_cpup(p);
}
/* replenish routine */
static void ibmveth_replenish_task(struct ibmveth_adapter *adapter)
{
	int i;

	adapter->replenish_task_cycles++;

	for (i = (IBMVETH_NUM_BUFF_POOLS - 1); i >= 0; i--) {
		struct ibmveth_buff_pool *pool = &adapter->rx_buff_pool[i];

		if (pool->active &&
		    (atomic_read(&pool->available) < pool->threshold))
			ibmveth_replenish_buffer_pool(adapter, pool);
	}

	ibmveth_update_rx_no_buffer(adapter);
}
/* empty and free a buffer pool - also used to do cleanup in error paths */
static void ibmveth_free_buffer_pool(struct ibmveth_adapter *adapter,
				     struct ibmveth_buff_pool *pool)
{
	int i;

	kfree(pool->free_map);
	pool->free_map = NULL;

	if (pool->skbuff && pool->dma_addr) {
		for (i = 0; i < pool->size; ++i) {
			struct sk_buff *skb = pool->skbuff[i];

			if (skb) {
				dma_unmap_single(&adapter->vdev->dev,
						 pool->dma_addr[i],
						 pool->buff_size,
						 DMA_FROM_DEVICE);
				dev_kfree_skb_any(skb);
				pool->skbuff[i] = NULL;
			}
		}
	}

	if (pool->dma_addr) {
		kfree(pool->dma_addr);
		pool->dma_addr = NULL;
	}

	if (pool->skbuff) {
		kfree(pool->skbuff);
		pool->skbuff = NULL;
	}
}
/* remove a buffer from a pool */
static void ibmveth_remove_buffer_from_pool(struct ibmveth_adapter *adapter,
					    u64 correlator)
{
	unsigned int pool  = correlator >> 32;
	unsigned int index = correlator & 0xffffffffUL;
	unsigned int free_index;
	struct sk_buff *skb;

	BUG_ON(pool >= IBMVETH_NUM_BUFF_POOLS);
	BUG_ON(index >= adapter->rx_buff_pool[pool].size);

	skb = adapter->rx_buff_pool[pool].skbuff[index];

	BUG_ON(skb == NULL);

	adapter->rx_buff_pool[pool].skbuff[index] = NULL;

	dma_unmap_single(&adapter->vdev->dev,
			 adapter->rx_buff_pool[pool].dma_addr[index],
			 adapter->rx_buff_pool[pool].buff_size,
			 DMA_FROM_DEVICE);

	free_index = adapter->rx_buff_pool[pool].producer_index;
	adapter->rx_buff_pool[pool].producer_index++;
	if (adapter->rx_buff_pool[pool].producer_index >=
	    adapter->rx_buff_pool[pool].size)
		adapter->rx_buff_pool[pool].producer_index = 0;
	adapter->rx_buff_pool[pool].free_map[free_index] = index;

	mb();

	atomic_dec(&(adapter->rx_buff_pool[pool].available));
}
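/*
 * Buffer correlator layout: when a buffer is handed to the hypervisor (see
 * ibmveth_replenish_buffer_pool) the driver writes ((u64)pool->index << 32 |
 * index) into the first 8 bytes of the buffer.  The same correlator comes
 * back in the rx queue entry, which is how ibmveth_remove_buffer_from_pool()
 * and ibmveth_rxq_get_buffer() recover the owning pool and slot.
 */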
/* get the current buffer on the rx queue */
static inline struct sk_buff *ibmveth_rxq_get_buffer(struct ibmveth_adapter *adapter)
{
	u64 correlator = adapter->rx_queue.queue_addr[adapter->rx_queue.index].correlator;
	unsigned int pool = correlator >> 32;
	unsigned int index = correlator & 0xffffffffUL;

	BUG_ON(pool >= IBMVETH_NUM_BUFF_POOLS);
	BUG_ON(index >= adapter->rx_buff_pool[pool].size);

	return adapter->rx_buff_pool[pool].skbuff[index];
}
/* recycle the current buffer on the rx queue */
static int ibmveth_rxq_recycle_buffer(struct ibmveth_adapter *adapter)
{
	u32 q_index = adapter->rx_queue.index;
	u64 correlator = adapter->rx_queue.queue_addr[q_index].correlator;
	unsigned int pool = correlator >> 32;
	unsigned int index = correlator & 0xffffffffUL;
	union ibmveth_buf_desc desc;
	unsigned long lpar_rc;
	int ret = 1;

	BUG_ON(pool >= IBMVETH_NUM_BUFF_POOLS);
	BUG_ON(index >= adapter->rx_buff_pool[pool].size);

	if (!adapter->rx_buff_pool[pool].active) {
		ibmveth_rxq_harvest_buffer(adapter);
		ibmveth_free_buffer_pool(adapter, &adapter->rx_buff_pool[pool]);
		goto out;
	}

	desc.fields.flags_len = IBMVETH_BUF_VALID |
		adapter->rx_buff_pool[pool].buff_size;
	desc.fields.address = adapter->rx_buff_pool[pool].dma_addr[index];

	lpar_rc = h_add_logical_lan_buffer(adapter->vdev->unit_address, desc.desc);

	if (lpar_rc != H_SUCCESS) {
		netdev_dbg(adapter->netdev, "h_add_logical_lan_buffer failed "
			   "during recycle rc=%ld", lpar_rc);
		ibmveth_remove_buffer_from_pool(adapter, adapter->rx_queue.queue_addr[adapter->rx_queue.index].correlator);
		ret = 0;
	}

	if (++adapter->rx_queue.index == adapter->rx_queue.num_slots) {
		adapter->rx_queue.index = 0;
		adapter->rx_queue.toggle = !adapter->rx_queue.toggle;
	}

out:
	return ret;
}
static void ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter)
{
	ibmveth_remove_buffer_from_pool(adapter, adapter->rx_queue.queue_addr[adapter->rx_queue.index].correlator);

	if (++adapter->rx_queue.index == adapter->rx_queue.num_slots) {
		adapter->rx_queue.index = 0;
		adapter->rx_queue.toggle = !adapter->rx_queue.toggle;
	}
}
static int ibmveth_register_logical_lan(struct ibmveth_adapter *adapter,
	union ibmveth_buf_desc rxq_desc, u64 mac_address)
{
	int rc, try_again = 1;

	/*
	 * After a kexec the adapter will still be open, so our attempt to
	 * open it will fail. So if we get a failure we free the adapter and
	 * try again, but only once.
	 */
retry:
	rc = h_register_logical_lan(adapter->vdev->unit_address,
				    adapter->buffer_list_dma, rxq_desc.desc,
				    adapter->filter_list_dma, mac_address);

	if (rc != H_SUCCESS && try_again) {
		do {
			rc = h_free_logical_lan(adapter->vdev->unit_address);
		} while (H_IS_LONG_BUSY(rc) || (rc == H_BUSY));

		try_again = 0;
		goto retry;
	}

	return rc;
}
static u64 ibmveth_encode_mac_addr(u8 *mac)
{
	int i;
	u64 encoded = 0;

	for (i = 0; i < ETH_ALEN; i++)
		encoded = (encoded << 8) | mac[i];

	return encoded;
}
static int ibmveth_open(struct net_device *netdev)
{
	struct ibmveth_adapter *adapter = netdev_priv(netdev);
	u64 mac_address;
	int rxq_entries = 1;
	unsigned long lpar_rc;
	int rc;
	union ibmveth_buf_desc rxq_desc;
	int i;
	struct device *dev;

	netdev_dbg(netdev, "open starting\n");

	napi_enable(&adapter->napi);

	for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++)
		rxq_entries += adapter->rx_buff_pool[i].size;

	rc = -ENOMEM;
	adapter->buffer_list_addr = (void *) get_zeroed_page(GFP_KERNEL);
	if (!adapter->buffer_list_addr) {
		netdev_err(netdev, "unable to allocate list pages\n");
		goto out;
	}

	adapter->filter_list_addr = (void *) get_zeroed_page(GFP_KERNEL);
	if (!adapter->filter_list_addr) {
		netdev_err(netdev, "unable to allocate filter pages\n");
		goto out_free_buffer_list;
	}

	dev = &adapter->vdev->dev;

	adapter->rx_queue.queue_len = sizeof(struct ibmveth_rx_q_entry) *
						rxq_entries;
	adapter->rx_queue.queue_addr =
		dma_alloc_coherent(dev, adapter->rx_queue.queue_len,
				   &adapter->rx_queue.queue_dma, GFP_KERNEL);
	if (!adapter->rx_queue.queue_addr)
		goto out_free_filter_list;

	adapter->buffer_list_dma = dma_map_single(dev,
			adapter->buffer_list_addr, 4096, DMA_BIDIRECTIONAL);
	if (dma_mapping_error(dev, adapter->buffer_list_dma)) {
		netdev_err(netdev, "unable to map buffer list pages\n");
		goto out_free_queue_mem;
	}

	adapter->filter_list_dma = dma_map_single(dev,
			adapter->filter_list_addr, 4096, DMA_BIDIRECTIONAL);
	if (dma_mapping_error(dev, adapter->filter_list_dma)) {
		netdev_err(netdev, "unable to map filter list pages\n");
		goto out_unmap_buffer_list;
	}

	adapter->rx_queue.index = 0;
	adapter->rx_queue.num_slots = rxq_entries;
	adapter->rx_queue.toggle = 1;

	mac_address = ibmveth_encode_mac_addr(netdev->dev_addr);

	rxq_desc.fields.flags_len = IBMVETH_BUF_VALID |
					adapter->rx_queue.queue_len;
	rxq_desc.fields.address = adapter->rx_queue.queue_dma;

	netdev_dbg(netdev, "buffer list @ 0x%p\n", adapter->buffer_list_addr);
	netdev_dbg(netdev, "filter list @ 0x%p\n", adapter->filter_list_addr);
	netdev_dbg(netdev, "receive q @ 0x%p\n", adapter->rx_queue.queue_addr);

	h_vio_signal(adapter->vdev->unit_address, VIO_IRQ_DISABLE);

	lpar_rc = ibmveth_register_logical_lan(adapter, rxq_desc, mac_address);

	if (lpar_rc != H_SUCCESS) {
		netdev_err(netdev, "h_register_logical_lan failed with %ld\n",
			   lpar_rc);
		netdev_err(netdev, "buffer TCE:0x%llx filter TCE:0x%llx rxq "
			   "desc:0x%llx MAC:0x%llx\n",
			   adapter->buffer_list_dma,
			   adapter->filter_list_dma,
			   rxq_desc.desc,
			   mac_address);
		rc = -ENONET;
		goto out_unmap_filter_list;
	}

	for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) {
		if (!adapter->rx_buff_pool[i].active)
			continue;
		if (ibmveth_alloc_buffer_pool(&adapter->rx_buff_pool[i])) {
			netdev_err(netdev, "unable to alloc pool\n");
			adapter->rx_buff_pool[i].active = 0;
			rc = -ENOMEM;
			goto out_free_buffer_pools;
		}
	}

	netdev_dbg(netdev, "registering irq 0x%x\n", netdev->irq);
	rc = request_irq(netdev->irq, ibmveth_interrupt, 0, netdev->name,
			 netdev);
	if (rc != 0) {
		netdev_err(netdev, "unable to request irq 0x%x, rc %d\n",
			   netdev->irq, rc);
		do {
			lpar_rc = h_free_logical_lan(adapter->vdev->unit_address);
		} while (H_IS_LONG_BUSY(lpar_rc) || (lpar_rc == H_BUSY));

		goto out_free_buffer_pools;
	}

	rc = -ENOMEM;
	adapter->bounce_buffer =
	    kmalloc(netdev->mtu + IBMVETH_BUFF_OH, GFP_KERNEL);
	if (!adapter->bounce_buffer)
		goto out_free_irq;

	adapter->bounce_buffer_dma =
	    dma_map_single(&adapter->vdev->dev, adapter->bounce_buffer,
			   netdev->mtu + IBMVETH_BUFF_OH, DMA_BIDIRECTIONAL);
	if (dma_mapping_error(dev, adapter->bounce_buffer_dma)) {
		netdev_err(netdev, "unable to map bounce buffer\n");
		goto out_free_bounce_buffer;
	}

	netdev_dbg(netdev, "initial replenish cycle\n");
	ibmveth_interrupt(netdev->irq, netdev);

	netif_start_queue(netdev);

	netdev_dbg(netdev, "open complete\n");

	return 0;

out_free_bounce_buffer:
	kfree(adapter->bounce_buffer);
out_free_irq:
	free_irq(netdev->irq, netdev);
out_free_buffer_pools:
	while (--i >= 0) {
		if (adapter->rx_buff_pool[i].active)
			ibmveth_free_buffer_pool(adapter,
						 &adapter->rx_buff_pool[i]);
	}
out_unmap_filter_list:
	dma_unmap_single(dev, adapter->filter_list_dma, 4096,
			 DMA_BIDIRECTIONAL);
out_unmap_buffer_list:
	dma_unmap_single(dev, adapter->buffer_list_dma, 4096,
			 DMA_BIDIRECTIONAL);
out_free_queue_mem:
	dma_free_coherent(dev, adapter->rx_queue.queue_len,
			  adapter->rx_queue.queue_addr,
			  adapter->rx_queue.queue_dma);
out_free_filter_list:
	free_page((unsigned long)adapter->filter_list_addr);
out_free_buffer_list:
	free_page((unsigned long)adapter->buffer_list_addr);
out:
	napi_disable(&adapter->napi);
	return rc;
}
static int ibmveth_close(struct net_device *netdev)
{
	struct ibmveth_adapter *adapter = netdev_priv(netdev);
	struct device *dev = &adapter->vdev->dev;
	long lpar_rc;
	int i;

	netdev_dbg(netdev, "close starting\n");

	napi_disable(&adapter->napi);

	if (!adapter->pool_config)
		netif_stop_queue(netdev);

	h_vio_signal(adapter->vdev->unit_address, VIO_IRQ_DISABLE);

	do {
		lpar_rc = h_free_logical_lan(adapter->vdev->unit_address);
	} while (H_IS_LONG_BUSY(lpar_rc) || (lpar_rc == H_BUSY));

	if (lpar_rc != H_SUCCESS) {
		netdev_err(netdev, "h_free_logical_lan failed with %lx, "
			   "continuing with close\n", lpar_rc);
	}

	free_irq(netdev->irq, netdev);

	ibmveth_update_rx_no_buffer(adapter);

	dma_unmap_single(dev, adapter->buffer_list_dma, 4096,
			 DMA_BIDIRECTIONAL);
	free_page((unsigned long)adapter->buffer_list_addr);

	dma_unmap_single(dev, adapter->filter_list_dma, 4096,
			 DMA_BIDIRECTIONAL);
	free_page((unsigned long)adapter->filter_list_addr);

	dma_free_coherent(dev, adapter->rx_queue.queue_len,
			  adapter->rx_queue.queue_addr,
			  adapter->rx_queue.queue_dma);

	for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++)
		if (adapter->rx_buff_pool[i].active)
			ibmveth_free_buffer_pool(adapter,
						 &adapter->rx_buff_pool[i]);

	dma_unmap_single(&adapter->vdev->dev, adapter->bounce_buffer_dma,
			 adapter->netdev->mtu + IBMVETH_BUFF_OH,
			 DMA_BIDIRECTIONAL);
	kfree(adapter->bounce_buffer);

	netdev_dbg(netdev, "close complete\n");

	return 0;
}
static int netdev_get_link_ksettings(struct net_device *dev,
				     struct ethtool_link_ksettings *cmd)
{
	u32 supported, advertising;

	supported = (SUPPORTED_1000baseT_Full | SUPPORTED_Autoneg |
		     SUPPORTED_FIBRE);
	advertising = (ADVERTISED_1000baseT_Full | ADVERTISED_Autoneg |
		       ADVERTISED_FIBRE);
	cmd->base.speed = SPEED_1000;
	cmd->base.duplex = DUPLEX_FULL;
	cmd->base.port = PORT_FIBRE;
	cmd->base.phy_address = 0;
	cmd->base.autoneg = AUTONEG_ENABLE;

	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
						supported);
	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
						advertising);

	return 0;
}
static void netdev_get_drvinfo(struct net_device *dev,
			       struct ethtool_drvinfo *info)
{
	strlcpy(info->driver, ibmveth_driver_name, sizeof(info->driver));
	strlcpy(info->version, ibmveth_driver_version, sizeof(info->version));
}
static netdev_features_t ibmveth_fix_features(struct net_device *dev,
	netdev_features_t features)
{
	/*
	 * Since the ibmveth firmware interface does not have the
	 * concept of separate tx/rx checksum offload enable, if rx
	 * checksum is disabled we also have to disable tx checksum
	 * offload. Once we disable rx checksum offload, we are no
	 * longer allowed to send tx buffers that are not properly
	 * checksummed.
	 */

	if (!(features & NETIF_F_RXCSUM))
		features &= ~NETIF_F_CSUM_MASK;

	return features;
}
static int ibmveth_set_csum_offload(struct net_device *dev, u32 data)
{
	struct ibmveth_adapter *adapter = netdev_priv(dev);
	unsigned long set_attr, clr_attr, ret_attr;
	unsigned long set_attr6, clr_attr6;
	long ret, ret4, ret6;
	int rc1 = 0, rc2 = 0;
	int restart = 0;

	if (netif_running(dev)) {
		restart = 1;
		adapter->pool_config = 1;
		ibmveth_close(dev);
		adapter->pool_config = 0;
	}

	set_attr = 0;
	clr_attr = 0;
	set_attr6 = 0;
	clr_attr6 = 0;

	if (data) {
		set_attr = IBMVETH_ILLAN_IPV4_TCP_CSUM;
		set_attr6 = IBMVETH_ILLAN_IPV6_TCP_CSUM;
	} else {
		clr_attr = IBMVETH_ILLAN_IPV4_TCP_CSUM;
		clr_attr6 = IBMVETH_ILLAN_IPV6_TCP_CSUM;
	}

	ret = h_illan_attributes(adapter->vdev->unit_address, 0, 0, &ret_attr);

	if (ret == H_SUCCESS &&
	    (ret_attr & IBMVETH_ILLAN_PADDED_PKT_CSUM)) {
		ret4 = h_illan_attributes(adapter->vdev->unit_address, clr_attr,
					  set_attr, &ret_attr);

		if (ret4 != H_SUCCESS) {
			netdev_err(dev, "unable to change IPv4 checksum "
					"offload settings. %d rc=%ld\n",
					data, ret4);

			h_illan_attributes(adapter->vdev->unit_address,
					   set_attr, clr_attr, &ret_attr);

			if (data == 1)
				dev->features &= ~NETIF_F_IP_CSUM;

		} else {
			adapter->fw_ipv4_csum_support = data;
		}

		ret6 = h_illan_attributes(adapter->vdev->unit_address,
					  clr_attr6, set_attr6, &ret_attr);

		if (ret6 != H_SUCCESS) {
			netdev_err(dev, "unable to change IPv6 checksum "
					"offload settings. %d rc=%ld\n",
					data, ret6);

			h_illan_attributes(adapter->vdev->unit_address,
					   set_attr6, clr_attr6, &ret_attr);

			if (data == 1)
				dev->features &= ~NETIF_F_IPV6_CSUM;

		} else
			adapter->fw_ipv6_csum_support = data;

		if (ret4 == H_SUCCESS || ret6 == H_SUCCESS)
			adapter->rx_csum = data;
		else
			rc1 = -EIO;
	} else {
		rc1 = -EIO;
		netdev_err(dev, "unable to change checksum offload settings."
			   " %d rc=%ld ret_attr=%lx\n", data, ret,
			   ret_attr);
	}

	if (restart)
		rc2 = ibmveth_open(dev);

	return rc1 ? rc1 : rc2;
}
static int ibmveth_set_tso(struct net_device *dev, u32 data)
{
	struct ibmveth_adapter *adapter = netdev_priv(dev);
	unsigned long set_attr, clr_attr, ret_attr;
	long ret1, ret2;
	int rc1 = 0, rc2 = 0;
	int restart = 0;

	if (netif_running(dev)) {
		restart = 1;
		adapter->pool_config = 1;
		ibmveth_close(dev);
		adapter->pool_config = 0;
	}

	set_attr = 0;
	clr_attr = 0;

	if (data)
		set_attr = IBMVETH_ILLAN_LRG_SR_ENABLED;
	else
		clr_attr = IBMVETH_ILLAN_LRG_SR_ENABLED;

	ret1 = h_illan_attributes(adapter->vdev->unit_address, 0, 0, &ret_attr);

	if (ret1 == H_SUCCESS && (ret_attr & IBMVETH_ILLAN_LRG_SND_SUPPORT) &&
	    !old_large_send) {
		ret2 = h_illan_attributes(adapter->vdev->unit_address, clr_attr,
					  set_attr, &ret_attr);

		if (ret2 != H_SUCCESS) {
			netdev_err(dev, "unable to change tso settings. %d rc=%ld\n",
				   data, ret2);

			h_illan_attributes(adapter->vdev->unit_address,
					   set_attr, clr_attr, &ret_attr);

			if (data == 1)
				dev->features &= ~(NETIF_F_TSO | NETIF_F_TSO6);
			rc1 = -EIO;

		} else {
			adapter->fw_large_send_support = data;
			adapter->large_send = data;
		}
	} else {
		/* Older firmware version of large send offload does not
		 * support tcp6/ipv6
		 */
		if (data == 1) {
			dev->features &= ~NETIF_F_TSO6;
			netdev_info(dev, "TSO feature requires all partitions to have updated driver");
		}
		adapter->large_send = data;
	}

	if (restart)
		rc2 = ibmveth_open(dev);

	return rc1 ? rc1 : rc2;
}
static int ibmveth_set_features(struct net_device *dev,
	netdev_features_t features)
{
	struct ibmveth_adapter *adapter = netdev_priv(dev);
	int rx_csum = !!(features & NETIF_F_RXCSUM);
	int large_send = !!(features & (NETIF_F_TSO | NETIF_F_TSO6));
	int rc1 = 0, rc2 = 0;

	if (rx_csum != adapter->rx_csum) {
		rc1 = ibmveth_set_csum_offload(dev, rx_csum);
		if (rc1 && !adapter->rx_csum)
			dev->features =
				features & ~(NETIF_F_CSUM_MASK |
					     NETIF_F_RXCSUM);
	}

	if (large_send != adapter->large_send) {
		rc2 = ibmveth_set_tso(dev, large_send);
		if (rc2 && !adapter->large_send)
			dev->features =
				features & ~(NETIF_F_TSO | NETIF_F_TSO6);
	}

	return rc1 ? rc1 : rc2;
}
static void ibmveth_get_strings(struct net_device *dev, u32 stringset, u8 *data)
{
	int i;

	if (stringset != ETH_SS_STATS)
		return;

	for (i = 0; i < ARRAY_SIZE(ibmveth_stats); i++, data += ETH_GSTRING_LEN)
		memcpy(data, ibmveth_stats[i].name, ETH_GSTRING_LEN);
}

static int ibmveth_get_sset_count(struct net_device *dev, int sset)
{
	switch (sset) {
	case ETH_SS_STATS:
		return ARRAY_SIZE(ibmveth_stats);
	default:
		return -EOPNOTSUPP;
	}
}

static void ibmveth_get_ethtool_stats(struct net_device *dev,
				      struct ethtool_stats *stats, u64 *data)
{
	int i;
	struct ibmveth_adapter *adapter = netdev_priv(dev);

	for (i = 0; i < ARRAY_SIZE(ibmveth_stats); i++)
		data[i] = IBMVETH_GET_STAT(adapter, ibmveth_stats[i].offset);
}
static const struct ethtool_ops netdev_ethtool_ops = {
	.get_drvinfo		= netdev_get_drvinfo,
	.get_link		= ethtool_op_get_link,
	.get_strings		= ibmveth_get_strings,
	.get_sset_count		= ibmveth_get_sset_count,
	.get_ethtool_stats	= ibmveth_get_ethtool_stats,
	.get_link_ksettings	= netdev_get_link_ksettings,
};
static int ibmveth_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
	return -EOPNOTSUPP;
}

#define page_offset(v) ((unsigned long)(v) & ((1 << 12) - 1))
static int ibmveth_send(struct ibmveth_adapter *adapter,
			union ibmveth_buf_desc *descs, unsigned long mss)
{
	unsigned long correlator;
	unsigned int retry_count;
	unsigned long ret;

	/*
	 * The retry count sets a maximum for the number of broadcast and
	 * multicast destinations within the system.
	 */
	retry_count = 1024;
	correlator = 0;
	do {
		ret = h_send_logical_lan(adapter->vdev->unit_address,
					 descs[0].desc, descs[1].desc,
					 descs[2].desc, descs[3].desc,
					 descs[4].desc, descs[5].desc,
					 correlator, &correlator, mss,
					 adapter->fw_large_send_support);
	} while ((ret == H_BUSY) && (retry_count--));

	if (ret != H_SUCCESS && ret != H_DROPPED) {
		netdev_err(adapter->netdev, "tx: h_send_logical_lan failed "
			   "with rc=%ld\n", ret);
		return 1;
	}

	return 0;
}
*skb
,
1015 struct net_device
*netdev
)
1017 struct ethhdr
*ether_header
;
1020 ether_header
= eth_hdr(skb
);
1022 if (ether_addr_equal(ether_header
->h_dest
, netdev
->dev_addr
)) {
1023 netdev_dbg(netdev
, "veth doesn't support loopback packets, dropping packet.\n");
1024 netdev
->stats
.tx_dropped
++;
1028 if (!ether_addr_equal(ether_header
->h_source
, netdev
->dev_addr
)) {
1029 netdev_dbg(netdev
, "source packet MAC address does not match veth device's, dropping packet.\n");
1030 netdev
->stats
.tx_dropped
++;
1037 static netdev_tx_t
ibmveth_start_xmit(struct sk_buff
*skb
,
1038 struct net_device
*netdev
)
1040 struct ibmveth_adapter
*adapter
= netdev_priv(netdev
);
1041 unsigned int desc_flags
;
1042 union ibmveth_buf_desc descs
[6];
1044 int force_bounce
= 0;
1045 dma_addr_t dma_addr
;
1046 unsigned long mss
= 0;
1048 if (ibmveth_is_packet_unsupported(skb
, netdev
))
1051 /* veth doesn't handle frag_list, so linearize the skb.
1052 * When GRO is enabled SKB's can have frag_list.
1054 if (adapter
->is_active_trunk
&&
1055 skb_has_frag_list(skb
) && __skb_linearize(skb
)) {
1056 netdev
->stats
.tx_dropped
++;
1061 * veth handles a maximum of 6 segments including the header, so
1062 * we have to linearize the skb if there are more than this.
1064 if (skb_shinfo(skb
)->nr_frags
> 5 && __skb_linearize(skb
)) {
1065 netdev
->stats
.tx_dropped
++;
1069 /* veth can't checksum offload UDP */
1070 if (skb
->ip_summed
== CHECKSUM_PARTIAL
&&
1071 ((skb
->protocol
== htons(ETH_P_IP
) &&
1072 ip_hdr(skb
)->protocol
!= IPPROTO_TCP
) ||
1073 (skb
->protocol
== htons(ETH_P_IPV6
) &&
1074 ipv6_hdr(skb
)->nexthdr
!= IPPROTO_TCP
)) &&
1075 skb_checksum_help(skb
)) {
1077 netdev_err(netdev
, "tx: failed to checksum packet\n");
1078 netdev
->stats
.tx_dropped
++;
1082 desc_flags
= IBMVETH_BUF_VALID
;
1084 if (skb
->ip_summed
== CHECKSUM_PARTIAL
) {
1085 unsigned char *buf
= skb_transport_header(skb
) +
1088 desc_flags
|= (IBMVETH_BUF_NO_CSUM
| IBMVETH_BUF_CSUM_GOOD
);
1090 /* Need to zero out the checksum */
1094 if (skb_is_gso(skb
) && adapter
->fw_large_send_support
)
1095 desc_flags
|= IBMVETH_BUF_LRG_SND
;
1099 memset(descs
, 0, sizeof(descs
));
1102 * If a linear packet is below the rx threshold then
1103 * copy it into the static bounce buffer. This avoids the
1104 * cost of a TCE insert and remove.
1106 if (force_bounce
|| (!skb_is_nonlinear(skb
) &&
1107 (skb
->len
< tx_copybreak
))) {
1108 skb_copy_from_linear_data(skb
, adapter
->bounce_buffer
,
1111 descs
[0].fields
.flags_len
= desc_flags
| skb
->len
;
1112 descs
[0].fields
.address
= adapter
->bounce_buffer_dma
;
1114 if (ibmveth_send(adapter
, descs
, 0)) {
1115 adapter
->tx_send_failed
++;
1116 netdev
->stats
.tx_dropped
++;
1118 netdev
->stats
.tx_packets
++;
1119 netdev
->stats
.tx_bytes
+= skb
->len
;
1125 /* Map the header */
1126 dma_addr
= dma_map_single(&adapter
->vdev
->dev
, skb
->data
,
1127 skb_headlen(skb
), DMA_TO_DEVICE
);
1128 if (dma_mapping_error(&adapter
->vdev
->dev
, dma_addr
))
1131 descs
[0].fields
.flags_len
= desc_flags
| skb_headlen(skb
);
1132 descs
[0].fields
.address
= dma_addr
;
1135 for (i
= 0; i
< skb_shinfo(skb
)->nr_frags
; i
++) {
1136 const skb_frag_t
*frag
= &skb_shinfo(skb
)->frags
[i
];
1138 dma_addr
= skb_frag_dma_map(&adapter
->vdev
->dev
, frag
, 0,
1139 skb_frag_size(frag
), DMA_TO_DEVICE
);
1141 if (dma_mapping_error(&adapter
->vdev
->dev
, dma_addr
))
1142 goto map_failed_frags
;
1144 descs
[i
+1].fields
.flags_len
= desc_flags
| skb_frag_size(frag
);
1145 descs
[i
+1].fields
.address
= dma_addr
;
1148 if (skb
->ip_summed
== CHECKSUM_PARTIAL
&& skb_is_gso(skb
)) {
1149 if (adapter
->fw_large_send_support
) {
1150 mss
= (unsigned long)skb_shinfo(skb
)->gso_size
;
1151 adapter
->tx_large_packets
++;
1152 } else if (!skb_is_gso_v6(skb
)) {
1153 /* Put -1 in the IP checksum to tell phyp it
1154 * is a largesend packet. Put the mss in
1157 ip_hdr(skb
)->check
= 0xffff;
1158 tcp_hdr(skb
)->check
=
1159 cpu_to_be16(skb_shinfo(skb
)->gso_size
);
1160 adapter
->tx_large_packets
++;
1164 if (ibmveth_send(adapter
, descs
, mss
)) {
1165 adapter
->tx_send_failed
++;
1166 netdev
->stats
.tx_dropped
++;
1168 netdev
->stats
.tx_packets
++;
1169 netdev
->stats
.tx_bytes
+= skb
->len
;
1172 dma_unmap_single(&adapter
->vdev
->dev
,
1173 descs
[0].fields
.address
,
1174 descs
[0].fields
.flags_len
& IBMVETH_BUF_LEN_MASK
,
1177 for (i
= 1; i
< skb_shinfo(skb
)->nr_frags
+ 1; i
++)
1178 dma_unmap_page(&adapter
->vdev
->dev
, descs
[i
].fields
.address
,
1179 descs
[i
].fields
.flags_len
& IBMVETH_BUF_LEN_MASK
,
1183 dev_consume_skb_any(skb
);
1184 return NETDEV_TX_OK
;
1188 for (i
= 1; i
< last
; i
++)
1189 dma_unmap_page(&adapter
->vdev
->dev
, descs
[i
].fields
.address
,
1190 descs
[i
].fields
.flags_len
& IBMVETH_BUF_LEN_MASK
,
1193 dma_unmap_single(&adapter
->vdev
->dev
,
1194 descs
[0].fields
.address
,
1195 descs
[0].fields
.flags_len
& IBMVETH_BUF_LEN_MASK
,
1198 if (!firmware_has_feature(FW_FEATURE_CMO
))
1199 netdev_err(netdev
, "tx: unable to map xmit buffer\n");
1200 adapter
->tx_map_failed
++;
1201 if (skb_linearize(skb
)) {
1202 netdev
->stats
.tx_dropped
++;
static void ibmveth_rx_mss_helper(struct sk_buff *skb, u16 mss, int lrg_pkt)
{
	struct tcphdr *tcph;
	int offset = 0;
	int hdr_len;

	/* only TCP packets will be aggregated */
	if (skb->protocol == htons(ETH_P_IP)) {
		struct iphdr *iph = (struct iphdr *)skb->data;

		if (iph->protocol == IPPROTO_TCP) {
			offset = iph->ihl * 4;
			skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
		} else {
			return;
		}
	} else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct ipv6hdr *iph6 = (struct ipv6hdr *)skb->data;

		if (iph6->nexthdr == IPPROTO_TCP) {
			offset = sizeof(struct ipv6hdr);
			skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
		} else {
			return;
		}
	} else {
		return;
	}
	/* if mss is not set through Large Packet bit/mss in rx buffer,
	 * expect that the mss will be written to the tcp header checksum.
	 */
	tcph = (struct tcphdr *)(skb->data + offset);
	if (lrg_pkt) {
		skb_shinfo(skb)->gso_size = mss;
	} else if (offset) {
		skb_shinfo(skb)->gso_size = ntohs(tcph->check);
		tcph->check = 0;
	}

	if (skb_shinfo(skb)->gso_size) {
		hdr_len = offset + tcph->doff * 4;
		skb_shinfo(skb)->gso_segs =
				DIV_ROUND_UP(skb->len - hdr_len,
					     skb_shinfo(skb)->gso_size);
	}
}
static void ibmveth_rx_csum_helper(struct sk_buff *skb,
				   struct ibmveth_adapter *adapter)
{
	struct iphdr *iph = NULL;
	struct ipv6hdr *iph6 = NULL;
	__be16 skb_proto = 0;
	u16 iphlen = 0;
	u16 iph_proto = 0;
	u16 tcphdrlen = 0;

	skb_proto = be16_to_cpu(skb->protocol);

	if (skb_proto == ETH_P_IP) {
		iph = (struct iphdr *)skb->data;

		/* If the IP checksum is not offloaded and if the packet
		 * is large send, the checksum must be rebuilt.
		 */
		if (iph->check == 0xffff) {
			iph->check = 0;
			iph->check = ip_fast_csum((unsigned char *)iph,
						  iph->ihl);
		}

		iphlen = iph->ihl * 4;
		iph_proto = iph->protocol;
	} else if (skb_proto == ETH_P_IPV6) {
		iph6 = (struct ipv6hdr *)skb->data;
		iphlen = sizeof(struct ipv6hdr);
		iph_proto = iph6->nexthdr;
	}

	/* In OVS environment, when a flow is not cached, specifically for a
	 * new TCP connection, the first packet information is passed up
	 * the user space for finding a flow. During this process, OVS computes
	 * checksum on the first packet when CHECKSUM_PARTIAL flag is set.
	 *
	 * Given that we zeroed out TCP checksum field in transmit path
	 * (refer ibmveth_start_xmit routine) as we set "no checksum bit",
	 * OVS computed checksum will be incorrect w/o TCP pseudo checksum
	 * in the packet. This leads to OVS dropping the packet and hence
	 * TCP retransmissions are seen.
	 *
	 * So, re-compute TCP pseudo header checksum.
	 */
	if (iph_proto == IPPROTO_TCP && adapter->is_active_trunk) {
		struct tcphdr *tcph = (struct tcphdr *)(skb->data + iphlen);

		tcphdrlen = skb->len - iphlen;

		/* Recompute TCP pseudo header checksum */
		if (skb_proto == ETH_P_IP)
			tcph->check = ~csum_tcpudp_magic(iph->saddr,
					iph->daddr, tcphdrlen, iph_proto, 0);
		else if (skb_proto == ETH_P_IPV6)
			tcph->check = ~csum_ipv6_magic(&iph6->saddr,
					&iph6->daddr, tcphdrlen, iph_proto, 0);

		/* Setup SKB fields for checksum offload */
		skb_partial_csum_set(skb, iphlen,
				     offsetof(struct tcphdr, check));
		skb_reset_network_header(skb);
	}
}
static int ibmveth_poll(struct napi_struct *napi, int budget)
{
	struct ibmveth_adapter *adapter =
			container_of(napi, struct ibmveth_adapter, napi);
	struct net_device *netdev = adapter->netdev;
	int frames_processed = 0;
	unsigned long lpar_rc;
	u16 mss = 0;

restart_poll:
	while (frames_processed < budget) {
		if (!ibmveth_rxq_pending_buffer(adapter))
			break;

		if (!ibmveth_rxq_buffer_valid(adapter)) {
			wmb(); /* suggested by larson1 */
			adapter->rx_invalid_buffer++;
			netdev_dbg(netdev, "recycling invalid buffer\n");
			ibmveth_rxq_recycle_buffer(adapter);
		} else {
			struct sk_buff *skb, *new_skb;
			int length = ibmveth_rxq_frame_length(adapter);
			int offset = ibmveth_rxq_frame_offset(adapter);
			int csum_good = ibmveth_rxq_csum_good(adapter);
			int lrg_pkt = ibmveth_rxq_large_packet(adapter);

			skb = ibmveth_rxq_get_buffer(adapter);

			/* if the large packet bit is set in the rx queue
			 * descriptor, the mss will be written by PHYP eight
			 * bytes from the start of the rx buffer, which is
			 * skb->data at this stage
			 */
			if (lrg_pkt) {
				__be64 *rxmss = (__be64 *)(skb->data + 8);

				mss = (u16)be64_to_cpu(*rxmss);
			}

			new_skb = NULL;
			if (length < rx_copybreak)
				new_skb = netdev_alloc_skb(netdev, length);

			if (new_skb) {
				skb_copy_to_linear_data(new_skb,
							skb->data + offset,
							length);
				if (rx_flush)
					ibmveth_flush_buffer(skb->data,
						length + offset);
				if (!ibmveth_rxq_recycle_buffer(adapter))
					kfree_skb(skb);
				skb = new_skb;
			} else {
				ibmveth_rxq_harvest_buffer(adapter);
				skb_reserve(skb, offset);
			}

			skb_put(skb, length);
			skb->protocol = eth_type_trans(skb, netdev);

			if (csum_good) {
				skb->ip_summed = CHECKSUM_UNNECESSARY;
				ibmveth_rx_csum_helper(skb, adapter);
			}

			if (length > netdev->mtu + ETH_HLEN) {
				ibmveth_rx_mss_helper(skb, mss, lrg_pkt);
				adapter->rx_large_packets++;
			}

			napi_gro_receive(napi, skb);	/* send it up */

			netdev->stats.rx_packets++;
			netdev->stats.rx_bytes += length;
			frames_processed++;
		}
	}

	ibmveth_replenish_task(adapter);

	if (frames_processed < budget) {
		napi_complete_done(napi, frames_processed);

		/* We think we are done - reenable interrupts,
		 * then check once more to make sure we are done.
		 */
		lpar_rc = h_vio_signal(adapter->vdev->unit_address,
				       VIO_IRQ_ENABLE);

		BUG_ON(lpar_rc != H_SUCCESS);

		if (ibmveth_rxq_pending_buffer(adapter) &&
		    napi_reschedule(napi)) {
			lpar_rc = h_vio_signal(adapter->vdev->unit_address,
					       VIO_IRQ_DISABLE);
			goto restart_poll;
		}
	}

	return frames_processed;
}
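/*
 * NAPI re-arm note for ibmveth_poll(): once fewer than budget frames were
 * processed, the poll routine completes NAPI and re-enables the VIO
 * interrupt, then checks the queue one more time.  If a frame arrived in
 * that window, napi_reschedule() reclaims the context, the interrupt is
 * disabled again and polling resumes, so no received frame is left waiting
 * for an interrupt that already fired.
 */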
static irqreturn_t ibmveth_interrupt(int irq, void *dev_instance)
{
	struct net_device *netdev = dev_instance;
	struct ibmveth_adapter *adapter = netdev_priv(netdev);
	unsigned long lpar_rc;

	if (napi_schedule_prep(&adapter->napi)) {
		lpar_rc = h_vio_signal(adapter->vdev->unit_address,
				       VIO_IRQ_DISABLE);
		BUG_ON(lpar_rc != H_SUCCESS);
		__napi_schedule(&adapter->napi);
	}
	return IRQ_HANDLED;
}
static void ibmveth_set_multicast_list(struct net_device *netdev)
{
	struct ibmveth_adapter *adapter = netdev_priv(netdev);
	unsigned long lpar_rc;

	if ((netdev->flags & IFF_PROMISC) ||
	    (netdev_mc_count(netdev) > adapter->mcastFilterSize)) {
		lpar_rc = h_multicast_ctrl(adapter->vdev->unit_address,
					   IbmVethMcastEnableRecv |
					   IbmVethMcastDisableFiltering,
					   0);
		if (lpar_rc != H_SUCCESS) {
			netdev_err(netdev, "h_multicast_ctrl rc=%ld when "
				   "entering promisc mode\n", lpar_rc);
		}
	} else {
		struct netdev_hw_addr *ha;
		/* clear the filter table & disable filtering */
		lpar_rc = h_multicast_ctrl(adapter->vdev->unit_address,
					   IbmVethMcastEnableRecv |
					   IbmVethMcastDisableFiltering |
					   IbmVethMcastClearFilterTable,
					   0);
		if (lpar_rc != H_SUCCESS) {
			netdev_err(netdev, "h_multicast_ctrl rc=%ld when "
				   "attempting to clear filter table\n",
				   lpar_rc);
		}
		/* add the addresses to the filter table */
		netdev_for_each_mc_addr(ha, netdev) {
			/* add the multicast address to the filter table */
			u64 mcast_addr;

			mcast_addr = ibmveth_encode_mac_addr(ha->addr);
			lpar_rc = h_multicast_ctrl(adapter->vdev->unit_address,
						   IbmVethMcastAddFilter,
						   mcast_addr);
			if (lpar_rc != H_SUCCESS) {
				netdev_err(netdev, "h_multicast_ctrl rc=%ld "
					   "when adding an entry to the filter "
					   "table\n", lpar_rc);
			}
		}
		/* re-enable filtering */
		lpar_rc = h_multicast_ctrl(adapter->vdev->unit_address,
					   IbmVethMcastEnableFiltering,
					   0);
		if (lpar_rc != H_SUCCESS) {
			netdev_err(netdev, "h_multicast_ctrl rc=%ld when "
				   "enabling filtering\n", lpar_rc);
		}
	}
}
static int ibmveth_change_mtu(struct net_device *dev, int new_mtu)
{
	struct ibmveth_adapter *adapter = netdev_priv(dev);
	struct vio_dev *viodev = adapter->vdev;
	int new_mtu_oh = new_mtu + IBMVETH_BUFF_OH;
	int i, rc;
	int need_restart = 0;

	for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++)
		if (new_mtu_oh <= adapter->rx_buff_pool[i].buff_size)
			break;

	if (i == IBMVETH_NUM_BUFF_POOLS)
		return -EINVAL;

	/* Deactivate all the buffer pools so that the next loop can activate
	   only the buffer pools necessary to hold the new MTU */
	if (netif_running(adapter->netdev)) {
		need_restart = 1;
		adapter->pool_config = 1;
		ibmveth_close(adapter->netdev);
		adapter->pool_config = 0;
	}

	/* Look for an active buffer pool that can hold the new MTU */
	for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) {
		adapter->rx_buff_pool[i].active = 1;

		if (new_mtu_oh <= adapter->rx_buff_pool[i].buff_size) {
			dev->mtu = new_mtu;
			vio_cmo_set_dev_desired(viodev,
						ibmveth_get_desired_dma
						(viodev));
			if (need_restart)
				return ibmveth_open(adapter->netdev);
			return 0;
		}
	}

	if (need_restart && (rc = ibmveth_open(adapter->netdev)))
		return rc;

	return -EINVAL;
}
#ifdef CONFIG_NET_POLL_CONTROLLER
static void ibmveth_poll_controller(struct net_device *dev)
{
	ibmveth_replenish_task(netdev_priv(dev));
	ibmveth_interrupt(dev->irq, dev);
}
#endif
/**
 * ibmveth_get_desired_dma - Calculate IO memory desired by the driver
 *
 * @vdev: struct vio_dev for the device whose desired IO mem is to be returned
 *
 * Return value:
 *	Number of bytes of IO data the driver will need to perform well.
 */
static unsigned long ibmveth_get_desired_dma(struct vio_dev *vdev)
{
	struct net_device *netdev = dev_get_drvdata(&vdev->dev);
	struct ibmveth_adapter *adapter;
	struct iommu_table *tbl;
	unsigned long ret;
	int i;
	int rxqentries = 1;

	tbl = get_iommu_table_base(&vdev->dev);

	/* netdev inits at probe time along with the structures we need below*/
	if (netdev == NULL)
		return IOMMU_PAGE_ALIGN(IBMVETH_IO_ENTITLEMENT_DEFAULT, tbl);

	adapter = netdev_priv(netdev);

	ret = IBMVETH_BUFF_LIST_SIZE + IBMVETH_FILT_LIST_SIZE;
	ret += IOMMU_PAGE_ALIGN(netdev->mtu, tbl);

	for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) {
		/* add the size of the active receive buffers */
		if (adapter->rx_buff_pool[i].active)
			ret +=
			    adapter->rx_buff_pool[i].size *
			    IOMMU_PAGE_ALIGN(adapter->rx_buff_pool[i].
					     buff_size, tbl);
		rxqentries += adapter->rx_buff_pool[i].size;
	}
	/* add the size of the receive queue entries */
	ret += IOMMU_PAGE_ALIGN(
		rxqentries * sizeof(struct ibmveth_rx_q_entry), tbl);

	return ret;
}
static int ibmveth_set_mac_addr(struct net_device *dev, void *p)
{
	struct ibmveth_adapter *adapter = netdev_priv(dev);
	struct sockaddr *addr = p;
	u64 mac_address;
	int rc;

	if (!is_valid_ether_addr(addr->sa_data))
		return -EADDRNOTAVAIL;

	mac_address = ibmveth_encode_mac_addr(addr->sa_data);
	rc = h_change_logical_lan_mac(adapter->vdev->unit_address, mac_address);
	if (rc) {
		netdev_err(adapter->netdev, "h_change_logical_lan_mac failed with rc=%d\n", rc);
		return rc;
	}

	ether_addr_copy(dev->dev_addr, addr->sa_data);

	return 0;
}
static const struct net_device_ops ibmveth_netdev_ops = {
	.ndo_open		= ibmveth_open,
	.ndo_stop		= ibmveth_close,
	.ndo_start_xmit		= ibmveth_start_xmit,
	.ndo_set_rx_mode	= ibmveth_set_multicast_list,
	.ndo_do_ioctl		= ibmveth_ioctl,
	.ndo_change_mtu		= ibmveth_change_mtu,
	.ndo_fix_features	= ibmveth_fix_features,
	.ndo_set_features	= ibmveth_set_features,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_set_mac_address	= ibmveth_set_mac_addr,
#ifdef CONFIG_NET_POLL_CONTROLLER
	.ndo_poll_controller	= ibmveth_poll_controller,
#endif
};
static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id)
{
	int rc, i, mac_len;
	struct net_device *netdev;
	struct ibmveth_adapter *adapter;
	unsigned char *mac_addr_p;
	__be32 *mcastFilterSize_p;
	long ret;
	unsigned long ret_attr;

	dev_dbg(&dev->dev, "entering ibmveth_probe for UA 0x%x\n",
		dev->unit_address);

	mac_addr_p = (unsigned char *)vio_get_attribute(dev, VETH_MAC_ADDR,
							&mac_len);
	if (!mac_addr_p) {
		dev_err(&dev->dev, "Can't find VETH_MAC_ADDR attribute\n");
		return -EINVAL;
	}
	/* Workaround for old/broken pHyp */
	if (mac_len == 8)
		mac_addr_p += 2;
	else if (mac_len != 6) {
		dev_err(&dev->dev, "VETH_MAC_ADDR attribute wrong len %d\n",
			mac_len);
		return -EINVAL;
	}

	mcastFilterSize_p = (__be32 *)vio_get_attribute(dev,
							VETH_MCAST_FILTER_SIZE,
							NULL);
	if (!mcastFilterSize_p) {
		dev_err(&dev->dev, "Can't find VETH_MCAST_FILTER_SIZE "
			"attribute\n");
		return -EINVAL;
	}

	netdev = alloc_etherdev(sizeof(struct ibmveth_adapter));
	if (!netdev)
		return -ENOMEM;

	adapter = netdev_priv(netdev);
	dev_set_drvdata(&dev->dev, netdev);

	adapter->vdev = dev;
	adapter->netdev = netdev;
	adapter->mcastFilterSize = be32_to_cpu(*mcastFilterSize_p);
	adapter->pool_config = 0;

	netif_napi_add(netdev, &adapter->napi, ibmveth_poll, 16);

	netdev->irq = dev->irq;
	netdev->netdev_ops = &ibmveth_netdev_ops;
	netdev->ethtool_ops = &netdev_ethtool_ops;
	SET_NETDEV_DEV(netdev, &dev->dev);
	netdev->hw_features = NETIF_F_SG;
	if (vio_get_attribute(dev, "ibm,illan-options", NULL) != NULL) {
		netdev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
				       NETIF_F_RXCSUM;
	}

	netdev->features |= netdev->hw_features;

	ret = h_illan_attributes(adapter->vdev->unit_address, 0, 0, &ret_attr);

	/* If running older firmware, TSO should not be enabled by default */
	if (ret == H_SUCCESS && (ret_attr & IBMVETH_ILLAN_LRG_SND_SUPPORT) &&
	    !old_large_send) {
		netdev->hw_features |= NETIF_F_TSO | NETIF_F_TSO6;
		netdev->features |= netdev->hw_features;
	} else {
		netdev->hw_features |= NETIF_F_TSO;
	}

	adapter->is_active_trunk = false;
	if (ret == H_SUCCESS && (ret_attr & IBMVETH_ILLAN_ACTIVE_TRUNK)) {
		adapter->is_active_trunk = true;
		netdev->hw_features |= NETIF_F_FRAGLIST;
		netdev->features |= NETIF_F_FRAGLIST;
	}

	netdev->min_mtu = IBMVETH_MIN_MTU;
	netdev->max_mtu = ETH_MAX_MTU;

	memcpy(netdev->dev_addr, mac_addr_p, ETH_ALEN);

	if (firmware_has_feature(FW_FEATURE_CMO))
		memcpy(pool_count, pool_count_cmo, sizeof(pool_count));

	for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) {
		struct kobject *kobj = &adapter->rx_buff_pool[i].kobj;
		int error;

		ibmveth_init_buffer_pool(&adapter->rx_buff_pool[i], i,
					 pool_count[i], pool_size[i],
					 pool_active[i]);
		error = kobject_init_and_add(kobj, &ktype_veth_pool,
					     &dev->dev.kobj, "pool%d", i);
		if (!error)
			kobject_uevent(kobj, KOBJ_ADD);
	}

	netdev_dbg(netdev, "adapter @ 0x%p\n", adapter);
	netdev_dbg(netdev, "registering netdev...\n");

	ibmveth_set_features(netdev, netdev->features);

	rc = register_netdev(netdev);
	if (rc) {
		netdev_dbg(netdev, "failed to register netdev rc=%d\n", rc);
		free_netdev(netdev);
		return rc;
	}

	netdev_dbg(netdev, "registered\n");

	return 0;
}
static int ibmveth_remove(struct vio_dev *dev)
{
	struct net_device *netdev = dev_get_drvdata(&dev->dev);
	struct ibmveth_adapter *adapter = netdev_priv(netdev);
	int i;

	for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++)
		kobject_put(&adapter->rx_buff_pool[i].kobj);

	unregister_netdev(netdev);

	free_netdev(netdev);
	dev_set_drvdata(&dev->dev, NULL);

	return 0;
}
static struct attribute veth_active_attr;
static struct attribute veth_num_attr;
static struct attribute veth_size_attr;

static ssize_t veth_pool_show(struct kobject *kobj,
			      struct attribute *attr, char *buf)
{
	struct ibmveth_buff_pool *pool = container_of(kobj,
						      struct ibmveth_buff_pool,
						      kobj);

	if (attr == &veth_active_attr)
		return sprintf(buf, "%d\n", pool->active);
	else if (attr == &veth_num_attr)
		return sprintf(buf, "%d\n", pool->size);
	else if (attr == &veth_size_attr)
		return sprintf(buf, "%d\n", pool->buff_size);
	return 0;
}
static ssize_t veth_pool_store(struct kobject *kobj, struct attribute *attr,
			       const char *buf, size_t count)
{
	struct ibmveth_buff_pool *pool = container_of(kobj,
						      struct ibmveth_buff_pool,
						      kobj);
	struct net_device *netdev = dev_get_drvdata(
	    container_of(kobj->parent, struct device, kobj));
	struct ibmveth_adapter *adapter = netdev_priv(netdev);
	long value = simple_strtol(buf, NULL, 10);
	long rc;

	if (attr == &veth_active_attr) {
		if (value && !pool->active) {
			if (netif_running(netdev)) {
				if (ibmveth_alloc_buffer_pool(pool)) {
					netdev_err(netdev,
						   "unable to alloc pool\n");
					return -ENOMEM;
				}
				pool->active = 1;
				adapter->pool_config = 1;
				ibmveth_close(netdev);
				adapter->pool_config = 0;
				if ((rc = ibmveth_open(netdev)))
					return rc;
			} else {
				pool->active = 1;
			}
		} else if (!value && pool->active) {
			int mtu = netdev->mtu + IBMVETH_BUFF_OH;
			int i;
			/* Make sure there is a buffer pool with buffers that
			   can hold a packet of the size of the MTU */
			for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) {
				if (pool == &adapter->rx_buff_pool[i])
					continue;
				if (!adapter->rx_buff_pool[i].active)
					continue;
				if (mtu <= adapter->rx_buff_pool[i].buff_size)
					break;
			}

			if (i == IBMVETH_NUM_BUFF_POOLS) {
				netdev_err(netdev, "no active pool >= MTU\n");
				return -EPERM;
			}

			if (netif_running(netdev)) {
				adapter->pool_config = 1;
				ibmveth_close(netdev);
				pool->active = 0;
				adapter->pool_config = 0;
				if ((rc = ibmveth_open(netdev)))
					return rc;
			}
			pool->active = 0;
		}
	} else if (attr == &veth_num_attr) {
		if (value <= 0 || value > IBMVETH_MAX_POOL_COUNT) {
			return -EINVAL;
		} else {
			if (netif_running(netdev)) {
				adapter->pool_config = 1;
				ibmveth_close(netdev);
				adapter->pool_config = 0;
				pool->size = value;
				if ((rc = ibmveth_open(netdev)))
					return rc;
			} else {
				pool->size = value;
			}
		}
	} else if (attr == &veth_size_attr) {
		if (value <= IBMVETH_BUFF_OH || value > IBMVETH_MAX_BUF_SIZE) {
			return -EINVAL;
		} else {
			if (netif_running(netdev)) {
				adapter->pool_config = 1;
				ibmveth_close(netdev);
				adapter->pool_config = 0;
				pool->buff_size = value;
				if ((rc = ibmveth_open(netdev)))
					return rc;
			} else {
				pool->buff_size = value;
			}
		}
	}

	/* kick the interrupt handler to allocate/deallocate pools */
	ibmveth_interrupt(netdev->irq, netdev);
	return count;
}
#define ATTR(_name, _mode)				\
	struct attribute veth_##_name##_attr = {	\
	.name = __stringify(_name), .mode = _mode,	\
	};

static ATTR(active, 0644);
static ATTR(num, 0644);
static ATTR(size, 0644);

static struct attribute *veth_pool_attrs[] = {
	&veth_active_attr,
	&veth_num_attr,
	&veth_size_attr,
	NULL,
};

static const struct sysfs_ops veth_pool_ops = {
	.show   = veth_pool_show,
	.store  = veth_pool_store,
};

static struct kobj_type ktype_veth_pool = {
	.release        = NULL,
	.sysfs_ops      = &veth_pool_ops,
	.default_attrs  = veth_pool_attrs,
};
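/*
 * Each receive buffer pool is exposed through sysfs as a "pool%d" kobject
 * under the VIO device, with "active", "num" and "size" attributes served by
 * veth_pool_show()/veth_pool_store().  Example usage (the device address is
 * illustrative only):
 *
 *	echo 1    > /sys/devices/vio/30000002/pool3/active
 *	echo 768  > /sys/devices/vio/30000002/pool3/num
 *	echo 9216 > /sys/devices/vio/30000002/pool3/size
 */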
static int ibmveth_resume(struct device *dev)
{
	struct net_device *netdev = dev_get_drvdata(dev);

	ibmveth_interrupt(netdev->irq, netdev);
	return 0;
}

static const struct vio_device_id ibmveth_device_table[] = {
	{ "network", "IBM,l-lan"},
	{ "", "" }
};
MODULE_DEVICE_TABLE(vio, ibmveth_device_table);
static const struct dev_pm_ops ibmveth_pm_ops = {
	.resume = ibmveth_resume
};

static struct vio_driver ibmveth_driver = {
	.id_table	= ibmveth_device_table,
	.probe		= ibmveth_probe,
	.remove		= ibmveth_remove,
	.get_desired_dma = ibmveth_get_desired_dma,
	.name		= ibmveth_driver_name,
	.pm		= &ibmveth_pm_ops,
};
static int __init ibmveth_module_init(void)
{
	printk(KERN_DEBUG "%s: %s %s\n", ibmveth_driver_name,
	       ibmveth_driver_string, ibmveth_driver_version);

	return vio_register_driver(&ibmveth_driver);
}

static void __exit ibmveth_module_exit(void)
{
	vio_unregister_driver(&ibmveth_driver);
}

module_init(ibmveth_module_init);
module_exit(ibmveth_module_exit);