// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * IBM Power Virtual Ethernet Device Driver
 *
 * Copyright (C) IBM Corporation, 2003, 2010
 *
 * Authors: Dave Larson <larson1@us.ibm.com>
 *          Santiago Leon <santil@linux.vnet.ibm.com>
 *          Brian King <brking@linux.vnet.ibm.com>
 *          Robert Jennings <rcj@linux.vnet.ibm.com>
 *          Anton Blanchard <anton@au.ibm.com>
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/dma-mapping.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/mm.h>
#include <linux/pm.h>
#include <linux/ethtool.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/slab.h>
#include <asm/hvcall.h>
#include <linux/atomic.h>
#include <asm/vio.h>
#include <asm/iommu.h>
#include <asm/firmware.h>
#include <net/tcp.h>
#include <net/ip6_checksum.h>

#include "ibmveth.h"

static irqreturn_t ibmveth_interrupt(int irq, void *dev_instance);
static void ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter,
				       bool reuse);
static unsigned long ibmveth_get_desired_dma(struct vio_dev *vdev);

static struct kobj_type ktype_veth_pool;

static const char ibmveth_driver_name[] = "ibmveth";
static const char ibmveth_driver_string[] = "IBM Power Virtual Ethernet Driver";
#define ibmveth_driver_version "1.06"

MODULE_AUTHOR("Santiago Leon <santil@linux.vnet.ibm.com>");
MODULE_DESCRIPTION("IBM Power Virtual Ethernet Driver");
MODULE_LICENSE("GPL");
MODULE_VERSION(ibmveth_driver_version);

static unsigned int tx_copybreak __read_mostly = 128;
module_param(tx_copybreak, uint, 0644);
MODULE_PARM_DESC(tx_copybreak,
	"Maximum size of packet that is copied to a new buffer on transmit");

static unsigned int rx_copybreak __read_mostly = 128;
module_param(rx_copybreak, uint, 0644);
MODULE_PARM_DESC(rx_copybreak,
	"Maximum size of packet that is copied to a new buffer on receive");

static unsigned int rx_flush __read_mostly = 0;
module_param(rx_flush, uint, 0644);
MODULE_PARM_DESC(rx_flush, "Flush receive buffers before use");

static bool old_large_send __read_mostly;
module_param(old_large_send, bool, 0444);
MODULE_PARM_DESC(old_large_send,
	"Use old large send method on firmware that supports the new method");

struct ibmveth_stat {
	char name[ETH_GSTRING_LEN];
	int offset;
};

#define IBMVETH_STAT_OFF(stat) offsetof(struct ibmveth_adapter, stat)
#define IBMVETH_GET_STAT(a, off) *((u64 *)(((unsigned long)(a)) + off))

static struct ibmveth_stat ibmveth_stats[] = {
	{ "replenish_task_cycles", IBMVETH_STAT_OFF(replenish_task_cycles) },
	{ "replenish_no_mem", IBMVETH_STAT_OFF(replenish_no_mem) },
	{ "replenish_add_buff_failure",
			IBMVETH_STAT_OFF(replenish_add_buff_failure) },
	{ "replenish_add_buff_success",
			IBMVETH_STAT_OFF(replenish_add_buff_success) },
	{ "rx_invalid_buffer", IBMVETH_STAT_OFF(rx_invalid_buffer) },
	{ "rx_no_buffer", IBMVETH_STAT_OFF(rx_no_buffer) },
	{ "tx_map_failed", IBMVETH_STAT_OFF(tx_map_failed) },
	{ "tx_send_failed", IBMVETH_STAT_OFF(tx_send_failed) },
	{ "fw_enabled_ipv4_csum", IBMVETH_STAT_OFF(fw_ipv4_csum_support) },
	{ "fw_enabled_ipv6_csum", IBMVETH_STAT_OFF(fw_ipv6_csum_support) },
	{ "tx_large_packets", IBMVETH_STAT_OFF(tx_large_packets) },
	{ "rx_large_packets", IBMVETH_STAT_OFF(rx_large_packets) },
	{ "fw_enabled_large_send", IBMVETH_STAT_OFF(fw_large_send_support) }
};

/* simple methods of getting data from the current rxq entry */
static inline u32 ibmveth_rxq_flags(struct ibmveth_adapter *adapter)
{
	return be32_to_cpu(adapter->rx_queue.queue_addr[adapter->rx_queue.index].flags_off);
}

static inline int ibmveth_rxq_toggle(struct ibmveth_adapter *adapter)
{
	return (ibmveth_rxq_flags(adapter) & IBMVETH_RXQ_TOGGLE) >>
			IBMVETH_RXQ_TOGGLE_SHIFT;
}

static inline int ibmveth_rxq_pending_buffer(struct ibmveth_adapter *adapter)
{
	return ibmveth_rxq_toggle(adapter) == adapter->rx_queue.toggle;
}

static inline int ibmveth_rxq_buffer_valid(struct ibmveth_adapter *adapter)
{
	return ibmveth_rxq_flags(adapter) & IBMVETH_RXQ_VALID;
}

static inline int ibmveth_rxq_frame_offset(struct ibmveth_adapter *adapter)
{
	return ibmveth_rxq_flags(adapter) & IBMVETH_RXQ_OFF_MASK;
}

static inline int ibmveth_rxq_large_packet(struct ibmveth_adapter *adapter)
{
	return ibmveth_rxq_flags(adapter) & IBMVETH_RXQ_LRG_PKT;
}

static inline int ibmveth_rxq_frame_length(struct ibmveth_adapter *adapter)
{
	return be32_to_cpu(adapter->rx_queue.queue_addr[adapter->rx_queue.index].length);
}

static inline int ibmveth_rxq_csum_good(struct ibmveth_adapter *adapter)
{
	return ibmveth_rxq_flags(adapter) & IBMVETH_RXQ_CSUM_GOOD;
}
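
/* Upper bound on usable tx queues: one per online CPU, capped at
 * IBMVETH_MAX_QUEUES.
 */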
static unsigned int ibmveth_real_max_tx_queues(void)
{
	unsigned int n_cpu = num_online_cpus();

	return min(n_cpu, IBMVETH_MAX_QUEUES);
}

/* setup the initial settings for a buffer pool */
static void ibmveth_init_buffer_pool(struct ibmveth_buff_pool *pool,
				     u32 pool_index, u32 pool_size,
				     u32 buff_size, u32 pool_active)
{
	pool->size = pool_size;
	pool->index = pool_index;
	pool->buff_size = buff_size;
	pool->threshold = pool_size * 7 / 8;
	pool->active = pool_active;
}

/* allocate and setup a buffer pool - called during open */
static int ibmveth_alloc_buffer_pool(struct ibmveth_buff_pool *pool)
{
	int i;

	pool->free_map = kmalloc_array(pool->size, sizeof(u16), GFP_KERNEL);
	if (!pool->free_map)
		return -1;

	pool->dma_addr = kcalloc(pool->size, sizeof(dma_addr_t), GFP_KERNEL);
	if (!pool->dma_addr) {
		kfree(pool->free_map);
		pool->free_map = NULL;
		return -1;
	}

	pool->skbuff = kcalloc(pool->size, sizeof(void *), GFP_KERNEL);
	if (!pool->skbuff) {
		kfree(pool->dma_addr);
		pool->dma_addr = NULL;

		kfree(pool->free_map);
		pool->free_map = NULL;
		return -1;
	}

	for (i = 0; i < pool->size; ++i)
		pool->free_map[i] = i;

	atomic_set(&pool->available, 0);
	pool->producer_index = 0;
	pool->consumer_index = 0;

	return 0;
}
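
/* Flush a buffer's cache lines (dcbf) so the hypervisor sees current data;
 * only used when the rx_flush module parameter is set.
 */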
static inline void ibmveth_flush_buffer(void *addr, unsigned long length)
{
	unsigned long offset;

	for (offset = 0; offset < length; offset += SMP_CACHE_BYTES)
		asm("dcbf %0,%1,1" :: "b" (addr), "r" (offset));
}

/* replenish the buffers for a pool. note that we don't need to
 * skb_reserve these since they are used for incoming...
 */
static void ibmveth_replenish_buffer_pool(struct ibmveth_adapter *adapter,
					  struct ibmveth_buff_pool *pool)
{
	u32 i;
	u32 count = pool->size - atomic_read(&pool->available);
	u32 buffers_added = 0;
	struct sk_buff *skb;
	unsigned int free_index, index;
	u64 correlator;
	unsigned long lpar_rc;
	dma_addr_t dma_addr;

	mb();

	for (i = 0; i < count; ++i) {
		union ibmveth_buf_desc desc;

		free_index = pool->consumer_index;
		index = pool->free_map[free_index];
		skb = NULL;

		BUG_ON(index == IBM_VETH_INVALID_MAP);

		/* are we allocating a new buffer or recycling an old one */
		if (pool->skbuff[index])
			goto reuse;

		skb = netdev_alloc_skb(adapter->netdev, pool->buff_size);
		if (!skb) {
			netdev_dbg(adapter->netdev,
				   "replenish: unable to allocate skb\n");
			adapter->replenish_no_mem++;
			break;
		}

		dma_addr = dma_map_single(&adapter->vdev->dev, skb->data,
					  pool->buff_size, DMA_FROM_DEVICE);

		if (dma_mapping_error(&adapter->vdev->dev, dma_addr))
			goto failure;

		pool->dma_addr[index] = dma_addr;
		pool->skbuff[index] = skb;

		if (rx_flush) {
			unsigned int len = min(pool->buff_size,
					       adapter->netdev->mtu +
					       IBMVETH_BUFF_OH);
			ibmveth_flush_buffer(skb->data, len);
		}
reuse:
		dma_addr = pool->dma_addr[index];
		desc.fields.flags_len = IBMVETH_BUF_VALID | pool->buff_size;
		desc.fields.address = dma_addr;

		correlator = ((u64)pool->index << 32) | index;
		*(u64 *)pool->skbuff[index]->data = correlator;

		lpar_rc = h_add_logical_lan_buffer(adapter->vdev->unit_address,
						   desc.desc);

		if (lpar_rc != H_SUCCESS) {
			netdev_warn(adapter->netdev,
				    "%sadd_logical_lan failed %lu\n",
				    skb ? "" : "When recycling: ", lpar_rc);
			goto failure;
		}

		pool->free_map[free_index] = IBM_VETH_INVALID_MAP;
		pool->consumer_index++;
		if (pool->consumer_index >= pool->size)
			pool->consumer_index = 0;

		buffers_added++;
		adapter->replenish_add_buff_success++;
	}

	mb();
	atomic_add(buffers_added, &(pool->available));
	return;

failure:
	if (dma_addr && !dma_mapping_error(&adapter->vdev->dev, dma_addr))
		dma_unmap_single(&adapter->vdev->dev,
				 pool->dma_addr[index], pool->buff_size,
				 DMA_FROM_DEVICE);
	dev_kfree_skb_any(pool->skbuff[index]);
	pool->skbuff[index] = NULL;
	adapter->replenish_add_buff_failure++;

	mb();
	atomic_add(buffers_added, &(pool->available));
}

/* The final 8 bytes of the buffer list is a counter of frames dropped
 * because there was not a buffer in the buffer list capable of holding
 * the frame.
 */
static void ibmveth_update_rx_no_buffer(struct ibmveth_adapter *adapter)
{
	__be64 *p = adapter->buffer_list_addr + 4096 - 8;

	adapter->rx_no_buffer = be64_to_cpup(p);
}

/* replenish routine */
static void ibmveth_replenish_task(struct ibmveth_adapter *adapter)
{
	int i;

	adapter->replenish_task_cycles++;

	for (i = (IBMVETH_NUM_BUFF_POOLS - 1); i >= 0; i--) {
		struct ibmveth_buff_pool *pool = &adapter->rx_buff_pool[i];

		if (pool->active &&
		    (atomic_read(&pool->available) < pool->threshold))
			ibmveth_replenish_buffer_pool(adapter, pool);
	}

	ibmveth_update_rx_no_buffer(adapter);
}

/* empty and free a buffer pool - also used to do cleanup in error paths */
static void ibmveth_free_buffer_pool(struct ibmveth_adapter *adapter,
				     struct ibmveth_buff_pool *pool)
{
	int i;

	kfree(pool->free_map);
	pool->free_map = NULL;

	if (pool->skbuff && pool->dma_addr) {
		for (i = 0; i < pool->size; ++i) {
			struct sk_buff *skb = pool->skbuff[i];

			if (skb) {
				dma_unmap_single(&adapter->vdev->dev,
						 pool->dma_addr[i],
						 pool->buff_size,
						 DMA_FROM_DEVICE);
				dev_kfree_skb_any(skb);
				pool->skbuff[i] = NULL;
			}
		}
	}

	if (pool->dma_addr) {
		kfree(pool->dma_addr);
		pool->dma_addr = NULL;
	}

	if (pool->skbuff) {
		kfree(pool->skbuff);
		pool->skbuff = NULL;
	}
}

/* remove a buffer from a pool */
static void ibmveth_remove_buffer_from_pool(struct ibmveth_adapter *adapter,
					    u64 correlator, bool reuse)
{
	unsigned int pool  = correlator >> 32;
	unsigned int index = correlator & 0xffffffffUL;
	unsigned int free_index;
	struct sk_buff *skb;

	BUG_ON(pool >= IBMVETH_NUM_BUFF_POOLS);
	BUG_ON(index >= adapter->rx_buff_pool[pool].size);

	skb = adapter->rx_buff_pool[pool].skbuff[index];
	BUG_ON(skb == NULL);

	/* if we are going to reuse the buffer then keep the pointers around
	 * but mark index as available. replenish will see the skb pointer and
	 * assume it is to be recycled.
	 */
	if (!reuse) {
		/* remove the skb pointer to mark free. actual freeing is done
		 * by upper level networking after gro_receive
		 */
		adapter->rx_buff_pool[pool].skbuff[index] = NULL;

		dma_unmap_single(&adapter->vdev->dev,
				 adapter->rx_buff_pool[pool].dma_addr[index],
				 adapter->rx_buff_pool[pool].buff_size,
				 DMA_FROM_DEVICE);
	}

	free_index = adapter->rx_buff_pool[pool].producer_index;
	adapter->rx_buff_pool[pool].producer_index++;
	if (adapter->rx_buff_pool[pool].producer_index >=
	    adapter->rx_buff_pool[pool].size)
		adapter->rx_buff_pool[pool].producer_index = 0;
	adapter->rx_buff_pool[pool].free_map[free_index] = index;

	mb();

	atomic_dec(&(adapter->rx_buff_pool[pool].available));
}

/* get the current buffer on the rx queue */
static inline struct sk_buff *ibmveth_rxq_get_buffer(struct ibmveth_adapter *adapter)
{
	u64 correlator = adapter->rx_queue.queue_addr[adapter->rx_queue.index].correlator;
	unsigned int pool = correlator >> 32;
	unsigned int index = correlator & 0xffffffffUL;

	BUG_ON(pool >= IBMVETH_NUM_BUFF_POOLS);
	BUG_ON(index >= adapter->rx_buff_pool[pool].size);

	return adapter->rx_buff_pool[pool].skbuff[index];
}
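
/* Return the buffer referenced by the current rx queue entry to its pool and
 * advance the rx queue index, flipping the toggle bit when the queue wraps.
 */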
static void ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter,
				       bool reuse)
{
	u64 cor;

	cor = adapter->rx_queue.queue_addr[adapter->rx_queue.index].correlator;
	ibmveth_remove_buffer_from_pool(adapter, cor, reuse);

	if (++adapter->rx_queue.index == adapter->rx_queue.num_slots) {
		adapter->rx_queue.index = 0;
		adapter->rx_queue.toggle = !adapter->rx_queue.toggle;
	}
}
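
/* Unmap and free the long-term tx buffer backing one tx queue. */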
static void ibmveth_free_tx_ltb(struct ibmveth_adapter *adapter, int idx)
{
	dma_unmap_single(&adapter->vdev->dev, adapter->tx_ltb_dma[idx],
			 adapter->tx_ltb_size, DMA_TO_DEVICE);
	kfree(adapter->tx_ltb_ptr[idx]);
	adapter->tx_ltb_ptr[idx] = NULL;
}

static int ibmveth_allocate_tx_ltb(struct ibmveth_adapter *adapter, int idx)
{
	adapter->tx_ltb_ptr[idx] = kzalloc(adapter->tx_ltb_size,
					   GFP_KERNEL);
	if (!adapter->tx_ltb_ptr[idx]) {
		netdev_err(adapter->netdev,
			   "unable to allocate tx long term buffer\n");
		return -ENOMEM;
	}
	adapter->tx_ltb_dma[idx] = dma_map_single(&adapter->vdev->dev,
						  adapter->tx_ltb_ptr[idx],
						  adapter->tx_ltb_size,
						  DMA_TO_DEVICE);
	if (dma_mapping_error(&adapter->vdev->dev, adapter->tx_ltb_dma[idx])) {
		netdev_err(adapter->netdev,
			   "unable to DMA map tx long term buffer\n");
		kfree(adapter->tx_ltb_ptr[idx]);
		adapter->tx_ltb_ptr[idx] = NULL;
		return -ENOMEM;
	}

	return 0;
}
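
/* Hand the buffer list, receive queue descriptor and filter list over to the
 * hypervisor for this logical LAN.
 */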
static int ibmveth_register_logical_lan(struct ibmveth_adapter *adapter,
	union ibmveth_buf_desc rxq_desc, u64 mac_address)
{
	int rc, try_again = 1;

	/*
	 * After a kexec the adapter will still be open, so our attempt to
	 * open it will fail. So if we get a failure we free the adapter and
	 * try again, but only once.
	 */
retry:
	rc = h_register_logical_lan(adapter->vdev->unit_address,
				    adapter->buffer_list_dma, rxq_desc.desc,
				    adapter->filter_list_dma, mac_address);

	if (rc != H_SUCCESS && try_again) {
		do {
			rc = h_free_logical_lan(adapter->vdev->unit_address);
		} while (H_IS_LONG_BUSY(rc) || (rc == H_BUSY));

		try_again = 0;
		goto retry;
	}

	return rc;
}
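
/* net_device open routine: allocate the buffer/filter lists, the receive
 * queue and the per-queue tx buffers, register the logical LAN with firmware
 * and request the device interrupt.
 */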
static int ibmveth_open(struct net_device *netdev)
{
	struct ibmveth_adapter *adapter = netdev_priv(netdev);
	u64 mac_address;
	int rxq_entries = 1;
	unsigned long lpar_rc;
	int rc;
	union ibmveth_buf_desc rxq_desc;
	int i;
	struct device *dev;

	netdev_dbg(netdev, "open starting\n");

	napi_enable(&adapter->napi);

	for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++)
		rxq_entries += adapter->rx_buff_pool[i].size;

	rc = -ENOMEM;
	adapter->buffer_list_addr = (void *) get_zeroed_page(GFP_KERNEL);
	if (!adapter->buffer_list_addr) {
		netdev_err(netdev, "unable to allocate list pages\n");
		goto out;
	}

	adapter->filter_list_addr = (void *) get_zeroed_page(GFP_KERNEL);
	if (!adapter->filter_list_addr) {
		netdev_err(netdev, "unable to allocate filter pages\n");
		goto out_free_buffer_list;
	}

	dev = &adapter->vdev->dev;

	adapter->rx_queue.queue_len = sizeof(struct ibmveth_rx_q_entry) *
						rxq_entries;
	adapter->rx_queue.queue_addr =
		dma_alloc_coherent(dev, adapter->rx_queue.queue_len,
				   &adapter->rx_queue.queue_dma, GFP_KERNEL);
	if (!adapter->rx_queue.queue_addr)
		goto out_free_filter_list;

	adapter->buffer_list_dma = dma_map_single(dev,
			adapter->buffer_list_addr, 4096, DMA_BIDIRECTIONAL);
	if (dma_mapping_error(dev, adapter->buffer_list_dma)) {
		netdev_err(netdev, "unable to map buffer list pages\n");
		goto out_free_queue_mem;
	}

	adapter->filter_list_dma = dma_map_single(dev,
			adapter->filter_list_addr, 4096, DMA_BIDIRECTIONAL);
	if (dma_mapping_error(dev, adapter->filter_list_dma)) {
		netdev_err(netdev, "unable to map filter list pages\n");
		goto out_unmap_buffer_list;
	}

	for (i = 0; i < netdev->real_num_tx_queues; i++) {
		if (ibmveth_allocate_tx_ltb(adapter, i))
			goto out_free_tx_ltb;
	}

	adapter->rx_queue.index = 0;
	adapter->rx_queue.num_slots = rxq_entries;
	adapter->rx_queue.toggle = 1;

	mac_address = ether_addr_to_u64(netdev->dev_addr);

	rxq_desc.fields.flags_len = IBMVETH_BUF_VALID |
					adapter->rx_queue.queue_len;
	rxq_desc.fields.address = adapter->rx_queue.queue_dma;

	netdev_dbg(netdev, "buffer list @ 0x%p\n", adapter->buffer_list_addr);
	netdev_dbg(netdev, "filter list @ 0x%p\n", adapter->filter_list_addr);
	netdev_dbg(netdev, "receive q @ 0x%p\n", adapter->rx_queue.queue_addr);

	h_vio_signal(adapter->vdev->unit_address, VIO_IRQ_DISABLE);

	lpar_rc = ibmveth_register_logical_lan(adapter, rxq_desc, mac_address);

	if (lpar_rc != H_SUCCESS) {
		netdev_err(netdev, "h_register_logical_lan failed with %ld\n",
			   lpar_rc);
		netdev_err(netdev, "buffer TCE:0x%llx filter TCE:0x%llx rxq "
			   "desc:0x%llx MAC:0x%llx\n",
			   adapter->buffer_list_dma,
			   adapter->filter_list_dma,
			   rxq_desc.desc,
			   mac_address);
		rc = -ENONET;
		goto out_unmap_filter_list;
	}

	for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) {
		if (!adapter->rx_buff_pool[i].active)
			continue;
		if (ibmveth_alloc_buffer_pool(&adapter->rx_buff_pool[i])) {
			netdev_err(netdev, "unable to alloc pool\n");
			adapter->rx_buff_pool[i].active = 0;
			rc = -ENOMEM;
			goto out_free_buffer_pools;
		}
	}

	netdev_dbg(netdev, "registering irq 0x%x\n", netdev->irq);
	rc = request_irq(netdev->irq, ibmveth_interrupt, 0, netdev->name,
			 netdev);
	if (rc != 0) {
		netdev_err(netdev, "unable to request irq 0x%x, rc %d\n",
			   netdev->irq, rc);
		do {
			lpar_rc = h_free_logical_lan(adapter->vdev->unit_address);
		} while (H_IS_LONG_BUSY(lpar_rc) || (lpar_rc == H_BUSY));

		goto out_free_buffer_pools;
	}

	netdev_dbg(netdev, "initial replenish cycle\n");
	ibmveth_interrupt(netdev->irq, netdev);

	netif_tx_start_all_queues(netdev);

	netdev_dbg(netdev, "open complete\n");

	return 0;

out_free_buffer_pools:
	while (--i >= 0) {
		if (adapter->rx_buff_pool[i].active)
			ibmveth_free_buffer_pool(adapter,
						 &adapter->rx_buff_pool[i]);
	}
out_unmap_filter_list:
	dma_unmap_single(dev, adapter->filter_list_dma, 4096,
			 DMA_BIDIRECTIONAL);

out_free_tx_ltb:
	while (--i >= 0)
		ibmveth_free_tx_ltb(adapter, i);

out_unmap_buffer_list:
	dma_unmap_single(dev, adapter->buffer_list_dma, 4096,
			 DMA_BIDIRECTIONAL);
out_free_queue_mem:
	dma_free_coherent(dev, adapter->rx_queue.queue_len,
			  adapter->rx_queue.queue_addr,
			  adapter->rx_queue.queue_dma);
out_free_filter_list:
	free_page((unsigned long)adapter->filter_list_addr);
out_free_buffer_list:
	free_page((unsigned long)adapter->buffer_list_addr);
out:
	napi_disable(&adapter->napi);
	return rc;
}
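
/* net_device stop routine: quiesce NAPI and the tx queues, unregister the
 * logical LAN from the hypervisor and release all rx/tx resources.
 */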
static int ibmveth_close(struct net_device *netdev)
{
	struct ibmveth_adapter *adapter = netdev_priv(netdev);
	struct device *dev = &adapter->vdev->dev;
	long lpar_rc;
	int i;

	netdev_dbg(netdev, "close starting\n");

	napi_disable(&adapter->napi);

	netif_tx_stop_all_queues(netdev);

	h_vio_signal(adapter->vdev->unit_address, VIO_IRQ_DISABLE);

	do {
		lpar_rc = h_free_logical_lan(adapter->vdev->unit_address);
	} while (H_IS_LONG_BUSY(lpar_rc) || (lpar_rc == H_BUSY));

	if (lpar_rc != H_SUCCESS) {
		netdev_err(netdev, "h_free_logical_lan failed with %lx, "
			   "continuing with close\n", lpar_rc);
	}

	free_irq(netdev->irq, netdev);

	ibmveth_update_rx_no_buffer(adapter);

	dma_unmap_single(dev, adapter->buffer_list_dma, 4096,
			 DMA_BIDIRECTIONAL);
	free_page((unsigned long)adapter->buffer_list_addr);

	dma_unmap_single(dev, adapter->filter_list_dma, 4096,
			 DMA_BIDIRECTIONAL);
	free_page((unsigned long)adapter->filter_list_addr);

	dma_free_coherent(dev, adapter->rx_queue.queue_len,
			  adapter->rx_queue.queue_addr,
			  adapter->rx_queue.queue_dma);

	for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++)
		if (adapter->rx_buff_pool[i].active)
			ibmveth_free_buffer_pool(adapter,
						 &adapter->rx_buff_pool[i]);

	for (i = 0; i < netdev->real_num_tx_queues; i++)
		ibmveth_free_tx_ltb(adapter, i);

	netdev_dbg(netdev, "close complete\n");

	return 0;
}

static int ibmveth_set_link_ksettings(struct net_device *dev,
				      const struct ethtool_link_ksettings *cmd)
{
	struct ibmveth_adapter *adapter = netdev_priv(dev);

	return ethtool_virtdev_set_link_ksettings(dev, cmd,
						  &adapter->speed,
						  &adapter->duplex);
}

static int ibmveth_get_link_ksettings(struct net_device *dev,
				      struct ethtool_link_ksettings *cmd)
{
	struct ibmveth_adapter *adapter = netdev_priv(dev);

	cmd->base.speed = adapter->speed;
	cmd->base.duplex = adapter->duplex;
	cmd->base.port = PORT_OTHER;

	return 0;
}

static void ibmveth_init_link_settings(struct net_device *dev)
{
	struct ibmveth_adapter *adapter = netdev_priv(dev);

	adapter->speed = SPEED_1000;
	adapter->duplex = DUPLEX_FULL;
}

static void netdev_get_drvinfo(struct net_device *dev,
			       struct ethtool_drvinfo *info)
{
	strscpy(info->driver, ibmveth_driver_name, sizeof(info->driver));
	strscpy(info->version, ibmveth_driver_version, sizeof(info->version));
}

static netdev_features_t ibmveth_fix_features(struct net_device *dev,
	netdev_features_t features)
{
	/*
	 * Since the ibmveth firmware interface does not have the
	 * concept of separate tx/rx checksum offload enable, if rx
	 * checksum is disabled we also have to disable tx checksum
	 * offload. Once we disable rx checksum offload, we are no
	 * longer allowed to send tx buffers that are not properly
	 * checksummed.
	 */
	if (!(features & NETIF_F_RXCSUM))
		features &= ~NETIF_F_CSUM_MASK;

	return features;
}
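
/* Enable or disable TCP checksum offload in firmware via H_ILLAN_ATTRIBUTES;
 * the device is closed while the attributes are changed and reopened after.
 */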
static int ibmveth_set_csum_offload(struct net_device *dev, u32 data)
{
	struct ibmveth_adapter *adapter = netdev_priv(dev);
	unsigned long set_attr, clr_attr, ret_attr;
	unsigned long set_attr6, clr_attr6;
	long ret, ret4, ret6;
	int rc1 = 0, rc2 = 0;
	int restart = 0;

	if (netif_running(dev)) {
		restart = 1;
		rc1 = ibmveth_close(dev);
		if (rc1)
			return rc1;
	}

	set_attr = 0;
	clr_attr = 0;
	set_attr6 = 0;
	clr_attr6 = 0;

	if (data) {
		set_attr = IBMVETH_ILLAN_IPV4_TCP_CSUM;
		set_attr6 = IBMVETH_ILLAN_IPV6_TCP_CSUM;
	} else {
		clr_attr = IBMVETH_ILLAN_IPV4_TCP_CSUM;
		clr_attr6 = IBMVETH_ILLAN_IPV6_TCP_CSUM;
	}

	ret = h_illan_attributes(adapter->vdev->unit_address, 0, 0, &ret_attr);

	if (ret == H_SUCCESS &&
	    (ret_attr & IBMVETH_ILLAN_PADDED_PKT_CSUM)) {
		ret4 = h_illan_attributes(adapter->vdev->unit_address, clr_attr,
					  set_attr, &ret_attr);

		if (ret4 != H_SUCCESS) {
			netdev_err(dev, "unable to change IPv4 checksum "
					"offload settings. %d rc=%ld\n",
					data, ret4);

			h_illan_attributes(adapter->vdev->unit_address,
					   set_attr, clr_attr, &ret_attr);

			if (data == 1)
				dev->features &= ~NETIF_F_IP_CSUM;
		} else {
			adapter->fw_ipv4_csum_support = data;
		}

		ret6 = h_illan_attributes(adapter->vdev->unit_address,
					  clr_attr6, set_attr6, &ret_attr);

		if (ret6 != H_SUCCESS) {
			netdev_err(dev, "unable to change IPv6 checksum "
					"offload settings. %d rc=%ld\n",
					data, ret6);

			h_illan_attributes(adapter->vdev->unit_address,
					   set_attr6, clr_attr6, &ret_attr);

			if (data == 1)
				dev->features &= ~NETIF_F_IPV6_CSUM;
		} else {
			adapter->fw_ipv6_csum_support = data;
		}

		if (ret4 == H_SUCCESS || ret6 == H_SUCCESS)
			adapter->rx_csum = data;
		else
			rc1 = -EIO;
	} else {
		rc1 = -EIO;
		netdev_err(dev, "unable to change checksum offload settings."
			   " %d rc=%ld ret_attr=%lx\n", data, ret,
			   ret_attr);
	}

	if (restart)
		rc2 = ibmveth_open(dev);

	return rc1 ? rc1 : rc2;
}
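
/* Enable or disable large send (TSO) support in firmware, falling back to the
 * old large-send method when the new firmware attribute is unavailable.
 */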
static int ibmveth_set_tso(struct net_device *dev, u32 data)
{
	struct ibmveth_adapter *adapter = netdev_priv(dev);
	unsigned long set_attr, clr_attr, ret_attr;
	long ret1, ret2;
	int rc1 = 0, rc2 = 0;
	int restart = 0;

	if (netif_running(dev)) {
		restart = 1;
		rc1 = ibmveth_close(dev);
		if (rc1)
			return rc1;
	}

	set_attr = 0;
	clr_attr = 0;

	if (data)
		set_attr = IBMVETH_ILLAN_LRG_SR_ENABLED;
	else
		clr_attr = IBMVETH_ILLAN_LRG_SR_ENABLED;

	ret1 = h_illan_attributes(adapter->vdev->unit_address, 0, 0, &ret_attr);

	if (ret1 == H_SUCCESS && (ret_attr & IBMVETH_ILLAN_LRG_SND_SUPPORT) &&
	    !old_large_send) {
		ret2 = h_illan_attributes(adapter->vdev->unit_address, clr_attr,
					  set_attr, &ret_attr);

		if (ret2 != H_SUCCESS) {
			netdev_err(dev, "unable to change tso settings. %d rc=%ld\n",
				   data, ret2);

			h_illan_attributes(adapter->vdev->unit_address,
					   set_attr, clr_attr, &ret_attr);

			if (data == 1)
				dev->features &= ~(NETIF_F_TSO | NETIF_F_TSO6);
			rc1 = -EIO;
		} else {
			adapter->fw_large_send_support = data;
			adapter->large_send = data;
		}
	} else {
		/* Older firmware version of large send offload does not
		 * support tcp6/ipv6
		 */
		if (data == 1) {
			dev->features &= ~NETIF_F_TSO6;
			netdev_info(dev, "TSO feature requires all partitions to have updated driver");
		}
		adapter->large_send = data;
	}

	if (restart)
		rc2 = ibmveth_open(dev);

	return rc1 ? rc1 : rc2;
}

static int ibmveth_set_features(struct net_device *dev,
	netdev_features_t features)
{
	struct ibmveth_adapter *adapter = netdev_priv(dev);
	int rx_csum = !!(features & NETIF_F_RXCSUM);
	int large_send = !!(features & (NETIF_F_TSO | NETIF_F_TSO6));
	int rc1 = 0, rc2 = 0;

	if (rx_csum != adapter->rx_csum) {
		rc1 = ibmveth_set_csum_offload(dev, rx_csum);
		if (rc1 && !adapter->rx_csum)
			dev->features =
				features & ~(NETIF_F_CSUM_MASK |
					     NETIF_F_RXCSUM);
	}

	if (large_send != adapter->large_send) {
		rc2 = ibmveth_set_tso(dev, large_send);
		if (rc2 && !adapter->large_send)
			dev->features =
				features & ~(NETIF_F_TSO | NETIF_F_TSO6);
	}

	return rc1 ? rc1 : rc2;
}

static void ibmveth_get_strings(struct net_device *dev, u32 stringset, u8 *data)
{
	int i;

	if (stringset != ETH_SS_STATS)
		return;

	for (i = 0; i < ARRAY_SIZE(ibmveth_stats); i++, data += ETH_GSTRING_LEN)
		memcpy(data, ibmveth_stats[i].name, ETH_GSTRING_LEN);
}

static int ibmveth_get_sset_count(struct net_device *dev, int sset)
{
	switch (sset) {
	case ETH_SS_STATS:
		return ARRAY_SIZE(ibmveth_stats);
	default:
		return -EOPNOTSUPP;
	}
}

static void ibmveth_get_ethtool_stats(struct net_device *dev,
				      struct ethtool_stats *stats, u64 *data)
{
	int i;
	struct ibmveth_adapter *adapter = netdev_priv(dev);

	for (i = 0; i < ARRAY_SIZE(ibmveth_stats); i++)
		data[i] = IBMVETH_GET_STAT(adapter, ibmveth_stats[i].offset);
}

static void ibmveth_get_channels(struct net_device *netdev,
				 struct ethtool_channels *channels)
{
	channels->max_tx = ibmveth_real_max_tx_queues();
	channels->tx_count = netdev->real_num_tx_queues;

	channels->max_rx = netdev->real_num_rx_queues;
	channels->rx_count = netdev->real_num_rx_queues;
}

static int ibmveth_set_channels(struct net_device *netdev,
				struct ethtool_channels *channels)
{
	struct ibmveth_adapter *adapter = netdev_priv(netdev);
	unsigned int old = netdev->real_num_tx_queues,
		     goal = channels->tx_count;
	int rc, i;

	/* If ndo_open has not been called yet then don't allocate, just set
	 * desired netdev_queue's and return
	 */
	if (!(netdev->flags & IFF_UP))
		return netif_set_real_num_tx_queues(netdev, goal);

	/* We have IBMVETH_MAX_QUEUES netdev_queue's allocated
	 * but we may need to alloc/free the ltb's.
	 */
	netif_tx_stop_all_queues(netdev);

	/* Allocate any queue that we need */
	for (i = old; i < goal; i++) {
		if (adapter->tx_ltb_ptr[i])
			continue;

		rc = ibmveth_allocate_tx_ltb(adapter, i);
		if (!rc)
			continue;

		/* if something goes wrong, free everything we just allocated */
		netdev_err(netdev, "Failed to allocate more tx queues, returning to %d queues\n",
			   old);
		goal = old;
		old = i;
		break;
	}
	rc = netif_set_real_num_tx_queues(netdev, goal);
	if (rc) {
		netdev_err(netdev, "Failed to set real tx queues, returning to %d queues\n",
			   old);
		goal = old;
		old = i;
	}
	/* Free any that are no longer needed */
	for (i = old; i > goal; i--) {
		if (adapter->tx_ltb_ptr[i - 1])
			ibmveth_free_tx_ltb(adapter, i - 1);
	}

	netif_tx_wake_all_queues(netdev);

	return rc;
}

static const struct ethtool_ops netdev_ethtool_ops = {
	.get_drvinfo		= netdev_get_drvinfo,
	.get_link		= ethtool_op_get_link,
	.get_strings		= ibmveth_get_strings,
	.get_sset_count		= ibmveth_get_sset_count,
	.get_ethtool_stats	= ibmveth_get_ethtool_stats,
	.get_link_ksettings	= ibmveth_get_link_ksettings,
	.set_link_ksettings	= ibmveth_set_link_ksettings,
	.get_channels		= ibmveth_get_channels,
	.set_channels		= ibmveth_set_channels
};

static int ibmveth_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
	return -EOPNOTSUPP;
}
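
/* Issue H_SEND_LOGICAL_LAN for one frame descriptor, retrying while the
 * hypervisor returns H_BUSY.
 */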
static int ibmveth_send(struct ibmveth_adapter *adapter,
			unsigned long desc, unsigned long mss)
{
	unsigned long correlator;
	unsigned int retry_count;
	unsigned long ret;

	/*
	 * The retry count sets a maximum for the number of broadcast and
	 * multicast destinations within the system.
	 */
	retry_count = 1024;
	correlator = 0;
	do {
		ret = h_send_logical_lan(adapter->vdev->unit_address, desc,
					 correlator, &correlator, mss,
					 adapter->fw_large_send_support);
	} while ((ret == H_BUSY) && (retry_count--));

	if (ret != H_SUCCESS && ret != H_DROPPED) {
		netdev_err(adapter->netdev, "tx: h_send_logical_lan failed "
			   "with rc=%ld\n", ret);
		return 1;
	}

	return 0;
}

static int ibmveth_is_packet_unsupported(struct sk_buff *skb,
					 struct net_device *netdev)
{
	struct ethhdr *ether_header;
	int ret = 0;

	ether_header = eth_hdr(skb);

	if (ether_addr_equal(ether_header->h_dest, netdev->dev_addr)) {
		netdev_dbg(netdev, "veth doesn't support loopback packets, dropping packet.\n");
		netdev->stats.tx_dropped++;
		ret = -EOPNOTSUPP;
	}

	return ret;
}
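
/* Transmit path: copy the skb (head and frags) into the per-queue long-term
 * mapped buffer and hand a single descriptor to the hypervisor.
 */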
static netdev_tx_t ibmveth_start_xmit(struct sk_buff *skb,
				      struct net_device *netdev)
{
	struct ibmveth_adapter *adapter = netdev_priv(netdev);
	unsigned int desc_flags, total_bytes;
	union ibmveth_buf_desc desc;
	int i, queue_num = skb_get_queue_mapping(skb);
	unsigned long mss = 0;

	if (ibmveth_is_packet_unsupported(skb, netdev))
		goto out;
	/* veth can't checksum offload UDP */
	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    ((skb->protocol == htons(ETH_P_IP) &&
	      ip_hdr(skb)->protocol != IPPROTO_TCP) ||
	     (skb->protocol == htons(ETH_P_IPV6) &&
	      ipv6_hdr(skb)->nexthdr != IPPROTO_TCP)) &&
	    skb_checksum_help(skb)) {

		netdev_err(netdev, "tx: failed to checksum packet\n");
		netdev->stats.tx_dropped++;
		goto out;
	}

	desc_flags = IBMVETH_BUF_VALID;

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		unsigned char *buf = skb_transport_header(skb) +
						skb->csum_offset;

		desc_flags |= (IBMVETH_BUF_NO_CSUM | IBMVETH_BUF_CSUM_GOOD);

		/* Need to zero out the checksum */
		buf[0] = 0;
		buf[1] = 0;

		if (skb_is_gso(skb) && adapter->fw_large_send_support)
			desc_flags |= IBMVETH_BUF_LRG_SND;
	}

	if (skb->ip_summed == CHECKSUM_PARTIAL && skb_is_gso(skb)) {
		if (adapter->fw_large_send_support) {
			mss = (unsigned long)skb_shinfo(skb)->gso_size;
			adapter->tx_large_packets++;
		} else if (!skb_is_gso_v6(skb)) {
			/* Put -1 in the IP checksum to tell phyp it
			 * is a largesend packet. Put the mss in
			 * the TCP checksum.
			 */
			ip_hdr(skb)->check = 0xffff;
			tcp_hdr(skb)->check =
				cpu_to_be16(skb_shinfo(skb)->gso_size);
			adapter->tx_large_packets++;
		}
	}

	/* Copy header into mapped buffer */
	if (unlikely(skb->len > adapter->tx_ltb_size)) {
		netdev_err(adapter->netdev, "tx: packet size (%u) exceeds ltb (%u)\n",
			   skb->len, adapter->tx_ltb_size);
		netdev->stats.tx_dropped++;
		goto out;
	}
	memcpy(adapter->tx_ltb_ptr[queue_num], skb->data, skb_headlen(skb));
	total_bytes = skb_headlen(skb);
	/* Copy frags into mapped buffers */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		memcpy(adapter->tx_ltb_ptr[queue_num] + total_bytes,
		       skb_frag_address_safe(frag), skb_frag_size(frag));
		total_bytes += skb_frag_size(frag);
	}

	if (unlikely(total_bytes != skb->len)) {
		netdev_err(adapter->netdev, "tx: incorrect packet len copied into ltb (%u != %u)\n",
			   skb->len, total_bytes);
		netdev->stats.tx_dropped++;
		goto out;
	}
	desc.fields.flags_len = desc_flags | skb->len;
	desc.fields.address = adapter->tx_ltb_dma[queue_num];
	/* finish writing to long_term_buff before VIOS accessing it */
	dma_wmb();

	if (ibmveth_send(adapter, desc.desc, mss)) {
		adapter->tx_send_failed++;
		netdev->stats.tx_dropped++;
	} else {
		netdev->stats.tx_packets++;
		netdev->stats.tx_bytes += skb->len;
	}

out:
	dev_consume_skb_any(skb);
	return NETDEV_TX_OK;
}
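
/* Recover gso_size/gso_type for an aggregated (large receive) TCP packet,
 * either from the mss passed in the rx descriptor or from the value the
 * sender stashed in the TCP checksum field.
 */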
static void ibmveth_rx_mss_helper(struct sk_buff *skb, u16 mss, int lrg_pkt)
{
	struct tcphdr *tcph;
	int offset = 0;
	int hdr_len;

	/* only TCP packets will be aggregated */
	if (skb->protocol == htons(ETH_P_IP)) {
		struct iphdr *iph = (struct iphdr *)skb->data;

		if (iph->protocol == IPPROTO_TCP) {
			offset = iph->ihl * 4;
			skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
		} else {
			return;
		}
	} else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct ipv6hdr *iph6 = (struct ipv6hdr *)skb->data;

		if (iph6->nexthdr == IPPROTO_TCP) {
			offset = sizeof(struct ipv6hdr);
			skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
		} else {
			return;
		}
	} else {
		return;
	}
	/* if mss is not set through Large Packet bit/mss in rx buffer,
	 * expect that the mss will be written to the tcp header checksum.
	 */
	tcph = (struct tcphdr *)(skb->data + offset);
	if (lrg_pkt) {
		skb_shinfo(skb)->gso_size = mss;
	} else if (offset) {
		skb_shinfo(skb)->gso_size = ntohs(tcph->check);
		tcph->check = 0;
	}

	if (skb_shinfo(skb)->gso_size) {
		hdr_len = offset + tcph->doff * 4;
		skb_shinfo(skb)->gso_segs =
				DIV_ROUND_UP(skb->len - hdr_len,
					     skb_shinfo(skb)->gso_size);
	}
}
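
/* Fix up the IP header checksum on received large-send frames and, where the
 * TCP checksum was zeroed on the transmit side, rebuild the pseudo-header
 * checksum and mark the skb CHECKSUM_PARTIAL so forwarded packets stay valid.
 */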
static void ibmveth_rx_csum_helper(struct sk_buff *skb,
				   struct ibmveth_adapter *adapter)
{
	struct iphdr *iph = NULL;
	struct ipv6hdr *iph6 = NULL;
	__be16 skb_proto = 0;
	u16 iphlen = 0;
	u16 iph_proto = 0;
	u16 tcphdrlen = 0;

	skb_proto = be16_to_cpu(skb->protocol);

	if (skb_proto == ETH_P_IP) {
		iph = (struct iphdr *)skb->data;

		/* If the IP checksum is not offloaded and if the packet
		 * is large send, the checksum must be rebuilt.
		 */
		if (iph->check == 0xffff) {
			iph->check = 0;
			iph->check = ip_fast_csum((unsigned char *)iph,
						  iph->ihl);
		}

		iphlen = iph->ihl * 4;
		iph_proto = iph->protocol;
	} else if (skb_proto == ETH_P_IPV6) {
		iph6 = (struct ipv6hdr *)skb->data;
		iphlen = sizeof(struct ipv6hdr);
		iph_proto = iph6->nexthdr;
	}

	/* When CSO is enabled the TCP checksum may have been set to NULL by
	 * the sender given that we zeroed out TCP checksum field in
	 * transmit path (refer ibmveth_start_xmit routine). In this case set
	 * up CHECKSUM_PARTIAL. If the packet is forwarded, the checksum will
	 * then be recalculated by the destination NIC (CSO must be enabled
	 * on the destination NIC).
	 *
	 * In an OVS environment, when a flow is not cached, specifically for a
	 * new TCP connection, the first packet information is passed up to
	 * the user space for finding a flow. During this process, OVS computes
	 * checksum on the first packet when CHECKSUM_PARTIAL flag is set.
	 *
	 * So, re-compute TCP pseudo header checksum.
	 */
	if (iph_proto == IPPROTO_TCP) {
		struct tcphdr *tcph = (struct tcphdr *)(skb->data + iphlen);

		if (tcph->check == 0x0000) {
			/* Recompute TCP pseudo header checksum */
			tcphdrlen = skb->len - iphlen;
			if (skb_proto == ETH_P_IP)
				tcph->check =
				 ~csum_tcpudp_magic(iph->saddr,
						    iph->daddr, tcphdrlen, iph_proto, 0);
			else if (skb_proto == ETH_P_IPV6)
				tcph->check =
				 ~csum_ipv6_magic(&iph6->saddr,
						  &iph6->daddr, tcphdrlen, iph_proto, 0);
			/* Setup SKB fields for checksum offload */
			skb_partial_csum_set(skb, iphlen,
					     offsetof(struct tcphdr, check));
			skb_reset_network_header(skb);
		}
	}
}
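
/* NAPI poll routine: harvest completed rx queue entries, pass frames up via
 * GRO and replenish the buffer pools.
 */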
static int ibmveth_poll(struct napi_struct *napi, int budget)
{
	struct ibmveth_adapter *adapter =
			container_of(napi, struct ibmveth_adapter, napi);
	struct net_device *netdev = adapter->netdev;
	int frames_processed = 0;
	unsigned long lpar_rc;
	u16 mss = 0;

restart_poll:
	while (frames_processed < budget) {
		if (!ibmveth_rxq_pending_buffer(adapter))
			break;

		smp_rmb();
		if (!ibmveth_rxq_buffer_valid(adapter)) {
			wmb(); /* suggested by larson1 */
			adapter->rx_invalid_buffer++;
			netdev_dbg(netdev, "recycling invalid buffer\n");
			ibmveth_rxq_harvest_buffer(adapter, true);
		} else {
			struct sk_buff *skb, *new_skb;
			int length = ibmveth_rxq_frame_length(adapter);
			int offset = ibmveth_rxq_frame_offset(adapter);
			int csum_good = ibmveth_rxq_csum_good(adapter);
			int lrg_pkt = ibmveth_rxq_large_packet(adapter);
			__sum16 iph_check = 0;

			skb = ibmveth_rxq_get_buffer(adapter);

			/* if the large packet bit is set in the rx queue
			 * descriptor, the mss will be written by PHYP eight
			 * bytes from the start of the rx buffer, which is
			 * skb->data at this stage
			 */
			if (lrg_pkt) {
				__be64 *rxmss = (__be64 *)(skb->data + 8);

				mss = (u16)be64_to_cpu(*rxmss);
			}

			new_skb = NULL;
			if (length < rx_copybreak)
				new_skb = netdev_alloc_skb(netdev, length);

			if (new_skb) {
				skb_copy_to_linear_data(new_skb,
							skb->data + offset,
							length);
				if (rx_flush)
					ibmveth_flush_buffer(skb->data,
						length + offset);
				ibmveth_rxq_harvest_buffer(adapter, true);
				skb = new_skb;
			} else {
				ibmveth_rxq_harvest_buffer(adapter, false);
				skb_reserve(skb, offset);
			}

			skb_put(skb, length);
			skb->protocol = eth_type_trans(skb, netdev);

			/* PHYP without PLSO support places a -1 in the ip
			 * checksum for large send frames.
			 */
			if (skb->protocol == cpu_to_be16(ETH_P_IP)) {
				struct iphdr *iph = (struct iphdr *)skb->data;

				iph_check = iph->check;
			}

			if ((length > netdev->mtu + ETH_HLEN) ||
			    lrg_pkt || iph_check == 0xffff) {
				ibmveth_rx_mss_helper(skb, mss, lrg_pkt);
				adapter->rx_large_packets++;
			}

			if (csum_good) {
				skb->ip_summed = CHECKSUM_UNNECESSARY;
				ibmveth_rx_csum_helper(skb, adapter);
			}

			napi_gro_receive(napi, skb);	/* send it up */

			netdev->stats.rx_packets++;
			netdev->stats.rx_bytes += length;
			frames_processed++;
		}
	}

	ibmveth_replenish_task(adapter);

	if (frames_processed == budget)
		goto out;

	if (!napi_complete_done(napi, frames_processed))
		goto out;

	/* We think we are done - reenable interrupts,
	 * then check once more to make sure we are done.
	 */
	lpar_rc = h_vio_signal(adapter->vdev->unit_address, VIO_IRQ_ENABLE);
	BUG_ON(lpar_rc != H_SUCCESS);

	if (ibmveth_rxq_pending_buffer(adapter) && napi_schedule(napi)) {
		lpar_rc = h_vio_signal(adapter->vdev->unit_address,
				       VIO_IRQ_DISABLE);
		goto restart_poll;
	}

out:
	return frames_processed;
}
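
/* Device interrupt: disable further VIO interrupts and kick NAPI. */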
static irqreturn_t ibmveth_interrupt(int irq, void *dev_instance)
{
	struct net_device *netdev = dev_instance;
	struct ibmveth_adapter *adapter = netdev_priv(netdev);
	unsigned long lpar_rc;

	if (napi_schedule_prep(&adapter->napi)) {
		lpar_rc = h_vio_signal(adapter->vdev->unit_address,
				       VIO_IRQ_DISABLE);
		BUG_ON(lpar_rc != H_SUCCESS);
		__napi_schedule(&adapter->napi);
	}
	return IRQ_HANDLED;
}

static void ibmveth_set_multicast_list(struct net_device *netdev)
{
	struct ibmveth_adapter *adapter = netdev_priv(netdev);
	unsigned long lpar_rc;

	if ((netdev->flags & IFF_PROMISC) ||
	    (netdev_mc_count(netdev) > adapter->mcastFilterSize)) {
		lpar_rc = h_multicast_ctrl(adapter->vdev->unit_address,
					   IbmVethMcastEnableRecv |
					   IbmVethMcastDisableFiltering,
					   0);
		if (lpar_rc != H_SUCCESS) {
			netdev_err(netdev, "h_multicast_ctrl rc=%ld when "
				   "entering promisc mode\n", lpar_rc);
		}
	} else {
		struct netdev_hw_addr *ha;
		/* clear the filter table & disable filtering */
		lpar_rc = h_multicast_ctrl(adapter->vdev->unit_address,
					   IbmVethMcastEnableRecv |
					   IbmVethMcastDisableFiltering |
					   IbmVethMcastClearFilterTable,
					   0);
		if (lpar_rc != H_SUCCESS) {
			netdev_err(netdev, "h_multicast_ctrl rc=%ld when "
				   "attempting to clear filter table\n",
				   lpar_rc);
		}
		/* add the addresses to the filter table */
		netdev_for_each_mc_addr(ha, netdev) {
			/* add the multicast address to the filter table */
			u64 mcast_addr;

			mcast_addr = ether_addr_to_u64(ha->addr);
			lpar_rc = h_multicast_ctrl(adapter->vdev->unit_address,
						   IbmVethMcastAddFilter,
						   mcast_addr);
			if (lpar_rc != H_SUCCESS) {
				netdev_err(netdev, "h_multicast_ctrl rc=%ld "
					   "when adding an entry to the filter "
					   "table\n", lpar_rc);
			}
		}
		/* re-enable filtering */
		lpar_rc = h_multicast_ctrl(adapter->vdev->unit_address,
					   IbmVethMcastEnableFiltering,
					   0);
		if (lpar_rc != H_SUCCESS) {
			netdev_err(netdev, "h_multicast_ctrl rc=%ld when "
				   "enabling filtering\n", lpar_rc);
		}
	}
}
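
/* Change the MTU by activating a buffer pool large enough for the new size,
 * restarting the device if it is currently running.
 */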
static int ibmveth_change_mtu(struct net_device *dev, int new_mtu)
{
	struct ibmveth_adapter *adapter = netdev_priv(dev);
	struct vio_dev *viodev = adapter->vdev;
	int new_mtu_oh = new_mtu + IBMVETH_BUFF_OH;
	int i, rc;
	int need_restart = 0;

	for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++)
		if (new_mtu_oh <= adapter->rx_buff_pool[i].buff_size)
			break;

	if (i == IBMVETH_NUM_BUFF_POOLS)
		return -EINVAL;

	/* Deactivate all the buffer pools so that the next loop can activate
	   only the buffer pools necessary to hold the new MTU */
	if (netif_running(adapter->netdev)) {
		need_restart = 1;
		ibmveth_close(adapter->netdev);
	}

	/* Look for an active buffer pool that can hold the new MTU */
	for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) {
		adapter->rx_buff_pool[i].active = 1;

		if (new_mtu_oh <= adapter->rx_buff_pool[i].buff_size) {
			WRITE_ONCE(dev->mtu, new_mtu);
			vio_cmo_set_dev_desired(viodev,
						ibmveth_get_desired_dma
						(viodev));
			if (need_restart)
				return ibmveth_open(adapter->netdev);
			return 0;
		}
	}

	if (need_restart && (rc = ibmveth_open(adapter->netdev)))
		return rc;

	return -EINVAL;
}

#ifdef CONFIG_NET_POLL_CONTROLLER
static void ibmveth_poll_controller(struct net_device *dev)
{
	ibmveth_replenish_task(netdev_priv(dev));
	ibmveth_interrupt(dev->irq, dev);
}
#endif

/**
 * ibmveth_get_desired_dma - Calculate IO memory desired by the driver
 *
 * @vdev: struct vio_dev for the device whose desired IO mem is to be returned
 *
 * Return value:
 *	Number of bytes of IO data the driver will need to perform well.
 */
static unsigned long ibmveth_get_desired_dma(struct vio_dev *vdev)
{
	struct net_device *netdev = dev_get_drvdata(&vdev->dev);
	struct ibmveth_adapter *adapter;
	struct iommu_table *tbl;
	unsigned long ret;
	int i;
	int rxqentries = 1;

	tbl = get_iommu_table_base(&vdev->dev);

	/* netdev inits at probe time along with the structures we need below*/
	if (netdev == NULL)
		return IOMMU_PAGE_ALIGN(IBMVETH_IO_ENTITLEMENT_DEFAULT, tbl);

	adapter = netdev_priv(netdev);

	ret = IBMVETH_BUFF_LIST_SIZE + IBMVETH_FILT_LIST_SIZE;
	ret += IOMMU_PAGE_ALIGN(netdev->mtu, tbl);
	/* add size of mapped tx buffers */
	ret += IOMMU_PAGE_ALIGN(IBMVETH_MAX_TX_BUF_SIZE, tbl);

	for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) {
		/* add the size of the active receive buffers */
		if (adapter->rx_buff_pool[i].active)
			ret +=
			    adapter->rx_buff_pool[i].size *
			    IOMMU_PAGE_ALIGN(adapter->rx_buff_pool[i].
					     buff_size, tbl);
		rxqentries += adapter->rx_buff_pool[i].size;
	}
	/* add the size of the receive queue entries */
	ret += IOMMU_PAGE_ALIGN(
		rxqentries * sizeof(struct ibmveth_rx_q_entry), tbl);

	return ret;
}

static int ibmveth_set_mac_addr(struct net_device *dev, void *p)
{
	struct ibmveth_adapter *adapter = netdev_priv(dev);
	struct sockaddr *addr = p;
	u64 mac_address;
	int rc;

	if (!is_valid_ether_addr(addr->sa_data))
		return -EADDRNOTAVAIL;

	mac_address = ether_addr_to_u64(addr->sa_data);
	rc = h_change_logical_lan_mac(adapter->vdev->unit_address, mac_address);
	if (rc) {
		netdev_err(adapter->netdev, "h_change_logical_lan_mac failed with rc=%d\n", rc);
		return rc;
	}

	eth_hw_addr_set(dev, addr->sa_data);

	return 0;
}

static const struct net_device_ops ibmveth_netdev_ops = {
	.ndo_open		= ibmveth_open,
	.ndo_stop		= ibmveth_close,
	.ndo_start_xmit		= ibmveth_start_xmit,
	.ndo_set_rx_mode	= ibmveth_set_multicast_list,
	.ndo_eth_ioctl		= ibmveth_ioctl,
	.ndo_change_mtu		= ibmveth_change_mtu,
	.ndo_fix_features	= ibmveth_fix_features,
	.ndo_set_features	= ibmveth_set_features,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_set_mac_address	= ibmveth_set_mac_addr,
#ifdef CONFIG_NET_POLL_CONTROLLER
	.ndo_poll_controller	= ibmveth_poll_controller,
#endif
};
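
/* vio probe routine: read device-tree attributes, set up the net_device, its
 * offload features and the per-pool sysfs objects, then register the netdev.
 */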
static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id)
{
	int rc, i, mac_len;
	struct net_device *netdev;
	struct ibmveth_adapter *adapter;
	unsigned char *mac_addr_p;
	__be32 *mcastFilterSize_p;
	long ret;
	unsigned long ret_attr;

	dev_dbg(&dev->dev, "entering ibmveth_probe for UA 0x%x\n",
		dev->unit_address);

	mac_addr_p = (unsigned char *)vio_get_attribute(dev, VETH_MAC_ADDR,
							&mac_len);
	if (!mac_addr_p) {
		dev_err(&dev->dev, "Can't find VETH_MAC_ADDR attribute\n");
		return -EINVAL;
	}
	/* Workaround for old/broken pHyp */
	if (mac_len == 8)
		mac_addr_p += 2;
	else if (mac_len != 6) {
		dev_err(&dev->dev, "VETH_MAC_ADDR attribute wrong len %d\n",
			mac_len);
		return -EINVAL;
	}

	mcastFilterSize_p = (__be32 *)vio_get_attribute(dev,
							VETH_MCAST_FILTER_SIZE,
							NULL);
	if (!mcastFilterSize_p) {
		dev_err(&dev->dev, "Can't find VETH_MCAST_FILTER_SIZE "
			"attribute\n");
		return -EINVAL;
	}

	netdev = alloc_etherdev_mqs(sizeof(struct ibmveth_adapter), IBMVETH_MAX_QUEUES, 1);
	if (!netdev)
		return -ENOMEM;

	adapter = netdev_priv(netdev);
	dev_set_drvdata(&dev->dev, netdev);

	adapter->vdev = dev;
	adapter->netdev = netdev;
	adapter->mcastFilterSize = be32_to_cpu(*mcastFilterSize_p);
	ibmveth_init_link_settings(netdev);

	netif_napi_add_weight(netdev, &adapter->napi, ibmveth_poll, 16);

	netdev->irq = dev->irq;
	netdev->netdev_ops = &ibmveth_netdev_ops;
	netdev->ethtool_ops = &netdev_ethtool_ops;
	SET_NETDEV_DEV(netdev, &dev->dev);
	netdev->hw_features = NETIF_F_SG;
	if (vio_get_attribute(dev, "ibm,illan-options", NULL) != NULL) {
		netdev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
				       NETIF_F_RXCSUM;
	}

	netdev->features |= netdev->hw_features;

	ret = h_illan_attributes(adapter->vdev->unit_address, 0, 0, &ret_attr);

	/* If running older firmware, TSO should not be enabled by default */
	if (ret == H_SUCCESS && (ret_attr & IBMVETH_ILLAN_LRG_SND_SUPPORT) &&
	    !old_large_send) {
		netdev->hw_features |= NETIF_F_TSO | NETIF_F_TSO6;
		netdev->features |= netdev->hw_features;
	} else {
		netdev->hw_features |= NETIF_F_TSO;
	}

	adapter->is_active_trunk = false;
	if (ret == H_SUCCESS && (ret_attr & IBMVETH_ILLAN_ACTIVE_TRUNK)) {
		adapter->is_active_trunk = true;
		netdev->hw_features |= NETIF_F_FRAGLIST;
		netdev->features |= NETIF_F_FRAGLIST;
	}

	netdev->min_mtu = IBMVETH_MIN_MTU;
	netdev->max_mtu = ETH_MAX_MTU - IBMVETH_BUFF_OH;

	eth_hw_addr_set(netdev, mac_addr_p);

	if (firmware_has_feature(FW_FEATURE_CMO))
		memcpy(pool_count, pool_count_cmo, sizeof(pool_count));

	for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) {
		struct kobject *kobj = &adapter->rx_buff_pool[i].kobj;
		int error;

		ibmveth_init_buffer_pool(&adapter->rx_buff_pool[i], i,
					 pool_count[i], pool_size[i],
					 pool_active[i]);
		error = kobject_init_and_add(kobj, &ktype_veth_pool,
					     &dev->dev.kobj, "pool%d", i);
		if (!error)
			kobject_uevent(kobj, KOBJ_ADD);
	}

	rc = netif_set_real_num_tx_queues(netdev, min(num_online_cpus(),
						      IBMVETH_DEFAULT_QUEUES));
	if (rc) {
		netdev_dbg(netdev, "failed to set number of tx queues rc=%d\n",
			   rc);
		free_netdev(netdev);
		return rc;
	}
	adapter->tx_ltb_size = PAGE_ALIGN(IBMVETH_MAX_TX_BUF_SIZE);
	for (i = 0; i < IBMVETH_MAX_QUEUES; i++)
		adapter->tx_ltb_ptr[i] = NULL;

	netdev_dbg(netdev, "adapter @ 0x%p\n", adapter);
	netdev_dbg(netdev, "registering netdev...\n");

	ibmveth_set_features(netdev, netdev->features);

	rc = register_netdev(netdev);
	if (rc) {
		netdev_dbg(netdev, "failed to register netdev rc=%d\n", rc);
		free_netdev(netdev);
		return rc;
	}

	netdev_dbg(netdev, "registered\n");

	return 0;
}

static void ibmveth_remove(struct vio_dev *dev)
{
	struct net_device *netdev = dev_get_drvdata(&dev->dev);
	struct ibmveth_adapter *adapter = netdev_priv(netdev);
	int i;

	for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++)
		kobject_put(&adapter->rx_buff_pool[i].kobj);

	unregister_netdev(netdev);

	free_netdev(netdev);
	dev_set_drvdata(&dev->dev, NULL);
}

static struct attribute veth_active_attr;
static struct attribute veth_num_attr;
static struct attribute veth_size_attr;

static ssize_t veth_pool_show(struct kobject *kobj,
			      struct attribute *attr, char *buf)
{
	struct ibmveth_buff_pool *pool = container_of(kobj,
						      struct ibmveth_buff_pool,
						      kobj);

	if (attr == &veth_active_attr)
		return sprintf(buf, "%d\n", pool->active);
	else if (attr == &veth_num_attr)
		return sprintf(buf, "%d\n", pool->size);
	else if (attr == &veth_size_attr)
		return sprintf(buf, "%d\n", pool->buff_size);
	return 0;
}
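
/* sysfs store handler for the per-pool active/num/size attributes; changing a
 * pool on a running device closes and reopens it so the new layout is used.
 */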
static ssize_t veth_pool_store(struct kobject *kobj, struct attribute *attr,
			       const char *buf, size_t count)
{
	struct ibmveth_buff_pool *pool = container_of(kobj,
						      struct ibmveth_buff_pool,
						      kobj);
	struct net_device *netdev = dev_get_drvdata(kobj_to_dev(kobj->parent));
	struct ibmveth_adapter *adapter = netdev_priv(netdev);
	long value = simple_strtol(buf, NULL, 10);
	long rc;

	if (attr == &veth_active_attr) {
		if (value && !pool->active) {
			if (netif_running(netdev)) {
				if (ibmveth_alloc_buffer_pool(pool)) {
					netdev_err(netdev,
						   "unable to alloc pool\n");
					return -ENOMEM;
				}
				pool->active = 1;
				ibmveth_close(netdev);
				if ((rc = ibmveth_open(netdev)))
					return rc;
			} else {
				pool->active = 1;
			}
		} else if (!value && pool->active) {
			int mtu = netdev->mtu + IBMVETH_BUFF_OH;
			int i;
			/* Make sure there is a buffer pool with buffers that
			   can hold a packet of the size of the MTU */
			for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) {
				if (pool == &adapter->rx_buff_pool[i])
					continue;
				if (!adapter->rx_buff_pool[i].active)
					continue;
				if (mtu <= adapter->rx_buff_pool[i].buff_size)
					break;
			}

			if (i == IBMVETH_NUM_BUFF_POOLS) {
				netdev_err(netdev, "no active pool >= MTU\n");
				return -EPERM;
			}

			if (netif_running(netdev)) {
				ibmveth_close(netdev);
				pool->active = 0;
				if ((rc = ibmveth_open(netdev)))
					return rc;
			}
			pool->active = 0;
		}
	} else if (attr == &veth_num_attr) {
		if (value <= 0 || value > IBMVETH_MAX_POOL_COUNT) {
			return -EINVAL;
		} else {
			if (netif_running(netdev)) {
				ibmveth_close(netdev);
				pool->size = value;
				if ((rc = ibmveth_open(netdev)))
					return rc;
			} else {
				pool->size = value;
			}
		}
	} else if (attr == &veth_size_attr) {
		if (value <= IBMVETH_BUFF_OH || value > IBMVETH_MAX_BUF_SIZE) {
			return -EINVAL;
		} else {
			if (netif_running(netdev)) {
				ibmveth_close(netdev);
				pool->buff_size = value;
				if ((rc = ibmveth_open(netdev)))
					return rc;
			} else {
				pool->buff_size = value;
			}
		}
	}

	/* kick the interrupt handler to allocate/deallocate pools */
	ibmveth_interrupt(netdev->irq, netdev);
	return count;
}

#define ATTR(_name, _mode)				\
	struct attribute veth_##_name##_attr = {	\
	.name = __stringify(_name), .mode = _mode,	\
	}

static ATTR(active, 0644);
static ATTR(num, 0644);
static ATTR(size, 0644);

static struct attribute *veth_pool_attrs[] = {
	&veth_active_attr,
	&veth_num_attr,
	&veth_size_attr,
	NULL,
};
ATTRIBUTE_GROUPS(veth_pool);

static const struct sysfs_ops veth_pool_ops = {
	.show   = veth_pool_show,
	.store  = veth_pool_store,
};

static struct kobj_type ktype_veth_pool = {
	.release        = NULL,
	.sysfs_ops      = &veth_pool_ops,
	.default_groups = veth_pool_groups,
};

static int ibmveth_resume(struct device *dev)
{
	struct net_device *netdev = dev_get_drvdata(dev);

	ibmveth_interrupt(netdev->irq, netdev);
	return 0;
}

static const struct vio_device_id ibmveth_device_table[] = {
	{ "network", "IBM,l-lan"},
	{ "", "" }
};
MODULE_DEVICE_TABLE(vio, ibmveth_device_table);

static const struct dev_pm_ops ibmveth_pm_ops = {
	.resume = ibmveth_resume
};

static struct vio_driver ibmveth_driver = {
	.id_table	= ibmveth_device_table,
	.probe		= ibmveth_probe,
	.remove		= ibmveth_remove,
	.get_desired_dma = ibmveth_get_desired_dma,
	.name		= ibmveth_driver_name,
	.pm		= &ibmveth_pm_ops,
};

static int __init ibmveth_module_init(void)
{
	printk(KERN_DEBUG "%s: %s %s\n", ibmveth_driver_name,
	       ibmveth_driver_string, ibmveth_driver_version);

	return vio_register_driver(&ibmveth_driver);
}

static void __exit ibmveth_module_exit(void)
{
	vio_unregister_driver(&ibmveth_driver);
}

module_init(ibmveth_module_init);
module_exit(ibmveth_module_exit);