/*
 * Network-device interface management.
 *
 * Copyright (c) 2004-2005, Keir Fraser
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include "common.h"

#include <linux/kthread.h>
#include <linux/ethtool.h>
#include <linux/rtnetlink.h>
#include <linux/if_vlan.h>
#include <linux/vmalloc.h>

#include <xen/events.h>
#include <asm/xen/hypercall.h>
#include <xen/balloon.h>
#define XENVIF_QUEUE_LENGTH 32
#define XENVIF_NAPI_WEIGHT  64

/* Number of bytes allowed on the internal guest Rx queue. */
#define XENVIF_RX_QUEUE_BYTES (XEN_NETIF_RX_RING_SIZE/2 * PAGE_SIZE)
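/* The limit of XEN_NETIF_RX_RING_SIZE/2 * PAGE_SIZE caps the internal
 * Rx queue at roughly half a ring's worth of full pages, so a guest
 * that stops consuming responses cannot pin an unbounded amount of
 * backend memory in queued skbs.
 */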
/* This function is used to set SKBTX_DEV_ZEROCOPY as well as
 * increasing the inflight counter. We need to increase the inflight
 * counter because the core driver calls into xenvif_zerocopy_callback,
 * which calls xenvif_skb_zerocopy_complete.
 */
void xenvif_skb_zerocopy_prepare(struct xenvif_queue *queue,
				 struct sk_buff *skb)
{
	skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
	atomic_inc(&queue->inflight_packets);
}
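/* Called from xenvif_zerocopy_callback() once the grant mappings backing
 * a zerocopy skb have been released; balances the atomic_inc() in
 * xenvif_skb_zerocopy_prepare() so teardown can wait for in-flight
 * packets to drain.
 */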
void xenvif_skb_zerocopy_complete(struct xenvif_queue *queue)
{
	atomic_dec(&queue->inflight_packets);
}
int xenvif_schedulable(struct xenvif *vif)
{
	return netif_running(vif->dev) &&
		test_bit(VIF_STATUS_CONNECTED, &vif->status) &&
		!vif->disabled;
}
static irqreturn_t xenvif_tx_interrupt(int irq, void *dev_id)
{
	struct xenvif_queue *queue = dev_id;

	if (RING_HAS_UNCONSUMED_REQUESTS(&queue->tx))
		napi_schedule(&queue->napi);

	return IRQ_HANDLED;
}
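/* NAPI poll handler. Returning a value smaller than @budget tells the
 * core that this queue has no more work for now; in that case we also
 * re-check the ring and re-enable event notifications, since a request
 * may have arrived between the final ring check and napi_complete().
 */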
static int xenvif_poll(struct napi_struct *napi, int budget)
{
	struct xenvif_queue *queue =
		container_of(napi, struct xenvif_queue, napi);
	int work_done;

	/* This vif is rogue, so pretend there is nothing to do for it
	 * in order to deschedule it from NAPI. The interface itself
	 * will be turned off in thread context later.
	 */
	if (unlikely(queue->vif->disabled)) {
		napi_complete(napi);
		return 0;
	}

	work_done = xenvif_tx_action(queue, budget);

	if (work_done < budget) {
		napi_complete(napi);
		xenvif_napi_schedule_or_enable_events(queue);
	}

	return work_done;
}
static irqreturn_t xenvif_rx_interrupt(int irq, void *dev_id)
{
	struct xenvif_queue *queue = dev_id;

	xenvif_kick_thread(queue);

	return IRQ_HANDLED;
}
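/* Combined handler, used when the frontend does not negotiate
 * feature-split-event-channels and Tx and Rx share a single event
 * channel: simply run both per-direction handlers.
 */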
irqreturn_t xenvif_interrupt(int irq, void *dev_id)
{
	xenvif_tx_interrupt(irq, dev_id);
	xenvif_rx_interrupt(irq, dev_id);

	return IRQ_HANDLED;
}
int xenvif_queue_stopped(struct xenvif_queue *queue)
{
	struct net_device *dev = queue->vif->dev;
	unsigned int id = queue->id;
	return netif_tx_queue_stopped(netdev_get_tx_queue(dev, id));
}
void xenvif_wake_queue(struct xenvif_queue *queue)
{
	struct net_device *dev = queue->vif->dev;
	unsigned int id = queue->id;
	netif_tx_wake_queue(netdev_get_tx_queue(dev, id));
}
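/* ndo_start_xmit: map the skb to a backend queue, stamp it with a
 * drain deadline, put it on that queue's internal Rx queue (the
 * backend's transmit path is the guest's receive path) and kick the
 * per-queue kthread that pushes packets to the frontend.
 */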
static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct xenvif *vif = netdev_priv(dev);
	struct xenvif_queue *queue = NULL;
	unsigned int num_queues = vif->num_queues;
	u16 index;
	struct xenvif_rx_cb *cb;

	BUG_ON(skb->dev != dev);

	/* Drop the packet if queues are not set up */
	if (num_queues < 1)
		goto drop;

	/* Obtain the queue to be used to transmit this packet */
	index = skb_get_queue_mapping(skb);
	if (index >= num_queues) {
		pr_warn_ratelimited("Invalid queue %hu for packet on interface %s\n",
				    index, vif->dev->name);
		index %= num_queues;
	}
	queue = &vif->queues[index];

	/* Drop the packet if queue is not ready */
	if (queue->task == NULL ||
	    queue->dealloc_task == NULL ||
	    !xenvif_schedulable(vif))
		goto drop;

	cb = XENVIF_RX_CB(skb);
	cb->expires = jiffies + vif->drain_timeout;

	xenvif_rx_queue_tail(queue, skb);
	xenvif_kick_thread(queue);

	return NETDEV_TX_OK;

 drop:
	vif->dev->stats.tx_dropped++;
	dev_kfree_skb(skb);
	return NETDEV_TX_OK;
}
static struct net_device_stats *xenvif_get_stats(struct net_device *dev)
{
	struct xenvif *vif = netdev_priv(dev);
	struct xenvif_queue *queue = NULL;
	unsigned int num_queues = vif->num_queues;
	unsigned long rx_bytes = 0;
	unsigned long rx_packets = 0;
	unsigned long tx_bytes = 0;
	unsigned long tx_packets = 0;
	unsigned int index;

	if (vif->queues == NULL)
		goto out;

	/* Aggregate tx and rx stats from each queue */
	for (index = 0; index < num_queues; ++index) {
		queue = &vif->queues[index];
		rx_bytes += queue->stats.rx_bytes;
		rx_packets += queue->stats.rx_packets;
		tx_bytes += queue->stats.tx_bytes;
		tx_packets += queue->stats.tx_packets;
	}

out:
	vif->dev->stats.rx_bytes = rx_bytes;
	vif->dev->stats.rx_packets = rx_packets;
	vif->dev->stats.tx_bytes = tx_bytes;
	vif->dev->stats.tx_packets = tx_packets;

	return &vif->dev->stats;
}
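/* Bring every queue up: enable NAPI and the (initially disabled) Tx/Rx
 * IRQs, then poke each queue once in case ring requests arrived while
 * the interface was down.
 */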
static void xenvif_up(struct xenvif *vif)
{
	struct xenvif_queue *queue = NULL;
	unsigned int num_queues = vif->num_queues;
	unsigned int queue_index;

	for (queue_index = 0; queue_index < num_queues; ++queue_index) {
		queue = &vif->queues[queue_index];
		napi_enable(&queue->napi);
		enable_irq(queue->tx_irq);
		if (queue->tx_irq != queue->rx_irq)
			enable_irq(queue->rx_irq);
		xenvif_napi_schedule_or_enable_events(queue);
	}
}
static void xenvif_down(struct xenvif *vif)
{
	struct xenvif_queue *queue = NULL;
	unsigned int num_queues = vif->num_queues;
	unsigned int queue_index;

	for (queue_index = 0; queue_index < num_queues; ++queue_index) {
		queue = &vif->queues[queue_index];
		disable_irq(queue->tx_irq);
		if (queue->tx_irq != queue->rx_irq)
			disable_irq(queue->rx_irq);
		napi_disable(&queue->napi);
		del_timer_sync(&queue->credit_timeout);
	}
}
static int xenvif_open(struct net_device *dev)
{
	struct xenvif *vif = netdev_priv(dev);
	if (test_bit(VIF_STATUS_CONNECTED, &vif->status))
		xenvif_up(vif);
	netif_tx_start_all_queues(dev);
	return 0;
}
static int xenvif_close(struct net_device *dev)
{
	struct xenvif *vif = netdev_priv(dev);
	if (test_bit(VIF_STATUS_CONNECTED, &vif->status))
		xenvif_down(vif);
	netif_tx_stop_all_queues(dev);
	return 0;
}
static int xenvif_change_mtu(struct net_device *dev, int mtu)
{
	struct xenvif *vif = netdev_priv(dev);
	int max = vif->can_sg ? 65535 - VLAN_ETH_HLEN : ETH_DATA_LEN;

	if (mtu > max)
		return -EINVAL;
	dev->mtu = mtu;
	return 0;
}
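/* Mask out features the frontend has not negotiated: SG, TSO/TSO6 and
 * checksum offload are only advertised when the guest's capability
 * bits (can_sg, gso_mask/gso_prefix_mask, ip_csum/ipv6_csum) allow it.
 */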
static netdev_features_t xenvif_fix_features(struct net_device *dev,
					     netdev_features_t features)
{
	struct xenvif *vif = netdev_priv(dev);

	if (!vif->can_sg)
		features &= ~NETIF_F_SG;
	if (~(vif->gso_mask | vif->gso_prefix_mask) & GSO_BIT(TCPV4))
		features &= ~NETIF_F_TSO;
	if (~(vif->gso_mask | vif->gso_prefix_mask) & GSO_BIT(TCPV6))
		features &= ~NETIF_F_TSO6;
	if (!vif->ip_csum)
		features &= ~NETIF_F_IP_CSUM;
	if (!vif->ipv6_csum)
		features &= ~NETIF_F_IPV6_CSUM;

	return features;
}
static const struct xenvif_stat {
	char name[ETH_GSTRING_LEN];
	u16 offset;
} xenvif_stats[] = {
	{
		"rx_gso_checksum_fixup",
		offsetof(struct xenvif_stats, rx_gso_checksum_fixup)
	},
	/* If (sent != success + fail), there are probably packets never
	 * freed up properly!
	 */
	{
		"tx_zerocopy_sent",
		offsetof(struct xenvif_stats, tx_zerocopy_sent),
	},
	{
		"tx_zerocopy_success",
		offsetof(struct xenvif_stats, tx_zerocopy_success),
	},
	{
		"tx_zerocopy_fail",
		offsetof(struct xenvif_stats, tx_zerocopy_fail)
	},
	/* Number of packets exceeding MAX_SKB_FRAGS slots. You should use
	 * a guest with the same MAX_SKB_FRAGS value.
	 */
	{
		"tx_frag_overflow",
		offsetof(struct xenvif_stats, tx_frag_overflow)
	},
};
static int xenvif_get_sset_count(struct net_device *dev, int string_set)
{
	switch (string_set) {
	case ETH_SS_STATS:
		return ARRAY_SIZE(xenvif_stats);
	default:
		return -EINVAL;
	}
}
static void xenvif_get_ethtool_stats(struct net_device *dev,
				     struct ethtool_stats *stats, u64 *data)
{
	struct xenvif *vif = netdev_priv(dev);
	unsigned int num_queues = vif->num_queues;
	int i;
	unsigned int queue_index;

	for (i = 0; i < ARRAY_SIZE(xenvif_stats); i++) {
		unsigned long accum = 0;

		for (queue_index = 0; queue_index < num_queues; ++queue_index) {
			void *vif_stats = &vif->queues[queue_index].stats;
			accum += *(unsigned long *)(vif_stats +
						    xenvif_stats[i].offset);
		}
		data[i] = accum;
	}
}
static void xenvif_get_strings(struct net_device *dev, u32 stringset, u8 *data)
{
	int i;

	switch (stringset) {
	case ETH_SS_STATS:
		for (i = 0; i < ARRAY_SIZE(xenvif_stats); i++)
			memcpy(data + i * ETH_GSTRING_LEN,
			       xenvif_stats[i].name, ETH_GSTRING_LEN);
		break;
	}
}
static const struct ethtool_ops xenvif_ethtool_ops = {
	.get_link	= ethtool_op_get_link,

	.get_sset_count = xenvif_get_sset_count,
	.get_ethtool_stats = xenvif_get_ethtool_stats,
	.get_strings = xenvif_get_strings,
};
static const struct net_device_ops xenvif_netdev_ops = {
	.ndo_start_xmit	= xenvif_start_xmit,
	.ndo_get_stats	= xenvif_get_stats,
	.ndo_open	= xenvif_open,
	.ndo_stop	= xenvif_close,
	.ndo_change_mtu	= xenvif_change_mtu,
	.ndo_fix_features = xenvif_fix_features,
	.ndo_set_mac_address = eth_mac_addr,
	.ndo_validate_addr   = eth_validate_addr,
};
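/* Allocate and register the netdev for a new vif. The interface is
 * named vif<domid>.<handle> and starts with carrier off and no queues;
 * queues are allocated later, once the frontend reports how many it
 * wants.
 */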
struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
			    unsigned int handle)
{
	int err;
	struct net_device *dev;
	struct xenvif *vif;
	char name[IFNAMSIZ] = {};

	snprintf(name, IFNAMSIZ - 1, "vif%u.%u", domid, handle);
	/* Allocate a netdev with the max. supported number of queues.
	 * When the guest selects the desired number, it will be updated
	 * via netif_set_real_num_*_queues().
	 */
	dev = alloc_netdev_mq(sizeof(struct xenvif), name, NET_NAME_UNKNOWN,
			      ether_setup, xenvif_max_queues);
	if (dev == NULL) {
		pr_warn("Could not allocate netdev for %s\n", name);
		return ERR_PTR(-ENOMEM);
	}

	SET_NETDEV_DEV(dev, parent);

	vif = netdev_priv(dev);

	vif->domid  = domid;
	vif->handle = handle;
	vif->can_sg = 1;
	vif->ip_csum = 1;
	vif->dev = dev;
	vif->disabled = false;
	vif->drain_timeout = msecs_to_jiffies(rx_drain_timeout_msecs);
	vif->stall_timeout = msecs_to_jiffies(rx_stall_timeout_msecs);

	/* Start out with no queues. */
	vif->queues = NULL;
	vif->num_queues = 0;

	spin_lock_init(&vif->lock);

	dev->netdev_ops	= &xenvif_netdev_ops;
	dev->hw_features = NETIF_F_SG |
		NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
		NETIF_F_TSO | NETIF_F_TSO6;
	dev->features = dev->hw_features | NETIF_F_RXCSUM;
	dev->ethtool_ops = &xenvif_ethtool_ops;

	dev->tx_queue_len = XENVIF_QUEUE_LENGTH;

	/*
	 * Initialise a dummy MAC address. We choose the numerically
	 * largest non-broadcast address to prevent the address getting
	 * stolen by an Ethernet bridge for STP purposes.
	 * (FE:FF:FF:FF:FF:FF)
	 */
	eth_broadcast_addr(dev->dev_addr);
	dev->dev_addr[0] &= ~0x01;

	netif_carrier_off(dev);

	err = register_netdev(dev);
	if (err) {
		netdev_warn(dev, "Could not register device: err=%d\n", err);
		free_netdev(dev);
		return ERR_PTR(err);
	}

	netdev_dbg(dev, "Successfully created xenvif\n");

	__module_get(THIS_MODULE);

	return vif;
}
int xenvif_init_queue(struct xenvif_queue *queue)
{
	int err, i;

	queue->credit_bytes = queue->remaining_credit = ~0UL;
	queue->credit_usec  = 0UL;
	init_timer(&queue->credit_timeout);
	queue->credit_timeout.function = xenvif_tx_credit_callback;
	queue->credit_window_start = get_jiffies_64();

	queue->rx_queue_max = XENVIF_RX_QUEUE_BYTES;

	skb_queue_head_init(&queue->rx_queue);
	skb_queue_head_init(&queue->tx_queue);

	queue->pending_cons = 0;
	queue->pending_prod = MAX_PENDING_REQS;
	for (i = 0; i < MAX_PENDING_REQS; ++i)
		queue->pending_ring[i] = i;

	spin_lock_init(&queue->callback_lock);
	spin_lock_init(&queue->response_lock);

	/* If ballooning is disabled, this will consume real memory, so you
	 * better enable it. The long term solution would be to use just a
	 * bunch of valid page descriptors, without dependency on ballooning.
	 */
	err = gnttab_alloc_pages(MAX_PENDING_REQS,
				 queue->mmap_pages);
	if (err) {
		netdev_err(queue->vif->dev, "Could not reserve mmap_pages\n");
		return -ENOMEM;
	}

	for (i = 0; i < MAX_PENDING_REQS; i++) {
		queue->pending_tx_info[i].callback_struct = (struct ubuf_info)
			{ .callback = xenvif_zerocopy_callback,
			  .ctx = NULL,
			  .desc = i };
		queue->grant_tx_handle[i] = NETBACK_INVALID_HANDLE;
	}

	return 0;
}
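/* Takes rtnl_lock: clamp the MTU if the frontend cannot handle
 * scatter-gather, re-evaluate offload features, mark the vif connected
 * and, if the device is already up, enable its queues.
 */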
void xenvif_carrier_on(struct xenvif *vif)
{
	rtnl_lock();
	if (!vif->can_sg && vif->dev->mtu > ETH_DATA_LEN)
		dev_set_mtu(vif->dev, ETH_DATA_LEN);
	netdev_update_features(vif->dev);
	set_bit(VIF_STATUS_CONNECTED, &vif->status);
	if (netif_running(vif->dev))
		xenvif_up(vif);
	rtnl_unlock();
}
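/* Connect a queue to the frontend: map the shared Tx/Rx rings, bind
 * either one combined or two split event channel IRQs (created
 * disabled; xenvif_up() enables them), and spawn the per-queue
 * guest-rx and dealloc kthreads. On failure everything bound or
 * mapped so far is unwound in reverse order.
 */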
int xenvif_connect(struct xenvif_queue *queue, unsigned long tx_ring_ref,
		   unsigned long rx_ring_ref, unsigned int tx_evtchn,
		   unsigned int rx_evtchn)
{
	struct task_struct *task;
	int err = -ENOMEM;

	BUG_ON(queue->tx_irq);
	BUG_ON(queue->task);
	BUG_ON(queue->dealloc_task);

	err = xenvif_map_frontend_rings(queue, tx_ring_ref, rx_ring_ref);
	if (err < 0)
		goto err;

	init_waitqueue_head(&queue->wq);
	init_waitqueue_head(&queue->dealloc_wq);
	atomic_set(&queue->inflight_packets, 0);

	netif_napi_add(queue->vif->dev, &queue->napi, xenvif_poll,
		       XENVIF_NAPI_WEIGHT);

	if (tx_evtchn == rx_evtchn) {
		/* feature-split-event-channels == 0 */
		err = bind_interdomain_evtchn_to_irqhandler(
			queue->vif->domid, tx_evtchn, xenvif_interrupt, 0,
			queue->name, queue);
		if (err < 0)
			goto err_unmap;
		queue->tx_irq = queue->rx_irq = err;
		disable_irq(queue->tx_irq);
	} else {
		/* feature-split-event-channels == 1 */
		snprintf(queue->tx_irq_name, sizeof(queue->tx_irq_name),
			 "%s-tx", queue->name);
		err = bind_interdomain_evtchn_to_irqhandler(
			queue->vif->domid, tx_evtchn, xenvif_tx_interrupt, 0,
			queue->tx_irq_name, queue);
		if (err < 0)
			goto err_unmap;
		queue->tx_irq = err;
		disable_irq(queue->tx_irq);

		snprintf(queue->rx_irq_name, sizeof(queue->rx_irq_name),
			 "%s-rx", queue->name);
		err = bind_interdomain_evtchn_to_irqhandler(
			queue->vif->domid, rx_evtchn, xenvif_rx_interrupt, 0,
			queue->rx_irq_name, queue);
		if (err < 0)
			goto err_tx_unbind;
		queue->rx_irq = err;
		disable_irq(queue->rx_irq);
	}

	queue->stalled = true;

	task = kthread_create(xenvif_kthread_guest_rx,
			      (void *)queue, "%s-guest-rx", queue->name);
	if (IS_ERR(task)) {
		pr_warn("Could not allocate kthread for %s\n", queue->name);
		err = PTR_ERR(task);
		goto err_rx_unbind;
	}
	queue->task = task;
	get_task_struct(task);

	task = kthread_create(xenvif_dealloc_kthread,
			      (void *)queue, "%s-dealloc", queue->name);
	if (IS_ERR(task)) {
		pr_warn("Could not allocate kthread for %s\n", queue->name);
		err = PTR_ERR(task);
		goto err_rx_unbind;
	}
	queue->dealloc_task = task;

	wake_up_process(queue->task);
	wake_up_process(queue->dealloc_task);

	return 0;

err_rx_unbind:
	unbind_from_irqhandler(queue->rx_irq, queue);
	queue->rx_irq = 0;
err_tx_unbind:
	unbind_from_irqhandler(queue->tx_irq, queue);
	queue->tx_irq = 0;
err_unmap:
	xenvif_unmap_frontend_rings(queue);
err:
	module_put(THIS_MODULE);
	return err;
}
void xenvif_carrier_off(struct xenvif *vif)
{
	struct net_device *dev = vif->dev;

	rtnl_lock();
	if (test_and_clear_bit(VIF_STATUS_CONNECTED, &vif->status)) {
		netif_carrier_off(dev); /* discard queued packets */
		if (netif_running(dev))
			xenvif_down(vif);
	}
	rtnl_unlock();
}
void xenvif_disconnect(struct xenvif *vif)
{
	struct xenvif_queue *queue = NULL;
	unsigned int num_queues = vif->num_queues;
	unsigned int queue_index;

	xenvif_carrier_off(vif);

	for (queue_index = 0; queue_index < num_queues; ++queue_index) {
		queue = &vif->queues[queue_index];

		netif_napi_del(&queue->napi);

		if (queue->task) {
			kthread_stop(queue->task);
			put_task_struct(queue->task);
			queue->task = NULL;
		}

		if (queue->dealloc_task) {
			kthread_stop(queue->dealloc_task);
			queue->dealloc_task = NULL;
		}

		if (queue->tx_irq) {
			if (queue->tx_irq == queue->rx_irq)
				unbind_from_irqhandler(queue->tx_irq, queue);
			else {
				unbind_from_irqhandler(queue->tx_irq, queue);
				unbind_from_irqhandler(queue->rx_irq, queue);
			}
			queue->tx_irq = 0;
		}

		xenvif_unmap_frontend_rings(queue);
	}
}
/* Reverse the relevant parts of xenvif_init_queue().
 * Used for queue teardown from xenvif_free(), and on the
 * error handling paths in xenbus.c:connect().
 */
void xenvif_deinit_queue(struct xenvif_queue *queue)
{
	gnttab_free_pages(MAX_PENDING_REQS, queue->mmap_pages);
}
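/* Final teardown: unregister the netdev, deinit every queue, release
 * the queue array and the netdev itself, and drop the module reference
 * taken in xenvif_alloc().
 */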
void xenvif_free(struct xenvif *vif)
{
	struct xenvif_queue *queue = NULL;
	unsigned int num_queues = vif->num_queues;
	unsigned int queue_index;

	unregister_netdev(vif->dev);

	for (queue_index = 0; queue_index < num_queues; ++queue_index) {
		queue = &vif->queues[queue_index];
		xenvif_deinit_queue(queue);
	}

	vfree(vif->queues);
	vif->queues = NULL;
	vif->num_queues = 0;

	free_netdev(vif->dev);

	module_put(THIS_MODULE);
}