2 * Virtio Network Device
4 * Copyright IBM, Corp. 2007
7 * Anthony Liguori <aliguori@us.ibm.com>
9 * This work is licensed under the terms of the GNU GPL, version 2. See
10 * the COPYING file in the top-level directory.
14 #include "qemu/osdep.h"
16 #include "qemu/main-loop.h"
17 #include "qemu/module.h"
18 #include "hw/virtio/virtio.h"
20 #include "net/checksum.h"
22 #include "qemu/error-report.h"
23 #include "qemu/timer.h"
24 #include "hw/virtio/virtio-net.h"
25 #include "net/vhost_net.h"
26 #include "net/announce.h"
27 #include "hw/virtio/virtio-bus.h"
28 #include "qapi/error.h"
29 #include "qapi/qapi-events-net.h"
30 #include "hw/qdev-properties.h"
31 #include "hw/virtio/virtio-access.h"
32 #include "migration/misc.h"
33 #include "standard-headers/linux/ethtool.h"
34 #include "sysemu/sysemu.h"
37 #define VIRTIO_NET_VM_VERSION 11
39 #define MAC_TABLE_ENTRIES 64
40 #define MAX_VLAN (1 << 12) /* Per 802.1Q definition */
42 /* previously fixed value */
43 #define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
44 #define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256
46 /* for now, only allow larger queues; with virtio-1, guest can downsize */
47 #define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
48 #define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE
50 #define VIRTIO_NET_IP4_ADDR_SIZE 8 /* ipv4 saddr + daddr */
/* TCP header parsing masks used by receive segment coalescing (RSC):
 * flag bits and the data-offset field of the TCP header. */
52 #define VIRTIO_NET_TCP_FLAG 0x3F
53 #define VIRTIO_NET_TCP_HDR_LENGTH 0xF000
55 /* IPv4 max payload, 16 bits in the header */
56 #define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
57 #define VIRTIO_NET_MAX_TCP_PAYLOAD 65535
59 /* header length value in ip header without option */
60 #define VIRTIO_NET_IP4_HEADER_LENGTH 5
62 #define VIRTIO_NET_IP6_ADDR_SIZE 32 /* ipv6 saddr + daddr */
63 #define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD
65 /* Purge coalesced packets timer interval, This value affects the performance
66 a lot, and should be tuned carefully, '300000'(300us) is the recommended
67 value to pass the WHQL test, '50000' can gain 2x netperf throughput with
   tso/gso/gro disabled. */
69 #define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000
71 /* temporary until standard header include it */
/* NOTE(review): this #if is closed by an #endif after the two RSC accessor
 * helpers below — confirm the #endif survived in the full file. */
72 #if !defined(VIRTIO_NET_HDR_F_RSC_INFO)
74 #define VIRTIO_NET_HDR_F_RSC_INFO 4 /* rsc_ext data in csum_ fields */
75 #define VIRTIO_NET_F_RSC_EXT 61
77 static inline __virtio16
*virtio_net_rsc_ext_num_packets(
78 struct virtio_net_hdr
*hdr
)
80 return &hdr
->csum_start
;
83 static inline __virtio16
*virtio_net_rsc_ext_num_dupacks(
84 struct virtio_net_hdr
*hdr
)
86 return &hdr
->csum_offset
;
91 static VirtIOFeature feature_sizes
[] = {
92 {.flags
= 1ULL << VIRTIO_NET_F_MAC
,
93 .end
= virtio_endof(struct virtio_net_config
, mac
)},
94 {.flags
= 1ULL << VIRTIO_NET_F_STATUS
,
95 .end
= virtio_endof(struct virtio_net_config
, status
)},
96 {.flags
= 1ULL << VIRTIO_NET_F_MQ
,
97 .end
= virtio_endof(struct virtio_net_config
, max_virtqueue_pairs
)},
98 {.flags
= 1ULL << VIRTIO_NET_F_MTU
,
99 .end
= virtio_endof(struct virtio_net_config
, mtu
)},
100 {.flags
= 1ULL << VIRTIO_NET_F_SPEED_DUPLEX
,
101 .end
= virtio_endof(struct virtio_net_config
, duplex
)},
105 static VirtIONetQueue
*virtio_net_get_subqueue(NetClientState
*nc
)
107 VirtIONet
*n
= qemu_get_nic_opaque(nc
);
109 return &n
->vqs
[nc
->queue_index
];
/* Map a virtqueue index to its queue-pair index (RX/TX share one pair). */
static int vq2q(int queue_index)
{
    return queue_index / 2;
}
118 * - we could suppress RX interrupt if we were so inclined.
121 static void virtio_net_get_config(VirtIODevice
*vdev
, uint8_t *config
)
123 VirtIONet
*n
= VIRTIO_NET(vdev
);
124 struct virtio_net_config netcfg
;
126 virtio_stw_p(vdev
, &netcfg
.status
, n
->status
);
127 virtio_stw_p(vdev
, &netcfg
.max_virtqueue_pairs
, n
->max_queues
);
128 virtio_stw_p(vdev
, &netcfg
.mtu
, n
->net_conf
.mtu
);
129 memcpy(netcfg
.mac
, n
->mac
, ETH_ALEN
);
130 virtio_stl_p(vdev
, &netcfg
.speed
, n
->net_conf
.speed
);
131 netcfg
.duplex
= n
->net_conf
.duplex
;
132 memcpy(config
, &netcfg
, n
->config_size
);
135 static void virtio_net_set_config(VirtIODevice
*vdev
, const uint8_t *config
)
137 VirtIONet
*n
= VIRTIO_NET(vdev
);
138 struct virtio_net_config netcfg
= {};
140 memcpy(&netcfg
, config
, n
->config_size
);
142 if (!virtio_vdev_has_feature(vdev
, VIRTIO_NET_F_CTRL_MAC_ADDR
) &&
143 !virtio_vdev_has_feature(vdev
, VIRTIO_F_VERSION_1
) &&
144 memcmp(netcfg
.mac
, n
->mac
, ETH_ALEN
)) {
145 memcpy(n
->mac
, netcfg
.mac
, ETH_ALEN
);
146 qemu_format_nic_info_str(qemu_get_queue(n
->nic
), n
->mac
);
150 static bool virtio_net_started(VirtIONet
*n
, uint8_t status
)
152 VirtIODevice
*vdev
= VIRTIO_DEVICE(n
);
153 return (status
& VIRTIO_CONFIG_S_DRIVER_OK
) &&
154 (n
->status
& VIRTIO_NET_S_LINK_UP
) && vdev
->vm_running
;
157 static void virtio_net_announce_notify(VirtIONet
*net
)
159 VirtIODevice
*vdev
= VIRTIO_DEVICE(net
);
160 trace_virtio_net_announce_notify();
162 net
->status
|= VIRTIO_NET_S_ANNOUNCE
;
163 virtio_notify_config(vdev
);
166 static void virtio_net_announce_timer(void *opaque
)
168 VirtIONet
*n
= opaque
;
169 trace_virtio_net_announce_timer(n
->announce_timer
.round
);
171 n
->announce_timer
.round
--;
172 virtio_net_announce_notify(n
);
175 static void virtio_net_announce(NetClientState
*nc
)
177 VirtIONet
*n
= qemu_get_nic_opaque(nc
);
178 VirtIODevice
*vdev
= VIRTIO_DEVICE(n
);
181 * Make sure the virtio migration announcement timer isn't running
182 * If it is, let it trigger announcement so that we do not cause
185 if (n
->announce_timer
.round
) {
189 if (virtio_vdev_has_feature(vdev
, VIRTIO_NET_F_GUEST_ANNOUNCE
) &&
190 virtio_vdev_has_feature(vdev
, VIRTIO_NET_F_CTRL_VQ
)) {
191 virtio_net_announce_notify(n
);
195 static void virtio_net_vhost_status(VirtIONet
*n
, uint8_t status
)
197 VirtIODevice
*vdev
= VIRTIO_DEVICE(n
);
198 NetClientState
*nc
= qemu_get_queue(n
->nic
);
199 int queues
= n
->multiqueue
? n
->max_queues
: 1;
201 if (!get_vhost_net(nc
->peer
)) {
205 if ((virtio_net_started(n
, status
) && !nc
->peer
->link_down
) ==
206 !!n
->vhost_started
) {
209 if (!n
->vhost_started
) {
212 if (n
->needs_vnet_hdr_swap
) {
213 error_report("backend does not support %s vnet headers; "
214 "falling back on userspace virtio",
215 virtio_is_big_endian(vdev
) ? "BE" : "LE");
219 /* Any packets outstanding? Purge them to avoid touching rings
220 * when vhost is running.
222 for (i
= 0; i
< queues
; i
++) {
223 NetClientState
*qnc
= qemu_get_subqueue(n
->nic
, i
);
225 /* Purge both directions: TX and RX. */
226 qemu_net_queue_purge(qnc
->peer
->incoming_queue
, qnc
);
227 qemu_net_queue_purge(qnc
->incoming_queue
, qnc
->peer
);
230 if (virtio_has_feature(vdev
->guest_features
, VIRTIO_NET_F_MTU
)) {
231 r
= vhost_net_set_mtu(get_vhost_net(nc
->peer
), n
->net_conf
.mtu
);
233 error_report("%uBytes MTU not supported by the backend",
240 n
->vhost_started
= 1;
241 r
= vhost_net_start(vdev
, n
->nic
->ncs
, queues
);
243 error_report("unable to start vhost net: %d: "
244 "falling back on userspace virtio", -r
);
245 n
->vhost_started
= 0;
248 vhost_net_stop(vdev
, n
->nic
->ncs
, queues
);
249 n
->vhost_started
= 0;
253 static int virtio_net_set_vnet_endian_one(VirtIODevice
*vdev
,
254 NetClientState
*peer
,
257 if (virtio_is_big_endian(vdev
)) {
258 return qemu_set_vnet_be(peer
, enable
);
260 return qemu_set_vnet_le(peer
, enable
);
264 static bool virtio_net_set_vnet_endian(VirtIODevice
*vdev
, NetClientState
*ncs
,
265 int queues
, bool enable
)
269 for (i
= 0; i
< queues
; i
++) {
270 if (virtio_net_set_vnet_endian_one(vdev
, ncs
[i
].peer
, enable
) < 0 &&
273 virtio_net_set_vnet_endian_one(vdev
, ncs
[i
].peer
, false);
283 static void virtio_net_vnet_endian_status(VirtIONet
*n
, uint8_t status
)
285 VirtIODevice
*vdev
= VIRTIO_DEVICE(n
);
286 int queues
= n
->multiqueue
? n
->max_queues
: 1;
288 if (virtio_net_started(n
, status
)) {
289 /* Before using the device, we tell the network backend about the
290 * endianness to use when parsing vnet headers. If the backend
291 * can't do it, we fallback onto fixing the headers in the core
294 n
->needs_vnet_hdr_swap
= virtio_net_set_vnet_endian(vdev
, n
->nic
->ncs
,
296 } else if (virtio_net_started(n
, vdev
->status
)) {
297 /* After using the device, we need to reset the network backend to
298 * the default (guest native endianness), otherwise the guest may
299 * lose network connectivity if it is rebooted into a different
302 virtio_net_set_vnet_endian(vdev
, n
->nic
->ncs
, queues
, false);
306 static void virtio_net_drop_tx_queue_data(VirtIODevice
*vdev
, VirtQueue
*vq
)
308 unsigned int dropped
= virtqueue_drop_all(vq
);
310 virtio_notify(vdev
, vq
);
314 static void virtio_net_set_status(struct VirtIODevice
*vdev
, uint8_t status
)
316 VirtIONet
*n
= VIRTIO_NET(vdev
);
319 uint8_t queue_status
;
321 virtio_net_vnet_endian_status(n
, status
);
322 virtio_net_vhost_status(n
, status
);
324 for (i
= 0; i
< n
->max_queues
; i
++) {
325 NetClientState
*ncs
= qemu_get_subqueue(n
->nic
, i
);
329 if ((!n
->multiqueue
&& i
!= 0) || i
>= n
->curr_queues
) {
332 queue_status
= status
;
335 virtio_net_started(n
, queue_status
) && !n
->vhost_started
;
338 qemu_flush_queued_packets(ncs
);
341 if (!q
->tx_waiting
) {
347 timer_mod(q
->tx_timer
,
348 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) + n
->tx_timeout
);
350 qemu_bh_schedule(q
->tx_bh
);
354 timer_del(q
->tx_timer
);
356 qemu_bh_cancel(q
->tx_bh
);
358 if ((n
->status
& VIRTIO_NET_S_LINK_UP
) == 0 &&
359 (queue_status
& VIRTIO_CONFIG_S_DRIVER_OK
) &&
361 /* if tx is waiting we are likely have some packets in tx queue
362 * and disabled notification */
364 virtio_queue_set_notification(q
->tx_vq
, 1);
365 virtio_net_drop_tx_queue_data(vdev
, q
->tx_vq
);
371 static void virtio_net_set_link_status(NetClientState
*nc
)
373 VirtIONet
*n
= qemu_get_nic_opaque(nc
);
374 VirtIODevice
*vdev
= VIRTIO_DEVICE(n
);
375 uint16_t old_status
= n
->status
;
378 n
->status
&= ~VIRTIO_NET_S_LINK_UP
;
380 n
->status
|= VIRTIO_NET_S_LINK_UP
;
382 if (n
->status
!= old_status
)
383 virtio_notify_config(vdev
);
385 virtio_net_set_status(vdev
, vdev
->status
);
388 static void rxfilter_notify(NetClientState
*nc
)
390 VirtIONet
*n
= qemu_get_nic_opaque(nc
);
392 if (nc
->rxfilter_notify_enabled
) {
393 gchar
*path
= object_get_canonical_path(OBJECT(n
->qdev
));
394 qapi_event_send_nic_rx_filter_changed(!!n
->netclient_name
,
395 n
->netclient_name
, path
);
398 /* disable event notification to avoid events flooding */
399 nc
->rxfilter_notify_enabled
= 0;
403 static intList
*get_vlan_table(VirtIONet
*n
)
405 intList
*list
, *entry
;
409 for (i
= 0; i
< MAX_VLAN
>> 5; i
++) {
410 for (j
= 0; n
->vlans
[i
] && j
<= 0x1f; j
++) {
411 if (n
->vlans
[i
] & (1U << j
)) {
412 entry
= g_malloc0(sizeof(*entry
));
413 entry
->value
= (i
<< 5) + j
;
423 static RxFilterInfo
*virtio_net_query_rxfilter(NetClientState
*nc
)
425 VirtIONet
*n
= qemu_get_nic_opaque(nc
);
426 VirtIODevice
*vdev
= VIRTIO_DEVICE(n
);
428 strList
*str_list
, *entry
;
431 info
= g_malloc0(sizeof(*info
));
432 info
->name
= g_strdup(nc
->name
);
433 info
->promiscuous
= n
->promisc
;
436 info
->unicast
= RX_STATE_NONE
;
437 } else if (n
->alluni
) {
438 info
->unicast
= RX_STATE_ALL
;
440 info
->unicast
= RX_STATE_NORMAL
;
444 info
->multicast
= RX_STATE_NONE
;
445 } else if (n
->allmulti
) {
446 info
->multicast
= RX_STATE_ALL
;
448 info
->multicast
= RX_STATE_NORMAL
;
451 info
->broadcast_allowed
= n
->nobcast
;
452 info
->multicast_overflow
= n
->mac_table
.multi_overflow
;
453 info
->unicast_overflow
= n
->mac_table
.uni_overflow
;
455 info
->main_mac
= qemu_mac_strdup_printf(n
->mac
);
458 for (i
= 0; i
< n
->mac_table
.first_multi
; i
++) {
459 entry
= g_malloc0(sizeof(*entry
));
460 entry
->value
= qemu_mac_strdup_printf(n
->mac_table
.macs
+ i
* ETH_ALEN
);
461 entry
->next
= str_list
;
464 info
->unicast_table
= str_list
;
467 for (i
= n
->mac_table
.first_multi
; i
< n
->mac_table
.in_use
; i
++) {
468 entry
= g_malloc0(sizeof(*entry
));
469 entry
->value
= qemu_mac_strdup_printf(n
->mac_table
.macs
+ i
* ETH_ALEN
);
470 entry
->next
= str_list
;
473 info
->multicast_table
= str_list
;
474 info
->vlan_table
= get_vlan_table(n
);
476 if (!virtio_vdev_has_feature(vdev
, VIRTIO_NET_F_CTRL_VLAN
)) {
477 info
->vlan
= RX_STATE_ALL
;
478 } else if (!info
->vlan_table
) {
479 info
->vlan
= RX_STATE_NONE
;
481 info
->vlan
= RX_STATE_NORMAL
;
484 /* enable event notification after query */
485 nc
->rxfilter_notify_enabled
= 1;
490 static void virtio_net_reset(VirtIODevice
*vdev
)
492 VirtIONet
*n
= VIRTIO_NET(vdev
);
495 /* Reset back to compatibility mode */
502 /* multiqueue is disabled by default */
504 timer_del(n
->announce_timer
.tm
);
505 n
->announce_timer
.round
= 0;
506 n
->status
&= ~VIRTIO_NET_S_ANNOUNCE
;
508 /* Flush any MAC and VLAN filter table state */
509 n
->mac_table
.in_use
= 0;
510 n
->mac_table
.first_multi
= 0;
511 n
->mac_table
.multi_overflow
= 0;
512 n
->mac_table
.uni_overflow
= 0;
513 memset(n
->mac_table
.macs
, 0, MAC_TABLE_ENTRIES
* ETH_ALEN
);
514 memcpy(&n
->mac
[0], &n
->nic
->conf
->macaddr
, sizeof(n
->mac
));
515 qemu_format_nic_info_str(qemu_get_queue(n
->nic
), n
->mac
);
516 memset(n
->vlans
, 0, MAX_VLAN
>> 3);
518 /* Flush any async TX */
519 for (i
= 0; i
< n
->max_queues
; i
++) {
520 NetClientState
*nc
= qemu_get_subqueue(n
->nic
, i
);
523 qemu_flush_or_purge_queued_packets(nc
->peer
, true);
524 assert(!virtio_net_get_subqueue(nc
)->async_tx
.elem
);
529 static void peer_test_vnet_hdr(VirtIONet
*n
)
531 NetClientState
*nc
= qemu_get_queue(n
->nic
);
536 n
->has_vnet_hdr
= qemu_has_vnet_hdr(nc
->peer
);
539 static int peer_has_vnet_hdr(VirtIONet
*n
)
541 return n
->has_vnet_hdr
;
544 static int peer_has_ufo(VirtIONet
*n
)
546 if (!peer_has_vnet_hdr(n
))
549 n
->has_ufo
= qemu_has_ufo(qemu_get_queue(n
->nic
)->peer
);
554 static void virtio_net_set_mrg_rx_bufs(VirtIONet
*n
, int mergeable_rx_bufs
,
560 n
->mergeable_rx_bufs
= mergeable_rx_bufs
;
563 n
->guest_hdr_len
= sizeof(struct virtio_net_hdr_mrg_rxbuf
);
565 n
->guest_hdr_len
= n
->mergeable_rx_bufs
?
566 sizeof(struct virtio_net_hdr_mrg_rxbuf
) :
567 sizeof(struct virtio_net_hdr
);
570 for (i
= 0; i
< n
->max_queues
; i
++) {
571 nc
= qemu_get_subqueue(n
->nic
, i
);
573 if (peer_has_vnet_hdr(n
) &&
574 qemu_has_vnet_hdr_len(nc
->peer
, n
->guest_hdr_len
)) {
575 qemu_set_vnet_hdr_len(nc
->peer
, n
->guest_hdr_len
);
576 n
->host_hdr_len
= n
->guest_hdr_len
;
581 static int virtio_net_max_tx_queue_size(VirtIONet
*n
)
583 NetClientState
*peer
= n
->nic_conf
.peers
.ncs
[0];
586 * Backends other than vhost-user don't support max queue size.
589 return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE
;
592 if (peer
->info
->type
!= NET_CLIENT_DRIVER_VHOST_USER
) {
593 return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE
;
596 return VIRTQUEUE_MAX_SIZE
;
599 static int peer_attach(VirtIONet
*n
, int index
)
601 NetClientState
*nc
= qemu_get_subqueue(n
->nic
, index
);
607 if (nc
->peer
->info
->type
== NET_CLIENT_DRIVER_VHOST_USER
) {
608 vhost_set_vring_enable(nc
->peer
, 1);
611 if (nc
->peer
->info
->type
!= NET_CLIENT_DRIVER_TAP
) {
615 if (n
->max_queues
== 1) {
619 return tap_enable(nc
->peer
);
622 static int peer_detach(VirtIONet
*n
, int index
)
624 NetClientState
*nc
= qemu_get_subqueue(n
->nic
, index
);
630 if (nc
->peer
->info
->type
== NET_CLIENT_DRIVER_VHOST_USER
) {
631 vhost_set_vring_enable(nc
->peer
, 0);
634 if (nc
->peer
->info
->type
!= NET_CLIENT_DRIVER_TAP
) {
638 return tap_disable(nc
->peer
);
641 static void virtio_net_set_queues(VirtIONet
*n
)
646 if (n
->nic
->peer_deleted
) {
650 for (i
= 0; i
< n
->max_queues
; i
++) {
651 if (i
< n
->curr_queues
) {
652 r
= peer_attach(n
, i
);
655 r
= peer_detach(n
, i
);
661 static void virtio_net_set_multiqueue(VirtIONet
*n
, int multiqueue
);
663 static uint64_t virtio_net_get_features(VirtIODevice
*vdev
, uint64_t features
,
666 VirtIONet
*n
= VIRTIO_NET(vdev
);
667 NetClientState
*nc
= qemu_get_queue(n
->nic
);
669 /* Firstly sync all virtio-net possible supported features */
670 features
|= n
->host_features
;
672 virtio_add_feature(&features
, VIRTIO_NET_F_MAC
);
674 if (!peer_has_vnet_hdr(n
)) {
675 virtio_clear_feature(&features
, VIRTIO_NET_F_CSUM
);
676 virtio_clear_feature(&features
, VIRTIO_NET_F_HOST_TSO4
);
677 virtio_clear_feature(&features
, VIRTIO_NET_F_HOST_TSO6
);
678 virtio_clear_feature(&features
, VIRTIO_NET_F_HOST_ECN
);
680 virtio_clear_feature(&features
, VIRTIO_NET_F_GUEST_CSUM
);
681 virtio_clear_feature(&features
, VIRTIO_NET_F_GUEST_TSO4
);
682 virtio_clear_feature(&features
, VIRTIO_NET_F_GUEST_TSO6
);
683 virtio_clear_feature(&features
, VIRTIO_NET_F_GUEST_ECN
);
686 if (!peer_has_vnet_hdr(n
) || !peer_has_ufo(n
)) {
687 virtio_clear_feature(&features
, VIRTIO_NET_F_GUEST_UFO
);
688 virtio_clear_feature(&features
, VIRTIO_NET_F_HOST_UFO
);
691 if (!get_vhost_net(nc
->peer
)) {
695 features
= vhost_net_get_features(get_vhost_net(nc
->peer
), features
);
696 vdev
->backend_features
= features
;
698 if (n
->mtu_bypass_backend
&&
699 (n
->host_features
& 1ULL << VIRTIO_NET_F_MTU
)) {
700 features
|= (1ULL << VIRTIO_NET_F_MTU
);
706 static uint64_t virtio_net_bad_features(VirtIODevice
*vdev
)
708 uint64_t features
= 0;
710 /* Linux kernel 2.6.25. It understood MAC (as everyone must),
712 virtio_add_feature(&features
, VIRTIO_NET_F_MAC
);
713 virtio_add_feature(&features
, VIRTIO_NET_F_CSUM
);
714 virtio_add_feature(&features
, VIRTIO_NET_F_HOST_TSO4
);
715 virtio_add_feature(&features
, VIRTIO_NET_F_HOST_TSO6
);
716 virtio_add_feature(&features
, VIRTIO_NET_F_HOST_ECN
);
721 static void virtio_net_apply_guest_offloads(VirtIONet
*n
)
723 qemu_set_offload(qemu_get_queue(n
->nic
)->peer
,
724 !!(n
->curr_guest_offloads
& (1ULL << VIRTIO_NET_F_GUEST_CSUM
)),
725 !!(n
->curr_guest_offloads
& (1ULL << VIRTIO_NET_F_GUEST_TSO4
)),
726 !!(n
->curr_guest_offloads
& (1ULL << VIRTIO_NET_F_GUEST_TSO6
)),
727 !!(n
->curr_guest_offloads
& (1ULL << VIRTIO_NET_F_GUEST_ECN
)),
728 !!(n
->curr_guest_offloads
& (1ULL << VIRTIO_NET_F_GUEST_UFO
)));
731 static uint64_t virtio_net_guest_offloads_by_features(uint32_t features
)
733 static const uint64_t guest_offloads_mask
=
734 (1ULL << VIRTIO_NET_F_GUEST_CSUM
) |
735 (1ULL << VIRTIO_NET_F_GUEST_TSO4
) |
736 (1ULL << VIRTIO_NET_F_GUEST_TSO6
) |
737 (1ULL << VIRTIO_NET_F_GUEST_ECN
) |
738 (1ULL << VIRTIO_NET_F_GUEST_UFO
);
740 return guest_offloads_mask
& features
;
743 static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet
*n
)
745 VirtIODevice
*vdev
= VIRTIO_DEVICE(n
);
746 return virtio_net_guest_offloads_by_features(vdev
->guest_features
);
749 static void virtio_net_set_features(VirtIODevice
*vdev
, uint64_t features
)
751 VirtIONet
*n
= VIRTIO_NET(vdev
);
754 if (n
->mtu_bypass_backend
&&
755 !virtio_has_feature(vdev
->backend_features
, VIRTIO_NET_F_MTU
)) {
756 features
&= ~(1ULL << VIRTIO_NET_F_MTU
);
759 virtio_net_set_multiqueue(n
,
760 virtio_has_feature(features
, VIRTIO_NET_F_MQ
));
762 virtio_net_set_mrg_rx_bufs(n
,
763 virtio_has_feature(features
,
764 VIRTIO_NET_F_MRG_RXBUF
),
765 virtio_has_feature(features
,
766 VIRTIO_F_VERSION_1
));
768 n
->rsc4_enabled
= virtio_has_feature(features
, VIRTIO_NET_F_RSC_EXT
) &&
769 virtio_has_feature(features
, VIRTIO_NET_F_GUEST_TSO4
);
770 n
->rsc6_enabled
= virtio_has_feature(features
, VIRTIO_NET_F_RSC_EXT
) &&
771 virtio_has_feature(features
, VIRTIO_NET_F_GUEST_TSO6
);
773 if (n
->has_vnet_hdr
) {
774 n
->curr_guest_offloads
=
775 virtio_net_guest_offloads_by_features(features
);
776 virtio_net_apply_guest_offloads(n
);
779 for (i
= 0; i
< n
->max_queues
; i
++) {
780 NetClientState
*nc
= qemu_get_subqueue(n
->nic
, i
);
782 if (!get_vhost_net(nc
->peer
)) {
785 vhost_net_ack_features(get_vhost_net(nc
->peer
), features
);
788 if (virtio_has_feature(features
, VIRTIO_NET_F_CTRL_VLAN
)) {
789 memset(n
->vlans
, 0, MAX_VLAN
>> 3);
791 memset(n
->vlans
, 0xff, MAX_VLAN
>> 3);
795 static int virtio_net_handle_rx_mode(VirtIONet
*n
, uint8_t cmd
,
796 struct iovec
*iov
, unsigned int iov_cnt
)
800 NetClientState
*nc
= qemu_get_queue(n
->nic
);
802 s
= iov_to_buf(iov
, iov_cnt
, 0, &on
, sizeof(on
));
803 if (s
!= sizeof(on
)) {
804 return VIRTIO_NET_ERR
;
807 if (cmd
== VIRTIO_NET_CTRL_RX_PROMISC
) {
809 } else if (cmd
== VIRTIO_NET_CTRL_RX_ALLMULTI
) {
811 } else if (cmd
== VIRTIO_NET_CTRL_RX_ALLUNI
) {
813 } else if (cmd
== VIRTIO_NET_CTRL_RX_NOMULTI
) {
815 } else if (cmd
== VIRTIO_NET_CTRL_RX_NOUNI
) {
817 } else if (cmd
== VIRTIO_NET_CTRL_RX_NOBCAST
) {
820 return VIRTIO_NET_ERR
;
825 return VIRTIO_NET_OK
;
828 static int virtio_net_handle_offloads(VirtIONet
*n
, uint8_t cmd
,
829 struct iovec
*iov
, unsigned int iov_cnt
)
831 VirtIODevice
*vdev
= VIRTIO_DEVICE(n
);
835 if (!virtio_vdev_has_feature(vdev
, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS
)) {
836 return VIRTIO_NET_ERR
;
839 s
= iov_to_buf(iov
, iov_cnt
, 0, &offloads
, sizeof(offloads
));
840 if (s
!= sizeof(offloads
)) {
841 return VIRTIO_NET_ERR
;
844 if (cmd
== VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET
) {
845 uint64_t supported_offloads
;
847 offloads
= virtio_ldq_p(vdev
, &offloads
);
849 if (!n
->has_vnet_hdr
) {
850 return VIRTIO_NET_ERR
;
853 n
->rsc4_enabled
= virtio_has_feature(offloads
, VIRTIO_NET_F_RSC_EXT
) &&
854 virtio_has_feature(offloads
, VIRTIO_NET_F_GUEST_TSO4
);
855 n
->rsc6_enabled
= virtio_has_feature(offloads
, VIRTIO_NET_F_RSC_EXT
) &&
856 virtio_has_feature(offloads
, VIRTIO_NET_F_GUEST_TSO6
);
857 virtio_clear_feature(&offloads
, VIRTIO_NET_F_RSC_EXT
);
859 supported_offloads
= virtio_net_supported_guest_offloads(n
);
860 if (offloads
& ~supported_offloads
) {
861 return VIRTIO_NET_ERR
;
864 n
->curr_guest_offloads
= offloads
;
865 virtio_net_apply_guest_offloads(n
);
867 return VIRTIO_NET_OK
;
869 return VIRTIO_NET_ERR
;
873 static int virtio_net_handle_mac(VirtIONet
*n
, uint8_t cmd
,
874 struct iovec
*iov
, unsigned int iov_cnt
)
876 VirtIODevice
*vdev
= VIRTIO_DEVICE(n
);
877 struct virtio_net_ctrl_mac mac_data
;
879 NetClientState
*nc
= qemu_get_queue(n
->nic
);
881 if (cmd
== VIRTIO_NET_CTRL_MAC_ADDR_SET
) {
882 if (iov_size(iov
, iov_cnt
) != sizeof(n
->mac
)) {
883 return VIRTIO_NET_ERR
;
885 s
= iov_to_buf(iov
, iov_cnt
, 0, &n
->mac
, sizeof(n
->mac
));
886 assert(s
== sizeof(n
->mac
));
887 qemu_format_nic_info_str(qemu_get_queue(n
->nic
), n
->mac
);
890 return VIRTIO_NET_OK
;
893 if (cmd
!= VIRTIO_NET_CTRL_MAC_TABLE_SET
) {
894 return VIRTIO_NET_ERR
;
899 uint8_t uni_overflow
= 0;
900 uint8_t multi_overflow
= 0;
901 uint8_t *macs
= g_malloc0(MAC_TABLE_ENTRIES
* ETH_ALEN
);
903 s
= iov_to_buf(iov
, iov_cnt
, 0, &mac_data
.entries
,
904 sizeof(mac_data
.entries
));
905 mac_data
.entries
= virtio_ldl_p(vdev
, &mac_data
.entries
);
906 if (s
!= sizeof(mac_data
.entries
)) {
909 iov_discard_front(&iov
, &iov_cnt
, s
);
911 if (mac_data
.entries
* ETH_ALEN
> iov_size(iov
, iov_cnt
)) {
915 if (mac_data
.entries
<= MAC_TABLE_ENTRIES
) {
916 s
= iov_to_buf(iov
, iov_cnt
, 0, macs
,
917 mac_data
.entries
* ETH_ALEN
);
918 if (s
!= mac_data
.entries
* ETH_ALEN
) {
921 in_use
+= mac_data
.entries
;
926 iov_discard_front(&iov
, &iov_cnt
, mac_data
.entries
* ETH_ALEN
);
928 first_multi
= in_use
;
930 s
= iov_to_buf(iov
, iov_cnt
, 0, &mac_data
.entries
,
931 sizeof(mac_data
.entries
));
932 mac_data
.entries
= virtio_ldl_p(vdev
, &mac_data
.entries
);
933 if (s
!= sizeof(mac_data
.entries
)) {
937 iov_discard_front(&iov
, &iov_cnt
, s
);
939 if (mac_data
.entries
* ETH_ALEN
!= iov_size(iov
, iov_cnt
)) {
943 if (mac_data
.entries
<= MAC_TABLE_ENTRIES
- in_use
) {
944 s
= iov_to_buf(iov
, iov_cnt
, 0, &macs
[in_use
* ETH_ALEN
],
945 mac_data
.entries
* ETH_ALEN
);
946 if (s
!= mac_data
.entries
* ETH_ALEN
) {
949 in_use
+= mac_data
.entries
;
954 n
->mac_table
.in_use
= in_use
;
955 n
->mac_table
.first_multi
= first_multi
;
956 n
->mac_table
.uni_overflow
= uni_overflow
;
957 n
->mac_table
.multi_overflow
= multi_overflow
;
958 memcpy(n
->mac_table
.macs
, macs
, MAC_TABLE_ENTRIES
* ETH_ALEN
);
962 return VIRTIO_NET_OK
;
966 return VIRTIO_NET_ERR
;
969 static int virtio_net_handle_vlan_table(VirtIONet
*n
, uint8_t cmd
,
970 struct iovec
*iov
, unsigned int iov_cnt
)
972 VirtIODevice
*vdev
= VIRTIO_DEVICE(n
);
975 NetClientState
*nc
= qemu_get_queue(n
->nic
);
977 s
= iov_to_buf(iov
, iov_cnt
, 0, &vid
, sizeof(vid
));
978 vid
= virtio_lduw_p(vdev
, &vid
);
979 if (s
!= sizeof(vid
)) {
980 return VIRTIO_NET_ERR
;
984 return VIRTIO_NET_ERR
;
986 if (cmd
== VIRTIO_NET_CTRL_VLAN_ADD
)
987 n
->vlans
[vid
>> 5] |= (1U << (vid
& 0x1f));
988 else if (cmd
== VIRTIO_NET_CTRL_VLAN_DEL
)
989 n
->vlans
[vid
>> 5] &= ~(1U << (vid
& 0x1f));
991 return VIRTIO_NET_ERR
;
995 return VIRTIO_NET_OK
;
998 static int virtio_net_handle_announce(VirtIONet
*n
, uint8_t cmd
,
999 struct iovec
*iov
, unsigned int iov_cnt
)
1001 trace_virtio_net_handle_announce(n
->announce_timer
.round
);
1002 if (cmd
== VIRTIO_NET_CTRL_ANNOUNCE_ACK
&&
1003 n
->status
& VIRTIO_NET_S_ANNOUNCE
) {
1004 n
->status
&= ~VIRTIO_NET_S_ANNOUNCE
;
1005 if (n
->announce_timer
.round
) {
1006 qemu_announce_timer_step(&n
->announce_timer
);
1008 return VIRTIO_NET_OK
;
1010 return VIRTIO_NET_ERR
;
1014 static int virtio_net_handle_mq(VirtIONet
*n
, uint8_t cmd
,
1015 struct iovec
*iov
, unsigned int iov_cnt
)
1017 VirtIODevice
*vdev
= VIRTIO_DEVICE(n
);
1018 struct virtio_net_ctrl_mq mq
;
1022 s
= iov_to_buf(iov
, iov_cnt
, 0, &mq
, sizeof(mq
));
1023 if (s
!= sizeof(mq
)) {
1024 return VIRTIO_NET_ERR
;
1027 if (cmd
!= VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET
) {
1028 return VIRTIO_NET_ERR
;
1031 queues
= virtio_lduw_p(vdev
, &mq
.virtqueue_pairs
);
1033 if (queues
< VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN
||
1034 queues
> VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX
||
1035 queues
> n
->max_queues
||
1037 return VIRTIO_NET_ERR
;
1040 n
->curr_queues
= queues
;
1041 /* stop the backend before changing the number of queues to avoid handling a
1043 virtio_net_set_status(vdev
, vdev
->status
);
1044 virtio_net_set_queues(n
);
1046 return VIRTIO_NET_OK
;
1049 static void virtio_net_handle_ctrl(VirtIODevice
*vdev
, VirtQueue
*vq
)
1051 VirtIONet
*n
= VIRTIO_NET(vdev
);
1052 struct virtio_net_ctrl_hdr ctrl
;
1053 virtio_net_ctrl_ack status
= VIRTIO_NET_ERR
;
1054 VirtQueueElement
*elem
;
1056 struct iovec
*iov
, *iov2
;
1057 unsigned int iov_cnt
;
1060 elem
= virtqueue_pop(vq
, sizeof(VirtQueueElement
));
1064 if (iov_size(elem
->in_sg
, elem
->in_num
) < sizeof(status
) ||
1065 iov_size(elem
->out_sg
, elem
->out_num
) < sizeof(ctrl
)) {
1066 virtio_error(vdev
, "virtio-net ctrl missing headers");
1067 virtqueue_detach_element(vq
, elem
, 0);
1072 iov_cnt
= elem
->out_num
;
1073 iov2
= iov
= g_memdup(elem
->out_sg
, sizeof(struct iovec
) * elem
->out_num
);
1074 s
= iov_to_buf(iov
, iov_cnt
, 0, &ctrl
, sizeof(ctrl
));
1075 iov_discard_front(&iov
, &iov_cnt
, sizeof(ctrl
));
1076 if (s
!= sizeof(ctrl
)) {
1077 status
= VIRTIO_NET_ERR
;
1078 } else if (ctrl
.class == VIRTIO_NET_CTRL_RX
) {
1079 status
= virtio_net_handle_rx_mode(n
, ctrl
.cmd
, iov
, iov_cnt
);
1080 } else if (ctrl
.class == VIRTIO_NET_CTRL_MAC
) {
1081 status
= virtio_net_handle_mac(n
, ctrl
.cmd
, iov
, iov_cnt
);
1082 } else if (ctrl
.class == VIRTIO_NET_CTRL_VLAN
) {
1083 status
= virtio_net_handle_vlan_table(n
, ctrl
.cmd
, iov
, iov_cnt
);
1084 } else if (ctrl
.class == VIRTIO_NET_CTRL_ANNOUNCE
) {
1085 status
= virtio_net_handle_announce(n
, ctrl
.cmd
, iov
, iov_cnt
);
1086 } else if (ctrl
.class == VIRTIO_NET_CTRL_MQ
) {
1087 status
= virtio_net_handle_mq(n
, ctrl
.cmd
, iov
, iov_cnt
);
1088 } else if (ctrl
.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS
) {
1089 status
= virtio_net_handle_offloads(n
, ctrl
.cmd
, iov
, iov_cnt
);
1092 s
= iov_from_buf(elem
->in_sg
, elem
->in_num
, 0, &status
, sizeof(status
));
1093 assert(s
== sizeof(status
));
1095 virtqueue_push(vq
, elem
, sizeof(status
));
1096 virtio_notify(vdev
, vq
);
1104 static void virtio_net_handle_rx(VirtIODevice
*vdev
, VirtQueue
*vq
)
1106 VirtIONet
*n
= VIRTIO_NET(vdev
);
1107 int queue_index
= vq2q(virtio_get_queue_index(vq
));
1109 qemu_flush_queued_packets(qemu_get_subqueue(n
->nic
, queue_index
));
1112 static int virtio_net_can_receive(NetClientState
*nc
)
1114 VirtIONet
*n
= qemu_get_nic_opaque(nc
);
1115 VirtIODevice
*vdev
= VIRTIO_DEVICE(n
);
1116 VirtIONetQueue
*q
= virtio_net_get_subqueue(nc
);
1118 if (!vdev
->vm_running
) {
1122 if (nc
->queue_index
>= n
->curr_queues
) {
1126 if (!virtio_queue_ready(q
->rx_vq
) ||
1127 !(vdev
->status
& VIRTIO_CONFIG_S_DRIVER_OK
)) {
1134 static int virtio_net_has_buffers(VirtIONetQueue
*q
, int bufsize
)
1136 VirtIONet
*n
= q
->n
;
1137 if (virtio_queue_empty(q
->rx_vq
) ||
1138 (n
->mergeable_rx_bufs
&&
1139 !virtqueue_avail_bytes(q
->rx_vq
, bufsize
, 0))) {
1140 virtio_queue_set_notification(q
->rx_vq
, 1);
1142 /* To avoid a race condition where the guest has made some buffers
1143 * available after the above check but before notification was
1144 * enabled, check for available buffers again.
1146 if (virtio_queue_empty(q
->rx_vq
) ||
1147 (n
->mergeable_rx_bufs
&&
1148 !virtqueue_avail_bytes(q
->rx_vq
, bufsize
, 0))) {
1153 virtio_queue_set_notification(q
->rx_vq
, 0);
1157 static void virtio_net_hdr_swap(VirtIODevice
*vdev
, struct virtio_net_hdr
*hdr
)
1159 virtio_tswap16s(vdev
, &hdr
->hdr_len
);
1160 virtio_tswap16s(vdev
, &hdr
->gso_size
);
1161 virtio_tswap16s(vdev
, &hdr
->csum_start
);
1162 virtio_tswap16s(vdev
, &hdr
->csum_offset
);
1165 /* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
1166 * it never finds out that the packets don't have valid checksums. This
1167 * causes dhclient to get upset. Fedora's carried a patch for ages to
1168 * fix this with Xen but it hasn't appeared in an upstream release of
1171 * To avoid breaking existing guests, we catch udp packets and add
1172 * checksums. This is terrible but it's better than hacking the guest
1175 * N.B. if we introduce a zero-copy API, this operation is no longer free so
1176 * we should provide a mechanism to disable it to avoid polluting the host
1179 static void work_around_broken_dhclient(struct virtio_net_hdr
*hdr
,
1180 uint8_t *buf
, size_t size
)
1182 if ((hdr
->flags
& VIRTIO_NET_HDR_F_NEEDS_CSUM
) && /* missing csum */
1183 (size
> 27 && size
< 1500) && /* normal sized MTU */
1184 (buf
[12] == 0x08 && buf
[13] == 0x00) && /* ethertype == IPv4 */
1185 (buf
[23] == 17) && /* ip.protocol == UDP */
1186 (buf
[34] == 0 && buf
[35] == 67)) { /* udp.srcport == bootps */
1187 net_checksum_calculate(buf
, size
);
1188 hdr
->flags
&= ~VIRTIO_NET_HDR_F_NEEDS_CSUM
;
1192 static void receive_header(VirtIONet
*n
, const struct iovec
*iov
, int iov_cnt
,
1193 const void *buf
, size_t size
)
1195 if (n
->has_vnet_hdr
) {
1196 /* FIXME this cast is evil */
1197 void *wbuf
= (void *)buf
;
1198 work_around_broken_dhclient(wbuf
, wbuf
+ n
->host_hdr_len
,
1199 size
- n
->host_hdr_len
);
1201 if (n
->needs_vnet_hdr_swap
) {
1202 virtio_net_hdr_swap(VIRTIO_DEVICE(n
), wbuf
);
1204 iov_from_buf(iov
, iov_cnt
, 0, buf
, sizeof(struct virtio_net_hdr
));
1206 struct virtio_net_hdr hdr
= {
1208 .gso_type
= VIRTIO_NET_HDR_GSO_NONE
1210 iov_from_buf(iov
, iov_cnt
, 0, &hdr
, sizeof hdr
);
1214 static int receive_filter(VirtIONet
*n
, const uint8_t *buf
, int size
)
1216 static const uint8_t bcast
[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
1217 static const uint8_t vlan
[] = {0x81, 0x00};
1218 uint8_t *ptr
= (uint8_t *)buf
;
1224 ptr
+= n
->host_hdr_len
;
1226 if (!memcmp(&ptr
[12], vlan
, sizeof(vlan
))) {
1227 int vid
= lduw_be_p(ptr
+ 14) & 0xfff;
1228 if (!(n
->vlans
[vid
>> 5] & (1U << (vid
& 0x1f))))
1232 if (ptr
[0] & 1) { // multicast
1233 if (!memcmp(ptr
, bcast
, sizeof(bcast
))) {
1235 } else if (n
->nomulti
) {
1237 } else if (n
->allmulti
|| n
->mac_table
.multi_overflow
) {
1241 for (i
= n
->mac_table
.first_multi
; i
< n
->mac_table
.in_use
; i
++) {
1242 if (!memcmp(ptr
, &n
->mac_table
.macs
[i
* ETH_ALEN
], ETH_ALEN
)) {
1249 } else if (n
->alluni
|| n
->mac_table
.uni_overflow
) {
1251 } else if (!memcmp(ptr
, n
->mac
, ETH_ALEN
)) {
1255 for (i
= 0; i
< n
->mac_table
.first_multi
; i
++) {
1256 if (!memcmp(ptr
, &n
->mac_table
.macs
[i
* ETH_ALEN
], ETH_ALEN
)) {
1265 static ssize_t
virtio_net_receive_rcu(NetClientState
*nc
, const uint8_t *buf
,
1268 VirtIONet
*n
= qemu_get_nic_opaque(nc
);
1269 VirtIONetQueue
*q
= virtio_net_get_subqueue(nc
);
1270 VirtIODevice
*vdev
= VIRTIO_DEVICE(n
);
1271 struct iovec mhdr_sg
[VIRTQUEUE_MAX_SIZE
];
1272 struct virtio_net_hdr_mrg_rxbuf mhdr
;
1273 unsigned mhdr_cnt
= 0;
1274 size_t offset
, i
, guest_offset
;
1276 if (!virtio_net_can_receive(nc
)) {
1280 /* hdr_len refers to the header we supply to the guest */
1281 if (!virtio_net_has_buffers(q
, size
+ n
->guest_hdr_len
- n
->host_hdr_len
)) {
1285 if (!receive_filter(n
, buf
, size
))
1290 while (offset
< size
) {
1291 VirtQueueElement
*elem
;
1293 const struct iovec
*sg
;
1297 elem
= virtqueue_pop(q
->rx_vq
, sizeof(VirtQueueElement
));
1300 virtio_error(vdev
, "virtio-net unexpected empty queue: "
1301 "i %zd mergeable %d offset %zd, size %zd, "
1302 "guest hdr len %zd, host hdr len %zd "
1303 "guest features 0x%" PRIx64
,
1304 i
, n
->mergeable_rx_bufs
, offset
, size
,
1305 n
->guest_hdr_len
, n
->host_hdr_len
,
1306 vdev
->guest_features
);
1311 if (elem
->in_num
< 1) {
1313 "virtio-net receive queue contains no in buffers");
1314 virtqueue_detach_element(q
->rx_vq
, elem
, 0);
1321 assert(offset
== 0);
1322 if (n
->mergeable_rx_bufs
) {
1323 mhdr_cnt
= iov_copy(mhdr_sg
, ARRAY_SIZE(mhdr_sg
),
1325 offsetof(typeof(mhdr
), num_buffers
),
1326 sizeof(mhdr
.num_buffers
));
1329 receive_header(n
, sg
, elem
->in_num
, buf
, size
);
1330 offset
= n
->host_hdr_len
;
1331 total
+= n
->guest_hdr_len
;
1332 guest_offset
= n
->guest_hdr_len
;
1337 /* copy in packet. ugh */
1338 len
= iov_from_buf(sg
, elem
->in_num
, guest_offset
,
1339 buf
+ offset
, size
- offset
);
1342 /* If buffers can't be merged, at this point we
1343 * must have consumed the complete packet.
1344 * Otherwise, drop it. */
1345 if (!n
->mergeable_rx_bufs
&& offset
< size
) {
1346 virtqueue_unpop(q
->rx_vq
, elem
, total
);
1351 /* signal other side */
1352 virtqueue_fill(q
->rx_vq
, elem
, total
, i
++);
1357 virtio_stw_p(vdev
, &mhdr
.num_buffers
, i
);
1358 iov_from_buf(mhdr_sg
, mhdr_cnt
,
1360 &mhdr
.num_buffers
, sizeof mhdr
.num_buffers
);
1363 virtqueue_flush(q
->rx_vq
, i
);
1364 virtio_notify(vdev
, q
->rx_vq
);
1369 static ssize_t
virtio_net_do_receive(NetClientState
*nc
, const uint8_t *buf
,
1375 r
= virtio_net_receive_rcu(nc
, buf
, size
);
1380 static void virtio_net_rsc_extract_unit4(VirtioNetRscChain
*chain
,
1382 VirtioNetRscUnit
*unit
)
1385 struct ip_header
*ip
;
1387 ip
= (struct ip_header
*)(buf
+ chain
->n
->guest_hdr_len
1388 + sizeof(struct eth_header
));
1389 unit
->ip
= (void *)ip
;
1390 ip_hdrlen
= (ip
->ip_ver_len
& 0xF) << 2;
1391 unit
->ip_plen
= &ip
->ip_len
;
1392 unit
->tcp
= (struct tcp_header
*)(((uint8_t *)unit
->ip
) + ip_hdrlen
);
1393 unit
->tcp_hdrlen
= (htons(unit
->tcp
->th_offset_flags
) & 0xF000) >> 10;
1394 unit
->payload
= htons(*unit
->ip_plen
) - ip_hdrlen
- unit
->tcp_hdrlen
;
1397 static void virtio_net_rsc_extract_unit6(VirtioNetRscChain
*chain
,
1399 VirtioNetRscUnit
*unit
)
1401 struct ip6_header
*ip6
;
1403 ip6
= (struct ip6_header
*)(buf
+ chain
->n
->guest_hdr_len
1404 + sizeof(struct eth_header
));
1406 unit
->ip_plen
= &(ip6
->ip6_ctlun
.ip6_un1
.ip6_un1_plen
);
1407 unit
->tcp
= (struct tcp_header
*)(((uint8_t *)unit
->ip
)\
1408 + sizeof(struct ip6_header
));
1409 unit
->tcp_hdrlen
= (htons(unit
->tcp
->th_offset_flags
) & 0xF000) >> 10;
1411 /* There is a difference between payload lenght in ipv4 and v6,
1412 ip header is excluded in ipv6 */
1413 unit
->payload
= htons(*unit
->ip_plen
) - unit
->tcp_hdrlen
;
1416 static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain
*chain
,
1417 VirtioNetRscSeg
*seg
)
1420 struct virtio_net_hdr
*h
;
1422 h
= (struct virtio_net_hdr
*)seg
->buf
;
1424 h
->gso_type
= VIRTIO_NET_HDR_GSO_NONE
;
1426 if (seg
->is_coalesced
) {
1427 *virtio_net_rsc_ext_num_packets(h
) = seg
->packets
;
1428 *virtio_net_rsc_ext_num_dupacks(h
) = seg
->dup_ack
;
1429 h
->flags
= VIRTIO_NET_HDR_F_RSC_INFO
;
1430 if (chain
->proto
== ETH_P_IP
) {
1431 h
->gso_type
= VIRTIO_NET_HDR_GSO_TCPV4
;
1433 h
->gso_type
= VIRTIO_NET_HDR_GSO_TCPV6
;
1437 ret
= virtio_net_do_receive(seg
->nc
, seg
->buf
, seg
->size
);
1438 QTAILQ_REMOVE(&chain
->buffers
, seg
, next
);
1445 static void virtio_net_rsc_purge(void *opq
)
1447 VirtioNetRscSeg
*seg
, *rn
;
1448 VirtioNetRscChain
*chain
= (VirtioNetRscChain
*)opq
;
1450 QTAILQ_FOREACH_SAFE(seg
, &chain
->buffers
, next
, rn
) {
1451 if (virtio_net_rsc_drain_seg(chain
, seg
) == 0) {
1452 chain
->stat
.purge_failed
++;
1457 chain
->stat
.timer
++;
1458 if (!QTAILQ_EMPTY(&chain
->buffers
)) {
1459 timer_mod(chain
->drain_timer
,
1460 qemu_clock_get_ns(QEMU_CLOCK_HOST
) + chain
->n
->rsc_timeout
);
1464 static void virtio_net_rsc_cleanup(VirtIONet
*n
)
1466 VirtioNetRscChain
*chain
, *rn_chain
;
1467 VirtioNetRscSeg
*seg
, *rn_seg
;
1469 QTAILQ_FOREACH_SAFE(chain
, &n
->rsc_chains
, next
, rn_chain
) {
1470 QTAILQ_FOREACH_SAFE(seg
, &chain
->buffers
, next
, rn_seg
) {
1471 QTAILQ_REMOVE(&chain
->buffers
, seg
, next
);
1476 timer_del(chain
->drain_timer
);
1477 timer_free(chain
->drain_timer
);
1478 QTAILQ_REMOVE(&n
->rsc_chains
, chain
, next
);
1483 static void virtio_net_rsc_cache_buf(VirtioNetRscChain
*chain
,
1485 const uint8_t *buf
, size_t size
)
1488 VirtioNetRscSeg
*seg
;
1490 hdr_len
= chain
->n
->guest_hdr_len
;
1491 seg
= g_malloc(sizeof(VirtioNetRscSeg
));
1492 seg
->buf
= g_malloc(hdr_len
+ sizeof(struct eth_header
)
1493 + sizeof(struct ip6_header
) + VIRTIO_NET_MAX_TCP_PAYLOAD
);
1494 memcpy(seg
->buf
, buf
, size
);
1498 seg
->is_coalesced
= 0;
1501 QTAILQ_INSERT_TAIL(&chain
->buffers
, seg
, next
);
1502 chain
->stat
.cache
++;
1504 switch (chain
->proto
) {
1506 virtio_net_rsc_extract_unit4(chain
, seg
->buf
, &seg
->unit
);
1509 virtio_net_rsc_extract_unit6(chain
, seg
->buf
, &seg
->unit
);
1512 g_assert_not_reached();
1516 static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain
*chain
,
1517 VirtioNetRscSeg
*seg
,
1519 struct tcp_header
*n_tcp
,
1520 struct tcp_header
*o_tcp
)
1522 uint32_t nack
, oack
;
1523 uint16_t nwin
, owin
;
1525 nack
= htonl(n_tcp
->th_ack
);
1526 nwin
= htons(n_tcp
->th_win
);
1527 oack
= htonl(o_tcp
->th_ack
);
1528 owin
= htons(o_tcp
->th_win
);
1530 if ((nack
- oack
) >= VIRTIO_NET_MAX_TCP_PAYLOAD
) {
1531 chain
->stat
.ack_out_of_win
++;
1533 } else if (nack
== oack
) {
1534 /* duplicated ack or window probe */
1536 /* duplicated ack, add dup ack count due to whql test up to 1 */
1537 chain
->stat
.dup_ack
++;
1540 /* Coalesce window update */
1541 o_tcp
->th_win
= n_tcp
->th_win
;
1542 chain
->stat
.win_update
++;
1543 return RSC_COALESCE
;
1546 /* pure ack, go to 'C', finalize*/
1547 chain
->stat
.pure_ack
++;
1552 static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain
*chain
,
1553 VirtioNetRscSeg
*seg
,
1555 VirtioNetRscUnit
*n_unit
)
1559 uint32_t nseq
, oseq
;
1560 VirtioNetRscUnit
*o_unit
;
1562 o_unit
= &seg
->unit
;
1563 o_ip_len
= htons(*o_unit
->ip_plen
);
1564 nseq
= htonl(n_unit
->tcp
->th_seq
);
1565 oseq
= htonl(o_unit
->tcp
->th_seq
);
1567 /* out of order or retransmitted. */
1568 if ((nseq
- oseq
) > VIRTIO_NET_MAX_TCP_PAYLOAD
) {
1569 chain
->stat
.data_out_of_win
++;
1573 data
= ((uint8_t *)n_unit
->tcp
) + n_unit
->tcp_hdrlen
;
1575 if ((o_unit
->payload
== 0) && n_unit
->payload
) {
1576 /* From no payload to payload, normal case, not a dup ack or etc */
1577 chain
->stat
.data_after_pure_ack
++;
1580 return virtio_net_rsc_handle_ack(chain
, seg
, buf
,
1581 n_unit
->tcp
, o_unit
->tcp
);
1583 } else if ((nseq
- oseq
) != o_unit
->payload
) {
1584 /* Not a consistent packet, out of order */
1585 chain
->stat
.data_out_of_order
++;
1589 if ((o_ip_len
+ n_unit
->payload
) > chain
->max_payload
) {
1590 chain
->stat
.over_size
++;
1594 /* Here comes the right data, the payload length in v4/v6 is different,
1595 so use the field value to update and record the new data len */
1596 o_unit
->payload
+= n_unit
->payload
; /* update new data len */
1598 /* update field in ip header */
1599 *o_unit
->ip_plen
= htons(o_ip_len
+ n_unit
->payload
);
1601 /* Bring 'PUSH' big, the whql test guide says 'PUSH' can be coalesced
1602 for windows guest, while this may change the behavior for linux
1603 guest (only if it uses RSC feature). */
1604 o_unit
->tcp
->th_offset_flags
= n_unit
->tcp
->th_offset_flags
;
1606 o_unit
->tcp
->th_ack
= n_unit
->tcp
->th_ack
;
1607 o_unit
->tcp
->th_win
= n_unit
->tcp
->th_win
;
1609 memmove(seg
->buf
+ seg
->size
, data
, n_unit
->payload
);
1610 seg
->size
+= n_unit
->payload
;
1612 chain
->stat
.coalesced
++;
1613 return RSC_COALESCE
;
1617 static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain
*chain
,
1618 VirtioNetRscSeg
*seg
,
1619 const uint8_t *buf
, size_t size
,
1620 VirtioNetRscUnit
*unit
)
1622 struct ip_header
*ip1
, *ip2
;
1624 ip1
= (struct ip_header
*)(unit
->ip
);
1625 ip2
= (struct ip_header
*)(seg
->unit
.ip
);
1626 if ((ip1
->ip_src
^ ip2
->ip_src
) || (ip1
->ip_dst
^ ip2
->ip_dst
)
1627 || (unit
->tcp
->th_sport
^ seg
->unit
.tcp
->th_sport
)
1628 || (unit
->tcp
->th_dport
^ seg
->unit
.tcp
->th_dport
)) {
1629 chain
->stat
.no_match
++;
1630 return RSC_NO_MATCH
;
1633 return virtio_net_rsc_coalesce_data(chain
, seg
, buf
, unit
);
1636 static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain
*chain
,
1637 VirtioNetRscSeg
*seg
,
1638 const uint8_t *buf
, size_t size
,
1639 VirtioNetRscUnit
*unit
)
1641 struct ip6_header
*ip1
, *ip2
;
1643 ip1
= (struct ip6_header
*)(unit
->ip
);
1644 ip2
= (struct ip6_header
*)(seg
->unit
.ip
);
1645 if (memcmp(&ip1
->ip6_src
, &ip2
->ip6_src
, sizeof(struct in6_address
))
1646 || memcmp(&ip1
->ip6_dst
, &ip2
->ip6_dst
, sizeof(struct in6_address
))
1647 || (unit
->tcp
->th_sport
^ seg
->unit
.tcp
->th_sport
)
1648 || (unit
->tcp
->th_dport
^ seg
->unit
.tcp
->th_dport
)) {
1649 chain
->stat
.no_match
++;
1650 return RSC_NO_MATCH
;
1653 return virtio_net_rsc_coalesce_data(chain
, seg
, buf
, unit
);
1656 /* Packets with 'SYN' should bypass, other flag should be sent after drain
1657 * to prevent out of order */
1658 static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain
*chain
,
1659 struct tcp_header
*tcp
)
1664 tcp_flag
= htons(tcp
->th_offset_flags
);
1665 tcp_hdr
= (tcp_flag
& VIRTIO_NET_TCP_HDR_LENGTH
) >> 10;
1666 tcp_flag
&= VIRTIO_NET_TCP_FLAG
;
1667 tcp_flag
= htons(tcp
->th_offset_flags
) & 0x3F;
1668 if (tcp_flag
& TH_SYN
) {
1669 chain
->stat
.tcp_syn
++;
1673 if (tcp_flag
& (TH_FIN
| TH_URG
| TH_RST
| TH_ECE
| TH_CWR
)) {
1674 chain
->stat
.tcp_ctrl_drain
++;
1678 if (tcp_hdr
> sizeof(struct tcp_header
)) {
1679 chain
->stat
.tcp_all_opt
++;
1683 return RSC_CANDIDATE
;
1686 static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain
*chain
,
1688 const uint8_t *buf
, size_t size
,
1689 VirtioNetRscUnit
*unit
)
1692 VirtioNetRscSeg
*seg
, *nseg
;
1694 if (QTAILQ_EMPTY(&chain
->buffers
)) {
1695 chain
->stat
.empty_cache
++;
1696 virtio_net_rsc_cache_buf(chain
, nc
, buf
, size
);
1697 timer_mod(chain
->drain_timer
,
1698 qemu_clock_get_ns(QEMU_CLOCK_HOST
) + chain
->n
->rsc_timeout
);
1702 QTAILQ_FOREACH_SAFE(seg
, &chain
->buffers
, next
, nseg
) {
1703 if (chain
->proto
== ETH_P_IP
) {
1704 ret
= virtio_net_rsc_coalesce4(chain
, seg
, buf
, size
, unit
);
1706 ret
= virtio_net_rsc_coalesce6(chain
, seg
, buf
, size
, unit
);
1709 if (ret
== RSC_FINAL
) {
1710 if (virtio_net_rsc_drain_seg(chain
, seg
) == 0) {
1712 chain
->stat
.final_failed
++;
1716 /* Send current packet */
1717 return virtio_net_do_receive(nc
, buf
, size
);
1718 } else if (ret
== RSC_NO_MATCH
) {
1721 /* Coalesced, mark coalesced flag to tell calc cksum for ipv4 */
1722 seg
->is_coalesced
= 1;
1727 chain
->stat
.no_match_cache
++;
1728 virtio_net_rsc_cache_buf(chain
, nc
, buf
, size
);
1732 /* Drain a connection data, this is to avoid out of order segments */
1733 static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain
*chain
,
1735 const uint8_t *buf
, size_t size
,
1736 uint16_t ip_start
, uint16_t ip_size
,
1739 VirtioNetRscSeg
*seg
, *nseg
;
1740 uint32_t ppair1
, ppair2
;
1742 ppair1
= *(uint32_t *)(buf
+ tcp_port
);
1743 QTAILQ_FOREACH_SAFE(seg
, &chain
->buffers
, next
, nseg
) {
1744 ppair2
= *(uint32_t *)(seg
->buf
+ tcp_port
);
1745 if (memcmp(buf
+ ip_start
, seg
->buf
+ ip_start
, ip_size
)
1746 || (ppair1
!= ppair2
)) {
1749 if (virtio_net_rsc_drain_seg(chain
, seg
) == 0) {
1750 chain
->stat
.drain_failed
++;
1756 return virtio_net_do_receive(nc
, buf
, size
);
1759 static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain
*chain
,
1760 struct ip_header
*ip
,
1761 const uint8_t *buf
, size_t size
)
1765 /* Not an ipv4 packet */
1766 if (((ip
->ip_ver_len
& 0xF0) >> 4) != IP_HEADER_VERSION_4
) {
1767 chain
->stat
.ip_option
++;
1771 /* Don't handle packets with ip option */
1772 if ((ip
->ip_ver_len
& 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH
) {
1773 chain
->stat
.ip_option
++;
1777 if (ip
->ip_p
!= IPPROTO_TCP
) {
1778 chain
->stat
.bypass_not_tcp
++;
1782 /* Don't handle packets with ip fragment */
1783 if (!(htons(ip
->ip_off
) & IP_DF
)) {
1784 chain
->stat
.ip_frag
++;
1788 /* Don't handle packets with ecn flag */
1789 if (IPTOS_ECN(ip
->ip_tos
)) {
1790 chain
->stat
.ip_ecn
++;
1794 ip_len
= htons(ip
->ip_len
);
1795 if (ip_len
< (sizeof(struct ip_header
) + sizeof(struct tcp_header
))
1796 || ip_len
> (size
- chain
->n
->guest_hdr_len
-
1797 sizeof(struct eth_header
))) {
1798 chain
->stat
.ip_hacked
++;
1802 return RSC_CANDIDATE
;
1805 static size_t virtio_net_rsc_receive4(VirtioNetRscChain
*chain
,
1807 const uint8_t *buf
, size_t size
)
1811 VirtioNetRscUnit unit
;
1813 hdr_len
= ((VirtIONet
*)(chain
->n
))->guest_hdr_len
;
1815 if (size
< (hdr_len
+ sizeof(struct eth_header
) + sizeof(struct ip_header
)
1816 + sizeof(struct tcp_header
))) {
1817 chain
->stat
.bypass_not_tcp
++;
1818 return virtio_net_do_receive(nc
, buf
, size
);
1821 virtio_net_rsc_extract_unit4(chain
, buf
, &unit
);
1822 if (virtio_net_rsc_sanity_check4(chain
, unit
.ip
, buf
, size
)
1824 return virtio_net_do_receive(nc
, buf
, size
);
1827 ret
= virtio_net_rsc_tcp_ctrl_check(chain
, unit
.tcp
);
1828 if (ret
== RSC_BYPASS
) {
1829 return virtio_net_do_receive(nc
, buf
, size
);
1830 } else if (ret
== RSC_FINAL
) {
1831 return virtio_net_rsc_drain_flow(chain
, nc
, buf
, size
,
1832 ((hdr_len
+ sizeof(struct eth_header
)) + 12),
1833 VIRTIO_NET_IP4_ADDR_SIZE
,
1834 hdr_len
+ sizeof(struct eth_header
) + sizeof(struct ip_header
));
1837 return virtio_net_rsc_do_coalesce(chain
, nc
, buf
, size
, &unit
);
1840 static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain
*chain
,
1841 struct ip6_header
*ip6
,
1842 const uint8_t *buf
, size_t size
)
1846 if (((ip6
->ip6_ctlun
.ip6_un1
.ip6_un1_flow
& 0xF0) >> 4)
1847 != IP_HEADER_VERSION_6
) {
1851 /* Both option and protocol is checked in this */
1852 if (ip6
->ip6_ctlun
.ip6_un1
.ip6_un1_nxt
!= IPPROTO_TCP
) {
1853 chain
->stat
.bypass_not_tcp
++;
1857 ip_len
= htons(ip6
->ip6_ctlun
.ip6_un1
.ip6_un1_plen
);
1858 if (ip_len
< sizeof(struct tcp_header
) ||
1859 ip_len
> (size
- chain
->n
->guest_hdr_len
- sizeof(struct eth_header
)
1860 - sizeof(struct ip6_header
))) {
1861 chain
->stat
.ip_hacked
++;
1865 /* Don't handle packets with ecn flag */
1866 if (IP6_ECN(ip6
->ip6_ctlun
.ip6_un3
.ip6_un3_ecn
)) {
1867 chain
->stat
.ip_ecn
++;
1871 return RSC_CANDIDATE
;
1874 static size_t virtio_net_rsc_receive6(void *opq
, NetClientState
*nc
,
1875 const uint8_t *buf
, size_t size
)
1879 VirtioNetRscChain
*chain
;
1880 VirtioNetRscUnit unit
;
1882 chain
= (VirtioNetRscChain
*)opq
;
1883 hdr_len
= ((VirtIONet
*)(chain
->n
))->guest_hdr_len
;
1885 if (size
< (hdr_len
+ sizeof(struct eth_header
) + sizeof(struct ip6_header
)
1886 + sizeof(tcp_header
))) {
1887 return virtio_net_do_receive(nc
, buf
, size
);
1890 virtio_net_rsc_extract_unit6(chain
, buf
, &unit
);
1891 if (RSC_CANDIDATE
!= virtio_net_rsc_sanity_check6(chain
,
1892 unit
.ip
, buf
, size
)) {
1893 return virtio_net_do_receive(nc
, buf
, size
);
1896 ret
= virtio_net_rsc_tcp_ctrl_check(chain
, unit
.tcp
);
1897 if (ret
== RSC_BYPASS
) {
1898 return virtio_net_do_receive(nc
, buf
, size
);
1899 } else if (ret
== RSC_FINAL
) {
1900 return virtio_net_rsc_drain_flow(chain
, nc
, buf
, size
,
1901 ((hdr_len
+ sizeof(struct eth_header
)) + 8),
1902 VIRTIO_NET_IP6_ADDR_SIZE
,
1903 hdr_len
+ sizeof(struct eth_header
)
1904 + sizeof(struct ip6_header
));
1907 return virtio_net_rsc_do_coalesce(chain
, nc
, buf
, size
, &unit
);
1910 static VirtioNetRscChain
*virtio_net_rsc_lookup_chain(VirtIONet
*n
,
1914 VirtioNetRscChain
*chain
;
1916 if ((proto
!= (uint16_t)ETH_P_IP
) && (proto
!= (uint16_t)ETH_P_IPV6
)) {
1920 QTAILQ_FOREACH(chain
, &n
->rsc_chains
, next
) {
1921 if (chain
->proto
== proto
) {
1926 chain
= g_malloc(sizeof(*chain
));
1928 chain
->proto
= proto
;
1929 if (proto
== (uint16_t)ETH_P_IP
) {
1930 chain
->max_payload
= VIRTIO_NET_MAX_IP4_PAYLOAD
;
1931 chain
->gso_type
= VIRTIO_NET_HDR_GSO_TCPV4
;
1933 chain
->max_payload
= VIRTIO_NET_MAX_IP6_PAYLOAD
;
1934 chain
->gso_type
= VIRTIO_NET_HDR_GSO_TCPV6
;
1936 chain
->drain_timer
= timer_new_ns(QEMU_CLOCK_HOST
,
1937 virtio_net_rsc_purge
, chain
);
1938 memset(&chain
->stat
, 0, sizeof(chain
->stat
));
1940 QTAILQ_INIT(&chain
->buffers
);
1941 QTAILQ_INSERT_TAIL(&n
->rsc_chains
, chain
, next
);
1946 static ssize_t
virtio_net_rsc_receive(NetClientState
*nc
,
1951 VirtioNetRscChain
*chain
;
1952 struct eth_header
*eth
;
1955 n
= qemu_get_nic_opaque(nc
);
1956 if (size
< (n
->host_hdr_len
+ sizeof(struct eth_header
))) {
1957 return virtio_net_do_receive(nc
, buf
, size
);
1960 eth
= (struct eth_header
*)(buf
+ n
->guest_hdr_len
);
1961 proto
= htons(eth
->h_proto
);
1963 chain
= virtio_net_rsc_lookup_chain(n
, nc
, proto
);
1965 chain
->stat
.received
++;
1966 if (proto
== (uint16_t)ETH_P_IP
&& n
->rsc4_enabled
) {
1967 return virtio_net_rsc_receive4(chain
, nc
, buf
, size
);
1968 } else if (proto
== (uint16_t)ETH_P_IPV6
&& n
->rsc6_enabled
) {
1969 return virtio_net_rsc_receive6(chain
, nc
, buf
, size
);
1972 return virtio_net_do_receive(nc
, buf
, size
);
1975 static ssize_t
virtio_net_receive(NetClientState
*nc
, const uint8_t *buf
,
1978 VirtIONet
*n
= qemu_get_nic_opaque(nc
);
1979 if ((n
->rsc4_enabled
|| n
->rsc6_enabled
)) {
1980 return virtio_net_rsc_receive(nc
, buf
, size
);
1982 return virtio_net_do_receive(nc
, buf
, size
);
1986 static int32_t virtio_net_flush_tx(VirtIONetQueue
*q
);
1988 static void virtio_net_tx_complete(NetClientState
*nc
, ssize_t len
)
1990 VirtIONet
*n
= qemu_get_nic_opaque(nc
);
1991 VirtIONetQueue
*q
= virtio_net_get_subqueue(nc
);
1992 VirtIODevice
*vdev
= VIRTIO_DEVICE(n
);
1994 virtqueue_push(q
->tx_vq
, q
->async_tx
.elem
, 0);
1995 virtio_notify(vdev
, q
->tx_vq
);
1997 g_free(q
->async_tx
.elem
);
1998 q
->async_tx
.elem
= NULL
;
2000 virtio_queue_set_notification(q
->tx_vq
, 1);
2001 virtio_net_flush_tx(q
);
2005 static int32_t virtio_net_flush_tx(VirtIONetQueue
*q
)
2007 VirtIONet
*n
= q
->n
;
2008 VirtIODevice
*vdev
= VIRTIO_DEVICE(n
);
2009 VirtQueueElement
*elem
;
2010 int32_t num_packets
= 0;
2011 int queue_index
= vq2q(virtio_get_queue_index(q
->tx_vq
));
2012 if (!(vdev
->status
& VIRTIO_CONFIG_S_DRIVER_OK
)) {
2016 if (q
->async_tx
.elem
) {
2017 virtio_queue_set_notification(q
->tx_vq
, 0);
2023 unsigned int out_num
;
2024 struct iovec sg
[VIRTQUEUE_MAX_SIZE
], sg2
[VIRTQUEUE_MAX_SIZE
+ 1], *out_sg
;
2025 struct virtio_net_hdr_mrg_rxbuf mhdr
;
2027 elem
= virtqueue_pop(q
->tx_vq
, sizeof(VirtQueueElement
));
2032 out_num
= elem
->out_num
;
2033 out_sg
= elem
->out_sg
;
2035 virtio_error(vdev
, "virtio-net header not in first element");
2036 virtqueue_detach_element(q
->tx_vq
, elem
, 0);
2041 if (n
->has_vnet_hdr
) {
2042 if (iov_to_buf(out_sg
, out_num
, 0, &mhdr
, n
->guest_hdr_len
) <
2044 virtio_error(vdev
, "virtio-net header incorrect");
2045 virtqueue_detach_element(q
->tx_vq
, elem
, 0);
2049 if (n
->needs_vnet_hdr_swap
) {
2050 virtio_net_hdr_swap(vdev
, (void *) &mhdr
);
2051 sg2
[0].iov_base
= &mhdr
;
2052 sg2
[0].iov_len
= n
->guest_hdr_len
;
2053 out_num
= iov_copy(&sg2
[1], ARRAY_SIZE(sg2
) - 1,
2055 n
->guest_hdr_len
, -1);
2056 if (out_num
== VIRTQUEUE_MAX_SIZE
) {
2064 * If host wants to see the guest header as is, we can
2065 * pass it on unchanged. Otherwise, copy just the parts
2066 * that host is interested in.
2068 assert(n
->host_hdr_len
<= n
->guest_hdr_len
);
2069 if (n
->host_hdr_len
!= n
->guest_hdr_len
) {
2070 unsigned sg_num
= iov_copy(sg
, ARRAY_SIZE(sg
),
2072 0, n
->host_hdr_len
);
2073 sg_num
+= iov_copy(sg
+ sg_num
, ARRAY_SIZE(sg
) - sg_num
,
2075 n
->guest_hdr_len
, -1);
2080 ret
= qemu_sendv_packet_async(qemu_get_subqueue(n
->nic
, queue_index
),
2081 out_sg
, out_num
, virtio_net_tx_complete
);
2083 virtio_queue_set_notification(q
->tx_vq
, 0);
2084 q
->async_tx
.elem
= elem
;
2089 virtqueue_push(q
->tx_vq
, elem
, 0);
2090 virtio_notify(vdev
, q
->tx_vq
);
2093 if (++num_packets
>= n
->tx_burst
) {
2100 static void virtio_net_handle_tx_timer(VirtIODevice
*vdev
, VirtQueue
*vq
)
2102 VirtIONet
*n
= VIRTIO_NET(vdev
);
2103 VirtIONetQueue
*q
= &n
->vqs
[vq2q(virtio_get_queue_index(vq
))];
2105 if (unlikely((n
->status
& VIRTIO_NET_S_LINK_UP
) == 0)) {
2106 virtio_net_drop_tx_queue_data(vdev
, vq
);
2110 /* This happens when device was stopped but VCPU wasn't. */
2111 if (!vdev
->vm_running
) {
2116 if (q
->tx_waiting
) {
2117 virtio_queue_set_notification(vq
, 1);
2118 timer_del(q
->tx_timer
);
2120 if (virtio_net_flush_tx(q
) == -EINVAL
) {
2124 timer_mod(q
->tx_timer
,
2125 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) + n
->tx_timeout
);
2127 virtio_queue_set_notification(vq
, 0);
2131 static void virtio_net_handle_tx_bh(VirtIODevice
*vdev
, VirtQueue
*vq
)
2133 VirtIONet
*n
= VIRTIO_NET(vdev
);
2134 VirtIONetQueue
*q
= &n
->vqs
[vq2q(virtio_get_queue_index(vq
))];
2136 if (unlikely((n
->status
& VIRTIO_NET_S_LINK_UP
) == 0)) {
2137 virtio_net_drop_tx_queue_data(vdev
, vq
);
2141 if (unlikely(q
->tx_waiting
)) {
2145 /* This happens when device was stopped but VCPU wasn't. */
2146 if (!vdev
->vm_running
) {
2149 virtio_queue_set_notification(vq
, 0);
2150 qemu_bh_schedule(q
->tx_bh
);
2153 static void virtio_net_tx_timer(void *opaque
)
2155 VirtIONetQueue
*q
= opaque
;
2156 VirtIONet
*n
= q
->n
;
2157 VirtIODevice
*vdev
= VIRTIO_DEVICE(n
);
2158 /* This happens when device was stopped but BH wasn't. */
2159 if (!vdev
->vm_running
) {
2160 /* Make sure tx waiting is set, so we'll run when restarted. */
2161 assert(q
->tx_waiting
);
2167 /* Just in case the driver is not ready on more */
2168 if (!(vdev
->status
& VIRTIO_CONFIG_S_DRIVER_OK
)) {
2172 virtio_queue_set_notification(q
->tx_vq
, 1);
2173 virtio_net_flush_tx(q
);
2176 static void virtio_net_tx_bh(void *opaque
)
2178 VirtIONetQueue
*q
= opaque
;
2179 VirtIONet
*n
= q
->n
;
2180 VirtIODevice
*vdev
= VIRTIO_DEVICE(n
);
2183 /* This happens when device was stopped but BH wasn't. */
2184 if (!vdev
->vm_running
) {
2185 /* Make sure tx waiting is set, so we'll run when restarted. */
2186 assert(q
->tx_waiting
);
2192 /* Just in case the driver is not ready on more */
2193 if (unlikely(!(vdev
->status
& VIRTIO_CONFIG_S_DRIVER_OK
))) {
2197 ret
= virtio_net_flush_tx(q
);
2198 if (ret
== -EBUSY
|| ret
== -EINVAL
) {
2199 return; /* Notification re-enable handled by tx_complete or device
2203 /* If we flush a full burst of packets, assume there are
2204 * more coming and immediately reschedule */
2205 if (ret
>= n
->tx_burst
) {
2206 qemu_bh_schedule(q
->tx_bh
);
2211 /* If less than a full burst, re-enable notification and flush
2212 * anything that may have come in while we weren't looking. If
2213 * we find something, assume the guest is still active and reschedule */
2214 virtio_queue_set_notification(q
->tx_vq
, 1);
2215 ret
= virtio_net_flush_tx(q
);
2216 if (ret
== -EINVAL
) {
2218 } else if (ret
> 0) {
2219 virtio_queue_set_notification(q
->tx_vq
, 0);
2220 qemu_bh_schedule(q
->tx_bh
);
2225 static void virtio_net_add_queue(VirtIONet
*n
, int index
)
2227 VirtIODevice
*vdev
= VIRTIO_DEVICE(n
);
2229 n
->vqs
[index
].rx_vq
= virtio_add_queue(vdev
, n
->net_conf
.rx_queue_size
,
2230 virtio_net_handle_rx
);
2232 if (n
->net_conf
.tx
&& !strcmp(n
->net_conf
.tx
, "timer")) {
2233 n
->vqs
[index
].tx_vq
=
2234 virtio_add_queue(vdev
, n
->net_conf
.tx_queue_size
,
2235 virtio_net_handle_tx_timer
);
2236 n
->vqs
[index
].tx_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL
,
2237 virtio_net_tx_timer
,
2240 n
->vqs
[index
].tx_vq
=
2241 virtio_add_queue(vdev
, n
->net_conf
.tx_queue_size
,
2242 virtio_net_handle_tx_bh
);
2243 n
->vqs
[index
].tx_bh
= qemu_bh_new(virtio_net_tx_bh
, &n
->vqs
[index
]);
2246 n
->vqs
[index
].tx_waiting
= 0;
2247 n
->vqs
[index
].n
= n
;
2250 static void virtio_net_del_queue(VirtIONet
*n
, int index
)
2252 VirtIODevice
*vdev
= VIRTIO_DEVICE(n
);
2253 VirtIONetQueue
*q
= &n
->vqs
[index
];
2254 NetClientState
*nc
= qemu_get_subqueue(n
->nic
, index
);
2256 qemu_purge_queued_packets(nc
);
2258 virtio_del_queue(vdev
, index
* 2);
2260 timer_del(q
->tx_timer
);
2261 timer_free(q
->tx_timer
);
2264 qemu_bh_delete(q
->tx_bh
);
2268 virtio_del_queue(vdev
, index
* 2 + 1);
2271 static void virtio_net_change_num_queues(VirtIONet
*n
, int new_max_queues
)
2273 VirtIODevice
*vdev
= VIRTIO_DEVICE(n
);
2274 int old_num_queues
= virtio_get_num_queues(vdev
);
2275 int new_num_queues
= new_max_queues
* 2 + 1;
2278 assert(old_num_queues
>= 3);
2279 assert(old_num_queues
% 2 == 1);
2281 if (old_num_queues
== new_num_queues
) {
2286 * We always need to remove and add ctrl vq if
2287 * old_num_queues != new_num_queues. Remove ctrl_vq first,
2288 * and then we only enter one of the following two loops.
2290 virtio_del_queue(vdev
, old_num_queues
- 1);
2292 for (i
= new_num_queues
- 1; i
< old_num_queues
- 1; i
+= 2) {
2293 /* new_num_queues < old_num_queues */
2294 virtio_net_del_queue(n
, i
/ 2);
2297 for (i
= old_num_queues
- 1; i
< new_num_queues
- 1; i
+= 2) {
2298 /* new_num_queues > old_num_queues */
2299 virtio_net_add_queue(n
, i
/ 2);
2302 /* add ctrl_vq last */
2303 n
->ctrl_vq
= virtio_add_queue(vdev
, 64, virtio_net_handle_ctrl
);
2306 static void virtio_net_set_multiqueue(VirtIONet
*n
, int multiqueue
)
2308 int max
= multiqueue
? n
->max_queues
: 1;
2310 n
->multiqueue
= multiqueue
;
2311 virtio_net_change_num_queues(n
, max
);
2313 virtio_net_set_queues(n
);
2316 static int virtio_net_post_load_device(void *opaque
, int version_id
)
2318 VirtIONet
*n
= opaque
;
2319 VirtIODevice
*vdev
= VIRTIO_DEVICE(n
);
2322 trace_virtio_net_post_load_device();
2323 virtio_net_set_mrg_rx_bufs(n
, n
->mergeable_rx_bufs
,
2324 virtio_vdev_has_feature(vdev
,
2325 VIRTIO_F_VERSION_1
));
2327 /* MAC_TABLE_ENTRIES may be different from the saved image */
2328 if (n
->mac_table
.in_use
> MAC_TABLE_ENTRIES
) {
2329 n
->mac_table
.in_use
= 0;
2332 if (!virtio_vdev_has_feature(vdev
, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS
)) {
2333 n
->curr_guest_offloads
= virtio_net_supported_guest_offloads(n
);
2336 if (peer_has_vnet_hdr(n
)) {
2337 virtio_net_apply_guest_offloads(n
);
2340 virtio_net_set_queues(n
);
2342 /* Find the first multicast entry in the saved MAC filter */
2343 for (i
= 0; i
< n
->mac_table
.in_use
; i
++) {
2344 if (n
->mac_table
.macs
[i
* ETH_ALEN
] & 1) {
2348 n
->mac_table
.first_multi
= i
;
2350 /* nc.link_down can't be migrated, so infer link_down according
2351 * to link status bit in n->status */
2352 link_down
= (n
->status
& VIRTIO_NET_S_LINK_UP
) == 0;
2353 for (i
= 0; i
< n
->max_queues
; i
++) {
2354 qemu_get_subqueue(n
->nic
, i
)->link_down
= link_down
;
2357 if (virtio_vdev_has_feature(vdev
, VIRTIO_NET_F_GUEST_ANNOUNCE
) &&
2358 virtio_vdev_has_feature(vdev
, VIRTIO_NET_F_CTRL_VQ
)) {
2359 qemu_announce_timer_reset(&n
->announce_timer
, migrate_announce_params(),
2361 virtio_net_announce_timer
, n
);
2362 if (n
->announce_timer
.round
) {
2363 timer_mod(n
->announce_timer
.tm
,
2364 qemu_clock_get_ms(n
->announce_timer
.type
));
2366 qemu_announce_timer_del(&n
->announce_timer
, false);
2373 /* tx_waiting field of a VirtIONetQueue */
2374 static const VMStateDescription vmstate_virtio_net_queue_tx_waiting
= {
2375 .name
= "virtio-net-queue-tx_waiting",
2376 .fields
= (VMStateField
[]) {
2377 VMSTATE_UINT32(tx_waiting
, VirtIONetQueue
),
2378 VMSTATE_END_OF_LIST()
2382 static bool max_queues_gt_1(void *opaque
, int version_id
)
2384 return VIRTIO_NET(opaque
)->max_queues
> 1;
2387 static bool has_ctrl_guest_offloads(void *opaque
, int version_id
)
2389 return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque
),
2390 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS
);
2393 static bool mac_table_fits(void *opaque
, int version_id
)
2395 return VIRTIO_NET(opaque
)->mac_table
.in_use
<= MAC_TABLE_ENTRIES
;
2398 static bool mac_table_doesnt_fit(void *opaque
, int version_id
)
2400 return !mac_table_fits(opaque
, version_id
);
2403 /* This temporary type is shared by all the WITH_TMP methods
2404 * although only some fields are used by each.
2406 struct VirtIONetMigTmp
{
2408 VirtIONetQueue
*vqs_1
;
2409 uint16_t curr_queues_1
;
2411 uint32_t has_vnet_hdr
;
2414 /* The 2nd and subsequent tx_waiting flags are loaded later than
2415 * the 1st entry in the queues and only if there's more than one
2416 * entry. We use the tmp mechanism to calculate a temporary
2417 * pointer and count and also validate the count.
2420 static int virtio_net_tx_waiting_pre_save(void *opaque
)
2422 struct VirtIONetMigTmp
*tmp
= opaque
;
2424 tmp
->vqs_1
= tmp
->parent
->vqs
+ 1;
2425 tmp
->curr_queues_1
= tmp
->parent
->curr_queues
- 1;
2426 if (tmp
->parent
->curr_queues
== 0) {
2427 tmp
->curr_queues_1
= 0;
2433 static int virtio_net_tx_waiting_pre_load(void *opaque
)
2435 struct VirtIONetMigTmp
*tmp
= opaque
;
2437 /* Reuse the pointer setup from save */
2438 virtio_net_tx_waiting_pre_save(opaque
);
2440 if (tmp
->parent
->curr_queues
> tmp
->parent
->max_queues
) {
2441 error_report("virtio-net: curr_queues %x > max_queues %x",
2442 tmp
->parent
->curr_queues
, tmp
->parent
->max_queues
);
2447 return 0; /* all good */
2450 static const VMStateDescription vmstate_virtio_net_tx_waiting
= {
2451 .name
= "virtio-net-tx_waiting",
2452 .pre_load
= virtio_net_tx_waiting_pre_load
,
2453 .pre_save
= virtio_net_tx_waiting_pre_save
,
2454 .fields
= (VMStateField
[]) {
2455 VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1
, struct VirtIONetMigTmp
,
2457 vmstate_virtio_net_queue_tx_waiting
,
2458 struct VirtIONetQueue
),
2459 VMSTATE_END_OF_LIST()
2463 /* the 'has_ufo' flag is just tested; if the incoming stream has the
2464 * flag set we need to check that we have it
2466 static int virtio_net_ufo_post_load(void *opaque
, int version_id
)
2468 struct VirtIONetMigTmp
*tmp
= opaque
;
2470 if (tmp
->has_ufo
&& !peer_has_ufo(tmp
->parent
)) {
2471 error_report("virtio-net: saved image requires TUN_F_UFO support");
2478 static int virtio_net_ufo_pre_save(void *opaque
)
2480 struct VirtIONetMigTmp
*tmp
= opaque
;
2482 tmp
->has_ufo
= tmp
->parent
->has_ufo
;
2487 static const VMStateDescription vmstate_virtio_net_has_ufo
= {
2488 .name
= "virtio-net-ufo",
2489 .post_load
= virtio_net_ufo_post_load
,
2490 .pre_save
= virtio_net_ufo_pre_save
,
2491 .fields
= (VMStateField
[]) {
2492 VMSTATE_UINT8(has_ufo
, struct VirtIONetMigTmp
),
2493 VMSTATE_END_OF_LIST()
2497 /* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
2498 * flag set we need to check that we have it
2500 static int virtio_net_vnet_post_load(void *opaque
, int version_id
)
2502 struct VirtIONetMigTmp
*tmp
= opaque
;
2504 if (tmp
->has_vnet_hdr
&& !peer_has_vnet_hdr(tmp
->parent
)) {
2505 error_report("virtio-net: saved image requires vnet_hdr=on");
2512 static int virtio_net_vnet_pre_save(void *opaque
)
2514 struct VirtIONetMigTmp
*tmp
= opaque
;
2516 tmp
->has_vnet_hdr
= tmp
->parent
->has_vnet_hdr
;
2521 static const VMStateDescription vmstate_virtio_net_has_vnet
= {
2522 .name
= "virtio-net-vnet",
2523 .post_load
= virtio_net_vnet_post_load
,
2524 .pre_save
= virtio_net_vnet_pre_save
,
2525 .fields
= (VMStateField
[]) {
2526 VMSTATE_UINT32(has_vnet_hdr
, struct VirtIONetMigTmp
),
2527 VMSTATE_END_OF_LIST()
2531 static const VMStateDescription vmstate_virtio_net_device
= {
2532 .name
= "virtio-net-device",
2533 .version_id
= VIRTIO_NET_VM_VERSION
,
2534 .minimum_version_id
= VIRTIO_NET_VM_VERSION
,
2535 .post_load
= virtio_net_post_load_device
,
2536 .fields
= (VMStateField
[]) {
2537 VMSTATE_UINT8_ARRAY(mac
, VirtIONet
, ETH_ALEN
),
2538 VMSTATE_STRUCT_POINTER(vqs
, VirtIONet
,
2539 vmstate_virtio_net_queue_tx_waiting
,
2541 VMSTATE_UINT32(mergeable_rx_bufs
, VirtIONet
),
2542 VMSTATE_UINT16(status
, VirtIONet
),
2543 VMSTATE_UINT8(promisc
, VirtIONet
),
2544 VMSTATE_UINT8(allmulti
, VirtIONet
),
2545 VMSTATE_UINT32(mac_table
.in_use
, VirtIONet
),
2547 /* Guarded pair: If it fits we load it, else we throw it away
2548 * - can happen if source has a larger MAC table.; post-load
2549 * sets flags in this case.
2551 VMSTATE_VBUFFER_MULTIPLY(mac_table
.macs
, VirtIONet
,
2552 0, mac_table_fits
, mac_table
.in_use
,
2554 VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet
, mac_table_doesnt_fit
, 0,
2555 mac_table
.in_use
, ETH_ALEN
),
2557 /* Note: This is an array of uint32's that's always been saved as a
2558 * buffer; hold onto your endiannesses; it's actually used as a bitmap
2559 * but based on the uint.
2561 VMSTATE_BUFFER_POINTER_UNSAFE(vlans
, VirtIONet
, 0, MAX_VLAN
>> 3),
2562 VMSTATE_WITH_TMP(VirtIONet
, struct VirtIONetMigTmp
,
2563 vmstate_virtio_net_has_vnet
),
2564 VMSTATE_UINT8(mac_table
.multi_overflow
, VirtIONet
),
2565 VMSTATE_UINT8(mac_table
.uni_overflow
, VirtIONet
),
2566 VMSTATE_UINT8(alluni
, VirtIONet
),
2567 VMSTATE_UINT8(nomulti
, VirtIONet
),
2568 VMSTATE_UINT8(nouni
, VirtIONet
),
2569 VMSTATE_UINT8(nobcast
, VirtIONet
),
2570 VMSTATE_WITH_TMP(VirtIONet
, struct VirtIONetMigTmp
,
2571 vmstate_virtio_net_has_ufo
),
2572 VMSTATE_SINGLE_TEST(max_queues
, VirtIONet
, max_queues_gt_1
, 0,
2573 vmstate_info_uint16_equal
, uint16_t),
2574 VMSTATE_UINT16_TEST(curr_queues
, VirtIONet
, max_queues_gt_1
),
2575 VMSTATE_WITH_TMP(VirtIONet
, struct VirtIONetMigTmp
,
2576 vmstate_virtio_net_tx_waiting
),
2577 VMSTATE_UINT64_TEST(curr_guest_offloads
, VirtIONet
,
2578 has_ctrl_guest_offloads
),
2579 VMSTATE_END_OF_LIST()
2583 static NetClientInfo net_virtio_info
= {
2584 .type
= NET_CLIENT_DRIVER_NIC
,
2585 .size
= sizeof(NICState
),
2586 .can_receive
= virtio_net_can_receive
,
2587 .receive
= virtio_net_receive
,
2588 .link_status_changed
= virtio_net_set_link_status
,
2589 .query_rx_filter
= virtio_net_query_rxfilter
,
2590 .announce
= virtio_net_announce
,
2593 static bool virtio_net_guest_notifier_pending(VirtIODevice
*vdev
, int idx
)
2595 VirtIONet
*n
= VIRTIO_NET(vdev
);
2596 NetClientState
*nc
= qemu_get_subqueue(n
->nic
, vq2q(idx
));
2597 assert(n
->vhost_started
);
2598 return vhost_net_virtqueue_pending(get_vhost_net(nc
->peer
), idx
);
2601 static void virtio_net_guest_notifier_mask(VirtIODevice
*vdev
, int idx
,
2604 VirtIONet
*n
= VIRTIO_NET(vdev
);
2605 NetClientState
*nc
= qemu_get_subqueue(n
->nic
, vq2q(idx
));
2606 assert(n
->vhost_started
);
2607 vhost_net_virtqueue_mask(get_vhost_net(nc
->peer
),
2611 static void virtio_net_set_config_size(VirtIONet
*n
, uint64_t host_features
)
2613 virtio_add_feature(&host_features
, VIRTIO_NET_F_MAC
);
2615 n
->config_size
= virtio_feature_get_config_size(feature_sizes
,
2619 void virtio_net_set_netclient_name(VirtIONet
*n
, const char *name
,
2623 * The name can be NULL, the netclient name will be type.x.
2625 assert(type
!= NULL
);
2627 g_free(n
->netclient_name
);
2628 g_free(n
->netclient_type
);
2629 n
->netclient_name
= g_strdup(name
);
2630 n
->netclient_type
= g_strdup(type
);
2633 static void virtio_net_device_realize(DeviceState
*dev
, Error
**errp
)
2635 VirtIODevice
*vdev
= VIRTIO_DEVICE(dev
);
2636 VirtIONet
*n
= VIRTIO_NET(dev
);
2640 if (n
->net_conf
.mtu
) {
2641 n
->host_features
|= (1ULL << VIRTIO_NET_F_MTU
);
2644 if (n
->net_conf
.duplex_str
) {
2645 if (strncmp(n
->net_conf
.duplex_str
, "half", 5) == 0) {
2646 n
->net_conf
.duplex
= DUPLEX_HALF
;
2647 } else if (strncmp(n
->net_conf
.duplex_str
, "full", 5) == 0) {
2648 n
->net_conf
.duplex
= DUPLEX_FULL
;
2650 error_setg(errp
, "'duplex' must be 'half' or 'full'");
2652 n
->host_features
|= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX
);
2654 n
->net_conf
.duplex
= DUPLEX_UNKNOWN
;
2657 if (n
->net_conf
.speed
< SPEED_UNKNOWN
) {
2658 error_setg(errp
, "'speed' must be between 0 and INT_MAX");
2659 } else if (n
->net_conf
.speed
>= 0) {
2660 n
->host_features
|= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX
);
2663 virtio_net_set_config_size(n
, n
->host_features
);
2664 virtio_init(vdev
, "virtio-net", VIRTIO_ID_NET
, n
->config_size
);
2667 * We set a lower limit on RX queue size to what it always was.
2668 * Guests that want a smaller ring can always resize it without
2669 * help from us (using virtio 1 and up).
2671 if (n
->net_conf
.rx_queue_size
< VIRTIO_NET_RX_QUEUE_MIN_SIZE
||
2672 n
->net_conf
.rx_queue_size
> VIRTQUEUE_MAX_SIZE
||
2673 !is_power_of_2(n
->net_conf
.rx_queue_size
)) {
2674 error_setg(errp
, "Invalid rx_queue_size (= %" PRIu16
"), "
2675 "must be a power of 2 between %d and %d.",
2676 n
->net_conf
.rx_queue_size
, VIRTIO_NET_RX_QUEUE_MIN_SIZE
,
2677 VIRTQUEUE_MAX_SIZE
);
2678 virtio_cleanup(vdev
);
2682 if (n
->net_conf
.tx_queue_size
< VIRTIO_NET_TX_QUEUE_MIN_SIZE
||
2683 n
->net_conf
.tx_queue_size
> VIRTQUEUE_MAX_SIZE
||
2684 !is_power_of_2(n
->net_conf
.tx_queue_size
)) {
2685 error_setg(errp
, "Invalid tx_queue_size (= %" PRIu16
"), "
2686 "must be a power of 2 between %d and %d",
2687 n
->net_conf
.tx_queue_size
, VIRTIO_NET_TX_QUEUE_MIN_SIZE
,
2688 VIRTQUEUE_MAX_SIZE
);
2689 virtio_cleanup(vdev
);
2693 n
->max_queues
= MAX(n
->nic_conf
.peers
.queues
, 1);
2694 if (n
->max_queues
* 2 + 1 > VIRTIO_QUEUE_MAX
) {
2695 error_setg(errp
, "Invalid number of queues (= %" PRIu32
"), "
2696 "must be a positive integer less than %d.",
2697 n
->max_queues
, (VIRTIO_QUEUE_MAX
- 1) / 2);
2698 virtio_cleanup(vdev
);
2701 n
->vqs
= g_malloc0(sizeof(VirtIONetQueue
) * n
->max_queues
);
2703 n
->tx_timeout
= n
->net_conf
.txtimer
;
2705 if (n
->net_conf
.tx
&& strcmp(n
->net_conf
.tx
, "timer")
2706 && strcmp(n
->net_conf
.tx
, "bh")) {
2707 warn_report("virtio-net: "
2708 "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
2710 error_printf("Defaulting to \"bh\"");
2713 n
->net_conf
.tx_queue_size
= MIN(virtio_net_max_tx_queue_size(n
),
2714 n
->net_conf
.tx_queue_size
);
2716 for (i
= 0; i
< n
->max_queues
; i
++) {
2717 virtio_net_add_queue(n
, i
);
2720 n
->ctrl_vq
= virtio_add_queue(vdev
, 64, virtio_net_handle_ctrl
);
2721 qemu_macaddr_default_if_unset(&n
->nic_conf
.macaddr
);
2722 memcpy(&n
->mac
[0], &n
->nic_conf
.macaddr
, sizeof(n
->mac
));
2723 n
->status
= VIRTIO_NET_S_LINK_UP
;
2724 qemu_announce_timer_reset(&n
->announce_timer
, migrate_announce_params(),
2726 virtio_net_announce_timer
, n
);
2727 n
->announce_timer
.round
= 0;
2729 if (n
->netclient_type
) {
2731 * Happen when virtio_net_set_netclient_name has been called.
2733 n
->nic
= qemu_new_nic(&net_virtio_info
, &n
->nic_conf
,
2734 n
->netclient_type
, n
->netclient_name
, n
);
2736 n
->nic
= qemu_new_nic(&net_virtio_info
, &n
->nic_conf
,
2737 object_get_typename(OBJECT(dev
)), dev
->id
, n
);
2740 peer_test_vnet_hdr(n
);
2741 if (peer_has_vnet_hdr(n
)) {
2742 for (i
= 0; i
< n
->max_queues
; i
++) {
2743 qemu_using_vnet_hdr(qemu_get_subqueue(n
->nic
, i
)->peer
, true);
2745 n
->host_hdr_len
= sizeof(struct virtio_net_hdr
);
2747 n
->host_hdr_len
= 0;
2750 qemu_format_nic_info_str(qemu_get_queue(n
->nic
), n
->nic_conf
.macaddr
.a
);
2752 n
->vqs
[0].tx_waiting
= 0;
2753 n
->tx_burst
= n
->net_conf
.txburst
;
2754 virtio_net_set_mrg_rx_bufs(n
, 0, 0);
2755 n
->promisc
= 1; /* for compatibility */
2757 n
->mac_table
.macs
= g_malloc0(MAC_TABLE_ENTRIES
* ETH_ALEN
);
2759 n
->vlans
= g_malloc0(MAX_VLAN
>> 3);
2761 nc
= qemu_get_queue(n
->nic
);
2762 nc
->rxfilter_notify_enabled
= 1;
2764 QTAILQ_INIT(&n
->rsc_chains
);
2768 static void virtio_net_device_unrealize(DeviceState
*dev
, Error
**errp
)
2770 VirtIODevice
*vdev
= VIRTIO_DEVICE(dev
);
2771 VirtIONet
*n
= VIRTIO_NET(dev
);
2774 /* This will stop vhost backend if appropriate. */
2775 virtio_net_set_status(vdev
, 0);
2777 g_free(n
->netclient_name
);
2778 n
->netclient_name
= NULL
;
2779 g_free(n
->netclient_type
);
2780 n
->netclient_type
= NULL
;
2782 g_free(n
->mac_table
.macs
);
2785 max_queues
= n
->multiqueue
? n
->max_queues
: 1;
2786 for (i
= 0; i
< max_queues
; i
++) {
2787 virtio_net_del_queue(n
, i
);
2790 qemu_announce_timer_del(&n
->announce_timer
, false);
2792 qemu_del_nic(n
->nic
);
2793 virtio_net_rsc_cleanup(n
);
2794 virtio_cleanup(vdev
);
2797 static void virtio_net_instance_init(Object
*obj
)
2799 VirtIONet
*n
= VIRTIO_NET(obj
);
2802 * The default config_size is sizeof(struct virtio_net_config).
2803 * Can be overriden with virtio_net_set_config_size.
2805 n
->config_size
= sizeof(struct virtio_net_config
);
2806 device_add_bootindex_property(obj
, &n
->nic_conf
.bootindex
,
2807 "bootindex", "/ethernet-phy@0",
2811 static int virtio_net_pre_save(void *opaque
)
2813 VirtIONet
*n
= opaque
;
2815 /* At this point, backend must be stopped, otherwise
2816 * it might keep writing to memory. */
2817 assert(!n
->vhost_started
);
2822 static const VMStateDescription vmstate_virtio_net
= {
2823 .name
= "virtio-net",
2824 .minimum_version_id
= VIRTIO_NET_VM_VERSION
,
2825 .version_id
= VIRTIO_NET_VM_VERSION
,
2826 .fields
= (VMStateField
[]) {
2827 VMSTATE_VIRTIO_DEVICE
,
2828 VMSTATE_END_OF_LIST()
2830 .pre_save
= virtio_net_pre_save
,
2833 static Property virtio_net_properties
[] = {
2834 DEFINE_PROP_BIT64("csum", VirtIONet
, host_features
,
2835 VIRTIO_NET_F_CSUM
, true),
2836 DEFINE_PROP_BIT64("guest_csum", VirtIONet
, host_features
,
2837 VIRTIO_NET_F_GUEST_CSUM
, true),
2838 DEFINE_PROP_BIT64("gso", VirtIONet
, host_features
, VIRTIO_NET_F_GSO
, true),
2839 DEFINE_PROP_BIT64("guest_tso4", VirtIONet
, host_features
,
2840 VIRTIO_NET_F_GUEST_TSO4
, true),
2841 DEFINE_PROP_BIT64("guest_tso6", VirtIONet
, host_features
,
2842 VIRTIO_NET_F_GUEST_TSO6
, true),
2843 DEFINE_PROP_BIT64("guest_ecn", VirtIONet
, host_features
,
2844 VIRTIO_NET_F_GUEST_ECN
, true),
2845 DEFINE_PROP_BIT64("guest_ufo", VirtIONet
, host_features
,
2846 VIRTIO_NET_F_GUEST_UFO
, true),
2847 DEFINE_PROP_BIT64("guest_announce", VirtIONet
, host_features
,
2848 VIRTIO_NET_F_GUEST_ANNOUNCE
, true),
2849 DEFINE_PROP_BIT64("host_tso4", VirtIONet
, host_features
,
2850 VIRTIO_NET_F_HOST_TSO4
, true),
2851 DEFINE_PROP_BIT64("host_tso6", VirtIONet
, host_features
,
2852 VIRTIO_NET_F_HOST_TSO6
, true),
2853 DEFINE_PROP_BIT64("host_ecn", VirtIONet
, host_features
,
2854 VIRTIO_NET_F_HOST_ECN
, true),
2855 DEFINE_PROP_BIT64("host_ufo", VirtIONet
, host_features
,
2856 VIRTIO_NET_F_HOST_UFO
, true),
2857 DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet
, host_features
,
2858 VIRTIO_NET_F_MRG_RXBUF
, true),
2859 DEFINE_PROP_BIT64("status", VirtIONet
, host_features
,
2860 VIRTIO_NET_F_STATUS
, true),
2861 DEFINE_PROP_BIT64("ctrl_vq", VirtIONet
, host_features
,
2862 VIRTIO_NET_F_CTRL_VQ
, true),
2863 DEFINE_PROP_BIT64("ctrl_rx", VirtIONet
, host_features
,
2864 VIRTIO_NET_F_CTRL_RX
, true),
2865 DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet
, host_features
,
2866 VIRTIO_NET_F_CTRL_VLAN
, true),
2867 DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet
, host_features
,
2868 VIRTIO_NET_F_CTRL_RX_EXTRA
, true),
2869 DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet
, host_features
,
2870 VIRTIO_NET_F_CTRL_MAC_ADDR
, true),
2871 DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet
, host_features
,
2872 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS
, true),
2873 DEFINE_PROP_BIT64("mq", VirtIONet
, host_features
, VIRTIO_NET_F_MQ
, false),
2874 DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet
, host_features
,
2875 VIRTIO_NET_F_RSC_EXT
, false),
2876 DEFINE_PROP_UINT32("rsc_interval", VirtIONet
, rsc_timeout
,
2877 VIRTIO_NET_RSC_DEFAULT_INTERVAL
),
2878 DEFINE_NIC_PROPERTIES(VirtIONet
, nic_conf
),
2879 DEFINE_PROP_UINT32("x-txtimer", VirtIONet
, net_conf
.txtimer
,
2881 DEFINE_PROP_INT32("x-txburst", VirtIONet
, net_conf
.txburst
, TX_BURST
),
2882 DEFINE_PROP_STRING("tx", VirtIONet
, net_conf
.tx
),
2883 DEFINE_PROP_UINT16("rx_queue_size", VirtIONet
, net_conf
.rx_queue_size
,
2884 VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
),
2885 DEFINE_PROP_UINT16("tx_queue_size", VirtIONet
, net_conf
.tx_queue_size
,
2886 VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE
),
2887 DEFINE_PROP_UINT16("host_mtu", VirtIONet
, net_conf
.mtu
, 0),
2888 DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet
, mtu_bypass_backend
,
2890 DEFINE_PROP_INT32("speed", VirtIONet
, net_conf
.speed
, SPEED_UNKNOWN
),
2891 DEFINE_PROP_STRING("duplex", VirtIONet
, net_conf
.duplex_str
),
2892 DEFINE_PROP_END_OF_LIST(),
2895 static void virtio_net_class_init(ObjectClass
*klass
, void *data
)
2897 DeviceClass
*dc
= DEVICE_CLASS(klass
);
2898 VirtioDeviceClass
*vdc
= VIRTIO_DEVICE_CLASS(klass
);
2900 dc
->props
= virtio_net_properties
;
2901 dc
->vmsd
= &vmstate_virtio_net
;
2902 set_bit(DEVICE_CATEGORY_NETWORK
, dc
->categories
);
2903 vdc
->realize
= virtio_net_device_realize
;
2904 vdc
->unrealize
= virtio_net_device_unrealize
;
2905 vdc
->get_config
= virtio_net_get_config
;
2906 vdc
->set_config
= virtio_net_set_config
;
2907 vdc
->get_features
= virtio_net_get_features
;
2908 vdc
->set_features
= virtio_net_set_features
;
2909 vdc
->bad_features
= virtio_net_bad_features
;
2910 vdc
->reset
= virtio_net_reset
;
2911 vdc
->set_status
= virtio_net_set_status
;
2912 vdc
->guest_notifier_mask
= virtio_net_guest_notifier_mask
;
2913 vdc
->guest_notifier_pending
= virtio_net_guest_notifier_pending
;
2914 vdc
->legacy_features
|= (0x1 << VIRTIO_NET_F_GSO
);
2915 vdc
->vmsd
= &vmstate_virtio_net_device
;
2918 static const TypeInfo virtio_net_info
= {
2919 .name
= TYPE_VIRTIO_NET
,
2920 .parent
= TYPE_VIRTIO_DEVICE
,
2921 .instance_size
= sizeof(VirtIONet
),
2922 .instance_init
= virtio_net_instance_init
,
2923 .class_init
= virtio_net_class_init
,
2926 static void virtio_register_types(void)
2928 type_register_static(&virtio_net_info
);
2931 type_init(virtio_register_types
)