/*
 * Virtio Network Device
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "qemu/atomic.h"
#include "qemu/iov.h"
#include "qemu/main-loop.h"
#include "qemu/module.h"
#include "hw/virtio/virtio.h"
#include "net/net.h"
#include "net/checksum.h"
#include "net/tap.h"
#include "qemu/error-report.h"
#include "qemu/timer.h"
#include "qemu/option.h"
#include "qemu/option_int.h"
#include "qemu/config-file.h"
#include "qapi/qmp/qdict.h"
#include "hw/virtio/virtio-net.h"
#include "net/vhost_net.h"
#include "net/announce.h"
#include "hw/virtio/virtio-bus.h"
#include "qapi/error.h"
#include "qapi/qapi-events-net.h"
#include "hw/qdev-properties.h"
#include "qapi/qapi-types-migration.h"
#include "qapi/qapi-events-migration.h"
#include "hw/virtio/virtio-access.h"
#include "migration/misc.h"
#include "standard-headers/linux/ethtool.h"
#include "sysemu/sysemu.h"
#include "trace.h"
#include "monitor/qdev.h"
#include "hw/pci/pci.h"
#include "net_rx_pkt.h"
#include "hw/virtio/vhost.h"

#define VIRTIO_NET_VM_VERSION    11

#define MAC_TABLE_ENTRIES    64
#define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */

/* previously fixed value */
#define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
#define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256

/* for now, only allow larger queues; with virtio-1, guest can downsize */
#define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
#define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE

#define VIRTIO_NET_IP4_ADDR_SIZE   8        /* ipv4 saddr + daddr */

#define VIRTIO_NET_TCP_FLAG         0x3F
#define VIRTIO_NET_TCP_HDR_LENGTH   0xF000

/* IPv4 max payload, 16 bits in the header */
#define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
#define VIRTIO_NET_MAX_TCP_PAYLOAD 65535

/* header length value in ip header without option */
#define VIRTIO_NET_IP4_HEADER_LENGTH 5

#define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD

/* Purge coalesced packets timer interval.  This value affects performance
   a lot and should be tuned carefully: '300000' (300us) is the recommended
   value to pass the WHQL test, while '50000' can gain 2x netperf throughput
   with tso/gso/gro disabled. */
#define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000

#define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDP_EX)

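/*
 * feature_sizes maps each feature bit to the end offset of the last
 * virtio_net_config field that the feature makes visible; the device's
 * config_size is the largest such 'end' among the negotiated features.
 */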
static VirtIOFeature feature_sizes[] = {
    {.flags = 1ULL << VIRTIO_NET_F_MAC,
     .end = endof(struct virtio_net_config, mac)},
    {.flags = 1ULL << VIRTIO_NET_F_STATUS,
     .end = endof(struct virtio_net_config, status)},
    {.flags = 1ULL << VIRTIO_NET_F_MQ,
     .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
    {.flags = 1ULL << VIRTIO_NET_F_MTU,
     .end = endof(struct virtio_net_config, mtu)},
    {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
     .end = endof(struct virtio_net_config, duplex)},
    {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT),
     .end = endof(struct virtio_net_config, supported_hash_types)},
    {}
};

static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    return &n->vqs[nc->queue_index];
}

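/*
 * Virtqueues are laid out in RX/TX pairs (even index = RX, odd = TX),
 * so the queue-pair index is simply the virtqueue index divided by two.
 */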
static int vq2q(int queue_index)
{
    return queue_index / 2;
}

/* TODO
 * - we could suppress RX interrupt if we were so inclined.
 */

static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg;
    int ret = 0;

    memset(&netcfg, 0, sizeof(struct virtio_net_config));
    virtio_stw_p(vdev, &netcfg.status, n->status);
    virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queues);
    virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
    memcpy(netcfg.mac, n->mac, ETH_ALEN);
    virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
    netcfg.duplex = n->net_conf.duplex;
    netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;
    virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length,
                 virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ?
                 VIRTIO_NET_RSS_MAX_TABLE_LEN : 1);
    virtio_stl_p(vdev, &netcfg.supported_hash_types,
                 VIRTIO_NET_RSS_SUPPORTED_HASHES);
    memcpy(config, &netcfg, n->config_size);

    /* A vhost-vdpa peer keeps the authoritative config: re-read it */
    NetClientState *nc = qemu_get_queue(n->nic);
    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg,
                                   n->config_size);
        if (ret != -1) {
            memcpy(config, &netcfg, n->config_size);
        }
    }
}

static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg = {};

    memcpy(&netcfg, config, n->config_size);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
        memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
        memcpy(n->mac, netcfg.mac, ETH_ALEN);
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    }

    NetClientState *nc = qemu_get_queue(n->nic);
    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        vhost_net_set_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg,
                             0, n->config_size,
                             VHOST_SET_CONFIG_TYPE_MASTER);
    }
}

static bool virtio_net_started(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
        (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
}

static void virtio_net_announce_notify(VirtIONet *net)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(net);
    trace_virtio_net_announce_notify();

    net->status |= VIRTIO_NET_S_ANNOUNCE;
    virtio_notify_config(vdev);
}

static void virtio_net_announce_timer(void *opaque)
{
    VirtIONet *n = opaque;
    trace_virtio_net_announce_timer(n->announce_timer.round);

    n->announce_timer.round--;
    virtio_net_announce_notify(n);
}

static void virtio_net_announce(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    /*
     * Make sure the virtio migration announcement timer isn't running.
     * If it is, let it trigger announcement so that we do not cause
     * confusion.
     */
    if (n->announce_timer.round) {
        return;
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
        virtio_net_announce_notify(n);
    }
}

static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    NetClientState *nc = qemu_get_queue(n->nic);
    int queues = n->multiqueue ? n->max_queues : 1;

    if (!get_vhost_net(nc->peer)) {
        return;
    }

    if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
        !!n->vhost_started) {
        return;
    }
    if (!n->vhost_started) {
        int r, i;

        if (n->needs_vnet_hdr_swap) {
            error_report("backend does not support %s vnet headers; "
                         "falling back on userspace virtio",
                         virtio_is_big_endian(vdev) ? "BE" : "LE");
            return;
        }

        /* Any packets outstanding? Purge them to avoid touching rings
         * when vhost is running.
         */
        for (i = 0; i < queues; i++) {
            NetClientState *qnc = qemu_get_subqueue(n->nic, i);

            /* Purge both directions: TX and RX. */
            qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
            qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
        }

        if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
            r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
            if (r < 0) {
                error_report("%uBytes MTU not supported by the backend",
                             n->net_conf.mtu);
                return;
            }
        }

        n->vhost_started = 1;
        r = vhost_net_start(vdev, n->nic->ncs, queues);
        if (r < 0) {
            error_report("unable to start vhost net: %d: "
                         "falling back on userspace virtio", -r);
            n->vhost_started = 0;
        }
    } else {
        vhost_net_stop(vdev, n->nic->ncs, queues);
        n->vhost_started = 0;
    }
}

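/*
 * Legacy (pre-1.0) virtio devices use the guest's native endianness for
 * vnet headers, while virtio 1.0 is always little-endian.  Tell the
 * backend which layout to expect; if it cannot honour the request, the
 * headers are byte-swapped in virtio_net_hdr_swap() instead.
 */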
static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
                                          NetClientState *peer,
                                          bool enable)
{
    if (virtio_is_big_endian(vdev)) {
        return qemu_set_vnet_be(peer, enable);
    } else {
        return qemu_set_vnet_le(peer, enable);
    }
}

static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
                                       int queues, bool enable)
{
    int i;

    for (i = 0; i < queues; i++) {
        if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
            enable) {
            while (--i >= 0) {
                virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
            }

            return true;
        }
    }

    return false;
}

static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int queues = n->multiqueue ? n->max_queues : 1;

    if (virtio_net_started(n, status)) {
        /* Before using the device, we tell the network backend about the
         * endianness to use when parsing vnet headers. If the backend
         * can't do it, we fallback onto fixing the headers in the core
         * virtio-net code.
         */
        n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
                                                            queues, true);
    } else if (virtio_net_started(n, vdev->status)) {
        /* After using the device, we need to reset the network backend to
         * the default (guest native endianness), otherwise the guest may
         * lose network connectivity if it is rebooted into a different
         * endianness.
         */
        virtio_net_set_vnet_endian(vdev, n->nic->ncs, queues, false);
    }
}

static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
{
    unsigned int dropped = virtqueue_drop_all(vq);
    if (dropped) {
        virtio_notify(vdev, vq);
    }
}

static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q;
    int i;
    uint8_t queue_status;

    virtio_net_vnet_endian_status(n, status);
    virtio_net_vhost_status(n, status);

    for (i = 0; i < n->max_queues; i++) {
        NetClientState *ncs = qemu_get_subqueue(n->nic, i);
        bool queue_started;
        q = &n->vqs[i];

        if ((!n->multiqueue && i != 0) || i >= n->curr_queues) {
            queue_status = 0;
        } else {
            queue_status = status;
        }
        queue_started =
            virtio_net_started(n, queue_status) && !n->vhost_started;

        if (queue_started) {
            qemu_flush_queued_packets(ncs);
        }

        if (!q->tx_waiting) {
            continue;
        }

        if (queue_started) {
            if (q->tx_timer) {
                timer_mod(q->tx_timer,
                          qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
            } else {
                qemu_bh_schedule(q->tx_bh);
            }
        } else {
            if (q->tx_timer) {
                timer_del(q->tx_timer);
            } else {
                qemu_bh_cancel(q->tx_bh);
            }
            if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
                (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
                vdev->vm_running) {
                /* if tx is waiting we likely have some packets in the tx
                 * queue and notification disabled */
                q->tx_waiting = 0;
                virtio_queue_set_notification(q->tx_vq, 1);
                virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
            }
        }
    }
}

static void virtio_net_set_link_status(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t old_status = n->status;

    if (nc->link_down)
        n->status &= ~VIRTIO_NET_S_LINK_UP;
    else
        n->status |= VIRTIO_NET_S_LINK_UP;

    if (n->status != old_status)
        virtio_notify_config(vdev);

    virtio_net_set_status(vdev, vdev->status);
}

static void rxfilter_notify(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    if (nc->rxfilter_notify_enabled) {
        char *path = object_get_canonical_path(OBJECT(n->qdev));
        qapi_event_send_nic_rx_filter_changed(!!n->netclient_name,
                                              n->netclient_name, path);
        g_free(path);

        /* disable event notification to avoid events flooding */
        nc->rxfilter_notify_enabled = 0;
    }
}

static intList *get_vlan_table(VirtIONet *n)
{
    intList *list, *entry;
    int i, j;

    list = NULL;
    for (i = 0; i < MAX_VLAN >> 5; i++) {
        for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
            if (n->vlans[i] & (1U << j)) {
                entry = g_malloc0(sizeof(*entry));
                entry->value = (i << 5) + j;
                entry->next = list;
                list = entry;
            }
        }
    }

    return list;
}

static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    RxFilterInfo *info;
    strList *str_list, *entry;
    int i;

    info = g_malloc0(sizeof(*info));
    info->name = g_strdup(nc->name);
    info->promiscuous = n->promisc;

    if (n->nouni) {
        info->unicast = RX_STATE_NONE;
    } else if (n->alluni) {
        info->unicast = RX_STATE_ALL;
    } else {
        info->unicast = RX_STATE_NORMAL;
    }

    if (n->nomulti) {
        info->multicast = RX_STATE_NONE;
    } else if (n->allmulti) {
        info->multicast = RX_STATE_ALL;
    } else {
        info->multicast = RX_STATE_NORMAL;
    }

    info->broadcast_allowed = n->nobcast;
    info->multicast_overflow = n->mac_table.multi_overflow;
    info->unicast_overflow = n->mac_table.uni_overflow;

    info->main_mac = qemu_mac_strdup_printf(n->mac);

    str_list = NULL;
    for (i = 0; i < n->mac_table.first_multi; i++) {
        entry = g_malloc0(sizeof(*entry));
        entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
        entry->next = str_list;
        str_list = entry;
    }
    info->unicast_table = str_list;

    str_list = NULL;
    for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
        entry = g_malloc0(sizeof(*entry));
        entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
        entry->next = str_list;
        str_list = entry;
    }
    info->multicast_table = str_list;
    info->vlan_table = get_vlan_table(n);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
        info->vlan = RX_STATE_ALL;
    } else if (!info->vlan_table) {
        info->vlan = RX_STATE_NONE;
    } else {
        info->vlan = RX_STATE_NORMAL;
    }

    /* enable event notification after query */
    nc->rxfilter_notify_enabled = 1;

    return info;
}

static void virtio_net_reset(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int i;

    /* Reset back to compatibility mode */
    n->promisc = 1; /* for compatibility actually */
    n->allmulti = 0;
    n->alluni = 0;
    n->nomulti = 0;
    n->nouni = 0;
    n->nobcast = 0;
    /* multiqueue is disabled by default */
    n->curr_queues = 1;
    timer_del(n->announce_timer.tm);
    n->announce_timer.round = 0;
    n->status &= ~VIRTIO_NET_S_ANNOUNCE;

    /* Flush any MAC and VLAN filter table state */
    n->mac_table.in_use = 0;
    n->mac_table.first_multi = 0;
    n->mac_table.multi_overflow = 0;
    n->mac_table.uni_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
    memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    memset(n->vlans, 0, MAX_VLAN >> 3);

    /* Flush any async TX */
    for (i = 0; i < n->max_queues; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (nc->peer) {
            qemu_flush_or_purge_queued_packets(nc->peer, true);
            assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
        }
    }
}

static void peer_test_vnet_hdr(VirtIONet *n)
{
    NetClientState *nc = qemu_get_queue(n->nic);
    if (!nc->peer) {
        return;
    }

    n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
}

static int peer_has_vnet_hdr(VirtIONet *n)
{
    return n->has_vnet_hdr;
}

static int peer_has_ufo(VirtIONet *n)
{
    if (!peer_has_vnet_hdr(n))
        return 0;

    n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);

    return n->has_ufo;
}

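/*
 * The header the guest sees depends on the negotiated features: with
 * VIRTIO_F_VERSION_1 the num_buffers field is always present (plus the
 * hash fields when hash reporting is on), while legacy devices only get
 * it if VIRTIO_NET_F_MRG_RXBUF was negotiated.
 */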
static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
                                       int version_1, int hash_report)
{
    int i;
    NetClientState *nc;

    n->mergeable_rx_bufs = mergeable_rx_bufs;

    if (version_1) {
        n->guest_hdr_len = hash_report ?
            sizeof(struct virtio_net_hdr_v1_hash) :
            sizeof(struct virtio_net_hdr_mrg_rxbuf);
        n->rss_data.populate_hash = !!hash_report;
    } else {
        n->guest_hdr_len = n->mergeable_rx_bufs ?
            sizeof(struct virtio_net_hdr_mrg_rxbuf) :
            sizeof(struct virtio_net_hdr);
    }

    for (i = 0; i < n->max_queues; i++) {
        nc = qemu_get_subqueue(n->nic, i);

        if (peer_has_vnet_hdr(n) &&
            qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
            qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
            n->host_hdr_len = n->guest_hdr_len;
        }
    }
}

static int virtio_net_max_tx_queue_size(VirtIONet *n)
{
    NetClientState *peer = n->nic_conf.peers.ncs[0];

    /*
     * Backends other than vhost-user don't support max queue size.
     */
    if (!peer) {
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }

    if (peer->info->type != NET_CLIENT_DRIVER_VHOST_USER) {
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }

    return VIRTQUEUE_MAX_SIZE;
}

static int peer_attach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 1);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    if (n->max_queues == 1) {
        return 0;
    }

    return tap_enable(nc->peer);
}

static int peer_detach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 0);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    return tap_disable(nc->peer);
}

static void virtio_net_set_queues(VirtIONet *n)
{
    int i;
    int r;

    if (n->nic->peer_deleted) {
        return;
    }

    for (i = 0; i < n->max_queues; i++) {
        if (i < n->curr_queues) {
            r = peer_attach(n, i);
            assert(!r);
        } else {
            r = peer_detach(n, i);
            assert(!r);
        }
    }
}

static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);

static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
                                        Error **errp)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_queue(n->nic);

    /* Firstly sync all virtio-net possible supported features */
    features |= n->host_features;

    virtio_add_feature(&features, VIRTIO_NET_F_MAC);

    if (!peer_has_vnet_hdr(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
    }

    if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
    }

    if (!get_vhost_net(nc->peer)) {
        return features;
    }

    virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
    virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
    features = vhost_net_get_features(get_vhost_net(nc->peer), features);
    vdev->backend_features = features;

    if (n->mtu_bypass_backend &&
            (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
        features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    return features;
}

static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
{
    uint64_t features = 0;

    /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
     * but also these: */
    virtio_add_feature(&features, VIRTIO_NET_F_MAC);
    virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);

    return features;
}

static void virtio_net_apply_guest_offloads(VirtIONet *n)
{
    qemu_set_offload(qemu_get_queue(n->nic)->peer,
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
}

static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
{
    static const uint64_t guest_offloads_mask =
        (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
        (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
        (1ULL << VIRTIO_NET_F_GUEST_UFO);

    return guest_offloads_mask & features;
}

static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return virtio_net_guest_offloads_by_features(vdev->guest_features);
}

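/*
 * Failover support (VIRTIO_NET_F_STANDBY): this virtio-net device acts as
 * a standby for a primary (e.g. passthrough) NIC whose -device options
 * carry failover_pair_id=<id of this device>.  The primary is only
 * plugged in once the guest has negotiated the feature.
 */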
static void failover_add_primary(VirtIONet *n, Error **errp)
{
    Error *err = NULL;

    if (n->primary_dev) {
        return;
    }

    n->primary_device_opts = qemu_opts_find(qemu_find_opts("device"),
                                            n->primary_device_id);
    if (n->primary_device_opts) {
        n->primary_dev = qdev_device_add(n->primary_device_opts, &err);
        if (err) {
            qemu_opts_del(n->primary_device_opts);
        }
        if (n->primary_dev) {
            n->primary_bus = n->primary_dev->parent_bus;
            if (err) {
                qdev_unplug(n->primary_dev, &err);
                qdev_set_id(n->primary_dev, "");
            }
        }
    } else {
        error_setg(errp, "Primary device not found");
        error_append_hint(errp, "Virtio-net failover will not work. Make "
                          "sure primary device has parameter"
                          " failover_pair_id=<virtio-net-id>\n");
    }
    error_propagate(errp, err);
}

static int is_my_primary(void *opaque, QemuOpts *opts, Error **errp)
{
    VirtIONet *n = opaque;
    int ret = 0;

    const char *standby_id = qemu_opt_get(opts, "failover_pair_id");

    if (standby_id != NULL && (g_strcmp0(standby_id, n->netclient_name) == 0)) {
        n->primary_device_id = g_strdup(opts->id);
        ret = 1;
    }

    return ret;
}

static DeviceState *virtio_net_find_primary(VirtIONet *n, Error **errp)
{
    DeviceState *dev = NULL;
    Error *err = NULL;

    if (qemu_opts_foreach(qemu_find_opts("device"),
                          is_my_primary, n, &err)) {
        if (err) {
            error_propagate(errp, err);
            return NULL;
        }
        if (n->primary_device_id) {
            dev = qdev_find_recursive(sysbus_get_default(),
                                      n->primary_device_id);
        } else {
            error_setg(errp, "Primary device id not found");
            return NULL;
        }
    }

    return dev;
}

static DeviceState *virtio_connect_failover_devices(VirtIONet *n,
                                                    DeviceState *dev,
                                                    Error **errp)
{
    DeviceState *prim_dev = NULL;
    Error *err = NULL;

    prim_dev = virtio_net_find_primary(n, &err);
    if (prim_dev) {
        n->primary_device_id = g_strdup(prim_dev->id);
        n->primary_device_opts = prim_dev->opts;
    } else {
        error_propagate(errp, err);
    }

    return prim_dev;
}

static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    Error *err = NULL;
    int i;

    if (n->mtu_bypass_backend &&
            !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
        features &= ~(1ULL << VIRTIO_NET_F_MTU);
    }

    virtio_net_set_multiqueue(n,
                              virtio_has_feature(features, VIRTIO_NET_F_RSS) ||
                              virtio_has_feature(features, VIRTIO_NET_F_MQ));

    virtio_net_set_mrg_rx_bufs(n,
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_MRG_RXBUF),
                               virtio_has_feature(features,
                                                  VIRTIO_F_VERSION_1),
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_HASH_REPORT));

    n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
    n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
    n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS);

    if (n->has_vnet_hdr) {
        n->curr_guest_offloads =
            virtio_net_guest_offloads_by_features(features);
        virtio_net_apply_guest_offloads(n);
    }

    for (i = 0; i < n->max_queues; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (!get_vhost_net(nc->peer)) {
            continue;
        }
        vhost_net_ack_features(get_vhost_net(nc->peer), features);
    }

    if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
        memset(n->vlans, 0, MAX_VLAN >> 3);
    } else {
        memset(n->vlans, 0xff, MAX_VLAN >> 3);
    }

    if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
        qapi_event_send_failover_negotiated(n->netclient_name);
        atomic_set(&n->primary_should_be_hidden, false);
        failover_add_primary(n, &err);
        if (err) {
            n->primary_dev = virtio_connect_failover_devices(n, n->qdev, &err);
            if (err) {
                goto out_err;
            }
            failover_add_primary(n, &err);
            if (err) {
                goto out_err;
            }
        }
    }
    return;

out_err:
    if (err) {
        warn_report_err(err);
    }
}

static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
                                     struct iovec *iov, unsigned int iov_cnt)
{
    uint8_t on;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
    if (s != sizeof(on)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
        n->promisc = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
        n->allmulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
        n->alluni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
        n->nomulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
        n->nouni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
        n->nobcast = on;
    } else {
        return VIRTIO_NET_ERR;
    }

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint64_t offloads;
    size_t s;

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        return VIRTIO_NET_ERR;
    }

    s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
    if (s != sizeof(offloads)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
        uint64_t supported_offloads;

        offloads = virtio_ldq_p(vdev, &offloads);

        if (!n->has_vnet_hdr) {
            return VIRTIO_NET_ERR;
        }

        n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
        n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);

        supported_offloads = virtio_net_supported_guest_offloads(n);
        if (offloads & ~supported_offloads) {
            return VIRTIO_NET_ERR;
        }

        n->curr_guest_offloads = offloads;
        virtio_net_apply_guest_offloads(n);

        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}

static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
                                 struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mac mac_data;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
        if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
        assert(s == sizeof(n->mac));
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
        rxfilter_notify(nc);

        return VIRTIO_NET_OK;
    }

    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
        return VIRTIO_NET_ERR;
    }

    int in_use = 0;
    int first_multi = 0;
    uint8_t uni_overflow = 0;
    uint8_t multi_overflow = 0;
    uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }
    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
        s = iov_to_buf(iov, iov_cnt, 0, macs,
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        uni_overflow = 1;
    }

    iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);

    first_multi = in_use;

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }

    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
        s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        multi_overflow = 1;
    }

    n->mac_table.in_use = in_use;
    n->mac_table.first_multi = first_multi;
    n->mac_table.uni_overflow = uni_overflow;
    n->mac_table.multi_overflow = multi_overflow;
    memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
    g_free(macs);
    rxfilter_notify(nc);

    return VIRTIO_NET_OK;

error:
    g_free(macs);
    return VIRTIO_NET_ERR;
}

static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
                                        struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t vid;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
    vid = virtio_lduw_p(vdev, &vid);
    if (s != sizeof(vid)) {
        return VIRTIO_NET_ERR;
    }

    if (vid >= MAX_VLAN)
        return VIRTIO_NET_ERR;

    if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
        n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
    else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
        n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
    else
        return VIRTIO_NET_ERR;

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    trace_virtio_net_handle_announce(n->announce_timer.round);
    if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
        n->status & VIRTIO_NET_S_ANNOUNCE) {
        n->status &= ~VIRTIO_NET_S_ANNOUNCE;
        if (n->announce_timer.round) {
            qemu_announce_timer_step(&n->announce_timer);
        }
        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}

static void virtio_net_disable_rss(VirtIONet *n)
{
    if (n->rss_data.enabled) {
        trace_virtio_net_rss_disable();
    }
    n->rss_data.enabled = false;
}

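/*
 * Parse a VIRTIO_NET_CTRL_MQ_RSS_CONFIG / _HASH_CONFIG command.  Per the
 * virtio spec the payload is consumed incrementally: the fixed head of
 * struct virtio_net_rss_config, then indirection_table_mask + 1 table
 * entries, then max_tx_vq and hash_key_length (read below into 'temp'),
 * and finally the key itself.
 */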
static uint16_t virtio_net_handle_rss(VirtIONet *n,
                                      struct iovec *iov,
                                      unsigned int iov_cnt,
                                      bool do_rss)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_rss_config cfg;
    size_t s, offset = 0, size_get;
    uint16_t queues, i;
    struct {
        uint16_t us;
        uint8_t b;
    } QEMU_PACKED temp;
    const char *err_msg = "";
    uint32_t err_value = 0;

    if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) {
        err_msg = "RSS is not negotiated";
        goto error;
    }
    if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
        err_msg = "Hash report is not negotiated";
        goto error;
    }
    size_get = offsetof(struct virtio_net_rss_config, indirection_table);
    s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get);
    if (s != size_get) {
        err_msg = "Short command buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types);
    n->rss_data.indirections_len =
        virtio_lduw_p(vdev, &cfg.indirection_table_mask);
    n->rss_data.indirections_len++;
    if (!do_rss) {
        n->rss_data.indirections_len = 1;
    }
    if (!is_power_of_2(n->rss_data.indirections_len)) {
        err_msg = "Invalid size of indirection table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
        err_msg = "Too large indirection table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    n->rss_data.default_queue = do_rss ?
        virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0;
    if (n->rss_data.default_queue >= n->max_queues) {
        err_msg = "Invalid default queue";
        err_value = n->rss_data.default_queue;
        goto error;
    }
    offset += size_get;
    size_get = sizeof(uint16_t) * n->rss_data.indirections_len;
    g_free(n->rss_data.indirections_table);
    n->rss_data.indirections_table = g_malloc(size_get);
    if (!n->rss_data.indirections_table) {
        err_msg = "Can't allocate indirections table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    s = iov_to_buf(iov, iov_cnt, offset,
                   n->rss_data.indirections_table, size_get);
    if (s != size_get) {
        err_msg = "Short indirection table buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    for (i = 0; i < n->rss_data.indirections_len; ++i) {
        uint16_t val = n->rss_data.indirections_table[i];
        n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val);
    }
    offset += size_get;
    size_get = sizeof(temp);
    s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get);
    if (s != size_get) {
        err_msg = "Can't get queues";
        err_value = (uint32_t)s;
        goto error;
    }
    queues = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queues;
    if (queues == 0 || queues > n->max_queues) {
        err_msg = "Invalid number of queues";
        err_value = queues;
        goto error;
    }
    if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
        err_msg = "Invalid key size";
        err_value = temp.b;
        goto error;
    }
    if (!temp.b && n->rss_data.hash_types) {
        err_msg = "No key provided";
        err_value = 0;
        goto error;
    }
    if (!temp.b && !n->rss_data.hash_types) {
        virtio_net_disable_rss(n);
        return queues;
    }
    offset += size_get;
    size_get = temp.b;
    s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get);
    if (s != size_get) {
        err_msg = "Can't get key buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    n->rss_data.enabled = true;
    trace_virtio_net_rss_enable(n->rss_data.hash_types,
                                n->rss_data.indirections_len,
                                temp.b);
    return queues;
error:
    trace_virtio_net_rss_error(err_msg, err_value);
    virtio_net_disable_rss(n);
    return 0;
}

static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
                                struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t queues;

    virtio_net_disable_rss(n);
    if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) {
        queues = virtio_net_handle_rss(n, iov, iov_cnt, false);
        return queues ? VIRTIO_NET_OK : VIRTIO_NET_ERR;
    }
    if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) {
        queues = virtio_net_handle_rss(n, iov, iov_cnt, true);
    } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
        struct virtio_net_ctrl_mq mq;
        size_t s;

        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
        if (s != sizeof(mq)) {
            return VIRTIO_NET_ERR;
        }
        queues = virtio_lduw_p(vdev, &mq.virtqueue_pairs);
    } else {
        return VIRTIO_NET_ERR;
    }

    if (queues < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
        queues > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
        queues > n->max_queues ||
        !n->multiqueue) {
        return VIRTIO_NET_ERR;
    }

    n->curr_queues = queues;
    /* stop the backend before changing the number of queues to avoid
     * handling a disabled queue */
    virtio_net_set_status(vdev, vdev->status);
    virtio_net_set_queues(n);

    return VIRTIO_NET_OK;
}

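/*
 * Each control-queue element carries a virtio_net_ctrl_hdr plus the
 * command payload in its out sg-list and expects a one-byte ack in its
 * in sg-list.  The out iovec is duplicated so iov_discard_front() can
 * consume it without modifying the element itself.
 */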
static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_ctrl_hdr ctrl;
    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
    VirtQueueElement *elem;
    size_t s;
    struct iovec *iov, *iov2;
    unsigned int iov_cnt;

    for (;;) {
        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }
        if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) ||
            iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) {
            virtio_error(vdev, "virtio-net ctrl missing headers");
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            break;
        }

        iov_cnt = elem->out_num;
        iov2 = iov = g_memdup(elem->out_sg,
                              sizeof(struct iovec) * elem->out_num);
        s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
        iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
        if (s != sizeof(ctrl)) {
            status = VIRTIO_NET_ERR;
        } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
            status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
            status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
            status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
            status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
            status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
            status = virtio_net_handle_offloads(n, ctrl.cmd, iov, iov_cnt);
        }

        s = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, sizeof(status));
        assert(s == sizeof(status));

        virtqueue_push(vq, elem, sizeof(status));
        virtio_notify(vdev, vq);
        g_free(iov2);
        g_free(elem);
    }
}

static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int queue_index = vq2q(virtio_get_queue_index(vq));

    qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
}

static bool virtio_net_can_receive(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);

    if (!vdev->vm_running) {
        return false;
    }

    if (nc->queue_index >= n->curr_queues) {
        return false;
    }

    if (!virtio_queue_ready(q->rx_vq) ||
        !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return false;
    }

    return true;
}

static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
{
    VirtIONet *n = q->n;
    if (virtio_queue_empty(q->rx_vq) ||
        (n->mergeable_rx_bufs &&
         !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
        virtio_queue_set_notification(q->rx_vq, 1);

        /* To avoid a race condition where the guest has made some buffers
         * available after the above check but before notification was
         * enabled, check for available buffers again.
         */
        if (virtio_queue_empty(q->rx_vq) ||
            (n->mergeable_rx_bufs &&
             !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
            return 0;
        }
    }

    virtio_queue_set_notification(q->rx_vq, 0);
    return 1;
}

static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
{
    virtio_tswap16s(vdev, &hdr->hdr_len);
    virtio_tswap16s(vdev, &hdr->gso_size);
    virtio_tswap16s(vdev, &hdr->csum_start);
    virtio_tswap16s(vdev, &hdr->csum_offset);
}

/* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
 * it never finds out that the packets don't have valid checksums.  This
 * causes dhclient to get upset.  Fedora's carried a patch for ages to
 * fix this with Xen but it hasn't appeared in an upstream release of
 * dhclient yet.
 *
 * To avoid breaking existing guests, we catch udp packets and add
 * checksums.  This is terrible but it's better than hacking the guest
 * kernels.
 *
 * N.B. if we introduce a zero-copy API, this operation is no longer free so
 * we should provide a mechanism to disable it to avoid polluting the host
 * cache.
 */
static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
                                        uint8_t *buf, size_t size)
{
    if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
        (size > 27 && size < 1500) && /* normal sized MTU */
        (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
        (buf[23] == 17) && /* ip.protocol == UDP */
        (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
        net_checksum_calculate(buf, size);
        hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
    }
}

static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
                           const void *buf, size_t size)
{
    if (n->has_vnet_hdr) {
        /* FIXME this cast is evil */
        void *wbuf = (void *)buf;
        work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
                                    size - n->host_hdr_len);

        if (n->needs_vnet_hdr_swap) {
            virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
        }
        iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
    } else {
        struct virtio_net_hdr hdr = {
            .flags = 0,
            .gso_type = VIRTIO_NET_HDR_GSO_NONE
        };
        iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
    }
}

*n
, const uint8_t *buf
, int size
)
1510 static const uint8_t bcast
[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
1511 static const uint8_t vlan
[] = {0x81, 0x00};
1512 uint8_t *ptr
= (uint8_t *)buf
;
1518 ptr
+= n
->host_hdr_len
;
1520 if (!memcmp(&ptr
[12], vlan
, sizeof(vlan
))) {
1521 int vid
= lduw_be_p(ptr
+ 14) & 0xfff;
1522 if (!(n
->vlans
[vid
>> 5] & (1U << (vid
& 0x1f))))
1526 if (ptr
[0] & 1) { // multicast
1527 if (!memcmp(ptr
, bcast
, sizeof(bcast
))) {
1529 } else if (n
->nomulti
) {
1531 } else if (n
->allmulti
|| n
->mac_table
.multi_overflow
) {
1535 for (i
= n
->mac_table
.first_multi
; i
< n
->mac_table
.in_use
; i
++) {
1536 if (!memcmp(ptr
, &n
->mac_table
.macs
[i
* ETH_ALEN
], ETH_ALEN
)) {
1543 } else if (n
->alluni
|| n
->mac_table
.uni_overflow
) {
1545 } else if (!memcmp(ptr
, n
->mac
, ETH_ALEN
)) {
1549 for (i
= 0; i
< n
->mac_table
.first_multi
; i
++) {
1550 if (!memcmp(ptr
, &n
->mac_table
.macs
[i
* ETH_ALEN
], ETH_ALEN
)) {
1559 static uint8_t virtio_net_get_hash_type(bool isip4
,
1566 if (istcp
&& (types
& VIRTIO_NET_RSS_HASH_TYPE_TCPv4
)) {
1567 return NetPktRssIpV4Tcp
;
1569 if (isudp
&& (types
& VIRTIO_NET_RSS_HASH_TYPE_UDPv4
)) {
1570 return NetPktRssIpV4Udp
;
1572 if (types
& VIRTIO_NET_RSS_HASH_TYPE_IPv4
) {
1573 return NetPktRssIpV4
;
1576 uint32_t mask
= VIRTIO_NET_RSS_HASH_TYPE_TCP_EX
|
1577 VIRTIO_NET_RSS_HASH_TYPE_TCPv6
;
1579 if (istcp
&& (types
& mask
)) {
1580 return (types
& VIRTIO_NET_RSS_HASH_TYPE_TCP_EX
) ?
1581 NetPktRssIpV6TcpEx
: NetPktRssIpV6Tcp
;
1583 mask
= VIRTIO_NET_RSS_HASH_TYPE_UDP_EX
| VIRTIO_NET_RSS_HASH_TYPE_UDPv6
;
1584 if (isudp
&& (types
& mask
)) {
1585 return (types
& VIRTIO_NET_RSS_HASH_TYPE_UDP_EX
) ?
1586 NetPktRssIpV6UdpEx
: NetPktRssIpV6Udp
;
1588 mask
= VIRTIO_NET_RSS_HASH_TYPE_IP_EX
| VIRTIO_NET_RSS_HASH_TYPE_IPv6
;
1590 return (types
& VIRTIO_NET_RSS_HASH_TYPE_IP_EX
) ?
1591 NetPktRssIpV6Ex
: NetPktRssIpV6
;
static void virtio_set_packet_hash(const uint8_t *buf, uint8_t report,
                                   uint32_t hash)
{
    struct virtio_net_hdr_v1_hash *hdr = (void *)buf;
    hdr->hash_value = hash;
    hdr->hash_report = report;
}

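/*
 * reports[] maps the NetPktRss* hash type computed by
 * virtio_net_get_hash_type() to the VIRTIO_NET_HASH_REPORT_* value
 * placed in the v1 hash header, so it must stay in NetPktRss* enum order.
 */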
static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
                                  size_t size)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    unsigned int index = nc->queue_index, new_index = index;
    struct NetRxPkt *pkt = n->rx_pkt;
    uint8_t net_hash_type;
    uint32_t hash;
    bool isip4, isip6, isudp, istcp;
    static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = {
        VIRTIO_NET_HASH_REPORT_IPv4,
        VIRTIO_NET_HASH_REPORT_TCPv4,
        VIRTIO_NET_HASH_REPORT_TCPv6,
        VIRTIO_NET_HASH_REPORT_IPv6,
        VIRTIO_NET_HASH_REPORT_IPv6_EX,
        VIRTIO_NET_HASH_REPORT_TCPv6_EX,
        VIRTIO_NET_HASH_REPORT_UDPv4,
        VIRTIO_NET_HASH_REPORT_UDPv6,
        VIRTIO_NET_HASH_REPORT_UDPv6_EX
    };

    net_rx_pkt_set_protocols(pkt, buf + n->host_hdr_len,
                             size - n->host_hdr_len);
    net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp);
    if (isip4 && (net_rx_pkt_get_ip4_info(pkt)->fragment)) {
        istcp = isudp = false;
    }
    if (isip6 && (net_rx_pkt_get_ip6_info(pkt)->fragment)) {
        istcp = isudp = false;
    }
    net_hash_type = virtio_net_get_hash_type(isip4, isip6, isudp, istcp,
                                             n->rss_data.hash_types);
    if (net_hash_type > NetPktRssIpV6UdpEx) {
        if (n->rss_data.populate_hash) {
            virtio_set_packet_hash(buf, VIRTIO_NET_HASH_REPORT_NONE, 0);
        }
        return n->rss_data.redirect ? n->rss_data.default_queue : -1;
    }

    hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key);

    if (n->rss_data.populate_hash) {
        virtio_set_packet_hash(buf, reports[net_hash_type], hash);
    }

    if (n->rss_data.redirect) {
        new_index = hash & (n->rss_data.indirections_len - 1);
        new_index = n->rss_data.indirections_table[new_index];
    }

    return (index == new_index) ? -1 : new_index;
}

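/*
 * With mergeable RX buffers one packet may span several descriptor
 * chains.  num_buffers is only known once the copy has finished, so the
 * iovec covering that field in the first buffer is saved in mhdr_sg and
 * patched after the loop.
 */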
static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
                                      size_t size, bool no_rss)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
    struct virtio_net_hdr_mrg_rxbuf mhdr;
    unsigned mhdr_cnt = 0;
    size_t offset, i, guest_offset;

    if (!virtio_net_can_receive(nc)) {
        return -1;
    }

    if (!no_rss && n->rss_data.enabled) {
        int index = virtio_net_process_rss(nc, buf, size);
        if (index >= 0) {
            NetClientState *nc2 = qemu_get_subqueue(n->nic, index);
            return virtio_net_receive_rcu(nc2, buf, size, true);
        }
    }

    /* hdr_len refers to the header we supply to the guest */
    if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
        return 0;
    }

    if (!receive_filter(n, buf, size))
        return size;

    offset = i = 0;

    while (offset < size) {
        VirtQueueElement *elem;
        int len, total;
        const struct iovec *sg;

        total = 0;

        elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            if (i) {
                virtio_error(vdev, "virtio-net unexpected empty queue: "
                             "i %zd mergeable %d offset %zd, size %zd, "
                             "guest hdr len %zd, host hdr len %zd "
                             "guest features 0x%" PRIx64,
                             i, n->mergeable_rx_bufs, offset, size,
                             n->guest_hdr_len, n->host_hdr_len,
                             vdev->guest_features);
            }
            return -1;
        }

        if (elem->in_num < 1) {
            virtio_error(vdev,
                         "virtio-net receive queue contains no in buffers");
            virtqueue_detach_element(q->rx_vq, elem, 0);
            g_free(elem);
            return -1;
        }

        sg = elem->in_sg;
        if (i == 0) {
            assert(offset == 0);
            if (n->mergeable_rx_bufs) {
                mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
                                    sg, elem->in_num,
                                    offsetof(typeof(mhdr), num_buffers),
                                    sizeof(mhdr.num_buffers));
            }

            receive_header(n, sg, elem->in_num, buf, size);
            if (n->rss_data.populate_hash) {
                offset = sizeof(mhdr);
                iov_from_buf(sg, elem->in_num, offset,
                             buf + offset, n->host_hdr_len - sizeof(mhdr));
            }
            offset = n->host_hdr_len;
            total += n->guest_hdr_len;
            guest_offset = n->guest_hdr_len;
        } else {
            guest_offset = 0;
        }

        /* copy in packet.  ugh */
        len = iov_from_buf(sg, elem->in_num, guest_offset,
                           buf + offset, size - offset);
        total += len;
        offset += len;
        /* If buffers can't be merged, at this point we
         * must have consumed the complete packet.
         * Otherwise, drop it. */
        if (!n->mergeable_rx_bufs && offset < size) {
            virtqueue_unpop(q->rx_vq, elem, total);
            g_free(elem);
            return size;
        }

        /* signal other side */
        virtqueue_fill(q->rx_vq, elem, total, i++);
        g_free(elem);
    }

    if (mhdr_cnt) {
        virtio_stw_p(vdev, &mhdr.num_buffers, i);
        iov_from_buf(mhdr_sg, mhdr_cnt,
                     0,
                     &mhdr.num_buffers, sizeof mhdr.num_buffers);
    }

    virtqueue_flush(q->rx_vq, i);
    virtio_notify(vdev, q->rx_vq);

    return size;
}

static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
                                     size_t size)
{
    RCU_READ_LOCK_GUARD();

    return virtio_net_receive_rcu(nc, buf, size, false);
}

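/*
 * Note on the tcp_hdrlen computation below: the TCP data offset lives in
 * the top 4 bits of th_offset_flags and counts 32-bit words, so masking
 * with 0xF000 and shifting right by 10 (12 to isolate the field, minus 2
 * to multiply by 4) yields the header length in bytes.
 */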
static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
                                         const uint8_t *buf,
                                         VirtioNetRscUnit *unit)
{
    uint16_t ip_hdrlen;
    struct ip_header *ip;

    ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
                              + sizeof(struct eth_header));
    unit->ip = (void *)ip;
    ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
    unit->ip_plen = &ip->ip_len;
    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
    unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
}

static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
                                         const uint8_t *buf,
                                         VirtioNetRscUnit *unit)
{
    struct ip6_header *ip6;

    ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
                                + sizeof(struct eth_header));
    unit->ip = ip6;
    unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
                                      + sizeof(struct ip6_header));
    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;

    /* There is a difference between the payload length in ipv4 and v6:
       the ip header is excluded in ipv6 */
    unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
}

static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
                                       VirtioNetRscSeg *seg)
{
    int ret;
    struct virtio_net_hdr_v1 *h;

    h = (struct virtio_net_hdr_v1 *)seg->buf;
    h->flags = 0;
    h->gso_type = VIRTIO_NET_HDR_GSO_NONE;

    if (seg->is_coalesced) {
        h->rsc.segments = seg->packets;
        h->rsc.dup_acks = seg->dup_ack;
        h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
        if (chain->proto == ETH_P_IP) {
            h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
        } else {
            h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
        }
    }

    ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
    QTAILQ_REMOVE(&chain->buffers, seg, next);
    g_free(seg->buf);
    g_free(seg);

    return ret;
}

static void virtio_net_rsc_purge(void *opq)
{
    VirtioNetRscSeg *seg, *rn;
    VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;

    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
        if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
            chain->stat.purge_failed++;
            continue;
        }
    }

    chain->stat.timer++;
    if (!QTAILQ_EMPTY(&chain->buffers)) {
        timer_mod(chain->drain_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
    }
}

static void virtio_net_rsc_cleanup(VirtIONet *n)
{
    VirtioNetRscChain *chain, *rn_chain;
    VirtioNetRscSeg *seg, *rn_seg;

    QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
        QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
            QTAILQ_REMOVE(&chain->buffers, seg, next);
            g_free(seg->buf);
            g_free(seg);
        }

        timer_del(chain->drain_timer);
        timer_free(chain->drain_timer);
        QTAILQ_REMOVE(&n->rsc_chains, chain, next);
        g_free(chain);
    }
}

static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
                                     NetClientState *nc,
                                     const uint8_t *buf, size_t size)
{
    uint16_t hdr_len;
    VirtioNetRscSeg *seg;

    hdr_len = chain->n->guest_hdr_len;
    seg = g_malloc(sizeof(VirtioNetRscSeg));
    seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
        + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
    memcpy(seg->buf, buf, size);
    seg->size = size;
    seg->packets = 1;
    seg->dup_ack = 0;
    seg->is_coalesced = 0;
    seg->nc = nc;

    QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
    chain->stat.cache++;

    switch (chain->proto) {
    case ETH_P_IP:
        virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
        break;
    case ETH_P_IPV6:
        virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
        break;
    default:
        g_assert_not_reached();
    }
}

static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
                                         VirtioNetRscSeg *seg,
                                         const uint8_t *buf,
                                         struct tcp_header *n_tcp,
                                         struct tcp_header *o_tcp)
{
    uint32_t nack, oack;
    uint16_t nwin, owin;

    nack = htonl(n_tcp->th_ack);
    nwin = htons(n_tcp->th_win);
    oack = htonl(o_tcp->th_ack);
    owin = htons(o_tcp->th_win);

    if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
        chain->stat.ack_out_of_win++;
        return RSC_FINAL;
    } else if (nack == oack) {
        /* duplicated ack or window probe */
        if (nwin == owin) {
            /* duplicated ack, add dup ack count due to whql test up to 1 */
            chain->stat.dup_ack++;
            return RSC_FINAL;
        } else {
            /* Coalesce window update */
            o_tcp->th_win = n_tcp->th_win;
            chain->stat.win_update++;
            return RSC_COALESCE;
        }
    } else {
        /* pure ack, go to 'C', finalize */
        chain->stat.pure_ack++;
        return RSC_FINAL;
    }
}

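/*
 * Sequence comparisons below rely on unsigned 32-bit wraparound:
 * (nseq - oseq) is the forward distance even across a wrap, so any
 * distance beyond VIRTIO_NET_MAX_TCP_PAYLOAD is treated as out of window.
 */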
static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
                                            VirtioNetRscSeg *seg,
                                            const uint8_t *buf,
                                            VirtioNetRscUnit *n_unit)
{
    void *data;
    uint16_t o_ip_len;
    uint32_t nseq, oseq;
    VirtioNetRscUnit *o_unit;

    o_unit = &seg->unit;
    o_ip_len = htons(*o_unit->ip_plen);
    nseq = htonl(n_unit->tcp->th_seq);
    oseq = htonl(o_unit->tcp->th_seq);

    /* out of order or retransmitted. */
    if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
        chain->stat.data_out_of_win++;
        return RSC_FINAL;
    }

    data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
    if (nseq == oseq) {
        if ((o_unit->payload == 0) && n_unit->payload) {
            /* From no payload to payload, normal case, not a dup ack or etc */
            chain->stat.data_after_pure_ack++;
            goto coalesce;
        } else {
            return virtio_net_rsc_handle_ack(chain, seg, buf,
                                             n_unit->tcp, o_unit->tcp);
        }
    } else if ((nseq - oseq) != o_unit->payload) {
        /* Not a consistent packet, out of order */
        chain->stat.data_out_of_order++;
        return RSC_FINAL;
    } else {
coalesce:
        if ((o_ip_len + n_unit->payload) > chain->max_payload) {
            chain->stat.over_size++;
            return RSC_FINAL;
        }

        /* Here comes the right data: the payload length in v4/v6 differs,
           so use the field value to update and record the new data length */
        o_unit->payload += n_unit->payload; /* update new data len */

        /* update field in ip header */
        *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);

        /* Carry the 'PUSH' flag forward: the WHQL test guide says 'PUSH'
           can be coalesced for a Windows guest, though this may change the
           behavior for a Linux guest (only if it uses the RSC feature). */
        o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;

        o_unit->tcp->th_ack = n_unit->tcp->th_ack;
        o_unit->tcp->th_win = n_unit->tcp->th_win;

        memmove(seg->buf + seg->size, data, n_unit->payload);
        seg->size += n_unit->payload;
        seg->packets++;
        chain->stat.coalesced++;
        return RSC_COALESCE;
    }
}

static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
                                        VirtioNetRscSeg *seg,
                                        const uint8_t *buf, size_t size,
                                        VirtioNetRscUnit *unit)
{
    struct ip_header *ip1, *ip2;

    ip1 = (struct ip_header *)(unit->ip);
    ip2 = (struct ip_header *)(seg->unit.ip);
    if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
        || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
        || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
        chain->stat.no_match++;
        return RSC_NO_MATCH;
    }

    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
}

static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
                                        VirtioNetRscSeg *seg,
                                        const uint8_t *buf, size_t size,
                                        VirtioNetRscUnit *unit)
{
    struct ip6_header *ip1, *ip2;

    ip1 = (struct ip6_header *)(unit->ip);
    ip2 = (struct ip6_header *)(seg->unit.ip);
    if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
        || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
        || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
        || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
        chain->stat.no_match++;
        return RSC_NO_MATCH;
    }

    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
}

/* Packets with 'SYN' should bypass; other flags should be sent after drain
 * to prevent reordering */
static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
                                         struct tcp_header *tcp)
{
    uint16_t tcp_hdr;
    uint16_t tcp_flag;

    tcp_flag = htons(tcp->th_offset_flags);
    tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
    tcp_flag &= VIRTIO_NET_TCP_FLAG;
    tcp_flag = htons(tcp->th_offset_flags) & 0x3F;
    if (tcp_flag & TH_SYN) {
        chain->stat.tcp_syn++;
        return RSC_BYPASS;
    }

    if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
        chain->stat.tcp_ctrl_drain++;
        return RSC_FINAL;
    }

    if (tcp_hdr > sizeof(struct tcp_header)) {
        chain->stat.tcp_all_opt++;
        return RSC_FINAL;
    }

    return RSC_CANDIDATE;
}

static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
                                         NetClientState *nc,
                                         const uint8_t *buf, size_t size,
                                         VirtioNetRscUnit *unit)
{
    int ret;
    VirtioNetRscSeg *seg, *nseg;

    if (QTAILQ_EMPTY(&chain->buffers)) {
        chain->stat.empty_cache++;
        virtio_net_rsc_cache_buf(chain, nc, buf, size);
        timer_mod(chain->drain_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
        return size;
    }

    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
        if (chain->proto == ETH_P_IP) {
            ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
        } else {
            ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
        }

        if (ret == RSC_FINAL) {
            if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
                /* Send failed */
                chain->stat.final_failed++;
                return 0;
            }

            /* Send current packet */
            return virtio_net_do_receive(nc, buf, size);
        } else if (ret == RSC_NO_MATCH) {
            continue;
        } else {
            /* Coalesced, mark coalesced flag to tell calc cksum for ipv4 */
            seg->is_coalesced = 1;
            return size;
        }
    }

    chain->stat.no_match_cache++;
    virtio_net_rsc_cache_buf(chain, nc, buf, size);
    return size;
}

/* Drain a connection's data; this is to avoid out of order segments */
static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
                                        NetClientState *nc,
                                        const uint8_t *buf, size_t size,
                                        uint16_t ip_start, uint16_t ip_size,
                                        uint16_t tcp_port)
{
    VirtioNetRscSeg *seg, *nseg;
    uint32_t ppair1, ppair2;

    ppair1 = *(uint32_t *)(buf + tcp_port);
    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
        ppair2 = *(uint32_t *)(seg->buf + tcp_port);
        if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
            || (ppair1 != ppair2)) {
            continue;
        }
        if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
            chain->stat.drain_failed++;
        }

        break;
    }

    return virtio_net_do_receive(nc, buf, size);
}


static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
                                            struct ip_header *ip,
                                            const uint8_t *buf, size_t size)
{
    uint16_t ip_len;

    /* Not an ipv4 packet */
    if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
        chain->stat.ip_option++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ip option */
    if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
        chain->stat.ip_option++;
        return RSC_BYPASS;
    }

    if (ip->ip_p != IPPROTO_TCP) {
        chain->stat.bypass_not_tcp++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ip fragment */
    if (!(htons(ip->ip_off) & IP_DF)) {
        chain->stat.ip_frag++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ecn flag */
    if (IPTOS_ECN(ip->ip_tos)) {
        chain->stat.ip_ecn++;
        return RSC_BYPASS;
    }

    ip_len = htons(ip->ip_len);
    if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
        || ip_len > (size - chain->n->guest_hdr_len -
                     sizeof(struct eth_header))) {
        chain->stat.ip_hacked++;
        return RSC_BYPASS;
    }

    return RSC_CANDIDATE;
}
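
/*
 * The fragment test above is deliberately conservative: any datagram
 * without the DF bit set is treated as potentially fragmented and
 * bypasses coalescing, whether or not it is an actual fragment.
 */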

static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
                                      NetClientState *nc,
                                      const uint8_t *buf, size_t size)
{
    int32_t ret;
    uint16_t hdr_len;
    VirtioNetRscUnit unit;

    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;

    if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
        + sizeof(struct tcp_header))) {
        chain->stat.bypass_not_tcp++;
        return virtio_net_do_receive(nc, buf, size);
    }

    virtio_net_rsc_extract_unit4(chain, buf, &unit);
    if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
        != RSC_CANDIDATE) {
        return virtio_net_do_receive(nc, buf, size);
    }

    ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
    if (ret == RSC_BYPASS) {
        return virtio_net_do_receive(nc, buf, size);
    } else if (ret == RSC_FINAL) {
        return virtio_net_rsc_drain_flow(chain, nc, buf, size,
                ((hdr_len + sizeof(struct eth_header)) + 12),
                VIRTIO_NET_IP4_ADDR_SIZE,
                hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
    }

    return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
}
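
/*
 * The drain_flow() arguments above encode the IPv4 header layout: the
 * source address starts 12 bytes into the IP header (hence the eth
 * header offset + 12), the saddr/daddr pair spans
 * VIRTIO_NET_IP4_ADDR_SIZE (8) bytes, and the TCP ports sit immediately
 * after the 20-byte option-less IP header.
 */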

static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
                                            struct ip6_header *ip6,
                                            const uint8_t *buf, size_t size)
{
    uint16_t ip_len;

    if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
        != IP_HEADER_VERSION_6) {
        return RSC_BYPASS;
    }

    /* Both option and protocol are checked here */
    if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
        chain->stat.bypass_not_tcp++;
        return RSC_BYPASS;
    }

    ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
    if (ip_len < sizeof(struct tcp_header) ||
        ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
                  - sizeof(struct ip6_header))) {
        chain->stat.ip_hacked++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ecn flag */
    if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
        chain->stat.ip_ecn++;
        return RSC_BYPASS;
    }

    return RSC_CANDIDATE;
}

static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
                                      const uint8_t *buf, size_t size)
{
    int32_t ret;
    uint16_t hdr_len;
    VirtioNetRscChain *chain;
    VirtioNetRscUnit unit;

    chain = (VirtioNetRscChain *)opq;
    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;

    if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
        + sizeof(tcp_header))) {
        return virtio_net_do_receive(nc, buf, size);
    }

    virtio_net_rsc_extract_unit6(chain, buf, &unit);
    if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain,
                                                      unit.ip, buf, size)) {
        return virtio_net_do_receive(nc, buf, size);
    }

    ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
    if (ret == RSC_BYPASS) {
        return virtio_net_do_receive(nc, buf, size);
    } else if (ret == RSC_FINAL) {
        return virtio_net_rsc_drain_flow(chain, nc, buf, size,
                ((hdr_len + sizeof(struct eth_header)) + 8),
                VIRTIO_NET_IP6_ADDR_SIZE,
                hdr_len + sizeof(struct eth_header)
                + sizeof(struct ip6_header));
    }

    return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
}
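
/*
 * Same layout trick as the IPv4 path: the IPv6 source address starts 8
 * bytes into the fixed header, and saddr plus daddr together span
 * VIRTIO_NET_IP6_ADDR_SIZE (32) bytes.
 */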

static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
                                                      NetClientState *nc,
                                                      uint16_t proto)
{
    VirtioNetRscChain *chain;

    if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
        return NULL;
    }

    QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
        if (chain->proto == proto) {
            return chain;
        }
    }

    chain = g_malloc(sizeof(*chain));
    chain->n = n;
    chain->proto = proto;
    if (proto == (uint16_t)ETH_P_IP) {
        chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
        chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
    } else {
        chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
        chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
    }

    chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST,
                                      virtio_net_rsc_purge, chain);
    memset(&chain->stat, 0, sizeof(chain->stat));

    QTAILQ_INIT(&chain->buffers);
    QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);

    return chain;
}
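
/*
 * Chains are created lazily, one per Ethernet protocol (IPv4 or IPv6),
 * and stay on n->rsc_chains for the life of the device; each carries
 * its own purge timer and statistics block.
 */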

static ssize_t virtio_net_rsc_receive(NetClientState *nc,
                                      const uint8_t *buf,
                                      size_t size)
{
    VirtIONet *n;
    VirtioNetRscChain *chain;
    struct eth_header *eth;
    uint16_t proto;

    n = qemu_get_nic_opaque(nc);
    if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
        return virtio_net_do_receive(nc, buf, size);
    }

    eth = (struct eth_header *)(buf + n->guest_hdr_len);
    proto = htons(eth->h_proto);

    chain = virtio_net_rsc_lookup_chain(n, nc, proto);
    if (chain) {
        chain->stat.received++;
        if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
            return virtio_net_rsc_receive4(chain, nc, buf, size);
        } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
            return virtio_net_rsc_receive6(chain, nc, buf, size);
        }
    }
    return virtio_net_do_receive(nc, buf, size);
}

static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
                                  size_t size)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    if ((n->rsc4_enabled || n->rsc6_enabled)) {
        return virtio_net_rsc_receive(nc, buf, size);
    } else {
        return virtio_net_do_receive(nc, buf, size);
    }
}
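
/*
 * rsc4_enabled/rsc6_enabled are toggled elsewhere in this file during
 * guest offload negotiation (the VIRTIO_NET_F_RSC_EXT feature), so the
 * dispatch above is just a cheap flag test on the receive hot path.
 */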

static int32_t virtio_net_flush_tx(VirtIONetQueue *q);

static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
    virtio_notify(vdev, q->tx_vq);

    g_free(q->async_tx.elem);
    q->async_tx.elem = NULL;

    virtio_queue_set_notification(q->tx_vq, 1);
    virtio_net_flush_tx(q);
}

static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
{
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtQueueElement *elem;
    int32_t num_packets = 0;
    int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return num_packets;
    }

    if (q->async_tx.elem) {
        virtio_queue_set_notification(q->tx_vq, 0);
        return num_packets;
    }

    for (;;) {
        ssize_t ret;
        unsigned int out_num;
        struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
        struct virtio_net_hdr_mrg_rxbuf mhdr;

        elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }

        out_num = elem->out_num;
        out_sg = elem->out_sg;
        if (out_num < 1) {
            virtio_error(vdev, "virtio-net header not in first element");
            virtqueue_detach_element(q->tx_vq, elem, 0);
            g_free(elem);
            return -EINVAL;
        }

        if (n->has_vnet_hdr) {
            if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
                n->guest_hdr_len) {
                virtio_error(vdev, "virtio-net header incorrect");
                virtqueue_detach_element(q->tx_vq, elem, 0);
                g_free(elem);
                return -EINVAL;
            }
            if (n->needs_vnet_hdr_swap) {
                virtio_net_hdr_swap(vdev, (void *) &mhdr);
                sg2[0].iov_base = &mhdr;
                sg2[0].iov_len = n->guest_hdr_len;
                out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
                                   out_sg, out_num,
                                   n->guest_hdr_len, -1);
                if (out_num == VIRTQUEUE_MAX_SIZE) {
                    goto drop;
                }
                out_num += 1;
                out_sg = sg2;
            }
        }
        /*
         * If host wants to see the guest header as is, we can
         * pass it on unchanged. Otherwise, copy just the parts
         * that host is interested in.
         */
        assert(n->host_hdr_len <= n->guest_hdr_len);
        if (n->host_hdr_len != n->guest_hdr_len) {
            unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
                                       out_sg, out_num,
                                       0, n->host_hdr_len);
            sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
                               out_sg, out_num,
                               n->guest_hdr_len, -1);
            out_num = sg_num;
            out_sg = sg;
        }

        ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
                                      out_sg, out_num, virtio_net_tx_complete);
        if (ret == 0) {
            virtio_queue_set_notification(q->tx_vq, 0);
            q->async_tx.elem = elem;
            return -EBUSY;
        }

drop:
        virtqueue_push(q->tx_vq, elem, 0);
        virtio_notify(vdev, q->tx_vq);
        g_free(elem);

        if (++num_packets >= n->tx_burst) {
            break;
        }
    }
    return num_packets;
}
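
/*
 * virtio_net_flush_tx() returns the number of packets flushed (capped
 * at n->tx_burst), -EBUSY when a send went asynchronous and must wait
 * for virtio_net_tx_complete(), or -EINVAL when the device was marked
 * broken.  Both the timer and bottom-half tx paths key off these values.
 */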

static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];

    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
        virtio_net_drop_tx_queue_data(vdev, vq);
        return;
    }

    /* This happens when device was stopped but VCPU wasn't. */
    if (!vdev->vm_running) {
        q->tx_waiting = 1;
        return;
    }

    if (q->tx_waiting) {
        virtio_queue_set_notification(vq, 1);
        timer_del(q->tx_timer);
        q->tx_waiting = 0;
        if (virtio_net_flush_tx(q) == -EINVAL) {
            return;
        }
    } else {
        timer_mod(q->tx_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
        q->tx_waiting = 1;
        virtio_queue_set_notification(vq, 0);
    }
}
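
/*
 * The timer flavour batches transmits: the first guest kick only arms
 * q->tx_timer for n->tx_timeout ns and suppresses further
 * notifications, while a second kick during that window flushes
 * immediately.  The bottom-half flavour below instead schedules
 * q->tx_bh on every kick.
 */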

static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];

    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
        virtio_net_drop_tx_queue_data(vdev, vq);
        return;
    }

    if (unlikely(q->tx_waiting)) {
        return;
    }
    q->tx_waiting = 1;
    /* This happens when device was stopped but VCPU wasn't. */
    if (!vdev->vm_running) {
        return;
    }
    virtio_queue_set_notification(vq, 0);
    qemu_bh_schedule(q->tx_bh);
}

static void virtio_net_tx_timer(void *opaque)
{
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    /* This happens when device was stopped but BH wasn't. */
    if (!vdev->vm_running) {
        /* Make sure tx waiting is set, so we'll run when restarted. */
        assert(q->tx_waiting);
        return;
    }

    q->tx_waiting = 0;

    /* Just in case the driver is not ready any more */
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return;
    }

    virtio_queue_set_notification(q->tx_vq, 1);
    virtio_net_flush_tx(q);
}

static void virtio_net_tx_bh(void *opaque)
{
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int32_t ret;

    /* This happens when device was stopped but BH wasn't. */
    if (!vdev->vm_running) {
        /* Make sure tx waiting is set, so we'll run when restarted. */
        assert(q->tx_waiting);
        return;
    }

    q->tx_waiting = 0;

    /* Just in case the driver is not ready any more */
    if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
        return;
    }

    ret = virtio_net_flush_tx(q);
    if (ret == -EBUSY || ret == -EINVAL) {
        return; /* Notification re-enable handled by tx_complete or device
                 * broken */
    }

    /* If we flush a full burst of packets, assume there are
     * more coming and immediately reschedule */
    if (ret >= n->tx_burst) {
        qemu_bh_schedule(q->tx_bh);
        q->tx_waiting = 1;
        return;
    }

    /* If less than a full burst, re-enable notification and flush
     * anything that may have come in while we weren't looking.  If
     * we find something, assume the guest is still active and reschedule */
    virtio_queue_set_notification(q->tx_vq, 1);
    ret = virtio_net_flush_tx(q);
    if (ret == -EINVAL) {
        return;
    } else if (ret > 0) {
        virtio_queue_set_notification(q->tx_vq, 0);
        qemu_bh_schedule(q->tx_bh);
        q->tx_waiting = 1;
    }
}

static void virtio_net_add_queue(VirtIONet *n, int index)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
                                           virtio_net_handle_rx);

    if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
        n->vqs[index].tx_vq =
            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
                             virtio_net_handle_tx_timer);
        n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                              virtio_net_tx_timer,
                                              &n->vqs[index]);
    } else {
        n->vqs[index].tx_vq =
            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
                             virtio_net_handle_tx_bh);
        n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
    }

    n->vqs[index].tx_waiting = 0;
    n->vqs[index].n = n;
}
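
/*
 * Which tx path a queue gets is selected by the "tx" property, e.g. a
 * command-line sketch (values illustrative):
 *   -device virtio-net-pci,tx=timer,x-txtimer=150000
 * Anything other than "timer" uses the bottom-half handler.
 */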

static void virtio_net_del_queue(VirtIONet *n, int index)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = &n->vqs[index];
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    qemu_purge_queued_packets(nc);

    virtio_del_queue(vdev, index * 2);
    if (q->tx_timer) {
        timer_del(q->tx_timer);
        timer_free(q->tx_timer);
        q->tx_timer = NULL;
    } else {
        qemu_bh_delete(q->tx_bh);
        q->tx_bh = NULL;
    }
    q->tx_waiting = 0;
    virtio_del_queue(vdev, index * 2 + 1);
}
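
/*
 * Virtqueues come in rx/tx pairs: queue pair 'index' owns virtqueue
 * index * 2 (rx) and index * 2 + 1 (tx), with the control queue always
 * last.  virtio_net_change_num_queues() below relies on this layout.
 */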

static void virtio_net_change_num_queues(VirtIONet *n, int new_max_queues)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int old_num_queues = virtio_get_num_queues(vdev);
    int new_num_queues = new_max_queues * 2 + 1;
    int i;

    assert(old_num_queues >= 3);
    assert(old_num_queues % 2 == 1);

    if (old_num_queues == new_num_queues) {
        return;
    }

    /*
     * We always need to remove and add ctrl vq if
     * old_num_queues != new_num_queues. Remove ctrl_vq first,
     * and then we only enter one of the following two loops.
     */
    virtio_del_queue(vdev, old_num_queues - 1);

    for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
        /* new_num_queues < old_num_queues */
        virtio_net_del_queue(n, i / 2);
    }

    for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
        /* new_num_queues > old_num_queues */
        virtio_net_add_queue(n, i / 2);
    }

    /* add ctrl_vq last */
    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
}

static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
{
    int max = multiqueue ? n->max_queues : 1;

    n->multiqueue = multiqueue;
    virtio_net_change_num_queues(n, max);

    virtio_net_set_queues(n);
}

static int virtio_net_post_load_device(void *opaque, int version_id)
{
    VirtIONet *n = opaque;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int i, link_down;

    trace_virtio_net_post_load_device();
    virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
                               virtio_vdev_has_feature(vdev,
                                                       VIRTIO_F_VERSION_1),
                               virtio_vdev_has_feature(vdev,
                                                       VIRTIO_NET_F_HASH_REPORT));

    /* MAC_TABLE_ENTRIES may be different from the saved image */
    if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
        n->mac_table.in_use = 0;
    }

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
    }

    /*
     * curr_guest_offloads will be later overwritten by the
     * virtio_set_features_nocheck call done from the virtio_load.
     * Here we make sure it is preserved and restored accordingly
     * in the virtio_net_post_load_virtio callback.
     */
    n->saved_guest_offloads = n->curr_guest_offloads;

    virtio_net_set_queues(n);

    /* Find the first multicast entry in the saved MAC filter */
    for (i = 0; i < n->mac_table.in_use; i++) {
        if (n->mac_table.macs[i * ETH_ALEN] & 1) {
            break;
        }
    }
    n->mac_table.first_multi = i;

    /* nc.link_down can't be migrated, so infer link_down according
     * to link status bit in n->status */
    link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
    for (i = 0; i < n->max_queues; i++) {
        qemu_get_subqueue(n->nic, i)->link_down = link_down;
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
        qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
                                  QEMU_CLOCK_VIRTUAL,
                                  virtio_net_announce_timer, n);
        if (n->announce_timer.round) {
            timer_mod(n->announce_timer.tm,
                      qemu_clock_get_ms(n->announce_timer.type));
        } else {
            qemu_announce_timer_del(&n->announce_timer, false);
        }
    }

    if (n->rss_data.enabled) {
        trace_virtio_net_rss_enable(n->rss_data.hash_types,
                                    n->rss_data.indirections_len,
                                    sizeof(n->rss_data.key));
    } else {
        trace_virtio_net_rss_disable();
    }
    return 0;
}

static int virtio_net_post_load_virtio(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    /*
     * The actual needed state is now in saved_guest_offloads,
     * see virtio_net_post_load_device for detail.
     * Restore it back and apply the desired offloads.
     */
    n->curr_guest_offloads = n->saved_guest_offloads;
    if (peer_has_vnet_hdr(n)) {
        virtio_net_apply_guest_offloads(n);
    }

    return 0;
}

/* tx_waiting field of a VirtIONetQueue */
static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
    .name   = "virtio-net-queue-tx_waiting",
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
        VMSTATE_END_OF_LIST()
    },
};

static bool max_queues_gt_1(void *opaque, int version_id)
{
    return VIRTIO_NET(opaque)->max_queues > 1;
}

static bool has_ctrl_guest_offloads(void *opaque, int version_id)
{
    return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
                                   VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
}

static bool mac_table_fits(void *opaque, int version_id)
{
    return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
}

static bool mac_table_doesnt_fit(void *opaque, int version_id)
{
    return !mac_table_fits(opaque, version_id);
}

/* This temporary type is shared by all the WITH_TMP methods
 * although only some fields are used by each.
 */
struct VirtIONetMigTmp {
    VirtIONet      *parent;
    VirtIONetQueue *vqs_1;
    uint16_t        curr_queues_1;
    uint8_t         has_ufo;
    uint32_t        has_vnet_hdr;
};

/* The 2nd and subsequent tx_waiting flags are loaded later than
 * the 1st entry in the queues and only if there's more than one
 * entry.  We use the tmp mechanism to calculate a temporary
 * pointer and count and also validate the count.
 */

static int virtio_net_tx_waiting_pre_save(void *opaque)
{
    struct VirtIONetMigTmp *tmp = opaque;

    tmp->vqs_1 = tmp->parent->vqs + 1;
    tmp->curr_queues_1 = tmp->parent->curr_queues - 1;
    if (tmp->parent->curr_queues == 0) {
        tmp->curr_queues_1 = 0;
    }

    return 0;
}

static int virtio_net_tx_waiting_pre_load(void *opaque)
{
    struct VirtIONetMigTmp *tmp = opaque;

    /* Reuse the pointer setup from save */
    virtio_net_tx_waiting_pre_save(opaque);

    if (tmp->parent->curr_queues > tmp->parent->max_queues) {
        error_report("virtio-net: curr_queues %x > max_queues %x",
                     tmp->parent->curr_queues, tmp->parent->max_queues);
        return -EINVAL;
    }

    return 0; /* all good */
}
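
/*
 * VMSTATE_WITH_TMP wraps each of these hook pairs: it allocates a
 * struct VirtIONetMigTmp, points tmp->parent at the VirtIONet, runs the
 * pre_save/pre_load hook, and migrates only the fields named by the
 * associated VMStateDescription - derived state travels on the wire
 * without having to live in the device structure itself.
 */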

static const VMStateDescription vmstate_virtio_net_tx_waiting = {
    .name      = "virtio-net-tx_waiting",
    .pre_load  = virtio_net_tx_waiting_pre_load,
    .pre_save  = virtio_net_tx_waiting_pre_save,
    .fields    = (VMStateField[]) {
        VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
                                             curr_queues_1,
                                             vmstate_virtio_net_queue_tx_waiting,
                                             struct VirtIONetQueue),
        VMSTATE_END_OF_LIST()
    },
};

/* the 'has_ufo' flag is just tested; if the incoming stream has the
 * flag set we need to check that we have it
 */
static int virtio_net_ufo_post_load(void *opaque, int version_id)
{
    struct VirtIONetMigTmp *tmp = opaque;

    if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
        error_report("virtio-net: saved image requires TUN_F_UFO support");
        return -EINVAL;
    }

    return 0;
}

static int virtio_net_ufo_pre_save(void *opaque)
{
    struct VirtIONetMigTmp *tmp = opaque;

    tmp->has_ufo = tmp->parent->has_ufo;

    return 0;
}

static const VMStateDescription vmstate_virtio_net_has_ufo = {
    .name      = "virtio-net-ufo",
    .post_load = virtio_net_ufo_post_load,
    .pre_save  = virtio_net_ufo_pre_save,
    .fields    = (VMStateField[]) {
        VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};

/* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
 * flag set we need to check that we have it
 */
static int virtio_net_vnet_post_load(void *opaque, int version_id)
{
    struct VirtIONetMigTmp *tmp = opaque;

    if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
        error_report("virtio-net: saved image requires vnet_hdr=on");
        return -EINVAL;
    }

    return 0;
}

static int virtio_net_vnet_pre_save(void *opaque)
{
    struct VirtIONetMigTmp *tmp = opaque;

    tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;

    return 0;
}

static const VMStateDescription vmstate_virtio_net_has_vnet = {
    .name      = "virtio-net-vnet",
    .post_load = virtio_net_vnet_post_load,
    .pre_save  = virtio_net_vnet_pre_save,
    .fields    = (VMStateField[]) {
        VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};

static bool virtio_net_rss_needed(void *opaque)
{
    return VIRTIO_NET(opaque)->rss_data.enabled;
}

static const VMStateDescription vmstate_virtio_net_rss = {
    .name       = "virtio-net-device/rss",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = virtio_net_rss_needed,
    .fields = (VMStateField[]) {
        VMSTATE_BOOL(rss_data.enabled, VirtIONet),
        VMSTATE_BOOL(rss_data.redirect, VirtIONet),
        VMSTATE_BOOL(rss_data.populate_hash, VirtIONet),
        VMSTATE_UINT32(rss_data.hash_types, VirtIONet),
        VMSTATE_UINT16(rss_data.indirections_len, VirtIONet),
        VMSTATE_UINT16(rss_data.default_queue, VirtIONet),
        VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet,
                            VIRTIO_NET_RSS_MAX_KEY_SIZE),
        VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet,
                                    rss_data.indirections_len, 0,
                                    vmstate_info_uint16, uint16_t),
        VMSTATE_END_OF_LIST()
    },
};

static const VMStateDescription vmstate_virtio_net_device = {
    .name = "virtio-net-device",
    .version_id = VIRTIO_NET_VM_VERSION,
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .post_load = virtio_net_post_load_device,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
        VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
                               vmstate_virtio_net_queue_tx_waiting,
                               VirtIONetQueue),
        VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
        VMSTATE_UINT16(status, VirtIONet),
        VMSTATE_UINT8(promisc, VirtIONet),
        VMSTATE_UINT8(allmulti, VirtIONet),
        VMSTATE_UINT32(mac_table.in_use, VirtIONet),

        /* Guarded pair: If it fits we load it, else we throw it away
         * - can happen if source has a larger MAC table; post-load
         * sets flags in this case.
         */
        VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
                                 0, mac_table_fits, mac_table.in_use,
                                 ETH_ALEN),
        VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
                                     mac_table.in_use, ETH_ALEN),

        /* Note: This is an array of uint32's that's always been saved as a
         * buffer; hold onto your endiannesses; it's actually used as a bitmap
         * but based on the uint.
         */
        VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_vnet),
        VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
        VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
        VMSTATE_UINT8(alluni, VirtIONet),
        VMSTATE_UINT8(nomulti, VirtIONet),
        VMSTATE_UINT8(nouni, VirtIONet),
        VMSTATE_UINT8(nobcast, VirtIONet),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_ufo),
        VMSTATE_SINGLE_TEST(max_queues, VirtIONet, max_queues_gt_1, 0,
                            vmstate_info_uint16_equal, uint16_t),
        VMSTATE_UINT16_TEST(curr_queues, VirtIONet, max_queues_gt_1),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_tx_waiting),
        VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
                            has_ctrl_guest_offloads),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription * []) {
        &vmstate_virtio_net_rss,
        NULL
    }
};
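
/*
 * The rss subsection is only put on the wire when
 * virtio_net_rss_needed() returns true, so migration to older QEMUs
 * keeps working as long as RSS is left disabled.
 */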

static NetClientInfo net_virtio_info = {
    .type = NET_CLIENT_DRIVER_NIC,
    .size = sizeof(NICState),
    .can_receive = virtio_net_can_receive,
    .receive = virtio_net_receive,
    .link_status_changed = virtio_net_set_link_status,
    .query_rx_filter = virtio_net_query_rxfilter,
    .announce = virtio_net_announce,
};

static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
    assert(n->vhost_started);
    return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
}

static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
                                           bool mask)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
    assert(n->vhost_started);
    vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
                             vdev, idx, mask);
}

static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
{
    virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);

    n->config_size = virtio_feature_get_config_size(feature_sizes,
                                                    host_features);
}

void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
                                   const char *type)
{
    /*
     * The name can be NULL, the netclient name will be type.x.
     */
    assert(type != NULL);

    g_free(n->netclient_name);
    g_free(n->netclient_type);
    n->netclient_name = g_strdup(name);
    n->netclient_type = g_strdup(type);
}

static bool failover_unplug_primary(VirtIONet *n)
{
    HotplugHandler *hotplug_ctrl;
    PCIDevice *pci_dev;
    Error *err = NULL;

    hotplug_ctrl = qdev_get_hotplug_handler(n->primary_dev);
    if (hotplug_ctrl) {
        pci_dev = PCI_DEVICE(n->primary_dev);
        pci_dev->partially_hotplugged = true;
        hotplug_handler_unplug_request(hotplug_ctrl, n->primary_dev, &err);
        if (err) {
            error_report_err(err);
            return false;
        }
    } else {
        return false;
    }
    return true;
}

static bool failover_replug_primary(VirtIONet *n, Error **errp)
{
    Error *err = NULL;
    HotplugHandler *hotplug_ctrl;
    PCIDevice *pdev = PCI_DEVICE(n->primary_dev);

    if (!pdev->partially_hotplugged) {
        return true;
    }
    if (!n->primary_device_opts) {
        n->primary_device_opts = qemu_opts_from_qdict(
                qemu_find_opts("device"),
                n->primary_device_dict, errp);
        if (!n->primary_device_opts) {
            return false;
        }
    }
    n->primary_bus = n->primary_dev->parent_bus;
    if (!n->primary_bus) {
        error_setg(errp, "virtio_net: couldn't find primary bus");
        return false;
    }
    qdev_set_parent_bus(n->primary_dev, n->primary_bus);
    n->primary_should_be_hidden = false;
    if (!qemu_opt_set_bool(n->primary_device_opts,
                           "partially_hotplugged", true, errp)) {
        return false;
    }
    hotplug_ctrl = qdev_get_hotplug_handler(n->primary_dev);
    if (hotplug_ctrl) {
        hotplug_handler_pre_plug(hotplug_ctrl, n->primary_dev, &err);
        if (err) {
            goto out;
        }
        hotplug_handler_plug(hotplug_ctrl, n->primary_dev, &err);
    }

out:
    error_propagate(errp, err);
    return !err;
}

static void virtio_net_handle_migration_primary(VirtIONet *n,
                                                MigrationState *s)
{
    bool should_be_hidden;
    Error *err = NULL;

    should_be_hidden = atomic_read(&n->primary_should_be_hidden);

    if (!n->primary_dev) {
        n->primary_dev = virtio_connect_failover_devices(n, n->qdev, &err);
        if (!n->primary_dev) {
            return;
        }
    }

    if (migration_in_setup(s) && !should_be_hidden) {
        if (failover_unplug_primary(n)) {
            vmstate_unregister(VMSTATE_IF(n->primary_dev),
                               qdev_get_vmsd(n->primary_dev),
                               n->primary_dev);
            qapi_event_send_unplug_primary(n->primary_device_id);
            atomic_set(&n->primary_should_be_hidden, true);
        } else {
            warn_report("couldn't unplug primary device");
        }
    } else if (migration_has_failed(s)) {
        /* We already unplugged the device let's plug it back */
        if (!failover_replug_primary(n, &err)) {
            if (err) {
                error_report_err(err);
            }
        }
    }
}
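
/*
 * Failover in the two helpers above: when migration enters setup the
 * primary (typically a passthrough device) is hot-unplugged from the
 * guest so only the virtio-net standby is migrated; if migration fails,
 * the primary is plugged straight back in.
 */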

static void virtio_net_migration_state_notifier(Notifier *notifier, void *data)
{
    MigrationState *s = data;
    VirtIONet *n = container_of(notifier, VirtIONet, migration_state);
    virtio_net_handle_migration_primary(n, s);
}

static int virtio_net_primary_should_be_hidden(DeviceListener *listener,
                                               QemuOpts *device_opts)
{
    VirtIONet *n = container_of(listener, VirtIONet, primary_listener);
    bool match_found = false;
    bool hide;

    if (!device_opts) {
        return -1;
    }
    n->primary_device_dict = qemu_opts_to_qdict(device_opts,
                                                n->primary_device_dict);
    if (n->primary_device_dict) {
        g_free(n->standby_id);
        n->standby_id = g_strdup(qdict_get_try_str(n->primary_device_dict,
                                                   "failover_pair_id"));
    }
    if (g_strcmp0(n->standby_id, n->netclient_name) == 0) {
        match_found = true;
    } else {
        match_found = false;
        hide = false;
        g_free(n->standby_id);
        n->primary_device_dict = NULL;
        goto out;
    }

    n->primary_device_opts = device_opts;

    /* primary_should_be_hidden is set during feature negotiation */
    hide = atomic_read(&n->primary_should_be_hidden);

    if (n->primary_device_dict) {
        g_free(n->primary_device_id);
        n->primary_device_id = g_strdup(qdict_get_try_str(
            n->primary_device_dict, "id"));
        if (!n->primary_device_id) {
            warn_report("primary_device_id not set");
        }
    }

out:
    if (match_found && hide) {
        return 1;
    } else if (match_found && !hide) {
        return 0;
    } else {
        return -1;
    }
}

static void virtio_net_device_realize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(dev);
    NetClientState *nc;
    int i;

    if (n->net_conf.mtu) {
        n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    if (n->net_conf.duplex_str) {
        if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
            n->net_conf.duplex = DUPLEX_HALF;
        } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
            n->net_conf.duplex = DUPLEX_FULL;
        } else {
            error_setg(errp, "'duplex' must be 'half' or 'full'");
            return;
        }
        n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
    } else {
        n->net_conf.duplex = DUPLEX_UNKNOWN;
    }

    if (n->net_conf.speed < SPEED_UNKNOWN) {
        error_setg(errp, "'speed' must be between 0 and INT_MAX");
        return;
    }
    if (n->net_conf.speed >= 0) {
        n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
    }

    if (n->failover) {
        n->primary_listener.should_be_hidden =
            virtio_net_primary_should_be_hidden;
        atomic_set(&n->primary_should_be_hidden, true);
        device_listener_register(&n->primary_listener);
        n->migration_state.notify = virtio_net_migration_state_notifier;
        add_migration_state_change_notifier(&n->migration_state);
        n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY);
    }

    virtio_net_set_config_size(n, n->host_features);
    virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);

    /*
     * We set a lower limit on RX queue size to what it always was.
     * Guests that want a smaller ring can always resize it without
     * help from us (using virtio 1 and up).
     */
    if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
        n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
        !is_power_of_2(n->net_conf.rx_queue_size)) {
        error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
                   "must be a power of 2 between %d and %d.",
                   n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
                   VIRTQUEUE_MAX_SIZE);
        virtio_cleanup(vdev);
        return;
    }

    if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
        n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
        !is_power_of_2(n->net_conf.tx_queue_size)) {
        error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
                   "must be a power of 2 between %d and %d",
                   n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
                   VIRTQUEUE_MAX_SIZE);
        virtio_cleanup(vdev);
        return;
    }

    n->max_queues = MAX(n->nic_conf.peers.queues, 1);
    if (n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX) {
        error_setg(errp, "Invalid number of queues (= %" PRIu32 "), "
                   "must be a positive integer less than %d.",
                   n->max_queues, (VIRTIO_QUEUE_MAX - 1) / 2);
        virtio_cleanup(vdev);
        return;
    }
    n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queues);
    n->curr_queues = 1;
    n->tx_timeout = n->net_conf.txtimer;

    if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
                       && strcmp(n->net_conf.tx, "bh")) {
        warn_report("virtio-net: "
                    "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
                    n->net_conf.tx);
        error_printf("Defaulting to \"bh\"");
    }

    n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
                                    n->net_conf.tx_queue_size);

    for (i = 0; i < n->max_queues; i++) {
        virtio_net_add_queue(n, i);
    }

    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
    qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
    memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
    n->status = VIRTIO_NET_S_LINK_UP;
    qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
                              QEMU_CLOCK_VIRTUAL,
                              virtio_net_announce_timer, n);
    n->announce_timer.round = 0;

    if (n->netclient_type) {
        /*
         * Happens when virtio_net_set_netclient_name has been called.
         */
        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
                              n->netclient_type, n->netclient_name, n);
    } else {
        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
                              object_get_typename(OBJECT(dev)), dev->id, n);
    }

    peer_test_vnet_hdr(n);
    if (peer_has_vnet_hdr(n)) {
        for (i = 0; i < n->max_queues; i++) {
            qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
        }
        n->host_hdr_len = sizeof(struct virtio_net_hdr);
    } else {
        n->host_hdr_len = 0;
    }

    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);

    n->vqs[0].tx_waiting = 0;
    n->tx_burst = n->net_conf.txburst;
    virtio_net_set_mrg_rx_bufs(n, 0, 0, 0);
    n->promisc = 1; /* for compatibility */

    n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    n->vlans = g_malloc0(MAX_VLAN >> 3);

    nc = qemu_get_queue(n->nic);
    nc->rxfilter_notify_enabled = 1;

    QTAILQ_INIT(&n->rsc_chains);
    n->qdev = dev;

    net_rx_pkt_init(&n->rx_pkt, false);
}
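
/*
 * The queue-size validation above accepts powers of two between
 * VIRTIO_NET_RX_QUEUE_MIN_SIZE / VIRTIO_NET_TX_QUEUE_MIN_SIZE and
 * VIRTQUEUE_MAX_SIZE; guests speaking virtio 1 or later may still
 * negotiate a smaller ring on their own.
 */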

static void virtio_net_device_unrealize(DeviceState *dev)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(dev);
    int i, max_queues;

    /* This will stop vhost backend if appropriate. */
    virtio_net_set_status(vdev, 0);

    g_free(n->netclient_name);
    n->netclient_name = NULL;
    g_free(n->netclient_type);
    n->netclient_type = NULL;

    g_free(n->mac_table.macs);
    g_free(n->vlans);

    if (n->failover) {
        device_listener_unregister(&n->primary_listener);
        g_free(n->primary_device_id);
        g_free(n->standby_id);
        qobject_unref(n->primary_device_dict);
        n->primary_device_dict = NULL;
    }

    max_queues = n->multiqueue ? n->max_queues : 1;
    for (i = 0; i < max_queues; i++) {
        virtio_net_del_queue(n, i);
    }
    /* delete also control vq */
    virtio_del_queue(vdev, max_queues * 2);
    qemu_announce_timer_del(&n->announce_timer, false);
    g_free(n->vqs);
    qemu_del_nic(n->nic);
    virtio_net_rsc_cleanup(n);
    g_free(n->rss_data.indirections_table);
    net_rx_pkt_uninit(n->rx_pkt);
    virtio_cleanup(vdev);
}

static void virtio_net_instance_init(Object *obj)
{
    VirtIONet *n = VIRTIO_NET(obj);

    /*
     * The default config_size is sizeof(struct virtio_net_config).
     * Can be overridden with virtio_net_set_config_size.
     */
    n->config_size = sizeof(struct virtio_net_config);
    device_add_bootindex_property(obj, &n->nic_conf.bootindex,
                                  "bootindex", "/ethernet-phy@0",
                                  DEVICE(n));
}

static int virtio_net_pre_save(void *opaque)
{
    VirtIONet *n = opaque;

    /* At this point, backend must be stopped, otherwise
     * it might keep writing to memory. */
    assert(!n->vhost_started);

    return 0;
}

static bool primary_unplug_pending(void *opaque)
{
    DeviceState *dev = opaque;
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(vdev);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
        return false;
    }
    return n->primary_dev ? n->primary_dev->pending_deleted_event : false;
}

static bool dev_unplug_pending(void *opaque)
{
    DeviceState *dev = opaque;
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);

    return vdc->primary_unplug_pending(dev);
}

static const VMStateDescription vmstate_virtio_net = {
    .name = "virtio-net",
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .version_id = VIRTIO_NET_VM_VERSION,
    .fields = (VMStateField[]) {
        VMSTATE_VIRTIO_DEVICE,
        VMSTATE_END_OF_LIST()
    },
    .pre_save = virtio_net_pre_save,
    .dev_unplug_pending = dev_unplug_pending,
};

static Property virtio_net_properties[] = {
    DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
                      VIRTIO_NET_F_CSUM, true),
    DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_CSUM, true),
    DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
    DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_TSO4, true),
    DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_TSO6, true),
    DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_ECN, true),
    DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_UFO, true),
    DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_ANNOUNCE, true),
    DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_TSO4, true),
    DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_TSO6, true),
    DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_ECN, true),
    DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_UFO, true),
    DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
                      VIRTIO_NET_F_MRG_RXBUF, true),
    DEFINE_PROP_BIT64("status", VirtIONet, host_features,
                      VIRTIO_NET_F_STATUS, true),
    DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_VQ, true),
    DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_RX, true),
    DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_VLAN, true),
    DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_RX_EXTRA, true),
    DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_MAC_ADDR, true),
    DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
    DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
    DEFINE_PROP_BIT64("rss", VirtIONet, host_features,
                      VIRTIO_NET_F_RSS, false),
    DEFINE_PROP_BIT64("hash", VirtIONet, host_features,
                      VIRTIO_NET_F_HASH_REPORT, false),
    DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
                      VIRTIO_NET_F_RSC_EXT, false),
    DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
                       VIRTIO_NET_RSC_DEFAULT_INTERVAL),
    DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
    DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
                       TX_TIMER_INTERVAL),
    DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
    DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
    DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
                       VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
    DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
                       VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
    DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
    DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
                     true),
    DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
    DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
    DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
    DEFINE_PROP_END_OF_LIST(),
};
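
/*
 * All feature bits above are per-device tunables, e.g. a command-line
 * sketch (values illustrative):
 *   -device virtio-net-pci,mq=on,rx_queue_size=1024,host_mtu=9000
 * The offload and ctrl bits default to on; mq, rss, hash, guest_rsc_ext
 * and failover default to off.
 */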

static void virtio_net_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);

    device_class_set_props(dc, virtio_net_properties);
    dc->vmsd = &vmstate_virtio_net;
    set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
    vdc->realize = virtio_net_device_realize;
    vdc->unrealize = virtio_net_device_unrealize;
    vdc->get_config = virtio_net_get_config;
    vdc->set_config = virtio_net_set_config;
    vdc->get_features = virtio_net_get_features;
    vdc->set_features = virtio_net_set_features;
    vdc->bad_features = virtio_net_bad_features;
    vdc->reset = virtio_net_reset;
    vdc->set_status = virtio_net_set_status;
    vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
    vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
    vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
    vdc->post_load = virtio_net_post_load_virtio;
    vdc->vmsd = &vmstate_virtio_net_device;
    vdc->primary_unplug_pending = primary_unplug_pending;
}

static const TypeInfo virtio_net_info = {
    .name = TYPE_VIRTIO_NET,
    .parent = TYPE_VIRTIO_DEVICE,
    .instance_size = sizeof(VirtIONet),
    .instance_init = virtio_net_instance_init,
    .class_init = virtio_net_class_init,
};

static void virtio_register_types(void)
{
    type_register_static(&virtio_net_info);
}

type_init(virtio_register_types)