/*
 * Virtio Network Device
 *
 * Copyright IBM, Corp. 2007
 *
 * Anthony Liguori <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2. See
 * the COPYING file in the top-level directory.
 */
16 #include "qemu-timer.h"
17 #include "virtio-net.h"
/*
 * Per-device state of the virtio network device.
 *
 * NOTE(review): the embedded line numbers jump from 24 to 34, so the other
 * members and the end of the typedef are not visible in this extract.
 */
24 typedef struct VirtIONet
/* Set from the VIRTIO_NET_F_MRG_RXBUF feature bit in
 * virtio_net_set_features(); initialised to 0 in virtio_net_init(). */
34 int mergeable_rx_bufs
;
/* - we could suppress the RX interrupt if we were so inclined. */
41 static VirtIONet
*to_virtio_net(VirtIODevice
*vdev
)
43 return (VirtIONet
*)vdev
;
46 static void virtio_net_update_config(VirtIODevice
*vdev
, uint8_t *config
)
48 VirtIONet
*n
= to_virtio_net(vdev
);
49 struct virtio_net_config netcfg
;
51 netcfg
.status
= n
->status
;
52 memcpy(netcfg
.mac
, n
->mac
, 6);
53 memcpy(config
, &netcfg
, sizeof(netcfg
));
/*
 * Link-status callback (installed as vc->link_status_changed by
 * virtio_net_init()).
 *
 * Recomputes the VIRTIO_NET_S_LINK_UP bit in n->status and calls
 * virtio_notify_config() only when the status word actually changed.
 *
 * NOTE(review): the condition that selects between clearing the bit
 * (line 62) and setting it (line 64) is on lines that are missing from
 * this extract (60-61, 63).
 */
56 static void virtio_net_set_link_status(VLANClientState
*vc
)
58 VirtIONet
*n
= vc
->opaque
;
59 uint16_t old_status
= n
->status
;
62 n
->status
&= ~VIRTIO_NET_S_LINK_UP
;
64 n
->status
|= VIRTIO_NET_S_LINK_UP
;
66 if (n
->status
!= old_status
)
67 virtio_notify_config(&n
->vdev
);
/*
 * get_features callback: build the feature bitmask offered by the device.
 *
 * VIRTIO_NET_F_MAC and VIRTIO_NET_F_STATUS are always offered.  If the
 * first client on the VLAN reports tap_has_vnet_hdr(), vnet headers are
 * switched on for it via tap_using_vnet_hdr(host, 1) and the checksum,
 * TSO4/TSO6, ECN and mergeable-RX-buffer bits are added as well.
 *
 * NOTE(review): the lines closing the `if`, returning `features` and
 * closing the function (89-93) are missing from this extract, as are
 * lines 71, 73 and 76.
 */
70 static uint32_t virtio_net_get_features(VirtIODevice
*vdev
)
72 uint32_t features
= (1 << VIRTIO_NET_F_MAC
) | (1 << VIRTIO_NET_F_STATUS
);
74 VirtIONet
*n
= to_virtio_net(vdev
);
75 VLANClientState
*host
= n
->vc
->vlan
->first_client
;
77 if (tap_has_vnet_hdr(host
)) {
78 tap_using_vnet_hdr(host
, 1);
79 features
|= (1 << VIRTIO_NET_F_CSUM
);
80 features
|= (1 << VIRTIO_NET_F_GUEST_CSUM
);
81 features
|= (1 << VIRTIO_NET_F_GUEST_TSO4
);
82 features
|= (1 << VIRTIO_NET_F_GUEST_TSO6
);
83 features
|= (1 << VIRTIO_NET_F_GUEST_ECN
);
84 features
|= (1 << VIRTIO_NET_F_HOST_TSO4
);
85 features
|= (1 << VIRTIO_NET_F_HOST_TSO6
);
86 features
|= (1 << VIRTIO_NET_F_HOST_ECN
);
87 features
|= (1 << VIRTIO_NET_F_MRG_RXBUF
);
88 /* Kernel can't actually handle UFO in software currently. */
/*
 * set_features callback: apply the feature bits passed in `features`.
 *
 * Records whether VIRTIO_NET_F_MRG_RXBUF is set in n->mergeable_rx_bufs,
 * then passes the GUEST_CSUM, GUEST_TSO4, GUEST_TSO6 and GUEST_ECN bits
 * (each reduced to 0 or 1) to the host client's set_offload hook.
 *
 * NOTE(review): the statement controlled by the `if` on line 105 is
 * missing from this extract (lines 106-107); presumably an early return
 * when the host has no vnet header or no set_offload hook -- confirm.
 */
95 static void virtio_net_set_features(VirtIODevice
*vdev
, uint32_t features
)
97 VirtIONet
*n
= to_virtio_net(vdev
);
99 VLANClientState
*host
= n
->vc
->vlan
->first_client
;
102 n
->mergeable_rx_bufs
= !!(features
& (1 << VIRTIO_NET_F_MRG_RXBUF
));
105 if (!tap_has_vnet_hdr(host
) || !host
->set_offload
)
108 host
->set_offload(host
,
109 (features
>> VIRTIO_NET_F_GUEST_CSUM
) & 1,
110 (features
>> VIRTIO_NET_F_GUEST_TSO4
) & 1,
111 (features
>> VIRTIO_NET_F_GUEST_TSO6
) & 1,
112 (features
>> VIRTIO_NET_F_GUEST_ECN
) & 1);
/*
 * RX queue handler (registered for the RX queue with virtio_add_queue()
 * in virtio_net_init()).
 *
 * The visible body only calls qemu_kvm_notify_work().
 *
 * NOTE(review): lines 119-120, 123 and 125-126 are missing from this
 * extract, so any guard or conditional compilation around the call
 * cannot be seen here.
 */
118 static void virtio_net_handle_rx(VirtIODevice
*vdev
, VirtQueue
*vq
)
121 /* We now have RX buffers, signal to the IO thread to break out of the
122 select to re-poll the tap file descriptor */
124 qemu_kvm_notify_work();
/*
 * Decide whether the RX queue can currently take a packet of `bufsize`
 * bytes.
 *
 * Visible checks, in order:
 *  - the RX queue must be ready and the driver must have set
 *    VIRTIO_CONFIG_S_DRIVER_OK;
 *  - if the queue is empty, or mergeable RX buffers are in use and
 *    virtqueue_avail_bytes(rx_vq, bufsize, 0) fails, RX notifications
 *    are switched on (line 137);
 *  - line 141 switches RX notifications off.
 *
 * NOTE(review): every return statement (lines 132, 138-139, 142) is
 * missing from this extract.  Callers treat a zero result as "cannot
 * receive", so presumably the first two cases return 0 and the last
 * returns 1 -- confirm.
 */
128 static int do_virtio_net_can_receive(VirtIONet
*n
, int bufsize
)
130 if (!virtio_queue_ready(n
->rx_vq
) ||
131 !(n
->vdev
.status
& VIRTIO_CONFIG_S_DRIVER_OK
))
134 if (virtio_queue_empty(n
->rx_vq
) ||
135 (n
->mergeable_rx_bufs
&&
136 !virtqueue_avail_bytes(n
->rx_vq
, bufsize
, 0))) {
137 virtio_queue_set_notification(n
->rx_vq
, 1);
141 virtio_queue_set_notification(n
->rx_vq
, 0);
145 static int virtio_net_can_receive(void *opaque
)
147 VirtIONet
*n
= opaque
;
149 return do_virtio_net_can_receive(n
, VIRTIO_NET_MAX_BUFSIZE
);
153 /* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
154 * it never finds out that the packets don't have valid checksums. This
155 * causes dhclient to get upset. Fedora's carried a patch for ages to
156 * fix this with Xen but it hasn't appeared in an upstream release of
159 * To avoid breaking existing guests, we catch udp packets and add
160 * checksums. This is terrible but it's better than hacking the guest
163 * N.B. if we introduce a zero-copy API, this operation is no longer free so
164 * we should provide a mechanism to disable it to avoid polluting the host
167 static void work_around_broken_dhclient(struct virtio_net_hdr
*hdr
,
168 const uint8_t *buf
, size_t size
)
170 if ((hdr
->flags
& VIRTIO_NET_HDR_F_NEEDS_CSUM
) && /* missing csum */
171 (size
> 27 && size
< 1500) && /* normal sized MTU */
172 (buf
[12] == 0x08 && buf
[13] == 0x00) && /* ethertype == IPv4 */
173 (buf
[23] == 17) && /* ip.protocol == UDP */
174 (buf
[34] == 0 && buf
[35] == 67)) { /* udp.srcport == bootps */
175 /* FIXME this cast is evil */
176 net_checksum_calculate((uint8_t *)buf
, size
);
177 hdr
->flags
&= ~VIRTIO_NET_HDR_F_NEEDS_CSUM
;
/*
 * Scatter up to `count` bytes from `buf` into the `iovcnt` entries of
 * `iov`.
 *
 * Each loop iteration copies MIN(iov[i].iov_len, count - offset) bytes
 * into iov[i].iov_base, so an entry is never overrun and the copy stops
 * when either the data or the entries run out.
 *
 * NOTE(review): the declaration and initialisation of `offset` and `i`,
 * their per-iteration updates and the return statement (lines 183-186,
 * 190-195) are missing from this extract.  The caller stores the result
 * in a variable named `len`; presumably the number of bytes copied --
 * confirm.
 */
182 static int iov_fill(struct iovec
*iov
, int iovcnt
, const void *buf
, int count
)
187 while (offset
< count
&& i
< iovcnt
) {
188 int len
= MIN(iov
[i
].iov_len
, count
- offset
);
189 memcpy(iov
[i
].iov_base
, buf
+ offset
, len
);
/*
 * Write the virtio-net header for an incoming packet into the first
 * guest buffer, iov[0], and step that buffer past the header.
 *
 * The header's gso_type is set to VIRTIO_NET_HDR_GSO_NONE.  When the
 * first client on the VLAN reports tap_has_vnet_hdr(), the first
 * sizeof(struct virtio_net_hdr) bytes of `buf` are copied in as the
 * header and the remainder is passed to work_around_broken_dhclient().
 * Finally iov[0] is advanced by `hdr_len`, the size of the header
 * supplied to the guest.
 *
 * NOTE(review): lines 199, 201-203, 205-206, 211-213, 216 and 219-221
 * are missing from this extract, including the declaration of `offset`,
 * the end of the comment opened on line 214 and the return statement.
 * The caller adds the result to its offset into the packet; presumably
 * the number of bytes of `buf` consumed -- confirm.
 */
197 static int receive_header(VirtIONet
*n
, struct iovec
*iov
, int iovcnt
,
198 const void *buf
, size_t size
, size_t hdr_len
)
200 struct virtio_net_hdr
*hdr
= iov
[0].iov_base
;
204 hdr
->gso_type
= VIRTIO_NET_HDR_GSO_NONE
;
207 if (tap_has_vnet_hdr(n
->vc
->vlan
->first_client
)) {
208 memcpy(hdr
, buf
, sizeof(*hdr
));
209 offset
= sizeof(*hdr
);
210 work_around_broken_dhclient(hdr
, buf
+ offset
, size
- offset
);
214 /* We only ever receive a struct virtio_net_hdr from the tapfd,
215 * but we may be passing along a larger header to the guest.
217 iov
[0].iov_base
+= hdr_len
;
218 iov
[0].iov_len
-= hdr_len
;
/*
 * Receive callback handed to qemu_new_vlan_client(): deliver one packet
 * of `size` bytes at `buf` to the guest through the RX queue.
 *
 * Outline of the visible code:
 *  - check do_virtio_net_can_receive(n, size) first;
 *  - hdr_len is the size of the header given to the guest: the
 *    mergeable-RX header when n->mergeable_rx_bufs is set, otherwise the
 *    plain struct virtio_net_hdr;
 *  - loop until the whole packet is placed: pop an RX element, copy its
 *    in_sg entries to the local `sg` array, let receive_header() write
 *    the header, copy packet data with iov_fill(), then queue the
 *    element with virtqueue_fill();
 *  - only when mergeable RX buffers are in use may more than one element
 *    be consumed per packet (the `i != 0` test on line 245);
 *  - afterwards store the element count in mhdr->num_buffers, flush the
 *    queue and notify the guest.
 *
 * NOTE(review): many lines are missing from this extract, among them the
 * declarations of `len` and `total`, the initialisation of `offset` and
 * `i`, the statements following each fprintf() (presumably error exits)
 * and the guard, if any, around the mhdr store on line 286.
 */
223 static void virtio_net_receive(void *opaque
, const uint8_t *buf
, int size
)
225 VirtIONet
*n
= opaque
;
226 struct virtio_net_hdr_mrg_rxbuf
*mhdr
= NULL
;
227 size_t hdr_len
, offset
, i
;
229 if (!do_virtio_net_can_receive(n
, size
))
232 /* hdr_len refers to the header we supply to the guest */
233 hdr_len
= n
->mergeable_rx_bufs
?
234 sizeof(struct virtio_net_hdr_mrg_rxbuf
) : sizeof(struct virtio_net_hdr
);
238 while (offset
< size
) {
239 VirtQueueElement elem
;
241 struct iovec sg
[VIRTQUEUE_MAX_SIZE
];
245 if ((i
!= 0 && !n
->mergeable_rx_bufs
) ||
246 virtqueue_pop(n
->rx_vq
, &elem
) == 0) {
249 fprintf(stderr
, "virtio-net truncating packet\n");
253 if (elem
.in_num
< 1) {
254 fprintf(stderr
, "virtio-net receive queue contains no in buffers\n");
258 if (!n
->mergeable_rx_bufs
&& elem
.in_sg
[0].iov_len
!= hdr_len
) {
259 fprintf(stderr
, "virtio-net header not in first element\n");
263 memcpy(&sg
, &elem
.in_sg
[0], sizeof(sg
[0]) * elem
.in_num
);
266 if (n
->mergeable_rx_bufs
)
267 mhdr
= (struct virtio_net_hdr_mrg_rxbuf
*)sg
[0].iov_base
;
269 offset
+= receive_header(n
, sg
, elem
.in_num
,
270 buf
+ offset
, size
- offset
, hdr_len
);
274 /* copy in packet. ugh */
275 len
= iov_fill(sg
, elem
.in_num
,
276 buf
+ offset
, size
- offset
);
279 /* signal other side */
280 virtqueue_fill(n
->rx_vq
, &elem
, total
, i
++);
286 mhdr
->num_buffers
= i
;
288 virtqueue_flush(n
->rx_vq
, i
);
289 virtio_notify(&n
->vdev
, n
->rx_vq
);
/*
 * Drain the TX queue `vq`: pop every pending element, send its payload
 * with qemu_sendv_packet() and hand the element back to the guest with
 * virtqueue_push() followed by virtio_notify().
 *
 * The first out_sg entry of each element must be exactly the header
 * (hdr_len bytes; the mergeable-RX header when n->mergeable_rx_bufs is
 * set, otherwise struct virtio_net_hdr).  With mergeable RX buffers the
 * first entry is shortened by the size difference between the two header
 * types, because the tap side expects a plain struct virtio_net_hdr.
 *
 * NOTE(review): many lines are missing from this extract.  The two
 * declarations of has_vnet_hdr (lines 297 and 299) are presumably
 * alternatives under conditional compilation whose directives are not
 * visible.  The declarations of `len` and `hdr_len`, the statement guarded
 * by the DRIVER_OK test on line 302, the first branch of the `else if`
 * on line 326 and the loop/function closings are also absent.
 */
293 static void virtio_net_flush_tx(VirtIONet
*n
, VirtQueue
*vq
)
295 VirtQueueElement elem
;
297 int has_vnet_hdr
= tap_has_vnet_hdr(n
->vc
->vlan
->first_client
);
299 int has_vnet_hdr
= 0;
302 if (!(n
->vdev
.status
& VIRTIO_CONFIG_S_DRIVER_OK
))
305 while (virtqueue_pop(vq
, &elem
)) {
307 unsigned int out_num
= elem
.out_num
;
308 struct iovec
*out_sg
= &elem
.out_sg
[0];
311 /* hdr_len refers to the header received from the guest */
312 hdr_len
= n
->mergeable_rx_bufs
?
313 sizeof(struct virtio_net_hdr_mrg_rxbuf
) :
314 sizeof(struct virtio_net_hdr
);
316 if (out_num
< 1 || out_sg
->iov_len
!= hdr_len
) {
317 fprintf(stderr
, "virtio-net header not in first element\n");
321 /* ignore the header if GSO is not supported */
326 } else if (n
->mergeable_rx_bufs
) {
327 /* tapfd expects a struct virtio_net_hdr */
328 hdr_len
-= sizeof(struct virtio_net_hdr
);
329 out_sg
->iov_len
-= hdr_len
;
333 len
+= qemu_sendv_packet(n
->vc
, out_sg
, out_num
);
335 virtqueue_push(vq
, &elem
, len
);
336 virtio_notify(&n
->vdev
, vq
);
/*
 * TX queue handler (registered for the TX queue with virtio_add_queue()
 * in virtio_net_init()).
 *
 * If the TX timer is already active: re-enable queue notifications,
 * cancel the timer, clear tx_timer_active and flush the queue at once.
 * Lines 350-353 arm the timer TX_TIMER_INTERVAL ahead on vm_clock, set
 * tx_timer_active and disable queue notifications.
 *
 * NOTE(review): line 349, which joins the two groups of statements, is
 * missing from this extract; presumably `} else {`, so that the timer is
 * armed only when it was not already active -- confirm.
 */
340 static void virtio_net_handle_tx(VirtIODevice
*vdev
, VirtQueue
*vq
)
342 VirtIONet
*n
= to_virtio_net(vdev
);
344 if (n
->tx_timer_active
) {
345 virtio_queue_set_notification(vq
, 1);
346 qemu_del_timer(n
->tx_timer
);
347 n
->tx_timer_active
= 0;
348 virtio_net_flush_tx(n
, vq
);
350 qemu_mod_timer(n
->tx_timer
,
351 qemu_get_clock(vm_clock
) + TX_TIMER_INTERVAL
);
352 n
->tx_timer_active
= 1;
353 virtio_queue_set_notification(vq
, 0);
/*
 * TX timer callback (created with qemu_new_timer() in virtio_net_init();
 * opaque is the VirtIONet).
 *
 * Marks the timer inactive, re-enables notifications on the TX queue and
 * flushes it.
 *
 * NOTE(review): the statement guarded by the DRIVER_OK test on line 364
 * is missing from this extract (line 365); presumably an early return
 * when the driver is not ready -- confirm.
 */
357 static void virtio_net_tx_timer(void *opaque
)
359 VirtIONet
*n
= opaque
;
361 n
->tx_timer_active
= 0;
363 /* Just in case the driver is not ready any more */
364 if (!(n
->vdev
.status
& VIRTIO_CONFIG_S_DRIVER_OK
))
367 virtio_queue_set_notification(n
->tx_vq
, 1);
368 virtio_net_flush_tx(n
, n
->tx_vq
);
/*
 * savevm handler: write the device state to `f`.
 *
 * Order written by the visible code: the generic virtio state
 * (virtio_save), the 6-byte MAC address, tx_timer_active,
 * mergeable_rx_bufs, and whether the first client on the VLAN has a
 * vnet header.  virtio_net_load() reads the fields back in this order.
 *
 * NOTE(review): lines 380-381 and 383-384 are missing from this extract,
 * so whether the last field is written unconditionally cannot be seen.
 */
371 static void virtio_net_save(QEMUFile
*f
, void *opaque
)
373 VirtIONet
*n
= opaque
;
375 virtio_save(&n
->vdev
, f
);
377 qemu_put_buffer(f
, n
->mac
, 6);
378 qemu_put_be32(f
, n
->tx_timer_active
);
379 qemu_put_be32(f
, n
->mergeable_rx_bufs
);
382 qemu_put_be32(f
, tap_has_vnet_hdr(n
->vc
->vlan
->first_client
));
/*
 * loadvm handler: restore the device state from `f`.
 *
 * Reads, in the order virtio_net_save() writes them: the generic virtio
 * state, the 6-byte MAC address, tx_timer_active and mergeable_rx_bufs.
 * A further 32-bit flag, when non-zero, switches vnet headers on for the
 * first client on the VLAN.  If the TX timer was active when the state
 * was saved, it is re-armed TX_TIMER_INTERVAL ahead on vm_clock.
 *
 * NOTE(review): lines 389-392, 398-399, 402-403 and 407-410 are missing
 * from this extract, so any use of `version_id` and the return value
 * cannot be seen here.
 */
386 static int virtio_net_load(QEMUFile
*f
, void *opaque
, int version_id
)
388 VirtIONet
*n
= opaque
;
393 virtio_load(&n
->vdev
, f
);
395 qemu_get_buffer(f
, n
->mac
, 6);
396 n
->tx_timer_active
= qemu_get_be32(f
);
397 n
->mergeable_rx_bufs
= qemu_get_be32(f
);
400 if (qemu_get_be32(f
))
401 tap_using_vnet_hdr(n
->vc
->vlan
->first_client
, 1);
404 if (n
->tx_timer_active
) {
405 qemu_mod_timer(n
->tx_timer
,
406 qemu_get_clock(vm_clock
) + TX_TIMER_INTERVAL
);
/*
 * Create a virtio network device on PCI bus `bus` for the NIC described
 * by `nd`, and return it as a PCIDevice.
 *
 * Steps in the visible code:
 *  - create the device with virtio_init_pci();
 *  - install the get_config / get_features / set_features callbacks;
 *  - add the RX and TX queues, 256 entries each;
 *  - copy the MAC address from nd->macaddr and start with the link up;
 *  - register a VLAN client with the receive and can-receive callbacks
 *    and hook up the link-status callback;
 *  - create the TX timer on vm_clock (initially inactive) and start with
 *    mergeable RX buffers off;
 *  - register the savevm handlers as "virtio-net", version 2, using a
 *    static counter as the instance id.
 *
 * NOTE(review): lines 413-414, 416, 421 and 424-427 are missing from this
 * extract, including the declaration of `n`, the remaining arguments of
 * virtio_init_pci() (where `devfn` is presumably passed) and any check of
 * its result.
 */
412 PCIDevice
*virtio_net_init(PCIBus
*bus
, NICInfo
*nd
, int devfn
)
415 static int virtio_net_id
;
417 n
= (VirtIONet
*)virtio_init_pci(bus
, "virtio-net",
418 PCI_VENDOR_ID_REDHAT_QUMRANET
,
419 PCI_DEVICE_ID_VIRTIO_NET
,
420 PCI_VENDOR_ID_REDHAT_QUMRANET
,
422 PCI_CLASS_NETWORK_ETHERNET
, 0x00,
423 sizeof(struct virtio_net_config
),
428 n
->vdev
.get_config
= virtio_net_update_config
;
429 n
->vdev
.get_features
= virtio_net_get_features
;
430 n
->vdev
.set_features
= virtio_net_set_features
;
431 n
->rx_vq
= virtio_add_queue(&n
->vdev
, 256, virtio_net_handle_rx
);
432 n
->tx_vq
= virtio_add_queue(&n
->vdev
, 256, virtio_net_handle_tx
);
433 memcpy(n
->mac
, nd
->macaddr
, 6);
434 n
->status
= VIRTIO_NET_S_LINK_UP
;
435 n
->vc
= qemu_new_vlan_client(nd
->vlan
, nd
->model
, nd
->name
,
436 virtio_net_receive
, virtio_net_can_receive
, n
);
437 n
->vc
->link_status_changed
= virtio_net_set_link_status
;
439 qemu_format_nic_info_str(n
->vc
, n
->mac
);
441 n
->tx_timer
= qemu_new_timer(vm_clock
, virtio_net_tx_timer
, n
);
442 n
->tx_timer_active
= 0;
443 n
->mergeable_rx_bufs
= 0;
445 register_savevm("virtio-net", virtio_net_id
++, 2,
446 virtio_net_save
, virtio_net_load
, n
);
448 return (PCIDevice
*)n
;