hw/virtio-net.c

   1 /*
   2  * Virtio Network Device
   3  *
   4  * Copyright IBM, Corp. 2007
   5  *
   6  * Authors:
   7  *  Anthony Liguori   <aliguori@us.ibm.com>
   8  *
   9  * This work is licensed under the terms of the GNU GPL, version 2.  See
  10  * the COPYING file in the top-level directory.
  11  *
  12  */
  13
  14 #include "qemu/iov.h"
  15 #include "virtio.h"
  16 #include "net/net.h"
  17 #include "net/checksum.h"
  18 #include "net/tap.h"
  19 #include "qemu/error-report.h"
  20 #include "qemu/timer.h"
  21 #include "virtio-net.h"
  22 #include "vhost_net.h"
  23
  24 #define VIRTIO_NET_VM_VERSION    11
  25
  26 #define MAC_TABLE_ENTRIES    64
  27 #define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */
  28
  29 typedef struct VirtIONet
  30 {
  31     VirtIODevice vdev;
  32     uint8_t mac[ETH_ALEN];
  33     uint16_t status;
  34     VirtQueue *rx_vq;
  35     VirtQueue *tx_vq;
  36     VirtQueue *ctrl_vq;
  37     NICState *nic;
  38     QEMUTimer *tx_timer;
  39     QEMUBH *tx_bh;
  40     uint32_t tx_timeout;
  41     int32_t tx_burst;
  42     int tx_waiting;
  43     uint32_t has_vnet_hdr;
  44     size_t host_hdr_len;
  45     size_t guest_hdr_len;
  46     uint8_t has_ufo;
  47     struct {
  48         VirtQueueElement elem;
  49         ssize_t len;
  50     } async_tx;
  51     int mergeable_rx_bufs;
  52     uint8_t promisc;
  53     uint8_t allmulti;
  54     uint8_t alluni;
  55     uint8_t nomulti;
  56     uint8_t nouni;
  57     uint8_t nobcast;
  58     uint8_t vhost_started;
  59     struct {
  60         int in_use;
  61         int first_multi;
  62         uint8_t multi_overflow;
  63         uint8_t uni_overflow;
  64         uint8_t *macs;
  65     } mac_table;
  66     uint32_t *vlans;
  67     DeviceState *qdev;
  68 } VirtIONet;
  69
  70 /* TODO
  71  * - we could suppress RX interrupt if we were so inclined.
  72  */
  73
  74 static VirtIONet *to_virtio_net(VirtIODevice *vdev)
  75 {
  76     return (VirtIONet *)vdev;
  77 }
  78
  79 static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
  80 {
  81     VirtIONet *n = to_virtio_net(vdev);
  82     struct virtio_net_config netcfg;
  83
  84     stw_p(&netcfg.status, n->status);
  85     memcpy(netcfg.mac, n->mac, ETH_ALEN);
  86     memcpy(config, &netcfg, sizeof(netcfg));
  87 }
  88
  89 static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
  90 {
  91     VirtIONet *n = to_virtio_net(vdev);
  92     struct virtio_net_config netcfg;
  93
  94     memcpy(&netcfg, config, sizeof(netcfg));
  95
  96     if (memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
  97         memcpy(n->mac, netcfg.mac, ETH_ALEN);
  98         qemu_format_nic_info_str(&n->nic->nc, n->mac);
  99     }
 100 }
 101
 102 static bool virtio_net_started(VirtIONet *n, uint8_t status)
 103 {
 104     return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
 105         (n->status & VIRTIO_NET_S_LINK_UP) && n->vdev.vm_running;
 106 }
 107
 108 static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
 109 {
 110     if (!n->nic->nc.peer) {
 111         return;
 112     }
 113     if (n->nic->nc.peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
 114         return;
 115     }
 116
 117     if (!tap_get_vhost_net(n->nic->nc.peer)) {
 118         return;
 119     }
 120     if (!!n->vhost_started == virtio_net_started(n, status) &&
 121                               !n->nic->nc.peer->link_down) {
 122         return;
 123     }
 124     if (!n->vhost_started) {
 125         int r;
 126         if (!vhost_net_query(tap_get_vhost_net(n->nic->nc.peer), &n->vdev)) {
 127             return;
 128         }
 129         n->vhost_started = 1;
 130         r = vhost_net_start(tap_get_vhost_net(n->nic->nc.peer), &n->vdev);
 131         if (r < 0) {
 132             error_report("unable to start vhost net: %d: "
 133                          "falling back on userspace virtio", -r);
 134             n->vhost_started = 0;
 135         }
 136     } else {
 137         vhost_net_stop(tap_get_vhost_net(n->nic->nc.peer), &n->vdev);
 138         n->vhost_started = 0;
 139     }
 140 }
 141
 142 static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
 143 {
 144     VirtIONet *n = to_virtio_net(vdev);
 145
 146     virtio_net_vhost_status(n, status);
 147
 148     if (!n->tx_waiting) {
 149         return;
 150     }
 151
 152     if (virtio_net_started(n, status) && !n->vhost_started) {
 153         if (n->tx_timer) {
 154             qemu_mod_timer(n->tx_timer,
 155                            qemu_get_clock_ns(vm_clock) + n->tx_timeout);
 156         } else {
 157             qemu_bh_schedule(n->tx_bh);
 158         }
 159     } else {
 160         if (n->tx_timer) {
 161             qemu_del_timer(n->tx_timer);
 162         } else {
 163             qemu_bh_cancel(n->tx_bh);
 164         }
 165     }
 166 }
 167
 168 static void virtio_net_set_link_status(NetClientState *nc)
 169 {
 170     VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
 171     uint16_t old_status = n->status;
 172
 173     if (nc->link_down)
 174         n->status &= ~VIRTIO_NET_S_LINK_UP;
 175     else
 176         n->status |= VIRTIO_NET_S_LINK_UP;
 177
 178     if (n->status != old_status)
 179         virtio_notify_config(&n->vdev);
 180
 181     virtio_net_set_status(&n->vdev, n->vdev.status);
 182 }
 183
 184 static void virtio_net_reset(VirtIODevice *vdev)
 185 {
 186     VirtIONet *n = to_virtio_net(vdev);
 187
 188     /* Reset back to compatibility mode */
 189     n->promisc = 1;
 190     n->allmulti = 0;
 191     n->alluni = 0;
 192     n->nomulti = 0;
 193     n->nouni = 0;
 194     n->nobcast = 0;
 195
 196     /* Flush any MAC and VLAN filter table state */
 197     n->mac_table.in_use = 0;
 198     n->mac_table.first_multi = 0;
 199     n->mac_table.multi_overflow = 0;
 200     n->mac_table.uni_overflow = 0;
 201     memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
 202     memset(n->vlans, 0, MAX_VLAN >> 3);
 203 }
 204
 205 static void peer_test_vnet_hdr(VirtIONet *n)
 206 {
 207     if (!n->nic->nc.peer)
 208         return;
 209
 210     if (n->nic->nc.peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP)
 211         return;
 212
 213     n->has_vnet_hdr = tap_has_vnet_hdr(n->nic->nc.peer);
 214 }
 215
 216 static int peer_has_vnet_hdr(VirtIONet *n)
 217 {
 218     return n->has_vnet_hdr;
 219 }
 220
 221 static int peer_has_ufo(VirtIONet *n)
 222 {
 223     if (!peer_has_vnet_hdr(n))
 224         return 0;
 225
 226     n->has_ufo = tap_has_ufo(n->nic->nc.peer);
 227
 228     return n->has_ufo;
 229 }
 230
 231 static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs)
 232 {
 233     n->mergeable_rx_bufs = mergeable_rx_bufs;
 234
 235     n->guest_hdr_len = n->mergeable_rx_bufs ?
 236         sizeof(struct virtio_net_hdr_mrg_rxbuf) : sizeof(struct virtio_net_hdr);
 237
 238     if (peer_has_vnet_hdr(n) &&
 239         tap_has_vnet_hdr_len(n->nic->nc.peer, n->guest_hdr_len)) {
 240         tap_set_vnet_hdr_len(n->nic->nc.peer, n->guest_hdr_len);
 241         n->host_hdr_len = n->guest_hdr_len;
 242     }
 243 }
 244
 245 static uint32_t virtio_net_get_features(VirtIODevice *vdev, uint32_t features)
 246 {
 247     VirtIONet *n = to_virtio_net(vdev);
 248
 249     features |= (1 << VIRTIO_NET_F_MAC);
 250
 251     if (!peer_has_vnet_hdr(n)) {
 252         features &= ~(0x1 << VIRTIO_NET_F_CSUM);
 253         features &= ~(0x1 << VIRTIO_NET_F_HOST_TSO4);
 254         features &= ~(0x1 << VIRTIO_NET_F_HOST_TSO6);
 255         features &= ~(0x1 << VIRTIO_NET_F_HOST_ECN);
 256
 257         features &= ~(0x1 << VIRTIO_NET_F_GUEST_CSUM);
 258         features &= ~(0x1 << VIRTIO_NET_F_GUEST_TSO4);
 259         features &= ~(0x1 << VIRTIO_NET_F_GUEST_TSO6);
 260         features &= ~(0x1 << VIRTIO_NET_F_GUEST_ECN);
 261     }
 262
 263     if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
 264         features &= ~(0x1 << VIRTIO_NET_F_GUEST_UFO);
 265         features &= ~(0x1 << VIRTIO_NET_F_HOST_UFO);
 266     }
 267
 268     if (!n->nic->nc.peer ||
 269         n->nic->nc.peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
 270         return features;
 271     }
 272     if (!tap_get_vhost_net(n->nic->nc.peer)) {
 273         return features;
 274     }
 275     return vhost_net_get_features(tap_get_vhost_net(n->nic->nc.peer), features);
 276 }
 277
 278 static uint32_t virtio_net_bad_features(VirtIODevice *vdev)
 279 {
 280     uint32_t features = 0;
 281
 282     /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
 283      * but also these: */
 284     features |= (1 << VIRTIO_NET_F_MAC);
 285     features |= (1 << VIRTIO_NET_F_CSUM);
 286     features |= (1 << VIRTIO_NET_F_HOST_TSO4);
 287     features |= (1 << VIRTIO_NET_F_HOST_TSO6);
 288     features |= (1 << VIRTIO_NET_F_HOST_ECN);
 289
 290     return features;
 291 }
 292
 293 static void virtio_net_set_features(VirtIODevice *vdev, uint32_t features)
 294 {
 295     VirtIONet *n = to_virtio_net(vdev);
 296
 297     virtio_net_set_mrg_rx_bufs(n, !!(features & (1 << VIRTIO_NET_F_MRG_RXBUF)));
 298
 299     if (n->has_vnet_hdr) {
 300         tap_set_offload(n->nic->nc.peer,
 301                         (features >> VIRTIO_NET_F_GUEST_CSUM) & 1,
 302                         (features >> VIRTIO_NET_F_GUEST_TSO4) & 1,
 303                         (features >> VIRTIO_NET_F_GUEST_TSO6) & 1,
 304                         (features >> VIRTIO_NET_F_GUEST_ECN)  & 1,
 305                         (features >> VIRTIO_NET_F_GUEST_UFO)  & 1);
 306     }
 307     if (!n->nic->nc.peer ||
 308         n->nic->nc.peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
 309         return;
 310     }
 311     if (!tap_get_vhost_net(n->nic->nc.peer)) {
 312         return;
 313     }
 314     vhost_net_ack_features(tap_get_vhost_net(n->nic->nc.peer), features);
 315 }
 316
 317 static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
 318                                      VirtQueueElement *elem)
 319 {
 320     uint8_t on;
 321
 322     if (elem->out_num != 2 || elem->out_sg[1].iov_len != sizeof(on)) {
 323         error_report("virtio-net ctrl invalid rx mode command");
 324         exit(1);
 325     }
 326
 327     on = ldub_p(elem->out_sg[1].iov_base);
 328
 329     if (cmd == VIRTIO_NET_CTRL_RX_MODE_PROMISC)
 330         n->promisc = on;
 331     else if (cmd == VIRTIO_NET_CTRL_RX_MODE_ALLMULTI)
 332         n->allmulti = on;
 333     else if (cmd == VIRTIO_NET_CTRL_RX_MODE_ALLUNI)
 334         n->alluni = on;
 335     else if (cmd == VIRTIO_NET_CTRL_RX_MODE_NOMULTI)
 336         n->nomulti = on;
 337     else if (cmd == VIRTIO_NET_CTRL_RX_MODE_NOUNI)
 338         n->nouni = on;
 339     else if (cmd == VIRTIO_NET_CTRL_RX_MODE_NOBCAST)
 340         n->nobcast = on;
 341     else
 342         return VIRTIO_NET_ERR;
 343
 344     return VIRTIO_NET_OK;
 345 }
 346
 347 static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
 348                                  VirtQueueElement *elem)
 349 {
 350     struct virtio_net_ctrl_mac mac_data;
 351
 352     if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET || elem->out_num != 3 ||
 353         elem->out_sg[1].iov_len < sizeof(mac_data) ||
 354         elem->out_sg[2].iov_len < sizeof(mac_data))
 355         return VIRTIO_NET_ERR;
 356
 357     n->mac_table.in_use = 0;
 358     n->mac_table.first_multi = 0;
 359     n->mac_table.uni_overflow = 0;
 360     n->mac_table.multi_overflow = 0;
 361     memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
 362
 363     mac_data.entries = ldl_p(elem->out_sg[1].iov_base);
 364
 365     if (sizeof(mac_data.entries) +
 366         (mac_data.entries * ETH_ALEN) > elem->out_sg[1].iov_len)
 367         return VIRTIO_NET_ERR;
 368
 369     if (mac_data.entries <= MAC_TABLE_ENTRIES) {
 370         memcpy(n->mac_table.macs, elem->out_sg[1].iov_base + sizeof(mac_data),
 371                mac_data.entries * ETH_ALEN);
 372         n->mac_table.in_use += mac_data.entries;
 373     } else {
 374         n->mac_table.uni_overflow = 1;
 375     }
 376
 377     n->mac_table.first_multi = n->mac_table.in_use;
 378
 379     mac_data.entries = ldl_p(elem->out_sg[2].iov_base);
 380
 381     if (sizeof(mac_data.entries) +
 382         (mac_data.entries * ETH_ALEN) > elem->out_sg[2].iov_len)
 383         return VIRTIO_NET_ERR;
 384
 385     if (mac_data.entries) {
 386         if (n->mac_table.in_use + mac_data.entries <= MAC_TABLE_ENTRIES) {
 387             memcpy(n->mac_table.macs + (n->mac_table.in_use * ETH_ALEN),
 388                    elem->out_sg[2].iov_base + sizeof(mac_data),
 389                    mac_data.entries * ETH_ALEN);
 390             n->mac_table.in_use += mac_data.entries;
 391         } else {
 392             n->mac_table.multi_overflow = 1;
 393         }
 394     }
 395
 396     return VIRTIO_NET_OK;
 397 }
 398
 399 static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
 400                                         VirtQueueElement *elem)
 401 {
 402     uint16_t vid;
 403
 404     if (elem->out_num != 2 || elem->out_sg[1].iov_len != sizeof(vid)) {
 405         error_report("virtio-net ctrl invalid vlan command");
 406         return VIRTIO_NET_ERR;
 407     }
 408
 409     vid = lduw_p(elem->out_sg[1].iov_base);
 410
 411     if (vid >= MAX_VLAN)
 412         return VIRTIO_NET_ERR;
 413
 414     if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
 415         n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
 416     else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
 417         n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
 418     else
 419         return VIRTIO_NET_ERR;
 420
 421     return VIRTIO_NET_OK;
 422 }
 423
 424 static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
 425 {
 426     VirtIONet *n = to_virtio_net(vdev);
 427     struct virtio_net_ctrl_hdr ctrl;
 428     virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
 429     VirtQueueElement elem;
 430
 431     while (virtqueue_pop(vq, &elem)) {
 432         if ((elem.in_num < 1) || (elem.out_num < 1)) {
 433             error_report("virtio-net ctrl missing headers");
 434             exit(1);
 435         }
 436
 437         if (elem.out_sg[0].iov_len < sizeof(ctrl) ||
 438             elem.in_sg[elem.in_num - 1].iov_len < sizeof(status)) {
 439             error_report("virtio-net ctrl header not in correct element");
 440             exit(1);
 441         }
 442
 443         ctrl.class = ldub_p(elem.out_sg[0].iov_base);
 444         ctrl.cmd = ldub_p(elem.out_sg[0].iov_base + sizeof(ctrl.class));
 445
 446         if (ctrl.class == VIRTIO_NET_CTRL_RX_MODE)
 447             status = virtio_net_handle_rx_mode(n, ctrl.cmd, &elem);
 448         else if (ctrl.class == VIRTIO_NET_CTRL_MAC)
 449             status = virtio_net_handle_mac(n, ctrl.cmd, &elem);
 450         else if (ctrl.class == VIRTIO_NET_CTRL_VLAN)
 451             status = virtio_net_handle_vlan_table(n, ctrl.cmd, &elem);
 452
 453         stb_p(elem.in_sg[elem.in_num - 1].iov_base, status);
 454
 455         virtqueue_push(vq, &elem, sizeof(status));
 456         virtio_notify(vdev, vq);
 457     }
 458 }
 459
 460 /* RX */
 461
 462 static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
 463 {
 464     VirtIONet *n = to_virtio_net(vdev);
 465
 466     qemu_flush_queued_packets(&n->nic->nc);
 467 }
 468
 469 static int virtio_net_can_receive(NetClientState *nc)
 470 {
 471     VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
 472     if (!n->vdev.vm_running) {
 473         return 0;
 474     }
 475
 476     if (!virtio_queue_ready(n->rx_vq) ||
 477         !(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
 478         return 0;
 479
 480     return 1;
 481 }
 482
 483 static int virtio_net_has_buffers(VirtIONet *n, int bufsize)
 484 {
 485     if (virtio_queue_empty(n->rx_vq) ||
 486         (n->mergeable_rx_bufs &&
 487          !virtqueue_avail_bytes(n->rx_vq, bufsize, 0))) {
 488         virtio_queue_set_notification(n->rx_vq, 1);
 489
 490         /* To avoid a race condition where the guest has made some buffers
 491          * available after the above check but before notification was
 492          * enabled, check for available buffers again.
 493          */
 494         if (virtio_queue_empty(n->rx_vq) ||
 495             (n->mergeable_rx_bufs &&
 496              !virtqueue_avail_bytes(n->rx_vq, bufsize, 0)))
 497             return 0;
 498     }
 499
 500     virtio_queue_set_notification(n->rx_vq, 0);
 501     return 1;
 502 }
 503
 504 /* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
 505  * it never finds out that the packets don't have valid checksums.  This
 506  * causes dhclient to get upset.  Fedora's carried a patch for ages to
 507  * fix this with Xen but it hasn't appeared in an upstream release of
 508  * dhclient yet.
 509  *
 510  * To avoid breaking existing guests, we catch udp packets and add
 511  * checksums.  This is terrible but it's better than hacking the guest
 512  * kernels.
 513  *
 514  * N.B. if we introduce a zero-copy API, this operation is no longer free so
 515  * we should provide a mechanism to disable it to avoid polluting the host
 516  * cache.
 517  */
 518 static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
 519                                         uint8_t *buf, size_t size)
 520 {
 521     if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
 522         (size > 27 && size < 1500) && /* normal sized MTU */
 523         (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
 524         (buf[23] == 17) && /* ip.protocol == UDP */
 525         (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
 526         net_checksum_calculate(buf, size);
 527         hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
 528     }
 529 }
 530
 531 static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
 532                            const void *buf, size_t size)
 533 {
 534     if (n->has_vnet_hdr) {
 535         /* FIXME this cast is evil */
 536         void *wbuf = (void *)buf;
 537         work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
 538                                     size - n->host_hdr_len);
 539         iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
 540     } else {
 541         struct virtio_net_hdr hdr = {
 542             .flags = 0,
 543             .gso_type = VIRTIO_NET_HDR_GSO_NONE
 544         };
 545         iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
 546     }
 547 }
 548
 549 static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
 550 {
 551     static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
 552     static const uint8_t vlan[] = {0x81, 0x00};
 553     uint8_t *ptr = (uint8_t *)buf;
 554     int i;
 555
 556     if (n->promisc)
 557         return 1;
 558
 559     ptr += n->host_hdr_len;
 560
 561     if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
 562         int vid = be16_to_cpup((uint16_t *)(ptr + 14)) & 0xfff;
 563         if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
 564             return 0;
 565     }
 566
 567     if (ptr[0] & 1) { // multicast
 568         if (!memcmp(ptr, bcast, sizeof(bcast))) {
 569             return !n->nobcast;
 570         } else if (n->nomulti) {
 571             return 0;
 572         } else if (n->allmulti || n->mac_table.multi_overflow) {
 573             return 1;
 574         }
 575
 576         for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
 577             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
 578                 return 1;
 579             }
 580         }
 581     } else { // unicast
 582         if (n->nouni) {
 583             return 0;
 584         } else if (n->alluni || n->mac_table.uni_overflow) {
 585             return 1;
 586         } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
 587             return 1;
 588         }
 589
 590         for (i = 0; i < n->mac_table.first_multi; i++) {
 591             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
 592                 return 1;
 593             }
 594         }
 595     }
 596
 597     return 0;
 598 }
 599
 600 static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t size)
 601 {
 602     VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
 603     struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
 604     struct virtio_net_hdr_mrg_rxbuf mhdr;
 605     unsigned mhdr_cnt = 0;
 606     size_t offset, i, guest_offset;
 607
 608     if (!virtio_net_can_receive(&n->nic->nc))
 609         return -1;
 610
 611     /* hdr_len refers to the header we supply to the guest */
 612     if (!virtio_net_has_buffers(n, size + n->guest_hdr_len - n->host_hdr_len))
 613         return 0;
 614
 615     if (!receive_filter(n, buf, size))
 616         return size;
 617
 618     offset = i = 0;
 619
 620     while (offset < size) {
 621         VirtQueueElement elem;
 622         int len, total;
 623         const struct iovec *sg = elem.in_sg;
 624
 625         total = 0;
 626
 627         if (virtqueue_pop(n->rx_vq, &elem) == 0) {
 628             if (i == 0)
 629                 return -1;
 630             error_report("virtio-net unexpected empty queue: "
 631                     "i %zd mergeable %d offset %zd, size %zd, "
 632                     "guest hdr len %zd, host hdr len %zd guest features 0x%x",
 633                     i, n->mergeable_rx_bufs, offset, size,
 634                     n->guest_hdr_len, n->host_hdr_len, n->vdev.guest_features);
 635             exit(1);
 636         }
 637
 638         if (elem.in_num < 1) {
 639             error_report("virtio-net receive queue contains no in buffers");
 640             exit(1);
 641         }
 642
 643         if (i == 0) {
 644             assert(offset == 0);
 645             if (n->mergeable_rx_bufs) {
 646                 mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
 647                                     sg, elem.in_num,
 648                                     offsetof(typeof(mhdr), num_buffers),
 649                                     sizeof(mhdr.num_buffers));
 650             }
 651
 652             receive_header(n, sg, elem.in_num, buf, size);
 653             offset = n->host_hdr_len;
 654             total += n->guest_hdr_len;
 655             guest_offset = n->guest_hdr_len;
 656         } else {
 657             guest_offset = 0;
 658         }
 659
 660         /* copy in packet.  ugh */
 661         len = iov_from_buf(sg, elem.in_num, guest_offset,
 662                            buf + offset, size - offset);
 663         total += len;
 664         offset += len;
 665         /* If buffers can't be merged, at this point we
 666          * must have consumed the complete packet.
 667          * Otherwise, drop it. */
 668         if (!n->mergeable_rx_bufs && offset < size) {
 669 #if 0
 670             error_report("virtio-net truncated non-mergeable packet: "
 671                          "i %zd mergeable %d offset %zd, size %zd, "
 672                          "guest hdr len %zd, host hdr len %zd",
 673                          i, n->mergeable_rx_bufs,
 674                          offset, size, n->guest_hdr_len, n->host_hdr_len);
 675 #endif
 676             return size;
 677         }
 678
 679         /* signal other side */
 680         virtqueue_fill(n->rx_vq, &elem, total, i++);
 681     }
 682
 683     if (mhdr_cnt) {
 684         stw_p(&mhdr.num_buffers, i);
 685         iov_from_buf(mhdr_sg, mhdr_cnt,
 686                      0,
 687                      &mhdr.num_buffers, sizeof mhdr.num_buffers);
 688     }
 689
 690     virtqueue_flush(n->rx_vq, i);
 691     virtio_notify(&n->vdev, n->rx_vq);
 692
 693     return size;
 694 }
 695
 696 static int32_t virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq);
 697
 698 static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
 699 {
 700     VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
 701
 702     virtqueue_push(n->tx_vq, &n->async_tx.elem, 0);
 703     virtio_notify(&n->vdev, n->tx_vq);
 704
 705     n->async_tx.elem.out_num = n->async_tx.len = 0;
 706
 707     virtio_queue_set_notification(n->tx_vq, 1);
 708     virtio_net_flush_tx(n, n->tx_vq);
 709 }
 710
 711 /* TX */
 712 static int32_t virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq)
 713 {
 714     VirtQueueElement elem;
 715     int32_t num_packets = 0;
 716     if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)) {
 717         return num_packets;
 718     }
 719
 720     assert(n->vdev.vm_running);
 721
 722     if (n->async_tx.elem.out_num) {
 723         virtio_queue_set_notification(n->tx_vq, 0);
 724         return num_packets;
 725     }
 726
 727     while (virtqueue_pop(vq, &elem)) {
 728         ssize_t ret, len;
 729         unsigned int out_num = elem.out_num;
 730         struct iovec *out_sg = &elem.out_sg[0];
 731         struct iovec sg[VIRTQUEUE_MAX_SIZE];
 732
 733         if (out_num < 1) {
 734             error_report("virtio-net header not in first element");
 735             exit(1);
 736         }
 737
 738         /*
 739          * If host wants to see the guest header as is, we can
 740          * pass it on unchanged. Otherwise, copy just the parts
 741          * that host is interested in.
 742          */
 743         assert(n->host_hdr_len <= n->guest_hdr_len);
 744         if (n->host_hdr_len != n->guest_hdr_len) {
 745             unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
 746                                        out_sg, out_num,
 747                                        0, n->host_hdr_len);
 748             sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
 749                              out_sg, out_num,
 750                              n->guest_hdr_len, -1);
 751             out_num = sg_num;
 752             out_sg = sg;
 753         }
 754
 755         len = n->guest_hdr_len;
 756
 757         ret = qemu_sendv_packet_async(&n->nic->nc, out_sg, out_num,
 758                                       virtio_net_tx_complete);
 759         if (ret == 0) {
 760             virtio_queue_set_notification(n->tx_vq, 0);
 761             n->async_tx.elem = elem;
 762             n->async_tx.len  = len;
 763             return -EBUSY;
 764         }
 765
 766         len += ret;
 767
 768         virtqueue_push(vq, &elem, 0);
 769         virtio_notify(&n->vdev, vq);
 770
 771         if (++num_packets >= n->tx_burst) {
 772             break;
 773         }
 774     }
 775     return num_packets;
 776 }
 777
 778 static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
 779 {
 780     VirtIONet *n = to_virtio_net(vdev);
 781
 782     /* This happens when device was stopped but VCPU wasn't. */
 783     if (!n->vdev.vm_running) {
 784         n->tx_waiting = 1;
 785         return;
 786     }
 787
 788     if (n->tx_waiting) {
 789         virtio_queue_set_notification(vq, 1);
 790         qemu_del_timer(n->tx_timer);
 791         n->tx_waiting = 0;
 792         virtio_net_flush_tx(n, vq);
 793     } else {
 794         qemu_mod_timer(n->tx_timer,
 795                        qemu_get_clock_ns(vm_clock) + n->tx_timeout);
 796         n->tx_waiting = 1;
 797         virtio_queue_set_notification(vq, 0);
 798     }
 799 }
 800
 801 static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
 802 {
 803     VirtIONet *n = to_virtio_net(vdev);
 804
 805     if (unlikely(n->tx_waiting)) {
 806         return;
 807     }
 808     n->tx_waiting = 1;
 809     /* This happens when device was stopped but VCPU wasn't. */
 810     if (!n->vdev.vm_running) {
 811         return;
 812     }
 813     virtio_queue_set_notification(vq, 0);
 814     qemu_bh_schedule(n->tx_bh);
 815 }
 816
 817 static void virtio_net_tx_timer(void *opaque)
 818 {
 819     VirtIONet *n = opaque;
 820     assert(n->vdev.vm_running);
 821
 822     n->tx_waiting = 0;
 823
 824     /* Just in case the driver is not ready on more */
 825     if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
 826         return;
 827
 828     virtio_queue_set_notification(n->tx_vq, 1);
 829     virtio_net_flush_tx(n, n->tx_vq);
 830 }
 831
 832 static void virtio_net_tx_bh(void *opaque)
 833 {
 834     VirtIONet *n = opaque;
 835     int32_t ret;
 836
 837     assert(n->vdev.vm_running);
 838
 839     n->tx_waiting = 0;
 840
 841     /* Just in case the driver is not ready on more */
 842     if (unlikely(!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)))
 843         return;
 844
 845     ret = virtio_net_flush_tx(n, n->tx_vq);
 846     if (ret == -EBUSY) {
 847         return; /* Notification re-enable handled by tx_complete */
 848     }
 849
 850     /* If we flush a full burst of packets, assume there are
 851      * more coming and immediately reschedule */
 852     if (ret >= n->tx_burst) {
 853         qemu_bh_schedule(n->tx_bh);
 854         n->tx_waiting = 1;
 855         return;
 856     }
 857
 858     /* If less than a full burst, re-enable notification and flush
 859      * anything that may have come in while we weren't looking.  If
 860      * we find something, assume the guest is still active and reschedule */
 861     virtio_queue_set_notification(n->tx_vq, 1);
 862     if (virtio_net_flush_tx(n, n->tx_vq) > 0) {
 863         virtio_queue_set_notification(n->tx_vq, 0);
 864         qemu_bh_schedule(n->tx_bh);
 865         n->tx_waiting = 1;
 866     }
 867 }
 868
 869 static void virtio_net_save(QEMUFile *f, void *opaque)
 870 {
 871     VirtIONet *n = opaque;
 872
 873     /* At this point, backend must be stopped, otherwise
 874      * it might keep writing to memory. */
 875     assert(!n->vhost_started);
 876     virtio_save(&n->vdev, f);
 877
 878     qemu_put_buffer(f, n->mac, ETH_ALEN);
 879     qemu_put_be32(f, n->tx_waiting);
 880     qemu_put_be32(f, n->mergeable_rx_bufs);
 881     qemu_put_be16(f, n->status);
 882     qemu_put_byte(f, n->promisc);
 883     qemu_put_byte(f, n->allmulti);
 884     qemu_put_be32(f, n->mac_table.in_use);
 885     qemu_put_buffer(f, n->mac_table.macs, n->mac_table.in_use * ETH_ALEN);
 886     qemu_put_buffer(f, (uint8_t *)n->vlans, MAX_VLAN >> 3);
 887     qemu_put_be32(f, n->has_vnet_hdr);
 888     qemu_put_byte(f, n->mac_table.multi_overflow);
 889     qemu_put_byte(f, n->mac_table.uni_overflow);
 890     qemu_put_byte(f, n->alluni);
 891     qemu_put_byte(f, n->nomulti);
 892     qemu_put_byte(f, n->nouni);
 893     qemu_put_byte(f, n->nobcast);
 894     qemu_put_byte(f, n->has_ufo);
 895 }
 896
 897 static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
 898 {
 899     VirtIONet *n = opaque;
 900     int i;
 901     int ret;
 902
 903     if (version_id < 2 || version_id > VIRTIO_NET_VM_VERSION)
 904         return -EINVAL;
 905
 906     ret = virtio_load(&n->vdev, f);
 907     if (ret) {
 908         return ret;
 909     }
 910
 911     qemu_get_buffer(f, n->mac, ETH_ALEN);
 912     n->tx_waiting = qemu_get_be32(f);
 913
 914     virtio_net_set_mrg_rx_bufs(n, qemu_get_be32(f));
 915
 916     if (version_id >= 3)
 917         n->status = qemu_get_be16(f);
 918
 919     if (version_id >= 4) {
 920         if (version_id < 8) {
 921             n->promisc = qemu_get_be32(f);
 922             n->allmulti = qemu_get_be32(f);
 923         } else {
 924             n->promisc = qemu_get_byte(f);
 925             n->allmulti = qemu_get_byte(f);
 926         }
 927     }
 928
 929     if (version_id >= 5) {
 930         n->mac_table.in_use = qemu_get_be32(f);
 931         /* MAC_TABLE_ENTRIES may be different from the saved image */
 932         if (n->mac_table.in_use <= MAC_TABLE_ENTRIES) {
 933             qemu_get_buffer(f, n->mac_table.macs,
 934                             n->mac_table.in_use * ETH_ALEN);
 935         } else if (n->mac_table.in_use) {
 936             uint8_t *buf = g_malloc0(n->mac_table.in_use);
 937             qemu_get_buffer(f, buf, n->mac_table.in_use * ETH_ALEN);
 938             g_free(buf);
 939             n->mac_table.multi_overflow = n->mac_table.uni_overflow = 1;
 940             n->mac_table.in_use = 0;
 941         }
 942     }
 943
 944     if (version_id >= 6)
 945         qemu_get_buffer(f, (uint8_t *)n->vlans, MAX_VLAN >> 3);
 946
 947     if (version_id >= 7) {
 948         if (qemu_get_be32(f) && !peer_has_vnet_hdr(n)) {
 949             error_report("virtio-net: saved image requires vnet_hdr=on");
 950             return -1;
 951         }
 952
 953         if (n->has_vnet_hdr) {
 954             tap_set_offload(n->nic->nc.peer,
 955                     (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_CSUM) & 1,
 956                     (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_TSO4) & 1,
 957                     (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_TSO6) & 1,
 958                     (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_ECN)  & 1,
 959                     (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_UFO)  & 1);
 960         }
 961     }
 962
 963     if (version_id >= 9) {
 964         n->mac_table.multi_overflow = qemu_get_byte(f);
 965         n->mac_table.uni_overflow = qemu_get_byte(f);
 966     }
 967
 968     if (version_id >= 10) {
 969         n->alluni = qemu_get_byte(f);
 970         n->nomulti = qemu_get_byte(f);
 971         n->nouni = qemu_get_byte(f);
 972         n->nobcast = qemu_get_byte(f);
 973     }
 974
 975     if (version_id >= 11) {
 976         if (qemu_get_byte(f) && !peer_has_ufo(n)) {
 977             error_report("virtio-net: saved image requires TUN_F_UFO support");
 978             return -1;
 979         }
 980     }
 981
 982     /* Find the first multicast entry in the saved MAC filter */
 983     for (i = 0; i < n->mac_table.in_use; i++) {
 984         if (n->mac_table.macs[i * ETH_ALEN] & 1) {
 985             break;
 986         }
 987     }
 988     n->mac_table.first_multi = i;
 989
 990     /* nc.link_down can't be migrated, so infer link_down according
 991      * to link status bit in n->status */
 992     n->nic->nc.link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
 993
 994     return 0;
 995 }
 996
 997 static void virtio_net_cleanup(NetClientState *nc)
 998 {
 999     VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
1000
1001     n->nic = NULL;
1002 }
1003
1004 static NetClientInfo net_virtio_info = {
1005     .type = NET_CLIENT_OPTIONS_KIND_NIC,
1006     .size = sizeof(NICState),
1007     .can_receive = virtio_net_can_receive,
1008     .receive = virtio_net_receive,
1009         .cleanup = virtio_net_cleanup,
1010     .link_status_changed = virtio_net_set_link_status,
1011 };
1012
1013 static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
1014 {
1015     VirtIONet *n = to_virtio_net(vdev);
1016     assert(n->vhost_started);
1017     return vhost_net_virtqueue_pending(tap_get_vhost_net(n->nic->nc.peer), idx);
1018 }
1019
1020 static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
1021                                            bool mask)
1022 {
1023     VirtIONet *n = to_virtio_net(vdev);
1024     assert(n->vhost_started);
1025     vhost_net_virtqueue_mask(tap_get_vhost_net(n->nic->nc.peer),
1026                              vdev, idx, mask);
1027 }
1028
1029 VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
1030                               virtio_net_conf *net)
1031 {
1032     VirtIONet *n;
1033
1034     n = (VirtIONet *)virtio_common_init("virtio-net", VIRTIO_ID_NET,
1035                                         sizeof(struct virtio_net_config),
1036                                         sizeof(VirtIONet));
1037
1038     n->vdev.get_config = virtio_net_get_config;
1039     n->vdev.set_config = virtio_net_set_config;
1040     n->vdev.get_features = virtio_net_get_features;
1041     n->vdev.set_features = virtio_net_set_features;
1042     n->vdev.bad_features = virtio_net_bad_features;
1043     n->vdev.reset = virtio_net_reset;
1044     n->vdev.set_status = virtio_net_set_status;
1045     n->vdev.guest_notifier_mask = virtio_net_guest_notifier_mask;
1046     n->vdev.guest_notifier_pending = virtio_net_guest_notifier_pending;
1047     n->rx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_rx);
1048
1049     if (net->tx && strcmp(net->tx, "timer") && strcmp(net->tx, "bh")) {
1050         error_report("virtio-net: "
1051                      "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
1052                      net->tx);
1053         error_report("Defaulting to \"bh\"");
1054     }
1055
1056     if (net->tx && !strcmp(net->tx, "timer")) {
1057         n->tx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_tx_timer);
1058         n->tx_timer = qemu_new_timer_ns(vm_clock, virtio_net_tx_timer, n);
1059         n->tx_timeout = net->txtimer;
1060     } else {
1061         n->tx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_tx_bh);
1062         n->tx_bh = qemu_bh_new(virtio_net_tx_bh, n);
1063     }
1064     n->ctrl_vq = virtio_add_queue(&n->vdev, 64, virtio_net_handle_ctrl);
1065     qemu_macaddr_default_if_unset(&conf->macaddr);
1066     memcpy(&n->mac[0], &conf->macaddr, sizeof(n->mac));
1067     n->status = VIRTIO_NET_S_LINK_UP;
1068
1069     n->nic = qemu_new_nic(&net_virtio_info, conf, object_get_typename(OBJECT(dev)), dev->id, n);
1070     peer_test_vnet_hdr(n);
1071     if (peer_has_vnet_hdr(n)) {
1072         tap_using_vnet_hdr(n->nic->nc.peer, 1);
1073         n->host_hdr_len = sizeof(struct virtio_net_hdr);
1074     } else {
1075         n->host_hdr_len = 0;
1076     }
1077
1078     qemu_format_nic_info_str(&n->nic->nc, conf->macaddr.a);
1079
1080     n->tx_waiting = 0;
1081     n->tx_burst = net->txburst;
1082     virtio_net_set_mrg_rx_bufs(n, 0);
1083     n->promisc = 1; /* for compatibility */
1084
1085     n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
1086
1087     n->vlans = g_malloc0(MAX_VLAN >> 3);
1088
1089     n->qdev = dev;
1090     register_savevm(dev, "virtio-net", -1, VIRTIO_NET_VM_VERSION,
1091                     virtio_net_save, virtio_net_load, n);
1092
1093     add_boot_device_path(conf->bootindex, dev, "/ethernet-phy@0");
1094
1095     return &n->vdev;
1096 }
1097
1098 void virtio_net_exit(VirtIODevice *vdev)
1099 {
1100     VirtIONet *n = DO_UPCAST(VirtIONet, vdev, vdev);
1101
1102     /* This will stop vhost backend if appropriate. */
1103     virtio_net_set_status(vdev, 0);
1104
1105     qemu_purge_queued_packets(&n->nic->nc);
1106
1107     unregister_savevm(n->qdev, "virtio-net", n);
1108
1109     g_free(n->mac_table.macs);
1110     g_free(n->vlans);
1111
1112     if (n->tx_timer) {
1113         qemu_del_timer(n->tx_timer);
1114         qemu_free_timer(n->tx_timer);
1115     } else {
1116         qemu_bh_delete(n->tx_bh);
1117     }
1118
1119     qemu_del_net_client(&n->nic->nc);
1120     virtio_cleanup(&n->vdev);
1121 }