hw/net/virtio-net.c

   1 /*
   2  * Virtio Network Device
   3  *
   4  * Copyright IBM, Corp. 2007
   5  *
   6  * Authors:
   7  *  Anthony Liguori   <aliguori@us.ibm.com>
   8  *
   9  * This work is licensed under the terms of the GNU GPL, version 2.  See
  10  * the COPYING file in the top-level directory.
  11  *
  12  */
  13
  14 #include "qemu/osdep.h"
  15 #include "qemu/atomic.h"
  16 #include "qemu/iov.h"
  17 #include "qemu/main-loop.h"
  18 #include "qemu/module.h"
  19 #include "hw/virtio/virtio.h"
  20 #include "net/net.h"
  21 #include "net/checksum.h"
  22 #include "net/tap.h"
  23 #include "qemu/error-report.h"
  24 #include "qemu/timer.h"
  25 #include "qemu/option.h"
  26 #include "qemu/option_int.h"
  27 #include "qemu/config-file.h"
  28 #include "qapi/qmp/qdict.h"
  29 #include "hw/virtio/virtio-net.h"
  30 #include "net/vhost_net.h"
  31 #include "net/announce.h"
  32 #include "hw/virtio/virtio-bus.h"
  33 #include "qapi/error.h"
  34 #include "qapi/qapi-events-net.h"
  35 #include "hw/qdev-properties.h"
  36 #include "qapi/qapi-types-migration.h"
  37 #include "qapi/qapi-events-migration.h"
  38 #include "hw/virtio/virtio-access.h"
  39 #include "migration/misc.h"
  40 #include "standard-headers/linux/ethtool.h"
  41 #include "sysemu/sysemu.h"
  42 #include "trace.h"
  43 #include "monitor/qdev.h"
  44 #include "hw/pci/pci.h"
  45 #include "net_rx_pkt.h"
  46 #include "hw/virtio/vhost.h"
  47
   48 #define VIRTIO_NET_VM_VERSION    11
   49
   50 #define MAC_TABLE_ENTRIES    64
   51 #define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */
   52
   53 /* Default queue sizes; these were previously fixed values */
   54 #define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
   55 #define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256
   56
   57 /* For now, only allow larger queues; with virtio-1, the guest can downsize */
   58 #define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
   59 #define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE
   60
   61 #define VIRTIO_NET_IP4_ADDR_SIZE   8        /* ipv4 saddr + daddr */
   62
   63 #define VIRTIO_NET_TCP_FLAG         0x3F
   64 #define VIRTIO_NET_TCP_HDR_LENGTH   0xF000
   65
   66 /* IPv4 max payload, 16 bits in the header */
   67 #define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
   68 #define VIRTIO_NET_MAX_TCP_PAYLOAD 65535
   69
   70 /* Header length value in the IP header when there are no options */
   71 #define VIRTIO_NET_IP4_HEADER_LENGTH 5
   72
   73 #define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
   74 #define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD
   75
   76 /* Purge timer interval for coalesced packets.  This value affects performance
   77    a lot and should be tuned carefully: '300000' (300us) is the recommended
   78    value to pass the WHQL test; '50000' can gain 2x netperf throughput with
   79    tso/gso/gro 'off'. */
   80 #define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000
   81
   82 #define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \
   83                                          VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \
   84                                          VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \
   85                                          VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \
   86                                          VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \
   87                                          VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \
   88                                          VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \
   89                                          VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \
   90                                          VIRTIO_NET_RSS_HASH_TYPE_UDP_EX)
  91
/*
 * Table pairing feature bits with the end offset (endof()) of a field of
 * struct virtio_net_config.  The empty entry terminates the table.
 * NOTE(review): presumably consumed when sizing the device config space;
 * the user of this table is not visible here - confirm at the call site.
 */
   92 static const VirtIOFeature feature_sizes[] = {
   93     {.flags = 1ULL << VIRTIO_NET_F_MAC,
   94      .end = endof(struct virtio_net_config, mac)},
   95     {.flags = 1ULL << VIRTIO_NET_F_STATUS,
   96      .end = endof(struct virtio_net_config, status)},
   97     {.flags = 1ULL << VIRTIO_NET_F_MQ,
   98      .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
   99     {.flags = 1ULL << VIRTIO_NET_F_MTU,
  100      .end = endof(struct virtio_net_config, mtu)},
  101     {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
  102      .end = endof(struct virtio_net_config, duplex)},
  103     {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT),
  104      .end = endof(struct virtio_net_config, supported_hash_types)},
  105     {}
  106 };
 107
 108 static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
 109 {
 110     VirtIONet *n = qemu_get_nic_opaque(nc);
 111
 112     return &n->vqs[nc->queue_index];
 113 }
 114
 115 static int vq2q(int queue_index)
 116 {
 117     return queue_index / 2;
 118 }
 119
 120 /* TODO
 121  * - we could suppress RX interrupt if we were so inclined.
 122  */
 123
 124 static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
 125 {
 126     VirtIONet *n = VIRTIO_NET(vdev);
 127     struct virtio_net_config netcfg;
 128     NetClientState *nc = qemu_get_queue(n->nic);
 129     static const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };
 130
 131     int ret = 0;
 132     memset(&netcfg, 0 , sizeof(struct virtio_net_config));
 133     virtio_stw_p(vdev, &netcfg.status, n->status);
 134     virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queues);
 135     virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
 136     memcpy(netcfg.mac, n->mac, ETH_ALEN);
 137     virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
 138     netcfg.duplex = n->net_conf.duplex;
 139     netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;
 140     virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length,
 141                  virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ?
 142                  VIRTIO_NET_RSS_MAX_TABLE_LEN : 1);
 143     virtio_stl_p(vdev, &netcfg.supported_hash_types,
 144                  VIRTIO_NET_RSS_SUPPORTED_HASHES);
 145     memcpy(config, &netcfg, n->config_size);
 146
 147     /*
 148      * Is this VDPA? No peer means not VDPA: there's no way to
 149      * disconnect/reconnect a VDPA peer.
 150      */
 151     if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
 152         ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg,
 153                                    n->config_size);
 154         if (ret != -1) {
 155             /*
 156              * Some NIC/kernel combinations present 0 as the mac address.  As
 157              * that is not a legal address, try to proceed with the
 158              * address from the QEMU command line in the hope that the
 159              * address has been configured correctly elsewhere - just not
 160              * reported by the device.
 161              */
 162             if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) {
 163                 info_report("Zero hardware mac address detected. Ignoring.");
 164                 memcpy(netcfg.mac, n->mac, ETH_ALEN);
 165             }
 166             memcpy(config, &netcfg, n->config_size);
 167         }
 168     }
 169 }
 170
 171 static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
 172 {
 173     VirtIONet *n = VIRTIO_NET(vdev);
 174     struct virtio_net_config netcfg = {};
 175     NetClientState *nc = qemu_get_queue(n->nic);
 176
 177     memcpy(&netcfg, config, n->config_size);
 178
 179     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
 180         !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
 181         memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
 182         memcpy(n->mac, netcfg.mac, ETH_ALEN);
 183         qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
 184     }
 185
 186     /*
 187      * Is this VDPA? No peer means not VDPA: there's no way to
 188      * disconnect/reconnect a VDPA peer.
 189      */
 190     if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
 191         vhost_net_set_config(get_vhost_net(nc->peer),
 192                              (uint8_t *)&netcfg, 0, n->config_size,
 193                              VHOST_SET_CONFIG_TYPE_MASTER);
 194       }
 195 }
 196
 197 static bool virtio_net_started(VirtIONet *n, uint8_t status)
 198 {
 199     VirtIODevice *vdev = VIRTIO_DEVICE(n);
 200     return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
 201         (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
 202 }
 203
 204 static void virtio_net_announce_notify(VirtIONet *net)
 205 {
 206     VirtIODevice *vdev = VIRTIO_DEVICE(net);
 207     trace_virtio_net_announce_notify();
 208
 209     net->status |= VIRTIO_NET_S_ANNOUNCE;
 210     virtio_notify_config(vdev);
 211 }
 212
 213 static void virtio_net_announce_timer(void *opaque)
 214 {
 215     VirtIONet *n = opaque;
 216     trace_virtio_net_announce_timer(n->announce_timer.round);
 217
 218     n->announce_timer.round--;
 219     virtio_net_announce_notify(n);
 220 }
 221
 222 static void virtio_net_announce(NetClientState *nc)
 223 {
 224     VirtIONet *n = qemu_get_nic_opaque(nc);
 225     VirtIODevice *vdev = VIRTIO_DEVICE(n);
 226
 227     /*
 228      * Make sure the virtio migration announcement timer isn't running
 229      * If it is, let it trigger announcement so that we do not cause
 230      * confusion.
 231      */
 232     if (n->announce_timer.round) {
 233         return;
 234     }
 235
 236     if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
 237         virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
 238             virtio_net_announce_notify(n);
 239     }
 240 }
 241
 242 static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
 243 {
 244     VirtIODevice *vdev = VIRTIO_DEVICE(n);
 245     NetClientState *nc = qemu_get_queue(n->nic);
 246     int queues = n->multiqueue ? n->max_queues : 1;
 247
 248     if (!get_vhost_net(nc->peer)) {
 249         return;
 250     }
 251
 252     if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
 253         !!n->vhost_started) {
 254         return;
 255     }
 256     if (!n->vhost_started) {
 257         int r, i;
 258
 259         if (n->needs_vnet_hdr_swap) {
 260             error_report("backend does not support %s vnet headers; "
 261                          "falling back on userspace virtio",
 262                          virtio_is_big_endian(vdev) ? "BE" : "LE");
 263             return;
 264         }
 265
 266         /* Any packets outstanding? Purge them to avoid touching rings
 267          * when vhost is running.
 268          */
 269         for (i = 0;  i < queues; i++) {
 270             NetClientState *qnc = qemu_get_subqueue(n->nic, i);
 271
 272             /* Purge both directions: TX and RX. */
 273             qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
 274             qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
 275         }
 276
 277         if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
 278             r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
 279             if (r < 0) {
 280                 error_report("%uBytes MTU not supported by the backend",
 281                              n->net_conf.mtu);
 282
 283                 return;
 284             }
 285         }
 286
 287         n->vhost_started = 1;
 288         r = vhost_net_start(vdev, n->nic->ncs, queues);
 289         if (r < 0) {
 290             error_report("unable to start vhost net: %d: "
 291                          "falling back on userspace virtio", -r);
 292             n->vhost_started = 0;
 293         }
 294     } else {
 295         vhost_net_stop(vdev, n->nic->ncs, queues);
 296         n->vhost_started = 0;
 297     }
 298 }
 299
 300 static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
 301                                           NetClientState *peer,
 302                                           bool enable)
 303 {
 304     if (virtio_is_big_endian(vdev)) {
 305         return qemu_set_vnet_be(peer, enable);
 306     } else {
 307         return qemu_set_vnet_le(peer, enable);
 308     }
 309 }
 310
 311 static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
 312                                        int queues, bool enable)
 313 {
 314     int i;
 315
 316     for (i = 0; i < queues; i++) {
 317         if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
 318             enable) {
 319             while (--i >= 0) {
 320                 virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
 321             }
 322
 323             return true;
 324         }
 325     }
 326
 327     return false;
 328 }
 329
 330 static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
 331 {
 332     VirtIODevice *vdev = VIRTIO_DEVICE(n);
 333     int queues = n->multiqueue ? n->max_queues : 1;
 334
 335     if (virtio_net_started(n, status)) {
 336         /* Before using the device, we tell the network backend about the
 337          * endianness to use when parsing vnet headers. If the backend
 338          * can't do it, we fallback onto fixing the headers in the core
 339          * virtio-net code.
 340          */
 341         n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
 342                                                             queues, true);
 343     } else if (virtio_net_started(n, vdev->status)) {
 344         /* After using the device, we need to reset the network backend to
 345          * the default (guest native endianness), otherwise the guest may
 346          * lose network connectivity if it is rebooted into a different
 347          * endianness.
 348          */
 349         virtio_net_set_vnet_endian(vdev, n->nic->ncs, queues, false);
 350     }
 351 }
 352
 353 static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
 354 {
 355     unsigned int dropped = virtqueue_drop_all(vq);
 356     if (dropped) {
 357         virtio_notify(vdev, vq);
 358     }
 359 }
 360
 361 static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
 362 {
 363     VirtIONet *n = VIRTIO_NET(vdev);
 364     VirtIONetQueue *q;
 365     int i;
 366     uint8_t queue_status;
 367
 368     virtio_net_vnet_endian_status(n, status);
 369     virtio_net_vhost_status(n, status);
 370
 371     for (i = 0; i < n->max_queues; i++) {
 372         NetClientState *ncs = qemu_get_subqueue(n->nic, i);
 373         bool queue_started;
 374         q = &n->vqs[i];
 375
 376         if ((!n->multiqueue && i != 0) || i >= n->curr_queues) {
 377             queue_status = 0;
 378         } else {
 379             queue_status = status;
 380         }
 381         queue_started =
 382             virtio_net_started(n, queue_status) && !n->vhost_started;
 383
 384         if (queue_started) {
 385             qemu_flush_queued_packets(ncs);
 386         }
 387
 388         if (!q->tx_waiting) {
 389             continue;
 390         }
 391
 392         if (queue_started) {
 393             if (q->tx_timer) {
 394                 timer_mod(q->tx_timer,
 395                                qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
 396             } else {
 397                 qemu_bh_schedule(q->tx_bh);
 398             }
 399         } else {
 400             if (q->tx_timer) {
 401                 timer_del(q->tx_timer);
 402             } else {
 403                 qemu_bh_cancel(q->tx_bh);
 404             }
 405             if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
 406                 (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
 407                 vdev->vm_running) {
 408                 /* if tx is waiting we are likely have some packets in tx queue
 409                  * and disabled notification */
 410                 q->tx_waiting = 0;
 411                 virtio_queue_set_notification(q->tx_vq, 1);
 412                 virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
 413             }
 414         }
 415     }
 416 }
 417
 418 static void virtio_net_set_link_status(NetClientState *nc)
 419 {
 420     VirtIONet *n = qemu_get_nic_opaque(nc);
 421     VirtIODevice *vdev = VIRTIO_DEVICE(n);
 422     uint16_t old_status = n->status;
 423
 424     if (nc->link_down)
 425         n->status &= ~VIRTIO_NET_S_LINK_UP;
 426     else
 427         n->status |= VIRTIO_NET_S_LINK_UP;
 428
 429     if (n->status != old_status)
 430         virtio_notify_config(vdev);
 431
 432     virtio_net_set_status(vdev, vdev->status);
 433 }
 434
 435 static void rxfilter_notify(NetClientState *nc)
 436 {
 437     VirtIONet *n = qemu_get_nic_opaque(nc);
 438
 439     if (nc->rxfilter_notify_enabled) {
 440         char *path = object_get_canonical_path(OBJECT(n->qdev));
 441         qapi_event_send_nic_rx_filter_changed(!!n->netclient_name,
 442                                               n->netclient_name, path);
 443         g_free(path);
 444
 445         /* disable event notification to avoid events flooding */
 446         nc->rxfilter_notify_enabled = 0;
 447     }
 448 }
 449
 450 static intList *get_vlan_table(VirtIONet *n)
 451 {
 452     intList *list;
 453     int i, j;
 454
 455     list = NULL;
 456     for (i = 0; i < MAX_VLAN >> 5; i++) {
 457         for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
 458             if (n->vlans[i] & (1U << j)) {
 459                 QAPI_LIST_PREPEND(list, (i << 5) + j);
 460             }
 461         }
 462     }
 463
 464     return list;
 465 }
 466
 467 static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
 468 {
 469     VirtIONet *n = qemu_get_nic_opaque(nc);
 470     VirtIODevice *vdev = VIRTIO_DEVICE(n);
 471     RxFilterInfo *info;
 472     strList *str_list;
 473     int i;
 474
 475     info = g_malloc0(sizeof(*info));
 476     info->name = g_strdup(nc->name);
 477     info->promiscuous = n->promisc;
 478
 479     if (n->nouni) {
 480         info->unicast = RX_STATE_NONE;
 481     } else if (n->alluni) {
 482         info->unicast = RX_STATE_ALL;
 483     } else {
 484         info->unicast = RX_STATE_NORMAL;
 485     }
 486
 487     if (n->nomulti) {
 488         info->multicast = RX_STATE_NONE;
 489     } else if (n->allmulti) {
 490         info->multicast = RX_STATE_ALL;
 491     } else {
 492         info->multicast = RX_STATE_NORMAL;
 493     }
 494
 495     info->broadcast_allowed = n->nobcast;
 496     info->multicast_overflow = n->mac_table.multi_overflow;
 497     info->unicast_overflow = n->mac_table.uni_overflow;
 498
 499     info->main_mac = qemu_mac_strdup_printf(n->mac);
 500
 501     str_list = NULL;
 502     for (i = 0; i < n->mac_table.first_multi; i++) {
 503         QAPI_LIST_PREPEND(str_list,
 504                       qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
 505     }
 506     info->unicast_table = str_list;
 507
 508     str_list = NULL;
 509     for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
 510         QAPI_LIST_PREPEND(str_list,
 511                       qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
 512     }
 513     info->multicast_table = str_list;
 514     info->vlan_table = get_vlan_table(n);
 515
 516     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
 517         info->vlan = RX_STATE_ALL;
 518     } else if (!info->vlan_table) {
 519         info->vlan = RX_STATE_NONE;
 520     } else {
 521         info->vlan = RX_STATE_NORMAL;
 522     }
 523
 524     /* enable event notification after query */
 525     nc->rxfilter_notify_enabled = 1;
 526
 527     return info;
 528 }
 529
 530 static void virtio_net_reset(VirtIODevice *vdev)
 531 {
 532     VirtIONet *n = VIRTIO_NET(vdev);
 533     int i;
 534
 535     /* Reset back to compatibility mode */
 536     n->promisc = 1;
 537     n->allmulti = 0;
 538     n->alluni = 0;
 539     n->nomulti = 0;
 540     n->nouni = 0;
 541     n->nobcast = 0;
 542     /* multiqueue is disabled by default */
 543     n->curr_queues = 1;
 544     timer_del(n->announce_timer.tm);
 545     n->announce_timer.round = 0;
 546     n->status &= ~VIRTIO_NET_S_ANNOUNCE;
 547
 548     /* Flush any MAC and VLAN filter table state */
 549     n->mac_table.in_use = 0;
 550     n->mac_table.first_multi = 0;
 551     n->mac_table.multi_overflow = 0;
 552     n->mac_table.uni_overflow = 0;
 553     memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
 554     memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
 555     qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
 556     memset(n->vlans, 0, MAX_VLAN >> 3);
 557
 558     /* Flush any async TX */
 559     for (i = 0;  i < n->max_queues; i++) {
 560         NetClientState *nc = qemu_get_subqueue(n->nic, i);
 561
 562         if (nc->peer) {
 563             qemu_flush_or_purge_queued_packets(nc->peer, true);
 564             assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
 565         }
 566     }
 567 }
 568
 569 static void peer_test_vnet_hdr(VirtIONet *n)
 570 {
 571     NetClientState *nc = qemu_get_queue(n->nic);
 572     if (!nc->peer) {
 573         return;
 574     }
 575
 576     n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
 577 }
 578
 579 static int peer_has_vnet_hdr(VirtIONet *n)
 580 {
 581     return n->has_vnet_hdr;
 582 }
 583
 584 static int peer_has_ufo(VirtIONet *n)
 585 {
 586     if (!peer_has_vnet_hdr(n))
 587         return 0;
 588
 589     n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);
 590
 591     return n->has_ufo;
 592 }
 593
 594 static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
 595                                        int version_1, int hash_report)
 596 {
 597     int i;
 598     NetClientState *nc;
 599
 600     n->mergeable_rx_bufs = mergeable_rx_bufs;
 601
 602     if (version_1) {
 603         n->guest_hdr_len = hash_report ?
 604             sizeof(struct virtio_net_hdr_v1_hash) :
 605             sizeof(struct virtio_net_hdr_mrg_rxbuf);
 606         n->rss_data.populate_hash = !!hash_report;
 607     } else {
 608         n->guest_hdr_len = n->mergeable_rx_bufs ?
 609             sizeof(struct virtio_net_hdr_mrg_rxbuf) :
 610             sizeof(struct virtio_net_hdr);
 611     }
 612
 613     for (i = 0; i < n->max_queues; i++) {
 614         nc = qemu_get_subqueue(n->nic, i);
 615
 616         if (peer_has_vnet_hdr(n) &&
 617             qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
 618             qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
 619             n->host_hdr_len = n->guest_hdr_len;
 620         }
 621     }
 622 }
 623
 624 static int virtio_net_max_tx_queue_size(VirtIONet *n)
 625 {
 626     NetClientState *peer = n->nic_conf.peers.ncs[0];
 627
 628     /*
 629      * Backends other than vhost-user don't support max queue size.
 630      */
 631     if (!peer) {
 632         return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
 633     }
 634
 635     if (peer->info->type != NET_CLIENT_DRIVER_VHOST_USER) {
 636         return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
 637     }
 638
 639     return VIRTQUEUE_MAX_SIZE;
 640 }
 641
 642 static int peer_attach(VirtIONet *n, int index)
 643 {
 644     NetClientState *nc = qemu_get_subqueue(n->nic, index);
 645
 646     if (!nc->peer) {
 647         return 0;
 648     }
 649
 650     if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
 651         vhost_set_vring_enable(nc->peer, 1);
 652     }
 653
 654     if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
 655         return 0;
 656     }
 657
 658     if (n->max_queues == 1) {
 659         return 0;
 660     }
 661
 662     return tap_enable(nc->peer);
 663 }
 664
 665 static int peer_detach(VirtIONet *n, int index)
 666 {
 667     NetClientState *nc = qemu_get_subqueue(n->nic, index);
 668
 669     if (!nc->peer) {
 670         return 0;
 671     }
 672
 673     if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
 674         vhost_set_vring_enable(nc->peer, 0);
 675     }
 676
 677     if (nc->peer->info->type !=  NET_CLIENT_DRIVER_TAP) {
 678         return 0;
 679     }
 680
 681     return tap_disable(nc->peer);
 682 }
 683
 684 static void virtio_net_set_queues(VirtIONet *n)
 685 {
 686     int i;
 687     int r;
 688
 689     if (n->nic->peer_deleted) {
 690         return;
 691     }
 692
 693     for (i = 0; i < n->max_queues; i++) {
 694         if (i < n->curr_queues) {
 695             r = peer_attach(n, i);
 696             assert(!r);
 697         } else {
 698             r = peer_detach(n, i);
 699             assert(!r);
 700         }
 701     }
 702 }
 703
/*
 * Forward declaration: virtio_net_set_features() calls this function, whose
 * definition is not located before that call.
 */
  704 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
 705
 706 static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
 707                                         Error **errp)
 708 {
 709     VirtIONet *n = VIRTIO_NET(vdev);
 710     NetClientState *nc = qemu_get_queue(n->nic);
 711
 712     /* Firstly sync all virtio-net possible supported features */
 713     features |= n->host_features;
 714
 715     virtio_add_feature(&features, VIRTIO_NET_F_MAC);
 716
 717     if (!peer_has_vnet_hdr(n)) {
 718         virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
 719         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
 720         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
 721         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);
 722
 723         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
 724         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
 725         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
 726         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);
 727
 728         virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
 729     }
 730
 731     if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
 732         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
 733         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
 734     }
 735
 736     if (!get_vhost_net(nc->peer)) {
 737         return features;
 738     }
 739
 740     if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
 741         virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
 742     }
 743     features = vhost_net_get_features(get_vhost_net(nc->peer), features);
 744     vdev->backend_features = features;
 745
 746     if (n->mtu_bypass_backend &&
 747             (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
 748         features |= (1ULL << VIRTIO_NET_F_MTU);
 749     }
 750
 751     return features;
 752 }
 753
 754 static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
 755 {
 756     uint64_t features = 0;
 757
 758     /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
 759      * but also these: */
 760     virtio_add_feature(&features, VIRTIO_NET_F_MAC);
 761     virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
 762     virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
 763     virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
 764     virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);
 765
 766     return features;
 767 }
 768
 769 static void virtio_net_apply_guest_offloads(VirtIONet *n)
 770 {
 771     qemu_set_offload(qemu_get_queue(n->nic)->peer,
 772             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
 773             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
 774             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
 775             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
 776             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
 777 }
 778
 779 static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
 780 {
 781     static const uint64_t guest_offloads_mask =
 782         (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
 783         (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
 784         (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
 785         (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
 786         (1ULL << VIRTIO_NET_F_GUEST_UFO);
 787
 788     return guest_offloads_mask & features;
 789 }
 790
 791 static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
 792 {
 793     VirtIODevice *vdev = VIRTIO_DEVICE(n);
 794     return virtio_net_guest_offloads_by_features(vdev->guest_features);
 795 }
 796
/* Search context handed to failover_set_primary() through its opaque pointer. */
  797 typedef struct {
  798     VirtIONet *n;   /* device whose netclient_name is matched against */
  799     char *id;       /* set to a g_strdup() copy of the matching opts id */
  800 } FailoverId;
 801
 802 /**
 803  * Set the id of the failover primary device
 804  *
 805  * @opaque: FailoverId to setup
 806  * @opts: opts for device we are handling
 807  * @errp: returns an error if this function fails
 808  */
 809 static int failover_set_primary(void *opaque, QemuOpts *opts, Error **errp)
 810 {
 811     FailoverId *fid = opaque;
 812     const char *standby_id = qemu_opt_get(opts, "failover_pair_id");
 813
 814     if (g_strcmp0(standby_id, fid->n->netclient_name) == 0) {
 815         fid->id = g_strdup(opts->id);
 816         return 1;
 817     }
 818
 819     return 0;
 820 }
 821
 822 /**
 823  * Find the primary device id for this failover virtio-net
 824  *
 825  * @n: VirtIONet device
 826  * @errp: returns an error if this function fails
 827  */
 828 static char *failover_find_primary_device_id(VirtIONet *n)
 829 {
 830     Error *err = NULL;
 831     FailoverId fid;
 832
 833     fid.n = n;
 834     if (!qemu_opts_foreach(qemu_find_opts("device"),
 835                            failover_set_primary, &fid, &err)) {
 836         return NULL;
 837     }
 838     return fid.id;
 839 }
 840
 841 /**
 842  * Find the primary device for this failover virtio-net
 843  *
 844  * @n: VirtIONet device
 845  * @errp: returns an error if this function fails
 846  */
 847 static DeviceState *failover_find_primary_device(VirtIONet *n)
 848 {
 849     char *id = failover_find_primary_device_id(n);
 850
 851     if (!id) {
 852         return NULL;
 853     }
 854
 855     return qdev_find_recursive(sysbus_get_default(), id);
 856 }
 857
 858 static void failover_add_primary(VirtIONet *n, Error **errp)
 859 {
 860     Error *err = NULL;
 861     QemuOpts *opts;
 862     char *id;
 863     DeviceState *dev = failover_find_primary_device(n);
 864
 865     if (dev) {
 866         return;
 867     }
 868
 869     id = failover_find_primary_device_id(n);
 870     if (!id) {
 871         error_setg(errp, "Primary device not found");
 872         error_append_hint(errp, "Virtio-net failover will not work. Make "
 873                           "sure primary device has parameter"
 874                           " failover_pair_id=%s\n", n->netclient_name);
 875         return;
 876     }
 877     opts = qemu_opts_find(qemu_find_opts("device"), id);
 878     g_assert(opts); /* cannot be NULL because id was found using opts list */
 879     dev = qdev_device_add(opts, &err);
 880     if (err) {
 881         qemu_opts_del(opts);
 882     } else {
 883         object_unref(OBJECT(dev));
 884     }
 885     error_propagate(errp, err);
 886 }
 887
 888 static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
 889 {
 890     VirtIONet *n = VIRTIO_NET(vdev);
 891     Error *err = NULL;
 892     int i;
 893
 894     if (n->mtu_bypass_backend &&
 895             !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
 896         features &= ~(1ULL << VIRTIO_NET_F_MTU);
 897     }
 898
 899     virtio_net_set_multiqueue(n,
 900                               virtio_has_feature(features, VIRTIO_NET_F_RSS) ||
 901                               virtio_has_feature(features, VIRTIO_NET_F_MQ));
 902
 903     virtio_net_set_mrg_rx_bufs(n,
 904                                virtio_has_feature(features,
 905                                                   VIRTIO_NET_F_MRG_RXBUF),
 906                                virtio_has_feature(features,
 907                                                   VIRTIO_F_VERSION_1),
 908                                virtio_has_feature(features,
 909                                                   VIRTIO_NET_F_HASH_REPORT));
 910
 911     n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
 912         virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
 913     n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
 914         virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
 915     n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS);
 916
 917     if (n->has_vnet_hdr) {
 918         n->curr_guest_offloads =
 919             virtio_net_guest_offloads_by_features(features);
 920         virtio_net_apply_guest_offloads(n);
 921     }
 922
 923     for (i = 0;  i < n->max_queues; i++) {
 924         NetClientState *nc = qemu_get_subqueue(n->nic, i);
 925
 926         if (!get_vhost_net(nc->peer)) {
 927             continue;
 928         }
 929         vhost_net_ack_features(get_vhost_net(nc->peer), features);
 930     }
 931
 932     if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
 933         memset(n->vlans, 0, MAX_VLAN >> 3);
 934     } else {
 935         memset(n->vlans, 0xff, MAX_VLAN >> 3);
 936     }
 937
 938     if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
 939         qapi_event_send_failover_negotiated(n->netclient_name);
 940         qatomic_set(&n->failover_primary_hidden, false);
 941         failover_add_primary(n, &err);
 942         if (err) {
 943             warn_report_err(err);
 944         }
 945     }
 946 }
 947
 948 static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
 949                                      struct iovec *iov, unsigned int iov_cnt)
 950 {
 951     uint8_t on;
 952     size_t s;
 953     NetClientState *nc = qemu_get_queue(n->nic);
 954
 955     s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
 956     if (s != sizeof(on)) {
 957         return VIRTIO_NET_ERR;
 958     }
 959
 960     if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
 961         n->promisc = on;
 962     } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
 963         n->allmulti = on;
 964     } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
 965         n->alluni = on;
 966     } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
 967         n->nomulti = on;
 968     } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
 969         n->nouni = on;
 970     } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
 971         n->nobcast = on;
 972     } else {
 973         return VIRTIO_NET_ERR;
 974     }
 975
 976     rxfilter_notify(nc);
 977
 978     return VIRTIO_NET_OK;
 979 }
 980
 981 static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
 982                                      struct iovec *iov, unsigned int iov_cnt)
 983 {
 984     VirtIODevice *vdev = VIRTIO_DEVICE(n);
 985     uint64_t offloads;
 986     size_t s;
 987
 988     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
 989         return VIRTIO_NET_ERR;
 990     }
 991
 992     s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
 993     if (s != sizeof(offloads)) {
 994         return VIRTIO_NET_ERR;
 995     }
 996
 997     if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
 998         uint64_t supported_offloads;
 999
1000         offloads = virtio_ldq_p(vdev, &offloads);
1001
1002         if (!n->has_vnet_hdr) {
1003             return VIRTIO_NET_ERR;
1004         }
1005
1006         n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
1007             virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
1008         n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
1009             virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
1010         virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);
1011
1012         supported_offloads = virtio_net_supported_guest_offloads(n);
1013         if (offloads & ~supported_offloads) {
1014             return VIRTIO_NET_ERR;
1015         }
1016
1017         n->curr_guest_offloads = offloads;
1018         virtio_net_apply_guest_offloads(n);
1019
1020         return VIRTIO_NET_OK;
1021     } else {
1022         return VIRTIO_NET_ERR;
1023     }
1024 }
1025
1026 static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
1027                                  struct iovec *iov, unsigned int iov_cnt)
1028 {
1029     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1030     struct virtio_net_ctrl_mac mac_data;
1031     size_t s;
1032     NetClientState *nc = qemu_get_queue(n->nic);
1033
1034     if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
1035         if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
1036             return VIRTIO_NET_ERR;
1037         }
1038         s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
1039         assert(s == sizeof(n->mac));
1040         qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
1041         rxfilter_notify(nc);
1042
1043         return VIRTIO_NET_OK;
1044     }
1045
1046     if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
1047         return VIRTIO_NET_ERR;
1048     }
1049
1050     int in_use = 0;
1051     int first_multi = 0;
1052     uint8_t uni_overflow = 0;
1053     uint8_t multi_overflow = 0;
1054     uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
1055
1056     s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
1057                    sizeof(mac_data.entries));
1058     mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
1059     if (s != sizeof(mac_data.entries)) {
1060         goto error;
1061     }
1062     iov_discard_front(&iov, &iov_cnt, s);
1063
1064     if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
1065         goto error;
1066     }
1067
1068     if (mac_data.entries <= MAC_TABLE_ENTRIES) {
1069         s = iov_to_buf(iov, iov_cnt, 0, macs,
1070                        mac_data.entries * ETH_ALEN);
1071         if (s != mac_data.entries * ETH_ALEN) {
1072             goto error;
1073         }
1074         in_use += mac_data.entries;
1075     } else {
1076         uni_overflow = 1;
1077     }
1078
1079     iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);
1080
1081     first_multi = in_use;
1082
1083     s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
1084                    sizeof(mac_data.entries));
1085     mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
1086     if (s != sizeof(mac_data.entries)) {
1087         goto error;
1088     }
1089
1090     iov_discard_front(&iov, &iov_cnt, s);
1091
1092     if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
1093         goto error;
1094     }
1095
1096     if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
1097         s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
1098                        mac_data.entries * ETH_ALEN);
1099         if (s != mac_data.entries * ETH_ALEN) {
1100             goto error;
1101         }
1102         in_use += mac_data.entries;
1103     } else {
1104         multi_overflow = 1;
1105     }
1106
1107     n->mac_table.in_use = in_use;
1108     n->mac_table.first_multi = first_multi;
1109     n->mac_table.uni_overflow = uni_overflow;
1110     n->mac_table.multi_overflow = multi_overflow;
1111     memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
1112     g_free(macs);
1113     rxfilter_notify(nc);
1114
1115     return VIRTIO_NET_OK;
1116
1117 error:
1118     g_free(macs);
1119     return VIRTIO_NET_ERR;
1120 }
1121
1122 static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
1123                                         struct iovec *iov, unsigned int iov_cnt)
1124 {
1125     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1126     uint16_t vid;
1127     size_t s;
1128     NetClientState *nc = qemu_get_queue(n->nic);
1129
1130     s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
1131     vid = virtio_lduw_p(vdev, &vid);
1132     if (s != sizeof(vid)) {
1133         return VIRTIO_NET_ERR;
1134     }
1135
1136     if (vid >= MAX_VLAN)
1137         return VIRTIO_NET_ERR;
1138
1139     if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
1140         n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
1141     else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
1142         n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
1143     else
1144         return VIRTIO_NET_ERR;
1145
1146     rxfilter_notify(nc);
1147
1148     return VIRTIO_NET_OK;
1149 }
1150
1151 static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
1152                                       struct iovec *iov, unsigned int iov_cnt)
1153 {
1154     trace_virtio_net_handle_announce(n->announce_timer.round);
1155     if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
1156         n->status & VIRTIO_NET_S_ANNOUNCE) {
1157         n->status &= ~VIRTIO_NET_S_ANNOUNCE;
1158         if (n->announce_timer.round) {
1159             qemu_announce_timer_step(&n->announce_timer);
1160         }
1161         return VIRTIO_NET_OK;
1162     } else {
1163         return VIRTIO_NET_ERR;
1164     }
1165 }
1166
1167 static void virtio_net_detach_epbf_rss(VirtIONet *n);
1168
1169 static void virtio_net_disable_rss(VirtIONet *n)
1170 {
1171     if (n->rss_data.enabled) {
1172         trace_virtio_net_rss_disable();
1173     }
1174     n->rss_data.enabled = false;
1175
1176     virtio_net_detach_epbf_rss(n);
1177 }
1178
1179 static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd)
1180 {
1181     NetClientState *nc = qemu_get_peer(qemu_get_queue(nic), 0);
1182     if (nc == NULL || nc->info->set_steering_ebpf == NULL) {
1183         return false;
1184     }
1185
1186     return nc->info->set_steering_ebpf(nc, prog_fd);
1187 }
1188
1189 static void rss_data_to_rss_config(struct VirtioNetRssData *data,
1190                                    struct EBPFRSSConfig *config)
1191 {
1192     config->redirect = data->redirect;
1193     config->populate_hash = data->populate_hash;
1194     config->hash_types = data->hash_types;
1195     config->indirections_len = data->indirections_len;
1196     config->default_queue = data->default_queue;
1197 }
1198
1199 static bool virtio_net_attach_epbf_rss(VirtIONet *n)
1200 {
1201     struct EBPFRSSConfig config = {};
1202
1203     if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
1204         return false;
1205     }
1206
1207     rss_data_to_rss_config(&n->rss_data, &config);
1208
1209     if (!ebpf_rss_set_all(&n->ebpf_rss, &config,
1210                           n->rss_data.indirections_table, n->rss_data.key)) {
1211         return false;
1212     }
1213
1214     if (!virtio_net_attach_ebpf_to_backend(n->nic, n->ebpf_rss.program_fd)) {
1215         return false;
1216     }
1217
1218     return true;
1219 }
1220
1221 static void virtio_net_detach_epbf_rss(VirtIONet *n)
1222 {
1223     virtio_net_attach_ebpf_to_backend(n->nic, -1);
1224 }
1225
1226 static bool virtio_net_load_ebpf(VirtIONet *n)
1227 {
1228     if (!virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
1229         /* backend does't support steering ebpf */
1230         return false;
1231     }
1232
1233     return ebpf_rss_load(&n->ebpf_rss);
1234 }
1235
1236 static void virtio_net_unload_ebpf(VirtIONet *n)
1237 {
1238     virtio_net_attach_ebpf_to_backend(n->nic, -1);
1239     ebpf_rss_unload(&n->ebpf_rss);
1240 }
1241
1242 static uint16_t virtio_net_handle_rss(VirtIONet *n,
1243                                       struct iovec *iov,
1244                                       unsigned int iov_cnt,
1245                                       bool do_rss)
1246 {
1247     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1248     struct virtio_net_rss_config cfg;
1249     size_t s, offset = 0, size_get;
1250     uint16_t queues, i;
1251     struct {
1252         uint16_t us;
1253         uint8_t b;
1254     } QEMU_PACKED temp;
1255     const char *err_msg = "";
1256     uint32_t err_value = 0;
1257
1258     if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) {
1259         err_msg = "RSS is not negotiated";
1260         goto error;
1261     }
1262     if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
1263         err_msg = "Hash report is not negotiated";
1264         goto error;
1265     }
1266     size_get = offsetof(struct virtio_net_rss_config, indirection_table);
1267     s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get);
1268     if (s != size_get) {
1269         err_msg = "Short command buffer";
1270         err_value = (uint32_t)s;
1271         goto error;
1272     }
1273     n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types);
1274     n->rss_data.indirections_len =
1275         virtio_lduw_p(vdev, &cfg.indirection_table_mask);
1276     n->rss_data.indirections_len++;
1277     if (!do_rss) {
1278         n->rss_data.indirections_len = 1;
1279     }
1280     if (!is_power_of_2(n->rss_data.indirections_len)) {
1281         err_msg = "Invalid size of indirection table";
1282         err_value = n->rss_data.indirections_len;
1283         goto error;
1284     }
1285     if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
1286         err_msg = "Too large indirection table";
1287         err_value = n->rss_data.indirections_len;
1288         goto error;
1289     }
1290     n->rss_data.default_queue = do_rss ?
1291         virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0;
1292     if (n->rss_data.default_queue >= n->max_queues) {
1293         err_msg = "Invalid default queue";
1294         err_value = n->rss_data.default_queue;
1295         goto error;
1296     }
1297     offset += size_get;
1298     size_get = sizeof(uint16_t) * n->rss_data.indirections_len;
1299     g_free(n->rss_data.indirections_table);
1300     n->rss_data.indirections_table = g_malloc(size_get);
1301     if (!n->rss_data.indirections_table) {
1302         err_msg = "Can't allocate indirections table";
1303         err_value = n->rss_data.indirections_len;
1304         goto error;
1305     }
1306     s = iov_to_buf(iov, iov_cnt, offset,
1307                    n->rss_data.indirections_table, size_get);
1308     if (s != size_get) {
1309         err_msg = "Short indirection table buffer";
1310         err_value = (uint32_t)s;
1311         goto error;
1312     }
1313     for (i = 0; i < n->rss_data.indirections_len; ++i) {
1314         uint16_t val = n->rss_data.indirections_table[i];
1315         n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val);
1316     }
1317     offset += size_get;
1318     size_get = sizeof(temp);
1319     s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get);
1320     if (s != size_get) {
1321         err_msg = "Can't get queues";
1322         err_value = (uint32_t)s;
1323         goto error;
1324     }
1325     queues = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queues;
1326     if (queues == 0 || queues > n->max_queues) {
1327         err_msg = "Invalid number of queues";
1328         err_value = queues;
1329         goto error;
1330     }
1331     if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
1332         err_msg = "Invalid key size";
1333         err_value = temp.b;
1334         goto error;
1335     }
1336     if (!temp.b && n->rss_data.hash_types) {
1337         err_msg = "No key provided";
1338         err_value = 0;
1339         goto error;
1340     }
1341     if (!temp.b && !n->rss_data.hash_types) {
1342         virtio_net_disable_rss(n);
1343         return queues;
1344     }
1345     offset += size_get;
1346     size_get = temp.b;
1347     s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get);
1348     if (s != size_get) {
1349         err_msg = "Can get key buffer";
1350         err_value = (uint32_t)s;
1351         goto error;
1352     }
1353     n->rss_data.enabled = true;
1354
1355     if (!n->rss_data.populate_hash) {
1356         if (!virtio_net_attach_epbf_rss(n)) {
1357             /* EBPF must be loaded for vhost */
1358             if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
1359                 warn_report("Can't load eBPF RSS for vhost");
1360                 goto error;
1361             }
1362             /* fallback to software RSS */
1363             warn_report("Can't load eBPF RSS - fallback to software RSS");
1364             n->rss_data.enabled_software_rss = true;
1365         }
1366     } else {
1367         /* use software RSS for hash populating */
1368         /* and detach eBPF if was loaded before */
1369         virtio_net_detach_epbf_rss(n);
1370         n->rss_data.enabled_software_rss = true;
1371     }
1372
1373     trace_virtio_net_rss_enable(n->rss_data.hash_types,
1374                                 n->rss_data.indirections_len,
1375                                 temp.b);
1376     return queues;
1377 error:
1378     trace_virtio_net_rss_error(err_msg, err_value);
1379     virtio_net_disable_rss(n);
1380     return 0;
1381 }
1382
1383 static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
1384                                 struct iovec *iov, unsigned int iov_cnt)
1385 {
1386     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1387     uint16_t queues;
1388
1389     virtio_net_disable_rss(n);
1390     if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) {
1391         queues = virtio_net_handle_rss(n, iov, iov_cnt, false);
1392         return queues ? VIRTIO_NET_OK : VIRTIO_NET_ERR;
1393     }
1394     if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) {
1395         queues = virtio_net_handle_rss(n, iov, iov_cnt, true);
1396     } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
1397         struct virtio_net_ctrl_mq mq;
1398         size_t s;
1399         if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) {
1400             return VIRTIO_NET_ERR;
1401         }
1402         s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
1403         if (s != sizeof(mq)) {
1404             return VIRTIO_NET_ERR;
1405         }
1406         queues = virtio_lduw_p(vdev, &mq.virtqueue_pairs);
1407
1408     } else {
1409         return VIRTIO_NET_ERR;
1410     }
1411
1412     if (queues < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
1413         queues > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
1414         queues > n->max_queues ||
1415         !n->multiqueue) {
1416         return VIRTIO_NET_ERR;
1417     }
1418
1419     n->curr_queues = queues;
1420     /* stop the backend before changing the number of queues to avoid handling a
1421      * disabled queue */
1422     virtio_net_set_status(vdev, vdev->status);
1423     virtio_net_set_queues(n);
1424
1425     return VIRTIO_NET_OK;
1426 }
1427
1428 static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
1429 {
1430     VirtIONet *n = VIRTIO_NET(vdev);
1431     struct virtio_net_ctrl_hdr ctrl;
1432     virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1433     VirtQueueElement *elem;
1434     size_t s;
1435     struct iovec *iov, *iov2;
1436     unsigned int iov_cnt;
1437
1438     for (;;) {
1439         elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
1440         if (!elem) {
1441             break;
1442         }
1443         if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) ||
1444             iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) {
1445             virtio_error(vdev, "virtio-net ctrl missing headers");
1446             virtqueue_detach_element(vq, elem, 0);
1447             g_free(elem);
1448             break;
1449         }
1450
1451         iov_cnt = elem->out_num;
1452         iov2 = iov = g_memdup(elem->out_sg, sizeof(struct iovec) * elem->out_num);
1453         s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
1454         iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
1455         if (s != sizeof(ctrl)) {
1456             status = VIRTIO_NET_ERR;
1457         } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
1458             status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
1459         } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
1460             status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
1461         } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
1462             status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
1463         } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
1464             status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt);
1465         } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
1466             status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt);
1467         } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
1468             status = virtio_net_handle_offloads(n, ctrl.cmd, iov, iov_cnt);
1469         }
1470
1471         s = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, sizeof(status));
1472         assert(s == sizeof(status));
1473
1474         virtqueue_push(vq, elem, sizeof(status));
1475         virtio_notify(vdev, vq);
1476         g_free(iov2);
1477         g_free(elem);
1478     }
1479 }
1480
1481 /* RX */
1482
1483 static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
1484 {
1485     VirtIONet *n = VIRTIO_NET(vdev);
1486     int queue_index = vq2q(virtio_get_queue_index(vq));
1487
1488     qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
1489 }
1490
1491 static bool virtio_net_can_receive(NetClientState *nc)
1492 {
1493     VirtIONet *n = qemu_get_nic_opaque(nc);
1494     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1495     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1496
1497     if (!vdev->vm_running) {
1498         return false;
1499     }
1500
1501     if (nc->queue_index >= n->curr_queues) {
1502         return false;
1503     }
1504
1505     if (!virtio_queue_ready(q->rx_vq) ||
1506         !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
1507         return false;
1508     }
1509
1510     return true;
1511 }
1512
1513 static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
1514 {
1515     VirtIONet *n = q->n;
1516     if (virtio_queue_empty(q->rx_vq) ||
1517         (n->mergeable_rx_bufs &&
1518          !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
1519         virtio_queue_set_notification(q->rx_vq, 1);
1520
1521         /* To avoid a race condition where the guest has made some buffers
1522          * available after the above check but before notification was
1523          * enabled, check for available buffers again.
1524          */
1525         if (virtio_queue_empty(q->rx_vq) ||
1526             (n->mergeable_rx_bufs &&
1527              !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
1528             return 0;
1529         }
1530     }
1531
1532     virtio_queue_set_notification(q->rx_vq, 0);
1533     return 1;
1534 }
1535
1536 static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
1537 {
1538     virtio_tswap16s(vdev, &hdr->hdr_len);
1539     virtio_tswap16s(vdev, &hdr->gso_size);
1540     virtio_tswap16s(vdev, &hdr->csum_start);
1541     virtio_tswap16s(vdev, &hdr->csum_offset);
1542 }
1543
1544 /* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
1545  * it never finds out that the packets don't have valid checksums.  This
1546  * causes dhclient to get upset.  Fedora's carried a patch for ages to
1547  * fix this with Xen but it hasn't appeared in an upstream release of
1548  * dhclient yet.
1549  *
1550  * To avoid breaking existing guests, we catch udp packets and add
1551  * checksums.  This is terrible but it's better than hacking the guest
1552  * kernels.
1553  *
1554  * N.B. if we introduce a zero-copy API, this operation is no longer free so
1555  * we should provide a mechanism to disable it to avoid polluting the host
1556  * cache.
1557  */
1558 static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
1559                                         uint8_t *buf, size_t size)
1560 {
1561     if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
1562         (size > 27 && size < 1500) && /* normal sized MTU */
1563         (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
1564         (buf[23] == 17) && /* ip.protocol == UDP */
1565         (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
1566         net_checksum_calculate(buf, size, CSUM_UDP);
1567         hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
1568     }
1569 }
1570
1571 static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
1572                            const void *buf, size_t size)
1573 {
1574     if (n->has_vnet_hdr) {
1575         /* FIXME this cast is evil */
1576         void *wbuf = (void *)buf;
1577         work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
1578                                     size - n->host_hdr_len);
1579
1580         if (n->needs_vnet_hdr_swap) {
1581             virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
1582         }
1583         iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
1584     } else {
1585         struct virtio_net_hdr hdr = {
1586             .flags = 0,
1587             .gso_type = VIRTIO_NET_HDR_GSO_NONE
1588         };
1589         iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
1590     }
1591 }
1592
1593 static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
1594 {
1595     static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
1596     static const uint8_t vlan[] = {0x81, 0x00};
1597     uint8_t *ptr = (uint8_t *)buf;
1598     int i;
1599
1600     if (n->promisc)
1601         return 1;
1602
1603     ptr += n->host_hdr_len;
1604
1605     if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
1606         int vid = lduw_be_p(ptr + 14) & 0xfff;
1607         if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
1608             return 0;
1609     }
1610
1611     if (ptr[0] & 1) { // multicast
1612         if (!memcmp(ptr, bcast, sizeof(bcast))) {
1613             return !n->nobcast;
1614         } else if (n->nomulti) {
1615             return 0;
1616         } else if (n->allmulti || n->mac_table.multi_overflow) {
1617             return 1;
1618         }
1619
1620         for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
1621             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1622                 return 1;
1623             }
1624         }
1625     } else { // unicast
1626         if (n->nouni) {
1627             return 0;
1628         } else if (n->alluni || n->mac_table.uni_overflow) {
1629             return 1;
1630         } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
1631             return 1;
1632         }
1633
1634         for (i = 0; i < n->mac_table.first_multi; i++) {
1635             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1636                 return 1;
1637             }
1638         }
1639     }
1640
1641     return 0;
1642 }
1643
1644 static uint8_t virtio_net_get_hash_type(bool isip4,
1645                                         bool isip6,
1646                                         bool isudp,
1647                                         bool istcp,
1648                                         uint32_t types)
1649 {
1650     if (isip4) {
1651         if (istcp && (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4)) {
1652             return NetPktRssIpV4Tcp;
1653         }
1654         if (isudp && (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4)) {
1655             return NetPktRssIpV4Udp;
1656         }
1657         if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
1658             return NetPktRssIpV4;
1659         }
1660     } else if (isip6) {
1661         uint32_t mask = VIRTIO_NET_RSS_HASH_TYPE_TCP_EX |
1662                         VIRTIO_NET_RSS_HASH_TYPE_TCPv6;
1663
1664         if (istcp && (types & mask)) {
1665             return (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) ?
1666                 NetPktRssIpV6TcpEx : NetPktRssIpV6Tcp;
1667         }
1668         mask = VIRTIO_NET_RSS_HASH_TYPE_UDP_EX | VIRTIO_NET_RSS_HASH_TYPE_UDPv6;
1669         if (isudp && (types & mask)) {
1670             return (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) ?
1671                 NetPktRssIpV6UdpEx : NetPktRssIpV6Udp;
1672         }
1673         mask = VIRTIO_NET_RSS_HASH_TYPE_IP_EX | VIRTIO_NET_RSS_HASH_TYPE_IPv6;
1674         if (types & mask) {
1675             return (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) ?
1676                 NetPktRssIpV6Ex : NetPktRssIpV6;
1677         }
1678     }
1679     return 0xff;
1680 }
1681
1682 static void virtio_set_packet_hash(const uint8_t *buf, uint8_t report,
1683                                    uint32_t hash)
1684 {
1685     struct virtio_net_hdr_v1_hash *hdr = (void *)buf;
1686     hdr->hash_value = hash;
1687     hdr->hash_report = report;
1688 }
1689
1690 static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
1691                                   size_t size)
1692 {
1693     VirtIONet *n = qemu_get_nic_opaque(nc);
1694     unsigned int index = nc->queue_index, new_index = index;
1695     struct NetRxPkt *pkt = n->rx_pkt;
1696     uint8_t net_hash_type;
1697     uint32_t hash;
1698     bool isip4, isip6, isudp, istcp;
1699     static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = {
1700         VIRTIO_NET_HASH_REPORT_IPv4,
1701         VIRTIO_NET_HASH_REPORT_TCPv4,
1702         VIRTIO_NET_HASH_REPORT_TCPv6,
1703         VIRTIO_NET_HASH_REPORT_IPv6,
1704         VIRTIO_NET_HASH_REPORT_IPv6_EX,
1705         VIRTIO_NET_HASH_REPORT_TCPv6_EX,
1706         VIRTIO_NET_HASH_REPORT_UDPv4,
1707         VIRTIO_NET_HASH_REPORT_UDPv6,
1708         VIRTIO_NET_HASH_REPORT_UDPv6_EX
1709     };
1710
1711     net_rx_pkt_set_protocols(pkt, buf + n->host_hdr_len,
1712                              size - n->host_hdr_len);
1713     net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp);
1714     if (isip4 && (net_rx_pkt_get_ip4_info(pkt)->fragment)) {
1715         istcp = isudp = false;
1716     }
1717     if (isip6 && (net_rx_pkt_get_ip6_info(pkt)->fragment)) {
1718         istcp = isudp = false;
1719     }
1720     net_hash_type = virtio_net_get_hash_type(isip4, isip6, isudp, istcp,
1721                                              n->rss_data.hash_types);
1722     if (net_hash_type > NetPktRssIpV6UdpEx) {
1723         if (n->rss_data.populate_hash) {
1724             virtio_set_packet_hash(buf, VIRTIO_NET_HASH_REPORT_NONE, 0);
1725         }
1726         return n->rss_data.redirect ? n->rss_data.default_queue : -1;
1727     }
1728
1729     hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key);
1730
1731     if (n->rss_data.populate_hash) {
1732         virtio_set_packet_hash(buf, reports[net_hash_type], hash);
1733     }
1734
1735     if (n->rss_data.redirect) {
1736         new_index = hash & (n->rss_data.indirections_len - 1);
1737         new_index = n->rss_data.indirections_table[new_index];
1738     }
1739
1740     return (index == new_index) ? -1 : new_index;
1741 }
1742
1743 static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
1744                                       size_t size, bool no_rss)
1745 {
1746     VirtIONet *n = qemu_get_nic_opaque(nc);
1747     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1748     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1749     VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE];
1750     size_t lens[VIRTQUEUE_MAX_SIZE];
1751     struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
1752     struct virtio_net_hdr_mrg_rxbuf mhdr;
1753     unsigned mhdr_cnt = 0;
1754     size_t offset, i, guest_offset, j;
1755     ssize_t err;
1756
1757     if (!virtio_net_can_receive(nc)) {
1758         return -1;
1759     }
1760
1761     if (!no_rss && n->rss_data.enabled && n->rss_data.enabled_software_rss) {
1762         int index = virtio_net_process_rss(nc, buf, size);
1763         if (index >= 0) {
1764             NetClientState *nc2 = qemu_get_subqueue(n->nic, index);
1765             return virtio_net_receive_rcu(nc2, buf, size, true);
1766         }
1767     }
1768
1769     /* hdr_len refers to the header we supply to the guest */
1770     if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
1771         return 0;
1772     }
1773
1774     if (!receive_filter(n, buf, size))
1775         return size;
1776
1777     offset = i = 0;
1778
1779     while (offset < size) {
1780         VirtQueueElement *elem;
1781         int len, total;
1782         const struct iovec *sg;
1783
1784         total = 0;
1785
1786         if (i == VIRTQUEUE_MAX_SIZE) {
1787             virtio_error(vdev, "virtio-net unexpected long buffer chain");
1788             err = size;
1789             goto err;
1790         }
1791
1792         elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
1793         if (!elem) {
1794             if (i) {
1795                 virtio_error(vdev, "virtio-net unexpected empty queue: "
1796                              "i %zd mergeable %d offset %zd, size %zd, "
1797                              "guest hdr len %zd, host hdr len %zd "
1798                              "guest features 0x%" PRIx64,
1799                              i, n->mergeable_rx_bufs, offset, size,
1800                              n->guest_hdr_len, n->host_hdr_len,
1801                              vdev->guest_features);
1802             }
1803             err = -1;
1804             goto err;
1805         }
1806
1807         if (elem->in_num < 1) {
1808             virtio_error(vdev,
1809                          "virtio-net receive queue contains no in buffers");
1810             virtqueue_detach_element(q->rx_vq, elem, 0);
1811             g_free(elem);
1812             err = -1;
1813             goto err;
1814         }
1815
1816         sg = elem->in_sg;
1817         if (i == 0) {
1818             assert(offset == 0);
1819             if (n->mergeable_rx_bufs) {
1820                 mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
1821                                     sg, elem->in_num,
1822                                     offsetof(typeof(mhdr), num_buffers),
1823                                     sizeof(mhdr.num_buffers));
1824             }
1825
1826             receive_header(n, sg, elem->in_num, buf, size);
1827             if (n->rss_data.populate_hash) {
1828                 offset = sizeof(mhdr);
1829                 iov_from_buf(sg, elem->in_num, offset,
1830                              buf + offset, n->host_hdr_len - sizeof(mhdr));
1831             }
1832             offset = n->host_hdr_len;
1833             total += n->guest_hdr_len;
1834             guest_offset = n->guest_hdr_len;
1835         } else {
1836             guest_offset = 0;
1837         }
1838
1839         /* copy in packet.  ugh */
1840         len = iov_from_buf(sg, elem->in_num, guest_offset,
1841                            buf + offset, size - offset);
1842         total += len;
1843         offset += len;
1844         /* If buffers can't be merged, at this point we
1845          * must have consumed the complete packet.
1846          * Otherwise, drop it. */
1847         if (!n->mergeable_rx_bufs && offset < size) {
1848             virtqueue_unpop(q->rx_vq, elem, total);
1849             g_free(elem);
1850             err = size;
1851             goto err;
1852         }
1853
1854         elems[i] = elem;
1855         lens[i] = total;
1856         i++;
1857     }
1858
1859     if (mhdr_cnt) {
1860         virtio_stw_p(vdev, &mhdr.num_buffers, i);
1861         iov_from_buf(mhdr_sg, mhdr_cnt,
1862                      0,
1863                      &mhdr.num_buffers, sizeof mhdr.num_buffers);
1864     }
1865
1866     for (j = 0; j < i; j++) {
1867         /* signal other side */
1868         virtqueue_fill(q->rx_vq, elems[j], lens[j], j);
1869         g_free(elems[j]);
1870     }
1871
1872     virtqueue_flush(q->rx_vq, i);
1873     virtio_notify(vdev, q->rx_vq);
1874
1875     return size;
1876
1877 err:
1878     for (j = 0; j < i; j++) {
1879         g_free(elems[j]);
1880     }
1881
1882     return err;
1883 }
1884
1885 static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
1886                                   size_t size)
1887 {
1888     RCU_READ_LOCK_GUARD();
1889
1890     return virtio_net_receive_rcu(nc, buf, size, false);
1891 }
1892
1893 static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
1894                                          const uint8_t *buf,
1895                                          VirtioNetRscUnit *unit)
1896 {
1897     uint16_t ip_hdrlen;
1898     struct ip_header *ip;
1899
1900     ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
1901                               + sizeof(struct eth_header));
1902     unit->ip = (void *)ip;
1903     ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
1904     unit->ip_plen = &ip->ip_len;
1905     unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
1906     unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
1907     unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
1908 }
1909
1910 static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
1911                                          const uint8_t *buf,
1912                                          VirtioNetRscUnit *unit)
1913 {
1914     struct ip6_header *ip6;
1915
1916     ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
1917                                  + sizeof(struct eth_header));
1918     unit->ip = ip6;
1919     unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
1920     unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
1921                                         + sizeof(struct ip6_header));
1922     unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
1923
1924     /* There is a difference between payload lenght in ipv4 and v6,
1925        ip header is excluded in ipv6 */
1926     unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
1927 }
1928
1929 static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
1930                                        VirtioNetRscSeg *seg)
1931 {
1932     int ret;
1933     struct virtio_net_hdr_v1 *h;
1934
1935     h = (struct virtio_net_hdr_v1 *)seg->buf;
1936     h->flags = 0;
1937     h->gso_type = VIRTIO_NET_HDR_GSO_NONE;
1938
1939     if (seg->is_coalesced) {
1940         h->rsc.segments = seg->packets;
1941         h->rsc.dup_acks = seg->dup_ack;
1942         h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
1943         if (chain->proto == ETH_P_IP) {
1944             h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
1945         } else {
1946             h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
1947         }
1948     }
1949
1950     ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
1951     QTAILQ_REMOVE(&chain->buffers, seg, next);
1952     g_free(seg->buf);
1953     g_free(seg);
1954
1955     return ret;
1956 }
1957
1958 static void virtio_net_rsc_purge(void *opq)
1959 {
1960     VirtioNetRscSeg *seg, *rn;
1961     VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;
1962
1963     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
1964         if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
1965             chain->stat.purge_failed++;
1966             continue;
1967         }
1968     }
1969
1970     chain->stat.timer++;
1971     if (!QTAILQ_EMPTY(&chain->buffers)) {
1972         timer_mod(chain->drain_timer,
1973               qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
1974     }
1975 }
1976
1977 static void virtio_net_rsc_cleanup(VirtIONet *n)
1978 {
1979     VirtioNetRscChain *chain, *rn_chain;
1980     VirtioNetRscSeg *seg, *rn_seg;
1981
1982     QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
1983         QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
1984             QTAILQ_REMOVE(&chain->buffers, seg, next);
1985             g_free(seg->buf);
1986             g_free(seg);
1987         }
1988
1989         timer_free(chain->drain_timer);
1990         QTAILQ_REMOVE(&n->rsc_chains, chain, next);
1991         g_free(chain);
1992     }
1993 }
1994
1995 static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
1996                                      NetClientState *nc,
1997                                      const uint8_t *buf, size_t size)
1998 {
1999     uint16_t hdr_len;
2000     VirtioNetRscSeg *seg;
2001
2002     hdr_len = chain->n->guest_hdr_len;
2003     seg = g_malloc(sizeof(VirtioNetRscSeg));
2004     seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
2005         + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
2006     memcpy(seg->buf, buf, size);
2007     seg->size = size;
2008     seg->packets = 1;
2009     seg->dup_ack = 0;
2010     seg->is_coalesced = 0;
2011     seg->nc = nc;
2012
2013     QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
2014     chain->stat.cache++;
2015
2016     switch (chain->proto) {
2017     case ETH_P_IP:
2018         virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
2019         break;
2020     case ETH_P_IPV6:
2021         virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
2022         break;
2023     default:
2024         g_assert_not_reached();
2025     }
2026 }
2027
2028 static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
2029                                          VirtioNetRscSeg *seg,
2030                                          const uint8_t *buf,
2031                                          struct tcp_header *n_tcp,
2032                                          struct tcp_header *o_tcp)
2033 {
2034     uint32_t nack, oack;
2035     uint16_t nwin, owin;
2036
2037     nack = htonl(n_tcp->th_ack);
2038     nwin = htons(n_tcp->th_win);
2039     oack = htonl(o_tcp->th_ack);
2040     owin = htons(o_tcp->th_win);
2041
2042     if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
2043         chain->stat.ack_out_of_win++;
2044         return RSC_FINAL;
2045     } else if (nack == oack) {
2046         /* duplicated ack or window probe */
2047         if (nwin == owin) {
2048             /* duplicated ack, add dup ack count due to whql test up to 1 */
2049             chain->stat.dup_ack++;
2050             return RSC_FINAL;
2051         } else {
2052             /* Coalesce window update */
2053             o_tcp->th_win = n_tcp->th_win;
2054             chain->stat.win_update++;
2055             return RSC_COALESCE;
2056         }
2057     } else {
2058         /* pure ack, go to 'C', finalize*/
2059         chain->stat.pure_ack++;
2060         return RSC_FINAL;
2061     }
2062 }
2063
2064 static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
2065                                             VirtioNetRscSeg *seg,
2066                                             const uint8_t *buf,
2067                                             VirtioNetRscUnit *n_unit)
2068 {
2069     void *data;
2070     uint16_t o_ip_len;
2071     uint32_t nseq, oseq;
2072     VirtioNetRscUnit *o_unit;
2073
2074     o_unit = &seg->unit;
2075     o_ip_len = htons(*o_unit->ip_plen);
2076     nseq = htonl(n_unit->tcp->th_seq);
2077     oseq = htonl(o_unit->tcp->th_seq);
2078
2079     /* out of order or retransmitted. */
2080     if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
2081         chain->stat.data_out_of_win++;
2082         return RSC_FINAL;
2083     }
2084
2085     data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
2086     if (nseq == oseq) {
2087         if ((o_unit->payload == 0) && n_unit->payload) {
2088             /* From no payload to payload, normal case, not a dup ack or etc */
2089             chain->stat.data_after_pure_ack++;
2090             goto coalesce;
2091         } else {
2092             return virtio_net_rsc_handle_ack(chain, seg, buf,
2093                                              n_unit->tcp, o_unit->tcp);
2094         }
2095     } else if ((nseq - oseq) != o_unit->payload) {
2096         /* Not a consistent packet, out of order */
2097         chain->stat.data_out_of_order++;
2098         return RSC_FINAL;
2099     } else {
2100 coalesce:
2101         if ((o_ip_len + n_unit->payload) > chain->max_payload) {
2102             chain->stat.over_size++;
2103             return RSC_FINAL;
2104         }
2105
2106         /* Here comes the right data, the payload length in v4/v6 is different,
2107            so use the field value to update and record the new data len */
2108         o_unit->payload += n_unit->payload; /* update new data len */
2109
2110         /* update field in ip header */
2111         *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);
2112
2113         /* Bring 'PUSH' big, the whql test guide says 'PUSH' can be coalesced
2114            for windows guest, while this may change the behavior for linux
2115            guest (only if it uses RSC feature). */
2116         o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;
2117
2118         o_unit->tcp->th_ack = n_unit->tcp->th_ack;
2119         o_unit->tcp->th_win = n_unit->tcp->th_win;
2120
2121         memmove(seg->buf + seg->size, data, n_unit->payload);
2122         seg->size += n_unit->payload;
2123         seg->packets++;
2124         chain->stat.coalesced++;
2125         return RSC_COALESCE;
2126     }
2127 }
2128
2129 static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
2130                                         VirtioNetRscSeg *seg,
2131                                         const uint8_t *buf, size_t size,
2132                                         VirtioNetRscUnit *unit)
2133 {
2134     struct ip_header *ip1, *ip2;
2135
2136     ip1 = (struct ip_header *)(unit->ip);
2137     ip2 = (struct ip_header *)(seg->unit.ip);
2138     if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
2139         || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2140         || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2141         chain->stat.no_match++;
2142         return RSC_NO_MATCH;
2143     }
2144
2145     return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2146 }
2147
2148 static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
2149                                         VirtioNetRscSeg *seg,
2150                                         const uint8_t *buf, size_t size,
2151                                         VirtioNetRscUnit *unit)
2152 {
2153     struct ip6_header *ip1, *ip2;
2154
2155     ip1 = (struct ip6_header *)(unit->ip);
2156     ip2 = (struct ip6_header *)(seg->unit.ip);
2157     if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
2158         || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
2159         || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2160         || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2161             chain->stat.no_match++;
2162             return RSC_NO_MATCH;
2163     }
2164
2165     return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2166 }
2167
2168 /* Packets with 'SYN' should bypass, other flag should be sent after drain
2169  * to prevent out of order */
2170 static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
2171                                          struct tcp_header *tcp)
2172 {
2173     uint16_t tcp_hdr;
2174     uint16_t tcp_flag;
2175
2176     tcp_flag = htons(tcp->th_offset_flags);
2177     tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
2178     tcp_flag &= VIRTIO_NET_TCP_FLAG;
2179     if (tcp_flag & TH_SYN) {
2180         chain->stat.tcp_syn++;
2181         return RSC_BYPASS;
2182     }
2183
2184     if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
2185         chain->stat.tcp_ctrl_drain++;
2186         return RSC_FINAL;
2187     }
2188
2189     if (tcp_hdr > sizeof(struct tcp_header)) {
2190         chain->stat.tcp_all_opt++;
2191         return RSC_FINAL;
2192     }
2193
2194     return RSC_CANDIDATE;
2195 }
2196
2197 static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
2198                                          NetClientState *nc,
2199                                          const uint8_t *buf, size_t size,
2200                                          VirtioNetRscUnit *unit)
2201 {
2202     int ret;
2203     VirtioNetRscSeg *seg, *nseg;
2204
2205     if (QTAILQ_EMPTY(&chain->buffers)) {
2206         chain->stat.empty_cache++;
2207         virtio_net_rsc_cache_buf(chain, nc, buf, size);
2208         timer_mod(chain->drain_timer,
2209               qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
2210         return size;
2211     }
2212
2213     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2214         if (chain->proto == ETH_P_IP) {
2215             ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
2216         } else {
2217             ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
2218         }
2219
2220         if (ret == RSC_FINAL) {
2221             if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2222                 /* Send failed */
2223                 chain->stat.final_failed++;
2224                 return 0;
2225             }
2226
2227             /* Send current packet */
2228             return virtio_net_do_receive(nc, buf, size);
2229         } else if (ret == RSC_NO_MATCH) {
2230             continue;
2231         } else {
2232             /* Coalesced, mark coalesced flag to tell calc cksum for ipv4 */
2233             seg->is_coalesced = 1;
2234             return size;
2235         }
2236     }
2237
2238     chain->stat.no_match_cache++;
2239     virtio_net_rsc_cache_buf(chain, nc, buf, size);
2240     return size;
2241 }
2242
2243 /* Drain a connection data, this is to avoid out of order segments */
2244 static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
2245                                         NetClientState *nc,
2246                                         const uint8_t *buf, size_t size,
2247                                         uint16_t ip_start, uint16_t ip_size,
2248                                         uint16_t tcp_port)
2249 {
2250     VirtioNetRscSeg *seg, *nseg;
2251     uint32_t ppair1, ppair2;
2252
2253     ppair1 = *(uint32_t *)(buf + tcp_port);
2254     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2255         ppair2 = *(uint32_t *)(seg->buf + tcp_port);
2256         if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
2257             || (ppair1 != ppair2)) {
2258             continue;
2259         }
2260         if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2261             chain->stat.drain_failed++;
2262         }
2263
2264         break;
2265     }
2266
2267     return virtio_net_do_receive(nc, buf, size);
2268 }
2269
2270 static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
2271                                             struct ip_header *ip,
2272                                             const uint8_t *buf, size_t size)
2273 {
2274     uint16_t ip_len;
2275
2276     /* Not an ipv4 packet */
2277     if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
2278         chain->stat.ip_option++;
2279         return RSC_BYPASS;
2280     }
2281
2282     /* Don't handle packets with ip option */
2283     if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
2284         chain->stat.ip_option++;
2285         return RSC_BYPASS;
2286     }
2287
2288     if (ip->ip_p != IPPROTO_TCP) {
2289         chain->stat.bypass_not_tcp++;
2290         return RSC_BYPASS;
2291     }
2292
2293     /* Don't handle packets with ip fragment */
2294     if (!(htons(ip->ip_off) & IP_DF)) {
2295         chain->stat.ip_frag++;
2296         return RSC_BYPASS;
2297     }
2298
2299     /* Don't handle packets with ecn flag */
2300     if (IPTOS_ECN(ip->ip_tos)) {
2301         chain->stat.ip_ecn++;
2302         return RSC_BYPASS;
2303     }
2304
2305     ip_len = htons(ip->ip_len);
2306     if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
2307         || ip_len > (size - chain->n->guest_hdr_len -
2308                      sizeof(struct eth_header))) {
2309         chain->stat.ip_hacked++;
2310         return RSC_BYPASS;
2311     }
2312
2313     return RSC_CANDIDATE;
2314 }
2315
2316 static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
2317                                       NetClientState *nc,
2318                                       const uint8_t *buf, size_t size)
2319 {
2320     int32_t ret;
2321     uint16_t hdr_len;
2322     VirtioNetRscUnit unit;
2323
2324     hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2325
2326     if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
2327         + sizeof(struct tcp_header))) {
2328         chain->stat.bypass_not_tcp++;
2329         return virtio_net_do_receive(nc, buf, size);
2330     }
2331
2332     virtio_net_rsc_extract_unit4(chain, buf, &unit);
2333     if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
2334         != RSC_CANDIDATE) {
2335         return virtio_net_do_receive(nc, buf, size);
2336     }
2337
2338     ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2339     if (ret == RSC_BYPASS) {
2340         return virtio_net_do_receive(nc, buf, size);
2341     } else if (ret == RSC_FINAL) {
2342         return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2343                 ((hdr_len + sizeof(struct eth_header)) + 12),
2344                 VIRTIO_NET_IP4_ADDR_SIZE,
2345                 hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
2346     }
2347
2348     return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2349 }
2350
2351 static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
2352                                             struct ip6_header *ip6,
2353                                             const uint8_t *buf, size_t size)
2354 {
2355     uint16_t ip_len;
2356
2357     if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
2358         != IP_HEADER_VERSION_6) {
2359         return RSC_BYPASS;
2360     }
2361
2362     /* Both option and protocol is checked in this */
2363     if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
2364         chain->stat.bypass_not_tcp++;
2365         return RSC_BYPASS;
2366     }
2367
2368     ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
2369     if (ip_len < sizeof(struct tcp_header) ||
2370         ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
2371                   - sizeof(struct ip6_header))) {
2372         chain->stat.ip_hacked++;
2373         return RSC_BYPASS;
2374     }
2375
2376     /* Don't handle packets with ecn flag */
2377     if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
2378         chain->stat.ip_ecn++;
2379         return RSC_BYPASS;
2380     }
2381
2382     return RSC_CANDIDATE;
2383 }
2384
2385 static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
2386                                       const uint8_t *buf, size_t size)
2387 {
2388     int32_t ret;
2389     uint16_t hdr_len;
2390     VirtioNetRscChain *chain;
2391     VirtioNetRscUnit unit;
2392
2393     chain = (VirtioNetRscChain *)opq;
2394     hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2395
2396     if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
2397         + sizeof(tcp_header))) {
2398         return virtio_net_do_receive(nc, buf, size);
2399     }
2400
2401     virtio_net_rsc_extract_unit6(chain, buf, &unit);
2402     if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain,
2403                                                  unit.ip, buf, size)) {
2404         return virtio_net_do_receive(nc, buf, size);
2405     }
2406
2407     ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2408     if (ret == RSC_BYPASS) {
2409         return virtio_net_do_receive(nc, buf, size);
2410     } else if (ret == RSC_FINAL) {
2411         return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2412                 ((hdr_len + sizeof(struct eth_header)) + 8),
2413                 VIRTIO_NET_IP6_ADDR_SIZE,
2414                 hdr_len + sizeof(struct eth_header)
2415                 + sizeof(struct ip6_header));
2416     }
2417
2418     return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2419 }
2420
2421 static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
2422                                                       NetClientState *nc,
2423                                                       uint16_t proto)
2424 {
2425     VirtioNetRscChain *chain;
2426
2427     if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
2428         return NULL;
2429     }
2430
2431     QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
2432         if (chain->proto == proto) {
2433             return chain;
2434         }
2435     }
2436
2437     chain = g_malloc(sizeof(*chain));
2438     chain->n = n;
2439     chain->proto = proto;
2440     if (proto == (uint16_t)ETH_P_IP) {
2441         chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
2442         chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2443     } else {
2444         chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
2445         chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2446     }
2447     chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST,
2448                                       virtio_net_rsc_purge, chain);
2449     memset(&chain->stat, 0, sizeof(chain->stat));
2450
2451     QTAILQ_INIT(&chain->buffers);
2452     QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
2453
2454     return chain;
2455 }
2456
2457 static ssize_t virtio_net_rsc_receive(NetClientState *nc,
2458                                       const uint8_t *buf,
2459                                       size_t size)
2460 {
2461     uint16_t proto;
2462     VirtioNetRscChain *chain;
2463     struct eth_header *eth;
2464     VirtIONet *n;
2465
2466     n = qemu_get_nic_opaque(nc);
2467     if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
2468         return virtio_net_do_receive(nc, buf, size);
2469     }
2470
2471     eth = (struct eth_header *)(buf + n->guest_hdr_len);
2472     proto = htons(eth->h_proto);
2473
2474     chain = virtio_net_rsc_lookup_chain(n, nc, proto);
2475     if (chain) {
2476         chain->stat.received++;
2477         if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
2478             return virtio_net_rsc_receive4(chain, nc, buf, size);
2479         } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
2480             return virtio_net_rsc_receive6(chain, nc, buf, size);
2481         }
2482     }
2483     return virtio_net_do_receive(nc, buf, size);
2484 }
2485
2486 static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
2487                                   size_t size)
2488 {
2489     VirtIONet *n = qemu_get_nic_opaque(nc);
2490     if ((n->rsc4_enabled || n->rsc6_enabled)) {
2491         return virtio_net_rsc_receive(nc, buf, size);
2492     } else {
2493         return virtio_net_do_receive(nc, buf, size);
2494     }
2495 }
2496
2497 static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
2498
2499 static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
2500 {
2501     VirtIONet *n = qemu_get_nic_opaque(nc);
2502     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
2503     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2504
2505     virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
2506     virtio_notify(vdev, q->tx_vq);
2507
2508     g_free(q->async_tx.elem);
2509     q->async_tx.elem = NULL;
2510
2511     virtio_queue_set_notification(q->tx_vq, 1);
2512     virtio_net_flush_tx(q);
2513 }
2514
2515 /* TX */
2516 static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
2517 {
2518     VirtIONet *n = q->n;
2519     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2520     VirtQueueElement *elem;
2521     int32_t num_packets = 0;
2522     int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
2523     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2524         return num_packets;
2525     }
2526
2527     if (q->async_tx.elem) {
2528         virtio_queue_set_notification(q->tx_vq, 0);
2529         return num_packets;
2530     }
2531
2532     for (;;) {
2533         ssize_t ret;
2534         unsigned int out_num;
2535         struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
2536         struct virtio_net_hdr_mrg_rxbuf mhdr;
2537
2538         elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
2539         if (!elem) {
2540             break;
2541         }
2542
2543         out_num = elem->out_num;
2544         out_sg = elem->out_sg;
2545         if (out_num < 1) {
2546             virtio_error(vdev, "virtio-net header not in first element");
2547             virtqueue_detach_element(q->tx_vq, elem, 0);
2548             g_free(elem);
2549             return -EINVAL;
2550         }
2551
2552         if (n->has_vnet_hdr) {
2553             if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
2554                 n->guest_hdr_len) {
2555                 virtio_error(vdev, "virtio-net header incorrect");
2556                 virtqueue_detach_element(q->tx_vq, elem, 0);
2557                 g_free(elem);
2558                 return -EINVAL;
2559             }
2560             if (n->needs_vnet_hdr_swap) {
2561                 virtio_net_hdr_swap(vdev, (void *) &mhdr);
2562                 sg2[0].iov_base = &mhdr;
2563                 sg2[0].iov_len = n->guest_hdr_len;
2564                 out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
2565                                    out_sg, out_num,
2566                                    n->guest_hdr_len, -1);
2567                 if (out_num == VIRTQUEUE_MAX_SIZE) {
2568                     goto drop;
2569                 }
2570                 out_num += 1;
2571                 out_sg = sg2;
2572             }
2573         }
2574         /*
2575          * If host wants to see the guest header as is, we can
2576          * pass it on unchanged. Otherwise, copy just the parts
2577          * that host is interested in.
2578          */
2579         assert(n->host_hdr_len <= n->guest_hdr_len);
2580         if (n->host_hdr_len != n->guest_hdr_len) {
2581             unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
2582                                        out_sg, out_num,
2583                                        0, n->host_hdr_len);
2584             sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
2585                              out_sg, out_num,
2586                              n->guest_hdr_len, -1);
2587             out_num = sg_num;
2588             out_sg = sg;
2589         }
2590
2591         ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
2592                                       out_sg, out_num, virtio_net_tx_complete);
2593         if (ret == 0) {
2594             virtio_queue_set_notification(q->tx_vq, 0);
2595             q->async_tx.elem = elem;
2596             return -EBUSY;
2597         }
2598
2599 drop:
2600         virtqueue_push(q->tx_vq, elem, 0);
2601         virtio_notify(vdev, q->tx_vq);
2602         g_free(elem);
2603
2604         if (++num_packets >= n->tx_burst) {
2605             break;
2606         }
2607     }
2608     return num_packets;
2609 }
2610
2611 static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
2612 {
2613     VirtIONet *n = VIRTIO_NET(vdev);
2614     VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2615
2616     if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2617         virtio_net_drop_tx_queue_data(vdev, vq);
2618         return;
2619     }
2620
2621     /* This happens when device was stopped but VCPU wasn't. */
2622     if (!vdev->vm_running) {
2623         q->tx_waiting = 1;
2624         return;
2625     }
2626
2627     if (q->tx_waiting) {
2628         virtio_queue_set_notification(vq, 1);
2629         timer_del(q->tx_timer);
2630         q->tx_waiting = 0;
2631         if (virtio_net_flush_tx(q) == -EINVAL) {
2632             return;
2633         }
2634     } else {
2635         timer_mod(q->tx_timer,
2636                        qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2637         q->tx_waiting = 1;
2638         virtio_queue_set_notification(vq, 0);
2639     }
2640 }
2641
2642 static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
2643 {
2644     VirtIONet *n = VIRTIO_NET(vdev);
2645     VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2646
2647     if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2648         virtio_net_drop_tx_queue_data(vdev, vq);
2649         return;
2650     }
2651
2652     if (unlikely(q->tx_waiting)) {
2653         return;
2654     }
2655     q->tx_waiting = 1;
2656     /* This happens when device was stopped but VCPU wasn't. */
2657     if (!vdev->vm_running) {
2658         return;
2659     }
2660     virtio_queue_set_notification(vq, 0);
2661     qemu_bh_schedule(q->tx_bh);
2662 }
2663
2664 static void virtio_net_tx_timer(void *opaque)
2665 {
2666     VirtIONetQueue *q = opaque;
2667     VirtIONet *n = q->n;
2668     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2669     /* This happens when device was stopped but BH wasn't. */
2670     if (!vdev->vm_running) {
2671         /* Make sure tx waiting is set, so we'll run when restarted. */
2672         assert(q->tx_waiting);
2673         return;
2674     }
2675
2676     q->tx_waiting = 0;
2677
2678     /* Just in case the driver is not ready on more */
2679     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2680         return;
2681     }
2682
2683     virtio_queue_set_notification(q->tx_vq, 1);
2684     virtio_net_flush_tx(q);
2685 }
2686
2687 static void virtio_net_tx_bh(void *opaque)
2688 {
2689     VirtIONetQueue *q = opaque;
2690     VirtIONet *n = q->n;
2691     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2692     int32_t ret;
2693
2694     /* This happens when device was stopped but BH wasn't. */
2695     if (!vdev->vm_running) {
2696         /* Make sure tx waiting is set, so we'll run when restarted. */
2697         assert(q->tx_waiting);
2698         return;
2699     }
2700
2701     q->tx_waiting = 0;
2702
2703     /* Just in case the driver is not ready on more */
2704     if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
2705         return;
2706     }
2707
2708     ret = virtio_net_flush_tx(q);
2709     if (ret == -EBUSY || ret == -EINVAL) {
2710         return; /* Notification re-enable handled by tx_complete or device
2711                  * broken */
2712     }
2713
2714     /* If we flush a full burst of packets, assume there are
2715      * more coming and immediately reschedule */
2716     if (ret >= n->tx_burst) {
2717         qemu_bh_schedule(q->tx_bh);
2718         q->tx_waiting = 1;
2719         return;
2720     }
2721
2722     /* If less than a full burst, re-enable notification and flush
2723      * anything that may have come in while we weren't looking.  If
2724      * we find something, assume the guest is still active and reschedule */
2725     virtio_queue_set_notification(q->tx_vq, 1);
2726     ret = virtio_net_flush_tx(q);
2727     if (ret == -EINVAL) {
2728         return;
2729     } else if (ret > 0) {
2730         virtio_queue_set_notification(q->tx_vq, 0);
2731         qemu_bh_schedule(q->tx_bh);
2732         q->tx_waiting = 1;
2733     }
2734 }
2735
2736 static void virtio_net_add_queue(VirtIONet *n, int index)
2737 {
2738     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2739
2740     n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
2741                                            virtio_net_handle_rx);
2742
2743     if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
2744         n->vqs[index].tx_vq =
2745             virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2746                              virtio_net_handle_tx_timer);
2747         n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2748                                               virtio_net_tx_timer,
2749                                               &n->vqs[index]);
2750     } else {
2751         n->vqs[index].tx_vq =
2752             virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2753                              virtio_net_handle_tx_bh);
2754         n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
2755     }
2756
2757     n->vqs[index].tx_waiting = 0;
2758     n->vqs[index].n = n;
2759 }
2760
2761 static void virtio_net_del_queue(VirtIONet *n, int index)
2762 {
2763     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2764     VirtIONetQueue *q = &n->vqs[index];
2765     NetClientState *nc = qemu_get_subqueue(n->nic, index);
2766
2767     qemu_purge_queued_packets(nc);
2768
2769     virtio_del_queue(vdev, index * 2);
2770     if (q->tx_timer) {
2771         timer_free(q->tx_timer);
2772         q->tx_timer = NULL;
2773     } else {
2774         qemu_bh_delete(q->tx_bh);
2775         q->tx_bh = NULL;
2776     }
2777     q->tx_waiting = 0;
2778     virtio_del_queue(vdev, index * 2 + 1);
2779 }
2780
2781 static void virtio_net_change_num_queues(VirtIONet *n, int new_max_queues)
2782 {
2783     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2784     int old_num_queues = virtio_get_num_queues(vdev);
2785     int new_num_queues = new_max_queues * 2 + 1;
2786     int i;
2787
2788     assert(old_num_queues >= 3);
2789     assert(old_num_queues % 2 == 1);
2790
2791     if (old_num_queues == new_num_queues) {
2792         return;
2793     }
2794
2795     /*
2796      * We always need to remove and add ctrl vq if
2797      * old_num_queues != new_num_queues. Remove ctrl_vq first,
2798      * and then we only enter one of the following two loops.
2799      */
2800     virtio_del_queue(vdev, old_num_queues - 1);
2801
2802     for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
2803         /* new_num_queues < old_num_queues */
2804         virtio_net_del_queue(n, i / 2);
2805     }
2806
2807     for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
2808         /* new_num_queues > old_num_queues */
2809         virtio_net_add_queue(n, i / 2);
2810     }
2811
2812     /* add ctrl_vq last */
2813     n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
2814 }
2815
2816 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
2817 {
2818     int max = multiqueue ? n->max_queues : 1;
2819
2820     n->multiqueue = multiqueue;
2821     virtio_net_change_num_queues(n, max);
2822
2823     virtio_net_set_queues(n);
2824 }
2825
/*
 * post_load hook of vmstate_virtio_net_device.
 *
 * Re-derives the state that depends on the freshly loaded fields:
 * header layout, MAC table bookkeeping, guest offloads, queue setup,
 * per-subqueue link state, the announce timer and RSS.
 *
 * @opaque: the VirtIONet being loaded.
 * @version_id: not used by this function.
 *
 * Always returns 0.
 */
static int virtio_net_post_load_device(void *opaque, int version_id)
{
    VirtIONet *n = opaque;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int i, link_down;

    trace_virtio_net_post_load_device();
    /* Re-apply the header configuration from the loaded state/features. */
    virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
                               virtio_vdev_has_feature(vdev,
                                                       VIRTIO_F_VERSION_1),
                               virtio_vdev_has_feature(vdev,
                                                       VIRTIO_NET_F_HASH_REPORT));

    /* MAC_TABLE_ENTRIES may be different from the saved image */
    if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
        n->mac_table.in_use = 0;
    }

    /* Without the control feature, fall back to the supported offloads. */
    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
    }

    /*
     * curr_guest_offloads will be later overwritten by the
     * virtio_set_features_nocheck call done from the virtio_load.
     * Here we make sure it is preserved and restored accordingly
     * in the virtio_net_post_load_virtio callback.
     */
    n->saved_guest_offloads = n->curr_guest_offloads;

    virtio_net_set_queues(n);

    /* Find the first multicast entry in the saved MAC filter */
    for (i = 0; i < n->mac_table.in_use; i++) {
        /* Lowest bit of the first address byte set: multicast address. */
        if (n->mac_table.macs[i * ETH_ALEN] & 1) {
            break;
        }
    }
    /* Equals in_use when no multicast entry was found. */
    n->mac_table.first_multi = i;

    /* nc.link_down can't be migrated, so infer link_down according
     * to link status bit in n->status */
    link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
    for (i = 0; i < n->max_queues; i++) {
        qemu_get_subqueue(n->nic, i)->link_down = link_down;
    }

    /*
     * With both GUEST_ANNOUNCE and CTRL_VQ negotiated, reset the announce
     * timer; fire it right away if rounds remain, otherwise delete it.
     */
    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
        qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
                                  QEMU_CLOCK_VIRTUAL,
                                  virtio_net_announce_timer, n);
        if (n->announce_timer.round) {
            timer_mod(n->announce_timer.tm,
                      qemu_clock_get_ms(n->announce_timer.type));
        } else {
            qemu_announce_timer_del(&n->announce_timer, false);
        }
    }

    if (n->rss_data.enabled) {
        /* Software RSS is needed when the hash must be populated ... */
        n->rss_data.enabled_software_rss = n->rss_data.populate_hash;
        if (!n->rss_data.populate_hash) {
            /* ... otherwise try eBPF RSS first. */
            if (!virtio_net_attach_epbf_rss(n)) {
                if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
                    /* vhost peer: only a warning, no software fallback. */
                    warn_report("Can't post-load eBPF RSS for vhost");
                } else {
                    warn_report("Can't post-load eBPF RSS - "
                                "fallback to software RSS");
                    n->rss_data.enabled_software_rss = true;
                }
            }
        }

        trace_virtio_net_rss_enable(n->rss_data.hash_types,
                                    n->rss_data.indirections_len,
                                    sizeof(n->rss_data.key));
    } else {
        trace_virtio_net_rss_disable();
    }
    return 0;
}
2908
2909 static int virtio_net_post_load_virtio(VirtIODevice *vdev)
2910 {
2911     VirtIONet *n = VIRTIO_NET(vdev);
2912     /*
2913      * The actual needed state is now in saved_guest_offloads,
2914      * see virtio_net_post_load_device for detail.
2915      * Restore it back and apply the desired offloads.
2916      */
2917     n->curr_guest_offloads = n->saved_guest_offloads;
2918     if (peer_has_vnet_hdr(n)) {
2919         virtio_net_apply_guest_offloads(n);
2920     }
2921
2922     return 0;
2923 }
2924
/*
 * Migration description of a single VirtIONetQueue: only its tx_waiting
 * field is transferred, as a 32-bit unsigned value.
 */
static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
    .name = "virtio-net-queue-tx_waiting",
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
        VMSTATE_END_OF_LIST()
   },
};
2933
2934 static bool max_queues_gt_1(void *opaque, int version_id)
2935 {
2936     return VIRTIO_NET(opaque)->max_queues > 1;
2937 }
2938
2939 static bool has_ctrl_guest_offloads(void *opaque, int version_id)
2940 {
2941     return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
2942                                    VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
2943 }
2944
2945 static bool mac_table_fits(void *opaque, int version_id)
2946 {
2947     return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
2948 }
2949
/* VMState field test: exact negation of mac_table_fits(). */
static bool mac_table_doesnt_fit(void *opaque, int version_id)
{
    bool fits = mac_table_fits(opaque, version_id);

    return !fits;
}
2954
/* This temporary type is shared by all the WITH_TMP methods
 * although only some fields are used by each.
 */
struct VirtIONetMigTmp {
    /* The VirtIONet this temporary state belongs to. */
    VirtIONet      *parent;
    /* Points at parent->vqs[1]; set by virtio_net_tx_waiting_pre_save. */
    VirtIONetQueue *vqs_1;
    /* parent->curr_queues - 1, clamped to 0 when curr_queues is 0. */
    uint16_t        curr_queues_1;
    /* Copy of parent->has_ufo on save; checked against the peer on load. */
    uint8_t         has_ufo;
    /* Copy of parent->has_vnet_hdr on save; checked against the peer on load. */
    uint32_t        has_vnet_hdr;
};
2965
2966 /* The 2nd and subsequent tx_waiting flags are loaded later than
2967  * the 1st entry in the queues and only if there's more than one
2968  * entry.  We use the tmp mechanism to calculate a temporary
2969  * pointer and count and also validate the count.
2970  */
2971
2972 static int virtio_net_tx_waiting_pre_save(void *opaque)
2973 {
2974     struct VirtIONetMigTmp *tmp = opaque;
2975
2976     tmp->vqs_1 = tmp->parent->vqs + 1;
2977     tmp->curr_queues_1 = tmp->parent->curr_queues - 1;
2978     if (tmp->parent->curr_queues == 0) {
2979         tmp->curr_queues_1 = 0;
2980     }
2981
2982     return 0;
2983 }
2984
2985 static int virtio_net_tx_waiting_pre_load(void *opaque)
2986 {
2987     struct VirtIONetMigTmp *tmp = opaque;
2988
2989     /* Reuse the pointer setup from save */
2990     virtio_net_tx_waiting_pre_save(opaque);
2991
2992     if (tmp->parent->curr_queues > tmp->parent->max_queues) {
2993         error_report("virtio-net: curr_queues %x > max_queues %x",
2994             tmp->parent->curr_queues, tmp->parent->max_queues);
2995
2996         return -EINVAL;
2997     }
2998
2999     return 0; /* all good */
3000 }
3001
/*
 * Migration description used through VMSTATE_WITH_TMP for the tx_waiting
 * flags of the queues after the first one.  The pre_load/pre_save hooks
 * fill in vqs_1 and curr_queues_1, which give the array start and the
 * element count for the field below.
 */
static const VMStateDescription vmstate_virtio_net_tx_waiting = {
    .name      = "virtio-net-tx_waiting",
    .pre_load  = virtio_net_tx_waiting_pre_load,
    .pre_save  = virtio_net_tx_waiting_pre_save,
    .fields    = (VMStateField[]) {
        VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
                                     curr_queues_1,
                                     vmstate_virtio_net_queue_tx_waiting,
                                     struct VirtIONetQueue),
        VMSTATE_END_OF_LIST()
    },
};
3014
3015 /* the 'has_ufo' flag is just tested; if the incoming stream has the
3016  * flag set we need to check that we have it
3017  */
3018 static int virtio_net_ufo_post_load(void *opaque, int version_id)
3019 {
3020     struct VirtIONetMigTmp *tmp = opaque;
3021
3022     if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
3023         error_report("virtio-net: saved image requires TUN_F_UFO support");
3024         return -EINVAL;
3025     }
3026
3027     return 0;
3028 }
3029
3030 static int virtio_net_ufo_pre_save(void *opaque)
3031 {
3032     struct VirtIONetMigTmp *tmp = opaque;
3033
3034     tmp->has_ufo = tmp->parent->has_ufo;
3035
3036     return 0;
3037 }
3038
/*
 * Migration description used through VMSTATE_WITH_TMP for the has_ufo
 * flag: saved from the device by pre_save, validated against the peer
 * by post_load.
 */
static const VMStateDescription vmstate_virtio_net_has_ufo = {
    .name      = "virtio-net-ufo",
    .post_load = virtio_net_ufo_post_load,
    .pre_save  = virtio_net_ufo_pre_save,
    .fields    = (VMStateField[]) {
        VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};
3048
3049 /* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
3050  * flag set we need to check that we have it
3051  */
3052 static int virtio_net_vnet_post_load(void *opaque, int version_id)
3053 {
3054     struct VirtIONetMigTmp *tmp = opaque;
3055
3056     if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
3057         error_report("virtio-net: saved image requires vnet_hdr=on");
3058         return -EINVAL;
3059     }
3060
3061     return 0;
3062 }
3063
3064 static int virtio_net_vnet_pre_save(void *opaque)
3065 {
3066     struct VirtIONetMigTmp *tmp = opaque;
3067
3068     tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
3069
3070     return 0;
3071 }
3072
/*
 * Migration description used through VMSTATE_WITH_TMP for the
 * has_vnet_hdr flag: saved from the device by pre_save, validated
 * against the peer by post_load.
 */
static const VMStateDescription vmstate_virtio_net_has_vnet = {
    .name      = "virtio-net-vnet",
    .post_load = virtio_net_vnet_post_load,
    .pre_save  = virtio_net_vnet_pre_save,
    .fields    = (VMStateField[]) {
        VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};
3082
3083 static bool virtio_net_rss_needed(void *opaque)
3084 {
3085     return VIRTIO_NET(opaque)->rss_data.enabled;
3086 }
3087
/*
 * RSS subsection of the device state (listed in the subsections of
 * vmstate_virtio_net_device), gated by virtio_net_rss_needed().
 *
 * Carries the RSS flags, hash types, key and the indirection table; the
 * table is a variable-length array of uint16_t entries whose length is
 * rss_data.indirections_len.
 */
static const VMStateDescription vmstate_virtio_net_rss = {
    .name      = "virtio-net-device/rss",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = virtio_net_rss_needed,
    .fields = (VMStateField[]) {
        VMSTATE_BOOL(rss_data.enabled, VirtIONet),
        VMSTATE_BOOL(rss_data.redirect, VirtIONet),
        VMSTATE_BOOL(rss_data.populate_hash, VirtIONet),
        VMSTATE_UINT32(rss_data.hash_types, VirtIONet),
        VMSTATE_UINT16(rss_data.indirections_len, VirtIONet),
        VMSTATE_UINT16(rss_data.default_queue, VirtIONet),
        VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet,
                            VIRTIO_NET_RSS_MAX_KEY_SIZE),
        /* NOTE(review): the _ALLOC variant presumably allocates the table
         * on load - confirm against the macro definition. */
        VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet,
                                    rss_data.indirections_len, 0,
                                    vmstate_info_uint16, uint16_t),
        VMSTATE_END_OF_LIST()
    },
};
3108
/*
 * Device-specific migration state of virtio-net.
 *
 * The order of the entries in .fields is the order in which they appear
 * in the migration stream, so it must not be changed.  Only streams with
 * version VIRTIO_NET_VM_VERSION are accepted (minimum == current).
 * virtio_net_post_load_device() runs after the fields have been loaded.
 */
static const VMStateDescription vmstate_virtio_net_device = {
    .name = "virtio-net-device",
    .version_id = VIRTIO_NET_VM_VERSION,
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .post_load = virtio_net_post_load_device,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
        /* tx_waiting of the first queue only; the rest follow further down */
        VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
                               vmstate_virtio_net_queue_tx_waiting,
                               VirtIONetQueue),
        VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
        VMSTATE_UINT16(status, VirtIONet),
        VMSTATE_UINT8(promisc, VirtIONet),
        VMSTATE_UINT8(allmulti, VirtIONet),
        VMSTATE_UINT32(mac_table.in_use, VirtIONet),

        /* Guarded pair: If it fits we load it, else we throw it away
         * - can happen if source has a larger MAC table.; post-load
         *  sets flags in this case.
         */
        VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
                                0, mac_table_fits, mac_table.in_use,
                                 ETH_ALEN),
        VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
                                     mac_table.in_use, ETH_ALEN),

        /* Note: This is an array of uint32's that's always been saved as a
         * buffer; hold onto your endiannesses; it's actually used as a bitmap
         * but based on the uint.
         */
        VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
        /* has_vnet_hdr: checked against the peer, see the vnet vmsd */
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_vnet),
        VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
        VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
        VMSTATE_UINT8(alluni, VirtIONet),
        VMSTATE_UINT8(nomulti, VirtIONet),
        VMSTATE_UINT8(nouni, VirtIONet),
        VMSTATE_UINT8(nobcast, VirtIONet),
        /* has_ufo: checked against the peer, see the ufo vmsd */
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_ufo),
        /* The next two fields are only present when max_queues > 1 */
        VMSTATE_SINGLE_TEST(max_queues, VirtIONet, max_queues_gt_1, 0,
                            vmstate_info_uint16_equal, uint16_t),
        VMSTATE_UINT16_TEST(curr_queues, VirtIONet, max_queues_gt_1),
        /* tx_waiting of the second and following queues */
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_tx_waiting),
        /* Only present when VIRTIO_NET_F_CTRL_GUEST_OFFLOADS is set */
        VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
                            has_ctrl_guest_offloads),
        VMSTATE_END_OF_LIST()
   },
    .subsections = (const VMStateDescription * []) {
        &vmstate_virtio_net_rss,
        NULL
    }
};
3164
/*
 * Net client callbacks for the virtio-net NIC; passed to qemu_new_nic()
 * in virtio_net_device_realize().
 */
static NetClientInfo net_virtio_info = {
    .type = NET_CLIENT_DRIVER_NIC,
    .size = sizeof(NICState),
    .can_receive = virtio_net_can_receive,
    .receive = virtio_net_receive,
    .link_status_changed = virtio_net_set_link_status,
    .query_rx_filter = virtio_net_query_rxfilter,
    .announce = virtio_net_announce,
};
3174
3175 static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
3176 {
3177     VirtIONet *n = VIRTIO_NET(vdev);
3178     NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
3179     assert(n->vhost_started);
3180     return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
3181 }
3182
3183 static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
3184                                            bool mask)
3185 {
3186     VirtIONet *n = VIRTIO_NET(vdev);
3187     NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
3188     assert(n->vhost_started);
3189     vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
3190                              vdev, idx, mask);
3191 }
3192
3193 static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
3194 {
3195     virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);
3196
3197     n->config_size = virtio_feature_get_config_size(feature_sizes,
3198                                                     host_features);
3199 }
3200
3201 void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
3202                                    const char *type)
3203 {
3204     /*
3205      * The name can be NULL, the netclient name will be type.x.
3206      */
3207     assert(type != NULL);
3208
3209     g_free(n->netclient_name);
3210     g_free(n->netclient_type);
3211     n->netclient_name = g_strdup(name);
3212     n->netclient_type = g_strdup(type);
3213 }
3214
3215 static bool failover_unplug_primary(VirtIONet *n, DeviceState *dev)
3216 {
3217     HotplugHandler *hotplug_ctrl;
3218     PCIDevice *pci_dev;
3219     Error *err = NULL;
3220
3221     hotplug_ctrl = qdev_get_hotplug_handler(dev);
3222     if (hotplug_ctrl) {
3223         pci_dev = PCI_DEVICE(dev);
3224         pci_dev->partially_hotplugged = true;
3225         hotplug_handler_unplug_request(hotplug_ctrl, dev, &err);
3226         if (err) {
3227             error_report_err(err);
3228             return false;
3229         }
3230     } else {
3231         return false;
3232     }
3233     return true;
3234 }
3235
3236 static bool failover_replug_primary(VirtIONet *n, DeviceState *dev,
3237                                     Error **errp)
3238 {
3239     Error *err = NULL;
3240     HotplugHandler *hotplug_ctrl;
3241     PCIDevice *pdev = PCI_DEVICE(dev);
3242     BusState *primary_bus;
3243
3244     if (!pdev->partially_hotplugged) {
3245         return true;
3246     }
3247     primary_bus = dev->parent_bus;
3248     if (!primary_bus) {
3249         error_setg(errp, "virtio_net: couldn't find primary bus");
3250         return false;
3251     }
3252     qdev_set_parent_bus(dev, primary_bus, &error_abort);
3253     qatomic_set(&n->failover_primary_hidden, false);
3254     hotplug_ctrl = qdev_get_hotplug_handler(dev);
3255     if (hotplug_ctrl) {
3256         hotplug_handler_pre_plug(hotplug_ctrl, dev, &err);
3257         if (err) {
3258             goto out;
3259         }
3260         hotplug_handler_plug(hotplug_ctrl, dev, &err);
3261     }
3262     pdev->partially_hotplugged = false;
3263
3264 out:
3265     error_propagate(errp, err);
3266     return !err;
3267 }
3268
3269 static void virtio_net_handle_migration_primary(VirtIONet *n, MigrationState *s)
3270 {
3271     bool should_be_hidden;
3272     Error *err = NULL;
3273     DeviceState *dev = failover_find_primary_device(n);
3274
3275     if (!dev) {
3276         return;
3277     }
3278
3279     should_be_hidden = qatomic_read(&n->failover_primary_hidden);
3280
3281     if (migration_in_setup(s) && !should_be_hidden) {
3282         if (failover_unplug_primary(n, dev)) {
3283             vmstate_unregister(VMSTATE_IF(dev), qdev_get_vmsd(dev), dev);
3284             qapi_event_send_unplug_primary(dev->id);
3285             qatomic_set(&n->failover_primary_hidden, true);
3286         } else {
3287             warn_report("couldn't unplug primary device");
3288         }
3289     } else if (migration_has_failed(s)) {
3290         /* We already unplugged the device let's plug it back */
3291         if (!failover_replug_primary(n, dev, &err)) {
3292             if (err) {
3293                 error_report_err(err);
3294             }
3295         }
3296     }
3297 }
3298
3299 static void virtio_net_migration_state_notifier(Notifier *notifier, void *data)
3300 {
3301     MigrationState *s = data;
3302     VirtIONet *n = container_of(notifier, VirtIONet, migration_state);
3303     virtio_net_handle_migration_primary(n, s);
3304 }
3305
3306 static bool failover_hide_primary_device(DeviceListener *listener,
3307                                          QemuOpts *device_opts)
3308 {
3309     VirtIONet *n = container_of(listener, VirtIONet, primary_listener);
3310     const char *standby_id;
3311
3312     if (!device_opts) {
3313         return false;
3314     }
3315     standby_id = qemu_opt_get(device_opts, "failover_pair_id");
3316     if (g_strcmp0(standby_id, n->netclient_name) != 0) {
3317         return false;
3318     }
3319
3320     /* failover_primary_hidden is set during feature negotiation */
3321     return qatomic_read(&n->failover_primary_hidden);
3322 }
3323
/*
 * Realize callback of the virtio-net device.
 *
 * Validates the user configuration, derives host feature bits from it,
 * initializes the virtio device, creates the queue pairs and the control
 * virtqueue, creates the NIC and sets up the remaining per-device state.
 *
 * @dev: the device being realized.
 * @errp: set when the configuration is invalid; the function then
 *        returns early.
 */
static void virtio_net_device_realize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(dev);
    NetClientState *nc;
    int i;

    /* A configured MTU is advertised through VIRTIO_NET_F_MTU. */
    if (n->net_conf.mtu) {
        n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    if (n->net_conf.duplex_str) {
        /* Length 5 includes the NUL, so only exact matches are accepted. */
        if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
            n->net_conf.duplex = DUPLEX_HALF;
        } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
            n->net_conf.duplex = DUPLEX_FULL;
        } else {
            error_setg(errp, "'duplex' must be 'half' or 'full'");
            return;
        }
        n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
    } else {
        n->net_conf.duplex = DUPLEX_UNKNOWN;
    }

    if (n->net_conf.speed < SPEED_UNKNOWN) {
        error_setg(errp, "'speed' must be between 0 and INT_MAX");
        return;
    }
    if (n->net_conf.speed >= 0) {
        n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
    }

    /*
     * Failover: start with the primary hidden, and listen for device
     * creation and migration state changes.
     *
     * NOTE(review): the error returns further down do not unregister the
     * listener/notifier registered here - confirm whether unrealize (or
     * something else) cleans them up when realize fails.
     */
    if (n->failover) {
        n->primary_listener.hide_device = failover_hide_primary_device;
        qatomic_set(&n->failover_primary_hidden, true);
        device_listener_register(&n->primary_listener);
        n->migration_state.notify = virtio_net_migration_state_notifier;
        add_migration_state_change_notifier(&n->migration_state);
        n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY);
    }

    virtio_net_set_config_size(n, n->host_features);
    virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);

    /* From here on, error paths must undo virtio_init with virtio_cleanup. */

    /*
     * We set a lower limit on RX queue size to what it always was.
     * Guests that want a smaller ring can always resize it without
     * help from us (using virtio 1 and up).
     */
    if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
        n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
        !is_power_of_2(n->net_conf.rx_queue_size)) {
        error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
                   "must be a power of 2 between %d and %d.",
                   n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
                   VIRTQUEUE_MAX_SIZE);
        virtio_cleanup(vdev);
        return;
    }

    /* Same constraints for the TX queue size. */
    if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
        n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
        !is_power_of_2(n->net_conf.tx_queue_size)) {
        error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
                   "must be a power of 2 between %d and %d",
                   n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
                   VIRTQUEUE_MAX_SIZE);
        virtio_cleanup(vdev);
        return;
    }

    /* One RX and one TX virtqueue per pair, plus the control virtqueue. */
    n->max_queues = MAX(n->nic_conf.peers.queues, 1);
    if (n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX) {
        error_setg(errp, "Invalid number of queues (= %" PRIu32 "), "
                   "must be a positive integer less than %d.",
                   n->max_queues, (VIRTIO_QUEUE_MAX - 1) / 2);
        virtio_cleanup(vdev);
        return;
    }
    n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queues);
    n->curr_queues = 1;
    n->tx_timeout = n->net_conf.txtimer;

    /*
     * An unknown tx= value is only warned about; virtio_net_add_queue
     * treats everything other than "timer" as "bh".
     *
     * NOTE(review): the error_printf string has no trailing newline -
     * confirm whether that is intended.
     */
    if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
                       && strcmp(n->net_conf.tx, "bh")) {
        warn_report("virtio-net: "
                    "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
                    n->net_conf.tx);
        error_printf("Defaulting to \"bh\"");
    }

    /* Cap the TX queue size at what virtio_net_max_tx_queue_size allows. */
    n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
                                    n->net_conf.tx_queue_size);

    for (i = 0; i < n->max_queues; i++) {
        virtio_net_add_queue(n, i);
    }

    /* The control virtqueue comes after all RX/TX pairs. */
    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
    qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
    memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
    n->status = VIRTIO_NET_S_LINK_UP;
    qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
                              QEMU_CLOCK_VIRTUAL,
                              virtio_net_announce_timer, n);
    n->announce_timer.round = 0;

    if (n->netclient_type) {
        /*
         * Happen when virtio_net_set_netclient_name has been called.
         */
        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
                              n->netclient_type, n->netclient_name, n);
    } else {
        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
                              object_get_typename(OBJECT(dev)), dev->id, n);
    }

    for (i = 0; i < n->max_queues; i++) {
        n->nic->ncs[i].do_not_pad = true;
    }

    /* Use the vnet header on every subqueue's peer when it is supported. */
    peer_test_vnet_hdr(n);
    if (peer_has_vnet_hdr(n)) {
        for (i = 0; i < n->max_queues; i++) {
            qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
        }
        n->host_hdr_len = sizeof(struct virtio_net_hdr);
    } else {
        n->host_hdr_len = 0;
    }

    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);

    n->vqs[0].tx_waiting = 0;
    n->tx_burst = n->net_conf.txburst;
    virtio_net_set_mrg_rx_bufs(n, 0, 0, 0);
    n->promisc = 1; /* for compatibility */

    n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    /* VLAN filter bitmap: one bit per VLAN id. */
    n->vlans = g_malloc0(MAX_VLAN >> 3);

    nc = qemu_get_queue(n->nic);
    nc->rxfilter_notify_enabled = 1;

   /* For a vhost-vdpa peer, write the MAC address into its config. */
   if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        struct virtio_net_config netcfg = {};
        memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN);
        vhost_net_set_config(get_vhost_net(nc->peer),
            (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_MASTER);
    }
    QTAILQ_INIT(&n->rsc_chains);
    n->qdev = dev;

    net_rx_pkt_init(&n->rx_pkt, false);

    /* The eBPF program is only loaded when RSS is offered to the guest. */
    if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
        virtio_net_load_ebpf(n);
    }
}
3486
/*
 * Unrealize callback of the virtio-net device: releases what
 * virtio_net_device_realize() set up.
 *
 * @dev: the device being unrealized.
 */
static void virtio_net_device_unrealize(DeviceState *dev)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(dev);
    int i, max_queues;

    /* Mirrors the conditional virtio_net_load_ebpf() in realize. */
    if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
        virtio_net_unload_ebpf(n);
    }

    /* This will stop vhost backend if appropriate. */
    virtio_net_set_status(vdev, 0);

    g_free(n->netclient_name);
    n->netclient_name = NULL;
    g_free(n->netclient_type);
    n->netclient_type = NULL;

    g_free(n->mac_table.macs);
    g_free(n->vlans);

    /* Undo the failover registrations made in realize. */
    if (n->failover) {
        device_listener_unregister(&n->primary_listener);
        remove_migration_state_change_notifier(&n->migration_state);
    }

    /* Delete as many queue pairs as the current multiqueue mode implies. */
    max_queues = n->multiqueue ? n->max_queues : 1;
    for (i = 0; i < max_queues; i++) {
        virtio_net_del_queue(n, i);
    }
    /* delete also control vq */
    virtio_del_queue(vdev, max_queues * 2);
    qemu_announce_timer_del(&n->announce_timer, false);
    g_free(n->vqs);
    qemu_del_nic(n->nic);
    virtio_net_rsc_cleanup(n);
    g_free(n->rss_data.indirections_table);
    net_rx_pkt_uninit(n->rx_pkt);
    virtio_cleanup(vdev);
}
3527
3528 static void virtio_net_instance_init(Object *obj)
3529 {
3530     VirtIONet *n = VIRTIO_NET(obj);
3531
3532     /*
3533      * The default config_size is sizeof(struct virtio_net_config).
3534      * Can be overriden with virtio_net_set_config_size.
3535      */
3536     n->config_size = sizeof(struct virtio_net_config);
3537     device_add_bootindex_property(obj, &n->nic_conf.bootindex,
3538                                   "bootindex", "/ethernet-phy@0",
3539                                   DEVICE(n));
3540
3541     ebpf_rss_init(&n->ebpf_rss);
3542 }
3543
3544 static int virtio_net_pre_save(void *opaque)
3545 {
3546     VirtIONet *n = opaque;
3547
3548     /* At this point, backend must be stopped, otherwise
3549      * it might keep writing to memory. */
3550     assert(!n->vhost_started);
3551
3552     return 0;
3553 }
3554
3555 static bool primary_unplug_pending(void *opaque)
3556 {
3557     DeviceState *dev = opaque;
3558     DeviceState *primary;
3559     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3560     VirtIONet *n = VIRTIO_NET(vdev);
3561
3562     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
3563         return false;
3564     }
3565     primary = failover_find_primary_device(n);
3566     return primary ? primary->pending_deleted_event : false;
3567 }
3568
3569 static bool dev_unplug_pending(void *opaque)
3570 {
3571     DeviceState *dev = opaque;
3572     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3573
3574     return vdc->primary_unplug_pending(dev);
3575 }
3576
3577 static const VMStateDescription vmstate_virtio_net = {
3578     .name = "virtio-net",
3579     .minimum_version_id = VIRTIO_NET_VM_VERSION,
3580     .version_id = VIRTIO_NET_VM_VERSION,
3581     .fields = (VMStateField[]) {
3582         VMSTATE_VIRTIO_DEVICE,
3583         VMSTATE_END_OF_LIST()
3584     },
3585     .pre_save = virtio_net_pre_save,
3586     .dev_unplug_pending = dev_unplug_pending,
3587 };
3588
3589 static Property virtio_net_properties[] = {
3590     DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
3591                     VIRTIO_NET_F_CSUM, true),
3592     DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
3593                     VIRTIO_NET_F_GUEST_CSUM, true),
3594     DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
3595     DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
3596                     VIRTIO_NET_F_GUEST_TSO4, true),
3597     DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
3598                     VIRTIO_NET_F_GUEST_TSO6, true),
3599     DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
3600                     VIRTIO_NET_F_GUEST_ECN, true),
3601     DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
3602                     VIRTIO_NET_F_GUEST_UFO, true),
3603     DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
3604                     VIRTIO_NET_F_GUEST_ANNOUNCE, true),
3605     DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
3606                     VIRTIO_NET_F_HOST_TSO4, true),
3607     DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
3608                     VIRTIO_NET_F_HOST_TSO6, true),
3609     DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
3610                     VIRTIO_NET_F_HOST_ECN, true),
3611     DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
3612                     VIRTIO_NET_F_HOST_UFO, true),
3613     DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
3614                     VIRTIO_NET_F_MRG_RXBUF, true),
3615     DEFINE_PROP_BIT64("status", VirtIONet, host_features,
3616                     VIRTIO_NET_F_STATUS, true),
3617     DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
3618                     VIRTIO_NET_F_CTRL_VQ, true),
3619     DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
3620                     VIRTIO_NET_F_CTRL_RX, true),
3621     DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
3622                     VIRTIO_NET_F_CTRL_VLAN, true),
3623     DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
3624                     VIRTIO_NET_F_CTRL_RX_EXTRA, true),
3625     DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
3626                     VIRTIO_NET_F_CTRL_MAC_ADDR, true),
3627     DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
3628                     VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
3629     DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
3630     DEFINE_PROP_BIT64("rss", VirtIONet, host_features,
3631                     VIRTIO_NET_F_RSS, false),
3632     DEFINE_PROP_BIT64("hash", VirtIONet, host_features,
3633                     VIRTIO_NET_F_HASH_REPORT, false),
3634     DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
3635                     VIRTIO_NET_F_RSC_EXT, false),
3636     DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
3637                        VIRTIO_NET_RSC_DEFAULT_INTERVAL),
3638     DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
3639     DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
3640                        TX_TIMER_INTERVAL),
3641     DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
3642     DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
3643     DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
3644                        VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
3645     DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
3646                        VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
3647     DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
3648     DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
3649                      true),
3650     DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
3651     DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
3652     DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
3653     DEFINE_PROP_END_OF_LIST(),
3654 };
3655
3656 static void virtio_net_class_init(ObjectClass *klass, void *data)
3657 {
3658     DeviceClass *dc = DEVICE_CLASS(klass);
3659     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
3660
3661     device_class_set_props(dc, virtio_net_properties);
3662     dc->vmsd = &vmstate_virtio_net;
3663     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
3664     vdc->realize = virtio_net_device_realize;
3665     vdc->unrealize = virtio_net_device_unrealize;
3666     vdc->get_config = virtio_net_get_config;
3667     vdc->set_config = virtio_net_set_config;
3668     vdc->get_features = virtio_net_get_features;
3669     vdc->set_features = virtio_net_set_features;
3670     vdc->bad_features = virtio_net_bad_features;
3671     vdc->reset = virtio_net_reset;
3672     vdc->set_status = virtio_net_set_status;
3673     vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
3674     vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
3675     vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
3676     vdc->post_load = virtio_net_post_load_virtio;
3677     vdc->vmsd = &vmstate_virtio_net_device;
3678     vdc->primary_unplug_pending = primary_unplug_pending;
3679 }
3680
3681 static const TypeInfo virtio_net_info = {
3682     .name = TYPE_VIRTIO_NET,
3683     .parent = TYPE_VIRTIO_DEVICE,
3684     .instance_size = sizeof(VirtIONet),
3685     .instance_init = virtio_net_instance_init,
3686     .class_init = virtio_net_class_init,
3687 };
3688
3689 static void virtio_register_types(void)
3690 {
3691     type_register_static(&virtio_net_info);
3692 }
3693
3694 type_init(virtio_register_types)