drivers/net/xen-netfront.c

   1 /*
   2  * Virtual network driver for conversing with remote driver backends.
   3  *
   4  * Copyright (c) 2002-2005, K A Fraser
   5  * Copyright (c) 2005, XenSource Ltd
   6  *
   7  * This program is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU General Public License version 2
   9  * as published by the Free Software Foundation; or, when distributed
  10  * separately from the Linux kernel or incorporated into other
  11  * software packages, subject to the following license:
  12  *
  13  * Permission is hereby granted, free of charge, to any person obtaining a copy
  14  * of this source file (the "Software"), to deal in the Software without
  15  * restriction, including without limitation the rights to use, copy, modify,
  16  * merge, publish, distribute, sublicense, and/or sell copies of the Software,
  17  * and to permit persons to whom the Software is furnished to do so, subject to
  18  * the following conditions:
  19  *
  20  * The above copyright notice and this permission notice shall be included in
  21  * all copies or substantial portions of the Software.
  22  *
  23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  24  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  25  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  26  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  27  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  28  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  29  * IN THE SOFTWARE.
  30  */
  31
  32 #include <linux/module.h>
  33 #include <linux/kernel.h>
  34 #include <linux/netdevice.h>
  35 #include <linux/etherdevice.h>
  36 #include <linux/skbuff.h>
  37 #include <linux/ethtool.h>
  38 #include <linux/if_ether.h>
  39 #include <net/tcp.h>
  40 #include <linux/udp.h>
  41 #include <linux/moduleparam.h>
  42 #include <linux/mm.h>
  43 #include <linux/slab.h>
  44 #include <net/ip.h>
  45
  46 #include <xen/xen.h>
  47 #include <xen/xenbus.h>
  48 #include <xen/events.h>
  49 #include <xen/page.h>
  50 #include <xen/platform_pci.h>
  51 #include <xen/grant_table.h>
  52
  53 #include <xen/interface/io/netif.h>
  54 #include <xen/interface/memory.h>
  55 #include <xen/interface/grant_table.h>
  56
   /*
    * Forward declaration of the driver's ethtool operations table; being
    * static, its definition must appear elsewhere in this file.
    */
   57 static const struct ethtool_ops xennet_ethtool_ops;
  58
   /*
    * Driver-private per-skb data.  NETFRONT_SKB_CB() overlays this structure
    * on the skb's control buffer (skb->cb).
    */
   59 struct netfront_cb {
   60         struct page *page;
   61         unsigned offset;
   62 };
  63
   /* View an skb's control buffer (skb->cb) as a struct netfront_cb. */
   64 #define NETFRONT_SKB_CB(skb)    ((struct netfront_cb *)((skb)->cb))
   65
   /*
    * Linear-area size (before NET_IP_ALIGN is added) requested for each
    * receive skb allocated by xennet_alloc_rx_buffers().
    */
   66 #define RX_COPY_THRESHOLD 256
   67
   /* Stored in grant_{tx,rx}_ref[] slots that hold no live grant reference. */
   68 #define GRANT_INVALID_REF       0
   69
   /* Number of request/response slots in a one-page TX or RX shared ring. */
   70 #define NET_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, PAGE_SIZE)
   71 #define NET_RX_RING_SIZE __CONST_RING_SIZE(xen_netif_rx, PAGE_SIZE)
   /*
    * Cap on outstanding TX requests used by netfront_tx_slot_available():
    * the ring size, but never more than 256.
    */
   72 #define TX_MAX_TARGET min_t(int, NET_TX_RING_SIZE, 256)
  73
   /*
    * Packet and byte counters.  netfront_info holds a per-CPU pointer to
    * these; xennet_start_xmit() updates the TX counters between
    * u64_stats_update_begin() and u64_stats_update_end() on syncp.
    */
   74 struct netfront_stats {
   75         u64                     rx_packets;
   76         u64                     tx_packets;
   77         u64                     rx_bytes;
   78         u64                     tx_bytes;
   79         struct u64_stats_sync   syncp;
   80 };
  81
   /*
    * Per-device driver state, obtained from a net_device with netdev_priv().
    * It holds the front ends of the shared TX and RX rings, the bookkeeping
    * arrays that map ring slot ids to skbs and grant references, and the
    * receive-refill machinery.
    */
   82 struct netfront_info {
   83         struct list_head list;
   84         struct net_device *netdev;
   85
   86         struct napi_struct napi;
   87
   88         unsigned int evtchn;
   89         struct xenbus_device *xbdev;
   90
              /* Held by xennet_start_xmit() while it fills and reaps the TX ring. */
   91         spinlock_t   tx_lock;
   92         struct xen_netif_tx_front_ring tx;
   93         int tx_ring_ref;
   94
   95         /*
   96          * {tx,rx}_skbs store outstanding skbuffs. Free tx_skb entries
   97          * are linked from tx_skb_freelist through skb_entry.link.
   98          *
   99          *  NB. Freelist index entries are always going to be less than
  100          *  PAGE_OFFSET, whereas pointers to skbs will always be equal or
  101          *  greater than PAGE_OFFSET: we use this property to distinguish
  102          *  them.
  103          */
  104         union skb_entry {
  105                 struct sk_buff *skb;
  106                 unsigned long link;
  107         } tx_skbs[NET_TX_RING_SIZE];
              /* Head handle passed to gnttab_claim/release_grant_reference() for TX. */
  108         grant_ref_t gref_tx_head;
              /* Grant reference in use for each TX slot id, or GRANT_INVALID_REF. */
  109         grant_ref_t grant_tx_ref[NET_TX_RING_SIZE];
              /* Index of the first free entry in tx_skbs[]. */
  110         unsigned tx_skb_freelist;
  111
              /* Held by xennet_open() around its initial RX ring refill. */
  112         spinlock_t   rx_lock ____cacheline_aligned_in_smp;
  113         struct xen_netif_rx_front_ring rx;
  114         int rx_ring_ref;
  115
  116         /* Receive-ring batched refills. */
  117 #define RX_MIN_TARGET 8
  118 #define RX_DFL_MIN_TARGET 64
  119 #define RX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256)
  120         unsigned rx_min_target, rx_max_target, rx_target;
              /* skbs already allocated but not yet posted to the RX ring. */
  121         struct sk_buff_head rx_batch;
  122
              /* Armed by xennet_alloc_rx_buffers() when no buffer could be allocated. */
  123         struct timer_list rx_refill_timer;
  124
              /* skb and grant reference posted in each RX ring slot, indexed by slot id. */
  125         struct sk_buff *rx_skbs[NET_RX_RING_SIZE];
  126         grant_ref_t gref_rx_head;
  127         grant_ref_t grant_rx_ref[NET_RX_RING_SIZE];
  128
              /* NOTE(review): the three arrays below are not used by the code visible here. */
  129         unsigned long rx_pfn_array[NET_RX_RING_SIZE];
  130         struct multicall_entry rx_mcl[NET_RX_RING_SIZE+1];
  131         struct mmu_update rx_mmu[NET_RX_RING_SIZE];
  132
  133         /* Statistics */
  134         struct netfront_stats __percpu *stats;
  135
              /* Count of GSO skbs that checksum_setup() had to force to CHECKSUM_PARTIAL. */
  136         unsigned long rx_gso_checksum_fixup;
  137 };
 138
  /*
   * One receive response together with the extra-info segments that
   * accompanied it.  xennet_get_extras() stores an extra of type T in
   * extras[T - 1].
   */
  139 struct netfront_rx_info {
  140         struct xen_netif_rx_response rx;
  141         struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
  142 };
 143
 144 static void skb_entry_set_link(union skb_entry *list, unsigned short id)
 145 {
 146         list->link = id;
 147 }
 148
 149 static int skb_entry_is_link(const union skb_entry *list)
 150 {
 151         BUILD_BUG_ON(sizeof(list->skb) != sizeof(list->link));
 152         return (unsigned long)list->skb < PAGE_OFFSET;
 153 }
 154
  155 /*
  156  * Helper functions for acquiring and freeing slots in tx_skbs[].
  157  */
 158
 159 static void add_id_to_freelist(unsigned *head, union skb_entry *list,
 160                                unsigned short id)
 161 {
 162         skb_entry_set_link(&list[id], *head);
 163         *head = id;
 164 }
 165
 166 static unsigned short get_id_from_freelist(unsigned *head,
 167                                            union skb_entry *list)
 168 {
 169         unsigned int id = *head;
 170         *head = list[id].link;
 171         return id;
 172 }
 173
 174 static int xennet_rxidx(RING_IDX idx)
 175 {
 176         return idx & (NET_RX_RING_SIZE - 1);
 177 }
 178
 179 static struct sk_buff *xennet_get_rx_skb(struct netfront_info *np,
 180                                          RING_IDX ri)
 181 {
 182         int i = xennet_rxidx(ri);
 183         struct sk_buff *skb = np->rx_skbs[i];
 184         np->rx_skbs[i] = NULL;
 185         return skb;
 186 }
 187
 188 static grant_ref_t xennet_get_rx_ref(struct netfront_info *np,
 189                                             RING_IDX ri)
 190 {
 191         int i = xennet_rxidx(ri);
 192         grant_ref_t ref = np->grant_rx_ref[i];
 193         np->grant_rx_ref[i] = GRANT_INVALID_REF;
 194         return ref;
 195 }
 196
  /*
   * sysfs hooks.  With CONFIG_SYSFS the real functions are only declared
   * here; without it they collapse to stubs: addif evaluates to 0 and delif
   * does nothing.
   */
  197 #ifdef CONFIG_SYSFS
  198 static int xennet_sysfs_addif(struct net_device *netdev);
  199 static void xennet_sysfs_delif(struct net_device *netdev);
  200 #else /* !CONFIG_SYSFS */
  201 #define xennet_sysfs_addif(dev) (0)
  202 #define xennet_sysfs_delif(dev) do { } while (0)
  203 #endif
 204
 205 static bool xennet_can_sg(struct net_device *dev)
 206 {
 207         return dev->features & NETIF_F_SG;
 208 }
 209
 210
 211 static void rx_refill_timeout(unsigned long data)
 212 {
 213         struct net_device *dev = (struct net_device *)data;
 214         struct netfront_info *np = netdev_priv(dev);
 215         napi_schedule(&np->napi);
 216 }
 217
 218 static int netfront_tx_slot_available(struct netfront_info *np)
 219 {
 220         return (np->tx.req_prod_pvt - np->tx.rsp_cons) <
 221                 (TX_MAX_TARGET - MAX_SKB_FRAGS - 2);
 222 }
 223
 224 static void xennet_maybe_wake_tx(struct net_device *dev)
 225 {
 226         struct netfront_info *np = netdev_priv(dev);
 227
 228         if (unlikely(netif_queue_stopped(dev)) &&
 229             netfront_tx_slot_available(np) &&
 230             likely(netif_running(dev)))
 231                 netif_wake_queue(dev);
 232 }
 233
 234 static void xennet_alloc_rx_buffers(struct net_device *dev)
 235 {
 236         unsigned short id;
 237         struct netfront_info *np = netdev_priv(dev);
 238         struct sk_buff *skb;
 239         struct page *page;
 240         int i, batch_target, notify;
 241         RING_IDX req_prod = np->rx.req_prod_pvt;
 242         grant_ref_t ref;
 243         unsigned long pfn;
 244         void *vaddr;
 245         struct xen_netif_rx_request *req;
 246
 247         if (unlikely(!netif_carrier_ok(dev)))
 248                 return;
 249
 250         /*
 251          * Allocate skbuffs greedily, even though we batch updates to the
 252          * receive ring. This creates a less bursty demand on the memory
 253          * allocator, so should reduce the chance of failed allocation requests
 254          * both for ourself and for other kernel subsystems.
 255          */
 256         batch_target = np->rx_target - (req_prod - np->rx.rsp_cons);
 257         for (i = skb_queue_len(&np->rx_batch); i < batch_target; i++) {
 258                 skb = __netdev_alloc_skb(dev, RX_COPY_THRESHOLD + NET_IP_ALIGN,
 259                                          GFP_ATOMIC | __GFP_NOWARN);
 260                 if (unlikely(!skb))
 261                         goto no_skb;
 262
 263                 /* Align ip header to a 16 bytes boundary */
 264                 skb_reserve(skb, NET_IP_ALIGN);
 265
 266                 page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
 267                 if (!page) {
 268                         kfree_skb(skb);
 269 no_skb:
 270                         /* Any skbuffs queued for refill? Force them out. */
 271                         if (i != 0)
 272                                 goto refill;
 273                         /* Could not allocate any skbuffs. Try again later. */
 274                         mod_timer(&np->rx_refill_timer,
 275                                   jiffies + (HZ/10));
 276                         break;
 277                 }
 278
 279                 __skb_fill_page_desc(skb, 0, page, 0, 0);
 280                 skb_shinfo(skb)->nr_frags = 1;
 281                 __skb_queue_tail(&np->rx_batch, skb);
 282         }
 283
 284         /* Is the batch large enough to be worthwhile? */
 285         if (i < (np->rx_target/2)) {
 286                 if (req_prod > np->rx.sring->req_prod)
 287                         goto push;
 288                 return;
 289         }
 290
 291         /* Adjust our fill target if we risked running out of buffers. */
 292         if (((req_prod - np->rx.sring->rsp_prod) < (np->rx_target / 4)) &&
 293             ((np->rx_target *= 2) > np->rx_max_target))
 294                 np->rx_target = np->rx_max_target;
 295
 296  refill:
 297         for (i = 0; ; i++) {
 298                 skb = __skb_dequeue(&np->rx_batch);
 299                 if (skb == NULL)
 300                         break;
 301
 302                 skb->dev = dev;
 303
 304                 id = xennet_rxidx(req_prod + i);
 305
 306                 BUG_ON(np->rx_skbs[id]);
 307                 np->rx_skbs[id] = skb;
 308
 309                 ref = gnttab_claim_grant_reference(&np->gref_rx_head);
 310                 BUG_ON((signed short)ref < 0);
 311                 np->grant_rx_ref[id] = ref;
 312
 313                 pfn = page_to_pfn(skb_frag_page(&skb_shinfo(skb)->frags[0]));
 314                 vaddr = page_address(skb_frag_page(&skb_shinfo(skb)->frags[0]));
 315
 316                 req = RING_GET_REQUEST(&np->rx, req_prod + i);
 317                 gnttab_grant_foreign_access_ref(ref,
 318                                                 np->xbdev->otherend_id,
 319                                                 pfn_to_mfn(pfn),
 320                                                 0);
 321
 322                 req->id = id;
 323                 req->gref = ref;
 324         }
 325
 326         wmb();          /* barrier so backend seens requests */
 327
 328         /* Above is a suitable barrier to ensure backend will see requests. */
 329         np->rx.req_prod_pvt = req_prod + i;
 330  push:
 331         RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->rx, notify);
 332         if (notify)
 333                 notify_remote_via_irq(np->netdev->irq);
 334 }
 335
 336 static int xennet_open(struct net_device *dev)
 337 {
 338         struct netfront_info *np = netdev_priv(dev);
 339
 340         napi_enable(&np->napi);
 341
 342         spin_lock_bh(&np->rx_lock);
 343         if (netif_carrier_ok(dev)) {
 344                 xennet_alloc_rx_buffers(dev);
 345                 np->rx.sring->rsp_event = np->rx.rsp_cons + 1;
 346                 if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx))
 347                         napi_schedule(&np->napi);
 348         }
 349         spin_unlock_bh(&np->rx_lock);
 350
 351         netif_start_queue(dev);
 352
 353         return 0;
 354 }
 355
 356 static void xennet_tx_buf_gc(struct net_device *dev)
 357 {
 358         RING_IDX cons, prod;
 359         unsigned short id;
 360         struct netfront_info *np = netdev_priv(dev);
 361         struct sk_buff *skb;
 362
 363         BUG_ON(!netif_carrier_ok(dev));
 364
 365         do {
 366                 prod = np->tx.sring->rsp_prod;
 367                 rmb(); /* Ensure we see responses up to 'rp'. */
 368
 369                 for (cons = np->tx.rsp_cons; cons != prod; cons++) {
 370                         struct xen_netif_tx_response *txrsp;
 371
 372                         txrsp = RING_GET_RESPONSE(&np->tx, cons);
 373                         if (txrsp->status == XEN_NETIF_RSP_NULL)
 374                                 continue;
 375
 376                         id  = txrsp->id;
 377                         skb = np->tx_skbs[id].skb;
 378                         if (unlikely(gnttab_query_foreign_access(
 379                                 np->grant_tx_ref[id]) != 0)) {
 380                                 printk(KERN_ALERT "xennet_tx_buf_gc: warning "
 381                                        "-- grant still in use by backend "
 382                                        "domain.\n");
 383                                 BUG();
 384                         }
 385                         gnttab_end_foreign_access_ref(
 386                                 np->grant_tx_ref[id], GNTMAP_readonly);
 387                         gnttab_release_grant_reference(
 388                                 &np->gref_tx_head, np->grant_tx_ref[id]);
 389                         np->grant_tx_ref[id] = GRANT_INVALID_REF;
 390                         add_id_to_freelist(&np->tx_skb_freelist, np->tx_skbs, id);
 391                         dev_kfree_skb_irq(skb);
 392                 }
 393
 394                 np->tx.rsp_cons = prod;
 395
 396                 /*
 397                  * Set a new event, then check for race with update of tx_cons.
 398                  * Note that it is essential to schedule a callback, no matter
 399                  * how few buffers are pending. Even if there is space in the
 400                  * transmit ring, higher layers may be blocked because too much
 401                  * data is outstanding: in such cases notification from Xen is
 402                  * likely to be the only kick that we'll get.
 403                  */
 404                 np->tx.sring->rsp_event =
 405                         prod + ((np->tx.sring->req_prod - prod) >> 1) + 1;
 406                 mb();           /* update shared area */
 407         } while ((cons == prod) && (prod != np->tx.sring->rsp_prod));
 408
 409         xennet_maybe_wake_tx(dev);
 410 }
 411
 412 static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev,
 413                               struct xen_netif_tx_request *tx)
 414 {
 415         struct netfront_info *np = netdev_priv(dev);
 416         char *data = skb->data;
 417         unsigned long mfn;
 418         RING_IDX prod = np->tx.req_prod_pvt;
 419         int frags = skb_shinfo(skb)->nr_frags;
 420         unsigned int offset = offset_in_page(data);
 421         unsigned int len = skb_headlen(skb);
 422         unsigned int id;
 423         grant_ref_t ref;
 424         int i;
 425
 426         /* While the header overlaps a page boundary (including being
 427            larger than a page), split it it into page-sized chunks. */
 428         while (len > PAGE_SIZE - offset) {
 429                 tx->size = PAGE_SIZE - offset;
 430                 tx->flags |= XEN_NETTXF_more_data;
 431                 len -= tx->size;
 432                 data += tx->size;
 433                 offset = 0;
 434
 435                 id = get_id_from_freelist(&np->tx_skb_freelist, np->tx_skbs);
 436                 np->tx_skbs[id].skb = skb_get(skb);
 437                 tx = RING_GET_REQUEST(&np->tx, prod++);
 438                 tx->id = id;
 439                 ref = gnttab_claim_grant_reference(&np->gref_tx_head);
 440                 BUG_ON((signed short)ref < 0);
 441
 442                 mfn = virt_to_mfn(data);
 443                 gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id,
 444                                                 mfn, GNTMAP_readonly);
 445
 446                 tx->gref = np->grant_tx_ref[id] = ref;
 447                 tx->offset = offset;
 448                 tx->size = len;
 449                 tx->flags = 0;
 450         }
 451
 452         /* Grant backend access to each skb fragment page. */
 453         for (i = 0; i < frags; i++) {
 454                 skb_frag_t *frag = skb_shinfo(skb)->frags + i;
 455
 456                 tx->flags |= XEN_NETTXF_more_data;
 457
 458                 id = get_id_from_freelist(&np->tx_skb_freelist, np->tx_skbs);
 459                 np->tx_skbs[id].skb = skb_get(skb);
 460                 tx = RING_GET_REQUEST(&np->tx, prod++);
 461                 tx->id = id;
 462                 ref = gnttab_claim_grant_reference(&np->gref_tx_head);
 463                 BUG_ON((signed short)ref < 0);
 464
 465                 mfn = pfn_to_mfn(page_to_pfn(skb_frag_page(frag)));
 466                 gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id,
 467                                                 mfn, GNTMAP_readonly);
 468
 469                 tx->gref = np->grant_tx_ref[id] = ref;
 470                 tx->offset = frag->page_offset;
 471                 tx->size = skb_frag_size(frag);
 472                 tx->flags = 0;
 473         }
 474
 475         np->tx.req_prod_pvt = prod;
 476 }
 477
 478 static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev)
 479 {
 480         unsigned short id;
 481         struct netfront_info *np = netdev_priv(dev);
 482         struct netfront_stats *stats = this_cpu_ptr(np->stats);
 483         struct xen_netif_tx_request *tx;
 484         struct xen_netif_extra_info *extra;
 485         char *data = skb->data;
 486         RING_IDX i;
 487         grant_ref_t ref;
 488         unsigned long mfn;
 489         int notify;
 490         int frags = skb_shinfo(skb)->nr_frags;
 491         unsigned int offset = offset_in_page(data);
 492         unsigned int len = skb_headlen(skb);
 493         unsigned long flags;
 494
 495         /* If skb->len is too big for wire format, drop skb and alert
 496          * user about misconfiguration.
 497          */
 498         if (unlikely(skb->len > XEN_NETIF_MAX_TX_SIZE)) {
 499                 net_alert_ratelimited(
 500                         "xennet: skb->len = %u, too big for wire format\n",
 501                         skb->len);
 502                 goto drop;
 503         }
 504
 505         frags += DIV_ROUND_UP(offset + len, PAGE_SIZE);
 506         if (unlikely(frags > MAX_SKB_FRAGS + 1)) {
 507                 printk(KERN_ALERT "xennet: skb rides the rocket: %d frags\n",
 508                        frags);
 509                 dump_stack();
 510                 goto drop;
 511         }
 512
 513         spin_lock_irqsave(&np->tx_lock, flags);
 514
 515         if (unlikely(!netif_carrier_ok(dev) ||
 516                      (frags > 1 && !xennet_can_sg(dev)) ||
 517                      netif_needs_gso(skb, netif_skb_features(skb)))) {
 518                 spin_unlock_irqrestore(&np->tx_lock, flags);
 519                 goto drop;
 520         }
 521
 522         i = np->tx.req_prod_pvt;
 523
 524         id = get_id_from_freelist(&np->tx_skb_freelist, np->tx_skbs);
 525         np->tx_skbs[id].skb = skb;
 526
 527         tx = RING_GET_REQUEST(&np->tx, i);
 528
 529         tx->id   = id;
 530         ref = gnttab_claim_grant_reference(&np->gref_tx_head);
 531         BUG_ON((signed short)ref < 0);
 532         mfn = virt_to_mfn(data);
 533         gnttab_grant_foreign_access_ref(
 534                 ref, np->xbdev->otherend_id, mfn, GNTMAP_readonly);
 535         tx->gref = np->grant_tx_ref[id] = ref;
 536         tx->offset = offset;
 537         tx->size = len;
 538         extra = NULL;
 539
 540         tx->flags = 0;
 541         if (skb->ip_summed == CHECKSUM_PARTIAL)
 542                 /* local packet? */
 543                 tx->flags |= XEN_NETTXF_csum_blank | XEN_NETTXF_data_validated;
 544         else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
 545                 /* remote but checksummed. */
 546                 tx->flags |= XEN_NETTXF_data_validated;
 547
 548         if (skb_shinfo(skb)->gso_size) {
 549                 struct xen_netif_extra_info *gso;
 550
 551                 gso = (struct xen_netif_extra_info *)
 552                         RING_GET_REQUEST(&np->tx, ++i);
 553
 554                 if (extra)
 555                         extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE;
 556                 else
 557                         tx->flags |= XEN_NETTXF_extra_info;
 558
 559                 gso->u.gso.size = skb_shinfo(skb)->gso_size;
 560                 gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
 561                 gso->u.gso.pad = 0;
 562                 gso->u.gso.features = 0;
 563
 564                 gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
 565                 gso->flags = 0;
 566                 extra = gso;
 567         }
 568
 569         np->tx.req_prod_pvt = i + 1;
 570
 571         xennet_make_frags(skb, dev, tx);
 572         tx->size = skb->len;
 573
 574         RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->tx, notify);
 575         if (notify)
 576                 notify_remote_via_irq(np->netdev->irq);
 577
 578         u64_stats_update_begin(&stats->syncp);
 579         stats->tx_bytes += skb->len;
 580         stats->tx_packets++;
 581         u64_stats_update_end(&stats->syncp);
 582
 583         /* Note: It is not safe to access skb after xennet_tx_buf_gc()! */
 584         xennet_tx_buf_gc(dev);
 585
 586         if (!netfront_tx_slot_available(np))
 587                 netif_stop_queue(dev);
 588
 589         spin_unlock_irqrestore(&np->tx_lock, flags);
 590
 591         return NETDEV_TX_OK;
 592
 593  drop:
 594         dev->stats.tx_dropped++;
 595         dev_kfree_skb(skb);
 596         return NETDEV_TX_OK;
 597 }
 598
 599 static int xennet_close(struct net_device *dev)
 600 {
 601         struct netfront_info *np = netdev_priv(dev);
 602         netif_stop_queue(np->netdev);
 603         napi_disable(&np->napi);
 604         return 0;
 605 }
 606
 607 static void xennet_move_rx_slot(struct netfront_info *np, struct sk_buff *skb,
 608                                 grant_ref_t ref)
 609 {
 610         int new = xennet_rxidx(np->rx.req_prod_pvt);
 611
 612         BUG_ON(np->rx_skbs[new]);
 613         np->rx_skbs[new] = skb;
 614         np->grant_rx_ref[new] = ref;
 615         RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->id = new;
 616         RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->gref = ref;
 617         np->rx.req_prod_pvt++;
 618 }
 619
 620 static int xennet_get_extras(struct netfront_info *np,
 621                              struct xen_netif_extra_info *extras,
 622                              RING_IDX rp)
 623
 624 {
 625         struct xen_netif_extra_info *extra;
 626         struct device *dev = &np->netdev->dev;
 627         RING_IDX cons = np->rx.rsp_cons;
 628         int err = 0;
 629
 630         do {
 631                 struct sk_buff *skb;
 632                 grant_ref_t ref;
 633
 634                 if (unlikely(cons + 1 == rp)) {
 635                         if (net_ratelimit())
 636                                 dev_warn(dev, "Missing extra info\n");
 637                         err = -EBADR;
 638                         break;
 639                 }
 640
 641                 extra = (struct xen_netif_extra_info *)
 642                         RING_GET_RESPONSE(&np->rx, ++cons);
 643
 644                 if (unlikely(!extra->type ||
 645                              extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
 646                         if (net_ratelimit())
 647                                 dev_warn(dev, "Invalid extra type: %d\n",
 648                                         extra->type);
 649                         err = -EINVAL;
 650                 } else {
 651                         memcpy(&extras[extra->type - 1], extra,
 652                                sizeof(*extra));
 653                 }
 654
 655                 skb = xennet_get_rx_skb(np, cons);
 656                 ref = xennet_get_rx_ref(np, cons);
 657                 xennet_move_rx_slot(np, skb, ref);
 658         } while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE);
 659
 660         np->rx.rsp_cons = cons;
 661         return err;
 662 }
 663
 664 static int xennet_get_responses(struct netfront_info *np,
 665                                 struct netfront_rx_info *rinfo, RING_IDX rp,
 666                                 struct sk_buff_head *list)
 667 {
 668         struct xen_netif_rx_response *rx = &rinfo->rx;
 669         struct xen_netif_extra_info *extras = rinfo->extras;
 670         struct device *dev = &np->netdev->dev;
 671         RING_IDX cons = np->rx.rsp_cons;
 672         struct sk_buff *skb = xennet_get_rx_skb(np, cons);
 673         grant_ref_t ref = xennet_get_rx_ref(np, cons);
 674         int max = MAX_SKB_FRAGS + (rx->status <= RX_COPY_THRESHOLD);
 675         int frags = 1;
 676         int err = 0;
 677         unsigned long ret;
 678
 679         if (rx->flags & XEN_NETRXF_extra_info) {
 680                 err = xennet_get_extras(np, extras, rp);
 681                 cons = np->rx.rsp_cons;
 682         }
 683
 684         for (;;) {
 685                 if (unlikely(rx->status < 0 ||
 686                              rx->offset + rx->status > PAGE_SIZE)) {
 687                         if (net_ratelimit())
 688                                 dev_warn(dev, "rx->offset: %x, size: %u\n",
 689                                          rx->offset, rx->status);
 690                         xennet_move_rx_slot(np, skb, ref);
 691                         err = -EINVAL;
 692                         goto next;
 693                 }
 694
 695                 /*
 696                  * This definitely indicates a bug, either in this driver or in
 697                  * the backend driver. In future this should flag the bad
 698                  * situation to the system controller to reboot the backed.
 699                  */
 700                 if (ref == GRANT_INVALID_REF) {
 701                         if (net_ratelimit())
 702                                 dev_warn(dev, "Bad rx response id %d.\n",
 703                                          rx->id);
 704                         err = -EINVAL;
 705                         goto next;
 706                 }
 707
 708                 ret = gnttab_end_foreign_access_ref(ref, 0);
 709                 BUG_ON(!ret);
 710
 711                 gnttab_release_grant_reference(&np->gref_rx_head, ref);
 712
 713                 __skb_queue_tail(list, skb);
 714
 715 next:
 716                 if (!(rx->flags & XEN_NETRXF_more_data))
 717                         break;
 718
 719                 if (cons + frags == rp) {
 720                         if (net_ratelimit())
 721                                 dev_warn(dev, "Need more frags\n");
 722                         err = -ENOENT;
 723                         break;
 724                 }
 725
 726                 rx = RING_GET_RESPONSE(&np->rx, cons + frags);
 727                 skb = xennet_get_rx_skb(np, cons + frags);
 728                 ref = xennet_get_rx_ref(np, cons + frags);
 729                 frags++;
 730         }
 731
 732         if (unlikely(frags > max)) {
 733                 if (net_ratelimit())
 734                         dev_warn(dev, "Too many frags\n");
 735                 err = -E2BIG;
 736         }
 737
 738         if (unlikely(err))
 739                 np->rx.rsp_cons = cons + frags;
 740
 741         return err;
 742 }
 743
 744 static int xennet_set_skb_gso(struct sk_buff *skb,
 745                               struct xen_netif_extra_info *gso)
 746 {
 747         if (!gso->u.gso.size) {
 748                 if (net_ratelimit())
 749                         printk(KERN_WARNING "GSO size must not be zero.\n");
 750                 return -EINVAL;
 751         }
 752
 753         /* Currently only TCPv4 S.O. is supported. */
 754         if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
 755                 if (net_ratelimit())
 756                         printk(KERN_WARNING "Bad GSO type %d.\n", gso->u.gso.type);
 757                 return -EINVAL;
 758         }
 759
 760         skb_shinfo(skb)->gso_size = gso->u.gso.size;
 761         skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
 762
 763         /* Header must be checked, and gso_segs computed. */
 764         skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
 765         skb_shinfo(skb)->gso_segs = 0;
 766
 767         return 0;
 768 }
 769
 770 static RING_IDX xennet_fill_frags(struct netfront_info *np,
 771                                   struct sk_buff *skb,
 772                                   struct sk_buff_head *list)
 773 {
 774         struct skb_shared_info *shinfo = skb_shinfo(skb);
 775         int nr_frags = shinfo->nr_frags;
 776         RING_IDX cons = np->rx.rsp_cons;
 777         struct sk_buff *nskb;
 778
 779         while ((nskb = __skb_dequeue(list))) {
 780                 struct xen_netif_rx_response *rx =
 781                         RING_GET_RESPONSE(&np->rx, ++cons);
 782                 skb_frag_t *nfrag = &skb_shinfo(nskb)->frags[0];
 783
 784                 __skb_fill_page_desc(skb, nr_frags,
 785                                      skb_frag_page(nfrag),
 786                                      rx->offset, rx->status);
 787
 788                 skb->data_len += rx->status;
 789
 790                 skb_shinfo(nskb)->nr_frags = 0;
 791                 kfree_skb(nskb);
 792
 793                 nr_frags++;
 794         }
 795
 796         shinfo->nr_frags = nr_frags;
 797         return cons;
 798 }
 799
 800 static int checksum_setup(struct net_device *dev, struct sk_buff *skb)
 801 {
 802         struct iphdr *iph;
 803         unsigned char *th;
 804         int err = -EPROTO;
 805         int recalculate_partial_csum = 0;
 806
 807         /*
 808          * A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
 809          * peers can fail to set NETRXF_csum_blank when sending a GSO
 810          * frame. In this case force the SKB to CHECKSUM_PARTIAL and
 811          * recalculate the partial checksum.
 812          */
 813         if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
 814                 struct netfront_info *np = netdev_priv(dev);
 815                 np->rx_gso_checksum_fixup++;
 816                 skb->ip_summed = CHECKSUM_PARTIAL;
 817                 recalculate_partial_csum = 1;
 818         }
 819
 820         /* A non-CHECKSUM_PARTIAL SKB does not require setup. */
 821         if (skb->ip_summed != CHECKSUM_PARTIAL)
 822                 return 0;
 823
 824         if (skb->protocol != htons(ETH_P_IP))
 825                 goto out;
 826
 827         iph = (void *)skb->data;
 828         th = skb->data + 4 * iph->ihl;
 829         if (th >= skb_tail_pointer(skb))
 830                 goto out;
 831
 832         skb->csum_start = th - skb->head;
 833         switch (iph->protocol) {
 834         case IPPROTO_TCP:
 835                 skb->csum_offset = offsetof(struct tcphdr, check);
 836
 837                 if (recalculate_partial_csum) {
 838                         struct tcphdr *tcph = (struct tcphdr *)th;
 839                         tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
 840                                                          skb->len - iph->ihl*4,
 841                                                          IPPROTO_TCP, 0);
 842                 }
 843                 break;
 844         case IPPROTO_UDP:
 845                 skb->csum_offset = offsetof(struct udphdr, check);
 846
 847                 if (recalculate_partial_csum) {
 848                         struct udphdr *udph = (struct udphdr *)th;
 849                         udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
 850                                                          skb->len - iph->ihl*4,
 851                                                          IPPROTO_UDP, 0);
 852                 }
 853                 break;
 854         default:
 855                 if (net_ratelimit())
 856                         printk(KERN_ERR "Attempting to checksum a non-"
 857                                "TCP/UDP packet, dropping a protocol"
 858                                " %d packet", iph->protocol);
 859                 goto out;
 860         }
 861
 862         if ((th + skb->csum_offset + 2) > skb_tail_pointer(skb))
 863                 goto out;
 864
 865         err = 0;
 866
 867 out:
 868         return err;
 869 }
 870
 871 static int handle_incoming_queue(struct net_device *dev,
 872                                  struct sk_buff_head *rxq)
 873 {
 874         struct netfront_info *np = netdev_priv(dev);
 875         struct netfront_stats *stats = this_cpu_ptr(np->stats);
 876         int packets_dropped = 0;
 877         struct sk_buff *skb;
 878
 879         while ((skb = __skb_dequeue(rxq)) != NULL) {
 880                 struct page *page = NETFRONT_SKB_CB(skb)->page;
 881                 void *vaddr = page_address(page);
 882                 unsigned offset = NETFRONT_SKB_CB(skb)->offset;
 883
 884                 memcpy(skb->data, vaddr + offset,
 885                        skb_headlen(skb));
 886
 887                 if (page != skb_frag_page(&skb_shinfo(skb)->frags[0]))
 888                         __free_page(page);
 889
 890                 /* Ethernet work: Delayed to here as it peeks the header. */
 891                 skb->protocol = eth_type_trans(skb, dev);
 892
 893                 if (checksum_setup(dev, skb)) {
 894                         kfree_skb(skb);
 895                         packets_dropped++;
 896                         dev->stats.rx_errors++;
 897                         continue;
 898                 }
 899
 900                 u64_stats_update_begin(&stats->syncp);
 901                 stats->rx_packets++;
 902                 stats->rx_bytes += skb->len;
 903                 u64_stats_update_end(&stats->syncp);
 904
 905                 /* Pass it up. */
 906                 netif_receive_skb(skb);
 907         }
 908
 909         return packets_dropped;
 910 }
 911
/*
 * NAPI poll callback: consume up to @budget packets from the RX ring.
 *
 * Runs with np->rx_lock held for its whole duration.  Each packet may span
 * several ring responses; the responses of one packet are gathered as skbs
 * on tmpq, merged into a single skb, and queued on rxq.  Packets that fail
 * are moved to errq and purged.  After the ring walk the good packets are
 * delivered through handle_incoming_queue(), the RX fill target is
 * adapted, and fresh RX buffers are posted.
 *
 * Returns the number of packets processed, less those dropped by
 * handle_incoming_queue().
 */
static int xennet_poll(struct napi_struct *napi, int budget)
{
        struct netfront_info *np = container_of(napi, struct netfront_info, napi);
        struct net_device *dev = np->netdev;
        struct sk_buff *skb;
        struct netfront_rx_info rinfo;
        struct xen_netif_rx_response *rx = &rinfo.rx;
        struct xen_netif_extra_info *extras = rinfo.extras;
        RING_IDX i, rp;
        int work_done;
        struct sk_buff_head rxq;
        struct sk_buff_head errq;
        struct sk_buff_head tmpq;
        unsigned long flags;
        unsigned int len;
        int err;

        spin_lock(&np->rx_lock);

        skb_queue_head_init(&rxq);
        skb_queue_head_init(&errq);
        skb_queue_head_init(&tmpq);

        /* Snapshot the producer index once; only responses below it are used. */
        rp = np->rx.sring->rsp_prod;
        rmb(); /* Ensure we see queued responses up to 'rp'. */

        i = np->rx.rsp_cons;
        work_done = 0;
        while ((i != rp) && (work_done < budget)) {
                /* Work on a private copy of the first response of the packet. */
                memcpy(rx, RING_GET_RESPONSE(&np->rx, i), sizeof(*rx));
                memset(extras, 0, sizeof(rinfo.extras));

                err = xennet_get_responses(np, &rinfo, rp, &tmpq);

                if (unlikely(err)) {
                        /*
                         * Also entered via 'goto err' from the GSO check
                         * below.  Everything gathered for this packet is
                         * discarded and the walk resumes from rsp_cons.
                         * NOTE(review): this relies on rsp_cons already
                         * pointing past the bad packet (presumably advanced
                         * by xennet_get_responses() on failure) - confirm.
                         */
err:
                        while ((skb = __skb_dequeue(&tmpq)))
                                __skb_queue_tail(&errq, skb);
                        dev->stats.rx_errors++;
                        i = np->rx.rsp_cons;
                        continue;
                }

                /* The first skb on tmpq becomes the head of the packet. */
                skb = __skb_dequeue(&tmpq);

                if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
                        struct xen_netif_extra_info *gso;
                        gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];

                        if (unlikely(xennet_set_skb_gso(skb, gso))) {
                                /*
                                 * Put the head back so the error path frees
                                 * it too, and skip one ring slot per skb
                                 * gathered for this packet.
                                 */
                                __skb_queue_head(&tmpq, skb);
                                np->rx.rsp_cons += skb_queue_len(&tmpq);
                                goto err;
                        }
                }

                /*
                 * Remember where the packet data starts; the header bytes
                 * are copied into the linear area later, in
                 * handle_incoming_queue().
                 */
                NETFRONT_SKB_CB(skb)->page =
                        skb_frag_page(&skb_shinfo(skb)->frags[0]);
                NETFRONT_SKB_CB(skb)->offset = rx->offset;

                /* At most RX_COPY_THRESHOLD bytes go into the linear area. */
                len = rx->status;
                if (len > RX_COPY_THRESHOLD)
                        len = RX_COPY_THRESHOLD;
                skb_put(skb, len);

                if (rx->status > len) {
                        /* Frag 0 keeps whatever lies beyond the copied part. */
                        skb_shinfo(skb)->frags[0].page_offset =
                                rx->offset + len;
                        skb_frag_size_set(&skb_shinfo(skb)->frags[0], rx->status - len);
                        skb->data_len = rx->status - len;
                } else {
                        /* Everything fits in the linear area: drop frag 0. */
                        __skb_fill_page_desc(skb, 0, NULL, 0, 0);
                        skb_shinfo(skb)->nr_frags = 0;
                }

                /* Attach the remaining responses of this packet as frags. */
                i = xennet_fill_frags(np, skb, &tmpq);

                /*
                 * Truesize approximates the size of true data plus
                 * any supervisor overheads. Adding hypervisor
                 * overheads has been shown to significantly reduce
                 * achievable bandwidth with the default receive
                 * buffer size. It is therefore not wise to account
                 * for it here.
                 *
                 * After alloc_skb(RX_COPY_THRESHOLD), truesize is set
                 * to RX_COPY_THRESHOLD + the supervisor
                 * overheads. Here, we add the size of the data pulled
                 * in xennet_fill_frags().
                 *
                 * We also adjust for any unused space in the main
                 * data area by subtracting (RX_COPY_THRESHOLD -
                 * len). This is especially important with drivers
                 * which split incoming packets into header and data,
                 * using only 66 bytes of the main data area (see the
                 * e1000 driver for example.)  On such systems,
                 * without this last adjustment, our achievable
                 * receive throughput using the standard receive
                 * buffer size was cut by 25%(!!!).
                 */
                skb->truesize += skb->data_len - (RX_COPY_THRESHOLD - len);
                skb->len += skb->data_len;

                /* Translate the backend's checksum flags into skb state. */
                if (rx->flags & XEN_NETRXF_csum_blank)
                        skb->ip_summed = CHECKSUM_PARTIAL;
                else if (rx->flags & XEN_NETRXF_data_validated)
                        skb->ip_summed = CHECKSUM_UNNECESSARY;

                __skb_queue_tail(&rxq, skb);

                /* Step past the last response of this packet. */
                np->rx.rsp_cons = ++i;
                work_done++;
        }

        /* Free everything that was collected from failed packets. */
        __skb_queue_purge(&errq);

        /* Packets dropped during delivery do not count as work done. */
        work_done -= handle_incoming_queue(dev, &rxq);

        /* If we get a callback with very few responses, reduce fill target. */
        /* NB. Note exponential increase, linear decrease. */
        if (((np->rx.req_prod_pvt - np->rx.sring->rsp_prod) >
             ((3*np->rx_target) / 4)) &&
            (--np->rx_target < np->rx_min_target))
                np->rx_target = np->rx_min_target;

        xennet_alloc_rx_buffers(dev);

        if (work_done < budget) {
                int more_to_do = 0;

                /*
                 * Do the final ring check and the NAPI completion with
                 * local interrupts disabled.
                 */
                local_irq_save(flags);

                RING_FINAL_CHECK_FOR_RESPONSES(&np->rx, more_to_do);
                if (!more_to_do)
                        __napi_complete(napi);

                local_irq_restore(flags);
        }

        spin_unlock(&np->rx_lock);

        return work_done;
}
1055
1056 static int xennet_change_mtu(struct net_device *dev, int mtu)
1057 {
1058         int max = xennet_can_sg(dev) ?
1059                 XEN_NETIF_MAX_TX_SIZE - MAX_TCP_HEADER : ETH_DATA_LEN;
1060
1061         if (mtu > max)
1062                 return -EINVAL;
1063         dev->mtu = mtu;
1064         return 0;
1065 }
1066
1067 static struct rtnl_link_stats64 *xennet_get_stats64(struct net_device *dev,
1068                                                     struct rtnl_link_stats64 *tot)
1069 {
1070         struct netfront_info *np = netdev_priv(dev);
1071         int cpu;
1072
1073         for_each_possible_cpu(cpu) {
1074                 struct netfront_stats *stats = per_cpu_ptr(np->stats, cpu);
1075                 u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
1076                 unsigned int start;
1077
1078                 do {
1079                         start = u64_stats_fetch_begin_bh(&stats->syncp);
1080
1081                         rx_packets = stats->rx_packets;
1082                         tx_packets = stats->tx_packets;
1083                         rx_bytes = stats->rx_bytes;
1084                         tx_bytes = stats->tx_bytes;
1085                 } while (u64_stats_fetch_retry_bh(&stats->syncp, start));
1086
1087                 tot->rx_packets += rx_packets;
1088                 tot->tx_packets += tx_packets;
1089                 tot->rx_bytes   += rx_bytes;
1090                 tot->tx_bytes   += tx_bytes;
1091         }
1092
1093         tot->rx_errors  = dev->stats.rx_errors;
1094         tot->tx_dropped = dev->stats.tx_dropped;
1095
1096         return tot;
1097 }
1098
1099 static void xennet_release_tx_bufs(struct netfront_info *np)
1100 {
1101         struct sk_buff *skb;
1102         int i;
1103
1104         for (i = 0; i < NET_TX_RING_SIZE; i++) {
1105                 /* Skip over entries which are actually freelist references */
1106                 if (skb_entry_is_link(&np->tx_skbs[i]))
1107                         continue;
1108
1109                 skb = np->tx_skbs[i].skb;
1110                 gnttab_end_foreign_access_ref(np->grant_tx_ref[i],
1111                                               GNTMAP_readonly);
1112                 gnttab_release_grant_reference(&np->gref_tx_head,
1113                                                np->grant_tx_ref[i]);
1114                 np->grant_tx_ref[i] = GRANT_INVALID_REF;
1115                 add_id_to_freelist(&np->tx_skb_freelist, np->tx_skbs, i);
1116                 dev_kfree_skb_irq(skb);
1117         }
1118 }
1119
/*
 * Release the RX buffers posted to the backend.
 *
 * As written this function only logs a warning and returns: the
 * unconditional 'return' below makes everything after it unreachable.
 * The remaining code reclaims pages through
 * gnttab_end_foreign_transfer_ref(), i.e. it is written for a
 * page-transfer receive path, and the warning text says it still has to
 * be fixed for the copying receiver.
 */
static void xennet_release_rx_bufs(struct netfront_info *np)
{
        struct mmu_update      *mmu = np->rx_mmu;
        struct multicall_entry *mcl = np->rx_mcl;
        struct sk_buff_head free_list;
        struct sk_buff *skb;
        unsigned long mfn;
        int xfer = 0, noxfer = 0, unused = 0;
        int id, ref;

        dev_warn(&np->netdev->dev, "%s: fix me for copying receiver.\n",
                         __func__);
        return;

        /* ---- Unreachable from here on; kept as reference code. ---- */

        skb_queue_head_init(&free_list);

        spin_lock_bh(&np->rx_lock);

        for (id = 0; id < NET_RX_RING_SIZE; id++) {
                ref = np->grant_rx_ref[id];
                if (ref == GRANT_INVALID_REF) {
                        /* Slot has no grant outstanding. */
                        unused++;
                        continue;
                }

                skb = np->rx_skbs[id];
                mfn = gnttab_end_foreign_transfer_ref(ref);
                gnttab_release_grant_reference(&np->gref_rx_head, ref);
                np->grant_rx_ref[id] = GRANT_INVALID_REF;

                if (0 == mfn) {
                        /* No frame came back: free the skb without its page. */
                        skb_shinfo(skb)->nr_frags = 0;
                        dev_kfree_skb(skb);
                        noxfer++;
                        continue;
                }

                if (!xen_feature(XENFEAT_auto_translated_physmap)) {
                        /* Remap the page. */
                        const struct page *page =
                                skb_frag_page(&skb_shinfo(skb)->frags[0]);
                        unsigned long pfn = page_to_pfn(page);
                        void *vaddr = page_address(page);

                        MULTI_update_va_mapping(mcl, (unsigned long)vaddr,
                                                mfn_pte(mfn, PAGE_KERNEL),
                                                0);
                        mcl++;
                        mmu->ptr = ((u64)mfn << PAGE_SHIFT)
                                | MMU_MACHPHYS_UPDATE;
                        mmu->val = pfn;
                        mmu++;

                        set_phys_to_machine(pfn, mfn);
                }
                /* Freed in one go after the batched hypercall below. */
                __skb_queue_tail(&free_list, skb);
                xfer++;
        }

        dev_info(&np->netdev->dev, "%s: %d xfer, %d noxfer, %d unused\n",
                 __func__, xfer, noxfer, unused);

        if (xfer) {
                if (!xen_feature(XENFEAT_auto_translated_physmap)) {
                        /* Do all the remapping work and M2P updates. */
                        MULTI_mmu_update(mcl, np->rx_mmu, mmu - np->rx_mmu,
                                         NULL, DOMID_SELF);
                        mcl++;
                        HYPERVISOR_multicall(np->rx_mcl, mcl - np->rx_mcl);
                }
        }

        __skb_queue_purge(&free_list);

        spin_unlock_bh(&np->rx_lock);
}
1196
1197 static void xennet_uninit(struct net_device *dev)
1198 {
1199         struct netfront_info *np = netdev_priv(dev);
1200         xennet_release_tx_bufs(np);
1201         xennet_release_rx_bufs(np);
1202         gnttab_free_grant_references(np->gref_tx_head);
1203         gnttab_free_grant_references(np->gref_rx_head);
1204 }
1205
1206 static netdev_features_t xennet_fix_features(struct net_device *dev,
1207         netdev_features_t features)
1208 {
1209         struct netfront_info *np = netdev_priv(dev);
1210         int val;
1211
1212         if (features & NETIF_F_SG) {
1213                 if (xenbus_scanf(XBT_NIL, np->xbdev->otherend, "feature-sg",
1214                                  "%d", &val) < 0)
1215                         val = 0;
1216
1217                 if (!val)
1218                         features &= ~NETIF_F_SG;
1219         }
1220
1221         if (features & NETIF_F_TSO) {
1222                 if (xenbus_scanf(XBT_NIL, np->xbdev->otherend,
1223                                  "feature-gso-tcpv4", "%d", &val) < 0)
1224                         val = 0;
1225
1226                 if (!val)
1227                         features &= ~NETIF_F_TSO;
1228         }
1229
1230         return features;
1231 }
1232
1233 static int xennet_set_features(struct net_device *dev,
1234         netdev_features_t features)
1235 {
1236         if (!(features & NETIF_F_SG) && dev->mtu > ETH_DATA_LEN) {
1237                 netdev_info(dev, "Reducing MTU because no SG offload");
1238                 dev->mtu = ETH_DATA_LEN;
1239         }
1240
1241         return 0;
1242 }
1243
1244 static irqreturn_t xennet_interrupt(int irq, void *dev_id)
1245 {
1246         struct net_device *dev = dev_id;
1247         struct netfront_info *np = netdev_priv(dev);
1248         unsigned long flags;
1249
1250         spin_lock_irqsave(&np->tx_lock, flags);
1251
1252         if (likely(netif_carrier_ok(dev))) {
1253                 xennet_tx_buf_gc(dev);
1254                 /* Under tx_lock: protects access to rx shared-ring indexes. */
1255                 if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx))
1256                         napi_schedule(&np->napi);
1257         }
1258
1259         spin_unlock_irqrestore(&np->tx_lock, flags);
1260
1261         return IRQ_HANDLED;
1262 }
1263
#ifdef CONFIG_NET_POLL_CONTROLLER
/*
 * ndo_poll_controller handler: run the interrupt handler directly for
 * @dev.  The handler's return value is not needed here.
 */
static void xennet_poll_controller(struct net_device *dev)
{
        (void)xennet_interrupt(0, dev);
}
#endif
1270
1271 static const struct net_device_ops xennet_netdev_ops = {
1272         .ndo_open            = xennet_open,
1273         .ndo_uninit          = xennet_uninit,
1274         .ndo_stop            = xennet_close,
1275         .ndo_start_xmit      = xennet_start_xmit,
1276         .ndo_change_mtu      = xennet_change_mtu,
1277         .ndo_get_stats64     = xennet_get_stats64,
1278         .ndo_set_mac_address = eth_mac_addr,
1279         .ndo_validate_addr   = eth_validate_addr,
1280         .ndo_fix_features    = xennet_fix_features,
1281         .ndo_set_features    = xennet_set_features,
1282 #ifdef CONFIG_NET_POLL_CONTROLLER
1283         .ndo_poll_controller = xennet_poll_controller,
1284 #endif
1285 };
1286
1287 static struct net_device * __devinit xennet_create_dev(struct xenbus_device *dev)
1288 {
1289         int i, err;
1290         struct net_device *netdev;
1291         struct netfront_info *np;
1292
1293         netdev = alloc_etherdev(sizeof(struct netfront_info));
1294         if (!netdev)
1295                 return ERR_PTR(-ENOMEM);
1296
1297         np                   = netdev_priv(netdev);
1298         np->xbdev            = dev;
1299
1300         spin_lock_init(&np->tx_lock);
1301         spin_lock_init(&np->rx_lock);
1302
1303         skb_queue_head_init(&np->rx_batch);
1304         np->rx_target     = RX_DFL_MIN_TARGET;
1305         np->rx_min_target = RX_DFL_MIN_TARGET;
1306         np->rx_max_target = RX_MAX_TARGET;
1307
1308         init_timer(&np->rx_refill_timer);
1309         np->rx_refill_timer.data = (unsigned long)netdev;
1310         np->rx_refill_timer.function = rx_refill_timeout;
1311
1312         err = -ENOMEM;
1313         np->stats = alloc_percpu(struct netfront_stats);
1314         if (np->stats == NULL)
1315                 goto exit;
1316
1317         /* Initialise tx_skbs as a free chain containing every entry. */
1318         np->tx_skb_freelist = 0;
1319         for (i = 0; i < NET_TX_RING_SIZE; i++) {
1320                 skb_entry_set_link(&np->tx_skbs[i], i+1);
1321                 np->grant_tx_ref[i] = GRANT_INVALID_REF;
1322         }
1323
1324         /* Clear out rx_skbs */
1325         for (i = 0; i < NET_RX_RING_SIZE; i++) {
1326                 np->rx_skbs[i] = NULL;
1327                 np->grant_rx_ref[i] = GRANT_INVALID_REF;
1328         }
1329
1330         /* A grant for every tx ring slot */
1331         if (gnttab_alloc_grant_references(TX_MAX_TARGET,
1332                                           &np->gref_tx_head) < 0) {
1333                 printk(KERN_ALERT "#### netfront can't alloc tx grant refs\n");
1334                 err = -ENOMEM;
1335                 goto exit_free_stats;
1336         }
1337         /* A grant for every rx ring slot */
1338         if (gnttab_alloc_grant_references(RX_MAX_TARGET,
1339                                           &np->gref_rx_head) < 0) {
1340                 printk(KERN_ALERT "#### netfront can't alloc rx grant refs\n");
1341                 err = -ENOMEM;
1342                 goto exit_free_tx;
1343         }
1344
1345         netdev->netdev_ops      = &xennet_netdev_ops;
1346
1347         netif_napi_add(netdev, &np->napi, xennet_poll, 64);
1348         netdev->features        = NETIF_F_IP_CSUM | NETIF_F_RXCSUM |
1349                                   NETIF_F_GSO_ROBUST;
1350         netdev->hw_features     = NETIF_F_IP_CSUM | NETIF_F_SG | NETIF_F_TSO;
1351
1352         /*
1353          * Assume that all hw features are available for now. This set
1354          * will be adjusted by the call to netdev_update_features() in
1355          * xennet_connect() which is the earliest point where we can
1356          * negotiate with the backend regarding supported features.
1357          */
1358         netdev->features |= netdev->hw_features;
1359
1360         SET_ETHTOOL_OPS(netdev, &xennet_ethtool_ops);
1361         SET_NETDEV_DEV(netdev, &dev->dev);
1362
1363         netif_set_gso_max_size(netdev, XEN_NETIF_MAX_TX_SIZE - MAX_TCP_HEADER);
1364
1365         np->netdev = netdev;
1366
1367         netif_carrier_off(netdev);
1368
1369         return netdev;
1370
1371  exit_free_tx:
1372         gnttab_free_grant_references(np->gref_tx_head);
1373  exit_free_stats:
1374         free_percpu(np->stats);
1375  exit:
1376         free_netdev(netdev);
1377         return ERR_PTR(err);
1378 }
1379
1380 /**
1381  * Entry point to this code when a new device is created.  Allocate the basic
1382  * structures and the ring buffers for communication with the backend, and
1383  * inform the backend of the appropriate details for those.
1384  */
1385 static int __devinit netfront_probe(struct xenbus_device *dev,
1386                                     const struct xenbus_device_id *id)
1387 {
1388         int err;
1389         struct net_device *netdev;
1390         struct netfront_info *info;
1391
1392         netdev = xennet_create_dev(dev);
1393         if (IS_ERR(netdev)) {
1394                 err = PTR_ERR(netdev);
1395                 xenbus_dev_fatal(dev, err, "creating netdev");
1396                 return err;
1397         }
1398
1399         info = netdev_priv(netdev);
1400         dev_set_drvdata(&dev->dev, info);
1401
1402         err = register_netdev(info->netdev);
1403         if (err) {
1404                 printk(KERN_WARNING "%s: register_netdev err=%d\n",
1405                        __func__, err);
1406                 goto fail;
1407         }
1408
1409         err = xennet_sysfs_addif(info->netdev);
1410         if (err) {
1411                 unregister_netdev(info->netdev);
1412                 printk(KERN_WARNING "%s: add sysfs failed err=%d\n",
1413                        __func__, err);
1414                 goto fail;
1415         }
1416
1417         return 0;
1418
1419  fail:
1420         free_netdev(netdev);
1421         dev_set_drvdata(&dev->dev, NULL);
1422         return err;
1423 }
1424
1425 static void xennet_end_access(int ref, void *page)
1426 {
1427         /* This frees the page as a side-effect */
1428         if (ref != GRANT_INVALID_REF)
1429                 gnttab_end_foreign_access(ref, 0, (unsigned long)page);
1430 }
1431
1432 static void xennet_disconnect_backend(struct netfront_info *info)
1433 {
1434         /* Stop old i/f to prevent errors whilst we rebuild the state. */
1435         spin_lock_bh(&info->rx_lock);
1436         spin_lock_irq(&info->tx_lock);
1437         netif_carrier_off(info->netdev);
1438         spin_unlock_irq(&info->tx_lock);
1439         spin_unlock_bh(&info->rx_lock);
1440
1441         if (info->netdev->irq)
1442                 unbind_from_irqhandler(info->netdev->irq, info->netdev);
1443         info->evtchn = info->netdev->irq = 0;
1444
1445         /* End access and free the pages */
1446         xennet_end_access(info->tx_ring_ref, info->tx.sring);
1447         xennet_end_access(info->rx_ring_ref, info->rx.sring);
1448
1449         info->tx_ring_ref = GRANT_INVALID_REF;
1450         info->rx_ring_ref = GRANT_INVALID_REF;
1451         info->tx.sring = NULL;
1452         info->rx.sring = NULL;
1453 }
1454
1455 /**
1456  * We are reconnecting to the backend, due to a suspend/resume, or a backend
1457  * driver restart.  We tear down our netif structure and recreate it, but
1458  * leave the device-layer structures intact so that this is transparent to the
1459  * rest of the kernel.
1460  */
1461 static int netfront_resume(struct xenbus_device *dev)
1462 {
1463         struct netfront_info *info = dev_get_drvdata(&dev->dev);
1464
1465         dev_dbg(&dev->dev, "%s\n", dev->nodename);
1466
1467         xennet_disconnect_backend(info);
1468         return 0;
1469 }
1470
1471 static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
1472 {
1473         char *s, *e, *macstr;
1474         int i;
1475
1476         macstr = s = xenbus_read(XBT_NIL, dev->nodename, "mac", NULL);
1477         if (IS_ERR(macstr))
1478                 return PTR_ERR(macstr);
1479
1480         for (i = 0; i < ETH_ALEN; i++) {
1481                 mac[i] = simple_strtoul(s, &e, 16);
1482                 if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) {
1483                         kfree(macstr);
1484                         return -ENOENT;
1485                 }
1486                 s = e+1;
1487         }
1488
1489         kfree(macstr);
1490         return 0;
1491 }
1492
1493 static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info)
1494 {
1495         struct xen_netif_tx_sring *txs;
1496         struct xen_netif_rx_sring *rxs;
1497         int err;
1498         struct net_device *netdev = info->netdev;
1499
1500         info->tx_ring_ref = GRANT_INVALID_REF;
1501         info->rx_ring_ref = GRANT_INVALID_REF;
1502         info->rx.sring = NULL;
1503         info->tx.sring = NULL;
1504         netdev->irq = 0;
1505
1506         err = xen_net_read_mac(dev, netdev->dev_addr);
1507         if (err) {
1508                 xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename);
1509                 goto fail;
1510         }
1511
1512         txs = (struct xen_netif_tx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH);
1513         if (!txs) {
1514                 err = -ENOMEM;
1515                 xenbus_dev_fatal(dev, err, "allocating tx ring page");
1516                 goto fail;
1517         }
1518         SHARED_RING_INIT(txs);
1519         FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE);
1520
1521         err = xenbus_grant_ring(dev, virt_to_mfn(txs));
1522         if (err < 0) {
1523                 free_page((unsigned long)txs);
1524                 goto fail;
1525         }
1526
1527         info->tx_ring_ref = err;
1528         rxs = (struct xen_netif_rx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH);
1529         if (!rxs) {
1530                 err = -ENOMEM;
1531                 xenbus_dev_fatal(dev, err, "allocating rx ring page");
1532                 goto fail;
1533         }
1534         SHARED_RING_INIT(rxs);
1535         FRONT_RING_INIT(&info->rx, rxs, PAGE_SIZE);
1536
1537         err = xenbus_grant_ring(dev, virt_to_mfn(rxs));
1538         if (err < 0) {
1539                 free_page((unsigned long)rxs);
1540                 goto fail;
1541         }
1542         info->rx_ring_ref = err;
1543
1544         err = xenbus_alloc_evtchn(dev, &info->evtchn);
1545         if (err)
1546                 goto fail;
1547
1548         err = bind_evtchn_to_irqhandler(info->evtchn, xennet_interrupt,
1549                                         0, netdev->name, netdev);
1550         if (err < 0)
1551                 goto fail;
1552         netdev->irq = err;
1553         return 0;
1554
1555  fail:
1556         return err;
1557 }
1558
1559 /* Common code used when first setting up, and when resuming. */
1560 static int talk_to_netback(struct xenbus_device *dev,
1561                            struct netfront_info *info)
1562 {
1563         const char *message;
1564         struct xenbus_transaction xbt;
1565         int err;
1566
1567         /* Create shared ring, alloc event channel. */
1568         err = setup_netfront(dev, info);
1569         if (err)
1570                 goto out;
1571
1572 again:
1573         err = xenbus_transaction_start(&xbt);
1574         if (err) {
1575                 xenbus_dev_fatal(dev, err, "starting transaction");
1576                 goto destroy_ring;
1577         }
1578
1579         err = xenbus_printf(xbt, dev->nodename, "tx-ring-ref", "%u",
1580                             info->tx_ring_ref);
1581         if (err) {
1582                 message = "writing tx ring-ref";
1583                 goto abort_transaction;
1584         }
1585         err = xenbus_printf(xbt, dev->nodename, "rx-ring-ref", "%u",
1586                             info->rx_ring_ref);
1587         if (err) {
1588                 message = "writing rx ring-ref";
1589                 goto abort_transaction;
1590         }
1591         err = xenbus_printf(xbt, dev->nodename,
1592                             "event-channel", "%u", info->evtchn);
1593         if (err) {
1594                 message = "writing event-channel";
1595                 goto abort_transaction;
1596         }
1597
1598         err = xenbus_printf(xbt, dev->nodename, "request-rx-copy", "%u",
1599                             1);
1600         if (err) {
1601                 message = "writing request-rx-copy";
1602                 goto abort_transaction;
1603         }
1604
1605         err = xenbus_printf(xbt, dev->nodename, "feature-rx-notify", "%d", 1);
1606         if (err) {
1607                 message = "writing feature-rx-notify";
1608                 goto abort_transaction;
1609         }
1610
1611         err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", 1);
1612         if (err) {
1613                 message = "writing feature-sg";
1614                 goto abort_transaction;
1615         }
1616
1617         err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4", "%d", 1);
1618         if (err) {
1619                 message = "writing feature-gso-tcpv4";
1620                 goto abort_transaction;
1621         }
1622
1623         err = xenbus_transaction_end(xbt, 0);
1624         if (err) {
1625                 if (err == -EAGAIN)
1626                         goto again;
1627                 xenbus_dev_fatal(dev, err, "completing transaction");
1628                 goto destroy_ring;
1629         }
1630
1631         return 0;
1632
1633  abort_transaction:
1634         xenbus_transaction_end(xbt, 1);
1635         xenbus_dev_fatal(dev, err, "%s", message);
1636  destroy_ring:
1637         xennet_disconnect_backend(info);
1638  out:
1639         return err;
1640 }
1641
1642 static int xennet_connect(struct net_device *dev)
1643 {
1644         struct netfront_info *np = netdev_priv(dev);
1645         int i, requeue_idx, err;
1646         struct sk_buff *skb;
1647         grant_ref_t ref;
1648         struct xen_netif_rx_request *req;
1649         unsigned int feature_rx_copy;
1650
1651         err = xenbus_scanf(XBT_NIL, np->xbdev->otherend,
1652                            "feature-rx-copy", "%u", &feature_rx_copy);
1653         if (err != 1)
1654                 feature_rx_copy = 0;
1655
1656         if (!feature_rx_copy) {
1657                 dev_info(&dev->dev,
1658                          "backend does not support copying receive path\n");
1659                 return -ENODEV;
1660         }
1661
1662         err = talk_to_netback(np->xbdev, np);
1663         if (err)
1664                 return err;
1665
1666         rtnl_lock();
1667         netdev_update_features(dev);
1668         rtnl_unlock();
1669
1670         spin_lock_bh(&np->rx_lock);
1671         spin_lock_irq(&np->tx_lock);
1672
1673         /* Step 1: Discard all pending TX packet fragments. */
1674         xennet_release_tx_bufs(np);
1675
1676         /* Step 2: Rebuild the RX buffer freelist and the RX ring itself. */
1677         for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) {
1678                 skb_frag_t *frag;
1679                 const struct page *page;
1680                 if (!np->rx_skbs[i])
1681                         continue;
1682
1683                 skb = np->rx_skbs[requeue_idx] = xennet_get_rx_skb(np, i);
1684                 ref = np->grant_rx_ref[requeue_idx] = xennet_get_rx_ref(np, i);
1685                 req = RING_GET_REQUEST(&np->rx, requeue_idx);
1686
1687                 frag = &skb_shinfo(skb)->frags[0];
1688                 page = skb_frag_page(frag);
1689                 gnttab_grant_foreign_access_ref(
1690                         ref, np->xbdev->otherend_id,
1691                         pfn_to_mfn(page_to_pfn(page)),
1692                         0);
1693                 req->gref = ref;
1694                 req->id   = requeue_idx;
1695
1696                 requeue_idx++;
1697         }
1698
1699         np->rx.req_prod_pvt = requeue_idx;
1700
1701         /*
1702          * Step 3: All public and private state should now be sane.  Get
1703          * ready to start sending and receiving packets and give the driver
1704          * domain a kick because we've probably just requeued some
1705          * packets.
1706          */
1707         netif_carrier_on(np->netdev);
1708         notify_remote_via_irq(np->netdev->irq);
1709         xennet_tx_buf_gc(dev);
1710         xennet_alloc_rx_buffers(dev);
1711
1712         spin_unlock_irq(&np->tx_lock);
1713         spin_unlock_bh(&np->rx_lock);
1714
1715         return 0;
1716 }
1717
1718 /**
1719  * Callback received when the backend's state changes.
1720  */
1721 static void netback_changed(struct xenbus_device *dev,
1722                             enum xenbus_state backend_state)
1723 {
1724         struct netfront_info *np = dev_get_drvdata(&dev->dev);
1725         struct net_device *netdev = np->netdev;
1726
1727         dev_dbg(&dev->dev, "%s\n", xenbus_strstate(backend_state));
1728
1729         switch (backend_state) {
1730         case XenbusStateInitialising:
1731         case XenbusStateInitialised:
1732         case XenbusStateReconfiguring:
1733         case XenbusStateReconfigured:
1734         case XenbusStateUnknown:
1735         case XenbusStateClosed:
1736                 break;
1737
1738         case XenbusStateInitWait:
1739                 if (dev->state != XenbusStateInitialising)
1740                         break;
1741                 if (xennet_connect(netdev) != 0)
1742                         break;
1743                 xenbus_switch_state(dev, XenbusStateConnected);
1744                 break;
1745
1746         case XenbusStateConnected:
1747                 netif_notify_peers(netdev);
1748                 break;
1749
1750         case XenbusStateClosing:
1751                 xenbus_frontend_closed(dev);
1752                 break;
1753         }
1754 }
1755
1756 static const struct xennet_stat {
1757         char name[ETH_GSTRING_LEN];
1758         u16 offset;
1759 } xennet_stats[] = {
1760         {
1761                 "rx_gso_checksum_fixup",
1762                 offsetof(struct netfront_info, rx_gso_checksum_fixup)
1763         },
1764 };
1765
1766 static int xennet_get_sset_count(struct net_device *dev, int string_set)
1767 {
1768         switch (string_set) {
1769         case ETH_SS_STATS:
1770                 return ARRAY_SIZE(xennet_stats);
1771         default:
1772                 return -EINVAL;
1773         }
1774 }
1775
1776 static void xennet_get_ethtool_stats(struct net_device *dev,
1777                                      struct ethtool_stats *stats, u64 * data)
1778 {
1779         void *np = netdev_priv(dev);
1780         int i;
1781
1782         for (i = 0; i < ARRAY_SIZE(xennet_stats); i++)
1783                 data[i] = *(unsigned long *)(np + xennet_stats[i].offset);
1784 }
1785
1786 static void xennet_get_strings(struct net_device *dev, u32 stringset, u8 * data)
1787 {
1788         int i;
1789
1790         switch (stringset) {
1791         case ETH_SS_STATS:
1792                 for (i = 0; i < ARRAY_SIZE(xennet_stats); i++)
1793                         memcpy(data + i * ETH_GSTRING_LEN,
1794                                xennet_stats[i].name, ETH_GSTRING_LEN);
1795                 break;
1796         }
1797 }
1798
1799 static const struct ethtool_ops xennet_ethtool_ops =
1800 {
1801         .get_link = ethtool_op_get_link,
1802
1803         .get_sset_count = xennet_get_sset_count,
1804         .get_ethtool_stats = xennet_get_ethtool_stats,
1805         .get_strings = xennet_get_strings,
1806 };
1807
1808 #ifdef CONFIG_SYSFS
1809 static ssize_t show_rxbuf_min(struct device *dev,
1810                               struct device_attribute *attr, char *buf)
1811 {
1812         struct net_device *netdev = to_net_dev(dev);
1813         struct netfront_info *info = netdev_priv(netdev);
1814
1815         return sprintf(buf, "%u\n", info->rx_min_target);
1816 }
1817
1818 static ssize_t store_rxbuf_min(struct device *dev,
1819                                struct device_attribute *attr,
1820                                const char *buf, size_t len)
1821 {
1822         struct net_device *netdev = to_net_dev(dev);
1823         struct netfront_info *np = netdev_priv(netdev);
1824         char *endp;
1825         unsigned long target;
1826
1827         if (!capable(CAP_NET_ADMIN))
1828                 return -EPERM;
1829
1830         target = simple_strtoul(buf, &endp, 0);
1831         if (endp == buf)
1832                 return -EBADMSG;
1833
1834         if (target < RX_MIN_TARGET)
1835                 target = RX_MIN_TARGET;
1836         if (target > RX_MAX_TARGET)
1837                 target = RX_MAX_TARGET;
1838
1839         spin_lock_bh(&np->rx_lock);
1840         if (target > np->rx_max_target)
1841                 np->rx_max_target = target;
1842         np->rx_min_target = target;
1843         if (target > np->rx_target)
1844                 np->rx_target = target;
1845
1846         xennet_alloc_rx_buffers(netdev);
1847
1848         spin_unlock_bh(&np->rx_lock);
1849         return len;
1850 }
1851
1852 static ssize_t show_rxbuf_max(struct device *dev,
1853                               struct device_attribute *attr, char *buf)
1854 {
1855         struct net_device *netdev = to_net_dev(dev);
1856         struct netfront_info *info = netdev_priv(netdev);
1857
1858         return sprintf(buf, "%u\n", info->rx_max_target);
1859 }
1860
1861 static ssize_t store_rxbuf_max(struct device *dev,
1862                                struct device_attribute *attr,
1863                                const char *buf, size_t len)
1864 {
1865         struct net_device *netdev = to_net_dev(dev);
1866         struct netfront_info *np = netdev_priv(netdev);
1867         char *endp;
1868         unsigned long target;
1869
1870         if (!capable(CAP_NET_ADMIN))
1871                 return -EPERM;
1872
1873         target = simple_strtoul(buf, &endp, 0);
1874         if (endp == buf)
1875                 return -EBADMSG;
1876
1877         if (target < RX_MIN_TARGET)
1878                 target = RX_MIN_TARGET;
1879         if (target > RX_MAX_TARGET)
1880                 target = RX_MAX_TARGET;
1881
1882         spin_lock_bh(&np->rx_lock);
1883         if (target < np->rx_min_target)
1884                 np->rx_min_target = target;
1885         np->rx_max_target = target;
1886         if (target < np->rx_target)
1887                 np->rx_target = target;
1888
1889         xennet_alloc_rx_buffers(netdev);
1890
1891         spin_unlock_bh(&np->rx_lock);
1892         return len;
1893 }
1894
1895 static ssize_t show_rxbuf_cur(struct device *dev,
1896                               struct device_attribute *attr, char *buf)
1897 {
1898         struct net_device *netdev = to_net_dev(dev);
1899         struct netfront_info *info = netdev_priv(netdev);
1900
1901         return sprintf(buf, "%u\n", info->rx_target);
1902 }
1903
1904 static struct device_attribute xennet_attrs[] = {
1905         __ATTR(rxbuf_min, S_IRUGO|S_IWUSR, show_rxbuf_min, store_rxbuf_min),
1906         __ATTR(rxbuf_max, S_IRUGO|S_IWUSR, show_rxbuf_max, store_rxbuf_max),
1907         __ATTR(rxbuf_cur, S_IRUGO, show_rxbuf_cur, NULL),
1908 };
1909
1910 static int xennet_sysfs_addif(struct net_device *netdev)
1911 {
1912         int i;
1913         int err;
1914
1915         for (i = 0; i < ARRAY_SIZE(xennet_attrs); i++) {
1916                 err = device_create_file(&netdev->dev,
1917                                            &xennet_attrs[i]);
1918                 if (err)
1919                         goto fail;
1920         }
1921         return 0;
1922
1923  fail:
1924         while (--i >= 0)
1925                 device_remove_file(&netdev->dev, &xennet_attrs[i]);
1926         return err;
1927 }
1928
1929 static void xennet_sysfs_delif(struct net_device *netdev)
1930 {
1931         int i;
1932
1933         for (i = 0; i < ARRAY_SIZE(xennet_attrs); i++)
1934                 device_remove_file(&netdev->dev, &xennet_attrs[i]);
1935 }
1936
1937 #endif /* CONFIG_SYSFS */
1938
1939 static const struct xenbus_device_id netfront_ids[] = {
1940         { "vif" },
1941         { "" }
1942 };
1943
1944
1945 static int __devexit xennet_remove(struct xenbus_device *dev)
1946 {
1947         struct netfront_info *info = dev_get_drvdata(&dev->dev);
1948
1949         dev_dbg(&dev->dev, "%s\n", dev->nodename);
1950
1951         xennet_disconnect_backend(info);
1952
1953         xennet_sysfs_delif(info->netdev);
1954
1955         unregister_netdev(info->netdev);
1956
1957         del_timer_sync(&info->rx_refill_timer);
1958
1959         free_percpu(info->stats);
1960
1961         free_netdev(info->netdev);
1962
1963         return 0;
1964 }
1965
1966 static DEFINE_XENBUS_DRIVER(netfront, ,
1967         .probe = netfront_probe,
1968         .remove = __devexit_p(xennet_remove),
1969         .resume = netfront_resume,
1970         .otherend_changed = netback_changed,
1971 );
1972
1973 static int __init netif_init(void)
1974 {
1975         if (!xen_domain())
1976                 return -ENODEV;
1977
1978         if (xen_initial_domain())
1979                 return 0;
1980
1981         if (xen_hvm_domain() && !xen_platform_pci_unplug)
1982                 return -ENODEV;
1983
1984         printk(KERN_INFO "Initialising Xen virtual ethernet driver.\n");
1985
1986         return xenbus_register_frontend(&netfront_driver);
1987 }
1988 module_init(netif_init);
1989
1990
1991 static void __exit netif_exit(void)
1992 {
1993         if (xen_initial_domain())
1994                 return;
1995
1996         xenbus_unregister_driver(&netfront_driver);
1997 }
1998 module_exit(netif_exit);
1999
2000 MODULE_DESCRIPTION("Xen virtual network device frontend");
2001 MODULE_LICENSE("GPL");
2002 MODULE_ALIAS("xen:vif");
2003 MODULE_ALIAS("xennet");