drivers/net/xen-netfront.c

   1 /*
   2  * Virtual network driver for conversing with remote driver backends.
   3  *
   4  * Copyright (c) 2002-2005, K A Fraser
   5  * Copyright (c) 2005, XenSource Ltd
   6  *
   7  * This program is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU General Public License version 2
   9  * as published by the Free Software Foundation; or, when distributed
  10  * separately from the Linux kernel or incorporated into other
  11  * software packages, subject to the following license:
  12  *
  13  * Permission is hereby granted, free of charge, to any person obtaining a copy
  14  * of this source file (the "Software"), to deal in the Software without
  15  * restriction, including without limitation the rights to use, copy, modify,
  16  * merge, publish, distribute, sublicense, and/or sell copies of the Software,
  17  * and to permit persons to whom the Software is furnished to do so, subject to
  18  * the following conditions:
  19  *
  20  * The above copyright notice and this permission notice shall be included in
  21  * all copies or substantial portions of the Software.
  22  *
  23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  24  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  25  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  26  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  27  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  28  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  29  * IN THE SOFTWARE.
  30  */
  31
  32 #include <linux/module.h>
  33 #include <linux/kernel.h>
  34 #include <linux/netdevice.h>
  35 #include <linux/etherdevice.h>
  36 #include <linux/skbuff.h>
  37 #include <linux/ethtool.h>
  38 #include <linux/if_ether.h>
  39 #include <linux/tcp.h>
  40 #include <linux/udp.h>
  41 #include <linux/moduleparam.h>
  42 #include <linux/mm.h>
  43 #include <linux/slab.h>
  44 #include <net/ip.h>
  45
  46 #include <xen/xen.h>
  47 #include <xen/xenbus.h>
  48 #include <xen/events.h>
  49 #include <xen/page.h>
  50 #include <xen/platform_pci.h>
  51 #include <xen/grant_table.h>
  52
  53 #include <xen/interface/io/netif.h>
  54 #include <xen/interface/memory.h>
  55 #include <xen/interface/grant_table.h>
  56
  /*
   * Forward declaration; no initialiser is given here (presumably the
   * full definition appears later in the file -- not visible in this
   * section).
   */
  57 static const struct ethtool_ops xennet_ethtool_ops;
  58
  /*
   * Per-skb driver state; NETFRONT_SKB_CB() overlays it on skb->cb.
   * NOTE(review): the code that fills in and reads these fields is not
   * in this section -- confirm their exact meaning there.
   */
  59 struct netfront_cb {
  60         struct page *page;
  61         unsigned offset;
  62 };
  63
  /* View an skb's control buffer (skb->cb) as a struct netfront_cb. */
  64 #define NETFRONT_SKB_CB(skb)    ((struct netfront_cb *)((skb)->cb))
  65
  /*
   * Linear-area size (NET_IP_ALIGN is added on top) requested for every
   * receive skb; also compared against the first response's size when the
   * per-packet fragment limit is computed.
   */
  66 #define RX_COPY_THRESHOLD 256
  67
  /* Stored in grant_{tx,rx}_ref[] for a slot that currently holds no grant. */
  68 #define GRANT_INVALID_REF       0
  69
  /* Number of entries in a shared ring occupying one PAGE_SIZE page. */
  70 #define NET_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, PAGE_SIZE)
  71 #define NET_RX_RING_SIZE __CONST_RING_SIZE(xen_netif_rx, PAGE_SIZE)
  /* Upper bound on in-flight tx requests: the ring size, capped at 256. */
  72 #define TX_MAX_TARGET min_t(int, NET_TX_RING_SIZE, 256)
  73
  /*
   * Packet and byte counters. struct netfront_info holds a __percpu pointer
   * to these; writers bracket their updates with
   * u64_stats_update_begin()/u64_stats_update_end() on syncp.
   */
  74 struct netfront_stats {
  75         u64                     rx_packets;
  76         u64                     tx_packets;
  77         u64                     rx_bytes;
  78         u64                     tx_bytes;
  79         struct u64_stats_sync   syncp;
  80 };
  81
  /*
   * Per-interface driver state (obtained via netdev_priv()). The transmit
   * members (ring, tx_skbs[], tx grant references) sit together after
   * tx_lock; the receive members follow rx_lock, which is marked
   * ____cacheline_aligned_in_smp.
   */
  82 struct netfront_info {
  83         struct list_head list;
  84         struct net_device *netdev;
  85
  86         struct napi_struct napi;
  87
  88         unsigned int evtchn;
  89         struct xenbus_device *xbdev;
  90
  91         spinlock_t   tx_lock;          /* taken by xennet_start_xmit() around tx ring updates */
  92         struct xen_netif_tx_front_ring tx;
  93         int tx_ring_ref;
  94
  95         /*
  96          * {tx,rx}_skbs store outstanding skbuffs. Free tx_skb entries
  97          * are linked from tx_skb_freelist through skb_entry.link.
  98          *
  99          *  NB. Freelist index entries are always going to be less than
 100          *  PAGE_OFFSET, whereas pointers to skbs will always be equal or
 101          *  greater than PAGE_OFFSET: we use this property to distinguish
 102          *  them.
 103          */
 104         union skb_entry {
 105                 struct sk_buff *skb;
 106                 unsigned long link;
 107         } tx_skbs[NET_TX_RING_SIZE];
 108         grant_ref_t gref_tx_head;      /* pool head for gnttab_claim/release_grant_reference() */
 109         grant_ref_t grant_tx_ref[NET_TX_RING_SIZE]; /* grant per tx slot id; GRANT_INVALID_REF once reclaimed */
 110         unsigned tx_skb_freelist;      /* first free id in tx_skbs[] */
 111
 112         spinlock_t   rx_lock ____cacheline_aligned_in_smp;
 113         struct xen_netif_rx_front_ring rx;
 114         int rx_ring_ref;
 115
 116         /* Receive-ring batched refills. */
 117 #define RX_MIN_TARGET 8
 118 #define RX_DFL_MIN_TARGET 64
 119 #define RX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256)
             /* rx_target is the refill goal; it is doubled, up to rx_max_target, when the ring runs low. */
 120         unsigned rx_min_target, rx_max_target, rx_target;
 121         struct sk_buff_head rx_batch;  /* skbs allocated but not yet placed on the rx ring */
 122
 123         struct timer_list rx_refill_timer; /* armed when no rx buffer could be allocated */
 124
 125         struct sk_buff *rx_skbs[NET_RX_RING_SIZE]; /* skb per rx ring slot; NULL when the slot is empty */
 126         grant_ref_t gref_rx_head;
 127         grant_ref_t grant_rx_ref[NET_RX_RING_SIZE];
 128
 129         unsigned long rx_pfn_array[NET_RX_RING_SIZE];
 130         struct multicall_entry rx_mcl[NET_RX_RING_SIZE+1];
 131         struct mmu_update rx_mmu[NET_RX_RING_SIZE];
 132
 133         /* Statistics */
 134         struct netfront_stats __percpu *stats;
 135
             /* Incremented when a GSO skb arrives without CHECKSUM_PARTIAL set. */
 136         unsigned long rx_gso_checksum_fixup;
 137 };
 138
 /*
  * One receive response together with its extra-info segments.
  * xennet_get_extras() stores an extra of type T at extras[T - 1].
  */
 139 struct netfront_rx_info {
 140         struct xen_netif_rx_response rx;
 141         struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
 142 };
 143
 144 static void skb_entry_set_link(union skb_entry *list, unsigned short id)
 145 {
 146         list->link = id;
 147 }
 148
 149 static int skb_entry_is_link(const union skb_entry *list)
 150 {
 151         BUILD_BUG_ON(sizeof(list->skb) != sizeof(list->link));
 152         return (unsigned long)list->skb < PAGE_OFFSET;
 153 }
 154
 155 /*
 156  * Helpers for acquiring and freeing slots in tx_skbs[].
 157  */
 158
 159 static void add_id_to_freelist(unsigned *head, union skb_entry *list,
 160                                unsigned short id)
 161 {
 162         skb_entry_set_link(&list[id], *head);
 163         *head = id;
 164 }
 165
 166 static unsigned short get_id_from_freelist(unsigned *head,
 167                                            union skb_entry *list)
 168 {
 169         unsigned int id = *head;
 170         *head = list[id].link;
 171         return id;
 172 }
 173
 174 static int xennet_rxidx(RING_IDX idx)
 175 {
 176         return idx & (NET_RX_RING_SIZE - 1);
 177 }
 178
 179 static struct sk_buff *xennet_get_rx_skb(struct netfront_info *np,
 180                                          RING_IDX ri)
 181 {
 182         int i = xennet_rxidx(ri);
 183         struct sk_buff *skb = np->rx_skbs[i];
 184         np->rx_skbs[i] = NULL;
 185         return skb;
 186 }
 187
 188 static grant_ref_t xennet_get_rx_ref(struct netfront_info *np,
 189                                             RING_IDX ri)
 190 {
 191         int i = xennet_rxidx(ri);
 192         grant_ref_t ref = np->grant_rx_ref[i];
 193         np->grant_rx_ref[i] = GRANT_INVALID_REF;
 194         return ref;
 195 }
 196
 /*
  * sysfs hooks. With CONFIG_SYSFS they are real functions (bodies not in
  * this section); without it they are stubs: addif evaluates to 0 and
  * delif expands to an empty statement.
  */
 197 #ifdef CONFIG_SYSFS
 198 static int xennet_sysfs_addif(struct net_device *netdev);
 199 static void xennet_sysfs_delif(struct net_device *netdev);
 200 #else /* !CONFIG_SYSFS */
 201 #define xennet_sysfs_addif(dev) (0)
 202 #define xennet_sysfs_delif(dev) do { } while (0)
 203 #endif
 204
 205 static bool xennet_can_sg(struct net_device *dev)
 206 {
 207         return dev->features & NETIF_F_SG;
 208 }
 209
 210
 211 static void rx_refill_timeout(unsigned long data)
 212 {
 213         struct net_device *dev = (struct net_device *)data;
 214         struct netfront_info *np = netdev_priv(dev);
 215         napi_schedule(&np->napi);
 216 }
 217
 218 static int netfront_tx_slot_available(struct netfront_info *np)
 219 {
 220         return (np->tx.req_prod_pvt - np->tx.rsp_cons) <
 221                 (TX_MAX_TARGET - MAX_SKB_FRAGS - 2);
 222 }
 223
 /*
  * Wake the transmit queue, but only if it is stopped, tx slots are
  * available again and the interface is running. The three conditions are
  * tested in that order.
  */
 static void xennet_maybe_wake_tx(struct net_device *dev)
 {
         struct netfront_info *info = netdev_priv(dev);

         if (likely(!netif_queue_stopped(dev)))
                 return;

         if (!netfront_tx_slot_available(info))
                 return;

         if (unlikely(!netif_running(dev)))
                 return;

         netif_wake_queue(dev);
 }
 233
 /*
  * Top up the receive ring with empty buffers for the backend to fill.
  *
  * Does nothing while the carrier is down. Otherwise skbs (each with one
  * freshly allocated page attached as frag 0) are queued on np->rx_batch
  * until the batch covers the gap between the outstanding requests and
  * np->rx_target. If the batch is large enough -- or an allocation failed
  * while the batch was non-empty -- every queued skb is moved into a ring
  * slot, its page is granted to the backend, and the requests are
  * published. If an allocation fails while the batch is still empty, the
  * refill timer is armed to fire HZ/10 jiffies later.
  *
  * NOTE(review): xennet_open() calls this with np->rx_lock held; other
  * callers are not in this section -- confirm they do the same.
  */
 234 static void xennet_alloc_rx_buffers(struct net_device *dev)
 235 {
 236         unsigned short id;
 237         struct netfront_info *np = netdev_priv(dev);
 238         struct sk_buff *skb;
 239         struct page *page;
 240         int i, batch_target, notify;
 241         RING_IDX req_prod = np->rx.req_prod_pvt;
 242         grant_ref_t ref;
 243         unsigned long pfn;
 244         void *vaddr;
 245         struct xen_netif_rx_request *req;
 246
 247         if (unlikely(!netif_carrier_ok(dev)))
 248                 return;
 249
 250         /*
 251          * Allocate skbuffs greedily, even though we batch updates to the
 252          * receive ring. This creates a less bursty demand on the memory
 253          * allocator, so should reduce the chance of failed allocation requests
 254          * both for ourself and for other kernel subsystems.
 255          */
             /* i starts at the number of skbs already waiting on rx_batch. */
 256         batch_target = np->rx_target - (req_prod - np->rx.rsp_cons);
 257         for (i = skb_queue_len(&np->rx_batch); i < batch_target; i++) {
 258                 skb = __netdev_alloc_skb(dev, RX_COPY_THRESHOLD + NET_IP_ALIGN,
 259                                          GFP_ATOMIC | __GFP_NOWARN);
 260                 if (unlikely(!skb))
 261                         goto no_skb;
 262
 263                 /* Align ip header to a 16 bytes boundary */
 264                 skb_reserve(skb, NET_IP_ALIGN);
 265
 266                 page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
 267                 if (!page) {
 268                         kfree_skb(skb);
                             /* Also entered by goto when the skb allocation itself failed. */
 269 no_skb:
 270                         /* Any skbuffs queued for refill? Force them out. */
 271                         if (i != 0)
 272                                 goto refill;
 273                         /* Could not allocate any skbuffs. Try again later. */
 274                         mod_timer(&np->rx_refill_timer,
 275                                   jiffies + (HZ/10));
 276                         break;
 277                 }
 278
                     /* Attach the page as frag 0 with offset 0 and size 0. */
 279                 __skb_fill_page_desc(skb, 0, page, 0, 0);
 280                 skb_shinfo(skb)->nr_frags = 1;
 281                 __skb_queue_tail(&np->rx_batch, skb);
 282         }
 283
 284         /* Is the batch large enough to be worthwhile? */
 285         if (i < (np->rx_target/2)) {
                     /*
                      * Too small: leave the skbs on rx_batch. Only push if the
                      * private producer index is ahead of the shared one.
                      */
 286                 if (req_prod > np->rx.sring->req_prod)
 287                         goto push;
 288                 return;
 289         }
 290
 291         /* Adjust our fill target if we risked running out of buffers. */
 292         if (((req_prod - np->rx.sring->rsp_prod) < (np->rx_target / 4)) &&
 293             ((np->rx_target *= 2) > np->rx_max_target))
 294                 np->rx_target = np->rx_max_target;
 295
 296  refill:
             /* Move every batched skb into the next free ring slot. */
 297         for (i = 0; ; i++) {
 298                 skb = __skb_dequeue(&np->rx_batch);
 299                 if (skb == NULL)
 300                         break;
 301
 302                 skb->dev = dev;
 303
 304                 id = xennet_rxidx(req_prod + i);
 305
 306                 BUG_ON(np->rx_skbs[id]);
 307                 np->rx_skbs[id] = skb;
 308
 309                 ref = gnttab_claim_grant_reference(&np->gref_rx_head);
 310                 BUG_ON((signed short)ref < 0);
 311                 np->grant_rx_ref[id] = ref;
 312
 313                 pfn = page_to_pfn(skb_frag_page(&skb_shinfo(skb)->frags[0]));
                     /* NOTE(review): vaddr is assigned but not used again in this function. */
 314                 vaddr = page_address(skb_frag_page(&skb_shinfo(skb)->frags[0]));
 315
 316                 req = RING_GET_REQUEST(&np->rx, req_prod + i);
                     /* Last argument 0: the grant is not read-only. */
 317                 gnttab_grant_foreign_access_ref(ref,
 318                                                 np->xbdev->otherend_id,
 319                                                 pfn_to_mfn(pfn),
 320                                                 0);
 321
 322                 req->id = id;
 323                 req->gref = ref;
 324         }
 325
 326         wmb();          /* barrier so backend sees requests */
 327
 328         /* Above is a suitable barrier to ensure backend will see requests. */
 329         np->rx.req_prod_pvt = req_prod + i;
 330  push:
 331         RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->rx, notify);
 332         if (notify)
 333                 notify_remote_via_irq(np->netdev->irq);
 334 }
 335
 336 static int xennet_open(struct net_device *dev)
 337 {
 338         struct netfront_info *np = netdev_priv(dev);
 339
 340         napi_enable(&np->napi);
 341
 342         spin_lock_bh(&np->rx_lock);
 343         if (netif_carrier_ok(dev)) {
 344                 xennet_alloc_rx_buffers(dev);
 345                 np->rx.sring->rsp_event = np->rx.rsp_cons + 1;
 346                 if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx))
 347                         napi_schedule(&np->napi);
 348         }
 349         spin_unlock_bh(&np->rx_lock);
 350
 351         netif_start_queue(dev);
 352
 353         return 0;
 354 }
 355
 /*
  * Reclaim transmit slots the backend has responded to.
  *
  * For every new response (XEN_NETIF_RSP_NULL ones are skipped) the slot's
  * grant is ended and released, the slot id goes back on the tx free list
  * and the slot's skb reference is dropped. rsp_event is then re-armed and
  * the scan repeats if further responses arrived meanwhile. Finally the tx
  * queue is woken if it was stopped and slots are available again.
  *
  * BUGs if the carrier is down or if a grant is still in use by the backend.
  * xennet_start_xmit() calls this with np->tx_lock held.
  * NOTE(review): txrsp->id is read from the shared ring and used as an array
  * index without a range check.
  */
 356 static void xennet_tx_buf_gc(struct net_device *dev)
 357 {
 358         RING_IDX cons, prod;
 359         unsigned short id;
 360         struct netfront_info *np = netdev_priv(dev);
 361         struct sk_buff *skb;
 362
 363         BUG_ON(!netif_carrier_ok(dev));
 364
 365         do {
 366                 prod = np->tx.sring->rsp_prod;
 367                 rmb(); /* Ensure we see responses up to 'rp'. */
 368
 369                 for (cons = np->tx.rsp_cons; cons != prod; cons++) {
 370                         struct xen_netif_tx_response *txrsp;
 371
 372                         txrsp = RING_GET_RESPONSE(&np->tx, cons);
 373                         if (txrsp->status == XEN_NETIF_RSP_NULL)
 374                                 continue;
 375
 376                         id  = txrsp->id;
 377                         skb = np->tx_skbs[id].skb;
 378                         if (unlikely(gnttab_query_foreign_access(
 379                                 np->grant_tx_ref[id]) != 0)) {
 380                                 printk(KERN_ALERT "xennet_tx_buf_gc: warning "
 381                                        "-- grant still in use by backend "
 382                                        "domain.\n");
 383                                 BUG();
 384                         }
 385                         gnttab_end_foreign_access_ref(
 386                                 np->grant_tx_ref[id], GNTMAP_readonly);
 387                         gnttab_release_grant_reference(
 388                                 &np->gref_tx_head, np->grant_tx_ref[id]);
 389                         np->grant_tx_ref[id] = GRANT_INVALID_REF;
                             /* Overwrites tx_skbs[id]; skb was read out above. */
 390                         add_id_to_freelist(&np->tx_skb_freelist, np->tx_skbs, id);
 391                         dev_kfree_skb_irq(skb);
 392                 }
 393
 394                 np->tx.rsp_cons = prod;
 395
 396                 /*
 397                  * Set a new event, then check for race with update of tx_cons.
 398                  * Note that it is essential to schedule a callback, no matter
 399                  * how few buffers are pending. Even if there is space in the
 400                  * transmit ring, higher layers may be blocked because too much
 401                  * data is outstanding: in such cases notification from Xen is
 402                  * likely to be the only kick that we'll get.
 403                  */
                     /* Event point: halfway through the still-outstanding requests, plus one. */
 404                 np->tx.sring->rsp_event =
 405                         prod + ((np->tx.sring->req_prod - prod) >> 1) + 1;
 406                 mb();           /* update shared area */
 407         } while ((cons == prod) && (prod != np->tx.sring->rsp_prod));
 408
 409         xennet_maybe_wake_tx(dev);
 410 }
 411
 412 static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev,
 413                               struct xen_netif_tx_request *tx)
 414 {
 415         struct netfront_info *np = netdev_priv(dev);
 416         char *data = skb->data;
 417         unsigned long mfn;
 418         RING_IDX prod = np->tx.req_prod_pvt;
 419         int frags = skb_shinfo(skb)->nr_frags;
 420         unsigned int offset = offset_in_page(data);
 421         unsigned int len = skb_headlen(skb);
 422         unsigned int id;
 423         grant_ref_t ref;
 424         int i;
 425
 426         /* While the header overlaps a page boundary (including being
 427            larger than a page), split it it into page-sized chunks. */
 428         while (len > PAGE_SIZE - offset) {
 429                 tx->size = PAGE_SIZE - offset;
 430                 tx->flags |= XEN_NETTXF_more_data;
 431                 len -= tx->size;
 432                 data += tx->size;
 433                 offset = 0;
 434
 435                 id = get_id_from_freelist(&np->tx_skb_freelist, np->tx_skbs);
 436                 np->tx_skbs[id].skb = skb_get(skb);
 437                 tx = RING_GET_REQUEST(&np->tx, prod++);
 438                 tx->id = id;
 439                 ref = gnttab_claim_grant_reference(&np->gref_tx_head);
 440                 BUG_ON((signed short)ref < 0);
 441
 442                 mfn = virt_to_mfn(data);
 443                 gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id,
 444                                                 mfn, GNTMAP_readonly);
 445
 446                 tx->gref = np->grant_tx_ref[id] = ref;
 447                 tx->offset = offset;
 448                 tx->size = len;
 449                 tx->flags = 0;
 450         }
 451
 452         /* Grant backend access to each skb fragment page. */
 453         for (i = 0; i < frags; i++) {
 454                 skb_frag_t *frag = skb_shinfo(skb)->frags + i;
 455
 456                 tx->flags |= XEN_NETTXF_more_data;
 457
 458                 id = get_id_from_freelist(&np->tx_skb_freelist, np->tx_skbs);
 459                 np->tx_skbs[id].skb = skb_get(skb);
 460                 tx = RING_GET_REQUEST(&np->tx, prod++);
 461                 tx->id = id;
 462                 ref = gnttab_claim_grant_reference(&np->gref_tx_head);
 463                 BUG_ON((signed short)ref < 0);
 464
 465                 mfn = pfn_to_mfn(page_to_pfn(skb_frag_page(frag)));
 466                 gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id,
 467                                                 mfn, GNTMAP_readonly);
 468
 469                 tx->gref = np->grant_tx_ref[id] = ref;
 470                 tx->offset = frag->page_offset;
 471                 tx->size = skb_frag_size(frag);
 472                 tx->flags = 0;
 473         }
 474
 475         np->tx.req_prod_pvt = prod;
 476 }
 477
 /*
  * Transmit entry point: place @skb on the shared tx ring.
  *
  * The slot count (page fragments plus pages spanned by the linear area) is
  * checked first; a packet needing more than MAX_SKB_FRAGS + 1 slots is
  * dropped. Under np->tx_lock the packet is also dropped if the carrier is
  * down, if it needs more than one slot while scatter-gather is off, or if
  * netif_needs_gso() says it still needs software segmentation.
  *
  * Otherwise a first request is built for the start of the linear area, an
  * optional GSO extra-info slot follows, xennet_make_frags() queues the
  * remaining requests, and the ring is pushed (notifying the backend if
  * required). Statistics are updated, completed slots are reclaimed, and the
  * queue is stopped when no slots remain.
  *
  * Returns NETDEV_TX_OK in every case; dropped packets are freed and counted
  * in dev->stats.tx_dropped.
  */
 478 static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev)
 479 {
 480         unsigned short id;
 481         struct netfront_info *np = netdev_priv(dev);
 482         struct netfront_stats *stats = this_cpu_ptr(np->stats);
 483         struct xen_netif_tx_request *tx;
 484         struct xen_netif_extra_info *extra;
 485         char *data = skb->data;
 486         RING_IDX i;
 487         grant_ref_t ref;
 488         unsigned long mfn;
 489         int notify;
 490         int frags = skb_shinfo(skb)->nr_frags;
 491         unsigned int offset = offset_in_page(data);
 492         unsigned int len = skb_headlen(skb);
 493         unsigned long flags;
 494
             /* From here on, frags counts ring slots: fragments plus linear-area pages. */
 495         frags += DIV_ROUND_UP(offset + len, PAGE_SIZE);
 496         if (unlikely(frags > MAX_SKB_FRAGS + 1)) {
 497                 printk(KERN_ALERT "xennet: skb rides the rocket: %d frags\n",
 498                        frags);
 499                 dump_stack();
 500                 goto drop;
 501         }
 502
 503         spin_lock_irqsave(&np->tx_lock, flags);
 504
 505         if (unlikely(!netif_carrier_ok(dev) ||
 506                      (frags > 1 && !xennet_can_sg(dev)) ||
 507                      netif_needs_gso(skb, netif_skb_features(skb)))) {
 508                 spin_unlock_irqrestore(&np->tx_lock, flags);
 509                 goto drop;
 510         }
 511
 512         i = np->tx.req_prod_pvt;
 513
             /* This slot takes over the caller's reference on the skb. */
 514         id = get_id_from_freelist(&np->tx_skb_freelist, np->tx_skbs);
 515         np->tx_skbs[id].skb = skb;
 516
 517         tx = RING_GET_REQUEST(&np->tx, i);
 518
 519         tx->id   = id;
 520         ref = gnttab_claim_grant_reference(&np->gref_tx_head);
 521         BUG_ON((signed short)ref < 0);
 522         mfn = virt_to_mfn(data);
 523         gnttab_grant_foreign_access_ref(
 524                 ref, np->xbdev->otherend_id, mfn, GNTMAP_readonly);
 525         tx->gref = np->grant_tx_ref[id] = ref;
 526         tx->offset = offset;
 527         tx->size = len;
 528         extra = NULL;
 529
 530         tx->flags = 0;
 531         if (skb->ip_summed == CHECKSUM_PARTIAL)
 532                 /* local packet? */
 533                 tx->flags |= XEN_NETTXF_csum_blank | XEN_NETTXF_data_validated;
 534         else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
 535                 /* remote but checksummed. */
 536                 tx->flags |= XEN_NETTXF_data_validated;
 537
 538         if (skb_shinfo(skb)->gso_size) {
 539                 struct xen_netif_extra_info *gso;
 540
                     /* The GSO descriptor occupies the ring slot after the first request. */
 541                 gso = (struct xen_netif_extra_info *)
 542                         RING_GET_REQUEST(&np->tx, ++i);
 543
                     /* extra is still NULL at this point, so the else branch runs. */
 544                 if (extra)
 545                         extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE;
 546                 else
 547                         tx->flags |= XEN_NETTXF_extra_info;
 548
 549                 gso->u.gso.size = skb_shinfo(skb)->gso_size;
 550                 gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
 551                 gso->u.gso.pad = 0;
 552                 gso->u.gso.features = 0;
 553
 554                 gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
 555                 gso->flags = 0;
 556                 extra = gso;
 557         }
 558
 559         np->tx.req_prod_pvt = i + 1;
 560
 561         xennet_make_frags(skb, dev, tx);
             /* The first request's size is set to the whole packet length. */
 562         tx->size = skb->len;
 563
 564         RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->tx, notify);
 565         if (notify)
 566                 notify_remote_via_irq(np->netdev->irq);
 567
 568         u64_stats_update_begin(&stats->syncp);
 569         stats->tx_bytes += skb->len;
 570         stats->tx_packets++;
 571         u64_stats_update_end(&stats->syncp);
 572
 573         /* Note: It is not safe to access skb after xennet_tx_buf_gc()! */
 574         xennet_tx_buf_gc(dev);
 575
 576         if (!netfront_tx_slot_available(np))
 577                 netif_stop_queue(dev);
 578
 579         spin_unlock_irqrestore(&np->tx_lock, flags);
 580
 581         return NETDEV_TX_OK;
 582
 583  drop:
 584         dev->stats.tx_dropped++;
 585         dev_kfree_skb(skb);
 586         return NETDEV_TX_OK;
 587 }
 588
 589 static int xennet_close(struct net_device *dev)
 590 {
 591         struct netfront_info *np = netdev_priv(dev);
 592         netif_stop_queue(np->netdev);
 593         napi_disable(&np->napi);
 594         return 0;
 595 }
 596
 597 static void xennet_move_rx_slot(struct netfront_info *np, struct sk_buff *skb,
 598                                 grant_ref_t ref)
 599 {
 600         int new = xennet_rxidx(np->rx.req_prod_pvt);
 601
 602         BUG_ON(np->rx_skbs[new]);
 603         np->rx_skbs[new] = skb;
 604         np->grant_rx_ref[new] = ref;
 605         RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->id = new;
 606         RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->gref = ref;
 607         np->rx.req_prod_pvt++;
 608 }
 609
 610 static int xennet_get_extras(struct netfront_info *np,
 611                              struct xen_netif_extra_info *extras,
 612                              RING_IDX rp)
 613
 614 {
 615         struct xen_netif_extra_info *extra;
 616         struct device *dev = &np->netdev->dev;
 617         RING_IDX cons = np->rx.rsp_cons;
 618         int err = 0;
 619
 620         do {
 621                 struct sk_buff *skb;
 622                 grant_ref_t ref;
 623
 624                 if (unlikely(cons + 1 == rp)) {
 625                         if (net_ratelimit())
 626                                 dev_warn(dev, "Missing extra info\n");
 627                         err = -EBADR;
 628                         break;
 629                 }
 630
 631                 extra = (struct xen_netif_extra_info *)
 632                         RING_GET_RESPONSE(&np->rx, ++cons);
 633
 634                 if (unlikely(!extra->type ||
 635                              extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
 636                         if (net_ratelimit())
 637                                 dev_warn(dev, "Invalid extra type: %d\n",
 638                                         extra->type);
 639                         err = -EINVAL;
 640                 } else {
 641                         memcpy(&extras[extra->type - 1], extra,
 642                                sizeof(*extra));
 643                 }
 644
 645                 skb = xennet_get_rx_skb(np, cons);
 646                 ref = xennet_get_rx_ref(np, cons);
 647                 xennet_move_rx_slot(np, skb, ref);
 648         } while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE);
 649
 650         np->rx.rsp_cons = cons;
 651         return err;
 652 }
 653
 /*
  * Gather the buffers that make up one received packet.
  *
  * Starting from the response in rinfo->rx (at np->rx.rsp_cons), any extra
  * info slots are consumed first; then the skb of this response and of every
  * follow-on response flagged XEN_NETRXF_more_data has its grant ended and
  * released and is appended to @list.
  *
  * Returns 0 on success. Errors seen along the way (a later one overwrites
  * an earlier one):
  *   - the error from xennet_get_extras();
  *   - -EINVAL for a negative status or data running past the page (the
  *     buffer is recycled onto the ring), or for an invalid grant reference;
  *   - -ENOENT when more_data is set but @rp has been reached;
  *   - -E2BIG when more slots were used than MAX_SKB_FRAGS (plus one if the
  *     first response's size is at most RX_COPY_THRESHOLD).
  * On error np->rx.rsp_cons is moved past every slot looked at.
  */
 654 static int xennet_get_responses(struct netfront_info *np,
 655                                 struct netfront_rx_info *rinfo, RING_IDX rp,
 656                                 struct sk_buff_head *list)
 657 {
 658         struct xen_netif_rx_response *rx = &rinfo->rx;
 659         struct xen_netif_extra_info *extras = rinfo->extras;
 660         struct device *dev = &np->netdev->dev;
 661         RING_IDX cons = np->rx.rsp_cons;
 662         struct sk_buff *skb = xennet_get_rx_skb(np, cons);
 663         grant_ref_t ref = xennet_get_rx_ref(np, cons);
 664         int max = MAX_SKB_FRAGS + (rx->status <= RX_COPY_THRESHOLD);
 665         int frags = 1;
 666         int err = 0;
 667         unsigned long ret;
 668
 669         if (rx->flags & XEN_NETRXF_extra_info) {
 670                 err = xennet_get_extras(np, extras, rp);
                     /* xennet_get_extras() advanced rsp_cons past the extras. */
 671                 cons = np->rx.rsp_cons;
 672         }
 673
 674         for (;;) {
 675                 if (unlikely(rx->status < 0 ||
 676                              rx->offset + rx->status > PAGE_SIZE)) {
 677                         if (net_ratelimit())
 678                                 dev_warn(dev, "rx->offset: %x, size: %u\n",
 679                                          rx->offset, rx->status);
 680                         xennet_move_rx_slot(np, skb, ref);
 681                         err = -EINVAL;
 682                         goto next;
 683                 }
 684
 685                 /*
 686                  * This definitely indicates a bug, either in this driver or in
 687                  * the backend driver. In future this should flag the bad
 688                  * situation to the system controller to reboot the backend.
 689                  */
 690                 if (ref == GRANT_INVALID_REF) {
 691                         if (net_ratelimit())
 692                                 dev_warn(dev, "Bad rx response id %d.\n",
 693                                          rx->id);
 694                         err = -EINVAL;
 695                         goto next;
 696                 }
 697
 698                 ret = gnttab_end_foreign_access_ref(ref, 0);
 699                 BUG_ON(!ret);
 700
 701                 gnttab_release_grant_reference(&np->gref_rx_head, ref);
 702
 703                 __skb_queue_tail(list, skb);
 704
 705 next:
 706                 if (!(rx->flags & XEN_NETRXF_more_data))
 707                         break;
 708
 709                 if (cons + frags == rp) {
 710                         if (net_ratelimit())
 711                                 dev_warn(dev, "Need more frags\n");
 712                         err = -ENOENT;
 713                         break;
 714                 }
 715
                     /* Step to the next response of this packet and take its buffer. */
 716                 rx = RING_GET_RESPONSE(&np->rx, cons + frags);
 717                 skb = xennet_get_rx_skb(np, cons + frags);
 718                 ref = xennet_get_rx_ref(np, cons + frags);
 719                 frags++;
 720         }
 721
 722         if (unlikely(frags > max)) {
 723                 if (net_ratelimit())
 724                         dev_warn(dev, "Too many frags\n");
 725                 err = -E2BIG;
 726         }
 727
 728         if (unlikely(err))
 729                 np->rx.rsp_cons = cons + frags;
 730
 731         return err;
 732 }
 733
 734 static int xennet_set_skb_gso(struct sk_buff *skb,
 735                               struct xen_netif_extra_info *gso)
 736 {
 737         if (!gso->u.gso.size) {
 738                 if (net_ratelimit())
 739                         printk(KERN_WARNING "GSO size must not be zero.\n");
 740                 return -EINVAL;
 741         }
 742
 743         /* Currently only TCPv4 S.O. is supported. */
 744         if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
 745                 if (net_ratelimit())
 746                         printk(KERN_WARNING "Bad GSO type %d.\n", gso->u.gso.type);
 747                 return -EINVAL;
 748         }
 749
 750         skb_shinfo(skb)->gso_size = gso->u.gso.size;
 751         skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
 752
 753         /* Header must be checked, and gso_segs computed. */
 754         skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
 755         skb_shinfo(skb)->gso_segs = 0;
 756
 757         return 0;
 758 }
 759
 760 static RING_IDX xennet_fill_frags(struct netfront_info *np,
 761                                   struct sk_buff *skb,
 762                                   struct sk_buff_head *list)
 763 {
 764         struct skb_shared_info *shinfo = skb_shinfo(skb);
 765         int nr_frags = shinfo->nr_frags;
 766         RING_IDX cons = np->rx.rsp_cons;
 767         struct sk_buff *nskb;
 768
 769         while ((nskb = __skb_dequeue(list))) {
 770                 struct xen_netif_rx_response *rx =
 771                         RING_GET_RESPONSE(&np->rx, ++cons);
 772                 skb_frag_t *nfrag = &skb_shinfo(nskb)->frags[0];
 773
 774                 __skb_fill_page_desc(skb, nr_frags,
 775                                      skb_frag_page(nfrag),
 776                                      rx->offset, rx->status);
 777
 778                 skb->data_len += rx->status;
 779
 780                 skb_shinfo(nskb)->nr_frags = 0;
 781                 kfree_skb(nskb);
 782
 783                 nr_frags++;
 784         }
 785
 786         shinfo->nr_frags = nr_frags;
 787         return cons;
 788 }
 789
 790 static int checksum_setup(struct net_device *dev, struct sk_buff *skb)
 791 {
 792         struct iphdr *iph;
 793         unsigned char *th;
 794         int err = -EPROTO;
 795         int recalculate_partial_csum = 0;
 796
 797         /*
 798          * A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
 799          * peers can fail to set NETRXF_csum_blank when sending a GSO
 800          * frame. In this case force the SKB to CHECKSUM_PARTIAL and
 801          * recalculate the partial checksum.
 802          */
 803         if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
 804                 struct netfront_info *np = netdev_priv(dev);
 805                 np->rx_gso_checksum_fixup++;
 806                 skb->ip_summed = CHECKSUM_PARTIAL;
 807                 recalculate_partial_csum = 1;
 808         }
 809
 810         /* A non-CHECKSUM_PARTIAL SKB does not require setup. */
 811         if (skb->ip_summed != CHECKSUM_PARTIAL)
 812                 return 0;
 813
 814         if (skb->protocol != htons(ETH_P_IP))
 815                 goto out;
 816
 817         iph = (void *)skb->data;
 818         th = skb->data + 4 * iph->ihl;
 819         if (th >= skb_tail_pointer(skb))
 820                 goto out;
 821
 822         skb->csum_start = th - skb->head;
 823         switch (iph->protocol) {
 824         case IPPROTO_TCP:
 825                 skb->csum_offset = offsetof(struct tcphdr, check);
 826
 827                 if (recalculate_partial_csum) {
 828                         struct tcphdr *tcph = (struct tcphdr *)th;
 829                         tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
 830                                                          skb->len - iph->ihl*4,
 831                                                          IPPROTO_TCP, 0);
 832                 }
 833                 break;
 834         case IPPROTO_UDP:
 835                 skb->csum_offset = offsetof(struct udphdr, check);
 836
 837                 if (recalculate_partial_csum) {
 838                         struct udphdr *udph = (struct udphdr *)th;
 839                         udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
 840                                                          skb->len - iph->ihl*4,
 841                                                          IPPROTO_UDP, 0);
 842                 }
 843                 break;
 844         default:
 845                 if (net_ratelimit())
 846                         printk(KERN_ERR "Attempting to checksum a non-"
 847                                "TCP/UDP packet, dropping a protocol"
 848                                " %d packet", iph->protocol);
 849                 goto out;
 850         }
 851
 852         if ((th + skb->csum_offset + 2) > skb_tail_pointer(skb))
 853                 goto out;
 854
 855         err = 0;
 856
 857 out:
 858         return err;
 859 }
 860
 861 static int handle_incoming_queue(struct net_device *dev,
 862                                  struct sk_buff_head *rxq)
 863 {
 864         struct netfront_info *np = netdev_priv(dev);
 865         struct netfront_stats *stats = this_cpu_ptr(np->stats);
 866         int packets_dropped = 0;
 867         struct sk_buff *skb;
 868
 869         while ((skb = __skb_dequeue(rxq)) != NULL) {
 870                 struct page *page = NETFRONT_SKB_CB(skb)->page;
 871                 void *vaddr = page_address(page);
 872                 unsigned offset = NETFRONT_SKB_CB(skb)->offset;
 873
 874                 memcpy(skb->data, vaddr + offset,
 875                        skb_headlen(skb));
 876
 877                 if (page != skb_frag_page(&skb_shinfo(skb)->frags[0]))
 878                         __free_page(page);
 879
 880                 /* Ethernet work: Delayed to here as it peeks the header. */
 881                 skb->protocol = eth_type_trans(skb, dev);
 882
 883                 if (checksum_setup(dev, skb)) {
 884                         kfree_skb(skb);
 885                         packets_dropped++;
 886                         dev->stats.rx_errors++;
 887                         continue;
 888                 }
 889
 890                 u64_stats_update_begin(&stats->syncp);
 891                 stats->rx_packets++;
 892                 stats->rx_bytes += skb->len;
 893                 u64_stats_update_end(&stats->syncp);
 894
 895                 /* Pass it up. */
 896                 netif_receive_skb(skb);
 897         }
 898
 899         return packets_dropped;
 900 }
 901
     /*
      * NAPI poll callback for the receive ring.
      *
      * Under np->rx_lock, consumes responses starting at np->rx.rsp_cons up to
      * the producer index sampled on entry, stopping once the ring is drained
      * or @budget packets have been assembled. Each packet is collected by
      * xennet_get_responses(), has its extra fragments attached by
      * xennet_fill_frags(), and is queued locally; the whole batch is then
      * passed to handle_incoming_queue(). Afterwards the fill target may be
      * lowered and the ring is refilled with fresh buffers.
      *
      * Returns the number of packets assembled, less those that
      * handle_incoming_queue() reported as dropped.
      */
 902 static int xennet_poll(struct napi_struct *napi, int budget)
 903 {
 904         struct netfront_info *np = container_of(napi, struct netfront_info, napi);
 905         struct net_device *dev = np->netdev;
 906         struct sk_buff *skb;
 907         struct netfront_rx_info rinfo;
 908         struct xen_netif_rx_response *rx = &rinfo.rx;
 909         struct xen_netif_extra_info *extras = rinfo.extras;
 910         RING_IDX i, rp;
 911         int work_done;
 912         struct sk_buff_head rxq;
 913         struct sk_buff_head errq;
 914         struct sk_buff_head tmpq;
 915         unsigned long flags;
 916         unsigned int len;
 917         int err;
 918
 919         spin_lock(&np->rx_lock);
 920
             /* rxq: good packets; errq: skbs to discard; tmpq: skbs of the packet in progress. */
 921         skb_queue_head_init(&rxq);
 922         skb_queue_head_init(&errq);
 923         skb_queue_head_init(&tmpq);
 924
 925         rp = np->rx.sring->rsp_prod;
 926         rmb(); /* Ensure we see queued responses up to 'rp'. */
 927
 928         i = np->rx.rsp_cons;
 929         work_done = 0;
 930         while ((i != rp) && (work_done < budget)) {
                     /* Work on a private copy of the first response of this packet. */
 931                 memcpy(rx, RING_GET_RESPONSE(&np->rx, i), sizeof(*rx));
 932                 memset(extras, 0, sizeof(rinfo.extras));
 933
 934                 err = xennet_get_responses(np, &rinfo, rp, &tmpq);
 935
 936                 if (unlikely(err)) {
                             /* Also entered by 'goto err' when GSO setup fails below. */
 937 err:
 938                         while ((skb = __skb_dequeue(&tmpq)))
 939                                 __skb_queue_tail(&errq, skb);
 940                         dev->stats.rx_errors++;
                             /* Resume from wherever the consumer index now points. */
 941                         i = np->rx.rsp_cons;
 942                         continue;
 943                 }
 944
                     /* The first skb on tmpq becomes the head of the packet. */
 945                 skb = __skb_dequeue(&tmpq);
 946
 947                 if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
 948                         struct xen_netif_extra_info *gso;
 949                         gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
 950
 951                         if (unlikely(xennet_set_skb_gso(skb, gso))) {
                                     /*
                                      * Put the head back so the error path discards
                                      * it too, and advance the consumer index by the
                                      * number of skbs queued for this packet.
                                      */
 952                                 __skb_queue_head(&tmpq, skb);
 953                                 np->rx.rsp_cons += skb_queue_len(&tmpq);
 954                                 goto err;
 955                         }
 956                 }
 957
                     /* Remember where the header bytes live; handle_incoming_queue() copies them. */
 958                 NETFRONT_SKB_CB(skb)->page =
 959                         skb_frag_page(&skb_shinfo(skb)->frags[0]);
 960                 NETFRONT_SKB_CB(skb)->offset = rx->offset;
 961
                     /* At most RX_COPY_THRESHOLD bytes go into the linear area. */
 962                 len = rx->status;
 963                 if (len > RX_COPY_THRESHOLD)
 964                         len = RX_COPY_THRESHOLD;
 965                 skb_put(skb, len);
 966
 967                 if (rx->status > len) {
                             /* The remainder stays in frag 0, past the copied part. */
 968                         skb_shinfo(skb)->frags[0].page_offset =
 969                                 rx->offset + len;
 970                         skb_frag_size_set(&skb_shinfo(skb)->frags[0], rx->status - len);
 971                         skb->data_len = rx->status - len;
 972                 } else {
                             /* Everything fits in the linear area: no frag needed. */
 973                         __skb_fill_page_desc(skb, 0, NULL, 0, 0);
 974                         skb_shinfo(skb)->nr_frags = 0;
 975                 }
 976
 977                 i = xennet_fill_frags(np, skb, &tmpq);
 978
 979                 /*
 980                  * Truesize approximates the size of true data plus
 981                  * any supervisor overheads. Adding hypervisor
 982                  * overheads has been shown to significantly reduce
 983                  * achievable bandwidth with the default receive
 984                  * buffer size. It is therefore not wise to account
 985                  * for it here.
 986                  *
 987                  * After alloc_skb(RX_COPY_THRESHOLD), truesize is set
 988                  * to RX_COPY_THRESHOLD + the supervisor
 989                  * overheads. Here, we add the size of the data pulled
 990                  * in xennet_fill_frags().
 991                  *
 992                  * We also adjust for any unused space in the main
 993                  * data area by subtracting (RX_COPY_THRESHOLD -
 994                  * len). This is especially important with drivers
 995                  * which split incoming packets into header and data,
 996                  * using only 66 bytes of the main data area (see the
 997                  * e1000 driver for example.)  On such systems,
 998                  * without this last adjustment, our achievable
 999                  * receive throughput using the standard receive
1000                  * buffer size was cut by 25%(!!!).
1001                  */
1002                 skb->truesize += skb->data_len - (RX_COPY_THRESHOLD - len);
1003                 skb->len += skb->data_len;
1004
1005                 if (rx->flags & XEN_NETRXF_csum_blank)
1006                         skb->ip_summed = CHECKSUM_PARTIAL;
1007                 else if (rx->flags & XEN_NETRXF_data_validated)
1008                         skb->ip_summed = CHECKSUM_UNNECESSARY;
1009
1010                 __skb_queue_tail(&rxq, skb);
1011
1012                 np->rx.rsp_cons = ++i;
1013                 work_done++;
1014         }
1015
1016         __skb_queue_purge(&errq);
1017
             /* Packets dropped during delivery do not count as work done. */
1018         work_done -= handle_incoming_queue(dev, &rxq);
1019
1020         /* If we get a callback with very few responses, reduce fill target. */
1021         /* NB. Note exponential increase, linear decrease. */
1022         if (((np->rx.req_prod_pvt - np->rx.sring->rsp_prod) >
1023              ((3*np->rx_target) / 4)) &&
1024             (--np->rx_target < np->rx_min_target))
1025                 np->rx_target = np->rx_min_target;
1026
1027         xennet_alloc_rx_buffers(dev);
1028
1029         if (work_done < budget) {
1030                 int more_to_do = 0;
1031
1032                 local_irq_save(flags);
1033
                     /* Only leave polling mode if no new responses have arrived. */
1034                 RING_FINAL_CHECK_FOR_RESPONSES(&np->rx, more_to_do);
1035                 if (!more_to_do)
1036                         __napi_complete(napi);
1037
1038                 local_irq_restore(flags);
1039         }
1040
1041         spin_unlock(&np->rx_lock);
1042
1043         return work_done;
1044 }
1045
1046 static int xennet_change_mtu(struct net_device *dev, int mtu)
1047 {
1048         int max = xennet_can_sg(dev) ? 65535 - ETH_HLEN : ETH_DATA_LEN;
1049
1050         if (mtu > max)
1051                 return -EINVAL;
1052         dev->mtu = mtu;
1053         return 0;
1054 }
1055
1056 static struct rtnl_link_stats64 *xennet_get_stats64(struct net_device *dev,
1057                                                     struct rtnl_link_stats64 *tot)
1058 {
1059         struct netfront_info *np = netdev_priv(dev);
1060         int cpu;
1061
1062         for_each_possible_cpu(cpu) {
1063                 struct netfront_stats *stats = per_cpu_ptr(np->stats, cpu);
1064                 u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
1065                 unsigned int start;
1066
1067                 do {
1068                         start = u64_stats_fetch_begin_bh(&stats->syncp);
1069
1070                         rx_packets = stats->rx_packets;
1071                         tx_packets = stats->tx_packets;
1072                         rx_bytes = stats->rx_bytes;
1073                         tx_bytes = stats->tx_bytes;
1074                 } while (u64_stats_fetch_retry_bh(&stats->syncp, start));
1075
1076                 tot->rx_packets += rx_packets;
1077                 tot->tx_packets += tx_packets;
1078                 tot->rx_bytes   += rx_bytes;
1079                 tot->tx_bytes   += tx_bytes;
1080         }
1081
1082         tot->rx_errors  = dev->stats.rx_errors;
1083         tot->tx_dropped = dev->stats.tx_dropped;
1084
1085         return tot;
1086 }
1087
1088 static void xennet_release_tx_bufs(struct netfront_info *np)
1089 {
1090         struct sk_buff *skb;
1091         int i;
1092
1093         for (i = 0; i < NET_TX_RING_SIZE; i++) {
1094                 /* Skip over entries which are actually freelist references */
1095                 if (skb_entry_is_link(&np->tx_skbs[i]))
1096                         continue;
1097
1098                 skb = np->tx_skbs[i].skb;
1099                 gnttab_end_foreign_access_ref(np->grant_tx_ref[i],
1100                                               GNTMAP_readonly);
1101                 gnttab_release_grant_reference(&np->gref_tx_head,
1102                                                np->grant_tx_ref[i]);
1103                 np->grant_tx_ref[i] = GRANT_INVALID_REF;
1104                 add_id_to_freelist(&np->tx_skb_freelist, np->tx_skbs, i);
1105                 dev_kfree_skb_irq(skb);
1106         }
1107 }
1108
     /*
      * Release the receive buffers.
      *
      * As written this only logs a "fix me" warning and returns: the
      * unconditional return below makes the rest of the function unreachable,
      * so no receive buffer or grant reference is released here.
      *
      * The unreachable code walks every rx slot with a valid grant reference,
      * ends the foreign transfer, releases the reference, and either frees the
      * skb (no frame came back) or remaps the returned frame and queues the skb
      * for freeing.
      * NOTE(review): that logic uses gnttab_end_foreign_transfer_ref() and page
      * remapping, so it looks written for a page-transfer receive mode rather
      * than the copying one - confirm before re-enabling it.
      */
1109 static void xennet_release_rx_bufs(struct netfront_info *np)
1110 {
1111         struct mmu_update      *mmu = np->rx_mmu;
1112         struct multicall_entry *mcl = np->rx_mcl;
1113         struct sk_buff_head free_list;
1114         struct sk_buff *skb;
1115         unsigned long mfn;
1116         int xfer = 0, noxfer = 0, unused = 0;
1117         int id, ref;
1118
1119         dev_warn(&np->netdev->dev, "%s: fix me for copying receiver.\n",
1120                          __func__);
             /* Everything below this return is dead code. */
1121         return;
1122
1123         skb_queue_head_init(&free_list);
1124
1125         spin_lock_bh(&np->rx_lock);
1126
1127         for (id = 0; id < NET_RX_RING_SIZE; id++) {
1128                 ref = np->grant_rx_ref[id];
1129                 if (ref == GRANT_INVALID_REF) {
1130                         unused++;
1131                         continue;
1132                 }
1133
1134                 skb = np->rx_skbs[id];
1135                 mfn = gnttab_end_foreign_transfer_ref(ref);
1136                 gnttab_release_grant_reference(&np->gref_rx_head, ref);
1137                 np->grant_rx_ref[id] = GRANT_INVALID_REF;
1138
                     /* No frame was handed back: drop the skb without its frag. */
1139                 if (0 == mfn) {
1140                         skb_shinfo(skb)->nr_frags = 0;
1141                         dev_kfree_skb(skb);
1142                         noxfer++;
1143                         continue;
1144                 }
1145
1146                 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
1147                         /* Remap the page. */
1148                         const struct page *page =
1149                                 skb_frag_page(&skb_shinfo(skb)->frags[0]);
1150                         unsigned long pfn = page_to_pfn(page);
1151                         void *vaddr = page_address(page);
1152
                             /* Queue a VA mapping update and an M2P update for the batch below. */
1153                         MULTI_update_va_mapping(mcl, (unsigned long)vaddr,
1154                                                 mfn_pte(mfn, PAGE_KERNEL),
1155                                                 0);
1156                         mcl++;
1157                         mmu->ptr = ((u64)mfn << PAGE_SHIFT)
1158                                 | MMU_MACHPHYS_UPDATE;
1159                         mmu->val = pfn;
1160                         mmu++;
1161
1162                         set_phys_to_machine(pfn, mfn);
1163                 }
1164                 __skb_queue_tail(&free_list, skb);
1165                 xfer++;
1166         }
1167
1168         dev_info(&np->netdev->dev, "%s: %d xfer, %d noxfer, %d unused\n",
1169                  __func__, xfer, noxfer, unused);
1170
1171         if (xfer) {
1172                 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
1173                         /* Do all the remapping work and M2P updates. */
1174                         MULTI_mmu_update(mcl, np->rx_mmu, mmu - np->rx_mmu,
1175                                          NULL, DOMID_SELF);
1176                         mcl++;
1177                         HYPERVISOR_multicall(np->rx_mcl, mcl - np->rx_mcl);
1178                 }
1179         }
1180
1181         __skb_queue_purge(&free_list);
1182
1183         spin_unlock_bh(&np->rx_lock);
1184 }
1185
1186 static void xennet_uninit(struct net_device *dev)
1187 {
1188         struct netfront_info *np = netdev_priv(dev);
1189         xennet_release_tx_bufs(np);
1190         xennet_release_rx_bufs(np);
1191         gnttab_free_grant_references(np->gref_tx_head);
1192         gnttab_free_grant_references(np->gref_rx_head);
1193 }
1194
1195 static netdev_features_t xennet_fix_features(struct net_device *dev,
1196         netdev_features_t features)
1197 {
1198         struct netfront_info *np = netdev_priv(dev);
1199         int val;
1200
1201         if (features & NETIF_F_SG) {
1202                 if (xenbus_scanf(XBT_NIL, np->xbdev->otherend, "feature-sg",
1203                                  "%d", &val) < 0)
1204                         val = 0;
1205
1206                 if (!val)
1207                         features &= ~NETIF_F_SG;
1208         }
1209
1210         if (features & NETIF_F_TSO) {
1211                 if (xenbus_scanf(XBT_NIL, np->xbdev->otherend,
1212                                  "feature-gso-tcpv4", "%d", &val) < 0)
1213                         val = 0;
1214
1215                 if (!val)
1216                         features &= ~NETIF_F_TSO;
1217         }
1218
1219         return features;
1220 }
1221
1222 static int xennet_set_features(struct net_device *dev,
1223         netdev_features_t features)
1224 {
1225         if (!(features & NETIF_F_SG) && dev->mtu > ETH_DATA_LEN) {
1226                 netdev_info(dev, "Reducing MTU because no SG offload");
1227                 dev->mtu = ETH_DATA_LEN;
1228         }
1229
1230         return 0;
1231 }
1232
1233 static irqreturn_t xennet_interrupt(int irq, void *dev_id)
1234 {
1235         struct net_device *dev = dev_id;
1236         struct netfront_info *np = netdev_priv(dev);
1237         unsigned long flags;
1238
1239         spin_lock_irqsave(&np->tx_lock, flags);
1240
1241         if (likely(netif_carrier_ok(dev))) {
1242                 xennet_tx_buf_gc(dev);
1243                 /* Under tx_lock: protects access to rx shared-ring indexes. */
1244                 if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx))
1245                         napi_schedule(&np->napi);
1246         }
1247
1248         spin_unlock_irqrestore(&np->tx_lock, flags);
1249
1250         return IRQ_HANDLED;
1251 }
1252
1253 #ifdef CONFIG_NET_POLL_CONTROLLER
1254 static void xennet_poll_controller(struct net_device *dev)
1255 {
1256         xennet_interrupt(0, dev);
1257 }
1258 #endif
1259
     /*
      * net_device operations for the netfront device; installed on the netdev
      * by xennet_create_dev(). MAC address handling uses the generic Ethernet
      * helpers; the poll-controller hook exists only with
      * CONFIG_NET_POLL_CONTROLLER.
      */
1260 static const struct net_device_ops xennet_netdev_ops = {
1261         .ndo_open            = xennet_open,
1262         .ndo_uninit          = xennet_uninit,
1263         .ndo_stop            = xennet_close,
1264         .ndo_start_xmit      = xennet_start_xmit,
1265         .ndo_change_mtu      = xennet_change_mtu,
1266         .ndo_get_stats64     = xennet_get_stats64,
1267         .ndo_set_mac_address = eth_mac_addr,
1268         .ndo_validate_addr   = eth_validate_addr,
1269         .ndo_fix_features    = xennet_fix_features,
1270         .ndo_set_features    = xennet_set_features,
1271 #ifdef CONFIG_NET_POLL_CONTROLLER
1272         .ndo_poll_controller = xennet_poll_controller,
1273 #endif
1274 };
1275
1276 static struct net_device * __devinit xennet_create_dev(struct xenbus_device *dev)
1277 {
1278         int i, err;
1279         struct net_device *netdev;
1280         struct netfront_info *np;
1281
1282         netdev = alloc_etherdev(sizeof(struct netfront_info));
1283         if (!netdev)
1284                 return ERR_PTR(-ENOMEM);
1285
1286         np                   = netdev_priv(netdev);
1287         np->xbdev            = dev;
1288
1289         spin_lock_init(&np->tx_lock);
1290         spin_lock_init(&np->rx_lock);
1291
1292         skb_queue_head_init(&np->rx_batch);
1293         np->rx_target     = RX_DFL_MIN_TARGET;
1294         np->rx_min_target = RX_DFL_MIN_TARGET;
1295         np->rx_max_target = RX_MAX_TARGET;
1296
1297         init_timer(&np->rx_refill_timer);
1298         np->rx_refill_timer.data = (unsigned long)netdev;
1299         np->rx_refill_timer.function = rx_refill_timeout;
1300
1301         err = -ENOMEM;
1302         np->stats = alloc_percpu(struct netfront_stats);
1303         if (np->stats == NULL)
1304                 goto exit;
1305
1306         /* Initialise tx_skbs as a free chain containing every entry. */
1307         np->tx_skb_freelist = 0;
1308         for (i = 0; i < NET_TX_RING_SIZE; i++) {
1309                 skb_entry_set_link(&np->tx_skbs[i], i+1);
1310                 np->grant_tx_ref[i] = GRANT_INVALID_REF;
1311         }
1312
1313         /* Clear out rx_skbs */
1314         for (i = 0; i < NET_RX_RING_SIZE; i++) {
1315                 np->rx_skbs[i] = NULL;
1316                 np->grant_rx_ref[i] = GRANT_INVALID_REF;
1317         }
1318
1319         /* A grant for every tx ring slot */
1320         if (gnttab_alloc_grant_references(TX_MAX_TARGET,
1321                                           &np->gref_tx_head) < 0) {
1322                 printk(KERN_ALERT "#### netfront can't alloc tx grant refs\n");
1323                 err = -ENOMEM;
1324                 goto exit_free_stats;
1325         }
1326         /* A grant for every rx ring slot */
1327         if (gnttab_alloc_grant_references(RX_MAX_TARGET,
1328                                           &np->gref_rx_head) < 0) {
1329                 printk(KERN_ALERT "#### netfront can't alloc rx grant refs\n");
1330                 err = -ENOMEM;
1331                 goto exit_free_tx;
1332         }
1333
1334         netdev->netdev_ops      = &xennet_netdev_ops;
1335
1336         netif_napi_add(netdev, &np->napi, xennet_poll, 64);
1337         netdev->features        = NETIF_F_IP_CSUM | NETIF_F_RXCSUM |
1338                                   NETIF_F_GSO_ROBUST;
1339         netdev->hw_features     = NETIF_F_IP_CSUM | NETIF_F_SG | NETIF_F_TSO;
1340
1341         /*
1342          * Assume that all hw features are available for now. This set
1343          * will be adjusted by the call to netdev_update_features() in
1344          * xennet_connect() which is the earliest point where we can
1345          * negotiate with the backend regarding supported features.
1346          */
1347         netdev->features |= netdev->hw_features;
1348
1349         SET_ETHTOOL_OPS(netdev, &xennet_ethtool_ops);
1350         SET_NETDEV_DEV(netdev, &dev->dev);
1351
1352         np->netdev = netdev;
1353
1354         netif_carrier_off(netdev);
1355
1356         return netdev;
1357
1358  exit_free_tx:
1359         gnttab_free_grant_references(np->gref_tx_head);
1360  exit_free_stats:
1361         free_percpu(np->stats);
1362  exit:
1363         free_netdev(netdev);
1364         return ERR_PTR(err);
1365 }
1366
1367 /**
1368  * Entry point to this code when a new device is created.  Allocate the basic
1369  * structures and the ring buffers for communication with the backend, and
1370  * inform the backend of the appropriate details for those.
1371  */
1372 static int __devinit netfront_probe(struct xenbus_device *dev,
1373                                     const struct xenbus_device_id *id)
1374 {
1375         int err;
1376         struct net_device *netdev;
1377         struct netfront_info *info;
1378
1379         netdev = xennet_create_dev(dev);
1380         if (IS_ERR(netdev)) {
1381                 err = PTR_ERR(netdev);
1382                 xenbus_dev_fatal(dev, err, "creating netdev");
1383                 return err;
1384         }
1385
1386         info = netdev_priv(netdev);
1387         dev_set_drvdata(&dev->dev, info);
1388
1389         err = register_netdev(info->netdev);
1390         if (err) {
1391                 printk(KERN_WARNING "%s: register_netdev err=%d\n",
1392                        __func__, err);
1393                 goto fail;
1394         }
1395
1396         err = xennet_sysfs_addif(info->netdev);
1397         if (err) {
1398                 unregister_netdev(info->netdev);
1399                 printk(KERN_WARNING "%s: add sysfs failed err=%d\n",
1400                        __func__, err);
1401                 goto fail;
1402         }
1403
1404         return 0;
1405
1406  fail:
1407         free_netdev(netdev);
1408         dev_set_drvdata(&dev->dev, NULL);
1409         return err;
1410 }
1411
1412 static void xennet_end_access(int ref, void *page)
1413 {
1414         /* This frees the page as a side-effect */
1415         if (ref != GRANT_INVALID_REF)
1416                 gnttab_end_foreign_access(ref, 0, (unsigned long)page);
1417 }
1418
1419 static void xennet_disconnect_backend(struct netfront_info *info)
1420 {
1421         /* Stop old i/f to prevent errors whilst we rebuild the state. */
1422         spin_lock_bh(&info->rx_lock);
1423         spin_lock_irq(&info->tx_lock);
1424         netif_carrier_off(info->netdev);
1425         spin_unlock_irq(&info->tx_lock);
1426         spin_unlock_bh(&info->rx_lock);
1427
1428         if (info->netdev->irq)
1429                 unbind_from_irqhandler(info->netdev->irq, info->netdev);
1430         info->evtchn = info->netdev->irq = 0;
1431
1432         /* End access and free the pages */
1433         xennet_end_access(info->tx_ring_ref, info->tx.sring);
1434         xennet_end_access(info->rx_ring_ref, info->rx.sring);
1435
1436         info->tx_ring_ref = GRANT_INVALID_REF;
1437         info->rx_ring_ref = GRANT_INVALID_REF;
1438         info->tx.sring = NULL;
1439         info->rx.sring = NULL;
1440 }
1441
1442 /**
1443  * We are reconnecting to the backend, due to a suspend/resume, or a backend
1444  * driver restart.  We tear down our netif structure and recreate it, but
1445  * leave the device-layer structures intact so that this is transparent to the
1446  * rest of the kernel.
1447  */
1448 static int netfront_resume(struct xenbus_device *dev)
1449 {
1450         struct netfront_info *info = dev_get_drvdata(&dev->dev);
1451
1452         dev_dbg(&dev->dev, "%s\n", dev->nodename);
1453
1454         xennet_disconnect_backend(info);
1455         return 0;
1456 }
1457
1458 static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
1459 {
1460         char *s, *e, *macstr;
1461         int i;
1462
1463         macstr = s = xenbus_read(XBT_NIL, dev->nodename, "mac", NULL);
1464         if (IS_ERR(macstr))
1465                 return PTR_ERR(macstr);
1466
1467         for (i = 0; i < ETH_ALEN; i++) {
1468                 mac[i] = simple_strtoul(s, &e, 16);
1469                 if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) {
1470                         kfree(macstr);
1471                         return -ENOENT;
1472                 }
1473                 s = e+1;
1474         }
1475
1476         kfree(macstr);
1477         return 0;
1478 }
1479
1480 static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info)
1481 {
1482         struct xen_netif_tx_sring *txs;
1483         struct xen_netif_rx_sring *rxs;
1484         int err;
1485         struct net_device *netdev = info->netdev;
1486
1487         info->tx_ring_ref = GRANT_INVALID_REF;
1488         info->rx_ring_ref = GRANT_INVALID_REF;
1489         info->rx.sring = NULL;
1490         info->tx.sring = NULL;
1491         netdev->irq = 0;
1492
1493         err = xen_net_read_mac(dev, netdev->dev_addr);
1494         if (err) {
1495                 xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename);
1496                 goto fail;
1497         }
1498
1499         txs = (struct xen_netif_tx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH);
1500         if (!txs) {
1501                 err = -ENOMEM;
1502                 xenbus_dev_fatal(dev, err, "allocating tx ring page");
1503                 goto fail;
1504         }
1505         SHARED_RING_INIT(txs);
1506         FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE);
1507
1508         err = xenbus_grant_ring(dev, virt_to_mfn(txs));
1509         if (err < 0) {
1510                 free_page((unsigned long)txs);
1511                 goto fail;
1512         }
1513
1514         info->tx_ring_ref = err;
1515         rxs = (struct xen_netif_rx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH);
1516         if (!rxs) {
1517                 err = -ENOMEM;
1518                 xenbus_dev_fatal(dev, err, "allocating rx ring page");
1519                 goto fail;
1520         }
1521         SHARED_RING_INIT(rxs);
1522         FRONT_RING_INIT(&info->rx, rxs, PAGE_SIZE);
1523
1524         err = xenbus_grant_ring(dev, virt_to_mfn(rxs));
1525         if (err < 0) {
1526                 free_page((unsigned long)rxs);
1527                 goto fail;
1528         }
1529         info->rx_ring_ref = err;
1530
1531         err = xenbus_alloc_evtchn(dev, &info->evtchn);
1532         if (err)
1533                 goto fail;
1534
1535         err = bind_evtchn_to_irqhandler(info->evtchn, xennet_interrupt,
1536                                         0, netdev->name, netdev);
1537         if (err < 0)
1538                 goto fail;
1539         netdev->irq = err;
1540         return 0;
1541
1542  fail:
1543         return err;
1544 }
1545
1546 /* Common code used when first setting up, and when resuming. */
1547 static int talk_to_netback(struct xenbus_device *dev,
1548                            struct netfront_info *info)
1549 {
1550         const char *message;
1551         struct xenbus_transaction xbt;
1552         int err;
1553
1554         /* Create shared ring, alloc event channel. */
1555         err = setup_netfront(dev, info);
1556         if (err)
1557                 goto out;
1558
1559 again:
1560         err = xenbus_transaction_start(&xbt);
1561         if (err) {
1562                 xenbus_dev_fatal(dev, err, "starting transaction");
1563                 goto destroy_ring;
1564         }
1565
1566         err = xenbus_printf(xbt, dev->nodename, "tx-ring-ref", "%u",
1567                             info->tx_ring_ref);
1568         if (err) {
1569                 message = "writing tx ring-ref";
1570                 goto abort_transaction;
1571         }
1572         err = xenbus_printf(xbt, dev->nodename, "rx-ring-ref", "%u",
1573                             info->rx_ring_ref);
1574         if (err) {
1575                 message = "writing rx ring-ref";
1576                 goto abort_transaction;
1577         }
1578         err = xenbus_printf(xbt, dev->nodename,
1579                             "event-channel", "%u", info->evtchn);
1580         if (err) {
1581                 message = "writing event-channel";
1582                 goto abort_transaction;
1583         }
1584
1585         err = xenbus_printf(xbt, dev->nodename, "request-rx-copy", "%u",
1586                             1);
1587         if (err) {
1588                 message = "writing request-rx-copy";
1589                 goto abort_transaction;
1590         }
1591
1592         err = xenbus_printf(xbt, dev->nodename, "feature-rx-notify", "%d", 1);
1593         if (err) {
1594                 message = "writing feature-rx-notify";
1595                 goto abort_transaction;
1596         }
1597
1598         err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", 1);
1599         if (err) {
1600                 message = "writing feature-sg";
1601                 goto abort_transaction;
1602         }
1603
1604         err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4", "%d", 1);
1605         if (err) {
1606                 message = "writing feature-gso-tcpv4";
1607                 goto abort_transaction;
1608         }
1609
1610         err = xenbus_transaction_end(xbt, 0);
1611         if (err) {
1612                 if (err == -EAGAIN)
1613                         goto again;
1614                 xenbus_dev_fatal(dev, err, "completing transaction");
1615                 goto destroy_ring;
1616         }
1617
1618         return 0;
1619
1620  abort_transaction:
1621         xenbus_transaction_end(xbt, 1);
1622         xenbus_dev_fatal(dev, err, "%s", message);
1623  destroy_ring:
1624         xennet_disconnect_backend(info);
1625  out:
1626         return err;
1627 }
1628
1629 static int xennet_connect(struct net_device *dev)
1630 {
1631         struct netfront_info *np = netdev_priv(dev);
1632         int i, requeue_idx, err;
1633         struct sk_buff *skb;
1634         grant_ref_t ref;
1635         struct xen_netif_rx_request *req;
1636         unsigned int feature_rx_copy;
1637
1638         err = xenbus_scanf(XBT_NIL, np->xbdev->otherend,
1639                            "feature-rx-copy", "%u", &feature_rx_copy);
1640         if (err != 1)
1641                 feature_rx_copy = 0;
1642
1643         if (!feature_rx_copy) {
1644                 dev_info(&dev->dev,
1645                          "backend does not support copying receive path\n");
1646                 return -ENODEV;
1647         }
1648
1649         err = talk_to_netback(np->xbdev, np);
1650         if (err)
1651                 return err;
1652
1653         rtnl_lock();
1654         netdev_update_features(dev);
1655         rtnl_unlock();
1656
1657         spin_lock_bh(&np->rx_lock);
1658         spin_lock_irq(&np->tx_lock);
1659
1660         /* Step 1: Discard all pending TX packet fragments. */
1661         xennet_release_tx_bufs(np);
1662
1663         /* Step 2: Rebuild the RX buffer freelist and the RX ring itself. */
1664         for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) {
1665                 skb_frag_t *frag;
1666                 const struct page *page;
1667                 if (!np->rx_skbs[i])
1668                         continue;
1669
1670                 skb = np->rx_skbs[requeue_idx] = xennet_get_rx_skb(np, i);
1671                 ref = np->grant_rx_ref[requeue_idx] = xennet_get_rx_ref(np, i);
1672                 req = RING_GET_REQUEST(&np->rx, requeue_idx);
1673
1674                 frag = &skb_shinfo(skb)->frags[0];
1675                 page = skb_frag_page(frag);
1676                 gnttab_grant_foreign_access_ref(
1677                         ref, np->xbdev->otherend_id,
1678                         pfn_to_mfn(page_to_pfn(page)),
1679                         0);
1680                 req->gref = ref;
1681                 req->id   = requeue_idx;
1682
1683                 requeue_idx++;
1684         }
1685
1686         np->rx.req_prod_pvt = requeue_idx;
1687
1688         /*
1689          * Step 3: All public and private state should now be sane.  Get
1690          * ready to start sending and receiving packets and give the driver
1691          * domain a kick because we've probably just requeued some
1692          * packets.
1693          */
1694         netif_carrier_on(np->netdev);
1695         notify_remote_via_irq(np->netdev->irq);
1696         xennet_tx_buf_gc(dev);
1697         xennet_alloc_rx_buffers(dev);
1698
1699         spin_unlock_irq(&np->tx_lock);
1700         spin_unlock_bh(&np->rx_lock);
1701
1702         return 0;
1703 }
1704
1705 /**
1706  * Callback received when the backend's state changes.
1707  */
1708 static void netback_changed(struct xenbus_device *dev,
1709                             enum xenbus_state backend_state)
1710 {
1711         struct netfront_info *np = dev_get_drvdata(&dev->dev);
1712         struct net_device *netdev = np->netdev;
1713
1714         dev_dbg(&dev->dev, "%s\n", xenbus_strstate(backend_state));
1715
1716         switch (backend_state) {
1717         case XenbusStateInitialising:
1718         case XenbusStateInitialised:
1719         case XenbusStateReconfiguring:
1720         case XenbusStateReconfigured:
1721         case XenbusStateUnknown:
1722         case XenbusStateClosed:
1723                 break;
1724
1725         case XenbusStateInitWait:
1726                 if (dev->state != XenbusStateInitialising)
1727                         break;
1728                 if (xennet_connect(netdev) != 0)
1729                         break;
1730                 xenbus_switch_state(dev, XenbusStateConnected);
1731                 break;
1732
1733         case XenbusStateConnected:
1734                 netif_notify_peers(netdev);
1735                 break;
1736
1737         case XenbusStateClosing:
1738                 xenbus_frontend_closed(dev);
1739                 break;
1740         }
1741 }
1742
1743 static const struct xennet_stat {
1744         char name[ETH_GSTRING_LEN];
1745         u16 offset;
1746 } xennet_stats[] = {
1747         {
1748                 "rx_gso_checksum_fixup",
1749                 offsetof(struct netfront_info, rx_gso_checksum_fixup)
1750         },
1751 };
1752
1753 static int xennet_get_sset_count(struct net_device *dev, int string_set)
1754 {
1755         switch (string_set) {
1756         case ETH_SS_STATS:
1757                 return ARRAY_SIZE(xennet_stats);
1758         default:
1759                 return -EINVAL;
1760         }
1761 }
1762
1763 static void xennet_get_ethtool_stats(struct net_device *dev,
1764                                      struct ethtool_stats *stats, u64 * data)
1765 {
1766         void *np = netdev_priv(dev);
1767         int i;
1768
1769         for (i = 0; i < ARRAY_SIZE(xennet_stats); i++)
1770                 data[i] = *(unsigned long *)(np + xennet_stats[i].offset);
1771 }
1772
1773 static void xennet_get_strings(struct net_device *dev, u32 stringset, u8 * data)
1774 {
1775         int i;
1776
1777         switch (stringset) {
1778         case ETH_SS_STATS:
1779                 for (i = 0; i < ARRAY_SIZE(xennet_stats); i++)
1780                         memcpy(data + i * ETH_GSTRING_LEN,
1781                                xennet_stats[i].name, ETH_GSTRING_LEN);
1782                 break;
1783         }
1784 }
1785
1786 static const struct ethtool_ops xennet_ethtool_ops =
1787 {
1788         .get_link = ethtool_op_get_link,
1789
1790         .get_sset_count = xennet_get_sset_count,
1791         .get_ethtool_stats = xennet_get_ethtool_stats,
1792         .get_strings = xennet_get_strings,
1793 };
1794
1795 #ifdef CONFIG_SYSFS
1796 static ssize_t show_rxbuf_min(struct device *dev,
1797                               struct device_attribute *attr, char *buf)
1798 {
1799         struct net_device *netdev = to_net_dev(dev);
1800         struct netfront_info *info = netdev_priv(netdev);
1801
1802         return sprintf(buf, "%u\n", info->rx_min_target);
1803 }
1804
1805 static ssize_t store_rxbuf_min(struct device *dev,
1806                                struct device_attribute *attr,
1807                                const char *buf, size_t len)
1808 {
1809         struct net_device *netdev = to_net_dev(dev);
1810         struct netfront_info *np = netdev_priv(netdev);
1811         char *endp;
1812         unsigned long target;
1813
1814         if (!capable(CAP_NET_ADMIN))
1815                 return -EPERM;
1816
1817         target = simple_strtoul(buf, &endp, 0);
1818         if (endp == buf)
1819                 return -EBADMSG;
1820
1821         if (target < RX_MIN_TARGET)
1822                 target = RX_MIN_TARGET;
1823         if (target > RX_MAX_TARGET)
1824                 target = RX_MAX_TARGET;
1825
1826         spin_lock_bh(&np->rx_lock);
1827         if (target > np->rx_max_target)
1828                 np->rx_max_target = target;
1829         np->rx_min_target = target;
1830         if (target > np->rx_target)
1831                 np->rx_target = target;
1832
1833         xennet_alloc_rx_buffers(netdev);
1834
1835         spin_unlock_bh(&np->rx_lock);
1836         return len;
1837 }
1838
1839 static ssize_t show_rxbuf_max(struct device *dev,
1840                               struct device_attribute *attr, char *buf)
1841 {
1842         struct net_device *netdev = to_net_dev(dev);
1843         struct netfront_info *info = netdev_priv(netdev);
1844
1845         return sprintf(buf, "%u\n", info->rx_max_target);
1846 }
1847
1848 static ssize_t store_rxbuf_max(struct device *dev,
1849                                struct device_attribute *attr,
1850                                const char *buf, size_t len)
1851 {
1852         struct net_device *netdev = to_net_dev(dev);
1853         struct netfront_info *np = netdev_priv(netdev);
1854         char *endp;
1855         unsigned long target;
1856
1857         if (!capable(CAP_NET_ADMIN))
1858                 return -EPERM;
1859
1860         target = simple_strtoul(buf, &endp, 0);
1861         if (endp == buf)
1862                 return -EBADMSG;
1863
1864         if (target < RX_MIN_TARGET)
1865                 target = RX_MIN_TARGET;
1866         if (target > RX_MAX_TARGET)
1867                 target = RX_MAX_TARGET;
1868
1869         spin_lock_bh(&np->rx_lock);
1870         if (target < np->rx_min_target)
1871                 np->rx_min_target = target;
1872         np->rx_max_target = target;
1873         if (target < np->rx_target)
1874                 np->rx_target = target;
1875
1876         xennet_alloc_rx_buffers(netdev);
1877
1878         spin_unlock_bh(&np->rx_lock);
1879         return len;
1880 }
1881
1882 static ssize_t show_rxbuf_cur(struct device *dev,
1883                               struct device_attribute *attr, char *buf)
1884 {
1885         struct net_device *netdev = to_net_dev(dev);
1886         struct netfront_info *info = netdev_priv(netdev);
1887
1888         return sprintf(buf, "%u\n", info->rx_target);
1889 }
1890
1891 static struct device_attribute xennet_attrs[] = {
1892         __ATTR(rxbuf_min, S_IRUGO|S_IWUSR, show_rxbuf_min, store_rxbuf_min),
1893         __ATTR(rxbuf_max, S_IRUGO|S_IWUSR, show_rxbuf_max, store_rxbuf_max),
1894         __ATTR(rxbuf_cur, S_IRUGO, show_rxbuf_cur, NULL),
1895 };
1896
1897 static int xennet_sysfs_addif(struct net_device *netdev)
1898 {
1899         int i;
1900         int err;
1901
1902         for (i = 0; i < ARRAY_SIZE(xennet_attrs); i++) {
1903                 err = device_create_file(&netdev->dev,
1904                                            &xennet_attrs[i]);
1905                 if (err)
1906                         goto fail;
1907         }
1908         return 0;
1909
1910  fail:
1911         while (--i >= 0)
1912                 device_remove_file(&netdev->dev, &xennet_attrs[i]);
1913         return err;
1914 }
1915
1916 static void xennet_sysfs_delif(struct net_device *netdev)
1917 {
1918         int i;
1919
1920         for (i = 0; i < ARRAY_SIZE(xennet_attrs); i++)
1921                 device_remove_file(&netdev->dev, &xennet_attrs[i]);
1922 }
1923
1924 #endif /* CONFIG_SYSFS */
1925
/*
 * Xenbus device types handled by this driver: only "vif".
 * NOTE(review): the trailing empty-string entry presumably terminates the
 * table for the xenbus matching code -- confirm against the xenbus core.
 */
static const struct xenbus_device_id netfront_ids[] = {
        { "vif" },
        { "" }
};
1930
1931
1932 static int __devexit xennet_remove(struct xenbus_device *dev)
1933 {
1934         struct netfront_info *info = dev_get_drvdata(&dev->dev);
1935
1936         dev_dbg(&dev->dev, "%s\n", dev->nodename);
1937
1938         unregister_netdev(info->netdev);
1939
1940         xennet_disconnect_backend(info);
1941
1942         del_timer_sync(&info->rx_refill_timer);
1943
1944         xennet_sysfs_delif(info->netdev);
1945
1946         free_percpu(info->stats);
1947
1948         free_netdev(info->netdev);
1949
1950         return 0;
1951 }
1952
/*
 * Xenbus frontend driver descriptor: hooks up device probe/remove, resume
 * handling and the backend state-change callback (netback_changed).
 */
static DEFINE_XENBUS_DRIVER(netfront, ,
        .probe = netfront_probe,
        .remove = __devexit_p(xennet_remove),
        .resume = netfront_resume,
        .otherend_changed = netback_changed,
);
1959
1960 static int __init netif_init(void)
1961 {
1962         if (!xen_domain())
1963                 return -ENODEV;
1964
1965         if (xen_initial_domain())
1966                 return 0;
1967
1968         if (!xen_platform_pci_unplug)
1969                 return -ENODEV;
1970
1971         printk(KERN_INFO "Initialising Xen virtual ethernet driver.\n");
1972
1973         return xenbus_register_frontend(&netfront_driver);
1974 }
1975 module_init(netif_init);
1976
1977
1978 static void __exit netif_exit(void)
1979 {
1980         if (xen_initial_domain())
1981                 return;
1982
1983         xenbus_unregister_driver(&netfront_driver);
1984 }
1985 module_exit(netif_exit);
1986
1987 MODULE_DESCRIPTION("Xen virtual network device frontend");
1988 MODULE_LICENSE("GPL");
1989 MODULE_ALIAS("xen:vif");
1990 MODULE_ALIAS("xennet");