drivers/virtio/virtio_ring.c

   1 // SPDX-License-Identifier: GPL-2.0-or-later
   2 /* Virtio ring implementation.
   3  *
   4  *  Copyright 2007 Rusty Russell IBM Corporation
   5  */
   6 #include <linux/virtio.h>
   7 #include <linux/virtio_ring.h>
   8 #include <linux/virtio_config.h>
   9 #include <linux/device.h>
  10 #include <linux/slab.h>
  11 #include <linux/module.h>
  12 #include <linux/hrtimer.h>
  13 #include <linux/dma-mapping.h>
  14 #include <xen/xen.h>
  15
  16 #ifdef DEBUG
  17 /* For development, we want to crash whenever the ring is screwed. */
  18 #define BAD_RING(_vq, fmt, args...)                             \
  19         do {                                                    \
  20                 dev_err(&(_vq)->vq.vdev->dev,                   \
  21                         "%s:"fmt, (_vq)->vq.name, ##args);      \
  22                 BUG();                                          \
  23         } while (0)
  24 /* Caller is supposed to guarantee no reentry. */
  25 #define START_USE(_vq)                                          \
  26         do {                                                    \
  27                 if ((_vq)->in_use)                              \
  28                         panic("%s:in_use = %i\n",               \
  29                               (_vq)->vq.name, (_vq)->in_use);   \
  30                 (_vq)->in_use = __LINE__;                       \
  31         } while (0)
  32 #define END_USE(_vq) \
  33         do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while(0)
  34 #define LAST_ADD_TIME_UPDATE(_vq)                               \
  35         do {                                                    \
  36                 ktime_t now = ktime_get();                      \
  37                                                                 \
  38                 /* No kick or get, with .1 second between?  Warn. */ \
  39                 if ((_vq)->last_add_time_valid)                 \
  40                         WARN_ON(ktime_to_ms(ktime_sub(now,      \
  41                                 (_vq)->last_add_time)) > 100);  \
  42                 (_vq)->last_add_time = now;                     \
  43                 (_vq)->last_add_time_valid = true;              \
  44         } while (0)
  45 #define LAST_ADD_TIME_CHECK(_vq)                                \
  46         do {                                                    \
  47                 if ((_vq)->last_add_time_valid) {               \
  48                         WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), \
  49                                       (_vq)->last_add_time)) > 100); \
  50                 }                                               \
  51         } while (0)
  52 #define LAST_ADD_TIME_INVALID(_vq)                              \
  53         ((_vq)->last_add_time_valid = false)
  54 #else
  55 #define BAD_RING(_vq, fmt, args...)                             \
  56         do {                                                    \
  57                 dev_err(&_vq->vq.vdev->dev,                     \
  58                         "%s:"fmt, (_vq)->vq.name, ##args);      \
  59                 (_vq)->broken = true;                           \
  60         } while (0)
  61 #define START_USE(vq)
  62 #define END_USE(vq)
  63 #define LAST_ADD_TIME_UPDATE(vq)
  64 #define LAST_ADD_TIME_CHECK(vq)
  65 #define LAST_ADD_TIME_INVALID(vq)
  66 #endif
  67
  68 struct vring_desc_state_split {
  69         void *data;                     /* Data for callback. */
  70         struct vring_desc *indir_desc;  /* Indirect descriptor, if any. */
  71 };
  72
  73 struct vring_desc_state_packed {
  74         void *data;                     /* Data for callback. */
  75         struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */
  76         u16 num;                        /* Descriptor list length. */
  77         u16 next;                       /* The next desc state in a list. */
  78         u16 last;                       /* The last desc state in a list. */
  79 };
  80
  81 struct vring_desc_extra_packed {
  82         dma_addr_t addr;                /* Buffer DMA addr. */
  83         u32 len;                        /* Buffer length. */
  84         u16 flags;                      /* Descriptor flags. */
  85 };
  86
  87 struct vring_virtqueue {
  88         struct virtqueue vq;
  89
  90         /* Is this a packed ring? */
  91         bool packed_ring;
  92
  93         /* Is DMA API used? */
  94         bool use_dma_api;
  95
  96         /* Can we use weak barriers? */
  97         bool weak_barriers;
  98
  99         /* Other side has made a mess, don't try any more. */
 100         bool broken;
 101
 102         /* Host supports indirect buffers */
 103         bool indirect;
 104
 105         /* Host publishes avail event idx */
 106         bool event;
 107
 108         /* Head of free buffer list. */
 109         unsigned int free_head;
 110         /* Number we've added since last sync. */
 111         unsigned int num_added;
 112
 113         /* Last used index we've seen. */
 114         u16 last_used_idx;
 115
 116         union {
 117                 /* Available for split ring */
 118                 struct {
 119                         /* Actual memory layout for this queue. */
 120                         struct vring vring;
 121
 122                         /* Last written value to avail->flags */
 123                         u16 avail_flags_shadow;
 124
 125                         /*
 126                          * Last written value to avail->idx in
 127                          * guest byte order.
 128                          */
 129                         u16 avail_idx_shadow;
 130
 131                         /* Per-descriptor state. */
 132                         struct vring_desc_state_split *desc_state;
 133
 134                         /* DMA address and size information */
 135                         dma_addr_t queue_dma_addr;
 136                         size_t queue_size_in_bytes;
 137                 } split;
 138
 139                 /* Available for packed ring */
 140                 struct {
 141                         /* Actual memory layout for this queue. */
 142                         struct {
 143                                 unsigned int num;
 144                                 struct vring_packed_desc *desc;
 145                                 struct vring_packed_desc_event *driver;
 146                                 struct vring_packed_desc_event *device;
 147                         } vring;
 148
 149                         /* Driver ring wrap counter. */
 150                         bool avail_wrap_counter;
 151
 152                         /* Device ring wrap counter. */
 153                         bool used_wrap_counter;
 154
 155                         /* Avail used flags. */
 156                         u16 avail_used_flags;
 157
 158                         /* Index of the next avail descriptor. */
 159                         u16 next_avail_idx;
 160
 161                         /*
 162                          * Last written value to driver->flags in
 163                          * guest byte order.
 164                          */
 165                         u16 event_flags_shadow;
 166
 167                         /* Per-descriptor state. */
 168                         struct vring_desc_state_packed *desc_state;
 169                         struct vring_desc_extra_packed *desc_extra;
 170
 171                         /* DMA address and size information */
 172                         dma_addr_t ring_dma_addr;
 173                         dma_addr_t driver_event_dma_addr;
 174                         dma_addr_t device_event_dma_addr;
 175                         size_t ring_size_in_bytes;
 176                         size_t event_size_in_bytes;
 177                 } packed;
 178         };
 179
 180         /* How to notify other side. FIXME: commonalize hcalls! */
 181         bool (*notify)(struct virtqueue *vq);
 182
 183         /* DMA, allocation, and size information */
 184         bool we_own_ring;
 185
 186 #ifdef DEBUG
 187         /* They're supposed to lock for us. */
 188         unsigned int in_use;
 189
 190         /* Figure out if their kicks are too delayed. */
 191         bool last_add_time_valid;
 192         ktime_t last_add_time;
 193 #endif
 194 };
 195
 196
 197 /*
 198  * Helpers.
 199  */
 200
 201 #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
 202
 203 static inline bool virtqueue_use_indirect(struct virtqueue *_vq,
 204                                           unsigned int total_sg)
 205 {
 206         struct vring_virtqueue *vq = to_vvq(_vq);
 207
 208         /*
 209          * If the host supports indirect descriptor tables, and we have multiple
 210          * buffers, then go indirect. FIXME: tune this threshold
 211          */
 212         return (vq->indirect && total_sg > 1 && vq->vq.num_free);
 213 }
 214
 215 /*
 216  * Modern virtio devices have feature bits to specify whether they need a
 217  * quirk and bypass the IOMMU. If not there, just use the DMA API.
 218  *
 219  * If there, the interaction between virtio and DMA API is messy.
 220  *
 221  * On most systems with virtio, physical addresses match bus addresses,
 222  * and it doesn't particularly matter whether we use the DMA API.
 223  *
 224  * On some systems, including Xen and any system with a physical device
 225  * that speaks virtio behind a physical IOMMU, we must use the DMA API
 226  * for virtio DMA to work at all.
 227  *
 228  * On other systems, including SPARC and PPC64, virtio-pci devices are
 229  * enumerated as though they are behind an IOMMU, but the virtio host
 230  * ignores the IOMMU, so we must either pretend that the IOMMU isn't
 231  * there or somehow map everything as the identity.
 232  *
 233  * For the time being, we preserve historic behavior and bypass the DMA
 234  * API.
 235  *
 236  * TODO: install a per-device DMA ops structure that does the right thing
 237  * taking into account all the above quirks, and use the DMA API
 238  * unconditionally on data path.
 239  */
 240
 241 static bool vring_use_dma_api(struct virtio_device *vdev)
 242 {
 243         if (!virtio_has_iommu_quirk(vdev))
 244                 return true;
 245
 246         /* Otherwise, we are left to guess. */
 247         /*
 248          * In theory, it's possible to have a buggy QEMU-supposed
 249          * emulated Q35 IOMMU and Xen enabled at the same time.  On
 250          * such a configuration, virtio has never worked and will
 251          * not work without an even larger kludge.  Instead, enable
 252          * the DMA API if we're a Xen guest, which at least allows
 253          * all of the sensible Xen configurations to work correctly.
 254          */
 255         if (xen_domain())
 256                 return true;
 257
 258         return false;
 259 }
 260
 261 size_t virtio_max_dma_size(struct virtio_device *vdev)
 262 {
 263         size_t max_segment_size = SIZE_MAX;
 264
 265         if (vring_use_dma_api(vdev))
 266                 max_segment_size = dma_max_mapping_size(&vdev->dev);
 267
 268         return max_segment_size;
 269 }
 270 EXPORT_SYMBOL_GPL(virtio_max_dma_size);
 271
 272 static void *vring_alloc_queue(struct virtio_device *vdev, size_t size,
 273                               dma_addr_t *dma_handle, gfp_t flag)
 274 {
 275         if (vring_use_dma_api(vdev)) {
 276                 return dma_alloc_coherent(vdev->dev.parent, size,
 277                                           dma_handle, flag);
 278         } else {
 279                 void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag);
 280
 281                 if (queue) {
 282                         phys_addr_t phys_addr = virt_to_phys(queue);
 283                         *dma_handle = (dma_addr_t)phys_addr;
 284
 285                         /*
 286                          * Sanity check: make sure we dind't truncate
 287                          * the address.  The only arches I can find that
 288                          * have 64-bit phys_addr_t but 32-bit dma_addr_t
 289                          * are certain non-highmem MIPS and x86
 290                          * configurations, but these configurations
 291                          * should never allocate physical pages above 32
 292                          * bits, so this is fine.  Just in case, throw a
 293                          * warning and abort if we end up with an
 294                          * unrepresentable address.
 295                          */
 296                         if (WARN_ON_ONCE(*dma_handle != phys_addr)) {
 297                                 free_pages_exact(queue, PAGE_ALIGN(size));
 298                                 return NULL;
 299                         }
 300                 }
 301                 return queue;
 302         }
 303 }
 304
 305 static void vring_free_queue(struct virtio_device *vdev, size_t size,
 306                              void *queue, dma_addr_t dma_handle)
 307 {
 308         if (vring_use_dma_api(vdev))
 309                 dma_free_coherent(vdev->dev.parent, size, queue, dma_handle);
 310         else
 311                 free_pages_exact(queue, PAGE_ALIGN(size));
 312 }
 313
 314 /*
 315  * The DMA ops on various arches are rather gnarly right now, and
 316  * making all of the arch DMA ops work on the vring device itself
 317  * is a mess.  For now, we use the parent device for DMA ops.
 318  */
 319 static inline struct device *vring_dma_dev(const struct vring_virtqueue *vq)
 320 {
 321         return vq->vq.vdev->dev.parent;
 322 }
 323
 324 /* Map one sg entry. */
 325 static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq,
 326                                    struct scatterlist *sg,
 327                                    enum dma_data_direction direction)
 328 {
 329         if (!vq->use_dma_api)
 330                 return (dma_addr_t)sg_phys(sg);
 331
 332         /*
 333          * We can't use dma_map_sg, because we don't use scatterlists in
 334          * the way it expects (we don't guarantee that the scatterlist
 335          * will exist for the lifetime of the mapping).
 336          */
 337         return dma_map_page(vring_dma_dev(vq),
 338                             sg_page(sg), sg->offset, sg->length,
 339                             direction);
 340 }
 341
 342 static dma_addr_t vring_map_single(const struct vring_virtqueue *vq,
 343                                    void *cpu_addr, size_t size,
 344                                    enum dma_data_direction direction)
 345 {
 346         if (!vq->use_dma_api)
 347                 return (dma_addr_t)virt_to_phys(cpu_addr);
 348
 349         return dma_map_single(vring_dma_dev(vq),
 350                               cpu_addr, size, direction);
 351 }
 352
 353 static int vring_mapping_error(const struct vring_virtqueue *vq,
 354                                dma_addr_t addr)
 355 {
 356         if (!vq->use_dma_api)
 357                 return 0;
 358
 359         return dma_mapping_error(vring_dma_dev(vq), addr);
 360 }
 361
 362
 363 /*
 364  * Split ring specific functions - *_split().
 365  */
 366
 367 static void vring_unmap_one_split(const struct vring_virtqueue *vq,
 368                                   struct vring_desc *desc)
 369 {
 370         u16 flags;
 371
 372         if (!vq->use_dma_api)
 373                 return;
 374
 375         flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);
 376
 377         if (flags & VRING_DESC_F_INDIRECT) {
 378                 dma_unmap_single(vring_dma_dev(vq),
 379                                  virtio64_to_cpu(vq->vq.vdev, desc->addr),
 380                                  virtio32_to_cpu(vq->vq.vdev, desc->len),
 381                                  (flags & VRING_DESC_F_WRITE) ?
 382                                  DMA_FROM_DEVICE : DMA_TO_DEVICE);
 383         } else {
 384                 dma_unmap_page(vring_dma_dev(vq),
 385                                virtio64_to_cpu(vq->vq.vdev, desc->addr),
 386                                virtio32_to_cpu(vq->vq.vdev, desc->len),
 387                                (flags & VRING_DESC_F_WRITE) ?
 388                                DMA_FROM_DEVICE : DMA_TO_DEVICE);
 389         }
 390 }
 391
 392 static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq,
 393                                                unsigned int total_sg,
 394                                                gfp_t gfp)
 395 {
 396         struct vring_desc *desc;
 397         unsigned int i;
 398
 399         /*
 400          * We require lowmem mappings for the descriptors because
 401          * otherwise virt_to_phys will give us bogus addresses in the
 402          * virtqueue.
 403          */
 404         gfp &= ~__GFP_HIGHMEM;
 405
 406         desc = kmalloc_array(total_sg, sizeof(struct vring_desc), gfp);
 407         if (!desc)
 408                 return NULL;
 409
 410         for (i = 0; i < total_sg; i++)
 411                 desc[i].next = cpu_to_virtio16(_vq->vdev, i + 1);
 412         return desc;
 413 }
 414
 415 static inline int virtqueue_add_split(struct virtqueue *_vq,
 416                                       struct scatterlist *sgs[],
 417                                       unsigned int total_sg,
 418                                       unsigned int out_sgs,
 419                                       unsigned int in_sgs,
 420                                       void *data,
 421                                       void *ctx,
 422                                       gfp_t gfp)
 423 {
 424         struct vring_virtqueue *vq = to_vvq(_vq);
 425         struct scatterlist *sg;
 426         struct vring_desc *desc;
 427         unsigned int i, n, avail, descs_used, uninitialized_var(prev), err_idx;
 428         int head;
 429         bool indirect;
 430
 431         START_USE(vq);
 432
 433         BUG_ON(data == NULL);
 434         BUG_ON(ctx && vq->indirect);
 435
 436         if (unlikely(vq->broken)) {
 437                 END_USE(vq);
 438                 return -EIO;
 439         }
 440
 441         LAST_ADD_TIME_UPDATE(vq);
 442
 443         BUG_ON(total_sg == 0);
 444
 445         head = vq->free_head;
 446
 447         if (virtqueue_use_indirect(_vq, total_sg))
 448                 desc = alloc_indirect_split(_vq, total_sg, gfp);
 449         else {
 450                 desc = NULL;
 451                 WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect);
 452         }
 453
 454         if (desc) {
 455                 /* Use a single buffer which doesn't continue */
 456                 indirect = true;
 457                 /* Set up rest to use this indirect table. */
 458                 i = 0;
 459                 descs_used = 1;
 460         } else {
 461                 indirect = false;
 462                 desc = vq->split.vring.desc;
 463                 i = head;
 464                 descs_used = total_sg;
 465         }
 466
 467         if (vq->vq.num_free < descs_used) {
 468                 pr_debug("Can't add buf len %i - avail = %i\n",
 469                          descs_used, vq->vq.num_free);
 470                 /* FIXME: for historical reasons, we force a notify here if
 471                  * there are outgoing parts to the buffer.  Presumably the
 472                  * host should service the ring ASAP. */
 473                 if (out_sgs)
 474                         vq->notify(&vq->vq);
 475                 if (indirect)
 476                         kfree(desc);
 477                 END_USE(vq);
 478                 return -ENOSPC;
 479         }
 480
 481         for (n = 0; n < out_sgs; n++) {
 482                 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
 483                         dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE);
 484                         if (vring_mapping_error(vq, addr))
 485                                 goto unmap_release;
 486
 487                         desc[i].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT);
 488                         desc[i].addr = cpu_to_virtio64(_vq->vdev, addr);
 489                         desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length);
 490                         prev = i;
 491                         i = virtio16_to_cpu(_vq->vdev, desc[i].next);
 492                 }
 493         }
 494         for (; n < (out_sgs + in_sgs); n++) {
 495                 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
 496                         dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE);
 497                         if (vring_mapping_error(vq, addr))
 498                                 goto unmap_release;
 499
 500                         desc[i].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT | VRING_DESC_F_WRITE);
 501                         desc[i].addr = cpu_to_virtio64(_vq->vdev, addr);
 502                         desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length);
 503                         prev = i;
 504                         i = virtio16_to_cpu(_vq->vdev, desc[i].next);
 505                 }
 506         }
 507         /* Last one doesn't continue. */
 508         desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT);
 509
 510         if (indirect) {
 511                 /* Now that the indirect table is filled in, map it. */
 512                 dma_addr_t addr = vring_map_single(
 513                         vq, desc, total_sg * sizeof(struct vring_desc),
 514                         DMA_TO_DEVICE);
 515                 if (vring_mapping_error(vq, addr))
 516                         goto unmap_release;
 517
 518                 vq->split.vring.desc[head].flags = cpu_to_virtio16(_vq->vdev,
 519                                 VRING_DESC_F_INDIRECT);
 520                 vq->split.vring.desc[head].addr = cpu_to_virtio64(_vq->vdev,
 521                                 addr);
 522
 523                 vq->split.vring.desc[head].len = cpu_to_virtio32(_vq->vdev,
 524                                 total_sg * sizeof(struct vring_desc));
 525         }
 526
 527         /* We're using some buffers from the free list. */
 528         vq->vq.num_free -= descs_used;
 529
 530         /* Update free pointer */
 531         if (indirect)
 532                 vq->free_head = virtio16_to_cpu(_vq->vdev,
 533                                         vq->split.vring.desc[head].next);
 534         else
 535                 vq->free_head = i;
 536
 537         /* Store token and indirect buffer state. */
 538         vq->split.desc_state[head].data = data;
 539         if (indirect)
 540                 vq->split.desc_state[head].indir_desc = desc;
 541         else
 542                 vq->split.desc_state[head].indir_desc = ctx;
 543
 544         /* Put entry in available array (but don't update avail->idx until they
 545          * do sync). */
 546         avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1);
 547         vq->split.vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head);
 548
 549         /* Descriptors and available array need to be set before we expose the
 550          * new available array entries. */
 551         virtio_wmb(vq->weak_barriers);
 552         vq->split.avail_idx_shadow++;
 553         vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
 554                                                 vq->split.avail_idx_shadow);
 555         vq->num_added++;
 556
 557         pr_debug("Added buffer head %i to %p\n", head, vq);
 558         END_USE(vq);
 559
 560         /* This is very unlikely, but theoretically possible.  Kick
 561          * just in case. */
 562         if (unlikely(vq->num_added == (1 << 16) - 1))
 563                 virtqueue_kick(_vq);
 564
 565         return 0;
 566
 567 unmap_release:
 568         err_idx = i;
 569
 570         if (indirect)
 571                 i = 0;
 572         else
 573                 i = head;
 574
 575         for (n = 0; n < total_sg; n++) {
 576                 if (i == err_idx)
 577                         break;
 578                 vring_unmap_one_split(vq, &desc[i]);
 579                 i = virtio16_to_cpu(_vq->vdev, desc[i].next);
 580         }
 581
 582         if (indirect)
 583                 kfree(desc);
 584
 585         END_USE(vq);
 586         return -ENOMEM;
 587 }
 588
 589 static bool virtqueue_kick_prepare_split(struct virtqueue *_vq)
 590 {
 591         struct vring_virtqueue *vq = to_vvq(_vq);
 592         u16 new, old;
 593         bool needs_kick;
 594
 595         START_USE(vq);
 596         /* We need to expose available array entries before checking avail
 597          * event. */
 598         virtio_mb(vq->weak_barriers);
 599
 600         old = vq->split.avail_idx_shadow - vq->num_added;
 601         new = vq->split.avail_idx_shadow;
 602         vq->num_added = 0;
 603
 604         LAST_ADD_TIME_CHECK(vq);
 605         LAST_ADD_TIME_INVALID(vq);
 606
 607         if (vq->event) {
 608                 needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev,
 609                                         vring_avail_event(&vq->split.vring)),
 610                                               new, old);
 611         } else {
 612                 needs_kick = !(vq->split.vring.used->flags &
 613                                         cpu_to_virtio16(_vq->vdev,
 614                                                 VRING_USED_F_NO_NOTIFY));
 615         }
 616         END_USE(vq);
 617         return needs_kick;
 618 }
 619
 620 static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
 621                              void **ctx)
 622 {
 623         unsigned int i, j;
 624         __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
 625
 626         /* Clear data ptr. */
 627         vq->split.desc_state[head].data = NULL;
 628
 629         /* Put back on free list: unmap first-level descriptors and find end */
 630         i = head;
 631
 632         while (vq->split.vring.desc[i].flags & nextflag) {
 633                 vring_unmap_one_split(vq, &vq->split.vring.desc[i]);
 634                 i = virtio16_to_cpu(vq->vq.vdev, vq->split.vring.desc[i].next);
 635                 vq->vq.num_free++;
 636         }
 637
 638         vring_unmap_one_split(vq, &vq->split.vring.desc[i]);
 639         vq->split.vring.desc[i].next = cpu_to_virtio16(vq->vq.vdev,
 640                                                 vq->free_head);
 641         vq->free_head = head;
 642
 643         /* Plus final descriptor */
 644         vq->vq.num_free++;
 645
 646         if (vq->indirect) {
 647                 struct vring_desc *indir_desc =
 648                                 vq->split.desc_state[head].indir_desc;
 649                 u32 len;
 650
 651                 /* Free the indirect table, if any, now that it's unmapped. */
 652                 if (!indir_desc)
 653                         return;
 654
 655                 len = virtio32_to_cpu(vq->vq.vdev,
 656                                 vq->split.vring.desc[head].len);
 657
 658                 BUG_ON(!(vq->split.vring.desc[head].flags &
 659                          cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT)));
 660                 BUG_ON(len == 0 || len % sizeof(struct vring_desc));
 661
 662                 for (j = 0; j < len / sizeof(struct vring_desc); j++)
 663                         vring_unmap_one_split(vq, &indir_desc[j]);
 664
 665                 kfree(indir_desc);
 666                 vq->split.desc_state[head].indir_desc = NULL;
 667         } else if (ctx) {
 668                 *ctx = vq->split.desc_state[head].indir_desc;
 669         }
 670 }
 671
 672 static inline bool more_used_split(const struct vring_virtqueue *vq)
 673 {
 674         return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev,
 675                         vq->split.vring.used->idx);
 676 }
 677
 678 static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
 679                                          unsigned int *len,
 680                                          void **ctx)
 681 {
 682         struct vring_virtqueue *vq = to_vvq(_vq);
 683         void *ret;
 684         unsigned int i;
 685         u16 last_used;
 686
 687         START_USE(vq);
 688
 689         if (unlikely(vq->broken)) {
 690                 END_USE(vq);
 691                 return NULL;
 692         }
 693
 694         if (!more_used_split(vq)) {
 695                 pr_debug("No more buffers in queue\n");
 696                 END_USE(vq);
 697                 return NULL;
 698         }
 699
 700         /* Only get used array entries after they have been exposed by host. */
 701         virtio_rmb(vq->weak_barriers);
 702
 703         last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
 704         i = virtio32_to_cpu(_vq->vdev,
 705                         vq->split.vring.used->ring[last_used].id);
 706         *len = virtio32_to_cpu(_vq->vdev,
 707                         vq->split.vring.used->ring[last_used].len);
 708
 709         if (unlikely(i >= vq->split.vring.num)) {
 710                 BAD_RING(vq, "id %u out of range\n", i);
 711                 return NULL;
 712         }
 713         if (unlikely(!vq->split.desc_state[i].data)) {
 714                 BAD_RING(vq, "id %u is not a head!\n", i);
 715                 return NULL;
 716         }
 717
 718         /* detach_buf_split clears data, so grab it now. */
 719         ret = vq->split.desc_state[i].data;
 720         detach_buf_split(vq, i, ctx);
 721         vq->last_used_idx++;
 722         /* If we expect an interrupt for the next entry, tell host
 723          * by writing event index and flush out the write before
 724          * the read in the next get_buf call. */
 725         if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT))
 726                 virtio_store_mb(vq->weak_barriers,
 727                                 &vring_used_event(&vq->split.vring),
 728                                 cpu_to_virtio16(_vq->vdev, vq->last_used_idx));
 729
 730         LAST_ADD_TIME_INVALID(vq);
 731
 732         END_USE(vq);
 733         return ret;
 734 }
 735
 736 static void virtqueue_disable_cb_split(struct virtqueue *_vq)
 737 {
 738         struct vring_virtqueue *vq = to_vvq(_vq);
 739
 740         if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
 741                 vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
 742                 if (!vq->event)
 743                         vq->split.vring.avail->flags =
 744                                 cpu_to_virtio16(_vq->vdev,
 745                                                 vq->split.avail_flags_shadow);
 746         }
 747 }
 748
 749 static unsigned virtqueue_enable_cb_prepare_split(struct virtqueue *_vq)
 750 {
 751         struct vring_virtqueue *vq = to_vvq(_vq);
 752         u16 last_used_idx;
 753
 754         START_USE(vq);
 755
 756         /* We optimistically turn back on interrupts, then check if there was
 757          * more to do. */
 758         /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
 759          * either clear the flags bit or point the event index at the next
 760          * entry. Always do both to keep code simple. */
 761         if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
 762                 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
 763                 if (!vq->event)
 764                         vq->split.vring.avail->flags =
 765                                 cpu_to_virtio16(_vq->vdev,
 766                                                 vq->split.avail_flags_shadow);
 767         }
 768         vring_used_event(&vq->split.vring) = cpu_to_virtio16(_vq->vdev,
 769                         last_used_idx = vq->last_used_idx);
 770         END_USE(vq);
 771         return last_used_idx;
 772 }
 773
 774 static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned last_used_idx)
 775 {
 776         struct vring_virtqueue *vq = to_vvq(_vq);
 777
 778         return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev,
 779                         vq->split.vring.used->idx);
 780 }
 781
 782 static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq)
 783 {
 784         struct vring_virtqueue *vq = to_vvq(_vq);
 785         u16 bufs;
 786
 787         START_USE(vq);
 788
 789         /* We optimistically turn back on interrupts, then check if there was
 790          * more to do. */
 791         /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to
 792          * either clear the flags bit or point the event index at the next
 793          * entry. Always update the event index to keep code simple. */
 794         if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
 795                 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
 796                 if (!vq->event)
 797                         vq->split.vring.avail->flags =
 798                                 cpu_to_virtio16(_vq->vdev,
 799                                                 vq->split.avail_flags_shadow);
 800         }
 801         /* TODO: tune this threshold */
 802         bufs = (u16)(vq->split.avail_idx_shadow - vq->last_used_idx) * 3 / 4;
 803
 804         virtio_store_mb(vq->weak_barriers,
 805                         &vring_used_event(&vq->split.vring),
 806                         cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs));
 807
 808         if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->split.vring.used->idx)
 809                                         - vq->last_used_idx) > bufs)) {
 810                 END_USE(vq);
 811                 return false;
 812         }
 813
 814         END_USE(vq);
 815         return true;
 816 }
 817
 818 static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq)
 819 {
 820         struct vring_virtqueue *vq = to_vvq(_vq);
 821         unsigned int i;
 822         void *buf;
 823
 824         START_USE(vq);
 825
 826         for (i = 0; i < vq->split.vring.num; i++) {
 827                 if (!vq->split.desc_state[i].data)
 828                         continue;
 829                 /* detach_buf_split clears data, so grab it now. */
 830                 buf = vq->split.desc_state[i].data;
 831                 detach_buf_split(vq, i, NULL);
 832                 vq->split.avail_idx_shadow--;
 833                 vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
 834                                 vq->split.avail_idx_shadow);
 835                 END_USE(vq);
 836                 return buf;
 837         }
 838         /* That should have freed everything. */
 839         BUG_ON(vq->vq.num_free != vq->split.vring.num);
 840
 841         END_USE(vq);
 842         return NULL;
 843 }
 844
 845 static struct virtqueue *vring_create_virtqueue_split(
 846         unsigned int index,
 847         unsigned int num,
 848         unsigned int vring_align,
 849         struct virtio_device *vdev,
 850         bool weak_barriers,
 851         bool may_reduce_num,
 852         bool context,
 853         bool (*notify)(struct virtqueue *),
 854         void (*callback)(struct virtqueue *),
 855         const char *name)
 856 {
 857         struct virtqueue *vq;
 858         void *queue = NULL;
 859         dma_addr_t dma_addr;
 860         size_t queue_size_in_bytes;
 861         struct vring vring;
 862
 863         /* We assume num is a power of 2. */
 864         if (num & (num - 1)) {
 865                 dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num);
 866                 return NULL;
 867         }
 868
 869         /* TODO: allocate each queue chunk individually */
 870         for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) {
 871                 queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
 872                                           &dma_addr,
 873                                           GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
 874                 if (queue)
 875                         break;
 876                 if (!may_reduce_num)
 877                         return NULL;
 878         }
 879
 880         if (!num)
 881                 return NULL;
 882
 883         if (!queue) {
 884                 /* Try to get a single page. You are my only hope! */
 885                 queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
 886                                           &dma_addr, GFP_KERNEL|__GFP_ZERO);
 887         }
 888         if (!queue)
 889                 return NULL;
 890
 891         queue_size_in_bytes = vring_size(num, vring_align);
 892         vring_init(&vring, num, queue, vring_align);
 893
 894         vq = __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
 895                                    notify, callback, name);
 896         if (!vq) {
 897                 vring_free_queue(vdev, queue_size_in_bytes, queue,
 898                                  dma_addr);
 899                 return NULL;
 900         }
 901
 902         to_vvq(vq)->split.queue_dma_addr = dma_addr;
 903         to_vvq(vq)->split.queue_size_in_bytes = queue_size_in_bytes;
 904         to_vvq(vq)->we_own_ring = true;
 905
 906         return vq;
 907 }
 908
 909
 910 /*
 911  * Packed ring specific functions - *_packed().
 912  */
 913
 914 static void vring_unmap_state_packed(const struct vring_virtqueue *vq,
 915                                      struct vring_desc_extra_packed *state)
 916 {
 917         u16 flags;
 918
 919         if (!vq->use_dma_api)
 920                 return;
 921
 922         flags = state->flags;
 923
 924         if (flags & VRING_DESC_F_INDIRECT) {
 925                 dma_unmap_single(vring_dma_dev(vq),
 926                                  state->addr, state->len,
 927                                  (flags & VRING_DESC_F_WRITE) ?
 928                                  DMA_FROM_DEVICE : DMA_TO_DEVICE);
 929         } else {
 930                 dma_unmap_page(vring_dma_dev(vq),
 931                                state->addr, state->len,
 932                                (flags & VRING_DESC_F_WRITE) ?
 933                                DMA_FROM_DEVICE : DMA_TO_DEVICE);
 934         }
 935 }
 936
 937 static void vring_unmap_desc_packed(const struct vring_virtqueue *vq,
 938                                    struct vring_packed_desc *desc)
 939 {
 940         u16 flags;
 941
 942         if (!vq->use_dma_api)
 943                 return;
 944
 945         flags = le16_to_cpu(desc->flags);
 946
 947         if (flags & VRING_DESC_F_INDIRECT) {
 948                 dma_unmap_single(vring_dma_dev(vq),
 949                                  le64_to_cpu(desc->addr),
 950                                  le32_to_cpu(desc->len),
 951                                  (flags & VRING_DESC_F_WRITE) ?
 952                                  DMA_FROM_DEVICE : DMA_TO_DEVICE);
 953         } else {
 954                 dma_unmap_page(vring_dma_dev(vq),
 955                                le64_to_cpu(desc->addr),
 956                                le32_to_cpu(desc->len),
 957                                (flags & VRING_DESC_F_WRITE) ?
 958                                DMA_FROM_DEVICE : DMA_TO_DEVICE);
 959         }
 960 }
 961
 962 static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg,
 963                                                        gfp_t gfp)
 964 {
 965         struct vring_packed_desc *desc;
 966
 967         /*
 968          * We require lowmem mappings for the descriptors because
 969          * otherwise virt_to_phys will give us bogus addresses in the
 970          * virtqueue.
 971          */
 972         gfp &= ~__GFP_HIGHMEM;
 973
 974         desc = kmalloc_array(total_sg, sizeof(struct vring_packed_desc), gfp);
 975
 976         return desc;
 977 }
 978
 979 static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
 980                                        struct scatterlist *sgs[],
 981                                        unsigned int total_sg,
 982                                        unsigned int out_sgs,
 983                                        unsigned int in_sgs,
 984                                        void *data,
 985                                        gfp_t gfp)
 986 {
 987         struct vring_packed_desc *desc;
 988         struct scatterlist *sg;
 989         unsigned int i, n, err_idx;
 990         u16 head, id;
 991         dma_addr_t addr;
 992
 993         head = vq->packed.next_avail_idx;
 994         desc = alloc_indirect_packed(total_sg, gfp);
 995
 996         if (unlikely(vq->vq.num_free < 1)) {
 997                 pr_debug("Can't add buf len 1 - avail = 0\n");
 998                 kfree(desc);
 999                 END_USE(vq);
1000                 return -ENOSPC;
1001         }
1002
1003         i = 0;
1004         id = vq->free_head;
1005         BUG_ON(id == vq->packed.vring.num);
1006
1007         for (n = 0; n < out_sgs + in_sgs; n++) {
1008                 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
1009                         addr = vring_map_one_sg(vq, sg, n < out_sgs ?
1010                                         DMA_TO_DEVICE : DMA_FROM_DEVICE);
1011                         if (vring_mapping_error(vq, addr))
1012                                 goto unmap_release;
1013
1014                         desc[i].flags = cpu_to_le16(n < out_sgs ?
1015                                                 0 : VRING_DESC_F_WRITE);
1016                         desc[i].addr = cpu_to_le64(addr);
1017                         desc[i].len = cpu_to_le32(sg->length);
1018                         i++;
1019                 }
1020         }
1021
1022         /* Now that the indirect table is filled in, map it. */
1023         addr = vring_map_single(vq, desc,
1024                         total_sg * sizeof(struct vring_packed_desc),
1025                         DMA_TO_DEVICE);
1026         if (vring_mapping_error(vq, addr))
1027                 goto unmap_release;
1028
1029         vq->packed.vring.desc[head].addr = cpu_to_le64(addr);
1030         vq->packed.vring.desc[head].len = cpu_to_le32(total_sg *
1031                                 sizeof(struct vring_packed_desc));
1032         vq->packed.vring.desc[head].id = cpu_to_le16(id);
1033
1034         if (vq->use_dma_api) {
1035                 vq->packed.desc_extra[id].addr = addr;
1036                 vq->packed.desc_extra[id].len = total_sg *
1037                                 sizeof(struct vring_packed_desc);
1038                 vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT |
1039                                                   vq->packed.avail_used_flags;
1040         }
1041
1042         /*
1043          * A driver MUST NOT make the first descriptor in the list
1044          * available before all subsequent descriptors comprising
1045          * the list are made available.
1046          */
1047         virtio_wmb(vq->weak_barriers);
1048         vq->packed.vring.desc[head].flags = cpu_to_le16(VRING_DESC_F_INDIRECT |
1049                                                 vq->packed.avail_used_flags);
1050
1051         /* We're using some buffers from the free list. */
1052         vq->vq.num_free -= 1;
1053
1054         /* Update free pointer */
1055         n = head + 1;
1056         if (n >= vq->packed.vring.num) {
1057                 n = 0;
1058                 vq->packed.avail_wrap_counter ^= 1;
1059                 vq->packed.avail_used_flags ^=
1060                                 1 << VRING_PACKED_DESC_F_AVAIL |
1061                                 1 << VRING_PACKED_DESC_F_USED;
1062         }
1063         vq->packed.next_avail_idx = n;
1064         vq->free_head = vq->packed.desc_state[id].next;
1065
1066         /* Store token and indirect buffer state. */
1067         vq->packed.desc_state[id].num = 1;
1068         vq->packed.desc_state[id].data = data;
1069         vq->packed.desc_state[id].indir_desc = desc;
1070         vq->packed.desc_state[id].last = id;
1071
1072         vq->num_added += 1;
1073
1074         pr_debug("Added buffer head %i to %p\n", head, vq);
1075         END_USE(vq);
1076
1077         return 0;
1078
1079 unmap_release:
1080         err_idx = i;
1081
1082         for (i = 0; i < err_idx; i++)
1083                 vring_unmap_desc_packed(vq, &desc[i]);
1084
1085         kfree(desc);
1086
1087         END_USE(vq);
1088         return -ENOMEM;
1089 }
1090
1091 static inline int virtqueue_add_packed(struct virtqueue *_vq,
1092                                        struct scatterlist *sgs[],
1093                                        unsigned int total_sg,
1094                                        unsigned int out_sgs,
1095                                        unsigned int in_sgs,
1096                                        void *data,
1097                                        void *ctx,
1098                                        gfp_t gfp)
1099 {
1100         struct vring_virtqueue *vq = to_vvq(_vq);
1101         struct vring_packed_desc *desc;
1102         struct scatterlist *sg;
1103         unsigned int i, n, c, descs_used, err_idx;
1104         __le16 uninitialized_var(head_flags), flags;
1105         u16 head, id, uninitialized_var(prev), curr, avail_used_flags;
1106
1107         START_USE(vq);
1108
1109         BUG_ON(data == NULL);
1110         BUG_ON(ctx && vq->indirect);
1111
1112         if (unlikely(vq->broken)) {
1113                 END_USE(vq);
1114                 return -EIO;
1115         }
1116
1117         LAST_ADD_TIME_UPDATE(vq);
1118
1119         BUG_ON(total_sg == 0);
1120
1121         if (virtqueue_use_indirect(_vq, total_sg))
1122                 return virtqueue_add_indirect_packed(vq, sgs, total_sg,
1123                                 out_sgs, in_sgs, data, gfp);
1124
1125         head = vq->packed.next_avail_idx;
1126         avail_used_flags = vq->packed.avail_used_flags;
1127
1128         WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect);
1129
1130         desc = vq->packed.vring.desc;
1131         i = head;
1132         descs_used = total_sg;
1133
1134         if (unlikely(vq->vq.num_free < descs_used)) {
1135                 pr_debug("Can't add buf len %i - avail = %i\n",
1136                          descs_used, vq->vq.num_free);
1137                 END_USE(vq);
1138                 return -ENOSPC;
1139         }
1140
1141         id = vq->free_head;
1142         BUG_ON(id == vq->packed.vring.num);
1143
1144         curr = id;
1145         c = 0;
1146         for (n = 0; n < out_sgs + in_sgs; n++) {
1147                 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
1148                         dma_addr_t addr = vring_map_one_sg(vq, sg, n < out_sgs ?
1149                                         DMA_TO_DEVICE : DMA_FROM_DEVICE);
1150                         if (vring_mapping_error(vq, addr))
1151                                 goto unmap_release;
1152
1153                         flags = cpu_to_le16(vq->packed.avail_used_flags |
1154                                     (++c == total_sg ? 0 : VRING_DESC_F_NEXT) |
1155                                     (n < out_sgs ? 0 : VRING_DESC_F_WRITE));
1156                         if (i == head)
1157                                 head_flags = flags;
1158                         else
1159                                 desc[i].flags = flags;
1160
1161                         desc[i].addr = cpu_to_le64(addr);
1162                         desc[i].len = cpu_to_le32(sg->length);
1163                         desc[i].id = cpu_to_le16(id);
1164
1165                         if (unlikely(vq->use_dma_api)) {
1166                                 vq->packed.desc_extra[curr].addr = addr;
1167                                 vq->packed.desc_extra[curr].len = sg->length;
1168                                 vq->packed.desc_extra[curr].flags =
1169                                         le16_to_cpu(flags);
1170                         }
1171                         prev = curr;
1172                         curr = vq->packed.desc_state[curr].next;
1173
1174                         if ((unlikely(++i >= vq->packed.vring.num))) {
1175                                 i = 0;
1176                                 vq->packed.avail_used_flags ^=
1177                                         1 << VRING_PACKED_DESC_F_AVAIL |
1178                                         1 << VRING_PACKED_DESC_F_USED;
1179                         }
1180                 }
1181         }
1182
1183         if (i < head)
1184                 vq->packed.avail_wrap_counter ^= 1;
1185
1186         /* We're using some buffers from the free list. */
1187         vq->vq.num_free -= descs_used;
1188
1189         /* Update free pointer */
1190         vq->packed.next_avail_idx = i;
1191         vq->free_head = curr;
1192
1193         /* Store token. */
1194         vq->packed.desc_state[id].num = descs_used;
1195         vq->packed.desc_state[id].data = data;
1196         vq->packed.desc_state[id].indir_desc = ctx;
1197         vq->packed.desc_state[id].last = prev;
1198
1199         /*
1200          * A driver MUST NOT make the first descriptor in the list
1201          * available before all subsequent descriptors comprising
1202          * the list are made available.
1203          */
1204         virtio_wmb(vq->weak_barriers);
1205         vq->packed.vring.desc[head].flags = head_flags;
1206         vq->num_added += descs_used;
1207
1208         pr_debug("Added buffer head %i to %p\n", head, vq);
1209         END_USE(vq);
1210
1211         return 0;
1212
1213 unmap_release:
1214         err_idx = i;
1215         i = head;
1216
1217         vq->packed.avail_used_flags = avail_used_flags;
1218
1219         for (n = 0; n < total_sg; n++) {
1220                 if (i == err_idx)
1221                         break;
1222                 vring_unmap_desc_packed(vq, &desc[i]);
1223                 i++;
1224                 if (i >= vq->packed.vring.num)
1225                         i = 0;
1226         }
1227
1228         END_USE(vq);
1229         return -EIO;
1230 }
1231
/*
 * Decide whether the device should be notified about the buffers added
 * since the previous call, and reset the num_added counter.
 *
 * The decision is based on the flags (and, for
 * VRING_PACKED_EVENT_FLAG_DESC, the off_wrap value) read from the device
 * event structure.
 *
 * Returns true when a notification is needed.
 */
static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
{
        struct vring_virtqueue *vq = to_vvq(_vq);
        u16 new, old, off_wrap, flags, wrap_counter, event_idx;
        bool needs_kick;
        /* Overlay so off_wrap and flags can be fetched with one u32 load. */
        union {
                struct {
                        __le16 off_wrap;
                        __le16 flags;
                };
                u32 u32;
        } snapshot;

        START_USE(vq);

        /*
         * We need to expose the new flags value before checking notification
         * suppressions.
         */
        virtio_mb(vq->weak_barriers);

        /* [old, new) is the range of avail indices added since last time. */
        old = vq->packed.next_avail_idx - vq->num_added;
        new = vq->packed.next_avail_idx;
        vq->num_added = 0;

        /* Single 32-bit read of the device event structure. */
        snapshot.u32 = *(u32 *)vq->packed.vring.device;
        flags = le16_to_cpu(snapshot.flags);

        LAST_ADD_TIME_CHECK(vq);
        LAST_ADD_TIME_INVALID(vq);

        /* Any mode other than DESC: kick unless flags say DISABLE. */
        if (flags != VRING_PACKED_EVENT_FLAG_DESC) {
                needs_kick = (flags != VRING_PACKED_EVENT_FLAG_DISABLE);
                goto out;
        }

        off_wrap = le16_to_cpu(snapshot.off_wrap);

        /* Split off_wrap into its wrap-counter bit and the event index. */
        wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
        event_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
        /* Different wrap counter: shift the index back by one ring size. */
        if (wrap_counter != vq->packed.avail_wrap_counter)
                event_idx -= vq->packed.vring.num;

        needs_kick = vring_need_event(event_idx, new, old);
out:
        END_USE(vq);
        return needs_kick;
}
1280
1281 static void detach_buf_packed(struct vring_virtqueue *vq,
1282                               unsigned int id, void **ctx)
1283 {
1284         struct vring_desc_state_packed *state = NULL;
1285         struct vring_packed_desc *desc;
1286         unsigned int i, curr;
1287
1288         state = &vq->packed.desc_state[id];
1289
1290         /* Clear data ptr. */
1291         state->data = NULL;
1292
1293         vq->packed.desc_state[state->last].next = vq->free_head;
1294         vq->free_head = id;
1295         vq->vq.num_free += state->num;
1296
1297         if (unlikely(vq->use_dma_api)) {
1298                 curr = id;
1299                 for (i = 0; i < state->num; i++) {
1300                         vring_unmap_state_packed(vq,
1301                                 &vq->packed.desc_extra[curr]);
1302                         curr = vq->packed.desc_state[curr].next;
1303                 }
1304         }
1305
1306         if (vq->indirect) {
1307                 u32 len;
1308
1309                 /* Free the indirect table, if any, now that it's unmapped. */
1310                 desc = state->indir_desc;
1311                 if (!desc)
1312                         return;
1313
1314                 if (vq->use_dma_api) {
1315                         len = vq->packed.desc_extra[id].len;
1316                         for (i = 0; i < len / sizeof(struct vring_packed_desc);
1317                                         i++)
1318                                 vring_unmap_desc_packed(vq, &desc[i]);
1319                 }
1320                 kfree(desc);
1321                 state->indir_desc = NULL;
1322         } else if (ctx) {
1323                 *ctx = state->indir_desc;
1324         }
1325 }
1326
1327 static inline bool is_used_desc_packed(const struct vring_virtqueue *vq,
1328                                        u16 idx, bool used_wrap_counter)
1329 {
1330         bool avail, used;
1331         u16 flags;
1332
1333         flags = le16_to_cpu(vq->packed.vring.desc[idx].flags);
1334         avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
1335         used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
1336
1337         return avail == used && used == used_wrap_counter;
1338 }
1339
1340 static inline bool more_used_packed(const struct vring_virtqueue *vq)
1341 {
1342         return is_used_desc_packed(vq, vq->last_used_idx,
1343                         vq->packed.used_wrap_counter);
1344 }
1345
/*
 * Fetch the next used buffer from a packed ring.
 *
 * On success stores the length read from the used descriptor in *@len,
 * detaches the buffer (handing @ctx on to detach_buf_packed()) and advances
 * last_used_idx by the number of descriptors the buffer occupied, flipping
 * used_wrap_counter when the index passes the end of the ring.
 *
 * Returns the token stored for the buffer, or NULL when the queue is
 * broken, no used buffer is pending, or the id read from the ring is out
 * of range or has no stored token.
 */
static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
                                          unsigned int *len,
                                          void **ctx)
{
        struct vring_virtqueue *vq = to_vvq(_vq);
        u16 last_used, id;
        void *ret;

        START_USE(vq);

        if (unlikely(vq->broken)) {
                END_USE(vq);
                return NULL;
        }

        if (!more_used_packed(vq)) {
                pr_debug("No more buffers in queue\n");
                END_USE(vq);
                return NULL;
        }

        /* Only get used elements after they have been exposed by host. */
        virtio_rmb(vq->weak_barriers);

        last_used = vq->last_used_idx;
        id = le16_to_cpu(vq->packed.vring.desc[last_used].id);
        *len = le32_to_cpu(vq->packed.vring.desc[last_used].len);

        /* Validate the id from the ring before indexing desc_state with it. */
        if (unlikely(id >= vq->packed.vring.num)) {
                BAD_RING(vq, "id %u out of range\n", id);
                return NULL;
        }
        if (unlikely(!vq->packed.desc_state[id].data)) {
                BAD_RING(vq, "id %u is not a head!\n", id);
                return NULL;
        }

        /* detach_buf_packed clears data, so grab it now. */
        ret = vq->packed.desc_state[id].data;
        detach_buf_packed(vq, id, ctx);

        /* Step over every slot of this buffer, wrapping at the ring end. */
        vq->last_used_idx += vq->packed.desc_state[id].num;
        if (unlikely(vq->last_used_idx >= vq->packed.vring.num)) {
                vq->last_used_idx -= vq->packed.vring.num;
                vq->packed.used_wrap_counter ^= 1;
        }

        /*
         * If we expect an interrupt for the next entry, tell host
         * by writing event index and flush out the write before
         * the read in the next get_buf call.
         */
        if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DESC)
                virtio_store_mb(vq->weak_barriers,
                                &vq->packed.vring.driver->off_wrap,
                                cpu_to_le16(vq->last_used_idx |
                                        (vq->packed.used_wrap_counter <<
                                         VRING_PACKED_EVENT_F_WRAP_CTR)));

        LAST_ADD_TIME_INVALID(vq);

        END_USE(vq);
        return ret;
}
1410
1411 static void virtqueue_disable_cb_packed(struct virtqueue *_vq)
1412 {
1413         struct vring_virtqueue *vq = to_vvq(_vq);
1414
1415         if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) {
1416                 vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
1417                 vq->packed.vring.driver->flags =
1418                         cpu_to_le16(vq->packed.event_flags_shadow);
1419         }
1420 }
1421
1422 static unsigned virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq)
1423 {
1424         struct vring_virtqueue *vq = to_vvq(_vq);
1425
1426         START_USE(vq);
1427
1428         /*
1429          * We optimistically turn back on interrupts, then check if there was
1430          * more to do.
1431          */
1432
1433         if (vq->event) {
1434                 vq->packed.vring.driver->off_wrap =
1435                         cpu_to_le16(vq->last_used_idx |
1436                                 (vq->packed.used_wrap_counter <<
1437                                  VRING_PACKED_EVENT_F_WRAP_CTR));
1438                 /*
1439                  * We need to update event offset and event wrap
1440                  * counter first before updating event flags.
1441                  */
1442                 virtio_wmb(vq->weak_barriers);
1443         }
1444
1445         if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
1446                 vq->packed.event_flags_shadow = vq->event ?
1447                                 VRING_PACKED_EVENT_FLAG_DESC :
1448                                 VRING_PACKED_EVENT_FLAG_ENABLE;
1449                 vq->packed.vring.driver->flags =
1450                                 cpu_to_le16(vq->packed.event_flags_shadow);
1451         }
1452
1453         END_USE(vq);
1454         return vq->last_used_idx | ((u16)vq->packed.used_wrap_counter <<
1455                         VRING_PACKED_EVENT_F_WRAP_CTR);
1456 }
1457
1458 static bool virtqueue_poll_packed(struct virtqueue *_vq, u16 off_wrap)
1459 {
1460         struct vring_virtqueue *vq = to_vvq(_vq);
1461         bool wrap_counter;
1462         u16 used_idx;
1463
1464         wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
1465         used_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
1466
1467         return is_used_desc_packed(vq, used_idx, wrap_counter);
1468 }
1469
1470 static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq)
1471 {
1472         struct vring_virtqueue *vq = to_vvq(_vq);
1473         u16 used_idx, wrap_counter;
1474         u16 bufs;
1475
1476         START_USE(vq);
1477
1478         /*
1479          * We optimistically turn back on interrupts, then check if there was
1480          * more to do.
1481          */
1482
1483         if (vq->event) {
1484                 /* TODO: tune this threshold */
1485                 bufs = (vq->packed.vring.num - vq->vq.num_free) * 3 / 4;
1486                 wrap_counter = vq->packed.used_wrap_counter;
1487
1488                 used_idx = vq->last_used_idx + bufs;
1489                 if (used_idx >= vq->packed.vring.num) {
1490                         used_idx -= vq->packed.vring.num;
1491                         wrap_counter ^= 1;
1492                 }
1493
1494                 vq->packed.vring.driver->off_wrap = cpu_to_le16(used_idx |
1495                         (wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR));
1496
1497                 /*
1498                  * We need to update event offset and event wrap
1499                  * counter first before updating event flags.
1500                  */
1501                 virtio_wmb(vq->weak_barriers);
1502         }
1503
1504         if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
1505                 vq->packed.event_flags_shadow = vq->event ?
1506                                 VRING_PACKED_EVENT_FLAG_DESC :
1507                                 VRING_PACKED_EVENT_FLAG_ENABLE;
1508                 vq->packed.vring.driver->flags =
1509                                 cpu_to_le16(vq->packed.event_flags_shadow);
1510         }
1511
1512         /*
1513          * We need to update event suppression structure first
1514          * before re-checking for more used buffers.
1515          */
1516         virtio_mb(vq->weak_barriers);
1517
1518         if (is_used_desc_packed(vq,
1519                                 vq->last_used_idx,
1520                                 vq->packed.used_wrap_counter)) {
1521                 END_USE(vq);
1522                 return false;
1523         }
1524
1525         END_USE(vq);
1526         return true;
1527 }
1528
1529 static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq)
1530 {
1531         struct vring_virtqueue *vq = to_vvq(_vq);
1532         unsigned int i;
1533         void *buf;
1534
1535         START_USE(vq);
1536
1537         for (i = 0; i < vq->packed.vring.num; i++) {
1538                 if (!vq->packed.desc_state[i].data)
1539                         continue;
1540                 /* detach_buf clears data, so grab it now. */
1541                 buf = vq->packed.desc_state[i].data;
1542                 detach_buf_packed(vq, i, NULL);
1543                 END_USE(vq);
1544                 return buf;
1545         }
1546         /* That should have freed everything. */
1547         BUG_ON(vq->vq.num_free != vq->packed.vring.num);
1548
1549         END_USE(vq);
1550         return NULL;
1551 }
1552
1553 static struct virtqueue *vring_create_virtqueue_packed(
1554         unsigned int index,
1555         unsigned int num,
1556         unsigned int vring_align,
1557         struct virtio_device *vdev,
1558         bool weak_barriers,
1559         bool may_reduce_num,
1560         bool context,
1561         bool (*notify)(struct virtqueue *),
1562         void (*callback)(struct virtqueue *),
1563         const char *name)
1564 {
1565         struct vring_virtqueue *vq;
1566         struct vring_packed_desc *ring;
1567         struct vring_packed_desc_event *driver, *device;
1568         dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr;
1569         size_t ring_size_in_bytes, event_size_in_bytes;
1570         unsigned int i;
1571
1572         ring_size_in_bytes = num * sizeof(struct vring_packed_desc);
1573
1574         ring = vring_alloc_queue(vdev, ring_size_in_bytes,
1575                                  &ring_dma_addr,
1576                                  GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
1577         if (!ring)
1578                 goto err_ring;
1579
1580         event_size_in_bytes = sizeof(struct vring_packed_desc_event);
1581
1582         driver = vring_alloc_queue(vdev, event_size_in_bytes,
1583                                    &driver_event_dma_addr,
1584                                    GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
1585         if (!driver)
1586                 goto err_driver;
1587
1588         device = vring_alloc_queue(vdev, event_size_in_bytes,
1589                                    &device_event_dma_addr,
1590                                    GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
1591         if (!device)
1592                 goto err_device;
1593
1594         vq = kmalloc(sizeof(*vq), GFP_KERNEL);
1595         if (!vq)
1596                 goto err_vq;
1597
1598         vq->vq.callback = callback;
1599         vq->vq.vdev = vdev;
1600         vq->vq.name = name;
1601         vq->vq.num_free = num;
1602         vq->vq.index = index;
1603         vq->we_own_ring = true;
1604         vq->notify = notify;
1605         vq->weak_barriers = weak_barriers;
1606         vq->broken = false;
1607         vq->last_used_idx = 0;
1608         vq->num_added = 0;
1609         vq->packed_ring = true;
1610         vq->use_dma_api = vring_use_dma_api(vdev);
1611         list_add_tail(&vq->vq.list, &vdev->vqs);
1612 #ifdef DEBUG
1613         vq->in_use = false;
1614         vq->last_add_time_valid = false;
1615 #endif
1616
1617         vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
1618                 !context;
1619         vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
1620
1621         if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
1622                 vq->weak_barriers = false;
1623
1624         vq->packed.ring_dma_addr = ring_dma_addr;
1625         vq->packed.driver_event_dma_addr = driver_event_dma_addr;
1626         vq->packed.device_event_dma_addr = device_event_dma_addr;
1627
1628         vq->packed.ring_size_in_bytes = ring_size_in_bytes;
1629         vq->packed.event_size_in_bytes = event_size_in_bytes;
1630
1631         vq->packed.vring.num = num;
1632         vq->packed.vring.desc = ring;
1633         vq->packed.vring.driver = driver;
1634         vq->packed.vring.device = device;
1635
1636         vq->packed.next_avail_idx = 0;
1637         vq->packed.avail_wrap_counter = 1;
1638         vq->packed.used_wrap_counter = 1;
1639         vq->packed.event_flags_shadow = 0;
1640         vq->packed.avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL;
1641
1642         vq->packed.desc_state = kmalloc_array(num,
1643                         sizeof(struct vring_desc_state_packed),
1644                         GFP_KERNEL);
1645         if (!vq->packed.desc_state)
1646                 goto err_desc_state;
1647
1648         memset(vq->packed.desc_state, 0,
1649                 num * sizeof(struct vring_desc_state_packed));
1650
1651         /* Put everything in free lists. */
1652         vq->free_head = 0;
1653         for (i = 0; i < num-1; i++)
1654                 vq->packed.desc_state[i].next = i + 1;
1655
1656         vq->packed.desc_extra = kmalloc_array(num,
1657                         sizeof(struct vring_desc_extra_packed),
1658                         GFP_KERNEL);
1659         if (!vq->packed.desc_extra)
1660                 goto err_desc_extra;
1661
1662         memset(vq->packed.desc_extra, 0,
1663                 num * sizeof(struct vring_desc_extra_packed));
1664
1665         /* No callback?  Tell other side not to bother us. */
1666         if (!callback) {
1667                 vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
1668                 vq->packed.vring.driver->flags =
1669                         cpu_to_le16(vq->packed.event_flags_shadow);
1670         }
1671
1672         return &vq->vq;
1673
1674 err_desc_extra:
1675         kfree(vq->packed.desc_state);
1676 err_desc_state:
1677         kfree(vq);
1678 err_vq:
1679         vring_free_queue(vdev, event_size_in_bytes, device, ring_dma_addr);
1680 err_device:
1681         vring_free_queue(vdev, event_size_in_bytes, driver, ring_dma_addr);
1682 err_driver:
1683         vring_free_queue(vdev, ring_size_in_bytes, ring, ring_dma_addr);
1684 err_ring:
1685         return NULL;
1686 }
1687
1688
1689 /*
1690  * Generic functions and exported symbols.
1691  */
1692
1693 static inline int virtqueue_add(struct virtqueue *_vq,
1694                                 struct scatterlist *sgs[],
1695                                 unsigned int total_sg,
1696                                 unsigned int out_sgs,
1697                                 unsigned int in_sgs,
1698                                 void *data,
1699                                 void *ctx,
1700                                 gfp_t gfp)
1701 {
1702         struct vring_virtqueue *vq = to_vvq(_vq);
1703
1704         return vq->packed_ring ? virtqueue_add_packed(_vq, sgs, total_sg,
1705                                         out_sgs, in_sgs, data, ctx, gfp) :
1706                                  virtqueue_add_split(_vq, sgs, total_sg,
1707                                         out_sgs, in_sgs, data, ctx, gfp);
1708 }
1709
1710 /**
1711  * virtqueue_add_sgs - expose buffers to other end
1712  * @_vq: the struct virtqueue we're talking about.
1713  * @sgs: array of terminated scatterlists.
1714  * @out_sgs: the number of scatterlists readable by other side
1715  * @in_sgs: the number of scatterlists which are writable (after readable ones)
1716  * @data: the token identifying the buffer.
1717  * @gfp: how to do memory allocations (if necessary).
1718  *
1719  * Caller must ensure we don't call this with other virtqueue operations
1720  * at the same time (except where noted).
1721  *
1722  * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
1723  */
1724 int virtqueue_add_sgs(struct virtqueue *_vq,
1725                       struct scatterlist *sgs[],
1726                       unsigned int out_sgs,
1727                       unsigned int in_sgs,
1728                       void *data,
1729                       gfp_t gfp)
1730 {
1731         unsigned int i, total_sg = 0;
1732
1733         /* Count them first. */
1734         for (i = 0; i < out_sgs + in_sgs; i++) {
1735                 struct scatterlist *sg;
1736
1737                 for (sg = sgs[i]; sg; sg = sg_next(sg))
1738                         total_sg++;
1739         }
1740         return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs,
1741                              data, NULL, gfp);
1742 }
1743 EXPORT_SYMBOL_GPL(virtqueue_add_sgs);
1744
1745 /**
1746  * virtqueue_add_outbuf - expose output buffers to other end
1747  * @vq: the struct virtqueue we're talking about.
1748  * @sg: scatterlist (must be well-formed and terminated!)
1749  * @num: the number of entries in @sg readable by other side
1750  * @data: the token identifying the buffer.
1751  * @gfp: how to do memory allocations (if necessary).
1752  *
1753  * Caller must ensure we don't call this with other virtqueue operations
1754  * at the same time (except where noted).
1755  *
1756  * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
1757  */
1758 int virtqueue_add_outbuf(struct virtqueue *vq,
1759                          struct scatterlist *sg, unsigned int num,
1760                          void *data,
1761                          gfp_t gfp)
1762 {
1763         return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, gfp);
1764 }
1765 EXPORT_SYMBOL_GPL(virtqueue_add_outbuf);
1766
1767 /**
1768  * virtqueue_add_inbuf - expose input buffers to other end
1769  * @vq: the struct virtqueue we're talking about.
1770  * @sg: scatterlist (must be well-formed and terminated!)
1771  * @num: the number of entries in @sg writable by other side
1772  * @data: the token identifying the buffer.
1773  * @gfp: how to do memory allocations (if necessary).
1774  *
1775  * Caller must ensure we don't call this with other virtqueue operations
1776  * at the same time (except where noted).
1777  *
1778  * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
1779  */
1780 int virtqueue_add_inbuf(struct virtqueue *vq,
1781                         struct scatterlist *sg, unsigned int num,
1782                         void *data,
1783                         gfp_t gfp)
1784 {
1785         return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, gfp);
1786 }
1787 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf);
1788
1789 /**
1790  * virtqueue_add_inbuf_ctx - expose input buffers to other end
1791  * @vq: the struct virtqueue we're talking about.
1792  * @sg: scatterlist (must be well-formed and terminated!)
1793  * @num: the number of entries in @sg writable by other side
1794  * @data: the token identifying the buffer.
1795  * @ctx: extra context for the token
1796  * @gfp: how to do memory allocations (if necessary).
1797  *
1798  * Caller must ensure we don't call this with other virtqueue operations
1799  * at the same time (except where noted).
1800  *
1801  * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
1802  */
1803 int virtqueue_add_inbuf_ctx(struct virtqueue *vq,
1804                         struct scatterlist *sg, unsigned int num,
1805                         void *data,
1806                         void *ctx,
1807                         gfp_t gfp)
1808 {
1809         return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, gfp);
1810 }
1811 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx);
1812
1813 /**
1814  * virtqueue_kick_prepare - first half of split virtqueue_kick call.
1815  * @_vq: the struct virtqueue
1816  *
1817  * Instead of virtqueue_kick(), you can do:
1818  *      if (virtqueue_kick_prepare(vq))
1819  *              virtqueue_notify(vq);
1820  *
1821  * This is sometimes useful because the virtqueue_kick_prepare() needs
1822  * to be serialized, but the actual virtqueue_notify() call does not.
1823  */
1824 bool virtqueue_kick_prepare(struct virtqueue *_vq)
1825 {
1826         struct vring_virtqueue *vq = to_vvq(_vq);
1827
1828         return vq->packed_ring ? virtqueue_kick_prepare_packed(_vq) :
1829                                  virtqueue_kick_prepare_split(_vq);
1830 }
1831 EXPORT_SYMBOL_GPL(virtqueue_kick_prepare);
1832
1833 /**
1834  * virtqueue_notify - second half of split virtqueue_kick call.
1835  * @_vq: the struct virtqueue
1836  *
1837  * This does not need to be serialized.
1838  *
1839  * Returns false if host notify failed or queue is broken, otherwise true.
1840  */
1841 bool virtqueue_notify(struct virtqueue *_vq)
1842 {
1843         struct vring_virtqueue *vq = to_vvq(_vq);
1844
1845         if (unlikely(vq->broken))
1846                 return false;
1847
1848         /* Prod other side to tell it about changes. */
1849         if (!vq->notify(_vq)) {
1850                 vq->broken = true;
1851                 return false;
1852         }
1853         return true;
1854 }
1855 EXPORT_SYMBOL_GPL(virtqueue_notify);
1856
1857 /**
1858  * virtqueue_kick - update after add_buf
1859  * @vq: the struct virtqueue
1860  *
1861  * After one or more virtqueue_add_* calls, invoke this to kick
1862  * the other side.
1863  *
1864  * Caller must ensure we don't call this with other virtqueue
1865  * operations at the same time (except where noted).
1866  *
1867  * Returns false if kick failed, otherwise true.
1868  */
1869 bool virtqueue_kick(struct virtqueue *vq)
1870 {
1871         if (virtqueue_kick_prepare(vq))
1872                 return virtqueue_notify(vq);
1873         return true;
1874 }
1875 EXPORT_SYMBOL_GPL(virtqueue_kick);
1876
1877 /**
1878  * virtqueue_get_buf - get the next used buffer
1879  * @_vq: the struct virtqueue we're talking about.
1880  * @len: the length written into the buffer
1881  * @ctx: extra context for the token
1882  *
1883  * If the device wrote data into the buffer, @len will be set to the
1884  * amount written.  This means you don't need to clear the buffer
1885  * beforehand to ensure there's no data leakage in the case of short
1886  * writes.
1887  *
1888  * Caller must ensure we don't call this with other virtqueue
1889  * operations at the same time (except where noted).
1890  *
1891  * Returns NULL if there are no used buffers, or the "data" token
1892  * handed to virtqueue_add_*().
1893  */
1894 void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len,
1895                             void **ctx)
1896 {
1897         struct vring_virtqueue *vq = to_vvq(_vq);
1898
1899         return vq->packed_ring ? virtqueue_get_buf_ctx_packed(_vq, len, ctx) :
1900                                  virtqueue_get_buf_ctx_split(_vq, len, ctx);
1901 }
1902 EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx);
1903
1904 void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
1905 {
1906         return virtqueue_get_buf_ctx(_vq, len, NULL);
1907 }
1908 EXPORT_SYMBOL_GPL(virtqueue_get_buf);
1909 /**
1910  * virtqueue_disable_cb - disable callbacks
1911  * @_vq: the struct virtqueue we're talking about.
1912  *
1913  * Note that this is not necessarily synchronous, hence unreliable and only
1914  * useful as an optimization.
1915  *
1916  * Unlike other operations, this need not be serialized.
1917  */
1918 void virtqueue_disable_cb(struct virtqueue *_vq)
1919 {
1920         struct vring_virtqueue *vq = to_vvq(_vq);
1921
1922         if (vq->packed_ring)
1923                 virtqueue_disable_cb_packed(_vq);
1924         else
1925                 virtqueue_disable_cb_split(_vq);
1926 }
1927 EXPORT_SYMBOL_GPL(virtqueue_disable_cb);
1928
1929 /**
1930  * virtqueue_enable_cb_prepare - restart callbacks after disable_cb
1931  * @_vq: the struct virtqueue we're talking about.
1932  *
1933  * This re-enables callbacks; it returns current queue state
1934  * in an opaque unsigned value. This value should be later tested by
1935  * virtqueue_poll, to detect a possible race between the driver checking for
1936  * more work, and enabling callbacks.
1937  *
1938  * Caller must ensure we don't call this with other virtqueue
1939  * operations at the same time (except where noted).
1940  */
1941 unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq)
1942 {
1943         struct vring_virtqueue *vq = to_vvq(_vq);
1944
1945         return vq->packed_ring ? virtqueue_enable_cb_prepare_packed(_vq) :
1946                                  virtqueue_enable_cb_prepare_split(_vq);
1947 }
1948 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare);
1949
1950 /**
1951  * virtqueue_poll - query pending used buffers
1952  * @_vq: the struct virtqueue we're talking about.
1953  * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare).
1954  *
1955  * Returns "true" if there are pending used buffers in the queue.
1956  *
1957  * This does not need to be serialized.
1958  */
1959 bool virtqueue_poll(struct virtqueue *_vq, unsigned last_used_idx)
1960 {
1961         struct vring_virtqueue *vq = to_vvq(_vq);
1962
1963         virtio_mb(vq->weak_barriers);
1964         return vq->packed_ring ? virtqueue_poll_packed(_vq, last_used_idx) :
1965                                  virtqueue_poll_split(_vq, last_used_idx);
1966 }
1967 EXPORT_SYMBOL_GPL(virtqueue_poll);
1968
1969 /**
1970  * virtqueue_enable_cb - restart callbacks after disable_cb.
1971  * @_vq: the struct virtqueue we're talking about.
1972  *
1973  * This re-enables callbacks; it returns "false" if there are pending
1974  * buffers in the queue, to detect a possible race between the driver
1975  * checking for more work, and enabling callbacks.
1976  *
1977  * Caller must ensure we don't call this with other virtqueue
1978  * operations at the same time (except where noted).
1979  */
1980 bool virtqueue_enable_cb(struct virtqueue *_vq)
1981 {
1982         unsigned last_used_idx = virtqueue_enable_cb_prepare(_vq);
1983
1984         return !virtqueue_poll(_vq, last_used_idx);
1985 }
1986 EXPORT_SYMBOL_GPL(virtqueue_enable_cb);
1987
1988 /**
1989  * virtqueue_enable_cb_delayed - restart callbacks after disable_cb.
1990  * @_vq: the struct virtqueue we're talking about.
1991  *
1992  * This re-enables callbacks but hints to the other side to delay
1993  * interrupts until most of the available buffers have been processed;
1994  * it returns "false" if there are many pending buffers in the queue,
1995  * to detect a possible race between the driver checking for more work,
1996  * and enabling callbacks.
1997  *
1998  * Caller must ensure we don't call this with other virtqueue
1999  * operations at the same time (except where noted).
2000  */
2001 bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
2002 {
2003         struct vring_virtqueue *vq = to_vvq(_vq);
2004
2005         return vq->packed_ring ? virtqueue_enable_cb_delayed_packed(_vq) :
2006                                  virtqueue_enable_cb_delayed_split(_vq);
2007 }
2008 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
2009
2010 /**
2011  * virtqueue_detach_unused_buf - detach first unused buffer
2012  * @_vq: the struct virtqueue we're talking about.
2013  *
2014  * Returns NULL or the "data" token handed to virtqueue_add_*().
2015  * This is not valid on an active queue; it is useful only for device
2016  * shutdown.
2017  */
2018 void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
2019 {
2020         struct vring_virtqueue *vq = to_vvq(_vq);
2021
2022         return vq->packed_ring ? virtqueue_detach_unused_buf_packed(_vq) :
2023                                  virtqueue_detach_unused_buf_split(_vq);
2024 }
2025 EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf);
2026
2027 static inline bool more_used(const struct vring_virtqueue *vq)
2028 {
2029         return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq);
2030 }
2031
2032 irqreturn_t vring_interrupt(int irq, void *_vq)
2033 {
2034         struct vring_virtqueue *vq = to_vvq(_vq);
2035
2036         if (!more_used(vq)) {
2037                 pr_debug("virtqueue interrupt with no work for %p\n", vq);
2038                 return IRQ_NONE;
2039         }
2040
2041         if (unlikely(vq->broken))
2042                 return IRQ_HANDLED;
2043
2044         pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback);
2045         if (vq->vq.callback)
2046                 vq->vq.callback(&vq->vq);
2047
2048         return IRQ_HANDLED;
2049 }
2050 EXPORT_SYMBOL_GPL(vring_interrupt);
2051
2052 /* Only available for split ring */
2053 struct virtqueue *__vring_new_virtqueue(unsigned int index,
2054                                         struct vring vring,
2055                                         struct virtio_device *vdev,
2056                                         bool weak_barriers,
2057                                         bool context,
2058                                         bool (*notify)(struct virtqueue *),
2059                                         void (*callback)(struct virtqueue *),
2060                                         const char *name)
2061 {
2062         unsigned int i;
2063         struct vring_virtqueue *vq;
2064
2065         if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2066                 return NULL;
2067
2068         vq = kmalloc(sizeof(*vq), GFP_KERNEL);
2069         if (!vq)
2070                 return NULL;
2071
2072         vq->packed_ring = false;
2073         vq->vq.callback = callback;
2074         vq->vq.vdev = vdev;
2075         vq->vq.name = name;
2076         vq->vq.num_free = vring.num;
2077         vq->vq.index = index;
2078         vq->we_own_ring = false;
2079         vq->notify = notify;
2080         vq->weak_barriers = weak_barriers;
2081         vq->broken = false;
2082         vq->last_used_idx = 0;
2083         vq->num_added = 0;
2084         vq->use_dma_api = vring_use_dma_api(vdev);
2085         list_add_tail(&vq->vq.list, &vdev->vqs);
2086 #ifdef DEBUG
2087         vq->in_use = false;
2088         vq->last_add_time_valid = false;
2089 #endif
2090
2091         vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
2092                 !context;
2093         vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
2094
2095         if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
2096                 vq->weak_barriers = false;
2097
2098         vq->split.queue_dma_addr = 0;
2099         vq->split.queue_size_in_bytes = 0;
2100
2101         vq->split.vring = vring;
2102         vq->split.avail_flags_shadow = 0;
2103         vq->split.avail_idx_shadow = 0;
2104
2105         /* No callback?  Tell other side not to bother us. */
2106         if (!callback) {
2107                 vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
2108                 if (!vq->event)
2109                         vq->split.vring.avail->flags = cpu_to_virtio16(vdev,
2110                                         vq->split.avail_flags_shadow);
2111         }
2112
2113         vq->split.desc_state = kmalloc_array(vring.num,
2114                         sizeof(struct vring_desc_state_split), GFP_KERNEL);
2115         if (!vq->split.desc_state) {
2116                 kfree(vq);
2117                 return NULL;
2118         }
2119
2120         /* Put everything in free lists. */
2121         vq->free_head = 0;
2122         for (i = 0; i < vring.num-1; i++)
2123                 vq->split.vring.desc[i].next = cpu_to_virtio16(vdev, i + 1);
2124         memset(vq->split.desc_state, 0, vring.num *
2125                         sizeof(struct vring_desc_state_split));
2126
2127         return &vq->vq;
2128 }
2129 EXPORT_SYMBOL_GPL(__vring_new_virtqueue);
2130
2131 struct virtqueue *vring_create_virtqueue(
2132         unsigned int index,
2133         unsigned int num,
2134         unsigned int vring_align,
2135         struct virtio_device *vdev,
2136         bool weak_barriers,
2137         bool may_reduce_num,
2138         bool context,
2139         bool (*notify)(struct virtqueue *),
2140         void (*callback)(struct virtqueue *),
2141         const char *name)
2142 {
2143
2144         if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2145                 return vring_create_virtqueue_packed(index, num, vring_align,
2146                                 vdev, weak_barriers, may_reduce_num,
2147                                 context, notify, callback, name);
2148
2149         return vring_create_virtqueue_split(index, num, vring_align,
2150                         vdev, weak_barriers, may_reduce_num,
2151                         context, notify, callback, name);
2152 }
2153 EXPORT_SYMBOL_GPL(vring_create_virtqueue);
2154
2155 /* Only available for split ring */
2156 struct virtqueue *vring_new_virtqueue(unsigned int index,
2157                                       unsigned int num,
2158                                       unsigned int vring_align,
2159                                       struct virtio_device *vdev,
2160                                       bool weak_barriers,
2161                                       bool context,
2162                                       void *pages,
2163                                       bool (*notify)(struct virtqueue *vq),
2164                                       void (*callback)(struct virtqueue *vq),
2165                                       const char *name)
2166 {
2167         struct vring vring;
2168
2169         if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2170                 return NULL;
2171
2172         vring_init(&vring, num, pages, vring_align);
2173         return __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
2174                                      notify, callback, name);
2175 }
2176 EXPORT_SYMBOL_GPL(vring_new_virtqueue);
2177
2178 void vring_del_virtqueue(struct virtqueue *_vq)
2179 {
2180         struct vring_virtqueue *vq = to_vvq(_vq);
2181
2182         if (vq->we_own_ring) {
2183                 if (vq->packed_ring) {
2184                         vring_free_queue(vq->vq.vdev,
2185                                          vq->packed.ring_size_in_bytes,
2186                                          vq->packed.vring.desc,
2187                                          vq->packed.ring_dma_addr);
2188
2189                         vring_free_queue(vq->vq.vdev,
2190                                          vq->packed.event_size_in_bytes,
2191                                          vq->packed.vring.driver,
2192                                          vq->packed.driver_event_dma_addr);
2193
2194                         vring_free_queue(vq->vq.vdev,
2195                                          vq->packed.event_size_in_bytes,
2196                                          vq->packed.vring.device,
2197                                          vq->packed.device_event_dma_addr);
2198
2199                         kfree(vq->packed.desc_state);
2200                         kfree(vq->packed.desc_extra);
2201                 } else {
2202                         vring_free_queue(vq->vq.vdev,
2203                                          vq->split.queue_size_in_bytes,
2204                                          vq->split.vring.desc,
2205                                          vq->split.queue_dma_addr);
2206                 }
2207         }
2208         if (!vq->packed_ring)
2209                 kfree(vq->split.desc_state);
2210         list_del(&_vq->list);
2211         kfree(vq);
2212 }
2213 EXPORT_SYMBOL_GPL(vring_del_virtqueue);
2214
2215 /* Manipulates transport-specific feature bits. */
2216 void vring_transport_features(struct virtio_device *vdev)
2217 {
2218         unsigned int i;
2219
2220         for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) {
2221                 switch (i) {
2222                 case VIRTIO_RING_F_INDIRECT_DESC:
2223                         break;
2224                 case VIRTIO_RING_F_EVENT_IDX:
2225                         break;
2226                 case VIRTIO_F_VERSION_1:
2227                         break;
2228                 case VIRTIO_F_IOMMU_PLATFORM:
2229                         break;
2230                 case VIRTIO_F_RING_PACKED:
2231                         break;
2232                 case VIRTIO_F_ORDER_PLATFORM:
2233                         break;
2234                 default:
2235                         /* We don't understand this bit. */
2236                         __virtio_clear_bit(vdev, i);
2237                 }
2238         }
2239 }
2240 EXPORT_SYMBOL_GPL(vring_transport_features);
2241
2242 /**
2243  * virtqueue_get_vring_size - return the size of the virtqueue's vring
2244  * @_vq: the struct virtqueue containing the vring of interest.
2245  *
2246  * Returns the size of the vring.  This is mainly used for boasting to
2247  * userspace.  Unlike other operations, this need not be serialized.
2248  */
2249 unsigned int virtqueue_get_vring_size(struct virtqueue *_vq)
2250 {
2251
2252         struct vring_virtqueue *vq = to_vvq(_vq);
2253
2254         return vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num;
2255 }
2256 EXPORT_SYMBOL_GPL(virtqueue_get_vring_size);
2257
2258 bool virtqueue_is_broken(struct virtqueue *_vq)
2259 {
2260         struct vring_virtqueue *vq = to_vvq(_vq);
2261
2262         return vq->broken;
2263 }
2264 EXPORT_SYMBOL_GPL(virtqueue_is_broken);
2265
2266 /*
2267  * This should prevent the device from being used, allowing drivers to
2268  * recover.  You may need to grab appropriate locks to flush.
2269  */
2270 void virtio_break_device(struct virtio_device *dev)
2271 {
2272         struct virtqueue *_vq;
2273
2274         list_for_each_entry(_vq, &dev->vqs, list) {
2275                 struct vring_virtqueue *vq = to_vvq(_vq);
2276                 vq->broken = true;
2277         }
2278 }
2279 EXPORT_SYMBOL_GPL(virtio_break_device);
2280
2281 dma_addr_t virtqueue_get_desc_addr(struct virtqueue *_vq)
2282 {
2283         struct vring_virtqueue *vq = to_vvq(_vq);
2284
2285         BUG_ON(!vq->we_own_ring);
2286
2287         if (vq->packed_ring)
2288                 return vq->packed.ring_dma_addr;
2289
2290         return vq->split.queue_dma_addr;
2291 }
2292 EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr);
2293
2294 dma_addr_t virtqueue_get_avail_addr(struct virtqueue *_vq)
2295 {
2296         struct vring_virtqueue *vq = to_vvq(_vq);
2297
2298         BUG_ON(!vq->we_own_ring);
2299
2300         if (vq->packed_ring)
2301                 return vq->packed.driver_event_dma_addr;
2302
2303         return vq->split.queue_dma_addr +
2304                 ((char *)vq->split.vring.avail - (char *)vq->split.vring.desc);
2305 }
2306 EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr);
2307
2308 dma_addr_t virtqueue_get_used_addr(struct virtqueue *_vq)
2309 {
2310         struct vring_virtqueue *vq = to_vvq(_vq);
2311
2312         BUG_ON(!vq->we_own_ring);
2313
2314         if (vq->packed_ring)
2315                 return vq->packed.device_event_dma_addr;
2316
2317         return vq->split.queue_dma_addr +
2318                 ((char *)vq->split.vring.used - (char *)vq->split.vring.desc);
2319 }
2320 EXPORT_SYMBOL_GPL(virtqueue_get_used_addr);
2321
2322 /* Only available for split ring */
2323 const struct vring *virtqueue_get_vring(struct virtqueue *vq)
2324 {
2325         return &to_vvq(vq)->split.vring;
2326 }
2327 EXPORT_SYMBOL_GPL(virtqueue_get_vring);
2328
2329 MODULE_LICENSE("GPL");