/******************************************************************************
 * gntdev.c
 *
 * Device for accessing (in user-space) pages that have been granted by other
 * domains.
 *
 * Copyright (c) 2006-2007, D G Murray.
 *           (c) 2009 Gerd Hoffmann <kraxel@redhat.com>
 *           (c) 2018 Oleksandr Andrushchenko, EPAM Systems Inc.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt

#include <linux/dma-mapping.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/miscdevice.h>
#include <linux/fs.h>
#include <linux/uaccess.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/refcount.h>
#include <linux/workqueue.h>

#include <xen/xen.h>
#include <xen/grant_table.h>
#include <xen/balloon.h>
#include <xen/gntdev.h>
#include <xen/events.h>
#include <xen/page.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>

#include "gntdev-common.h"
#ifdef CONFIG_XEN_GNTDEV_DMABUF
#include "gntdev-dmabuf.h"
#endif
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Derek G. Murray <Derek.Murray@cl.cam.ac.uk>, "
	      "Gerd Hoffmann <kraxel@redhat.com>");
MODULE_DESCRIPTION("User-space granted page access driver");

static unsigned int limit = 64*1024;
module_param(limit, uint, 0644);
MODULE_PARM_DESC(limit,
	"Maximum number of grants that may be mapped by one mapping request");
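
/*
 * Illustrative userspace usage of this device (a hypothetical sketch, not
 * part of the driver; error handling is omitted and the device node path
 * may differ between systems):
 *
 *	int fd = open("/dev/xen/gntdev", O_RDWR);
 *	struct ioctl_gntdev_map_grant_ref op = {
 *		.count = 1,
 *		.refs[0] = { .domid = remote_domid, .ref = gref },
 *	};
 *	ioctl(fd, IOCTL_GNTDEV_MAP_GRANT_REF, &op);
 *	void *addr = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED,
 *			  fd, op.index);
 *
 * remote_domid and gref are assumed to be obtained out of band (e.g. via
 * xenstore) from the granting domain.
 */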
/* True in PV mode, false otherwise */
static int use_ptemod;

static void unmap_grant_pages(struct gntdev_grant_map *map,
			      int offset, int pages);

static struct miscdevice gntdev_miscdev;
/* ------------------------------------------------------------------ */

bool gntdev_test_page_count(unsigned int count)
{
	return !count || count > limit;
}
static void gntdev_print_maps(struct gntdev_priv *priv,
			      char *text, int text_index)
{
	struct gntdev_grant_map *map;

	pr_debug("%s: maps list (priv %p)\n", __func__, priv);
	list_for_each_entry(map, &priv->maps, next)
		pr_debug("  index %2d, count %2d %s\n",
			 map->index, map->count,
			 map->index == text_index && text ? text : "");
}
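
/*
 * Release everything held by a grant map: the backing pages (DMA-allocated
 * or ballooned), the per-grant operation arrays and the map structure itself.
 */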
static void gntdev_free_map(struct gntdev_grant_map *map)
{
	if (map == NULL)
		return;

#ifdef CONFIG_XEN_GRANT_DMA_ALLOC
	if (map->dma_vaddr) {
		struct gnttab_dma_alloc_args args;

		args.dev = map->dma_dev;
		args.coherent = !!(map->dma_flags & GNTDEV_DMA_FLAG_COHERENT);
		args.nr_pages = map->count;
		args.pages = map->pages;
		args.frames = map->frames;
		args.vaddr = map->dma_vaddr;
		args.dev_bus_addr = map->dma_bus_addr;

		gnttab_dma_free_pages(&args);
	} else
#endif
	if (map->pages)
		gnttab_free_pages(map->count, map->pages);

#ifdef CONFIG_XEN_GRANT_DMA_ALLOC
	kvfree(map->frames);
#endif
	kvfree(map->pages);
	kvfree(map->grants);
	kvfree(map->map_ops);
	kvfree(map->unmap_ops);
	kvfree(map->kmap_ops);
	kvfree(map->kunmap_ops);
	kvfree(map->being_removed);
	kfree(map);
}
struct gntdev_grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count,
					  int dma_flags)
{
	struct gntdev_grant_map *add;
	int i;

	add = kzalloc(sizeof(*add), GFP_KERNEL);
	if (NULL == add)
		return NULL;

	add->grants    = kvmalloc_array(count, sizeof(add->grants[0]),
					GFP_KERNEL);
	add->map_ops   = kvmalloc_array(count, sizeof(add->map_ops[0]),
					GFP_KERNEL);
	add->unmap_ops = kvmalloc_array(count, sizeof(add->unmap_ops[0]),
					GFP_KERNEL);
	add->pages     = kvcalloc(count, sizeof(add->pages[0]), GFP_KERNEL);
	add->being_removed =
		kvcalloc(count, sizeof(add->being_removed[0]), GFP_KERNEL);
	if (NULL == add->grants    ||
	    NULL == add->map_ops   ||
	    NULL == add->unmap_ops ||
	    NULL == add->pages     ||
	    NULL == add->being_removed)
		goto err;
	if (use_ptemod) {
		add->kmap_ops   = kvmalloc_array(count, sizeof(add->kmap_ops[0]),
						 GFP_KERNEL);
		add->kunmap_ops = kvmalloc_array(count, sizeof(add->kunmap_ops[0]),
						 GFP_KERNEL);
		if (NULL == add->kmap_ops || NULL == add->kunmap_ops)
			goto err;
	}

#ifdef CONFIG_XEN_GRANT_DMA_ALLOC
	add->dma_flags = dma_flags;

	/*
	 * Check if this mapping is requested to be backed
	 * by a DMA buffer.
	 */
	if (dma_flags & (GNTDEV_DMA_FLAG_WC | GNTDEV_DMA_FLAG_COHERENT)) {
		struct gnttab_dma_alloc_args args;

		add->frames = kvcalloc(count, sizeof(add->frames[0]),
				       GFP_KERNEL);
		if (!add->frames)
			goto err;

		/* Remember the device, so we can free DMA memory. */
		add->dma_dev = priv->dma_dev;

		args.dev = priv->dma_dev;
		args.coherent = !!(dma_flags & GNTDEV_DMA_FLAG_COHERENT);
		args.nr_pages = count;
		args.pages = add->pages;
		args.frames = add->frames;

		if (gnttab_dma_alloc_pages(&args))
			goto err;

		add->dma_vaddr = args.vaddr;
		add->dma_bus_addr = args.dev_bus_addr;
	} else
#endif
	if (gnttab_alloc_pages(count, add->pages))
		goto err;

	for (i = 0; i < count; i++) {
		add->grants[i].domid = DOMID_INVALID;
		add->grants[i].ref = INVALID_GRANT_REF;
		add->map_ops[i].handle = INVALID_GRANT_HANDLE;
		add->unmap_ops[i].handle = INVALID_GRANT_HANDLE;
		if (use_ptemod) {
			add->kmap_ops[i].handle = INVALID_GRANT_HANDLE;
			add->kunmap_ops[i].handle = INVALID_GRANT_HANDLE;
		}
	}

	add->index = 0;
	add->count = count;
	refcount_set(&add->users, 1);

	return add;

err:
	gntdev_free_map(add);
	return NULL;
}
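
/*
 * Insert a freshly allocated map into the file's list, keeping the list
 * sorted by index and assigning the first free index range that is large
 * enough for the new map.
 */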
void gntdev_add_map(struct gntdev_priv *priv, struct gntdev_grant_map *add)
{
	struct gntdev_grant_map *map;

	list_for_each_entry(map, &priv->maps, next) {
		if (add->index + add->count < map->index) {
			list_add_tail(&add->next, &map->next);
			goto done;
		}
		add->index = map->index + map->count;
	}
	list_add_tail(&add->next, &priv->maps);

done:
	gntdev_print_maps(priv, "[new]", add->index);
}
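
/*
 * Look up a map by its starting index; if @count is non-zero the map must
 * also cover exactly that many grants.  Called with priv->lock held.
 */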
static struct gntdev_grant_map *gntdev_find_map_index(struct gntdev_priv *priv,
						       int index, int count)
{
	struct gntdev_grant_map *map;

	list_for_each_entry(map, &priv->maps, next) {
		if (map->index != index)
			continue;
		if (count && map->count != count)
			continue;
		return map;
	}
	return NULL;
}
void gntdev_put_map(struct gntdev_priv *priv, struct gntdev_grant_map *map)
{
	if (!map)
		return;

	if (!refcount_dec_and_test(&map->users))
		return;

	if (map->pages && !use_ptemod) {
		/*
		 * Increment the reference count.  This ensures that the
		 * subsequent call to unmap_grant_pages() will not wind up
		 * re-entering itself.  It *can* wind up calling
		 * gntdev_put_map() recursively, but such calls will be with a
		 * reference count greater than 1, so they will return before
		 * this code is reached.  The recursion depth is thus limited to
		 * 1.  Do NOT use refcount_inc() here, as it will detect that
		 * the reference count is zero and WARN().
		 */
		refcount_set(&map->users, 1);

		/*
		 * Unmap the grants.  This may or may not be asynchronous, so it
		 * is possible that the reference count is 1 on return, but it
		 * could also be greater than 1.
		 */
		unmap_grant_pages(map, 0, map->count);

		/* Check if the memory now needs to be freed */
		if (!refcount_dec_and_test(&map->users))
			return;

		/*
		 * All pages have been returned to the hypervisor, so free the
		 * map.
		 */
	}

	if (use_ptemod && map->notifier_init)
		mmu_interval_notifier_remove(&map->notifier);

	if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) {
		notify_remote_via_evtchn(map->notify.event);
		evtchn_put(map->notify.event);
	}
	gntdev_free_map(map);
}
/* ------------------------------------------------------------------ */
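
/*
 * Called via apply_to_page_range() for every PTE covering the mapping (PV
 * mode only).  Records the machine address of each PTE in map->map_ops so
 * the hypervisor can write the grant mapping directly into the page table.
 */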
static int find_grant_ptes(pte_t *pte, unsigned long addr, void *data)
{
	struct gntdev_grant_map *map = data;
	unsigned int pgnr = (addr - map->pages_vm_start) >> PAGE_SHIFT;
	int flags = map->flags | GNTMAP_application_map | GNTMAP_contains_pte |
		    (1 << _GNTMAP_guest_avail0);
	u64 pte_maddr;

	BUG_ON(pgnr >= map->count);
	pte_maddr = arbitrary_virt_to_machine(pte).maddr;

	gnttab_set_map_op(&map->map_ops[pgnr], pte_maddr, flags,
			  map->grants[pgnr].ref,
			  map->grants[pgnr].domid);
	gnttab_set_unmap_op(&map->unmap_ops[pgnr], pte_maddr, flags,
			    INVALID_GRANT_HANDLE);
	return 0;
}
int gntdev_map_grant_pages(struct gntdev_grant_map *map)
{
	size_t alloced = 0;
	int i, err = 0;

	if (!use_ptemod) {
		/* Note: it could already be mapped */
		if (map->map_ops[0].handle != INVALID_GRANT_HANDLE)
			return 0;
		for (i = 0; i < map->count; i++) {
			unsigned long addr = (unsigned long)
				pfn_to_kaddr(page_to_pfn(map->pages[i]));
			gnttab_set_map_op(&map->map_ops[i], addr, map->flags,
					  map->grants[i].ref,
					  map->grants[i].domid);
			gnttab_set_unmap_op(&map->unmap_ops[i], addr,
					    map->flags, INVALID_GRANT_HANDLE);
		}
	} else {
		/*
		 * Setup the map_ops corresponding to the pte entries pointing
		 * to the kernel linear addresses of the struct pages.
		 * These ptes are completely different from the user ptes dealt
		 * with find_grant_ptes.
		 * Note that GNTMAP_device_map isn't needed here: The
		 * dev_bus_addr output field gets consumed only from ->map_ops,
		 * and by not requesting it when mapping we also avoid needing
		 * to mirror dev_bus_addr into ->unmap_ops (and holding an extra
		 * reference to the page in the hypervisor).
		 */
		unsigned int flags = (map->flags & ~GNTMAP_device_map) |
				     GNTMAP_host_map;

		for (i = 0; i < map->count; i++) {
			unsigned long address = (unsigned long)
				pfn_to_kaddr(page_to_pfn(map->pages[i]));
			BUG_ON(PageHighMem(map->pages[i]));

			gnttab_set_map_op(&map->kmap_ops[i], address, flags,
					  map->grants[i].ref,
					  map->grants[i].domid);
			gnttab_set_unmap_op(&map->kunmap_ops[i], address,
					    flags, INVALID_GRANT_HANDLE);
		}
	}

	pr_debug("map %d+%d\n", map->index, map->count);
	err = gnttab_map_refs(map->map_ops, map->kmap_ops, map->pages,
			      map->count);

	for (i = 0; i < map->count; i++) {
		if (map->map_ops[i].status == GNTST_okay) {
			map->unmap_ops[i].handle = map->map_ops[i].handle;
			alloced++;
		} else if (!err)
			err = -EINVAL;

		if (map->flags & GNTMAP_device_map)
			map->unmap_ops[i].dev_bus_addr = map->map_ops[i].dev_bus_addr;

		if (use_ptemod) {
			if (map->kmap_ops[i].status == GNTST_okay) {
				alloced++;
				map->kunmap_ops[i].handle = map->kmap_ops[i].handle;
			} else if (!err)
				err = -EINVAL;
		}
	}
	atomic_add(alloced, &map->live_grants);
	return err;
}
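
/*
 * Completion callback for the asynchronous unmap issued by
 * __unmap_grant_pages(): it invalidates the unmapped handles, lowers the
 * live-grant counter and drops the extra map reference taken before the
 * async call.
 */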
static void __unmap_grant_pages_done(int result,
		struct gntab_unmap_queue_data *data)
{
	unsigned int i;
	struct gntdev_grant_map *map = data->data;
	unsigned int offset = data->unmap_ops - map->unmap_ops;
	int successful_unmaps = 0;
	int live_grants;

	for (i = 0; i < data->count; i++) {
		if (map->unmap_ops[offset + i].status == GNTST_okay &&
		    map->unmap_ops[offset + i].handle != INVALID_GRANT_HANDLE)
			successful_unmaps++;

		WARN_ON(map->unmap_ops[offset + i].status != GNTST_okay &&
			map->unmap_ops[offset + i].handle != INVALID_GRANT_HANDLE);
		pr_debug("unmap handle=%d st=%d\n",
			 map->unmap_ops[offset+i].handle,
			 map->unmap_ops[offset+i].status);
		map->unmap_ops[offset+i].handle = INVALID_GRANT_HANDLE;
		if (use_ptemod) {
			if (map->kunmap_ops[offset + i].status == GNTST_okay &&
			    map->kunmap_ops[offset + i].handle != INVALID_GRANT_HANDLE)
				successful_unmaps++;

			WARN_ON(map->kunmap_ops[offset + i].status != GNTST_okay &&
				map->kunmap_ops[offset + i].handle != INVALID_GRANT_HANDLE);
			pr_debug("kunmap handle=%u st=%d\n",
				 map->kunmap_ops[offset+i].handle,
				 map->kunmap_ops[offset+i].status);
			map->kunmap_ops[offset+i].handle = INVALID_GRANT_HANDLE;
		}
	}
	/*
	 * Decrease the live-grant counter.  This must happen after the loop to
	 * prevent premature reuse of the grants by gnttab_mmap().
	 */
	live_grants = atomic_sub_return(successful_unmaps, &map->live_grants);
	if (WARN_ON(live_grants < 0))
		pr_err("%s: live_grants became negative (%d) after unmapping %d pages!\n",
		       __func__, live_grants, successful_unmaps);

	/* Release reference taken by __unmap_grant_pages */
	gntdev_put_map(NULL, map);
}
static void __unmap_grant_pages(struct gntdev_grant_map *map, int offset,
				int pages)
{
	if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) {
		int pgno = (map->notify.addr >> PAGE_SHIFT);

		if (pgno >= offset && pgno < offset + pages) {
			/* No need for kmap, pages are in lowmem */
			uint8_t *tmp = pfn_to_kaddr(page_to_pfn(map->pages[pgno]));

			tmp[map->notify.addr & (PAGE_SIZE-1)] = 0;
			map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE;
		}
	}

	map->unmap_data.unmap_ops = map->unmap_ops + offset;
	map->unmap_data.kunmap_ops = use_ptemod ? map->kunmap_ops + offset : NULL;
	map->unmap_data.pages = map->pages + offset;
	map->unmap_data.count = pages;
	map->unmap_data.done = __unmap_grant_pages_done;
	map->unmap_data.data = map;
	refcount_inc(&map->users); /* to keep map alive during async call below */

	gnttab_unmap_refs_async(&map->unmap_data);
}
static void unmap_grant_pages(struct gntdev_grant_map *map, int offset,
			      int pages)
{
	int range;

	if (atomic_read(&map->live_grants) == 0)
		return; /* Nothing to do */

	pr_debug("unmap %d+%d [%d+%d]\n", map->index, map->count, offset, pages);

	/* It is possible the requested range will have a "hole" where we
	 * already unmapped some of the grants. Only unmap valid ranges.
	 */
	while (pages) {
		while (pages && map->being_removed[offset]) {
			offset++;
			pages--;
		}
		range = 0;
		while (range < pages) {
			if (map->being_removed[offset + range])
				break;
			map->being_removed[offset + range] = true;
			range++;
		}
		if (range)
			__unmap_grant_pages(map, offset, range);
		offset += range;
		pages -= range;
	}
}
/* ------------------------------------------------------------------ */
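
/*
 * VMA callbacks: every VMA that refers to a grant map (including VMAs
 * created by a split) holds a reference on it; the map is released when the
 * last such VMA is closed.
 */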
static void gntdev_vma_open(struct vm_area_struct *vma)
{
	struct gntdev_grant_map *map = vma->vm_private_data;

	pr_debug("gntdev_vma_open %p\n", vma);
	refcount_inc(&map->users);
}

static void gntdev_vma_close(struct vm_area_struct *vma)
{
	struct gntdev_grant_map *map = vma->vm_private_data;
	struct file *file = vma->vm_file;
	struct gntdev_priv *priv = file->private_data;

	pr_debug("gntdev_vma_close %p\n", vma);

	vma->vm_private_data = NULL;
	gntdev_put_map(priv, map);
}

static struct page *gntdev_vma_find_special_page(struct vm_area_struct *vma,
						 unsigned long addr)
{
	struct gntdev_grant_map *map = vma->vm_private_data;

	return map->pages[(addr - map->pages_vm_start) >> PAGE_SHIFT];
}

static const struct vm_operations_struct gntdev_vmops = {
	.open = gntdev_vma_open,
	.close = gntdev_vma_close,
	.find_special_page = gntdev_vma_find_special_page,
};
/* ------------------------------------------------------------------ */
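
/*
 * MMU interval notifier (PV mode only): when the virtual address range
 * covering the mapping is invalidated, unmap the affected grants so the
 * hypervisor never holds a stale pointer to a user PTE.
 */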
static bool gntdev_invalidate(struct mmu_interval_notifier *mn,
			      const struct mmu_notifier_range *range,
			      unsigned long cur_seq)
{
	struct gntdev_grant_map *map =
		container_of(mn, struct gntdev_grant_map, notifier);
	unsigned long mstart, mend;
	unsigned long map_start, map_end;

	if (!mmu_notifier_range_blockable(range))
		return false;

	map_start = map->pages_vm_start;
	map_end = map->pages_vm_start + (map->count << PAGE_SHIFT);

	/*
	 * If the VMA is split or otherwise changed the notifier is not
	 * updated, but we don't want to process VA's outside the modified
	 * VMA. FIXME: It would be much more understandable to just prevent
	 * modifying the VMA in the first place.
	 */
	if (map_start >= range->end || map_end <= range->start)
		return true;

	mstart = max(range->start, map_start);
	mend = min(range->end, map_end);
	pr_debug("map %d+%d (%lx %lx), range %lx %lx, mrange %lx %lx\n",
		 map->index, map->count, map_start, map_end,
		 range->start, range->end, mstart, mend);
	unmap_grant_pages(map, (mstart - map_start) >> PAGE_SHIFT,
			  (mend - mstart) >> PAGE_SHIFT);

	return true;
}

static const struct mmu_interval_notifier_ops gntdev_mmu_ops = {
	.invalidate = gntdev_invalidate,
};
/* ------------------------------------------------------------------ */
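
/* File operations: one struct gntdev_priv is allocated per open(). */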
static int gntdev_open(struct inode *inode, struct file *flip)
{
	struct gntdev_priv *priv;

	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
	if (!priv)
		return -ENOMEM;

	INIT_LIST_HEAD(&priv->maps);
	mutex_init(&priv->lock);

#ifdef CONFIG_XEN_GNTDEV_DMABUF
	priv->dmabuf_priv = gntdev_dmabuf_init(flip);
	if (IS_ERR(priv->dmabuf_priv)) {
		int ret = PTR_ERR(priv->dmabuf_priv);

		kfree(priv);
		return ret;
	}
#endif

	flip->private_data = priv;
#ifdef CONFIG_XEN_GRANT_DMA_ALLOC
	priv->dma_dev = gntdev_miscdev.this_device;
	dma_coerce_mask_and_coherent(priv->dma_dev, DMA_BIT_MASK(64));
#endif
	pr_debug("priv %p\n", priv);

	return 0;
}
static int gntdev_release(struct inode *inode, struct file *flip)
{
	struct gntdev_priv *priv = flip->private_data;
	struct gntdev_grant_map *map;

	pr_debug("priv %p\n", priv);

	mutex_lock(&priv->lock);
	while (!list_empty(&priv->maps)) {
		map = list_entry(priv->maps.next,
				 struct gntdev_grant_map, next);
		list_del(&map->next);
		gntdev_put_map(NULL /* already removed */, map);
	}
	mutex_unlock(&priv->lock);

#ifdef CONFIG_XEN_GNTDEV_DMABUF
	gntdev_dmabuf_fini(priv->dmabuf_priv);
#endif

	kfree(priv);
	return 0;
}
static long gntdev_ioctl_map_grant_ref(struct gntdev_priv *priv,
				       struct ioctl_gntdev_map_grant_ref __user *u)
{
	struct ioctl_gntdev_map_grant_ref op;
	struct gntdev_grant_map *map;
	int err;

	if (copy_from_user(&op, u, sizeof(op)) != 0)
		return -EFAULT;
	pr_debug("priv %p, add %d\n", priv, op.count);
	if (unlikely(gntdev_test_page_count(op.count)))
		return -EINVAL;

	err = -ENOMEM;
	map = gntdev_alloc_map(priv, op.count, 0 /* This is not a dma-buf. */);
	if (!map)
		return err;

	if (copy_from_user(map->grants, &u->refs,
			   sizeof(map->grants[0]) * op.count) != 0) {
		gntdev_put_map(NULL, map);
		return -EFAULT;
	}

	mutex_lock(&priv->lock);
	gntdev_add_map(priv, map);
	op.index = map->index << PAGE_SHIFT;
	mutex_unlock(&priv->lock);

	if (copy_to_user(u, &op, sizeof(op)) != 0)
		return -EFAULT;

	return 0;
}
static long gntdev_ioctl_unmap_grant_ref(struct gntdev_priv *priv,
					 struct ioctl_gntdev_unmap_grant_ref __user *u)
{
	struct ioctl_gntdev_unmap_grant_ref op;
	struct gntdev_grant_map *map;
	int err = -ENOENT;

	if (copy_from_user(&op, u, sizeof(op)) != 0)
		return -EFAULT;
	pr_debug("priv %p, del %d+%d\n", priv, (int)op.index, (int)op.count);

	mutex_lock(&priv->lock);
	map = gntdev_find_map_index(priv, op.index >> PAGE_SHIFT, op.count);
	if (map) {
		list_del(&map->next);
		err = 0;
	}
	mutex_unlock(&priv->lock);
	if (map)
		gntdev_put_map(priv, map);
	return err;
}
static long gntdev_ioctl_get_offset_for_vaddr(struct gntdev_priv *priv,
					      struct ioctl_gntdev_get_offset_for_vaddr __user *u)
{
	struct ioctl_gntdev_get_offset_for_vaddr op;
	struct vm_area_struct *vma;
	struct gntdev_grant_map *map;
	int rv = -EINVAL;

	if (copy_from_user(&op, u, sizeof(op)) != 0)
		return -EFAULT;
	pr_debug("priv %p, offset for vaddr %lx\n", priv, (unsigned long)op.vaddr);

	mmap_read_lock(current->mm);
	vma = find_vma(current->mm, op.vaddr);
	if (!vma || vma->vm_ops != &gntdev_vmops)
		goto out_unlock;

	map = vma->vm_private_data;
	if (!map)
		goto out_unlock;

	op.offset = map->index << PAGE_SHIFT;
	op.count = map->count;
	rv = 0;

 out_unlock:
	mmap_read_unlock(current->mm);

	if (rv == 0 && copy_to_user(u, &op, sizeof(op)) != 0)
		return -EFAULT;
	return rv;
}
static long gntdev_ioctl_notify(struct gntdev_priv *priv, void __user *u)
{
	struct ioctl_gntdev_unmap_notify op;
	struct gntdev_grant_map *map;
	int rc;
	int out_flags;
	evtchn_port_t out_event;

	if (copy_from_user(&op, u, sizeof(op)))
		return -EFAULT;

	if (op.action & ~(UNMAP_NOTIFY_CLEAR_BYTE|UNMAP_NOTIFY_SEND_EVENT))
		return -EINVAL;

	/* We need to grab a reference to the event channel we are going to use
	 * to send the notify before releasing the reference we may already have
	 * (if someone has called this ioctl twice). This is required so that
	 * it is possible to change the clear_byte part of the notification
	 * without disturbing the event channel part, which may now be the last
	 * reference to that event channel.
	 */
	if (op.action & UNMAP_NOTIFY_SEND_EVENT) {
		if (evtchn_get(op.event_channel_port))
			return -EINVAL;
	}

	out_flags = op.action;
	out_event = op.event_channel_port;

	mutex_lock(&priv->lock);

	list_for_each_entry(map, &priv->maps, next) {
		uint64_t begin = map->index << PAGE_SHIFT;
		uint64_t end = (map->index + map->count) << PAGE_SHIFT;
		if (op.index >= begin && op.index < end)
			goto found;
	}
	rc = -ENOENT;
	goto unlock_out;

 found:
	if ((op.action & UNMAP_NOTIFY_CLEAR_BYTE) &&
			(map->flags & GNTMAP_readonly)) {
		rc = -EINVAL;
		goto unlock_out;
	}

	out_flags = map->notify.flags;
	out_event = map->notify.event;

	map->notify.flags = op.action;
	map->notify.addr = op.index - (map->index << PAGE_SHIFT);
	map->notify.event = op.event_channel_port;

	rc = 0;

 unlock_out:
	mutex_unlock(&priv->lock);

	/* Drop the reference to the event channel we did not save in the map */
	if (out_flags & UNMAP_NOTIFY_SEND_EVENT)
		evtchn_put(out_event);

	return rc;
}
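
/*
 * Grant-copy support: userspace segments are translated into gnttab_copy
 * operations and issued to the hypervisor in batches of GNTDEV_COPY_BATCH,
 * pinning at most one local page per operation.
 */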
#define GNTDEV_COPY_BATCH 16

struct gntdev_copy_batch {
	struct gnttab_copy ops[GNTDEV_COPY_BATCH];
	struct page *pages[GNTDEV_COPY_BATCH];
	s16 __user *status[GNTDEV_COPY_BATCH];
	unsigned int nr_ops;
	unsigned int nr_pages;
	bool writeable;
};
static int gntdev_get_page(struct gntdev_copy_batch *batch, void __user *virt,
			   unsigned long *gfn)
{
	unsigned long addr = (unsigned long)virt;
	struct page *page;
	unsigned long xen_pfn;
	int ret;

	ret = pin_user_pages_fast(addr, 1, batch->writeable ? FOLL_WRITE : 0, &page);
	if (ret < 0)
		return ret;

	batch->pages[batch->nr_pages++] = page;

	xen_pfn = page_to_xen_pfn(page) + XEN_PFN_DOWN(addr & ~PAGE_MASK);
	*gfn = pfn_to_gfn(xen_pfn);

	return 0;
}

static void gntdev_put_pages(struct gntdev_copy_batch *batch)
{
	unpin_user_pages_dirty_lock(batch->pages, batch->nr_pages, batch->writeable);
	batch->nr_pages = 0;
	batch->writeable = false;
}
static int gntdev_copy(struct gntdev_copy_batch *batch)
{
	unsigned int i;

	gnttab_batch_copy(batch->ops, batch->nr_ops);
	gntdev_put_pages(batch);

	/*
	 * For each completed op, update the status if the op failed
	 * and all previous ops for the segment were successful.
	 */
	for (i = 0; i < batch->nr_ops; i++) {
		s16 status = batch->ops[i].status;
		s16 old_status;

		if (status == GNTST_okay)
			continue;

		if (__get_user(old_status, batch->status[i]))
			return -EFAULT;

		if (old_status != GNTST_okay)
			continue;

		if (__put_user(status, batch->status[i]))
			return -EFAULT;
	}

	batch->nr_ops = 0;
	return 0;
}
static int gntdev_grant_copy_seg(struct gntdev_copy_batch *batch,
				 struct gntdev_grant_copy_segment *seg,
				 s16 __user *status)
{
	uint16_t copied = 0;

	/*
	 * Disallow local -> local copies since there is only space in
	 * batch->pages for one page per-op and this would be a very
	 * expensive memcpy().
	 */
	if (!(seg->flags & (GNTCOPY_source_gref | GNTCOPY_dest_gref)))
		return -EINVAL;

	/* Can't cross page if source/dest is a grant ref. */
	if (seg->flags & GNTCOPY_source_gref) {
		if (seg->source.foreign.offset + seg->len > XEN_PAGE_SIZE)
			return -EINVAL;
	}
	if (seg->flags & GNTCOPY_dest_gref) {
		if (seg->dest.foreign.offset + seg->len > XEN_PAGE_SIZE)
			return -EINVAL;
	}

	if (put_user(GNTST_okay, status))
		return -EFAULT;

	while (copied < seg->len) {
		struct gnttab_copy *op;
		void __user *virt;
		size_t len, off;
		unsigned long gfn;
		int ret;

		if (batch->nr_ops >= GNTDEV_COPY_BATCH) {
			ret = gntdev_copy(batch);
			if (ret < 0)
				return ret;
		}

		len = seg->len - copied;

		op = &batch->ops[batch->nr_ops];
		op->flags = 0;

		if (seg->flags & GNTCOPY_source_gref) {
			op->source.u.ref = seg->source.foreign.ref;
			op->source.domid = seg->source.foreign.domid;
			op->source.offset = seg->source.foreign.offset + copied;
			op->flags |= GNTCOPY_source_gref;
		} else {
			virt = seg->source.virt + copied;
			off = (unsigned long)virt & ~XEN_PAGE_MASK;
			len = min(len, (size_t)XEN_PAGE_SIZE - off);
			batch->writeable = false;

			ret = gntdev_get_page(batch, virt, &gfn);
			if (ret < 0)
				return ret;

			op->source.u.gmfn = gfn;
			op->source.domid = DOMID_SELF;
			op->source.offset = off;
		}

		if (seg->flags & GNTCOPY_dest_gref) {
			op->dest.u.ref = seg->dest.foreign.ref;
			op->dest.domid = seg->dest.foreign.domid;
			op->dest.offset = seg->dest.foreign.offset + copied;
			op->flags |= GNTCOPY_dest_gref;
		} else {
			virt = seg->dest.virt + copied;
			off = (unsigned long)virt & ~XEN_PAGE_MASK;
			len = min(len, (size_t)XEN_PAGE_SIZE - off);
			batch->writeable = true;

			ret = gntdev_get_page(batch, virt, &gfn);
			if (ret < 0)
				return ret;

			op->dest.u.gmfn = gfn;
			op->dest.domid = DOMID_SELF;
			op->dest.offset = off;
		}

		op->len = len;
		copied += len;

		batch->status[batch->nr_ops] = status;
		batch->nr_ops++;
	}

	return 0;
}
static long gntdev_ioctl_grant_copy(struct gntdev_priv *priv, void __user *u)
{
	struct ioctl_gntdev_grant_copy copy;
	struct gntdev_copy_batch batch;
	unsigned int i;
	long ret = 0;

	if (copy_from_user(&copy, u, sizeof(copy)))
		return -EFAULT;

	batch.nr_ops = 0;
	batch.nr_pages = 0;

	for (i = 0; i < copy.count; i++) {
		struct gntdev_grant_copy_segment seg;

		if (copy_from_user(&seg, &copy.segments[i], sizeof(seg))) {
			ret = -EFAULT;
			goto out;
		}

		ret = gntdev_grant_copy_seg(&batch, &seg, &copy.segments[i].status);
		if (ret < 0)
			goto out;

		cond_resched();
	}
	if (batch.nr_ops)
		ret = gntdev_copy(&batch);
	return ret;

  out:
	gntdev_put_pages(&batch);
	return ret;
}
static long gntdev_ioctl(struct file *flip,
			 unsigned int cmd, unsigned long arg)
{
	struct gntdev_priv *priv = flip->private_data;
	void __user *ptr = (void __user *)arg;

	switch (cmd) {
	case IOCTL_GNTDEV_MAP_GRANT_REF:
		return gntdev_ioctl_map_grant_ref(priv, ptr);

	case IOCTL_GNTDEV_UNMAP_GRANT_REF:
		return gntdev_ioctl_unmap_grant_ref(priv, ptr);

	case IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR:
		return gntdev_ioctl_get_offset_for_vaddr(priv, ptr);

	case IOCTL_GNTDEV_SET_UNMAP_NOTIFY:
		return gntdev_ioctl_notify(priv, ptr);

	case IOCTL_GNTDEV_GRANT_COPY:
		return gntdev_ioctl_grant_copy(priv, ptr);

#ifdef CONFIG_XEN_GNTDEV_DMABUF
	case IOCTL_GNTDEV_DMABUF_EXP_FROM_REFS:
		return gntdev_ioctl_dmabuf_exp_from_refs(priv, use_ptemod, ptr);

	case IOCTL_GNTDEV_DMABUF_EXP_WAIT_RELEASED:
		return gntdev_ioctl_dmabuf_exp_wait_released(priv, ptr);

	case IOCTL_GNTDEV_DMABUF_IMP_TO_REFS:
		return gntdev_ioctl_dmabuf_imp_to_refs(priv, ptr);

	case IOCTL_GNTDEV_DMABUF_IMP_RELEASE:
		return gntdev_ioctl_dmabuf_imp_release(priv, ptr);
#endif

	default:
		pr_debug("priv %p, unknown cmd %x\n", priv, cmd);
		return -ENOIOCTLCMD;
	}

	return 0;
}
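
/*
 * mmap() handler: looks up the map registered via IOCTL_GNTDEV_MAP_GRANT_REF
 * for this file offset, then either maps the granted pages directly into the
 * VMA (auto-translated guests) or rewrites the VMA's PTEs through the
 * hypervisor (PV mode, use_ptemod).
 */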
static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
{
	struct gntdev_priv *priv = flip->private_data;
	int index = vma->vm_pgoff;
	int count = vma_pages(vma);
	struct gntdev_grant_map *map;
	int err = -EINVAL;

	if ((vma->vm_flags & VM_WRITE) && !(vma->vm_flags & VM_SHARED))
		return -EINVAL;

	pr_debug("map %d+%d at %lx (pgoff %lx)\n",
		 index, count, vma->vm_start, vma->vm_pgoff);

	mutex_lock(&priv->lock);
	map = gntdev_find_map_index(priv, index, count);
	if (!map)
		goto unlock_out;
	if (!atomic_add_unless(&map->in_use, 1, 1))
		goto unlock_out;

	refcount_inc(&map->users);

	vma->vm_ops = &gntdev_vmops;

	vm_flags_set(vma, VM_DONTEXPAND | VM_DONTDUMP | VM_MIXEDMAP);

	if (use_ptemod)
		vm_flags_set(vma, VM_DONTCOPY);

	vma->vm_private_data = map;
	if (map->flags) {
		if ((vma->vm_flags & VM_WRITE) &&
		    (map->flags & GNTMAP_readonly))
			goto out_unlock_put;
	} else {
		map->flags = GNTMAP_host_map;
		if (!(vma->vm_flags & VM_WRITE))
			map->flags |= GNTMAP_readonly;
	}

	map->pages_vm_start = vma->vm_start;

	if (use_ptemod) {
		err = mmu_interval_notifier_insert_locked(
			&map->notifier, vma->vm_mm, vma->vm_start,
			vma->vm_end - vma->vm_start, &gntdev_mmu_ops);
		if (err)
			goto out_unlock_put;

		map->notifier_init = true;
	}
	mutex_unlock(&priv->lock);

	if (use_ptemod) {
		/*
		 * gntdev takes the address of the PTE in find_grant_ptes() and
		 * passes it to the hypervisor in gntdev_map_grant_pages(). The
		 * purpose of the notifier is to prevent the hypervisor pointer
		 * to the PTE from going stale.
		 *
		 * Since this vma's mappings can't be touched without the
		 * mmap_lock, and we are holding it now, there is no need for
		 * the notifier_range locking pattern.
		 */
		mmu_interval_read_begin(&map->notifier);

		err = apply_to_page_range(vma->vm_mm, vma->vm_start,
					  vma->vm_end - vma->vm_start,
					  find_grant_ptes, map);
		if (err) {
			pr_warn("find_grant_ptes() failure.\n");
			goto out_put_map;
		}
	}

	err = gntdev_map_grant_pages(map);
	if (err)
		goto out_put_map;

	if (!use_ptemod) {
		err = vm_map_pages_zero(vma, map->pages, map->count);
		if (err)
			goto out_put_map;
	}

	return 0;

unlock_out:
	mutex_unlock(&priv->lock);
	return err;

out_unlock_put:
	mutex_unlock(&priv->lock);
out_put_map:
	if (use_ptemod)
		unmap_grant_pages(map, 0, map->count);
	gntdev_put_map(priv, map);
	return err;
}
static const struct file_operations gntdev_fops = {
	.owner = THIS_MODULE,
	.open = gntdev_open,
	.release = gntdev_release,
	.mmap = gntdev_mmap,
	.unlocked_ioctl = gntdev_ioctl
};

static struct miscdevice gntdev_miscdev = {
	.minor        = MISC_DYNAMIC_MINOR,
	.name         = "xen/gntdev",
	.fops         = &gntdev_fops,
};
/* ------------------------------------------------------------------ */

static int __init gntdev_init(void)
{
	int err;

	if (!xen_domain())
		return -ENODEV;

	use_ptemod = !xen_feature(XENFEAT_auto_translated_physmap);

	err = misc_register(&gntdev_miscdev);
	if (err != 0) {
		pr_err("Could not register gntdev device\n");
		return err;
	}
	return 0;
}

static void __exit gntdev_exit(void)
{
	misc_deregister(&gntdev_miscdev);
}

module_init(gntdev_init);
module_exit(gntdev_exit);

/* ------------------------------------------------------------------ */