/*
 * VFIO: IOMMU DMA mapping support for TCE on POWER
 *
 * Copyright (C) 2013 IBM Corp.  All rights reserved.
 *     Author: Alexey Kardashevskiy <aik@ozlabs.ru>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * Derived from original vfio_iommu_type1.c:
 * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
 *     Author: Alex Williamson <alex.williamson@redhat.com>
 */
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/err.h>
#include <linux/vfio.h>
#include <linux/vmalloc.h>
#include <asm/iommu.h>
#include <asm/tce.h>
#include <asm/mmu_context.h>
#define DRIVER_VERSION  "0.1"
#define DRIVER_AUTHOR   "aik@ozlabs.ru"
#define DRIVER_DESC     "VFIO IOMMU SPAPR TCE"
static void tce_iommu_detach_group(void *iommu_data,
		struct iommu_group *iommu_group);
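
/*
 * Helpers to account pinned pages against the mm's RLIMIT_MEMLOCK:
 * try_increment_locked_vm() fails with -ENOMEM if the new total would
 * exceed the limit (unless the task has CAP_IPC_LOCK);
 * decrement_locked_vm() is its counterpart on the unpin/teardown paths.
 */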
static long try_increment_locked_vm(struct mm_struct *mm, long npages)
{
	long ret = 0, locked, lock_limit;

	if (WARN_ON_ONCE(!mm))
		return -EPERM;

	if (!npages)
		return 0;

	down_write(&mm->mmap_sem);
	locked = mm->locked_vm + npages;
	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	if (locked > lock_limit && !capable(CAP_IPC_LOCK))
		ret = -ENOMEM;
	else
		mm->locked_vm += npages;

	pr_debug("[%d] RLIMIT_MEMLOCK +%ld %ld/%ld%s\n", current->pid,
			npages << PAGE_SHIFT,
			mm->locked_vm << PAGE_SHIFT,
			rlimit(RLIMIT_MEMLOCK),
			ret ? " - exceeded" : "");

	up_write(&mm->mmap_sem);

	return ret;
}
static void decrement_locked_vm(struct mm_struct *mm, long npages)
{
	if (!mm || !npages)
		return;

	down_write(&mm->mmap_sem);
	if (WARN_ON_ONCE(npages > mm->locked_vm))
		npages = mm->locked_vm;
	mm->locked_vm -= npages;
	pr_debug("[%d] RLIMIT_MEMLOCK -%ld %ld/%ld\n", current->pid,
			npages << PAGE_SHIFT,
			mm->locked_vm << PAGE_SHIFT,
			rlimit(RLIMIT_MEMLOCK));
	up_write(&mm->mmap_sem);
}
/*
 * VFIO IOMMU fd for SPAPR_TCE IOMMU implementation
 *
 * This code handles mapping and unmapping of user data buffers
 * into DMA'ble space using the IOMMU
 */
struct tce_iommu_group {
	struct list_head next;
	struct iommu_group *grp;
};
/*
 * A container needs to remember which preregistered region it has
 * referenced to do proper cleanup at the userspace process exit.
 */
struct tce_iommu_prereg {
	struct list_head next;
	struct mm_iommu_table_group_mem_t *mem;
};
/*
 * The container descriptor supports only a single group per container.
 * Required by the API as the container is not supplied with the IOMMU group
 * at the moment of initialization.
 */
struct tce_container {
	struct mutex lock;
	bool enabled;
	bool v2;
	bool def_window_pending;
	unsigned long locked_pages;
	struct mm_struct *mm;
	struct iommu_table *tables[IOMMU_TABLE_GROUP_MAX_TABLES];
	struct list_head group_list;
	struct list_head prereg_list;
};
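
/*
 * Remember the mm of the process that owns the container. All further
 * locked-memory accounting and preregistration is done against this mm,
 * and a reference (mm_count) is held until tce_iommu_release().
 */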
static long tce_iommu_mm_set(struct tce_container *container)
{
	if (container->mm) {
		if (container->mm == current->mm)
			return 0;
		return -EPERM;
	}
	BUG_ON(!current->mm);
	container->mm = current->mm;
	atomic_inc(&container->mm->mm_count);

	return 0;
}
static long tce_iommu_prereg_free(struct tce_container *container,
		struct tce_iommu_prereg *tcemem)
{
	long ret;

	ret = mm_iommu_put(container->mm, tcemem->mem);
	if (ret)
		return ret;

	list_del(&tcemem->next);
	kfree(tcemem);

	return 0;
}
static long tce_iommu_unregister_pages(struct tce_container *container,
		__u64 vaddr, __u64 size)
{
	struct mm_iommu_table_group_mem_t *mem;
	struct tce_iommu_prereg *tcemem;
	bool found = false;

	if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK))
		return -EINVAL;

	mem = mm_iommu_find(container->mm, vaddr, size >> PAGE_SHIFT);
	if (!mem)
		return -ENOENT;

	list_for_each_entry(tcemem, &container->prereg_list, next) {
		if (tcemem->mem == mem) {
			found = true;
			break;
		}
	}

	if (!found)
		return -ENOENT;

	return tce_iommu_prereg_free(container, tcemem);
}
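
/*
 * Preregister a chunk of userspace memory: the pages are pinned once here
 * (via mm_iommu_get()) so that the hot TCE update paths (including real
 * mode H_PUT_TCE) do not have to pin and unpin on every map/unmap.
 */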
static long tce_iommu_register_pages(struct tce_container *container,
		__u64 vaddr, __u64 size)
{
	long ret = 0;
	struct mm_iommu_table_group_mem_t *mem = NULL;
	struct tce_iommu_prereg *tcemem;
	unsigned long entries = size >> PAGE_SHIFT;

	if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK) ||
			((vaddr + size) < vaddr))
		return -EINVAL;

	mem = mm_iommu_find(container->mm, vaddr, entries);
	if (mem) {
		list_for_each_entry(tcemem, &container->prereg_list, next) {
			if (tcemem->mem == mem)
				return -EBUSY;
		}
	}

	ret = mm_iommu_get(container->mm, vaddr, entries, &mem);
	if (ret)
		return ret;

	tcemem = kzalloc(sizeof(*tcemem), GFP_KERNEL);
	if (!tcemem) {
		mm_iommu_put(container->mm, mem);
		return -ENOMEM;
	}

	tcemem->mem = mem;
	list_add(&tcemem->next, &container->prereg_list);

	container->enabled = true;

	return 0;
}
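
/*
 * The userspace view is a per-table array of userspace addresses, one per
 * TCE entry, used by the v2 path to find the preregistered region again
 * when an entry is cleared. It is allocated lazily with vzalloc() and
 * accounted as locked memory.
 */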
static long tce_iommu_userspace_view_alloc(struct iommu_table *tbl,
		struct mm_struct *mm)
{
	unsigned long cb = _ALIGN_UP(sizeof(tbl->it_userspace[0]) *
			tbl->it_size, PAGE_SIZE);
	unsigned long *uas;
	long ret;

	BUG_ON(tbl->it_userspace);

	ret = try_increment_locked_vm(mm, cb >> PAGE_SHIFT);
	if (ret)
		return ret;

	uas = vzalloc(cb);
	if (!uas) {
		decrement_locked_vm(mm, cb >> PAGE_SHIFT);
		return -ENOMEM;
	}
	tbl->it_userspace = uas;

	return 0;
}
static void tce_iommu_userspace_view_free(struct iommu_table *tbl,
		struct mm_struct *mm)
{
	unsigned long cb = _ALIGN_UP(sizeof(tbl->it_userspace[0]) *
			tbl->it_size, PAGE_SIZE);

	if (!tbl->it_userspace)
		return;

	vfree(tbl->it_userspace);
	tbl->it_userspace = NULL;
	decrement_locked_vm(mm, cb >> PAGE_SHIFT);
}
static bool tce_page_is_contained(struct page *page, unsigned page_shift)
{
	/*
	 * Check that the TCE table granularity is not bigger than the size of
	 * a page we just found. Otherwise the hardware can get access to
	 * a bigger memory chunk than it should.
	 */
	return (PAGE_SHIFT + compound_order(compound_head(page))) >= page_shift;
}
static inline bool tce_groups_attached(struct tce_container *container)
{
	return !list_empty(&container->group_list);
}
static long tce_iommu_find_table(struct tce_container *container,
		phys_addr_t ioba, struct iommu_table **ptbl)
{
	long i;

	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
		struct iommu_table *tbl = container->tables[i];

		if (tbl) {
			unsigned long entry = ioba >> tbl->it_page_shift;
			unsigned long start = tbl->it_offset;
			unsigned long end = start + tbl->it_size;

			if ((start <= entry) && (entry < end)) {
				*ptbl = tbl;
				return i;
			}
		}
	}

	return -1;
}
static int tce_iommu_find_free_table(struct tce_container *container)
{
	int i;

	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
		if (!container->tables[i])
			return i;
	}

	return -ENOSPC;
}
static int tce_iommu_enable(struct tce_container *container)
{
	int ret = 0;
	unsigned long locked;
	struct iommu_table_group *table_group;
	struct tce_iommu_group *tcegrp;

	if (container->enabled)
		return -EBUSY;

	/*
	 * When userspace pages are mapped into the IOMMU, they are effectively
	 * locked memory, so, theoretically, we need to update the accounting
	 * of locked pages on each map and unmap. For powerpc, the map/unmap
	 * paths can be very hot, though, and the accounting would kill
	 * performance, especially since it would be difficult or impossible
	 * to handle the accounting in real mode only.
	 *
	 * To address that, rather than precisely accounting every page, we
	 * instead account for a worst case on locked memory when the iommu is
	 * enabled and disabled. The worst case upper bound on locked memory
	 * is the size of the whole iommu window, which is usually relatively
	 * small (compared to total memory sizes) on POWER hardware.
	 *
	 * Also we don't have a nice way to fail on H_PUT_TCE due to ulimits;
	 * that would effectively kill the guest at random points, so it is
	 * much better to enforce the limit based on the max that the guest
	 * can map.
	 *
	 * Unfortunately at the moment it counts whole tables, no matter how
	 * much memory the guest has. I.e. for a 4GB guest and 4 IOMMU groups,
	 * each with a 2GB DMA window, 8GB will be counted here. The reason for
	 * this is that we cannot tell here the amount of RAM used by the guest
	 * as this information is only available from KVM and VFIO is
	 * KVM agnostic.
	 *
	 * So we do not allow enabling a container without a group attached
	 * as there is no way to know how much we should increment
	 * the locked_vm counter.
	 */
	if (!tce_groups_attached(container))
		return -ENODEV;

	tcegrp = list_first_entry(&container->group_list,
			struct tce_iommu_group, next);
	table_group = iommu_group_get_iommudata(tcegrp->grp);
	if (!table_group)
		return -ENODEV;

	if (!table_group->tce32_size)
		return -EPERM;

	ret = tce_iommu_mm_set(container);
	if (ret)
		return ret;

	locked = table_group->tce32_size >> PAGE_SHIFT;
	ret = try_increment_locked_vm(container->mm, locked);
	if (ret)
		return ret;

	container->locked_pages = locked;

	container->enabled = true;

	return ret;
}
static void tce_iommu_disable(struct tce_container *container)
{
	if (!container->enabled)
		return;

	container->enabled = false;

	BUG_ON(!container->mm);
	decrement_locked_vm(container->mm, container->locked_pages);
}
static void *tce_iommu_open(unsigned long arg)
{
	struct tce_container *container;

	if ((arg != VFIO_SPAPR_TCE_IOMMU) && (arg != VFIO_SPAPR_TCE_v2_IOMMU)) {
		pr_err("tce_vfio: Wrong IOMMU type\n");
		return ERR_PTR(-EINVAL);
	}

	container = kzalloc(sizeof(*container), GFP_KERNEL);
	if (!container)
		return ERR_PTR(-ENOMEM);

	mutex_init(&container->lock);
	INIT_LIST_HEAD_RCU(&container->group_list);
	INIT_LIST_HEAD_RCU(&container->prereg_list);

	container->v2 = arg == VFIO_SPAPR_TCE_v2_IOMMU;

	return container;
}
static int tce_iommu_clear(struct tce_container *container,
		struct iommu_table *tbl,
		unsigned long entry, unsigned long pages);
static void tce_iommu_free_table(struct tce_container *container,
		struct iommu_table *tbl);
static void tce_iommu_release(void *iommu_data)
{
	struct tce_container *container = iommu_data;
	struct tce_iommu_group *tcegrp;
	long i;

	while (tce_groups_attached(container)) {
		tcegrp = list_first_entry(&container->group_list,
				struct tce_iommu_group, next);
		tce_iommu_detach_group(iommu_data, tcegrp->grp);
	}

	/*
	 * If VFIO created a table, it was not disposed
	 * by tce_iommu_detach_group() so do it now.
	 */
	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
		struct iommu_table *tbl = container->tables[i];

		if (!tbl)
			continue;

		tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
		tce_iommu_free_table(container, tbl);
	}

	while (!list_empty(&container->prereg_list)) {
		struct tce_iommu_prereg *tcemem;

		tcemem = list_first_entry(&container->prereg_list,
				struct tce_iommu_prereg, next);
		WARN_ON_ONCE(tce_iommu_prereg_free(container, tcemem));
	}

	tce_iommu_disable(container);
	if (container->mm)
		mmdrop(container->mm);
	mutex_destroy(&container->lock);

	kfree(container);
}
static void tce_iommu_unuse_page(struct tce_container *container,
		unsigned long hpa)
{
	struct page *page;

	page = pfn_to_page(hpa >> PAGE_SHIFT);
	put_page(page);
}
static int tce_iommu_prereg_ua_to_hpa(struct tce_container *container,
		unsigned long tce, unsigned long size,
		unsigned long *phpa, struct mm_iommu_table_group_mem_t **pmem)
{
	long ret = 0;
	struct mm_iommu_table_group_mem_t *mem;

	mem = mm_iommu_lookup(container->mm, tce, size);
	if (!mem)
		return -EINVAL;

	ret = mm_iommu_ua_to_hpa(mem, tce, phpa);
	if (ret)
		return -EINVAL;

	*pmem = mem;

	return 0;
}
static void tce_iommu_unuse_page_v2(struct tce_container *container,
		struct iommu_table *tbl, unsigned long entry)
{
	struct mm_iommu_table_group_mem_t *mem = NULL;
	int ret;
	unsigned long hpa = 0;
	unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);

	if (!pua)
		return;

	ret = tce_iommu_prereg_ua_to_hpa(container, *pua, IOMMU_PAGE_SIZE(tbl),
			&hpa, &mem);
	if (ret)
		pr_debug("%s: tce %lx at #%lx was not cached, ret=%d\n",
				__func__, *pua, entry, ret);
	if (mem)
		mm_iommu_mapped_dec(mem);

	*pua = 0;
}
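
/*
 * Clear a range of TCE entries: each entry is exchanged with an empty one
 * (DMA_NONE) and the page previously mapped there is released, either by
 * dropping the gup reference (v1) or by decrementing the preregistered
 * region's mapped counter (v2).
 */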
static int tce_iommu_clear(struct tce_container *container,
		struct iommu_table *tbl,
		unsigned long entry, unsigned long pages)
{
	unsigned long oldhpa;
	long ret;
	enum dma_data_direction direction;

	for ( ; pages; --pages, ++entry) {
		direction = DMA_NONE;
		oldhpa = 0;
		ret = iommu_tce_xchg(tbl, entry, &oldhpa, &direction);
		if (ret)
			continue;

		if (direction == DMA_NONE)
			continue;

		if (container->v2) {
			tce_iommu_unuse_page_v2(container, tbl, entry);
			continue;
		}

		tce_iommu_unuse_page(container, oldhpa);
	}

	return 0;
}
static int tce_iommu_use_page(unsigned long tce, unsigned long *hpa)
{
	struct page *page = NULL;
	enum dma_data_direction direction = iommu_tce_direction(tce);

	if (get_user_pages_fast(tce & PAGE_MASK, 1,
			direction != DMA_TO_DEVICE, &page) != 1)
		return -EFAULT;

	*hpa = __pa((unsigned long) page_address(page));

	return 0;
}
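
/*
 * v1 mapping path: pin each userspace page with get_user_pages_fast() and
 * program the translation with iommu_tce_xchg(). On failure, the entries
 * programmed so far are rolled back via tce_iommu_clear().
 */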
static long tce_iommu_build(struct tce_container *container,
		struct iommu_table *tbl,
		unsigned long entry, unsigned long tce, unsigned long pages,
		enum dma_data_direction direction)
{
	long i, ret = 0;
	struct page *page;
	unsigned long hpa;
	enum dma_data_direction dirtmp;

	for (i = 0; i < pages; ++i) {
		unsigned long offset = tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK;

		ret = tce_iommu_use_page(tce, &hpa);
		if (ret)
			break;

		page = pfn_to_page(hpa >> PAGE_SHIFT);
		if (!tce_page_is_contained(page, tbl->it_page_shift)) {
			ret = -EPERM;
			break;
		}

		hpa |= offset;
		dirtmp = direction;
		ret = iommu_tce_xchg(tbl, entry + i, &hpa, &dirtmp);
		if (ret) {
			tce_iommu_unuse_page(container, hpa);
			pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n",
					__func__, entry << tbl->it_page_shift,
					tce, ret);
			break;
		}

		if (dirtmp != DMA_NONE)
			tce_iommu_unuse_page(container, hpa);

		tce += IOMMU_PAGE_SIZE(tbl);
	}

	if (ret)
		tce_iommu_clear(container, tbl, entry, i);

	return ret;
}
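
/*
 * v2 mapping path: pages must come from a preregistered region, so instead
 * of gup the userspace address is translated via the preregistered memory
 * descriptor and the region's mapped counter is incremented per entry; the
 * userspace address is also cached in the table's userspace view.
 */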
static long tce_iommu_build_v2(struct tce_container *container,
		struct iommu_table *tbl,
		unsigned long entry, unsigned long tce, unsigned long pages,
		enum dma_data_direction direction)
{
	long i, ret = 0;
	struct page *page;
	unsigned long hpa;
	enum dma_data_direction dirtmp;

	if (!tbl->it_userspace) {
		ret = tce_iommu_userspace_view_alloc(tbl, container->mm);
		if (ret)
			return ret;
	}

	for (i = 0; i < pages; ++i) {
		struct mm_iommu_table_group_mem_t *mem = NULL;
		unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl,
				entry + i);

		ret = tce_iommu_prereg_ua_to_hpa(container,
				tce, IOMMU_PAGE_SIZE(tbl), &hpa, &mem);
		if (ret)
			break;

		page = pfn_to_page(hpa >> PAGE_SHIFT);
		if (!tce_page_is_contained(page, tbl->it_page_shift)) {
			ret = -EPERM;
			break;
		}

		/* Preserve offset within IOMMU page */
		hpa |= tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK;
		dirtmp = direction;

		/* The registered region is being unregistered */
		if (mm_iommu_mapped_inc(mem))
			break;

		ret = iommu_tce_xchg(tbl, entry + i, &hpa, &dirtmp);
		if (ret) {
			/* dirtmp cannot be DMA_NONE here */
			tce_iommu_unuse_page_v2(container, tbl, entry + i);
			pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n",
					__func__, entry << tbl->it_page_shift,
					tce, ret);
			break;
		}

		if (dirtmp != DMA_NONE)
			tce_iommu_unuse_page_v2(container, tbl, entry + i);

		*pua = tce;

		tce += IOMMU_PAGE_SIZE(tbl);
	}

	if (ret)
		tce_iommu_clear(container, tbl, entry, i);

	return ret;
}
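
/*
 * Allocate a hardware TCE table through the platform callbacks, charging
 * its size against the locked memory limit first.
 */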
static long tce_iommu_create_table(struct tce_container *container,
			struct iommu_table_group *table_group,
			int num,
			__u32 page_shift,
			__u64 window_size,
			__u32 levels,
			struct iommu_table **ptbl)
{
	long ret, table_size;

	table_size = table_group->ops->get_table_size(page_shift, window_size,
			levels);
	if (!table_size)
		return -EINVAL;

	ret = try_increment_locked_vm(container->mm, table_size >> PAGE_SHIFT);
	if (ret)
		return ret;

	ret = table_group->ops->create_table(table_group, num,
			page_shift, window_size, levels, ptbl);

	WARN_ON(!ret && !(*ptbl)->it_ops->free);
	WARN_ON(!ret && ((*ptbl)->it_allocated_size != table_size));

	return ret;
}
static void tce_iommu_free_table(struct tce_container *container,
		struct iommu_table *tbl)
{
	unsigned long pages = tbl->it_allocated_size >> PAGE_SHIFT;

	tce_iommu_userspace_view_free(tbl, container->mm);
	tbl->it_ops->free(tbl);
	decrement_locked_vm(container->mm, pages);
}
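
/*
 * Create a DMA window (dynamic DMA window support): allocate the table via
 * the first group's callbacks and then program it into every attached group.
 */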
static long tce_iommu_create_window(struct tce_container *container,
		__u32 page_shift, __u64 window_size, __u32 levels,
		__u64 *start_addr)
{
	struct tce_iommu_group *tcegrp;
	struct iommu_table_group *table_group;
	struct iommu_table *tbl = NULL;
	long ret, num;

	num = tce_iommu_find_free_table(container);
	if (num < 0)
		return num;

	/* Get the first group for ops::create_table */
	tcegrp = list_first_entry(&container->group_list,
			struct tce_iommu_group, next);
	table_group = iommu_group_get_iommudata(tcegrp->grp);
	if (!table_group)
		return -EFAULT;

	if (!(table_group->pgsizes & (1ULL << page_shift)))
		return -EINVAL;

	if (!table_group->ops->set_window || !table_group->ops->unset_window ||
			!table_group->ops->get_table_size ||
			!table_group->ops->create_table)
		return -EPERM;

	/* Create TCE table */
	ret = tce_iommu_create_table(container, table_group, num,
			page_shift, window_size, levels, &tbl);
	if (ret)
		return ret;

	BUG_ON(!tbl->it_ops->free);

	/*
	 * Program the table to every group.
	 * Groups have been tested for compatibility at the attach time.
	 */
	list_for_each_entry(tcegrp, &container->group_list, next) {
		table_group = iommu_group_get_iommudata(tcegrp->grp);

		ret = table_group->ops->set_window(table_group, num, tbl);
		if (ret)
			goto unset_exit;
	}

	container->tables[num] = tbl;

	/* Return start address assigned by platform in create_table() */
	*start_addr = tbl->it_offset << tbl->it_page_shift;

	return 0;

unset_exit:
	list_for_each_entry(tcegrp, &container->group_list, next) {
		table_group = iommu_group_get_iommudata(tcegrp->grp);
		table_group->ops->unset_window(table_group, num);
	}
	tce_iommu_free_table(container, tbl);

	return ret;
}
static long tce_iommu_remove_window(struct tce_container *container,
		__u64 start_addr)
{
	struct iommu_table_group *table_group = NULL;
	struct iommu_table *tbl;
	struct tce_iommu_group *tcegrp;
	int num;

	num = tce_iommu_find_table(container, start_addr, &tbl);
	if (num < 0)
		return -EINVAL;

	BUG_ON(!tbl->it_size);

	/* Detach groups from IOMMUs */
	list_for_each_entry(tcegrp, &container->group_list, next) {
		table_group = iommu_group_get_iommudata(tcegrp->grp);

		/*
		 * SPAPR TCE IOMMU exposes the default DMA window to
		 * the guest via dma32_window_start/size of
		 * VFIO_IOMMU_SPAPR_TCE_GET_INFO. Some platforms allow
		 * the userspace to remove this window, some do not, so
		 * here we check for the platform capability.
		 */
		if (!table_group->ops || !table_group->ops->unset_window)
			return -EPERM;

		table_group->ops->unset_window(table_group, num);
	}

	/* Free table */
	tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
	tce_iommu_free_table(container, tbl);
	container->tables[num] = NULL;

	return 0;
}
static long tce_iommu_create_default_window(struct tce_container *container)
{
	long ret;
	__u64 start_addr = 0;
	struct tce_iommu_group *tcegrp;
	struct iommu_table_group *table_group;

	if (!container->def_window_pending)
		return 0;

	if (!tce_groups_attached(container))
		return -ENODEV;

	tcegrp = list_first_entry(&container->group_list,
			struct tce_iommu_group, next);
	table_group = iommu_group_get_iommudata(tcegrp->grp);
	if (!table_group)
		return -ENODEV;

	ret = tce_iommu_create_window(container, IOMMU_PAGE_SHIFT_4K,
			table_group->tce32_size, 1, &start_addr);
	WARN_ON_ONCE(!ret && start_addr);

	if (!ret)
		container->def_window_pending = false;

	return ret;
}
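
/*
 * A minimal sketch (illustrative, not part of this driver) of the userspace
 * call sequence that reaches the handler below for the v1
 * (VFIO_SPAPR_TCE_IOMMU) flavour, assuming "group" is an already opened
 * /dev/vfio/<group> descriptor and error handling is omitted:
 *
 *	int container = open("/dev/vfio/vfio", O_RDWR);
 *	ioctl(group, VFIO_GROUP_SET_CONTAINER, &container);
 *	ioctl(container, VFIO_SET_IOMMU, VFIO_SPAPR_TCE_IOMMU);
 *	ioctl(container, VFIO_IOMMU_ENABLE);
 *
 *	struct vfio_iommu_type1_dma_map map = {
 *		.argsz = sizeof(map),
 *		.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE,
 *		.vaddr = (__u64)(unsigned long)buf,
 *		.iova = 0,
 *		.size = len,
 *	};
 *	ioctl(container, VFIO_IOMMU_MAP_DMA, &map);
 */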
static long tce_iommu_ioctl(void *iommu_data,
		unsigned int cmd, unsigned long arg)
{
	struct tce_container *container = iommu_data;
	unsigned long minsz, ddwsz;
	long ret;

	switch (cmd) {
	case VFIO_CHECK_EXTENSION:
		switch (arg) {
		case VFIO_SPAPR_TCE_IOMMU:
		case VFIO_SPAPR_TCE_v2_IOMMU:
			ret = 1;
			break;
		default:
			ret = vfio_spapr_iommu_eeh_ioctl(NULL, cmd, arg);
			break;
		}

		return (ret < 0) ? 0 : ret;
	}

	/*
	 * Sanity check to prevent one userspace from manipulating
	 * another userspace mm.
	 */
	if (container->mm && container->mm != current->mm)
		return -EPERM;

	switch (cmd) {
	case VFIO_IOMMU_SPAPR_TCE_GET_INFO: {
		struct vfio_iommu_spapr_tce_info info;
		struct tce_iommu_group *tcegrp;
		struct iommu_table_group *table_group;

		if (!tce_groups_attached(container))
			return -ENXIO;

		tcegrp = list_first_entry(&container->group_list,
				struct tce_iommu_group, next);
		table_group = iommu_group_get_iommudata(tcegrp->grp);

		if (!table_group)
			return -ENXIO;

		minsz = offsetofend(struct vfio_iommu_spapr_tce_info,
				dma32_window_size);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;

		if (info.argsz < minsz)
			return -EINVAL;

		info.dma32_window_start = table_group->tce32_start;
		info.dma32_window_size = table_group->tce32_size;
		info.flags = 0;
		memset(&info.ddw, 0, sizeof(info.ddw));

		if (table_group->max_dynamic_windows_supported &&
				container->v2) {
			info.flags |= VFIO_IOMMU_SPAPR_INFO_DDW;
			info.ddw.pgsizes = table_group->pgsizes;
			info.ddw.max_dynamic_windows_supported =
				table_group->max_dynamic_windows_supported;
			info.ddw.levels = table_group->max_levels;
		}

		ddwsz = offsetofend(struct vfio_iommu_spapr_tce_info, ddw);

		if (info.argsz >= ddwsz)
			minsz = ddwsz;

		if (copy_to_user((void __user *)arg, &info, minsz))
			return -EFAULT;

		return 0;
	}
	case VFIO_IOMMU_MAP_DMA: {
		struct vfio_iommu_type1_dma_map param;
		struct iommu_table *tbl = NULL;
		long num;
		enum dma_data_direction direction;

		if (!container->enabled)
			return -EPERM;

		minsz = offsetofend(struct vfio_iommu_type1_dma_map, size);

		if (copy_from_user(&param, (void __user *)arg, minsz))
			return -EFAULT;

		if (param.argsz < minsz)
			return -EINVAL;

		if (param.flags & ~(VFIO_DMA_MAP_FLAG_READ |
				VFIO_DMA_MAP_FLAG_WRITE))
			return -EINVAL;

		ret = tce_iommu_create_default_window(container);
		if (ret)
			return ret;

		num = tce_iommu_find_table(container, param.iova, &tbl);
		if (num < 0)
			return -ENXIO;

		if ((param.size & ~IOMMU_PAGE_MASK(tbl)) ||
				(param.vaddr & ~IOMMU_PAGE_MASK(tbl)))
			return -EINVAL;

		/* iova is checked by the IOMMU API */
		if (param.flags & VFIO_DMA_MAP_FLAG_READ) {
			if (param.flags & VFIO_DMA_MAP_FLAG_WRITE)
				direction = DMA_BIDIRECTIONAL;
			else
				direction = DMA_TO_DEVICE;
		} else {
			if (param.flags & VFIO_DMA_MAP_FLAG_WRITE)
				direction = DMA_FROM_DEVICE;
			else
				return -EINVAL;
		}

		ret = iommu_tce_put_param_check(tbl, param.iova, param.vaddr);
		if (ret)
			return ret;

		if (container->v2)
			ret = tce_iommu_build_v2(container, tbl,
					param.iova >> tbl->it_page_shift,
					param.vaddr,
					param.size >> tbl->it_page_shift,
					direction);
		else
			ret = tce_iommu_build(container, tbl,
					param.iova >> tbl->it_page_shift,
					param.vaddr,
					param.size >> tbl->it_page_shift,
					direction);

		iommu_flush_tce(tbl);

		return ret;
	}
	case VFIO_IOMMU_UNMAP_DMA: {
		struct vfio_iommu_type1_dma_unmap param;
		struct iommu_table *tbl = NULL;
		long num;

		if (!container->enabled)
			return -EPERM;

		minsz = offsetofend(struct vfio_iommu_type1_dma_unmap,
				size);

		if (copy_from_user(&param, (void __user *)arg, minsz))
			return -EFAULT;

		if (param.argsz < minsz)
			return -EINVAL;

		/* No flag is supported now */
		if (param.flags)
			return -EINVAL;

		ret = tce_iommu_create_default_window(container);
		if (ret)
			return ret;

		num = tce_iommu_find_table(container, param.iova, &tbl);
		if (num < 0)
			return -ENXIO;

		if (param.size & ~IOMMU_PAGE_MASK(tbl))
			return -EINVAL;

		ret = iommu_tce_clear_param_check(tbl, param.iova, 0,
				param.size >> tbl->it_page_shift);
		if (ret)
			return ret;

		ret = tce_iommu_clear(container, tbl,
				param.iova >> tbl->it_page_shift,
				param.size >> tbl->it_page_shift);
		iommu_flush_tce(tbl);

		return ret;
	}
	case VFIO_IOMMU_SPAPR_REGISTER_MEMORY: {
		struct vfio_iommu_spapr_register_memory param;

		if (!container->v2)
			break;

		minsz = offsetofend(struct vfio_iommu_spapr_register_memory,
				size);

		ret = tce_iommu_mm_set(container);
		if (ret)
			return ret;

		if (copy_from_user(&param, (void __user *)arg, minsz))
			return -EFAULT;

		if (param.argsz < minsz)
			return -EINVAL;

		/* No flag is supported now */
		if (param.flags)
			return -EINVAL;

		mutex_lock(&container->lock);
		ret = tce_iommu_register_pages(container, param.vaddr,
				param.size);
		mutex_unlock(&container->lock);

		return ret;
	}
	case VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY: {
		struct vfio_iommu_spapr_register_memory param;

		if (!container->v2)
			break;

		if (!container->mm)
			return -EPERM;

		minsz = offsetofend(struct vfio_iommu_spapr_register_memory,
				size);

		if (copy_from_user(&param, (void __user *)arg, minsz))
			return -EFAULT;

		if (param.argsz < minsz)
			return -EINVAL;

		/* No flag is supported now */
		if (param.flags)
			return -EINVAL;

		mutex_lock(&container->lock);
		ret = tce_iommu_unregister_pages(container, param.vaddr,
				param.size);
		mutex_unlock(&container->lock);

		return ret;
	}
	case VFIO_IOMMU_ENABLE:
		if (container->v2)
			break;

		mutex_lock(&container->lock);
		ret = tce_iommu_enable(container);
		mutex_unlock(&container->lock);
		return ret;

	case VFIO_IOMMU_DISABLE:
		if (container->v2)
			break;

		mutex_lock(&container->lock);
		tce_iommu_disable(container);
		mutex_unlock(&container->lock);
		return 0;

	case VFIO_EEH_PE_OP: {
		struct tce_iommu_group *tcegrp;

		ret = 0;
		list_for_each_entry(tcegrp, &container->group_list, next) {
			ret = vfio_spapr_iommu_eeh_ioctl(tcegrp->grp,
					cmd, arg);
			if (ret)
				return ret;
		}
		return ret;
	}
	case VFIO_IOMMU_SPAPR_TCE_CREATE: {
		struct vfio_iommu_spapr_tce_create create;

		if (!container->v2)
			break;

		ret = tce_iommu_mm_set(container);
		if (ret)
			return ret;

		if (!tce_groups_attached(container))
			return -ENXIO;

		minsz = offsetofend(struct vfio_iommu_spapr_tce_create,
				start_addr);

		if (copy_from_user(&create, (void __user *)arg, minsz))
			return -EFAULT;

		if (create.argsz < minsz)
			return -EINVAL;

		if (create.flags)
			return -EINVAL;

		mutex_lock(&container->lock);

		ret = tce_iommu_create_default_window(container);
		if (!ret)
			ret = tce_iommu_create_window(container,
					create.page_shift,
					create.window_size, create.levels,
					&create.start_addr);

		mutex_unlock(&container->lock);

		if (!ret && copy_to_user((void __user *)arg, &create, minsz))
			ret = -EFAULT;

		return ret;
	}
	case VFIO_IOMMU_SPAPR_TCE_REMOVE: {
		struct vfio_iommu_spapr_tce_remove remove;

		if (!container->v2)
			break;

		ret = tce_iommu_mm_set(container);
		if (ret)
			return ret;

		if (!tce_groups_attached(container))
			return -ENXIO;

		minsz = offsetofend(struct vfio_iommu_spapr_tce_remove,
				start_addr);

		if (copy_from_user(&remove, (void __user *)arg, minsz))
			return -EFAULT;

		if (remove.argsz < minsz)
			return -EINVAL;

		if (remove.flags)
			return -EINVAL;

		if (container->def_window_pending && !remove.start_addr) {
			container->def_window_pending = false;
			return 0;
		}

		mutex_lock(&container->lock);

		ret = tce_iommu_remove_window(container, remove.start_addr);

		mutex_unlock(&container->lock);

		return ret;
	}
	}

	return -ENOTTY;
}
static void tce_iommu_release_ownership(struct tce_container *container,
		struct iommu_table_group *table_group)
{
	int i;

	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
		struct iommu_table *tbl = container->tables[i];

		if (!tbl)
			continue;

		tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
		tce_iommu_userspace_view_free(tbl, container->mm);
		if (tbl->it_map)
			iommu_release_ownership(tbl);

		container->tables[i] = NULL;
	}
}
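
/*
 * Non-DDW (v1) platforms: take exclusive ownership of the group's
 * preexisting tables so the kernel DMA API stops using them while
 * userspace owns the group.
 */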
static int tce_iommu_take_ownership(struct tce_container *container,
		struct iommu_table_group *table_group)
{
	int i, j, rc = 0;

	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
		struct iommu_table *tbl = table_group->tables[i];

		if (!tbl || !tbl->it_map)
			continue;

		rc = iommu_take_ownership(tbl);
		if (rc) {
			for (j = 0; j < i; ++j)
				iommu_release_ownership(
						table_group->tables[j]);

			return rc;
		}
	}

	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i)
		container->tables[i] = table_group->tables[i];

	return 0;
}
static void tce_iommu_release_ownership_ddw(struct tce_container *container,
		struct iommu_table_group *table_group)
{
	long i;

	if (!table_group->ops->unset_window) {
		WARN_ON_ONCE(1);
		return;
	}

	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i)
		table_group->ops->unset_window(table_group, i);

	table_group->ops->release_ownership(table_group);
}
static long tce_iommu_take_ownership_ddw(struct tce_container *container,
		struct iommu_table_group *table_group)
{
	long i, ret = 0;

	if (!table_group->ops->create_table || !table_group->ops->set_window ||
			!table_group->ops->release_ownership) {
		WARN_ON_ONCE(1);
		return -EFAULT;
	}

	table_group->ops->take_ownership(table_group);

	/* Set all windows to the new group */
	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
		struct iommu_table *tbl = container->tables[i];

		if (!tbl)
			continue;

		ret = table_group->ops->set_window(table_group, i, tbl);
		if (ret)
			goto release_exit;
	}

	return 0;

release_exit:
	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i)
		table_group->ops->unset_window(table_group, i);

	table_group->ops->release_ownership(table_group);

	return ret;
}
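
/*
 * Attach an IOMMU group to the container. The first group defines the
 * table_group ops; any further group must be compatible (same create_table
 * callback) so that one set of tables can be shared by all attached groups.
 */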
static int tce_iommu_attach_group(void *iommu_data,
		struct iommu_group *iommu_group)
{
	int ret;
	struct tce_container *container = iommu_data;
	struct iommu_table_group *table_group;
	struct tce_iommu_group *tcegrp = NULL;

	mutex_lock(&container->lock);

	/* pr_debug("tce_vfio: Attaching group #%u to iommu %p\n",
			iommu_group_id(iommu_group), iommu_group); */
	table_group = iommu_group_get_iommudata(iommu_group);
	if (!table_group) {
		ret = -ENODEV;
		goto unlock_exit;
	}

	if (tce_groups_attached(container) && (!table_group->ops ||
			!table_group->ops->take_ownership ||
			!table_group->ops->release_ownership)) {
		ret = -EBUSY;
		goto unlock_exit;
	}

	/* Check if new group has the same iommu_ops (i.e. compatible) */
	list_for_each_entry(tcegrp, &container->group_list, next) {
		struct iommu_table_group *table_group_tmp;

		if (tcegrp->grp == iommu_group) {
			pr_warn("tce_vfio: Group %d is already attached\n",
					iommu_group_id(iommu_group));
			ret = -EBUSY;
			goto unlock_exit;
		}
		table_group_tmp = iommu_group_get_iommudata(tcegrp->grp);
		if (table_group_tmp->ops->create_table !=
				table_group->ops->create_table) {
			pr_warn("tce_vfio: Group %d is incompatible with group %d\n",
					iommu_group_id(iommu_group),
					iommu_group_id(tcegrp->grp));
			ret = -EPERM;
			goto unlock_exit;
		}
	}

	tcegrp = kzalloc(sizeof(*tcegrp), GFP_KERNEL);
	if (!tcegrp) {
		ret = -ENOMEM;
		goto unlock_exit;
	}

	if (!table_group->ops || !table_group->ops->take_ownership ||
			!table_group->ops->release_ownership) {
		if (container->v2) {
			ret = -EPERM;
			goto unlock_exit;
		}
		ret = tce_iommu_take_ownership(container, table_group);
	} else {
		if (!container->v2) {
			ret = -EPERM;
			goto unlock_exit;
		}
		ret = tce_iommu_take_ownership_ddw(container, table_group);
		if (!tce_groups_attached(container) && !container->tables[0])
			container->def_window_pending = true;
	}

	if (!ret) {
		tcegrp->grp = iommu_group;
		list_add(&tcegrp->next, &container->group_list);
	}

unlock_exit:
	if (ret && tcegrp)
		kfree(tcegrp);

	mutex_unlock(&container->lock);

	return ret;
}
static void tce_iommu_detach_group(void *iommu_data,
		struct iommu_group *iommu_group)
{
	struct tce_container *container = iommu_data;
	struct iommu_table_group *table_group;
	bool found = false;
	struct tce_iommu_group *tcegrp;

	mutex_lock(&container->lock);

	list_for_each_entry(tcegrp, &container->group_list, next) {
		if (tcegrp->grp == iommu_group) {
			found = true;
			break;
		}
	}

	if (!found) {
		pr_warn("tce_vfio: detaching unattached group #%u\n",
				iommu_group_id(iommu_group));
		goto unlock_exit;
	}

	list_del(&tcegrp->next);
	kfree(tcegrp);

	table_group = iommu_group_get_iommudata(iommu_group);
	BUG_ON(!table_group);

	if (!table_group->ops || !table_group->ops->release_ownership)
		tce_iommu_release_ownership(container, table_group);
	else
		tce_iommu_release_ownership_ddw(container, table_group);

unlock_exit:
	mutex_unlock(&container->lock);
}
static const struct vfio_iommu_driver_ops tce_iommu_driver_ops = {
	.name		= "iommu-vfio-powerpc",
	.owner		= THIS_MODULE,
	.open		= tce_iommu_open,
	.release	= tce_iommu_release,
	.ioctl		= tce_iommu_ioctl,
	.attach_group	= tce_iommu_attach_group,
	.detach_group	= tce_iommu_detach_group,
};
static int __init tce_iommu_init(void)
{
	return vfio_register_iommu_driver(&tce_iommu_driver_ops);
}

static void __exit tce_iommu_cleanup(void)
{
	vfio_unregister_iommu_driver(&tce_iommu_driver_ops);
}

module_init(tce_iommu_init);
module_exit(tce_iommu_cleanup);
MODULE_VERSION(DRIVER_VERSION);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);