/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 *
 * Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
 * Copyright 2011 David Gibson, IBM Corporation <dwg@au1.ibm.com>
 * Copyright 2016 Alexey Kardashevskiy, IBM Corporation <aik@au1.ibm.com>
 */
#include <linux/types.h>
#include <linux/string.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/highmem.h>
#include <linux/gfp.h>
#include <linux/slab.h>
#include <linux/sched/signal.h>
#include <linux/hugetlb.h>
#include <linux/list.h>
#include <linux/anon_inodes.h>
#include <linux/iommu.h>
#include <linux/file.h>

#include <asm/tlbflush.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
#include <asm/book3s/64/mmu-hash.h>
#include <asm/hvcall.h>
#include <asm/synch.h>
#include <asm/ppc-opcode.h>
#include <asm/kvm_host.h>
#include <asm/udbg.h>
#include <asm/iommu.h>
#include <asm/tce.h>
#include <asm/mmu_context.h>
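
/*
 * In-kernel handling of sPAPR TCE (Translation Control Entry) tables for
 * virtual I/O: creation of the guest-visible TCE table via the
 * KVM_CREATE_SPAPR_TCE_64 ioctl, mmap() of its backing pages to userspace,
 * linking a table to hardware IOMMU tables, and the H_PUT_TCE,
 * H_PUT_TCE_INDIRECT and H_STUFF_TCE hypercall handlers that update both
 * the KVM copy and any attached hardware tables.
 */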
static unsigned long kvmppc_tce_pages(unsigned long iommu_pages)
{
	return ALIGN(iommu_pages * sizeof(u64), PAGE_SIZE) / PAGE_SIZE;
}
static unsigned long kvmppc_stt_pages(unsigned long tce_pages)
{
	unsigned long stt_bytes = sizeof(struct kvmppc_spapr_tce_table) +
			(tce_pages * sizeof(struct page *));

	return tce_pages + ALIGN(stt_bytes, PAGE_SIZE) / PAGE_SIZE;
}
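
/*
 * Charge (inc == true) or uncharge (inc == false) the pages backing a TCE
 * table against the owning process's RLIMIT_MEMLOCK via mm->locked_vm, so
 * that a guest cannot pin unbounded amounts of host memory through table
 * creation.
 */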
static long kvmppc_account_memlimit(unsigned long stt_pages, bool inc)
{
	long ret = 0;

	if (!current || !current->mm)
		return ret; /* process exited */

	down_write(&current->mm->mmap_sem);

	if (inc) {
		unsigned long locked, lock_limit;

		locked = current->mm->locked_vm + stt_pages;
		lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
		if (locked > lock_limit && !capable(CAP_IPC_LOCK))
			ret = -ENOMEM;
		else
			current->mm->locked_vm += stt_pages;
	} else {
		if (WARN_ON_ONCE(stt_pages > current->mm->locked_vm))
			stt_pages = current->mm->locked_vm;

		current->mm->locked_vm -= stt_pages;
	}

	pr_debug("[%d] RLIMIT_MEMLOCK KVM %c%ld %ld/%ld%s\n", current->pid,
			inc ? '+' : '-',
			stt_pages << PAGE_SHIFT,
			current->mm->locked_vm << PAGE_SHIFT,
			rlimit(RLIMIT_MEMLOCK),
			ret ? " - exceeded" : "");

	up_write(&current->mm->mmap_sem);

	return ret;
}
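
/*
 * Each kvmppc_spapr_tce_iommu_table links one guest LIOBN to one hardware
 * iommu_table. The object is reference counted with a kref and freed via
 * RCU once the last reference is dropped, so the lockless walkers in the
 * hypercall handlers never see a stale pointer.
 */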
static void kvm_spapr_tce_iommu_table_free(struct rcu_head *head)
{
	struct kvmppc_spapr_tce_iommu_table *stit = container_of(head,
			struct kvmppc_spapr_tce_iommu_table, rcu);

	iommu_tce_table_put(stit->tbl);

	kfree(stit);
}
static void kvm_spapr_tce_liobn_put(struct kref *kref)
{
	struct kvmppc_spapr_tce_iommu_table *stit = container_of(kref,
			struct kvmppc_spapr_tce_iommu_table, kref);

	list_del_rcu(&stit->next);

	call_rcu(&stit->rcu, kvm_spapr_tce_iommu_table_free);
}
extern void kvm_spapr_tce_release_iommu_group(struct kvm *kvm,
		struct iommu_group *grp)
{
	int i;
	struct kvmppc_spapr_tce_table *stt;
	struct kvmppc_spapr_tce_iommu_table *stit, *tmp;
	struct iommu_table_group *table_group = NULL;

	list_for_each_entry_rcu(stt, &kvm->arch.spapr_tce_tables, list) {

		table_group = iommu_group_get_iommudata(grp);
		if (WARN_ON(!table_group))
			continue;

		list_for_each_entry_safe(stit, tmp, &stt->iommu_tables, next) {
			for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
				if (table_group->tables[i] != stit->tbl)
					continue;

				kref_put(&stit->kref, kvm_spapr_tce_liobn_put);
			}
		}
	}
}
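
/*
 * Attach a hardware IOMMU group to an existing in-kernel TCE table: the
 * caller passes the anonymous fd returned by KVM_CREATE_SPAPR_TCE_64 plus
 * the iommu_group, and the function picks the hardware table whose
 * geometry (page shift, offset, size) matches the KVM table, taking a
 * reference on it for the lifetime of the link.
 */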
extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd,
		struct iommu_group *grp)
{
	struct kvmppc_spapr_tce_table *stt = NULL;
	bool found = false;
	struct iommu_table *tbl = NULL;
	struct iommu_table_group *table_group;
	long i;
	struct kvmppc_spapr_tce_iommu_table *stit;
	struct fd f;

	f = fdget(tablefd);
	if (!f.file)
		return -EBADF;

	list_for_each_entry_rcu(stt, &kvm->arch.spapr_tce_tables, list) {
		if (stt == f.file->private_data) {
			found = true;
			break;
		}
	}

	fdput(f);

	if (!found)
		return -EINVAL;

	table_group = iommu_group_get_iommudata(grp);
	if (WARN_ON(!table_group))
		return -EFAULT;

	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
		struct iommu_table *tbltmp = table_group->tables[i];

		if (!tbltmp)
			continue;
		/*
		 * Make sure hardware table parameters are exactly the same;
		 * this is used in the TCE handlers where boundary checks
		 * use only the first attached table.
		 */
		if ((tbltmp->it_page_shift == stt->page_shift) &&
				(tbltmp->it_offset == stt->offset) &&
				(tbltmp->it_size == stt->size)) {
			/*
			 * Reference the table to avoid races with
			 * add/remove DMA windows.
			 */
			tbl = iommu_tce_table_get(tbltmp);
			break;
		}
	}
	if (!tbl)
		return -EINVAL;

	list_for_each_entry_rcu(stit, &stt->iommu_tables, next) {
		if (tbl != stit->tbl)
			continue;

		if (!kref_get_unless_zero(&stit->kref)) {
			/* stit is being destroyed */
			iommu_tce_table_put(tbl);
			return -ENOTTY;
		}
		/*
		 * The table is already known to this KVM, we just increased
		 * its KVM reference counter and can return.
		 */
		return 0;
	}

	stit = kzalloc(sizeof(*stit), GFP_KERNEL);
	if (!stit) {
		iommu_tce_table_put(tbl);
		return -ENOMEM;
	}

	stit->tbl = tbl;
	kref_init(&stit->kref);

	list_add_rcu(&stit->next, &stt->iommu_tables);

	return 0;
}
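
/*
 * Userspace typically reaches kvm_spapr_tce_attach_iommu_group() through
 * the KVM VFIO device (KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE), passing a VFIO
 * group fd and the TCE table fd; the release path above is the mirror
 * operation when the group is removed or the VM goes away.
 */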
static void release_spapr_tce_table(struct rcu_head *head)
{
	struct kvmppc_spapr_tce_table *stt = container_of(head,
			struct kvmppc_spapr_tce_table, rcu);
	unsigned long i, npages = kvmppc_tce_pages(stt->size);

	for (i = 0; i < npages; i++)
		__free_page(stt->pages[i]);

	kfree(stt);
}
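
/*
 * The TCE table created by kvm_vm_ioctl_create_spapr_tce() is exposed to
 * userspace as an anonymous file: the fault/mmap handlers below let
 * userspace map the backing pages directly, and the release handler tears
 * down the IOMMU links and frees the table (via RCU) when the last
 * reference to the fd is dropped.
 */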
static int kvm_spapr_tce_fault(struct vm_fault *vmf)
{
	struct kvmppc_spapr_tce_table *stt = vmf->vma->vm_file->private_data;
	struct page *page;

	if (vmf->pgoff >= kvmppc_tce_pages(stt->size))
		return VM_FAULT_SIGBUS;

	page = stt->pages[vmf->pgoff];
	get_page(page);
	vmf->page = page;
	return 0;
}
static const struct vm_operations_struct kvm_spapr_tce_vm_ops = {
	.fault = kvm_spapr_tce_fault,
};
static int kvm_spapr_tce_mmap(struct file *file, struct vm_area_struct *vma)
{
	vma->vm_ops = &kvm_spapr_tce_vm_ops;
	return 0;
}
static int kvm_spapr_tce_release(struct inode *inode, struct file *filp)
{
	struct kvmppc_spapr_tce_table *stt = filp->private_data;
	struct kvmppc_spapr_tce_iommu_table *stit, *tmp;
	struct kvm *kvm = stt->kvm;

	mutex_lock(&kvm->lock);
	list_del_rcu(&stt->list);
	mutex_unlock(&kvm->lock);

	list_for_each_entry_safe(stit, tmp, &stt->iommu_tables, next) {
		WARN_ON(!kref_read(&stit->kref));
		while (1) {
			if (kref_put(&stit->kref, kvm_spapr_tce_liobn_put))
				break;
		}
	}

	kvm_put_kvm(stt->kvm);

	kvmppc_account_memlimit(
		kvmppc_stt_pages(kvmppc_tce_pages(stt->size)), false);
	call_rcu(&stt->rcu, release_spapr_tce_table);

	return 0;
}
static const struct file_operations kvm_spapr_tce_fops = {
	.mmap		= kvm_spapr_tce_mmap,
	.release	= kvm_spapr_tce_release,
};
long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
				   struct kvm_create_spapr_tce_64 *args)
{
	struct kvmppc_spapr_tce_table *stt = NULL;
	struct kvmppc_spapr_tce_table *siter;
	unsigned long npages, size;
	int ret = -ENOMEM;
	int i;

	if (!args->size)
		return -EINVAL;

	size = _ALIGN_UP(args->size, PAGE_SIZE >> 3);
	npages = kvmppc_tce_pages(size);
	ret = kvmppc_account_memlimit(kvmppc_stt_pages(npages), true);
	if (ret)
		return ret;

	ret = -ENOMEM;
	stt = kzalloc(sizeof(*stt) + npages * sizeof(struct page *),
		      GFP_KERNEL);
	if (!stt)
		goto fail_acct;

	stt->liobn = args->liobn;
	stt->page_shift = args->page_shift;
	stt->offset = args->offset;
	stt->size = size;
	stt->kvm = kvm;
	INIT_LIST_HEAD_RCU(&stt->iommu_tables);

	for (i = 0; i < npages; i++) {
		stt->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
		if (!stt->pages[i])
			goto fail;
	}

	mutex_lock(&kvm->lock);

	/* Check this LIOBN hasn't been previously allocated */
	ret = 0;
	list_for_each_entry(siter, &kvm->arch.spapr_tce_tables, list) {
		if (siter->liobn == args->liobn) {
			ret = -EBUSY;
			break;
		}
	}

	if (!ret)
		ret = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
				       stt, O_RDWR | O_CLOEXEC);

	if (ret >= 0) {
		list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables);
		kvm_get_kvm(kvm);
	}

	mutex_unlock(&kvm->lock);

	if (ret >= 0)
		return ret;

 fail:
	for (i = 0; i < npages; i++)
		if (stt->pages[i])
			__free_page(stt->pages[i]);

	kfree(stt);
 fail_acct:
	kvmppc_account_memlimit(kvmppc_stt_pages(npages), false);
	return ret;
}
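
/*
 * A minimal sketch of how userspace might create a table with this ioctl
 * (illustrative values only; error handling omitted, vm_fd is the KVM VM
 * file descriptor):
 *
 *	struct kvm_create_spapr_tce_64 args = {
 *		.liobn      = 0x80000000,          // example LIOBN
 *		.page_shift = 12,                  // 4K IOMMU pages
 *		.offset     = 0,                   // window offset, in pages
 *		.size       = (1ULL << 30) >> 12,  // 1GB window, in pages
 *	};
 *	int tablefd = ioctl(vm_fd, KVM_CREATE_SPAPR_TCE_64, &args);
 *
 * The returned fd can be mmap()ed to observe guest TCE updates and handed
 * to the KVM VFIO device to link the table to a hardware IOMMU group.
 */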
static void kvmppc_clear_tce(struct iommu_table *tbl, unsigned long entry)
{
	unsigned long hpa = 0;
	enum dma_data_direction dir = DMA_NONE;

	iommu_tce_xchg(tbl, entry, &hpa, &dir);
}
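
/*
 * The helpers below keep the per-mm registry of preregistered memory
 * (mm_iommu_*) in sync with the hardware table: mapping an entry pins the
 * corresponding mm_iommu_table_group_mem_t and records the guest address
 * in it_userspace, unmapping drops that pin again.
 */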
static long kvmppc_tce_iommu_mapped_dec(struct kvm *kvm,
		struct iommu_table *tbl, unsigned long entry)
{
	struct mm_iommu_table_group_mem_t *mem = NULL;
	const unsigned long pgsize = 1ULL << tbl->it_page_shift;
	unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);

	if (!pua)
		/* it_userspace allocation might be delayed */
		return H_TOO_HARD;

	mem = mm_iommu_lookup(kvm->mm, *pua, pgsize);
	if (!mem)
		return H_TOO_HARD;

	mm_iommu_mapped_dec(mem);

	*pua = 0;

	return H_SUCCESS;
}
static long kvmppc_tce_iommu_unmap(struct kvm *kvm,
		struct iommu_table *tbl, unsigned long entry)
{
	enum dma_data_direction dir = DMA_NONE;
	unsigned long hpa = 0;
	long ret;

	if (WARN_ON_ONCE(iommu_tce_xchg(tbl, entry, &hpa, &dir)))
		return H_HARDWARE;

	if (dir == DMA_NONE)
		return H_SUCCESS;

	ret = kvmppc_tce_iommu_mapped_dec(kvm, tbl, entry);
	if (ret != H_SUCCESS)
		iommu_tce_xchg(tbl, entry, &hpa, &dir);

	return ret;
}
long kvmppc_tce_iommu_map(struct kvm *kvm, struct iommu_table *tbl,
		unsigned long entry, unsigned long ua,
		enum dma_data_direction dir)
{
	long ret;
	unsigned long hpa, *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);
	struct mm_iommu_table_group_mem_t *mem;

	if (!pua)
		/* it_userspace allocation might be delayed */
		return H_TOO_HARD;

	mem = mm_iommu_lookup(kvm->mm, ua, 1ULL << tbl->it_page_shift);
	if (!mem)
		/* This only handles v2 IOMMU type, v1 is handled via ioctl() */
		return H_TOO_HARD;

	if (WARN_ON_ONCE(mm_iommu_ua_to_hpa(mem, ua, &hpa)))
		return H_HARDWARE;

	if (mm_iommu_mapped_inc(mem))
		return H_CLOSED;

	ret = iommu_tce_xchg(tbl, entry, &hpa, &dir);
	if (WARN_ON_ONCE(ret)) {
		mm_iommu_mapped_dec(mem);
		return H_HARDWARE;
	}

	if (dir != DMA_NONE)
		kvmppc_tce_iommu_mapped_dec(kvm, tbl, entry);

	*pua = ua;

	return 0;
}
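
/*
 * H_PUT_TCE: update a single TCE. The handler looks the table up by LIOBN,
 * validates ioba and the TCE value, translates the guest physical address
 * to a userspace address, updates every attached hardware table and
 * finally stores the value in KVM's own copy of the table.
 */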
long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
		      unsigned long ioba, unsigned long tce)
{
	struct kvmppc_spapr_tce_table *stt;
	long ret, idx;
	struct kvmppc_spapr_tce_iommu_table *stit;
	unsigned long entry, ua = 0;
	enum dma_data_direction dir;

	/* udbg_printf("H_PUT_TCE(): liobn=0x%lx ioba=0x%lx, tce=0x%lx\n", */
	/* 	    liobn, ioba, tce); */

	stt = kvmppc_find_table(vcpu->kvm, liobn);
	if (!stt)
		return H_TOO_HARD;

	ret = kvmppc_ioba_validate(stt, ioba, 1);
	if (ret != H_SUCCESS)
		return ret;

	ret = kvmppc_tce_validate(stt, tce);
	if (ret != H_SUCCESS)
		return ret;

	dir = iommu_tce_direction(tce);

	idx = srcu_read_lock(&vcpu->kvm->srcu);

	if ((dir != DMA_NONE) && kvmppc_gpa_to_ua(vcpu->kvm,
			tce & ~(TCE_PCI_READ | TCE_PCI_WRITE), &ua, NULL)) {
		ret = H_PARAMETER;
		goto unlock_exit;
	}

	entry = ioba >> stt->page_shift;

	list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
		if (dir == DMA_NONE)
			ret = kvmppc_tce_iommu_unmap(vcpu->kvm,
					stit->tbl, entry);
		else
			ret = kvmppc_tce_iommu_map(vcpu->kvm, stit->tbl,
					entry, ua, dir);

		if (ret == H_SUCCESS)
			continue;

		if (ret == H_TOO_HARD)
			goto unlock_exit;

		WARN_ON_ONCE(1);
		kvmppc_clear_tce(stit->tbl, entry);
	}

	kvmppc_tce_put(stt, entry, tce);

unlock_exit:
	srcu_read_unlock(&vcpu->kvm->srcu, idx);

	return ret;
}
EXPORT_SYMBOL_GPL(kvmppc_h_put_tce);
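
/*
 * H_PUT_TCE_INDIRECT: update up to 512 consecutive TCEs from a list that
 * the guest places in a single 4K-aligned page, applying the same
 * validation and hardware-table updates as H_PUT_TCE for each entry.
 */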
long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
		unsigned long liobn, unsigned long ioba,
		unsigned long tce_list, unsigned long npages)
{
	struct kvmppc_spapr_tce_table *stt;
	long i, ret = H_SUCCESS, idx;
	unsigned long entry, ua = 0;
	u64 __user *tces;
	u64 tce;
	struct kvmppc_spapr_tce_iommu_table *stit;

	stt = kvmppc_find_table(vcpu->kvm, liobn);
	if (!stt)
		return H_TOO_HARD;

	entry = ioba >> stt->page_shift;
	/*
	 * SPAPR spec says that the maximum size of the list is 512 TCEs
	 * so the whole list fits in a 4K page.
	 */
	if (npages > 512)
		return H_PARAMETER;

	if (tce_list & (SZ_4K - 1))
		return H_PARAMETER;

	ret = kvmppc_ioba_validate(stt, ioba, npages);
	if (ret != H_SUCCESS)
		return ret;

	idx = srcu_read_lock(&vcpu->kvm->srcu);
	if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, NULL)) {
		ret = H_TOO_HARD;
		goto unlock_exit;
	}
	tces = (u64 __user *) ua;

	for (i = 0; i < npages; ++i) {
		if (get_user(tce, tces + i)) {
			ret = H_TOO_HARD;
			goto unlock_exit;
		}
		tce = be64_to_cpu(tce);

		ret = kvmppc_tce_validate(stt, tce);
		if (ret != H_SUCCESS)
			goto unlock_exit;

		if (kvmppc_gpa_to_ua(vcpu->kvm,
				tce & ~(TCE_PCI_READ | TCE_PCI_WRITE),
				&ua, NULL)) {
			ret = H_PARAMETER;
			goto unlock_exit;
		}

		list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
			ret = kvmppc_tce_iommu_map(vcpu->kvm,
					stit->tbl, entry + i, ua,
					iommu_tce_direction(tce));

			if (ret == H_SUCCESS)
				continue;

			if (ret == H_TOO_HARD)
				goto unlock_exit;

			WARN_ON_ONCE(1);
			kvmppc_clear_tce(stit->tbl, entry);
		}

		kvmppc_tce_put(stt, entry + i, tce);
	}

unlock_exit:
	srcu_read_unlock(&vcpu->kvm->srcu, idx);

	return ret;
}
EXPORT_SYMBOL_GPL(kvmppc_h_put_tce_indirect);
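
/*
 * H_STUFF_TCE: write the same TCE value (which must have no permission
 * bits set) into npages consecutive entries, clearing any corresponding
 * hardware mappings first.
 */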
long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu,
		unsigned long liobn, unsigned long ioba,
		unsigned long tce_value, unsigned long npages)
{
	struct kvmppc_spapr_tce_table *stt;
	long i, ret;
	struct kvmppc_spapr_tce_iommu_table *stit;

	stt = kvmppc_find_table(vcpu->kvm, liobn);
	if (!stt)
		return H_TOO_HARD;

	ret = kvmppc_ioba_validate(stt, ioba, npages);
	if (ret != H_SUCCESS)
		return ret;

	/* Check permission bits only, to allow userspace to poison TCEs for debug */
	if (tce_value & (TCE_PCI_WRITE | TCE_PCI_READ))
		return H_PARAMETER;

	list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
		unsigned long entry = ioba >> stit->tbl->it_page_shift;

		for (i = 0; i < npages; ++i) {
			ret = kvmppc_tce_iommu_unmap(vcpu->kvm,
					stit->tbl, entry + i);

			if (ret == H_SUCCESS)
				continue;

			if (ret == H_TOO_HARD)
				return ret;

			WARN_ON_ONCE(1);
			kvmppc_clear_tce(stit->tbl, entry);
		}
	}

	for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift))
		kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value);

	return H_SUCCESS;
}
EXPORT_SYMBOL_GPL(kvmppc_h_stuff_tce);