/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 *
 * Copyright 2010-2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
 */
#include <linux/types.h>
#include <linux/string.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/hugetlb.h>
#include <linux/module.h>
#include <linux/log2.h>

#include <asm/tlbflush.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
#include <asm/book3s/64/mmu-hash.h>
#include <asm/hvcall.h>
#include <asm/synch.h>
#include <asm/ppc-opcode.h>
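
/*
 * Real-mode handlers for the hashed-page-table hypercalls (H_ENTER,
 * H_REMOVE, H_BULK_REMOVE, H_PROTECT, H_READ, H_CLEAR_REF, H_CLEAR_MOD)
 * and the helpers they share.  Most of this code runs with the MMU off,
 * so pointers into vmalloc space must first be converted with
 * real_vmalloc_addr().
 */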
/* Translate address of a vmalloc'd thing to a linear map address */
static void *real_vmalloc_addr(void *x)
{
	unsigned long addr = (unsigned long) x;
	pte_t *p;
	/*
	 * Assume we don't have huge pages in vmalloc space, so we don't
	 * need to worry about THP collapse/split.  This is called only
	 * in real mode, hence no irq_save/restore is needed.
	 */
	p = __find_linux_pte_or_hugepte(swapper_pg_dir, addr, NULL, NULL);
	if (!p || !pte_present(*p))
		return NULL;
	addr = (pte_pfn(*p) << PAGE_SHIFT) | (addr & ~PAGE_MASK);
	return __va(addr);
}

/* Return 1 if we need to do a global tlbie, 0 if we can use tlbiel */
static int global_invalidates(struct kvm *kvm, unsigned long flags)
{
	int global;

	/*
	 * If there is only one vcore, and it's currently running,
	 * as indicated by local_paca->kvm_hstate.kvm_vcpu being set,
	 * we can use tlbiel as long as we mark all other physical
	 * cores as potentially having stale TLB entries for this lpid.
	 * Otherwise, don't use tlbiel.
	 */
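	/*
	 * (tlbiel only invalidates the TLB of the core executing it, while
	 * tlbie is broadcast to all cores; the need_tlb_flush mask below
	 * records which physical cores may still hold stale translations
	 * for this LPID.)
	 */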
	if (kvm->arch.online_vcores == 1 && local_paca->kvm_hstate.kvm_vcpu)
		global = 0;
	else
		global = 1;

	if (!global) {
		/* any other core might now have stale TLB entries... */
		smp_wmb();
		cpumask_setall(&kvm->arch.need_tlb_flush);
		cpumask_clear_cpu(local_paca->kvm_hstate.kvm_vcore->pcpu,
				  &kvm->arch.need_tlb_flush);
	}

	return global;
}

/*
 * Add this HPTE into the chain for the real page.
 * Must be called with the chain locked; it unlocks the chain.
 */
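/*
 * (The reverse map for a guest real page is a circular doubly-linked list
 * of revmap entries, linked through their forw/back fields, which hold
 * HPTE indexes; the rmap word stores the index of the list head together
 * with the KVMPPC_RMAP_PRESENT flag.)
 */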
void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
			     unsigned long *rmap, long pte_index, int realmode)
{
	struct revmap_entry *head, *tail;
	unsigned long i;

	if (*rmap & KVMPPC_RMAP_PRESENT) {
		i = *rmap & KVMPPC_RMAP_INDEX;
		head = &kvm->arch.revmap[i];
		if (realmode)
			head = real_vmalloc_addr(head);
		tail = &kvm->arch.revmap[head->back];
		if (realmode)
			tail = real_vmalloc_addr(tail);
		rev->forw = i;
		rev->back = head->back;
		tail->forw = pte_index;
		head->back = pte_index;
	} else {
		rev->forw = rev->back = pte_index;
		*rmap = (*rmap & ~KVMPPC_RMAP_INDEX) |
			pte_index | KVMPPC_RMAP_PRESENT;
	}
	unlock_rmap(rmap);
}
EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain);

/* Update the changed page order field of an rmap entry */
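/*
 * (The KVMPPC_RMAP_CHG_ORDER field appears to hold ilog2() of the largest
 * page size for which a change (C) bit has been seen; it is only ever
 * increased here, presumably giving later dirty-tracking code an upper
 * bound on how large a region may have been modified.)
 */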
void kvmppc_update_rmap_change(unsigned long *rmap, unsigned long psize)
{
	unsigned long order;

	if (!psize)
		return;
	order = ilog2(psize);
	order <<= KVMPPC_RMAP_CHG_SHIFT;
	if (order > (*rmap & KVMPPC_RMAP_CHG_ORDER))
		*rmap = (*rmap & ~KVMPPC_RMAP_CHG_ORDER) | order;
}
EXPORT_SYMBOL_GPL(kvmppc_update_rmap_change);

/* Returns a pointer to the revmap entry for the page mapped by a HPTE */
static unsigned long *revmap_for_hpte(struct kvm *kvm, unsigned long hpte_v,
				      unsigned long hpte_gr)
{
	struct kvm_memory_slot *memslot;
	unsigned long *rmap;
	unsigned long gfn;

	gfn = hpte_rpn(hpte_gr, hpte_page_size(hpte_v, hpte_gr));
	memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
	if (!memslot)
		return NULL;

	rmap = real_vmalloc_addr(&memslot->arch.rmap[gfn - memslot->base_gfn]);
	return rmap;
}

/* Remove this HPTE from the chain for a real page */
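/*
 * (This is the inverse of kvmppc_add_revmap_chain(): the entry is unlinked
 * from the circular list, the list head in the rmap word is updated or the
 * list marked empty, and any R/C bits the hardware set are folded back
 * into the guest view and the rmap word.)
 */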
static void remove_revmap_chain(struct kvm *kvm, long pte_index,
				struct revmap_entry *rev,
				unsigned long hpte_v, unsigned long hpte_r)
{
	struct revmap_entry *next, *prev;
	unsigned long ptel, head;
	unsigned long *rmap;
	unsigned long rcbits;

	rcbits = hpte_r & (HPTE_R_R | HPTE_R_C);
	ptel = rev->guest_rpte |= rcbits;
	rmap = revmap_for_hpte(kvm, hpte_v, ptel);
	if (!rmap)
		return;
	lock_rmap(rmap);

	head = *rmap & KVMPPC_RMAP_INDEX;
	next = real_vmalloc_addr(&kvm->arch.revmap[rev->forw]);
	prev = real_vmalloc_addr(&kvm->arch.revmap[rev->back]);
	next->back = rev->back;
	prev->forw = rev->forw;
	if (head == pte_index) {
		head = rev->forw;
		if (head == pte_index)
			*rmap &= ~(KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_INDEX);
		else
			*rmap = (*rmap & ~KVMPPC_RMAP_INDEX) | head;
	}
	*rmap |= rcbits << KVMPPC_RMAP_RC_SHIFT;
	if (rcbits & HPTE_R_C)
		kvmppc_update_rmap_change(rmap, hpte_page_size(hpte_v, hpte_r));
	unlock_rmap(rmap);
}
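
/*
 * Worker for the H_ENTER hypercall: validate the proposed HPTE, translate
 * the guest real address to a host page where the gfn is backed by a
 * memslot, find and lock a free slot in the requested HPTE group, link
 * the new entry into the reverse map, and finally make the HPTE visible.
 * The chosen index is returned through *pte_idx_ret.
 */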
long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
		       long pte_index, unsigned long pteh, unsigned long ptel,
		       pgd_t *pgdir, bool realmode, unsigned long *pte_idx_ret)
{
	unsigned long i, pa, gpa, gfn, psize;
	unsigned long slot_fn, hva;
	__be64 *hpte;
	struct revmap_entry *rev;
	unsigned long g_ptel;
	struct kvm_memory_slot *memslot;
	unsigned hpage_shift;
	unsigned long is_io;
	unsigned long *rmap;
	pte_t *ptep;
	unsigned int writing;
	unsigned long mmu_seq;
	unsigned long rcbits, irq_flags = 0;

	psize = hpte_page_size(pteh, ptel);
	if (!psize)
		return H_PARAMETER;
	writing = hpte_is_writable(ptel);
	pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID);
	ptel &= ~HPTE_GR_RESERVED;
	g_ptel = ptel;

	/* used later to detect if we might have been invalidated */
	mmu_seq = kvm->mmu_notifier_seq;
	smp_rmb();

	/* Find the memslot (if any) for this address */
	gpa = (ptel & HPTE_R_RPN) & ~(psize - 1);
	gfn = gpa >> PAGE_SHIFT;
	memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
	pa = 0;
	is_io = ~0ul;
	rmap = NULL;
	if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID))) {
		/* Emulated MMIO - mark this with key=31 */
		pteh |= HPTE_V_ABSENT;
		ptel |= HPTE_R_KEY_HI | HPTE_R_KEY_LO;
		goto do_insert;
	}

	/* Check if the requested page fits entirely in the memslot. */
	if (!slot_is_aligned(memslot, psize))
		return H_PARAMETER;
	slot_fn = gfn - memslot->base_gfn;
	rmap = &memslot->arch.rmap[slot_fn];

	/* Translate to host virtual address */
	hva = __gfn_to_hva_memslot(memslot, gfn);
	/*
	 * If we had a page table change after the lookup, we would
	 * retry via mmu_notifier_retry.
	 */
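	/*
	 * (In real mode the Linux page table is walked directly, without
	 * touching interrupt state; from virtual mode the walk is done with
	 * interrupts disabled via local_irq_save() instead.)
	 */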
	if (realmode)
		ptep = __find_linux_pte_or_hugepte(pgdir, hva, NULL,
						   &hpage_shift);
	else {
		local_irq_save(irq_flags);
		ptep = find_linux_pte_or_hugepte(pgdir, hva, NULL,
						 &hpage_shift);
	}
	if (ptep) {
		pte_t pte;
		unsigned int host_pte_size;

		if (hpage_shift)
			host_pte_size = 1ul << hpage_shift;
		else
			host_pte_size = PAGE_SIZE;
		/*
		 * We should always find the guest page size to be <= the
		 * host page size, if the host is using hugepages.
		 */
		if (host_pte_size < psize) {
			if (!realmode)
				local_irq_restore(irq_flags);
			return H_PARAMETER;
		}
		pte = kvmppc_read_update_linux_pte(ptep, writing);
		if (pte_present(pte) && !pte_protnone(pte)) {
			if (writing && !pte_write(pte))
				/* make the actual HPTE be read-only */
				ptel = hpte_make_readonly(ptel);
			is_io = hpte_cache_bits(pte_val(pte));
			pa = pte_pfn(pte) << PAGE_SHIFT;
			pa |= hva & (host_pte_size - 1);
			pa |= gpa & ~PAGE_MASK;
		}
	}
	if (!realmode)
		local_irq_restore(irq_flags);

	ptel &= ~(HPTE_R_PP0 - psize);
	ptel |= pa;

	if (pa)
		pteh |= HPTE_V_VALID;
	else
		pteh |= HPTE_V_ABSENT;

	/* Check WIMG */
	if (is_io != ~0ul && !hpte_cache_flags_ok(ptel, is_io)) {
		if (is_io)
			return H_PARAMETER;
		/*
		 * Allow guest to map emulated device memory as
		 * uncacheable, but actually make it cacheable.
		 */
		ptel &= ~(HPTE_R_W|HPTE_R_I|HPTE_R_G);
		ptel |= HPTE_R_M;
	}

	/* Find and lock the HPTEG slot to use */
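	/*
	 * (An HPT entry is 16 bytes, so pte_index << 4 is its byte offset in
	 * the hashed page table; a group (HPTEG) holds 8 entries.  Unless the
	 * guest passed H_EXACT, any free slot in the group will do.)
	 */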
 do_insert:
	if (pte_index >= kvm->arch.hpt_npte)
		return H_PARAMETER;
	if (likely((flags & H_EXACT) == 0)) {
		pte_index &= ~7UL;
		hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
		for (i = 0; i < 8; ++i) {
			if ((be64_to_cpu(*hpte) & HPTE_V_VALID) == 0 &&
			    try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID |
					  HPTE_V_ABSENT))
				break;
			hpte += 2;
		}
		if (i == 8) {
			/*
			 * Since try_lock_hpte doesn't retry (not even stdcx.
			 * failures), it could be that there is a free slot
			 * but we transiently failed to lock it.  Try again,
			 * actually locking each slot and checking it.
			 */
			hpte -= 16;
			for (i = 0; i < 8; ++i) {
				u64 pte;
				while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
					cpu_relax();
				pte = be64_to_cpu(hpte[0]);
				if (!(pte & (HPTE_V_VALID | HPTE_V_ABSENT)))
					break;
				__unlock_hpte(hpte, pte);
				hpte += 2;
			}
			if (i == 8)
				return H_PTEG_FULL;
		}
		pte_index += i;
	} else {
		hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
		if (!try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID |
				   HPTE_V_ABSENT)) {
			/* Lock the slot and check again */
			u64 pte;

			while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
				cpu_relax();
			pte = be64_to_cpu(hpte[0]);
			if (pte & (HPTE_V_VALID | HPTE_V_ABSENT)) {
				__unlock_hpte(hpte, pte);
				return H_PTEG_FULL;
			}
		}
	}

	/* Save away the guest's idea of the second HPTE dword */
	rev = &kvm->arch.revmap[pte_index];
	if (realmode)
		rev = real_vmalloc_addr(rev);
	if (rev) {
		rev->guest_rpte = g_ptel;
		note_hpte_modification(kvm, rev);
	}

	/* Link HPTE into reverse-map chain */
	if (pteh & HPTE_V_VALID) {
		if (realmode)
			rmap = real_vmalloc_addr(rmap);
		lock_rmap(rmap);
		/* Check for pending invalidations under the rmap chain lock */
		if (mmu_notifier_retry(kvm, mmu_seq)) {
			/* inval in progress, write a non-present HPTE */
			pteh |= HPTE_V_ABSENT;
			pteh &= ~HPTE_V_VALID;
			unlock_rmap(rmap);
		} else {
			kvmppc_add_revmap_chain(kvm, rev, rmap, pte_index,
						realmode);
			/* Only set R/C in real HPTE if already set in *rmap */
			rcbits = *rmap >> KVMPPC_RMAP_RC_SHIFT;
			ptel &= rcbits | ~(HPTE_R_R | HPTE_R_C);
		}
	}

	hpte[1] = cpu_to_be64(ptel);

	/* Write the first HPTE dword, unlocking the HPTE and making it valid */
	eieio();
	__unlock_hpte(hpte, pteh);
	asm volatile("ptesync" : : : "memory");

	*pte_idx_ret = pte_index;
	return H_SUCCESS;
}
EXPORT_SYMBOL_GPL(kvmppc_do_h_enter);
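
/*
 * H_ENTER entry point used while the vcpu is in the real-mode hcall path;
 * the index of the HPTE actually used is returned to the guest in GPR4.
 */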
long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
		    long pte_index, unsigned long pteh, unsigned long ptel)
{
	return kvmppc_do_h_enter(vcpu->kvm, flags, pte_index, pteh, ptel,
				 vcpu->arch.pgdir, true, &vcpu->arch.gpr[4]);
}

#ifdef __BIG_ENDIAN__
#define LOCK_TOKEN	(*(u32 *)(&get_paca()->lock_token))
#else
#define LOCK_TOKEN	(*(u32 *)(&get_paca()->paca_index))
#endif

static inline int try_lock_tlbie(unsigned int *lock)
{
	unsigned int tmp, old;
	unsigned int token = LOCK_TOKEN;

	asm volatile("1:lwarx	%1,0,%2\n"
		     "	cmpwi	cr0,%1,0\n"
		     "	bne	2f\n"
		     "  stwcx.	%3,0,%2\n"
		     "	bne-	1b\n"
		     "	isync\n"
		     "2:"
		     : "=&r" (tmp), "=&r" (old)
		     : "r" (lock), "r" (token)
		     : "cc", "memory");
	return old == 0;
}

static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
		      long npages, int global, bool need_sync)
{
	long i;
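	/*
	 * (The global path takes the per-VM tlbie_lock, since only one tlbie
	 * sequence may be in progress at a time, and broadcasts a tlbie for
	 * each rb value before the eieio/tlbsync/ptesync; the local path
	 * just uses tlbiel on this CPU.)
	 */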

	if (global) {
		while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
			cpu_relax();
		if (need_sync)
			asm volatile("ptesync" : : : "memory");
		for (i = 0; i < npages; ++i)
			asm volatile(PPC_TLBIE(%1,%0) : :
				     "r" (rbvalues[i]), "r" (kvm->arch.lpid));
		asm volatile("eieio; tlbsync; ptesync" : : : "memory");
		kvm->arch.tlbie_lock = 0;
	} else {
		if (need_sync)
			asm volatile("ptesync" : : : "memory");
		for (i = 0; i < npages; ++i)
			asm volatile("tlbiel %0" : : "r" (rbvalues[i]));
		asm volatile("ptesync" : : : "memory");
	}
}
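
/*
 * H_REMOVE worker: invalidate and remove a single HPTE, subject to the
 * optional AVPN match (H_AVPN) and and-condition (H_ANDCOND) checks in
 * flags, and return its final V and R words so the caller can pass the
 * accumulated R/C bits back to the guest.
 */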
long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
			unsigned long pte_index, unsigned long avpn,
			unsigned long *hpret)
{
	__be64 *hpte;
	unsigned long v, r, rb;
	struct revmap_entry *rev;
	u64 pte;

	if (pte_index >= kvm->arch.hpt_npte)
		return H_PARAMETER;
	hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
	while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
		cpu_relax();
	pte = be64_to_cpu(hpte[0]);
	if ((pte & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 ||
	    ((flags & H_AVPN) && (pte & ~0x7fUL) != avpn) ||
	    ((flags & H_ANDCOND) && (pte & avpn) != 0)) {
		__unlock_hpte(hpte, pte);
		return H_NOT_FOUND;
	}

	rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
	v = pte & ~HPTE_V_HVLOCK;
	if (v & HPTE_V_VALID) {
		hpte[0] &= ~cpu_to_be64(HPTE_V_VALID);
		rb = compute_tlbie_rb(v, be64_to_cpu(hpte[1]), pte_index);
		do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true);
		/*
		 * The reference (R) and change (C) bits in a HPT
		 * entry can be set by hardware at any time up until
		 * the HPTE is invalidated and the TLB invalidation
		 * sequence has completed.  This means that when
		 * removing a HPTE, we need to re-read the HPTE after
		 * the invalidation sequence has completed in order to
		 * obtain reliable values of R and C.
		 */
		remove_revmap_chain(kvm, pte_index, rev, v,
				    be64_to_cpu(hpte[1]));
	}
	r = rev->guest_rpte & ~HPTE_GR_RESERVED;
	note_hpte_modification(kvm, rev);
	unlock_hpte(hpte, 0);

	if (v & HPTE_V_ABSENT)
		v = (v & ~HPTE_V_ABSENT) | HPTE_V_VALID;
	hpret[0] = v;
	hpret[1] = r;
	return H_SUCCESS;
}
EXPORT_SYMBOL_GPL(kvmppc_do_h_remove);

long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
		     unsigned long pte_index, unsigned long avpn)
{
	return kvmppc_do_h_remove(vcpu->kvm, flags, pte_index, avpn,
				  &vcpu->arch.gpr[4]);
}
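
/*
 * H_BULK_REMOVE: process a list of remove requests passed in the guest's
 * registers (args[]).  Matching HPTEs are invalidated in batches of up to
 * four so that one tlbie sequence can cover a whole batch; the result code
 * and final R/C bits for each request are written back into args[].
 */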
long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
{
	struct kvm *kvm = vcpu->kvm;
	unsigned long *args = &vcpu->arch.gpr[4];
	__be64 *hp, *hptes[4];
	unsigned long tlbrb[4];
	long int i, j, k, n, found, indexes[4];
	unsigned long flags, req, pte_index, rcbits;
	int global;
	long int ret = H_SUCCESS;
	struct revmap_entry *rev, *revs[4];
	u64 hp0;

	global = global_invalidates(kvm, 0);
	for (i = 0; i < 4 && ret == H_SUCCESS; ) {
		n = 0;
		for (; i < 4; ++i) {
			j = i * 2;
			pte_index = args[j];
			flags = pte_index >> 56;
			pte_index &= ((1ul << 56) - 1);
			req = flags >> 6;
			flags &= 3;
			if (req == 3) {		/* no more requests */
				i = 4;
				break;
			}
			if (req != 1 || flags == 3 ||
			    pte_index >= kvm->arch.hpt_npte) {
				/* parameter error */
				args[j] = ((0xa0 | flags) << 56) + pte_index;
				ret = H_PARAMETER;
				break;
			}
			hp = (__be64 *) (kvm->arch.hpt_virt + (pte_index << 4));
			/* to avoid deadlock, don't spin except for first */
			if (!try_lock_hpte(hp, HPTE_V_HVLOCK)) {
				if (n)
					break;
				while (!try_lock_hpte(hp, HPTE_V_HVLOCK))
					cpu_relax();
			}
			found = 0;
			hp0 = be64_to_cpu(hp[0]);
			if (hp0 & (HPTE_V_ABSENT | HPTE_V_VALID)) {
				switch (flags & 3) {
				case 0:		/* absolute */
					found = 1;
					break;
				case 1:		/* andcond */
					if (!(hp0 & args[j + 1]))
						found = 1;
					break;
				case 2:		/* AVPN */
					if ((hp0 & ~0x7fUL) == args[j + 1])
						found = 1;
					break;
				}
			}
			if (!found) {
				hp[0] &= ~cpu_to_be64(HPTE_V_HVLOCK);
				args[j] = ((0x90 | flags) << 56) + pte_index;
				continue;
			}

			args[j] = ((0x80 | flags) << 56) + pte_index;
			rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
			note_hpte_modification(kvm, rev);

			if (!(hp0 & HPTE_V_VALID)) {
				/* insert R and C bits from PTE */
				rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
				args[j] |= rcbits << (56 - 5);
				hp[0] = 0;
				continue;
			}

			/* leave it locked */
			hp[0] &= ~cpu_to_be64(HPTE_V_VALID);
			tlbrb[n] = compute_tlbie_rb(be64_to_cpu(hp[0]),
				be64_to_cpu(hp[1]), pte_index);
			indexes[n] = j;
			hptes[n] = hp;
			revs[n] = rev;
			++n;
		}

		if (!n)
			break;

		/* Now that we've collected a batch, do the tlbies */
		do_tlbies(kvm, tlbrb, n, global, true);

		/* Read PTE low words after tlbie to get final R/C values */
		for (k = 0; k < n; ++k) {
			j = indexes[k];
			pte_index = args[j] & ((1ul << 56) - 1);
			hp = hptes[k];
			rev = revs[k];
			remove_revmap_chain(kvm, pte_index, rev,
				be64_to_cpu(hp[0]), be64_to_cpu(hp[1]));
			rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
			args[j] |= rcbits << (56 - 5);
			__unlock_hpte(hp, 0);
		}
	}

	return ret;
}
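
/*
 * H_PROTECT: change the protection bits (pp, key, no-execute) of an
 * existing HPTE.  The new bits are taken from the flags argument and
 * folded into the guest view and, if the entry is valid, into the real
 * HPTE, with a tlbie when the low word actually changes.
 */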
long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
		      unsigned long pte_index, unsigned long avpn,
		      unsigned long va)
{
	struct kvm *kvm = vcpu->kvm;
	__be64 *hpte;
	struct revmap_entry *rev;
	unsigned long v, r, rb, mask, bits;
	u64 pte;

	if (pte_index >= kvm->arch.hpt_npte)
		return H_PARAMETER;

	hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
	while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
		cpu_relax();
	pte = be64_to_cpu(hpte[0]);
	if ((pte & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 ||
	    ((flags & H_AVPN) && (pte & ~0x7fUL) != avpn)) {
		__unlock_hpte(hpte, pte);
		return H_NOT_FOUND;
	}

	v = pte;
	bits = (flags << 55) & HPTE_R_PP0;
	bits |= (flags << 48) & HPTE_R_KEY_HI;
	bits |= flags & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO);

	/* Update guest view of 2nd HPTE dword */
	mask = HPTE_R_PP0 | HPTE_R_PP | HPTE_R_N |
		HPTE_R_KEY_HI | HPTE_R_KEY_LO;
	rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
	if (rev) {
		r = (rev->guest_rpte & ~mask) | bits;
		rev->guest_rpte = r;
		note_hpte_modification(kvm, rev);
	}

	/* Update HPTE */
	if (v & HPTE_V_VALID) {
		/*
		 * If the page is valid, don't let it transition from
		 * readonly to writable.  If it should be writable, we'll
		 * take a trap and let the page fault code sort it out.
		 */
		pte = be64_to_cpu(hpte[1]);
		r = (pte & ~mask) | bits;
		if (hpte_is_writable(r) && !hpte_is_writable(pte))
			r = hpte_make_readonly(r);
		/* If the PTE is changing, invalidate it first */
		if (r != pte) {
			rb = compute_tlbie_rb(v, r, pte_index);
			hpte[0] = cpu_to_be64((v & ~HPTE_V_VALID) |
					      HPTE_V_ABSENT);
			do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags),
				  true);
			hpte[1] = cpu_to_be64(r);
		}
	}
	unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
	asm volatile("ptesync" : : : "memory");
	return H_SUCCESS;
}
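
/*
 * H_READ: return the contents of one HPTE (or four consecutive HPTEs when
 * H_READ_4 is set) in GPR4 onwards, substituting the guest's saved view of
 * the second dword for the host's working copy.
 */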
long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
		   unsigned long pte_index)
{
	struct kvm *kvm = vcpu->kvm;
	__be64 *hpte;
	unsigned long v, r;
	int i, n = 1;
	struct revmap_entry *rev = NULL;

	if (pte_index >= kvm->arch.hpt_npte)
		return H_PARAMETER;
	if (flags & H_READ_4) {
		pte_index &= ~3;
		n = 4;
	}
	rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
	for (i = 0; i < n; ++i, ++pte_index) {
		hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
		v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK;
		r = be64_to_cpu(hpte[1]);
		if (v & HPTE_V_ABSENT) {
			v &= ~HPTE_V_ABSENT;
			v |= HPTE_V_VALID;
		}
		if (v & HPTE_V_VALID) {
			r = rev[i].guest_rpte | (r & (HPTE_R_R | HPTE_R_C));
			r &= ~HPTE_GR_RESERVED;
		}
		vcpu->arch.gpr[4 + i * 2] = v;
		vcpu->arch.gpr[5 + i * 2] = r;
	}
	return H_SUCCESS;
}
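
/*
 * H_CLEAR_REF: clear the reference (R) bit in the guest view and in the
 * real HPTE, returning the old R and C bits to the guest in GPR4.
 */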
long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags,
			unsigned long pte_index)
{
	struct kvm *kvm = vcpu->kvm;
	__be64 *hpte;
	unsigned long v, r, gr;
	struct revmap_entry *rev;
	unsigned long *rmap;
	long ret = H_NOT_FOUND;

	if (pte_index >= kvm->arch.hpt_npte)
		return H_PARAMETER;

	rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
	hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
	while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
		cpu_relax();
	v = be64_to_cpu(hpte[0]);
	r = be64_to_cpu(hpte[1]);
	if (!(v & (HPTE_V_VALID | HPTE_V_ABSENT)))
		goto out;

	gr = rev->guest_rpte;
	if (rev->guest_rpte & HPTE_R_R) {
		rev->guest_rpte &= ~HPTE_R_R;
		note_hpte_modification(kvm, rev);
	}
	if (v & HPTE_V_VALID) {
		gr |= r & (HPTE_R_R | HPTE_R_C);
		if (r & HPTE_R_R) {
			kvmppc_clear_ref_hpte(kvm, hpte, pte_index);
			rmap = revmap_for_hpte(kvm, v, gr);
			if (rmap) {
				lock_rmap(rmap);
				*rmap |= KVMPPC_RMAP_REFERENCED;
				unlock_rmap(rmap);
			}
		}
	}
	vcpu->arch.gpr[4] = gr;
	ret = H_SUCCESS;
 out:
	unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
	return ret;
}
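
/*
 * H_CLEAR_MOD: clear the change (C) bit in the guest view and in the real
 * HPTE, returning the old R and C bits in GPR4.  The HPTE is made
 * temporarily absent so that C stays stable while it is being cleared.
 */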
long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags,
			unsigned long pte_index)
{
	struct kvm *kvm = vcpu->kvm;
	__be64 *hpte;
	unsigned long v, r, gr;
	struct revmap_entry *rev;
	unsigned long *rmap;
	long ret = H_NOT_FOUND;

	if (pte_index >= kvm->arch.hpt_npte)
		return H_PARAMETER;

	rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
	hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
	while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
		cpu_relax();
	v = be64_to_cpu(hpte[0]);
	r = be64_to_cpu(hpte[1]);
	if (!(v & (HPTE_V_VALID | HPTE_V_ABSENT)))
		goto out;

	gr = rev->guest_rpte;
	if (gr & HPTE_R_C) {
		rev->guest_rpte &= ~HPTE_R_C;
		note_hpte_modification(kvm, rev);
	}
	if (v & HPTE_V_VALID) {
		/* need to make it temporarily absent so C is stable */
		hpte[0] |= cpu_to_be64(HPTE_V_ABSENT);
		kvmppc_invalidate_hpte(kvm, hpte, pte_index);
		r = be64_to_cpu(hpte[1]);
		gr |= r & (HPTE_R_R | HPTE_R_C);
		if (r & HPTE_R_C) {
			unsigned long psize = hpte_page_size(v, r);
			hpte[1] = cpu_to_be64(r & ~HPTE_R_C);
			rmap = revmap_for_hpte(kvm, v, gr);
			if (rmap) {
				lock_rmap(rmap);
				*rmap |= KVMPPC_RMAP_CHANGED;
				kvmppc_update_rmap_change(rmap, psize);
				unlock_rmap(rmap);
			}
		}
	}
	vcpu->arch.gpr[4] = gr;
	ret = H_SUCCESS;
 out:
	unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
	return ret;
}

void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep,
			unsigned long pte_index)
{
	unsigned long rb;

	hptep[0] &= ~cpu_to_be64(HPTE_V_VALID);
	rb = compute_tlbie_rb(be64_to_cpu(hptep[0]), be64_to_cpu(hptep[1]),
			      pte_index);
	do_tlbies(kvm, &rb, 1, 1, true);
}
EXPORT_SYMBOL_GPL(kvmppc_invalidate_hpte);

void kvmppc_clear_ref_hpte(struct kvm *kvm, __be64 *hptep,
			unsigned long pte_index)
{
	unsigned long rb;
	unsigned char rbyte;

	rb = compute_tlbie_rb(be64_to_cpu(hptep[0]), be64_to_cpu(hptep[1]),
			      pte_index);
	rbyte = (be64_to_cpu(hptep[1]) & ~HPTE_R_R) >> 8;
	/* modify only the second-last byte, which contains the ref bit */
	*((char *)hptep + 14) = rbyte;
	do_tlbies(kvm, &rb, 1, 1, false);
}
EXPORT_SYMBOL_GPL(kvmppc_clear_ref_hpte);

static int slb_base_page_shift[4] = {
	24,	/* 16M */
	16,	/* 64k */
	34,	/* 16G */
	20,	/* 1M, unsupported */
};

/*
 * When called from virtmode, this function should be protected by
 * preempt_disable(); otherwise, holding HPTE_V_HVLOCK while preempted
 * can trigger a deadlock.
 */
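/*
 * (Searches the primary hash bucket and, if necessary, the secondary one
 * for an HPTE matching the effective address and SLB entry.  On success
 * the HPTE index is returned with the entry still locked; otherwise a
 * negative value is returned.)
 */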
long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,
			      unsigned long valid)
{
	unsigned int i;
	unsigned int pshift;
	unsigned long somask;
	unsigned long vsid, hash;
	unsigned long avpn;
	__be64 *hpte;
	unsigned long mask, val;
	unsigned long v, r;

	/* Get page shift, work out hash and AVPN etc. */
	mask = SLB_VSID_B | HPTE_V_AVPN | HPTE_V_SECONDARY;
	val = 0;
	pshift = 12;
	if (slb_v & SLB_VSID_L) {
		mask |= HPTE_V_LARGE;
		val |= HPTE_V_LARGE;
		pshift = slb_base_page_shift[(slb_v & SLB_VSID_LP) >> 4];
	}
	if (slb_v & SLB_VSID_B_1T) {
		somask = (1UL << 40) - 1;
		vsid = (slb_v & ~SLB_VSID_B) >> SLB_VSID_SHIFT_1T;
		vsid ^= vsid << 25;
	} else {
		somask = (1UL << 28) - 1;
		vsid = (slb_v & ~SLB_VSID_B) >> SLB_VSID_SHIFT;
	}
	hash = (vsid ^ ((eaddr & somask) >> pshift)) & kvm->arch.hpt_mask;
	avpn = slb_v & ~(somask >> 16);	/* also includes B */
	avpn |= (eaddr & somask) >> 16;

	if (pshift >= 24)
		avpn &= ~((1UL << (pshift - 16)) - 1);
	else
		avpn &= ~0x7fUL;
	val |= avpn;

	for (;;) {
		hpte = (__be64 *)(kvm->arch.hpt_virt + (hash << 7));

		for (i = 0; i < 16; i += 2) {
			/* Read the PTE racily */
			v = be64_to_cpu(hpte[i]) & ~HPTE_V_HVLOCK;

			/* Check valid/absent, hash, segment size and AVPN */
			if (!(v & valid) || (v & mask) != val)
				continue;

			/* Lock the PTE and read it under the lock */
			while (!try_lock_hpte(&hpte[i], HPTE_V_HVLOCK))
				cpu_relax();
			v = be64_to_cpu(hpte[i]) & ~HPTE_V_HVLOCK;
			r = be64_to_cpu(hpte[i+1]);

			/*
			 * Check the HPTE again, including base page size
			 */
			if ((v & valid) && (v & mask) == val &&
			    hpte_base_page_size(v, r) == (1ul << pshift))
				/* Return with the HPTE still locked */
				return (hash << 3) + (i >> 1);

			__unlock_hpte(&hpte[i], v);
		}

		if (val & HPTE_V_SECONDARY)
			break;
		val |= HPTE_V_SECONDARY;
		hash = hash ^ kvm->arch.hpt_mask;
	}
	return -1;
}
EXPORT_SYMBOL(kvmppc_hv_find_lock_hpte);

/*
 * Called in real mode to check whether an HPTE not found fault
 * is due to accessing a paged-out page or an emulated MMIO page,
 * or if a protection fault is due to accessing a page that the
 * guest wanted read/write access to but which we made read-only.
 * Returns a possibly modified status (DSISR) value if the interrupt
 * should be passed on to the guest,
 * -1 to pass the fault up to host kernel mode code, -2 to do that
 * and also load the instruction word (for MMIO emulation),
 * or 0 if we should make the guest retry the access.
 */
long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
			  unsigned long slb_v, unsigned int status, bool data)
{
	struct kvm *kvm = vcpu->kvm;
	long int index;
	unsigned long v, r, gr;
	__be64 *hpte;
	unsigned long valid;
	struct revmap_entry *rev;
	unsigned long pp, key;

	/* For protection fault, expect to find a valid HPTE */
	valid = HPTE_V_VALID;
	if (status & DSISR_NOHPTE)
		valid |= HPTE_V_ABSENT;

	index = kvmppc_hv_find_lock_hpte(kvm, addr, slb_v, valid);
	if (index < 0) {
		if (status & DSISR_NOHPTE)
			return status;	/* there really was no HPTE */
		return 0;	/* for prot fault, HPTE disappeared */
	}
	hpte = (__be64 *)(kvm->arch.hpt_virt + (index << 4));
	v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK;
	r = be64_to_cpu(hpte[1]);
	rev = real_vmalloc_addr(&kvm->arch.revmap[index]);
	gr = rev->guest_rpte;

	unlock_hpte(hpte, v);

	/* For not found, if the HPTE is valid by now, retry the instruction */
	if ((status & DSISR_NOHPTE) && (v & HPTE_V_VALID))
		return 0;

	/* Check access permissions to the page */
	pp = gr & (HPTE_R_PP0 | HPTE_R_PP);
	key = (vcpu->arch.shregs.msr & MSR_PR) ? SLB_VSID_KP : SLB_VSID_KS;
	status &= ~DSISR_NOHPTE;	/* DSISR_NOHPTE == SRR1_ISI_NOPT */
	if (!data) {
		if (gr & (HPTE_R_N | HPTE_R_G))
			return status | SRR1_ISI_N_OR_G;
		if (!hpte_read_permission(pp, slb_v & key))
			return status | SRR1_ISI_PROT;
	} else if (status & DSISR_ISSTORE) {
		/* check write permission */
		if (!hpte_write_permission(pp, slb_v & key))
			return status | DSISR_PROTFAULT;
	} else {
		if (!hpte_read_permission(pp, slb_v & key))
			return status | DSISR_PROTFAULT;
	}

	/* Check storage key, if applicable */
	if (data && (vcpu->arch.shregs.msr & MSR_DR)) {
		unsigned int perm = hpte_get_skey_perm(gr, vcpu->arch.amr);
		if (status & DSISR_ISSTORE)
			perm >>= 1;
		if (perm & 1)
			return status | DSISR_KEYFAULT;
	}

	/* Save HPTE info for virtual-mode handler */
	vcpu->arch.pgfault_addr = addr;
	vcpu->arch.pgfault_index = index;
	vcpu->arch.pgfault_hpte[0] = v;
	vcpu->arch.pgfault_hpte[1] = r;

	/* Check the storage key to see if it is possibly emulated MMIO */
	if (data && (vcpu->arch.shregs.msr & MSR_IR) &&
	    (r & (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) ==
	    (HPTE_R_KEY_HI | HPTE_R_KEY_LO))
		return -2;	/* MMIO emulation - load instr word */

	return -1;		/* send fault up to host kernel mode */
}