/*
 * KVM paravirt_ops implementation
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 *
 * Copyright (C) 2007, Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
 * Copyright IBM Corporation, 2007
 *   Authors: Anthony Liguori <aliguori@us.ibm.com>
 */
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/kvm_para.h>
#include <linux/cpu.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/hardirq.h>
#include <asm/timer.h>

#define MMU_QUEUE_SIZE 1024
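
/*
 * Per-cpu state: MMU operations issued while lazy MMU mode is active are
 * buffered in mmu_queue and handed to the host in one batch.
 */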
struct kvm_para_state {
	u8 mmu_queue[MMU_QUEUE_SIZE];
	int mmu_queue_len;
	enum paravirt_lazy_mode mode;
};
static DEFINE_PER_CPU(struct kvm_para_state, para_state);
static struct kvm_para_state *kvm_para_state(void)
{
	return &per_cpu(para_state, raw_smp_processor_id());
}
/*
 * No need for any "IO delay" on KVM
 */
static void kvm_io_delay(void)
{
}
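
/*
 * Hand a buffer of MMU operations to the host via the KVM_HC_MMU_OP
 * hypercall; the hypercall returns the number of bytes it consumed, so
 * keep retrying until the whole buffer has been processed.
 */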
static void kvm_mmu_op(void *buffer, unsigned len)
{
	int r;
	unsigned long a1, a2;

	do {
		a1 = __pa(buffer);
		a2 = 0;		/* on i386 __pa() always returns <4G */
		r = kvm_hypercall3(KVM_HC_MMU_OP, len, a1, a2);
		buffer += r;
		len -= r;
	} while (len);
}
static void mmu_queue_flush(struct kvm_para_state *state)
{
	if (state->mmu_queue_len) {
		kvm_mmu_op(state->mmu_queue, state->mmu_queue_len);
		state->mmu_queue_len = 0;
	}
}
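
/*
 * Issue an MMU op immediately, or, while lazy MMU mode is active, append
 * it to the per-cpu queue (flushing first if the queue would overflow).
 */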
static void kvm_deferred_mmu_op(void *buffer, int len)
{
	struct kvm_para_state *state = kvm_para_state();

	if (state->mode != PARAVIRT_LAZY_MMU) {
		kvm_mmu_op(buffer, len);
		return;
	}
	if (state->mmu_queue_len + len > sizeof state->mmu_queue)
		mmu_queue_flush(state);
	memcpy(state->mmu_queue + state->mmu_queue_len, buffer, len);
	state->mmu_queue_len += len;
}
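
/*
 * Queue a single PTE write for the host. With CONFIG_HIGHPTE the page
 * table page may live in highmem, so its physical address is derived
 * from the kmap'ed page; otherwise __pa() on the pointer is sufficient.
 */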
static void kvm_mmu_write(void *dest, u64 val)
{
	u64 pte_phys;
	struct kvm_mmu_op_write_pte wpte;

#ifdef CONFIG_HIGHPTE
	struct page *page;
	unsigned long dst = (unsigned long) dest;

	page = kmap_atomic_to_page(dest);
	pte_phys = page_to_pfn(page);
	pte_phys <<= PAGE_SHIFT;
	pte_phys += (dst & ~(PAGE_MASK));
#else
	pte_phys = (unsigned long)__pa(dest);
#endif

	wpte.header.op = KVM_MMU_OP_WRITE_PTE;
	wpte.pte_val = val;
	wpte.pte_phys = pte_phys;

	kvm_deferred_mmu_op(&wpte, sizeof wpte);
}
/*
 * We only need to hook operations that are MMU writes.  We hook these so that
 * we can use lazy MMU mode to batch these operations.  We could probably
 * improve the performance of the host code if we used some of the information
 * here to simplify processing of batched writes.
 */
static void kvm_set_pte(pte_t *ptep, pte_t pte)
{
	kvm_mmu_write(ptep, pte_val(pte));
}
static void kvm_set_pte_at(struct mm_struct *mm, unsigned long addr,
			   pte_t *ptep, pte_t pte)
{
	kvm_mmu_write(ptep, pte_val(pte));
}
static void kvm_set_pmd(pmd_t *pmdp, pmd_t pmd)
{
	kvm_mmu_write(pmdp, pmd_val(pmd));
}
#if PAGETABLE_LEVELS >= 3
#ifdef CONFIG_X86_PAE
static void kvm_set_pte_atomic(pte_t *ptep, pte_t pte)
{
	kvm_mmu_write(ptep, pte_val(pte));
}
static void kvm_pte_clear(struct mm_struct *mm,
			  unsigned long addr, pte_t *ptep)
{
	kvm_mmu_write(ptep, 0);
}
static void kvm_pmd_clear(pmd_t *pmdp)
{
	kvm_mmu_write(pmdp, 0);
}
#endif	/* CONFIG_X86_PAE */
static void kvm_set_pud(pud_t *pudp, pud_t pud)
{
	kvm_mmu_write(pudp, pud_val(pud));
}
#if PAGETABLE_LEVELS == 4
static void kvm_set_pgd(pgd_t *pgdp, pgd_t pgd)
{
	kvm_mmu_write(pgdp, pgd_val(pgd));
}
#endif	/* PAGETABLE_LEVELS == 4 */
#endif	/* PAGETABLE_LEVELS >= 3 */
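
/*
 * TLB flushes go through the deferred queue as well, so that in lazy MMU
 * mode they are batched together with the PTE writes that precede them.
 */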
static void kvm_flush_tlb(void)
{
	struct kvm_mmu_op_flush_tlb ftlb = {
		.header.op = KVM_MMU_OP_FLUSH_TLB,
	};

	kvm_deferred_mmu_op(&ftlb, sizeof ftlb);
}
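
/*
 * Tell the host that a page is no longer being used as a page table.
 */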
static void kvm_release_pt(unsigned long pfn)
{
	struct kvm_mmu_op_release_pt rpt = {
		.header.op = KVM_MMU_OP_RELEASE_PT,
		.pt_phys = (u64)pfn << PAGE_SHIFT,
	};

	kvm_mmu_op(&rpt, sizeof rpt);
}
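
/*
 * Lazy MMU mode: while it is active, kvm_deferred_mmu_op() batches
 * operations in the per-cpu queue; leaving the mode flushes whatever is
 * still queued.
 */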
static void kvm_enter_lazy_mmu(void)
{
	struct kvm_para_state *state = kvm_para_state();

	paravirt_enter_lazy_mmu();
	state->mode = paravirt_get_lazy_mode();
}
static void kvm_leave_lazy_mmu(void)
{
	struct kvm_para_state *state = kvm_para_state();

	mmu_queue_flush(state);
	paravirt_leave_lazy_mmu();
	state->mode = paravirt_get_lazy_mode();
}
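
/*
 * Install the paravirt hooks for the features the host advertises.
 */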
static void __init paravirt_ops_setup(void)
{
	pv_info.name = "KVM";
	pv_info.paravirt_enabled = 1;

	if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY))
		pv_cpu_ops.io_delay = kvm_io_delay;

	if (kvm_para_has_feature(KVM_FEATURE_MMU_OP)) {
		pv_mmu_ops.set_pte = kvm_set_pte;
		pv_mmu_ops.set_pte_at = kvm_set_pte_at;
		pv_mmu_ops.set_pmd = kvm_set_pmd;
#if PAGETABLE_LEVELS >= 3
#ifdef CONFIG_X86_PAE
		pv_mmu_ops.set_pte_atomic = kvm_set_pte_atomic;
		pv_mmu_ops.pte_clear = kvm_pte_clear;
		pv_mmu_ops.pmd_clear = kvm_pmd_clear;
#endif
		pv_mmu_ops.set_pud = kvm_set_pud;
#if PAGETABLE_LEVELS == 4
		pv_mmu_ops.set_pgd = kvm_set_pgd;
#endif
#endif
		pv_mmu_ops.flush_tlb_user = kvm_flush_tlb;
		pv_mmu_ops.release_pte = kvm_release_pt;
		pv_mmu_ops.release_pmd = kvm_release_pt;
		pv_mmu_ops.release_pud = kvm_release_pt;

		pv_mmu_ops.lazy_mode.enter = kvm_enter_lazy_mmu;
		pv_mmu_ops.lazy_mode.leave = kvm_leave_lazy_mmu;
	}
#ifdef CONFIG_X86_IO_APIC
	no_timer_check = 1;
#endif
}
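
/*
 * Called early in guest boot; does nothing unless the KVM paravirt
 * interface is actually available.
 */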
void __init kvm_guest_init(void)
{
	if (!kvm_para_available())
		return;

	paravirt_ops_setup();
}