/*
 * KVM paravirt_ops implementation
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 *
 * Copyright (C) 2007, Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
 * Copyright IBM Corporation, 2007
 *   Authors: Anthony Liguori <aliguori@us.ibm.com>
 */
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/kvm_para.h>
#include <linux/cpu.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/hardirq.h>
#define MMU_QUEUE_SIZE 1024
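/*
 * Per-CPU batch buffer for MMU-op hypercalls issued while in lazy MMU mode.
 */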
struct kvm_para_state {
        u8 mmu_queue[MMU_QUEUE_SIZE];
        int mmu_queue_len;
        enum paravirt_lazy_mode mode;
};
static DEFINE_PER_CPU(struct kvm_para_state, para_state);
static struct kvm_para_state *kvm_para_state(void)
{
        return &per_cpu(para_state, raw_smp_processor_id());
}
/*
 * No need for any "IO delay" on KVM
 */
static void kvm_io_delay(void)
{
}
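/*
 * Pass a buffer of MMU ops to the host.  The hypercall returns the number
 * of bytes consumed, so keep retrying until the whole buffer has been
 * processed.
 */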
static void kvm_mmu_op(void *buffer, unsigned len)
{
        int r;
        unsigned long a1, a2;

        do {
                a1 = __pa(buffer);
                a2 = 0;   /* on i386 __pa() always returns <4G */
                r = kvm_hypercall3(KVM_HC_MMU_OP, len, a1, a2);
                buffer += r;
                len -= r;
        } while (len);
}
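/* Flush any MMU ops batched on this CPU and reset the queue. */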
static void mmu_queue_flush(struct kvm_para_state *state)
{
        if (state->mmu_queue_len) {
                kvm_mmu_op(state->mmu_queue, state->mmu_queue_len);
                state->mmu_queue_len = 0;
        }
}
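/*
 * Queue an MMU op while in lazy MMU mode; outside lazy mode it is issued
 * immediately.  If the op would overflow the per-CPU queue, flush the
 * queue first.
 */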
static void kvm_deferred_mmu_op(void *buffer, int len)
{
        struct kvm_para_state *state = kvm_para_state();

        if (state->mode != PARAVIRT_LAZY_MMU) {
                kvm_mmu_op(buffer, len);
                return;
        }
        if (state->mmu_queue_len + len > sizeof state->mmu_queue)
                mmu_queue_flush(state);
        memcpy(state->mmu_queue + state->mmu_queue_len, buffer, len);
        state->mmu_queue_len += len;
}
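/*
 * Write a page table entry via a WRITE_PTE MMU op.  The destination pointer
 * is converted to a physical address; with CONFIG_HIGHPTE the pte page may
 * live in highmem, so go through its struct page instead of __pa().
 */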
static void kvm_mmu_write(void *dest, u64 val)
{
        __u64 pte_phys;
        struct kvm_mmu_op_write_pte wpte;

#ifdef CONFIG_HIGHPTE
        struct page *page;
        unsigned long dst = (unsigned long) dest;

        page = kmap_atomic_to_page(dest);
        pte_phys = page_to_pfn(page);
        pte_phys <<= PAGE_SHIFT;
        pte_phys += (dst & ~(PAGE_MASK));
#else
        pte_phys = (unsigned long)__pa(dest);
#endif

        wpte.header.op = KVM_MMU_OP_WRITE_PTE;
        wpte.pte_val = val;
        wpte.pte_phys = pte_phys;

        kvm_deferred_mmu_op(&wpte, sizeof wpte);
}
/*
 * We only need to hook operations that are MMU writes.  We hook these so that
 * we can use lazy MMU mode to batch these operations.  We could probably
 * improve the performance of the host code if we used some of the information
 * here to simplify processing of batched writes.
 */
static void kvm_set_pte(pte_t *ptep, pte_t pte)
{
        kvm_mmu_write(ptep, pte_val(pte));
}
static void kvm_set_pte_at(struct mm_struct *mm, unsigned long addr,
                           pte_t *ptep, pte_t pte)
{
        kvm_mmu_write(ptep, pte_val(pte));
}

static void kvm_set_pmd(pmd_t *pmdp, pmd_t pmd)
{
        kvm_mmu_write(pmdp, pmd_val(pmd));
}
#if PAGETABLE_LEVELS >= 3
#ifdef CONFIG_X86_PAE
static void kvm_set_pte_atomic(pte_t *ptep, pte_t pte)
{
        kvm_mmu_write(ptep, pte_val(pte));
}

static void kvm_pte_clear(struct mm_struct *mm,
                          unsigned long addr, pte_t *ptep)
{
        kvm_mmu_write(ptep, 0);
}

static void kvm_pmd_clear(pmd_t *pmdp)
{
        kvm_mmu_write(pmdp, 0);
}
#endif

static void kvm_set_pud(pud_t *pudp, pud_t pud)
{
        kvm_mmu_write(pudp, pud_val(pud));
}

#if PAGETABLE_LEVELS == 4
static void kvm_set_pgd(pgd_t *pgdp, pgd_t pgd)
{
        kvm_mmu_write(pgdp, pgd_val(pgd));
}
#endif
#endif /* PAGETABLE_LEVELS >= 3 */
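/* TLB flushes go through the deferred path so they batch with pending writes. */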
static void kvm_flush_tlb(void)
{
        struct kvm_mmu_op_flush_tlb ftlb = {
                .header.op = KVM_MMU_OP_FLUSH_TLB,
        };

        kvm_deferred_mmu_op(&ftlb, sizeof ftlb);
}
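/*
 * Tell the host that a pfn no longer backs a page table, so it can drop any
 * state it keeps for it.  This is issued synchronously, not via the queue.
 */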
static void kvm_release_pt(unsigned long pfn)
{
        struct kvm_mmu_op_release_pt rpt = {
                .header.op = KVM_MMU_OP_RELEASE_PT,
                .pt_phys = (u64)pfn << PAGE_SHIFT,
        };

        kvm_mmu_op(&rpt, sizeof rpt);
}
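/*
 * Track lazy MMU mode in the per-CPU state so kvm_deferred_mmu_op() knows
 * when to batch; flush anything still queued when leaving lazy mode.
 */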
static void kvm_enter_lazy_mmu(void)
{
        struct kvm_para_state *state = kvm_para_state();

        paravirt_enter_lazy_mmu();
        state->mode = paravirt_get_lazy_mode();
}

static void kvm_leave_lazy_mmu(void)
{
        struct kvm_para_state *state = kvm_para_state();

        mmu_queue_flush(state);
        paravirt_leave_lazy(paravirt_get_lazy_mode());
        state->mode = paravirt_get_lazy_mode();
}
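/*
 * Install the paravirt hooks for each feature the host advertises.
 */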
static void paravirt_ops_setup(void)
{
        pv_info.name = "KVM";
        pv_info.paravirt_enabled = 1;

        if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY))
                pv_cpu_ops.io_delay = kvm_io_delay;

        if (kvm_para_has_feature(KVM_FEATURE_MMU_OP)) {
                pv_mmu_ops.set_pte = kvm_set_pte;
                pv_mmu_ops.set_pte_at = kvm_set_pte_at;
                pv_mmu_ops.set_pmd = kvm_set_pmd;
#if PAGETABLE_LEVELS >= 3
#ifdef CONFIG_X86_PAE
                pv_mmu_ops.set_pte_atomic = kvm_set_pte_atomic;
                pv_mmu_ops.pte_clear = kvm_pte_clear;
                pv_mmu_ops.pmd_clear = kvm_pmd_clear;
#endif
                pv_mmu_ops.set_pud = kvm_set_pud;
#if PAGETABLE_LEVELS == 4
                pv_mmu_ops.set_pgd = kvm_set_pgd;
#endif
#endif
                pv_mmu_ops.flush_tlb_user = kvm_flush_tlb;
                pv_mmu_ops.release_pte = kvm_release_pt;
                pv_mmu_ops.release_pmd = kvm_release_pt;
                pv_mmu_ops.release_pud = kvm_release_pt;

                pv_mmu_ops.lazy_mode.enter = kvm_enter_lazy_mmu;
                pv_mmu_ops.lazy_mode.leave = kvm_leave_lazy_mmu;
        }
}
void __init kvm_guest_init(void)
{
        if (!kvm_para_available())
                return;

        paravirt_ops_setup();
}