2 * Copyright (c) 2008 The FreeBSD Project
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 #include <sys/cdefs.h>
28 #include <sys/param.h>
29 #include <sys/systm.h>
30 #include <sys/kernel.h>
31 #include <sys/malloc.h>
32 #include <sys/sysproto.h>
38 #include <vm/vm_extern.h>
39 #include <vm/vm_map.h>
40 #include <vm/vm_object.h>
41 #include <vm/vm_param.h>
42 #include <machine/_inttypes.h>
43 #include <machine/specialreg.h>
44 #include <machine/segments.h>
45 #include <machine/vmcb.h>
/*
 * Byte sizes of the two permission maps allocated by this file.
 *
 * TODO: confirm that the I/O permission map really only needs 8 KB + 1 byte
 * and not a full 12 KB.
 */
#define IOPM_SIZE	(0x2000 + 1)
#define MSRPM_SIZE	(0x2000)
/*
 * Module-wide buffers.  They are filled in by fkvm_load() and released by
 * fkvm_unload(); static storage leaves them NULL until then.
 */
static void *iopm;		/* TODO: allocate a vm_object_t instead? */
static void *msrpm;		/* TODO: allocate a vm_object_t instead? */
static void *hsave_area;	/* address is written to MSR_VM_HSAVE_PA */
82 unsigned long vmcb_pa
;
84 unsigned long regs
[NR_VCPU_REGS
];
85 u_int64_t host_gs_base
;
89 struct guestvm
*guest_vm
;
95 struct vcpu
*vcpus
[MAX_VCPUS
];
/*
 * Byte encodings, named after the SVM instructions they stand for, written
 * as assembler ".byte" directives so they can be pasted into inline asm.
 */
#define SVM_VMLOAD	".byte 0x0f, 0x01, 0xda"
#define SVM_VMRUN	".byte 0x0f, 0x01, 0xd8"
#define SVM_VMSAVE	".byte 0x0f, 0x01, 0xdb"
#define SVM_CLGI	".byte 0x0f, 0x01, 0xdd"
#define SVM_STGI	".byte 0x0f, 0x01, 0xdc"
#define SVM_INVLPGA	".byte 0x0f, 0x01, 0xdf"
/*
 * Fetch the vcpu pointer attached to a thread.  Both the argument and the
 * whole expansion are parenthesized so that any pointer expression can be
 * passed and the result can sit inside a larger expression.
 */
#define TD_GET_VCPU(td) ((td)->vcpu)
112 #define TD_SET_VCPU(td, vcpu) do { \
/*
 * Fetch the guest VM pointer attached to a process.  Both the argument and
 * the whole expansion are parenthesized so that any pointer expression can
 * be passed and the result can sit inside a larger expression.
 */
#define PROC_GET_GUESTVM(p) ((p)->p_guestvm)
118 #define PROC_SET_GUESTVM(p, guestvm) do { \
119 p->p_guestvm = guestvm; \
123 print_vmcb_seg(struct vmcb_seg
* vmcb_seg
, const char* name
)
125 printf("%s Selector\n", name
);
126 printf("Selector : %" PRIx16
"\n", vmcb_seg
->selector
);
127 printf("Attributes : %" PRIx16
"\n", vmcb_seg
->attrib
);
128 printf("Limit : %" PRIx32
"\n", vmcb_seg
->limit
);
129 printf("Base Address : %" PRIx64
"\n", vmcb_seg
->base
);
134 print_vmcb(struct vmcb
*vmcb
)
136 printf("VMCB Control Area\n");
137 printf("Intercept CR Reads : %" PRIx16
"\n", vmcb
->control
.intercept_cr_reads
);
138 printf("Intercept CR Writes : %" PRIx16
"\n", vmcb
->control
.intercept_cr_writes
);
139 printf("Intercept DR Reads : %" PRIx16
"\n", vmcb
->control
.intercept_dr_reads
);
140 printf("Intercept DR Writes : %" PRIx16
"\n", vmcb
->control
.intercept_dr_writes
);
141 printf("Intercept Exceptions : %" PRIx32
"\n", vmcb
->control
.intercept_exceptions
);
142 printf("Intercepts : %" PRIx64
"\n", vmcb
->control
.intercepts
);
143 printf("Reserved 1: \n");
144 for(int i
=0; i
< 44; i
++) {
145 printf("%" PRIx8
"", vmcb
->control
.reserved_1
[i
]); /* Should be Zero */
148 printf("IOPM Base PA : %" PRIx64
"\n", vmcb
->control
.iopm_base_pa
);
149 printf("MSRPM Base PA : %" PRIx64
"\n", vmcb
->control
.msrpm_base_pa
);
150 printf("TSC Offset : %" PRIx64
"\n", vmcb
->control
.tsc_offset
);
151 printf("Guest ASID : %" PRIx32
"\n", vmcb
->control
.guest_asid
);
152 printf("TLB Control : %" PRIx8
"\n", vmcb
->control
.tlb_control
);
153 printf("Reserved 2 : \n");
154 for(int i
=0; i
< 3; i
++) {
155 printf("%" PRIx8
"", vmcb
->control
.reserved_1
[i
]); /* Should be Zero */
158 printf("Virtual TPR : %" PRIx8
"\n", vmcb
->control
.v_tpr
);
159 printf("Virtual IRQ : %" PRIx8
"\n", vmcb
->control
.v_irq
);
160 printf("Virtual Interrupt : %" PRIx8
"\n", vmcb
->control
.v_intr
);
161 printf("Virtual Interrupt Masking: %" PRIx8
"\n", vmcb
->control
.v_intr_masking
);
162 printf("Virtual Interrupt Vector : %" PRIx8
"\n", vmcb
->control
.v_intr_vector
);
163 printf("Reserved 6 : \n");
164 for(int i
=0; i
< 3; i
++) {
165 printf("%" PRIx8
"", vmcb
->control
.reserved_6
[i
]); /* Should be Zero */
168 printf("Interrupt Shadow : %" PRIx8
"\n", vmcb
->control
.intr_shadow
);
169 printf("Reserved 7 : \n");
170 for(int i
=0; i
< 7; i
++) {
171 printf("%" PRIx8
"", vmcb
->control
.reserved_7
[i
]); /* Should be Zero */
174 printf("Exit Code : %" PRIx64
"\n", vmcb
->control
.exit_code
);
175 printf("Exit Info 1 : %" PRIx64
"\n", vmcb
->control
.exit_info_1
);
176 printf("Exit Info 2 : %" PRIx64
"\n", vmcb
->control
.exit_info_2
);
177 printf("Exit Interrupt Info : %" PRIx32
"\n", vmcb
->control
.exit_int_info
);
178 printf("Exit Interrupt Info Err Code: %" PRIx32
"\n", vmcb
->control
.exit_int_info_err_code
);
179 printf("Nested Control : %" PRIx64
"\n", vmcb
->control
.nested_ctl
);
180 printf("Reserved 8 : \n");
181 for(int i
=0; i
< 16; i
++) {
182 printf("%" PRIx8
"", vmcb
->control
.reserved_8
[i
]); /* Should be Zero */
185 printf("Event Injection : %" PRIx64
"\n", vmcb
->control
.event_inj
);
186 printf("Nested CR3 : %" PRIx64
"\n", vmcb
->control
.nested_cr3
);
187 printf("LBR Virtualization Enable: %" PRIx64
"\n", vmcb
->control
.lbr_virt_enable
);
188 printf("Reserved 9 : \n");
189 for(int i
=0; i
< 832; i
++) {
190 printf("%" PRIx8
"", vmcb
->control
.reserved_9
[i
]); /* Should be Zero */
196 printf("VMCB Save Area\n");
197 print_vmcb_seg(&(vmcb
->save
.es
), "ES");
198 print_vmcb_seg(&(vmcb
->save
.es
), "CS");
199 print_vmcb_seg(&(vmcb
->save
.es
), "SS");
200 print_vmcb_seg(&(vmcb
->save
.es
), "DS");
201 print_vmcb_seg(&(vmcb
->save
.es
), "FS");
202 print_vmcb_seg(&(vmcb
->save
.es
), "GS");
203 print_vmcb_seg(&(vmcb
->save
.es
), "GDTR");
204 print_vmcb_seg(&(vmcb
->save
.es
), "LDTR");
205 print_vmcb_seg(&(vmcb
->save
.es
), "IDTR");
206 print_vmcb_seg(&(vmcb
->save
.es
), "TR");
207 printf("Reserved 1 : \n");
208 for(int i
=0; i
< 43; i
++) {
209 printf("%" PRIx8
"", vmcb
->save
.reserved_1
[i
]); /* Should be Zero */
212 printf("Current Processor Level : %" PRIx8
"\n", vmcb
->save
.cpl
);
213 printf("Reserved 2 : \n");
214 for(int i
=0; i
< 4; i
++) {
215 printf("%" PRIx8
"", vmcb
->save
.reserved_2
[i
]); /* Should be Zero */
218 printf("EFER : %" PRIx64
"\n", vmcb
->save
.efer
);
219 printf("Reserved 3 : \n");
220 for(int i
=0; i
< 112; i
++) {
221 printf("%" PRIx8
"", vmcb
->save
.reserved_3
[i
]); /* Should be Zero */
224 printf("Control Register 4 : %" PRIx64
"\n", vmcb
->save
.cr4
);
225 printf("Control Register 3 : %" PRIx64
"\n", vmcb
->save
.cr3
);
226 printf("Control Register 0 : %" PRIx64
"\n", vmcb
->save
.cr0
);
227 printf("Debug Register 7 : %" PRIx64
"\n", vmcb
->save
.dr7
);
228 printf("Debug Register 6 : %" PRIx64
"\n", vmcb
->save
.dr6
);
229 printf("RFlags : %" PRIx64
"\n", vmcb
->save
.rflags
);
230 printf("RIP : %" PRIx64
"\n", vmcb
->save
.rip
);
231 printf("Reserved 4 : \n");
232 for(int i
=0; i
< 88; i
++) {
233 printf("%" PRIx8
"", vmcb
->save
.reserved_4
[i
]); /* Should be Zero */
236 printf("RSP : %" PRIx64
"\n", vmcb
->save
.rsp
);
237 printf("Reserved 5 : \n");
238 for(int i
=0; i
< 24; i
++) {
239 printf("%" PRIx8
"", vmcb
->save
.reserved_5
[i
]); /* Should be Zero */
242 printf("RAX : %" PRIx64
"\n", vmcb
->save
.rax
);
243 printf("STAR : %" PRIx64
"\n", vmcb
->save
.star
);
244 printf("LSTAR : %" PRIx64
"\n", vmcb
->save
.lstar
);
245 printf("CSTAR : %" PRIx64
"\n", vmcb
->save
.cstar
);
246 printf("SFMASK : %" PRIx64
"\n", vmcb
->save
.sfmask
);
247 printf("Kernel GS Base : %" PRIx64
"\n", vmcb
->save
.kernel_gs_base
);
248 printf("SYSENTER CS : %" PRIx64
"\n", vmcb
->save
.sysenter_cs
);
249 printf("SYSENTER ESP : %" PRIx64
"\n", vmcb
->save
.sysenter_esp
);
250 printf("SYSENTER EIP : %" PRIx64
"\n", vmcb
->save
.sysenter_eip
);
251 printf("Control Register 2 : %" PRIx64
"\n", vmcb
->save
.cr2
);
252 printf("Reserved 6 : \n");
253 for(int i
=0; i
< 32; i
++) {
254 printf("%" PRIx8
"", vmcb
->save
.reserved_6
[i
]); /* Should be Zero */
257 printf("Global PAT : %" PRIx64
"\n", vmcb
->save
.g_pat
);
258 printf("Debug Control : %" PRIx64
"\n", vmcb
->save
.dbg_ctl
);
259 printf("BR From : %" PRIx64
"\n", vmcb
->save
.br_from
);
260 printf("BR To : %" PRIx64
"\n", vmcb
->save
.br_to
);
261 printf("Last Exception From : %" PRIx64
"\n", vmcb
->save
.last_excp_from
);
262 printf("Last Exception To : %" PRIx64
"\n", vmcb
->save
.last_excp_to
);
269 print_tss_desc(struct system_segment_descriptor
*tss_desc
)
271 printf("TSS desc @ %p:\n", tss_desc
);
272 printf("sd_lolimit: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_lolimit
);
273 printf("sd_lobase: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_lobase
);
274 printf("sd_type: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_type
);
275 printf("sd_dpl: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_dpl
);
276 printf("sd_p: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_p
);
277 printf("sd_hilimit: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_hilimit
);
278 printf("sd_xx0: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_xx0
);
279 printf("sd_gran: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_gran
);
280 printf("sd_hibase: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_hibase
);
281 printf("sd_xx1: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_xx1
);
282 printf("sd_mbz: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_mbz
);
283 printf("sd_xx2: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_xx2
);
288 print_tss(struct system_segment_descriptor
*tss_desc
)
294 base
= (u_int32_t
*) ((((u_int64_t
) tss_desc
->sd_hibase
) << 24) | ((u_int64_t
) tss_desc
->sd_lobase
));
295 limit
= ((tss_desc
->sd_hilimit
<< 16) | tss_desc
->sd_lolimit
) / 4;
297 printf("TSS: @ %p\n", base
);
298 for (i
= 0; i
<= limit
; i
++)
299 printf("%x: 0x%" PRIx32
"\n", i
, base
[i
]);
305 print_vmcb_save_area(struct vmcb
*vmcb
)
307 printf("VMCB save area:\n");
308 printf(" cs: [selector %" PRIx16
", attrib %" PRIx16
", limit %" PRIx32
", base %" PRIx64
"]\n",
309 vmcb
->save
.cs
.selector
,
310 vmcb
->save
.cs
.attrib
,
313 printf(" fs: [selector %" PRIx16
", attrib %" PRIx16
", limit %" PRIx32
", base %" PRIx64
"]\n",
314 vmcb
->save
.fs
.selector
,
315 vmcb
->save
.fs
.attrib
,
318 printf(" gs: [selector %" PRIx16
", attrib %" PRIx16
", limit %" PRIx32
", base %" PRIx64
"]\n",
319 vmcb
->save
.gs
.selector
,
320 vmcb
->save
.gs
.attrib
,
323 printf(" tr: [selector %" PRIx16
", attrib %" PRIx16
", limit %" PRIx32
", base %" PRIx64
"]\n",
324 vmcb
->save
.tr
.selector
,
325 vmcb
->save
.tr
.attrib
,
328 printf(" ldtr: [selector %" PRIx16
", attrib %" PRIx16
", limit %" PRIx32
", base %" PRIx64
"]\n",
329 vmcb
->save
.ldtr
.selector
,
330 vmcb
->save
.ldtr
.attrib
,
331 vmcb
->save
.ldtr
.limit
,
332 vmcb
->save
.ldtr
.base
);
333 printf(" rip: %" PRIx64
"\n", vmcb
->save
.rip
);
334 printf(" kernel_gs_base: %" PRIx64
"\n", vmcb
->save
.kernel_gs_base
);
335 printf(" star: %" PRIx64
"\n", vmcb
->save
.star
);
336 printf(" lstar: %" PRIx64
"\n", vmcb
->save
.lstar
);
337 printf(" cstar: %" PRIx64
"\n", vmcb
->save
.cstar
);
338 printf(" sfmask: %" PRIx64
"\n", vmcb
->save
.sfmask
);
339 printf(" sysenter_cs: %" PRIx64
"\n", vmcb
->save
.sysenter_cs
);
340 printf(" sysenter_esp: %" PRIx64
"\n", vmcb
->save
.sysenter_esp
);
341 printf(" sysenter_eip: %" PRIx64
"\n", vmcb
->save
.sysenter_eip
);
346 vmrun_assert(struct vmcb
*vmcb
)
348 #define A(cond) do { if ((cond)) { printf("Error: assertion not met on line %d\n", __LINE__); bad = 1; } } while (0)
354 // The following are illegal:
357 A((vmcb
->save
.efer
& 0x0000000000001000) == 0);
359 // CR0.CD is zero and CR0.NW is set
360 A( ((vmcb
->save
.cr0
& 0x0000000040000000) == 0) &&
361 ((vmcb
->save
.cr0
& 0x0000000020000000) != 0));
363 // CR0[63:32] are not zero.
364 A((vmcb
->save
.cr0
& 0xFFFFFFFF00000000) == 0xFFFFFFFF00000000);
366 // Any MBZ bit of CR3 is set.
367 A((vmcb
->save
.cr3
& 0xFFF0000000000000) != 0);
369 // CR4[63:11] are not zero.
370 A((vmcb
->save
.cr4
& 0xFFFFFFFFFFFFF800) == 0xFFFFFFFFFFFFF800);
372 // DR6[63:32] are not zero.
373 A((vmcb
->save
.dr6
& 0xFFFFFFFF00000000) == 0xFFFFFFFF00000000);
375 // DR7[63:32] are not zero.
376 A((vmcb
->save
.dr7
& 0xFFFFFFFF00000000) == 0xFFFFFFFF00000000);
378 // EFER[63:15] are not zero.
379 A((vmcb
->save
.efer
& 0xFFFFFFFFFFFF8000) == 0xFFFFFFFFFFF8000);
381 // EFER.LMA or EFER.LME is non-zero and this processor does not support long mode.
382 //// A((vmcb->save.efer & 0x0000000000000500) != 0);
384 // EFER.LME and CR0.PG are both set and CR4.PAE is zero.
385 A( ((vmcb
->save
.efer
& 0x0000000000000100) != 0) &&
386 ((vmcb
->save
.cr0
& 0x0000000080000000) != 0) &&
387 ((vmcb
->save
.cr4
& 0x0000000000000020) != 0));
389 // EFER.LME and CR0.PG are both non-zero and CR0.PE is zero.
390 A( ((vmcb
->save
.efer
& 0x0000000000000100) != 0) &&
391 ((vmcb
->save
.cr0
& 0x0000000080000000) != 0) &&
392 ((vmcb
->save
.cr0
& 0x0000000000000001) == 0));
394 // EFER.LME, CR0.PG, CR4.PAE, CS.L, and CS.D are all non-zero.
395 // cs.attrib = concat 55-52 and 47-40 (p372 v2)
396 A( ((vmcb
->save
.efer
& 0x0000000000000100) != 0) &&
397 ((vmcb
->save
.cr0
& 0x0000000080000000) != 0) &&
398 ((vmcb
->save
.cr4
& 0x0000000000000020) != 0) &&
399 ((vmcb
->save
.cs
.attrib
& 0x0200) != 0) &&
400 ((vmcb
->save
.cs
.attrib
& 0x0400) != 0));
402 // The VMRUN intercept bit is clear.
403 A((vmcb
->control
.intercepts
& 0x0000000100000000) == 0);
405 // The MSR or IOIO intercept tables extend to a physical address that is
406 // greater than or equal to the maximum supported physical address.
408 // Illegal event injection (see Section 15.19 on page 391).
410 // ASID is equal to zero.
411 A(vmcb
->control
.guest_asid
== 0);
413 // VMRUN can load a guest value of CR0 with PE = 0 but PG = 1, a
414 // combination that is otherwise illegal (see Section 15.18).
416 // In addition to consistency checks, VMRUN and #VMEXIT canonicalize (i.e.,
417 // sign-extend to 63 bits) all base addresses in the segment registers
418 // that have been loaded.
426 fkvm_vcpu_run(struct vcpu
*vcpu
)
435 u_short ldt_selector
;
437 unsigned long host_cr2
;
438 unsigned long host_dr6
;
439 unsigned long host_dr7
;
441 struct system_segment_descriptor
*tss_desc
;
446 printf("begin fkvm_vcpu_run\n");
450 if (vmrun_assert(vmcb
))
453 tss_desc
= (struct system_segment_descriptor
*) (&gdt
[GPROC0_SEL
]);
454 sel
= GSEL(GPROC0_SEL
, SEL_KPL
);
456 // printf("GSEL(GPROC0_SEL, SEL_KPL)=0x%" PRIx64 "\n", sel);
457 // print_tss_desc(tss_desc);
458 // print_tss(tss_desc);
460 print_vmcb_save_area(vmcb
);
461 printf("vcpu->regs[VCPU_REGS_RIP]: 0x%lx\n", vcpu
->regs
[VCPU_REGS_RIP
]);
464 vmcb
->save
.rax
= vcpu
->regs
[VCPU_REGS_RAX
];
465 vmcb
->save
.rsp
= vcpu
->regs
[VCPU_REGS_RSP
];
466 vmcb
->save
.rip
= vcpu
->regs
[VCPU_REGS_RIP
];
468 /* meh: kvm has pre_svm_run(svm); */
470 vcpu
->host_gs_base
= rdmsr(MSR_GSBASE
);
471 // printf("host_gs_base: 0x%" PRIx64 "\n", vcpu->host_gs_base);
475 ldt_selector
= rldt();
476 // printf("fs selector: %hx\n", fs_selector);
477 // printf("gs selector: %hx\n", gs_selector);
478 // printf("ldt selector: %hx\n", ldt_selector);
485 vmcb
->save
.cr2
= vcpu
->cr2
;
488 /* meh: dr7? db_regs? */
490 // printf("MSR_STAR: %" PRIx64 "\n", rdmsr(MSR_STAR));
491 // printf("MSR_LSTAR: %" PRIx64 "\n", rdmsr(MSR_LSTAR));
492 // printf("MSR_CSTAR: %" PRIx64 "\n", rdmsr(MSR_CSTAR));
493 // printf("MSR_SF_MASK: %" PRIx64 "\n", rdmsr(MSR_SF_MASK));
495 star
= rdmsr(MSR_STAR
);
496 lstar
= rdmsr(MSR_LSTAR
);
497 cstar
= rdmsr(MSR_CSTAR
);
498 sfmask
= rdmsr(MSR_SF_MASK
);
502 __asm
__volatile (SVM_CLGI
);
509 "mov %c[rbx](%[svm]), %%rbx \n\t"
510 "mov %c[rcx](%[svm]), %%rcx \n\t"
511 "mov %c[rdx](%[svm]), %%rdx \n\t"
512 "mov %c[rsi](%[svm]), %%rsi \n\t"
513 "mov %c[rdi](%[svm]), %%rdi \n\t"
514 "mov %c[rbp](%[svm]), %%rbp \n\t"
515 "mov %c[r8](%[svm]), %%r8 \n\t"
516 "mov %c[r9](%[svm]), %%r9 \n\t"
517 "mov %c[r10](%[svm]), %%r10 \n\t"
518 "mov %c[r11](%[svm]), %%r11 \n\t"
519 "mov %c[r12](%[svm]), %%r12 \n\t"
520 "mov %c[r13](%[svm]), %%r13 \n\t"
521 "mov %c[r14](%[svm]), %%r14 \n\t"
522 "mov %c[r15](%[svm]), %%r15 \n\t"
524 /* Enter guest mode */
526 "mov %c[vmcb](%[svm]), %%rax \n\t"
532 /* Save guest registers, load host registers */
533 "mov %%rbx, %c[rbx](%[svm]) \n\t"
534 "mov %%rcx, %c[rcx](%[svm]) \n\t"
535 "mov %%rdx, %c[rdx](%[svm]) \n\t"
536 "mov %%rsi, %c[rsi](%[svm]) \n\t"
537 "mov %%rdi, %c[rdi](%[svm]) \n\t"
538 "mov %%rbp, %c[rbp](%[svm]) \n\t"
539 "mov %%r8, %c[r8](%[svm]) \n\t"
540 "mov %%r9, %c[r9](%[svm]) \n\t"
541 "mov %%r10, %c[r10](%[svm]) \n\t"
542 "mov %%r11, %c[r11](%[svm]) \n\t"
543 "mov %%r12, %c[r12](%[svm]) \n\t"
544 "mov %%r13, %c[r13](%[svm]) \n\t"
545 "mov %%r14, %c[r14](%[svm]) \n\t"
546 "mov %%r15, %c[r15](%[svm]) \n\t"
550 [vmcb
]"i"(offsetof(struct vcpu
, vmcb_pa
)),
551 [rbx
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_RBX
])),
552 [rcx
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_RCX
])),
553 [rdx
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_RDX
])),
554 [rsi
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_RSI
])),
555 [rdi
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_RDI
])),
556 [rbp
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_RBP
])),
557 [r8
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R8
])),
558 [r9
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R9
])),
559 [r10
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R10
])),
560 [r11
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R11
])),
561 [r12
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R12
])),
562 [r13
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R13
])),
563 [r14
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R14
])),
564 [r15
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R15
]))
566 "rbx", "rcx", "rdx", "rsi", "rdi",
567 "r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15"
571 /* meh: dr7? db_regs? */
573 vcpu
->cr2
= vmcb
->save
.cr2
;
575 vcpu
->regs
[VCPU_REGS_RAX
] = vmcb
->save
.rax
;
576 vcpu
->regs
[VCPU_REGS_RSP
] = vmcb
->save
.rsp
;
577 vcpu
->regs
[VCPU_REGS_RIP
] = vmcb
->save
.rip
;
584 load_fs(fs_selector
);
585 load_gs(gs_selector
);
588 wrmsr(MSR_GSBASE
, vcpu
->host_gs_base
);
590 tss_desc
->sd_type
= SDT_SYSTSS
;
593 wrmsr(MSR_STAR
, star
);
594 wrmsr(MSR_LSTAR
, lstar
);
595 wrmsr(MSR_CSTAR
, cstar
);
596 wrmsr(MSR_SF_MASK
, sfmask
);
600 __asm
__volatile (SVM_STGI
);
604 printf("exit_code: %" PRIx64
"\n", vmcb
->control
.exit_code
);
606 // print_tss_desc(tss_desc);
607 // print_tss(tss_desc);
609 print_vmcb_save_area(vmcb
);
617 _fkvm_init_seg(struct vmcb_seg
*seg
, uint16_t attrib
)
620 seg
->attrib
= VMCB_SELECTOR_P_MASK
| attrib
;
626 fkvm_init_seg(struct vmcb_seg
*seg
)
628 _fkvm_init_seg(seg
, VMCB_SELECTOR_S_MASK
| VMCB_SELECTOR_WRITE_MASK
);
fkvm_init_sys_seg(struct vmcb_seg *seg, uint16_t attrib)
{
	/*
	 * Initialise a system segment: the caller chooses the attribute
	 * bits, which are handed to _fkvm_init_seg() unchanged.
	 */
	_fkvm_init_seg(seg, attrib);
}
638 fkvm_iopm_alloc(void)
640 return contigmalloc(IOPM_SIZE
, M_DEVBUF
, 0, 0, -1UL, PAGE_SIZE
, 0);
644 fkvm_iopm_init(void *iopm
)
646 memset(iopm
, 0xff, IOPM_SIZE
); /* TODO: we may want to allow access to PC debug port */
650 fkvm_iopm_free(void *iopm
)
652 contigfree(iopm
, IOPM_SIZE
, M_DEVBUF
);
656 fkvm_msrpm_alloc(void)
658 return contigmalloc(MSRPM_SIZE
, M_DEVBUF
, 0, 0, -1UL, PAGE_SIZE
, 0);
662 fkvm_msrpm_init(void *msrpm
)
664 memset(msrpm
, 0xff, MSRPM_SIZE
); /* TODO: we may want to allow some MSR accesses */
668 fkvm_msrpm_free(void *msrpm
)
670 contigfree(msrpm
, MSRPM_SIZE
, M_DEVBUF
);
674 fkvm_hsave_area_alloc(void)
676 return contigmalloc(PAGE_SIZE
, M_DEVBUF
, 0, 0, -1UL, PAGE_SIZE
, 0);
680 fkvm_hsave_area_init(void *hsave_area
)
685 fkvm_hsave_area_free(void *hsave_area
)
687 contigfree(hsave_area
, PAGE_SIZE
, M_DEVBUF
);
690 static struct vmspace
*
691 fkvm_make_vmspace(void)
695 sp
= vmspace_alloc(0, 0xffffffffffffffff);
697 printf("vmspace_alloc failed\n");
705 fkvm_destroy_vmspace(struct vmspace
* sp
)
711 fkvm_vmcb_alloc(void)
713 return contigmalloc(PAGE_SIZE
, M_DEVBUF
, M_ZERO
, 0, -1UL,
718 fkvm_vmcb_init(struct vmcb
*vmcb
)
720 struct vmcb_control_area
*control
= &vmcb
->control
;
721 struct vmcb_save_area
*save
= &vmcb
->save
;
723 control
->intercept_cr_reads
= INTERCEPT_CR4_MASK
;
725 control
->intercept_cr_writes
= INTERCEPT_CR4_MASK
|
728 control
->intercept_dr_reads
= INTERCEPT_DR0_MASK
|
733 control
->intercept_dr_writes
= INTERCEPT_DR0_MASK
|
740 control
->intercept_exceptions
= (1 << IDT_UD
) | // Invalid Opcode
741 (1 << IDT_MC
); // Machine Check
743 control
->intercepts
= INTERCEPT_INTR
|
750 INTERCEPT_IOIO_PROT
|
762 INTERCEPT_MWAIT_UNCOND
;
764 control
->iopm_base_pa
= vtophys(iopm
);
765 control
->msrpm_base_pa
= vtophys(msrpm
);
766 control
->tsc_offset
= 0;
768 /* TODO: remove this once we assign asid's to distinct VM's */
769 control
->guest_asid
= 1;
770 control
->tlb_control
= VMCB_TLB_CONTROL_FLUSH_ALL
;
772 /* let v_tpr default to 0 */
773 /* let v_irq default to 0 */
774 /* let v_intr default to 0 */
776 control
->v_intr_masking
= 1;
778 /* let v_intr_vector default to 0 */
779 /* let intr_shadow default to 0 */
780 /* let exit_code, exit_info_1, exit_info_2, exit_int_info,
781 exit_int_info_err_code default to 0 */
783 control
->nested_ctl
= 1;
785 /* let event_inj default to 0 */
787 // (nested_cr3 is later)
789 /* let lbr_virt_enable default to 0 */
792 fkvm_init_seg(&save
->ds
);
793 fkvm_init_seg(&save
->es
);
794 fkvm_init_seg(&save
->fs
);
795 fkvm_init_seg(&save
->gs
);
796 fkvm_init_seg(&save
->ss
);
798 _fkvm_init_seg(&save
->cs
, VMCB_SELECTOR_READ_MASK
| VMCB_SELECTOR_S_MASK
|
799 VMCB_SELECTOR_CODE_MASK
);
800 save
->cs
.selector
= 0xf000;
801 save
->cs
.base
= 0xffff0000;
803 save
->gdtr
.limit
= 0xffff;
804 save
->idtr
.limit
= 0xffff;
806 fkvm_init_sys_seg(&save
->ldtr
, SDT_SYSLDT
);
807 fkvm_init_sys_seg(&save
->tr
, SDT_SYS286BSY
);
809 save
->g_pat
= PAT_VALUE(PAT_WRITE_BACK
, 0) | PAT_VALUE(PAT_WRITE_THROUGH
, 1) |
810 PAT_VALUE(PAT_UNCACHED
, 2) | PAT_VALUE(PAT_UNCACHEABLE
, 3) |
811 PAT_VALUE(PAT_WRITE_BACK
, 4) | PAT_VALUE(PAT_WRITE_THROUGH
, 5) |
812 PAT_VALUE(PAT_UNCACHED
, 6) | PAT_VALUE(PAT_UNCACHEABLE
, 7);
814 /* CR0 = 6000_0010h at boot */
815 save
->cr0
= CR0_ET
| CR0_NW
| CR0_CD
;
816 save
->dr6
= 0xffff0ff0;
819 save
->rip
= 0x0000fff0;
821 save
->efer
= EFER_SVME
;
825 fkvm_vmcb_free(struct vmcb
*vmcb
)
827 contigfree(vmcb
, PAGE_SIZE
, M_DEVBUF
);
831 fkvm_vcpu_create(struct guestvm
*guest_vm
)
834 vcpu
= malloc(sizeof(struct vcpu
), M_DEVBUF
, M_WAITOK
|M_ZERO
);
836 vcpu
->vmcb
= fkvm_vmcb_alloc();
837 vcpu
->vmcb_pa
= vtophys(vcpu
->vmcb
);
838 printf("vmcb = 0x%p\n", vcpu
->vmcb
);
839 printf("vcpu->vmcb_pa = 0x%lx\n", vcpu
->vmcb_pa
);
841 fkvm_vmcb_init(vcpu
->vmcb
);
842 vcpu
->vmcb
->control
.nested_cr3
= guest_vm
->nested_cr3
;
843 vcpu
->regs
[VCPU_REGS_RIP
] = vcpu
->vmcb
->save
.rip
;
845 vcpu
->guest_vm
= guest_vm
;
851 fkvm_vcpu_destroy(struct vcpu
*vcpu
)
853 fkvm_vmcb_free(vcpu
->vmcb
);
854 free(vcpu
, M_DEVBUF
);
857 static struct guestvm
*
858 fkvm_guestvm_alloc(void)
860 return malloc(sizeof(struct guestvm
), M_DEVBUF
, M_WAITOK
|M_ZERO
);
864 fkvm_guestvm_free(struct guestvm
* guest_vm
)
866 free(guest_vm
, M_DEVBUF
);
870 fkvm_guestvm_add_vcpu(struct guestvm
*guest_vm
, struct vcpu
*vcpu
)
872 guest_vm
->vcpus
[guest_vm
->nr_vcpus
] = vcpu
;
873 guest_vm
->nr_vcpus
++; /* TODO: Probably not safe to increment */
874 /* How about a lock to protect all of this? */
879 fkvm_userpoke(struct thread
*td
, struct fkvm_userpoke_args
*uap
)
881 printf("fkvm_userpoke\n");
886 fkvm_mem_has_entry(vm_map_entry_t expected_entry
, vm_map_t vm_map
, vm_offset_t vaddr
)
888 vm_map_entry_t lookup_entry
;
889 vm_object_t throwaway_object
;
890 vm_pindex_t throwaway_pindex
;
891 vm_prot_t throwaway_prot
;
892 boolean_t throwaway_wired
;
895 error
= vm_map_lookup(&vm_map
, /* IN/OUT */
897 VM_PROT_READ
|VM_PROT_WRITE
,
898 &lookup_entry
, /* OUT */
899 &throwaway_object
, /* OUT */
900 &throwaway_pindex
, /* OUT */
901 &throwaway_prot
, /* OUT */
902 &throwaway_wired
); /* OUT */
903 if (error
!= KERN_SUCCESS
)
905 vm_map_lookup_done(vm_map
, lookup_entry
);
906 return (lookup_entry
== expected_entry
);
910 fkvm_guest_check_range(struct guestvm
*guest_vm
, uint64_t start
, uint64_t end
)
912 vm_map_t guest_vm_map
;
913 vm_map_entry_t lookup_entry
;
914 vm_object_t throwaway_object
;
915 vm_pindex_t throwaway_pindex
;
916 vm_prot_t throwaway_prot
;
917 boolean_t throwaway_wired
;
921 guest_vm_map
= &guest_vm
->sp
->vm_map
;
923 error
= vm_map_lookup(&guest_vm_map
, /* IN/OUT */
925 VM_PROT_READ
|VM_PROT_WRITE
,
926 &lookup_entry
, /* OUT */
927 &throwaway_object
, /* OUT */
928 &throwaway_pindex
, /* OUT */
929 &throwaway_prot
, /* OUT */
930 &throwaway_wired
); /* OUT */
931 if (error
!= KERN_SUCCESS
)
934 if (fkvm_mem_has_entry(lookup_entry
, guest_vm_map
, end
))
939 vm_map_lookup_done(guest_vm_map
, lookup_entry
);
944 /* This function can only be called with multiples of page sizes */
945 /* vaddr as NULL overloads to fkvm_guest_check_range */
947 fkvm_set_user_mem_region(struct thread
*td
, struct fkvm_set_user_mem_region_args
*uap
)
949 struct guestvm
*guest_vm
= PROC_GET_GUESTVM(td
->td_proc
);
954 struct vmspace
*user_vm_space
;
955 vm_map_t user_vm_map
;
957 vm_object_t vm_object
;
958 vm_pindex_t vm_object_pindex
;
959 vm_ooffset_t vm_object_offset
;
960 vm_prot_t throwaway_prot
;
961 boolean_t throwaway_wired
;
962 vm_map_entry_t lookup_entry
;
966 start
= uap
->guest_pa
;
967 end
= uap
->guest_pa
+ uap
->size
- 1;
968 printf("start: 0x%" PRIx64
" bytes\n", start
);
969 printf("end: 0x%" PRIx64
" bytes\n", end
);
972 return fkvm_guest_check_range(guest_vm
, start
, end
);
974 user_vm_space
= td
->td_proc
->p_vmspace
;
975 user_vm_map
= &user_vm_space
->vm_map
;
976 printf("user vm space: %p\n", user_vm_space
);
977 printf("user vm map: %p\n", user_vm_map
);
979 error
= vm_map_lookup(&user_vm_map
, /* IN/OUT */
981 VM_PROT_READ
|VM_PROT_WRITE
,
982 &lookup_entry
, /* OUT */
983 &vm_object
, /* OUT */
984 &vm_object_pindex
, /* OUT */
985 &throwaway_prot
, /* OUT */
986 &throwaway_wired
); /* OUT */
987 if (error
!= KERN_SUCCESS
) {
988 printf("vm_map_lookup failed: %d\n", error
);
992 if (!fkvm_mem_has_entry(lookup_entry
, user_vm_map
, uap
->vaddr
+ uap
->size
)) {
993 printf("end of range not contained in same vm map entry as start\n");
997 printf("vm object: %p\n", vm_object
);
998 printf(" size: %d pages\n", (int) vm_object
->size
);
1000 vm_object_offset
= IDX_TO_OFF(vm_object_pindex
);
1001 printf("vm_ooffset: 0x%" PRIx64
"\n", vm_object_offset
);
1003 vm_object_reference(vm_object
); // TODO: this might be a mem leak
1005 vm_map_lookup_done(user_vm_map
, lookup_entry
);
1007 error
= vm_map_insert(&guest_vm
->sp
->vm_map
,
1012 VM_PROT_ALL
, VM_PROT_ALL
,
1014 if (error
!= KERN_SUCCESS
) {
1015 printf("vm_map_insert failed: %d\n", error
);
1017 case KERN_INVALID_ADDRESS
:
1030 fkvm_unset_user_mem_region(struct thread
*td
, struct fkvm_unset_user_mem_region_args
*uap
)
1032 struct guestvm
*guest_vm
= PROC_GET_GUESTVM(td
->td_proc
);
1037 vm_map_t guest_vm_map
;
1041 start
= uap
->guest_pa
;
1042 end
= uap
->guest_pa
+ uap
->size
- 1;
1043 printf("start: 0x%" PRIx64
" bytes\n", start
);
1044 printf("end: 0x%" PRIx64
" bytes\n", end
);
1046 guest_vm_map
= &guest_vm
->sp
->vm_map
;
1048 error
= vm_map_remove(guest_vm_map
, start
, end
);
1049 if (error
!= KERN_SUCCESS
)
1056 fkvm_create_vm(struct thread
*td
, struct fkvm_create_vm_args
*uap
)
1059 struct guestvm
*guest_vm
;
1061 printf("SYSCALL : fkvm_create_vm\n");
1063 /* Allocate Guest VM */
1064 guest_vm
= fkvm_guestvm_alloc();
1066 /* Set up the vm address space */
1067 guest_vm
->sp
= fkvm_make_vmspace();
1068 if (guest_vm
->sp
== NULL
) {
1069 fkvm_guestvm_free(guest_vm
);
1072 guest_vm
->nested_cr3
= vtophys(vmspace_pmap(guest_vm
->sp
)->pm_pml4
);
1075 printf(" vm space: %p\n", guest_vm
->sp
);
1076 printf(" vm map: %p\n", &guest_vm
->sp
->vm_map
);
1077 printf(" ncr3: 0x%" PRIx64
"\n", guest_vm
->nested_cr3
);
1079 /* Allocate VCPU0 */
1080 vcpu
= fkvm_vcpu_create(guest_vm
);
1081 fkvm_guestvm_add_vcpu(guest_vm
, vcpu
);
1083 PROC_SET_GUESTVM(td
->td_proc
, guest_vm
);
1084 TD_SET_VCPU(td
, vcpu
);
1086 printf("fkvm_create_vm done\n");
1091 fkvm_destroy_vm(struct thread
*td
, struct fkvm_destroy_vm_args
*uap
)
1093 struct guestvm
*guest_vm
= PROC_GET_GUESTVM(td
->td_proc
);
1095 /* Destroy the VCPUs */
1096 while (guest_vm
->nr_vcpus
> 0) {
1097 guest_vm
->nr_vcpus
--;
1098 fkvm_vcpu_destroy(guest_vm
->vcpus
[guest_vm
->nr_vcpus
]);
1099 guest_vm
->vcpus
[guest_vm
->nr_vcpus
] = NULL
;
1102 /* Destroy the vmspace */
1103 fkvm_destroy_vmspace(guest_vm
->sp
);
1105 /* Destroy the Guest VM itself */
1106 fkvm_guestvm_free(guest_vm
);
1112 fkvm_vm_run(struct thread
*td
, struct fkvm_vm_run_args
*uap
)
1114 struct vcpu
*vcpu
= TD_GET_VCPU(td
);
1115 struct guestvm
*guest_vm
= vcpu
->guest_vm
;
1116 struct vmcb
*vmcb
= vcpu
->vmcb
;
1119 fkvm_vcpu_run(vcpu
);
1121 switch (vmcb
->control
.exit_code
) {
1123 case VMCB_EXIT_EXCP_BASE
... (VMCB_EXIT_EXCP_BASE
+ 31): {
1126 excp_vector
= vmcb
->control
.exit_code
- VMCB_EXIT_EXCP_BASE
;
1128 printf("VMCB_EXIT_EXCP_BASE, exception vector: 0x%x\n",
1134 case VMCB_EXIT_INTR
: {
1135 printf("VMCB_EXIT_INTR - nothing to do\n");
1139 case VMCB_EXIT_NPF
: {
1140 /* EXITINFO1 contains fault error code */
1141 /* EXITINFO2 contains the guest physical address causing the fault. */
1143 u_int64_t fault_code
;
1144 u_int64_t fault_gpa
;
1146 vm_prot_t fault_type
;
1150 fault_code
= vmcb
->control
.exit_info_1
;
1151 fault_gpa
= vmcb
->control
.exit_info_2
;
1153 printf("VMCB_EXIT_NPF:\n");
1154 printf("gpa=0x%" PRIx64
"\n", fault_gpa
);
1155 printf("fault code=0x%" PRIx64
" [P=%x, R/W=%x, U/S=%x, I/D=%x]\n",
1157 (fault_code
& PGEX_P
) != 0,
1158 (fault_code
& PGEX_W
) != 0,
1159 (fault_code
& PGEX_U
) != 0,
1160 (fault_code
& PGEX_I
) != 0);
1162 if (fault_code
& PGEX_W
)
1163 fault_type
= VM_PROT_WRITE
;
1164 else if (fault_code
& PGEX_I
)
1165 fault_type
= VM_PROT_EXECUTE
;
1167 fault_type
= VM_PROT_READ
;
1169 fault_flags
= 0; /* TODO: is that right? */
1170 rc
= vm_fault(&guest_vm
->sp
->vm_map
, (fault_gpa
& (~PAGE_MASK
)), fault_type
, fault_flags
);
1171 if (rc
!= KERN_SUCCESS
) {
1172 printf("vm_fault failed: %d\n", rc
);
1177 printf("Unhandled vmexit:\n"
1178 " code: 0x%" PRIx64
"\n"
1179 " info1: 0x%" PRIx64
"\n"
1180 " info2: 0x%" PRIx64
"\n",
1181 vmcb
->control
.exit_code
,
1182 vmcb
->control
.exit_info_1
,
1183 vmcb
->control
.exit_info_2
);
1194 fkvm_create_vcpu(struct thread
*td
, struct fkvm_create_vcpu_args
*uap
)
1196 struct guestvm
*guest_vm
= PROC_GET_GUESTVM(td
->td_proc
);
1200 vcpu
= fkvm_vcpu_create(guest_vm
);
1201 fkvm_guestvm_add_vcpu(guest_vm
, vcpu
);
1203 TD_SET_VCPU(td
, vcpu
);
1208 fkvm_load(void *unused
)
1212 printf("fkvm_load\n");
1213 printf("sizeof(struct vmcb) = %" PRIx64
"\n", sizeof(struct vmcb
));
1219 /* TODO: check for the presense of extensions */
1221 /* allocate structures */
1222 hsave_area
= fkvm_hsave_area_alloc();
1223 iopm
= fkvm_iopm_alloc();
1224 msrpm
= fkvm_msrpm_alloc();
1226 /* Initialize structures */
1227 fkvm_hsave_area_init(hsave_area
);
1228 fkvm_iopm_init(iopm
);
1229 fkvm_msrpm_init(msrpm
);
1231 /* Enable SVM in EFER */
1232 efer
= rdmsr(MSR_EFER
);
1233 printf("EFER = %" PRIx64
"\n", efer
);
1234 wrmsr(MSR_EFER
, efer
| EFER_SVME
);
1235 efer
= rdmsr(MSR_EFER
);
1236 printf("new EFER = %" PRIx64
"\n", efer
);
1238 /* Write Host save address in MSR_VM_HSAVE_PA */
1239 wrmsr(MSR_VM_HSAVE_PA
, vtophys(hsave_area
));
1241 SYSINIT(fkvm
, SI_SUB_PSEUDO
, SI_ORDER_MIDDLE
, fkvm_load
, NULL
);
1244 fkvm_unload(void *unused
)
1246 printf("fkvm_unload\n");
1248 if (msrpm
!= NULL
) {
1249 fkvm_msrpm_free(iopm
);
1253 fkvm_iopm_free(iopm
);
1256 if (hsave_area
!= NULL
) {
1257 fkvm_hsave_area_free(hsave_area
);
1261 SYSUNINIT(fkvm
, SI_SUB_PSEUDO
, SI_ORDER_MIDDLE
, fkvm_unload
, NULL
);