2 * Copyright (c) 2008 Brent Stephens <brents@rice.edu>
3 * Copyright (c) 2008 Diego Ongaro <diego.ongaro@rice.edu>
4 * Copyright (c) 2008 Kaushik Kumar Ram <kaushik@rice.edu>
5 * Copyright (c) 2008 Oleg Pesok <olegpesok@gmail.com>
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 #include <sys/cdefs.h>
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/kernel.h>
35 #include <sys/malloc.h>
36 #include <sys/sysproto.h>
40 #include <sys/eventhandler.h>
43 #include <vm/vm_extern.h>
44 #include <vm/vm_map.h>
45 #include <vm/vm_object.h>
46 #include <vm/vm_param.h>
47 #include <machine/_inttypes.h>
48 #include <machine/specialreg.h>
49 #include <machine/segments.h>
50 #include <machine/vmcb.h>
/* All printf diagnostics in this file are compiled out. */
52 #define printf(...) ((void) 0)
55 /* Definitions for Port IO */
/*
 * Field masks for decoding an IOIO-intercept exit-info word (port number,
 * address/operand size, REP prefix, string op, access type).
 * NOTE(review): the corresponding *_SHIFT constants are defined on source
 * lines not visible in this chunk — verify against the full file.
 */
63 #define PORT_MASK 0xFFFF0000
64 #define ADDR_MASK (7 << ADDR_SHIFT)
65 #define SIZE_MASK (7 << SIZE_SHIFT)
66 #define REP_MASK (1 << REP_SHIFT)
67 #define STR_MASK (1 << STR_SHIFT)
68 #define TYPE_MASK (1 << TYPE_SHIFT)
69 /* End Definitions for Port IO */
71 #define PMIO_PAGE_OFFSET 1
/* Sizes (bytes) of the SVM I/O-permission and MSR-permission bitmaps. */
73 #define IOPM_SIZE (8*1024 + 1) /* TODO: ensure that this need not be 12Kbtes, not just 8Kb+1 */
74 #define MSRPM_SIZE (8*1024)
/* NOTE(review): presumably non-zero once the module is initialized — confirm. */
78 static int fkvm_loaded
= 0;
/* I/O-permission bitmap (IOPM_SIZE bytes), shared by every guest VMCB. */
80 static void *iopm
= NULL
; /* Should I allocate a vm_object_t instead? */
/* MSR-permission bitmap (MSRPM_SIZE bytes), shared by every guest VMCB. */
81 static void *msrpm
= NULL
; /* Should I allocate a vm_object_t instead? */
/* Host state-save area (one page) required by VMRUN. */
83 static void *hsave_area
= NULL
;
/* Eventhandler registration cookie; assumed to be a process-exit hook — verify. */
85 static eventhandler_tag exit_tag
;
/*
 * NOTE(review): the lines below are member fragments; the enclosing struct
 * declarations (apparently `struct vcpu' and `struct guestvm') begin on
 * source lines not visible in this chunk.
 */
/* Physical address of this vcpu's VMCB page (passed to VMRUN in RAX). */
113 unsigned long vmcb_pa
;
/* Cached guest general-purpose registers, indexed by VCPU_REGS_*. */
115 unsigned long regs
[NR_VCPU_REGS
];
/* Host FS/GS base MSR values saved across a guest run. */
116 u_int64_t host_fs_base
;
117 u_int64_t host_gs_base
;
/* Guest MTRR shadow state: default type, fixed-range words, variable pairs. */
122 uint64_t default_type
;
123 uint64_t mtrr64k
[MTRR_N64K
/8];
124 uint64_t mtrr16k
[MTRR_N16K
/8];
125 uint64_t mtrr4k
[MTRR_N4K
/8];
126 #define FKVM_MTRR_NVAR 8
127 uint64_t mtrrvar
[FKVM_MTRR_NVAR
*2];
/* Back-pointer to the owning VM. */
130 struct guestvm
*guest_vm
;
/* Pending virtual-interrupt bitmap: one bit per vector 0..255. */
132 unsigned long virqs
[256 / (sizeof(unsigned long) * 8)];
/* Per-VM state: vcpu table and the nested (host) page-table root. */
136 struct vcpu
*vcpus
[MAX_VCPUS
];
140 u_int64_t nested_cr3
;
/*
 * SVM instructions emitted as raw opcode bytes so this file assembles even
 * when the toolchain's assembler predates the SVM extensions.
 */
144 #define SVM_VMLOAD ".byte 0x0f, 0x01, 0xda"
145 #define SVM_VMRUN ".byte 0x0f, 0x01, 0xd8"
146 #define SVM_VMSAVE ".byte 0x0f, 0x01, 0xdb"
147 #define SVM_CLGI ".byte 0x0f, 0x01, 0xdd"
148 #define SVM_STGI ".byte 0x0f, 0x01, 0xdc"
149 #define SVM_INVLPGA ".byte 0x0f, 0x01, 0xdf"
152 fkvm_virq_dequeue(struct vcpu
*vcpu
);
154 static inline struct vcpu
*
155 TD_GET_VCPU(struct thread
*td
)
160 printf("TD_GET_VCPU -> NULL\n");
165 TD_SET_VCPU(struct thread
*td
, struct vcpu
*vcpu
)
170 static inline struct guestvm
*
171 PROC_GET_GUESTVM(struct proc
*proc
)
173 struct guestvm
*guestvm
;
174 guestvm
= proc
->p_guestvm
;
179 PROC_SET_GUESTVM(struct proc
*proc
, struct guestvm
*guestvm
)
181 proc
->p_guestvm
= guestvm
; \
185 print_vmcb_seg(struct vmcb_seg
* vmcb_seg
, const char* name
)
187 printf("%s Selector\n", name
);
188 printf("Selector : %" PRIx16
"\n", vmcb_seg
->selector
);
189 printf("Attributes : %" PRIx16
"\n", vmcb_seg
->attrib
);
190 printf("Limit : %" PRIx32
"\n", vmcb_seg
->limit
);
191 printf("Base Address : %" PRIx64
"\n", vmcb_seg
->base
);
196 print_vmcb(struct vmcb
*vmcb
)
198 printf("VMCB Control Area\n");
199 printf("Intercept CR Reads : %" PRIx16
"\n", vmcb
->control
.intercept_cr_reads
);
200 printf("Intercept CR Writes : %" PRIx16
"\n", vmcb
->control
.intercept_cr_writes
);
201 printf("Intercept DR Reads : %" PRIx16
"\n", vmcb
->control
.intercept_dr_reads
);
202 printf("Intercept DR Writes : %" PRIx16
"\n", vmcb
->control
.intercept_dr_writes
);
203 printf("Intercept Exceptions : %" PRIx32
"\n", vmcb
->control
.intercept_exceptions
);
204 printf("Intercepts : %" PRIx64
"\n", vmcb
->control
.intercepts
);
205 printf("Reserved 1: \n");
206 for(int i
=0; i
< 44; i
++) {
207 printf("%" PRIx8
"", vmcb
->control
.reserved_1
[i
]); /* Should be Zero */
210 printf("IOPM Base PA : %" PRIx64
"\n", vmcb
->control
.iopm_base_pa
);
211 printf("MSRPM Base PA : %" PRIx64
"\n", vmcb
->control
.msrpm_base_pa
);
212 printf("TSC Offset : %" PRIx64
"\n", vmcb
->control
.tsc_offset
);
213 printf("Guest ASID : %" PRIx32
"\n", vmcb
->control
.guest_asid
);
214 printf("TLB Control : %" PRIx8
"\n", vmcb
->control
.tlb_control
);
215 printf("Reserved 2 : \n");
216 for(int i
=0; i
< 3; i
++) {
217 printf("%" PRIx8
"", vmcb
->control
.reserved_1
[i
]); /* Should be Zero */
220 printf("Virtual TPR : %" PRIx8
"\n", vmcb
->control
.v_tpr
);
221 printf("Virtual IRQ Pending : %" PRIx8
"\n", vmcb
->control
.v_irq_pending
);
222 printf("Virtual Interrupt : %" PRIx8
"\n", vmcb
->control
.v_intr
);
223 printf("Virtual Interrupt Masking: %" PRIx8
"\n", vmcb
->control
.v_intr_masking
);
224 printf("Virtual Interrupt Vector : %" PRIx8
"\n", vmcb
->control
.v_intr_vector
);
225 printf("Reserved 6 : \n");
226 for(int i
=0; i
< 3; i
++) {
227 printf("%" PRIx8
"", vmcb
->control
.reserved_6
[i
]); /* Should be Zero */
230 printf("Interrupt Shadow : %" PRIx8
"\n", vmcb
->control
.intr_shadow
);
231 printf("Reserved 7 : \n");
232 for(int i
=0; i
< 7; i
++) {
233 printf("%" PRIx8
"", vmcb
->control
.reserved_7
[i
]); /* Should be Zero */
236 printf("Exit Code : %" PRIx64
"\n", vmcb
->control
.exit_code
);
237 printf("Exit Info 1 : %" PRIx64
"\n", vmcb
->control
.exit_info_1
);
238 printf("Exit Info 2 : %" PRIx64
"\n", vmcb
->control
.exit_info_2
);
239 printf("Exit Interrupt Info : %" PRIx32
"\n", vmcb
->control
.exit_int_info
);
240 printf("Exit Interrupt Info Err Code: %" PRIx32
"\n", vmcb
->control
.exit_int_info_err_code
);
241 printf("Nested Control : %" PRIx64
"\n", vmcb
->control
.nested_ctl
);
242 printf("Reserved 8 : \n");
243 for(int i
=0; i
< 16; i
++) {
244 printf("%" PRIx8
"", vmcb
->control
.reserved_8
[i
]); /* Should be Zero */
247 printf("Event Injection : %" PRIx64
"\n", vmcb
->control
.event_inj
);
248 printf("Nested CR3 : %" PRIx64
"\n", vmcb
->control
.nested_cr3
);
249 printf("LBR Virtualization Enable: %" PRIx64
"\n", vmcb
->control
.lbr_virt_enable
);
250 printf("Reserved 9 : \n");
251 for(int i
=0; i
< 832; i
++) {
252 printf("%" PRIx8
"", vmcb
->control
.reserved_9
[i
]); /* Should be Zero */
258 printf("VMCB Save Area\n");
259 print_vmcb_seg(&(vmcb
->save
.es
), "ES");
260 print_vmcb_seg(&(vmcb
->save
.cs
), "CS");
261 print_vmcb_seg(&(vmcb
->save
.ss
), "SS");
262 print_vmcb_seg(&(vmcb
->save
.ds
), "DS");
263 print_vmcb_seg(&(vmcb
->save
.fs
), "FS");
264 print_vmcb_seg(&(vmcb
->save
.gs
), "GS");
265 print_vmcb_seg(&(vmcb
->save
.gdtr
), "GDTR");
266 print_vmcb_seg(&(vmcb
->save
.ldtr
), "LDTR");
267 print_vmcb_seg(&(vmcb
->save
.idtr
), "IDTR");
268 print_vmcb_seg(&(vmcb
->save
.tr
), "TR");
269 printf("Reserved 1 : \n");
270 for(int i
=0; i
< 43; i
++) {
271 printf("%" PRIx8
"", vmcb
->save
.reserved_1
[i
]); /* Should be Zero */
274 printf("Current Processor Level : %" PRIx8
"\n", vmcb
->save
.cpl
);
275 printf("Reserved 2 : \n");
276 for(int i
=0; i
< 4; i
++) {
277 printf("%" PRIx8
"", vmcb
->save
.reserved_2
[i
]); /* Should be Zero */
280 printf("EFER : %" PRIx64
"\n", vmcb
->save
.efer
);
281 printf("Reserved 3 : \n");
282 for(int i
=0; i
< 112; i
++) {
283 printf("%" PRIx8
"", vmcb
->save
.reserved_3
[i
]); /* Should be Zero */
286 printf("Control Register 4 : %" PRIx64
"\n", vmcb
->save
.cr4
);
287 printf("Control Register 3 : %" PRIx64
"\n", vmcb
->save
.cr3
);
288 printf("Control Register 0 : %" PRIx64
"\n", vmcb
->save
.cr0
);
289 printf("Debug Register 7 : %" PRIx64
"\n", vmcb
->save
.dr7
);
290 printf("Debug Register 6 : %" PRIx64
"\n", vmcb
->save
.dr6
);
291 printf("RFlags : %" PRIx64
"\n", vmcb
->save
.rflags
);
292 printf("RIP : %" PRIx64
"\n", vmcb
->save
.rip
);
293 printf("Reserved 4 : \n");
294 for(int i
=0; i
< 88; i
++) {
295 printf("%" PRIx8
"", vmcb
->save
.reserved_4
[i
]); /* Should be Zero */
298 printf("RSP : %" PRIx64
"\n", vmcb
->save
.rsp
);
299 printf("Reserved 5 : \n");
300 for(int i
=0; i
< 24; i
++) {
301 printf("%" PRIx8
"", vmcb
->save
.reserved_5
[i
]); /* Should be Zero */
304 printf("RAX : %" PRIx64
"\n", vmcb
->save
.rax
);
305 printf("STAR : %" PRIx64
"\n", vmcb
->save
.star
);
306 printf("LSTAR : %" PRIx64
"\n", vmcb
->save
.lstar
);
307 printf("CSTAR : %" PRIx64
"\n", vmcb
->save
.cstar
);
308 printf("SFMASK : %" PRIx64
"\n", vmcb
->save
.sfmask
);
309 printf("Kernel GS Base : %" PRIx64
"\n", vmcb
->save
.kernel_gs_base
);
310 printf("SYSENTER CS : %" PRIx64
"\n", vmcb
->save
.sysenter_cs
);
311 printf("SYSENTER ESP : %" PRIx64
"\n", vmcb
->save
.sysenter_esp
);
312 printf("SYSENTER EIP : %" PRIx64
"\n", vmcb
->save
.sysenter_eip
);
313 printf("Control Register 2 : %" PRIx64
"\n", vmcb
->save
.cr2
);
314 printf("Reserved 6 : \n");
315 for(int i
=0; i
< 32; i
++) {
316 printf("%" PRIx8
"", vmcb
->save
.reserved_6
[i
]); /* Should be Zero */
319 printf("Global PAT : %" PRIx64
"\n", vmcb
->save
.g_pat
);
320 printf("Debug Control : %" PRIx64
"\n", vmcb
->save
.dbg_ctl
);
321 printf("BR From : %" PRIx64
"\n", vmcb
->save
.br_from
);
322 printf("BR To : %" PRIx64
"\n", vmcb
->save
.br_to
);
323 printf("Last Exception From : %" PRIx64
"\n", vmcb
->save
.last_excp_from
);
324 printf("Last Exception To : %" PRIx64
"\n", vmcb
->save
.last_excp_to
);
331 print_tss_desc(struct system_segment_descriptor
*tss_desc
)
333 printf("TSS desc @ %p:\n", tss_desc
);
334 printf("sd_lolimit: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_lolimit
);
335 printf("sd_lobase: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_lobase
);
336 printf("sd_type: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_type
);
337 printf("sd_dpl: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_dpl
);
338 printf("sd_p: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_p
);
339 printf("sd_hilimit: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_hilimit
);
340 printf("sd_xx0: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_xx0
);
341 printf("sd_gran: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_gran
);
342 printf("sd_hibase: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_hibase
);
343 printf("sd_xx1: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_xx1
);
344 printf("sd_mbz: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_mbz
);
345 printf("sd_xx2: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_xx2
);
350 print_tss(struct system_segment_descriptor
*tss_desc
)
356 base
= (u_int32_t
*) ((((u_int64_t
) tss_desc
->sd_hibase
) << 24) | ((u_int64_t
) tss_desc
->sd_lobase
));
357 limit
= ((tss_desc
->sd_hilimit
<< 16) | tss_desc
->sd_lolimit
) / 4;
359 printf("TSS: @ %p\n", base
);
360 for (i
= 0; i
<= limit
; i
++)
361 printf("%x: 0x%" PRIx32
"\n", i
, base
[i
]);
367 print_vmcb_save_area(struct vmcb
*vmcb
)
369 printf("VMCB save area:\n");
370 printf(" cs: [selector %" PRIx16
", attrib %" PRIx16
", limit %" PRIx32
", base %" PRIx64
"]\n",
371 vmcb
->save
.cs
.selector
,
372 vmcb
->save
.cs
.attrib
,
375 printf(" fs: [selector %" PRIx16
", attrib %" PRIx16
", limit %" PRIx32
", base %" PRIx64
"]\n",
376 vmcb
->save
.fs
.selector
,
377 vmcb
->save
.fs
.attrib
,
380 printf(" gs: [selector %" PRIx16
", attrib %" PRIx16
", limit %" PRIx32
", base %" PRIx64
"]\n",
381 vmcb
->save
.gs
.selector
,
382 vmcb
->save
.gs
.attrib
,
385 printf(" tr: [selector %" PRIx16
", attrib %" PRIx16
", limit %" PRIx32
", base %" PRIx64
"]\n",
386 vmcb
->save
.tr
.selector
,
387 vmcb
->save
.tr
.attrib
,
390 printf(" ldtr: [selector %" PRIx16
", attrib %" PRIx16
", limit %" PRIx32
", base %" PRIx64
"]\n",
391 vmcb
->save
.ldtr
.selector
,
392 vmcb
->save
.ldtr
.attrib
,
393 vmcb
->save
.ldtr
.limit
,
394 vmcb
->save
.ldtr
.base
);
395 printf(" rip: %" PRIx64
"\n", vmcb
->save
.rip
);
396 printf(" kernel_gs_base: %" PRIx64
"\n", vmcb
->save
.kernel_gs_base
);
397 printf(" star: %" PRIx64
"\n", vmcb
->save
.star
);
398 printf(" lstar: %" PRIx64
"\n", vmcb
->save
.lstar
);
399 printf(" cstar: %" PRIx64
"\n", vmcb
->save
.cstar
);
400 printf(" sfmask: %" PRIx64
"\n", vmcb
->save
.sfmask
);
401 printf(" sysenter_cs: %" PRIx64
"\n", vmcb
->save
.sysenter_cs
);
402 printf(" sysenter_esp: %" PRIx64
"\n", vmcb
->save
.sysenter_esp
);
403 printf(" sysenter_eip: %" PRIx64
"\n", vmcb
->save
.sysenter_eip
);
408 vmrun_assert(struct vmcb
*vmcb
)
410 #define A(cond) do { if ((cond)) { printf("Error: assertion not met on line %d\n", __LINE__); bad = 1; } } while (0)
416 // The following are illegal:
419 A((vmcb
->save
.efer
& 0x0000000000001000) == 0);
421 // CR0.CD is zero and CR0.NW is set
422 A( ((vmcb
->save
.cr0
& 0x0000000040000000) == 0) &&
423 ((vmcb
->save
.cr0
& 0x0000000020000000) != 0));
425 // CR0[63:32] are not zero.
426 A((vmcb
->save
.cr0
& 0xFFFFFFFF00000000) == 0xFFFFFFFF00000000);
428 // Any MBZ bit of CR3 is set.
429 A((vmcb
->save
.cr3
& 0xFFF0000000000000) != 0);
431 // CR4[63:11] are not zero.
432 A((vmcb
->save
.cr4
& 0xFFFFFFFFFFFFF800) == 0xFFFFFFFFFFFFF800);
434 // DR6[63:32] are not zero.
435 A((vmcb
->save
.dr6
& 0xFFFFFFFF00000000) == 0xFFFFFFFF00000000);
437 // DR7[63:32] are not zero.
438 A((vmcb
->save
.dr7
& 0xFFFFFFFF00000000) == 0xFFFFFFFF00000000);
440 // EFER[63:15] are not zero.
441 A((vmcb
->save
.efer
& 0xFFFFFFFFFFFF8000) == 0xFFFFFFFFFFF8000);
443 // EFER.LMA or EFER.LME is non-zero and this processor does not support long mode.
444 //// A((vmcb->save.efer & 0x0000000000000500) != 0);
446 // EFER.LME and CR0.PG are both set and CR4.PAE is zero.
447 A( ((vmcb
->save
.efer
& 0x0000000000000100) != 0) &&
448 ((vmcb
->save
.cr0
& 0x0000000080000000) != 0) &&
449 ((vmcb
->save
.cr4
& 0x0000000000000020) != 0));
451 // EFER.LME and CR0.PG are both non-zero and CR0.PE is zero.
452 A( ((vmcb
->save
.efer
& 0x0000000000000100) != 0) &&
453 ((vmcb
->save
.cr0
& 0x0000000080000000) != 0) &&
454 ((vmcb
->save
.cr0
& 0x0000000000000001) == 0));
456 // EFER.LME, CR0.PG, CR4.PAE, CS.L, and CS.D are all non-zero.
457 // cs.attrib = concat 55-52 and 47-40 (p372 v2)
458 A( ((vmcb
->save
.efer
& 0x0000000000000100) != 0) &&
459 ((vmcb
->save
.cr0
& 0x0000000080000000) != 0) &&
460 ((vmcb
->save
.cr4
& 0x0000000000000020) != 0) &&
461 ((vmcb
->save
.cs
.attrib
& 0x0200) != 0) &&
462 ((vmcb
->save
.cs
.attrib
& 0x0400) != 0));
464 // The VMRUN intercept bit is clear.
465 A((vmcb
->control
.intercepts
& 0x0000000100000000) == 0);
467 // The MSR or IOIO intercept tables extend to a physical address that is
468 // greater than or equal to the maximum supported physical address.
470 // Illegal event injection (see Section 15.19 on page 391).
472 // ASID is equal to zero.
473 A(vmcb
->control
.guest_asid
== 0);
475 // VMRUN can load a guest value of CR0 with PE = 0 but PG = 1, a
476 // combination that is otherwise illegal (see Section 15.18).
478 // In addition to consistency checks, VMRUN and #VMEXIT canonicalize (i.e.,
479 // sign-extend to 63 bits) all base addresses in the segment registers
480 // that have been loaded.
488 fkvm_vcpu_run(struct vcpu
*vcpu
)
497 u_short ldt_selector
;
499 unsigned long host_cr2
;
500 unsigned long host_dr6
;
501 unsigned long host_dr7
;
503 struct system_segment_descriptor
*tss_desc
;
508 //printf("begin fkvm_vcpu_run\n");
512 fkvm_virq_dequeue(vcpu
);
514 if (vmrun_assert(vmcb
))
517 tss_desc
= (struct system_segment_descriptor
*) (&gdt
[GPROC0_SEL
]);
518 sel
= GSEL(GPROC0_SEL
, SEL_KPL
);
520 // printf("GSEL(GPROC0_SEL, SEL_KPL)=0x%" PRIx64 "\n", sel);
521 // print_tss_desc(tss_desc);
522 // print_tss(tss_desc);
524 // print_vmcb_save_area(vmcb);
525 // printf("vcpu->regs[VCPU_REGS_RIP]: 0x%lx\n", vcpu->regs[VCPU_REGS_RIP]);
528 vmcb
->save
.rax
= vcpu
->regs
[VCPU_REGS_RAX
];
529 vmcb
->save
.rsp
= vcpu
->regs
[VCPU_REGS_RSP
];
530 vmcb
->save
.rip
= vcpu
->regs
[VCPU_REGS_RIP
];
532 /* meh: kvm has pre_svm_run(svm); */
534 vcpu
->host_fs_base
= rdmsr(MSR_FSBASE
);
535 vcpu
->host_gs_base
= rdmsr(MSR_GSBASE
);
536 // printf("host_fs_base: 0x%" PRIx64 "\n", vcpu->host_fs_base);
537 // printf("host_gs_base: 0x%" PRIx64 "\n", vcpu->host_gs_base);
541 ldt_selector
= rldt();
542 // printf("fs selector: %hx\n", fs_selector);
543 // printf("gs selector: %hx\n", gs_selector);
544 // printf("ldt selector: %hx\n", ldt_selector);
551 vmcb
->save
.cr2
= vcpu
->cr2
;
553 // TODO: something with apic_base?
555 /* meh: dr7? db_regs? */
557 // printf("MSR_STAR: %" PRIx64 "\n", rdmsr(MSR_STAR));
558 // printf("MSR_LSTAR: %" PRIx64 "\n", rdmsr(MSR_LSTAR));
559 // printf("MSR_CSTAR: %" PRIx64 "\n", rdmsr(MSR_CSTAR));
560 // printf("MSR_SF_MASK: %" PRIx64 "\n", rdmsr(MSR_SF_MASK));
562 star
= rdmsr(MSR_STAR
);
563 lstar
= rdmsr(MSR_LSTAR
);
564 cstar
= rdmsr(MSR_CSTAR
);
565 sfmask
= rdmsr(MSR_SF_MASK
);
567 // printf("CLGI...\n");
569 __asm
__volatile (SVM_CLGI
);
576 "mov %c[rbx](%[svm]), %%rbx \n\t"
577 "mov %c[rcx](%[svm]), %%rcx \n\t"
578 "mov %c[rdx](%[svm]), %%rdx \n\t"
579 "mov %c[rsi](%[svm]), %%rsi \n\t"
580 "mov %c[rdi](%[svm]), %%rdi \n\t"
581 "mov %c[rbp](%[svm]), %%rbp \n\t"
582 "mov %c[r8](%[svm]), %%r8 \n\t"
583 "mov %c[r9](%[svm]), %%r9 \n\t"
584 "mov %c[r10](%[svm]), %%r10 \n\t"
585 "mov %c[r11](%[svm]), %%r11 \n\t"
586 "mov %c[r12](%[svm]), %%r12 \n\t"
587 "mov %c[r13](%[svm]), %%r13 \n\t"
588 "mov %c[r14](%[svm]), %%r14 \n\t"
589 "mov %c[r15](%[svm]), %%r15 \n\t"
591 /* Enter guest mode */
593 "mov %c[vmcb](%[svm]), %%rax \n\t"
599 /* Save guest registers, load host registers */
600 "mov %%rbx, %c[rbx](%[svm]) \n\t"
601 "mov %%rcx, %c[rcx](%[svm]) \n\t"
602 "mov %%rdx, %c[rdx](%[svm]) \n\t"
603 "mov %%rsi, %c[rsi](%[svm]) \n\t"
604 "mov %%rdi, %c[rdi](%[svm]) \n\t"
605 "mov %%rbp, %c[rbp](%[svm]) \n\t"
606 "mov %%r8, %c[r8](%[svm]) \n\t"
607 "mov %%r9, %c[r9](%[svm]) \n\t"
608 "mov %%r10, %c[r10](%[svm]) \n\t"
609 "mov %%r11, %c[r11](%[svm]) \n\t"
610 "mov %%r12, %c[r12](%[svm]) \n\t"
611 "mov %%r13, %c[r13](%[svm]) \n\t"
612 "mov %%r14, %c[r14](%[svm]) \n\t"
613 "mov %%r15, %c[r15](%[svm]) \n\t"
617 [vmcb
]"i"(offsetof(struct vcpu
, vmcb_pa
)),
618 [rbx
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_RBX
])),
619 [rcx
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_RCX
])),
620 [rdx
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_RDX
])),
621 [rsi
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_RSI
])),
622 [rdi
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_RDI
])),
623 [rbp
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_RBP
])),
624 [r8
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R8
])),
625 [r9
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R9
])),
626 [r10
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R10
])),
627 [r11
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R11
])),
628 [r12
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R12
])),
629 [r13
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R13
])),
630 [r14
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R14
])),
631 [r15
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R15
]))
633 "rbx", "rcx", "rdx", "rsi", "rdi",
634 "r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15"
638 /* meh: dr7? db_regs? */
640 vcpu
->cr2
= vmcb
->save
.cr2
;
642 vcpu
->regs
[VCPU_REGS_RAX
] = vmcb
->save
.rax
;
643 vcpu
->regs
[VCPU_REGS_RSP
] = vmcb
->save
.rsp
;
644 vcpu
->regs
[VCPU_REGS_RIP
] = vmcb
->save
.rip
;
651 load_fs(fs_selector
);
652 load_gs(gs_selector
);
655 wrmsr(MSR_FSBASE
, vcpu
->host_fs_base
);
656 wrmsr(MSR_GSBASE
, vcpu
->host_gs_base
);
658 tss_desc
->sd_type
= SDT_SYSTSS
;
661 wrmsr(MSR_STAR
, star
);
662 wrmsr(MSR_LSTAR
, lstar
);
663 wrmsr(MSR_CSTAR
, cstar
);
664 wrmsr(MSR_SF_MASK
, sfmask
);
668 __asm
__volatile (SVM_STGI
);
672 // print_tss_desc(tss_desc);
673 // print_tss(tss_desc);
675 // print_vmcb_save_area(vmcb);
683 _fkvm_init_seg(struct vmcb_seg
*seg
, uint16_t attrib
)
686 seg
->attrib
= VMCB_SELECTOR_P_MASK
| attrib
;
692 fkvm_init_seg(struct vmcb_seg
*seg
)
694 _fkvm_init_seg(seg
, VMCB_SELECTOR_S_MASK
| VMCB_SELECTOR_WRITE_MASK
);
698 fkvm_init_sys_seg(struct vmcb_seg
*seg
, uint16_t attrib
)
700 _fkvm_init_seg(seg
, attrib
);
/* Allocate the page-aligned, physically contiguous I/O-permission bitmap. */
704 fkvm_iopm_alloc(void)
706 return contigmalloc(IOPM_SIZE
, M_DEVBUF
, 0, 0, -1UL, PAGE_SIZE
, 0);
/* All-ones bitmap => every I/O port access is intercepted. */
710 fkvm_iopm_init(void *iopm
)
712 memset(iopm
, 0xff, IOPM_SIZE
); /* TODO: we may want to allow access to PC debug port */
/* Release the I/O-permission bitmap. */
716 fkvm_iopm_free(void *iopm
)
718 contigfree(iopm
, IOPM_SIZE
, M_DEVBUF
);
/* Allocate the page-aligned, physically contiguous MSR-permission bitmap. */
722 fkvm_msrpm_alloc(void)
724 return contigmalloc(MSRPM_SIZE
, M_DEVBUF
, 0, 0, -1UL, PAGE_SIZE
, 0);
/* All-ones bitmap => every MSR access is intercepted. */
728 fkvm_msrpm_init(void *msrpm
)
730 memset(msrpm
, 0xff, MSRPM_SIZE
); /* TODO: we may want to allow some MSR accesses */
/* Release the MSR-permission bitmap. */
734 fkvm_msrpm_free(void *msrpm
)
736 contigfree(msrpm
, MSRPM_SIZE
, M_DEVBUF
);
/* Allocate the one-page host state-save area used by VMRUN. */
740 fkvm_hsave_area_alloc(void)
742 return contigmalloc(PAGE_SIZE
, M_DEVBUF
, 0, 0, -1UL, PAGE_SIZE
, 0);
/* No initialization needed for the hsave area (hardware fills it). */
746 fkvm_hsave_area_init(void *hsave_area
)
/* Release the host state-save area. */
751 fkvm_hsave_area_free(void *hsave_area
)
753 contigfree(hsave_area
, PAGE_SIZE
, M_DEVBUF
);
756 static struct vmspace
*
757 fkvm_make_vmspace(void)
761 sp
= vmspace_alloc(0, 0xffffffffffffffff);
763 printf("vmspace_alloc failed\n");
771 fkvm_destroy_vmspace(struct vmspace
* sp
)
777 fkvm_vmcb_alloc(void)
779 return contigmalloc(PAGE_SIZE
, M_DEVBUF
, M_ZERO
, 0, -1UL,
784 fkvm_vmcb_init(struct vmcb
*vmcb
)
786 struct vmcb_control_area
*control
= &vmcb
->control
;
787 struct vmcb_save_area
*save
= &vmcb
->save
;
789 control
->intercept_cr_reads
= INTERCEPT_CR4_MASK
;
791 control
->intercept_cr_writes
= INTERCEPT_CR4_MASK
|
794 control
->intercept_dr_reads
= INTERCEPT_DR0_MASK
|
799 control
->intercept_dr_writes
= INTERCEPT_DR0_MASK
|
806 control
->intercept_exceptions
= (1 << IDT_UD
) | // Invalid Opcode
807 (1 << IDT_MC
); // Machine Check
809 control
->intercepts
= INTERCEPT_INTR
|
816 INTERCEPT_IOIO_PROT
|
828 INTERCEPT_MWAIT_UNCOND
;
830 control
->iopm_base_pa
= vtophys(iopm
);
831 control
->msrpm_base_pa
= vtophys(msrpm
);
832 control
->tsc_offset
= 0;
834 /* TODO: remove this once we assign asid's to distinct VM's */
835 control
->guest_asid
= 1;
836 control
->tlb_control
= VMCB_TLB_CONTROL_FLUSH_ALL
;
838 /* let v_tpr default to 0 */
839 /* let v_irq_pending default to 0 */
840 /* let v_intr default to 0 */
842 control
->v_intr_masking
= 1;
844 /* let v_intr_vector default to 0 */
845 /* let intr_shadow default to 0 */
846 /* let exit_code, exit_info_1, exit_info_2, exit_int_info,
847 exit_int_info_err_code default to 0 */
849 control
->nested_ctl
= 1;
851 /* let event_inj default to 0 */
853 // (nested_cr3 is later)
855 /* let lbr_virt_enable default to 0 */
858 fkvm_init_seg(&save
->ds
);
859 fkvm_init_seg(&save
->es
);
860 fkvm_init_seg(&save
->fs
);
861 fkvm_init_seg(&save
->gs
);
862 fkvm_init_seg(&save
->ss
);
864 _fkvm_init_seg(&save
->cs
, VMCB_SELECTOR_READ_MASK
| VMCB_SELECTOR_S_MASK
|
865 VMCB_SELECTOR_CODE_MASK
);
866 save
->cs
.selector
= 0xf000;
867 save
->cs
.base
= 0xffff0000;
869 save
->gdtr
.limit
= 0xffff;
870 save
->idtr
.limit
= 0xffff;
872 fkvm_init_sys_seg(&save
->ldtr
, SDT_SYSLDT
);
873 fkvm_init_sys_seg(&save
->tr
, SDT_SYS286BSY
);
875 save
->g_pat
= PAT_VALUE(PAT_WRITE_BACK
, 0) | PAT_VALUE(PAT_WRITE_THROUGH
, 1) |
876 PAT_VALUE(PAT_UNCACHED
, 2) | PAT_VALUE(PAT_UNCACHEABLE
, 3) |
877 PAT_VALUE(PAT_WRITE_BACK
, 4) | PAT_VALUE(PAT_WRITE_THROUGH
, 5) |
878 PAT_VALUE(PAT_UNCACHED
, 6) | PAT_VALUE(PAT_UNCACHEABLE
, 7);
880 /* CR0 = 6000_0010h at boot */
881 save
->cr0
= CR0_ET
| CR0_NW
| CR0_CD
;
882 save
->dr6
= 0xffff0ff0;
885 save
->rip
= 0x0000fff0;
887 save
->efer
= EFER_SVME
;
891 fkvm_vmcb_free(struct vmcb
*vmcb
)
893 contigfree(vmcb
, PAGE_SIZE
, M_DEVBUF
);
897 fkvm_virq_set(struct vcpu
*vcpu
, int virq
)
901 i
= virq
/ (sizeof(vcpu
->virqs
[0]) * 8);
902 j
= virq
% (sizeof(vcpu
->virqs
[0]) * 8);
904 vcpu
->virqs
[i
] |= 1UL << j
;
908 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
912 fkvm_virq_pop(struct vcpu
*vcpu
)
915 for (i
= ARRAY_SIZE(vcpu
->virqs
) - 1; i
>= 0; i
--) {
916 j
= flsl(vcpu
->virqs
[i
]);
917 // virqs[i] == 0 => j = 0
918 // virqs[i] == (1 << 0) => j = 1
921 vcpu
->virqs
[i
] &= ~(1UL << (j
- 1));
922 return i
* sizeof(vcpu
->virqs
[0]) * 8 + (j
- 1);
930 fkvm_virq_test(struct vcpu
*vcpu
)
932 #define VIRQ_ASSERT(cond) do { \
934 printf("irq test failed %d\n", __LINE__); \
938 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == -1);
940 fkvm_virq_set(vcpu
, 0);
941 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == 0);
942 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == -1);
944 fkvm_virq_set(vcpu
, 1);
945 fkvm_virq_set(vcpu
, 0);
946 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == 1);
947 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == 0);
948 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == -1);
950 fkvm_virq_set(vcpu
, 0);
951 fkvm_virq_set(vcpu
, 1);
952 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == 1);
953 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == 0);
954 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == -1);
956 fkvm_virq_set(vcpu
, 255);
957 fkvm_virq_set(vcpu
, 0);
958 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == 255);
959 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == 0);
960 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == -1);
962 fkvm_virq_set(vcpu
, 0);
963 fkvm_virq_set(vcpu
, 237);
964 fkvm_virq_set(vcpu
, 65);
965 fkvm_virq_set(vcpu
, 204);
966 fkvm_virq_set(vcpu
, 26);
967 fkvm_virq_set(vcpu
, 234);
968 fkvm_virq_set(vcpu
, 38);
969 fkvm_virq_set(vcpu
, 189);
970 fkvm_virq_set(vcpu
, 152);
971 fkvm_virq_set(vcpu
, 29);
972 fkvm_virq_set(vcpu
, 78);
973 fkvm_virq_set(vcpu
, 22);
974 fkvm_virq_set(vcpu
, 238);
975 fkvm_virq_set(vcpu
, 118);
976 fkvm_virq_set(vcpu
, 87);
977 fkvm_virq_set(vcpu
, 147);
978 fkvm_virq_set(vcpu
, 188);
979 fkvm_virq_set(vcpu
, 252);
980 fkvm_virq_set(vcpu
, 154);
981 fkvm_virq_set(vcpu
, 242);
982 fkvm_virq_set(vcpu
, 246);
983 fkvm_virq_set(vcpu
, 40);
984 fkvm_virq_set(vcpu
, 238);
985 fkvm_virq_set(vcpu
, 172);
986 fkvm_virq_set(vcpu
, 61);
988 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == 252);
989 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == 246);
990 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == 242);
991 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == 238);
992 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == 237);
993 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == 234);
994 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == 204);
995 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == 189);
996 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == 188);
997 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == 172);
998 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == 154);
999 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == 152);
1000 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == 147);
1001 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == 118);
1002 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == 87);
1003 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == 78);
1004 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == 65);
1005 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == 61);
1006 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == 40);
1007 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == 38);
1008 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == 29);
1009 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == 26);
1010 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == 22);
1011 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == 0);
1012 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == -1);
1018 _fkvm_vmcb_set_virq(struct vcpu
*vcpu
, int virq
)
1020 struct vmcb_control_area
*control
= &vcpu
->vmcb
->control
;
1022 control
->v_intr_vector
= virq
;
1023 control
->v_intr
= 0xf;
1024 control
->v_irq_pending
= 1;
1027 /* call this when we have a new interrupt for the vcpu */
1029 fkvm_virq_enqueue(struct vcpu
*vcpu
, int virq
)
1031 struct vmcb_control_area
*control
= &vcpu
->vmcb
->control
;
1033 if (control
->v_irq_pending
) {
1034 if (virq
< control
->v_intr_vector
)
1035 fkvm_virq_set(vcpu
, virq
);
1037 fkvm_virq_set(vcpu
, control
->v_intr_vector
);
1038 _fkvm_vmcb_set_virq(vcpu
, virq
);
1042 _fkvm_vmcb_set_virq(vcpu
, virq
);
1046 /* call this when the vcpu has finished handling an interrupt */
1048 fkvm_virq_dequeue(struct vcpu
*vcpu
)
1050 struct vmcb_control_area
*control
= &vcpu
->vmcb
->control
;
1053 if (control
->v_irq_pending
)
1054 return; /* there's already an interrupt pending */
1056 virq
= fkvm_virq_pop(vcpu
);
1058 return; /* no interrupts waiting */
1060 _fkvm_vmcb_set_virq(vcpu
, virq
);
1064 fkvm_inject_virq(struct thread
*td
, struct fkvm_inject_virq_args
*uap
)
1066 struct vcpu
*vcpu
= TD_GET_VCPU(td
);
1068 if (uap
->virq
< 0 || uap
->virq
> 255)
1071 fkvm_virq_enqueue(vcpu
, uap
->virq
);
1077 fkvm_vcpu_create(struct guestvm
*guest_vm
)
1080 vcpu
= malloc(sizeof(struct vcpu
), M_DEVBUF
, M_WAITOK
|M_ZERO
);
1082 vcpu
->vmcb
= fkvm_vmcb_alloc();
1083 vcpu
->vmcb_pa
= vtophys(vcpu
->vmcb
);
1084 printf("vmcb = 0x%p\n", vcpu
->vmcb
);
1085 printf("vcpu->vmcb_pa = 0x%lx\n", vcpu
->vmcb_pa
);
1087 fkvm_vmcb_init(vcpu
->vmcb
);
1088 vcpu
->vmcb
->control
.nested_cr3
= guest_vm
->nested_cr3
;
1089 vcpu
->regs
[VCPU_REGS_RIP
] = vcpu
->vmcb
->save
.rip
;
1091 vcpu
->guest_vm
= guest_vm
;
1097 fkvm_vcpu_destroy(struct vcpu
*vcpu
)
1099 fkvm_vmcb_free(vcpu
->vmcb
);
1100 free(vcpu
, M_DEVBUF
);
1103 static struct guestvm
*
1104 fkvm_guestvm_alloc(void)
1106 return malloc(sizeof(struct guestvm
), M_DEVBUF
, M_WAITOK
|M_ZERO
);
1110 fkvm_guestvm_free(struct guestvm
* guest_vm
)
1112 free(guest_vm
, M_DEVBUF
);
1116 fkvm_guestvm_add_vcpu(struct guestvm
*guest_vm
, struct vcpu
*vcpu
)
1118 guest_vm
->vcpus
[guest_vm
->nr_vcpus
] = vcpu
;
1119 guest_vm
->nr_vcpus
++; /* TODO: Probably not safe to increment */
1120 /* How about a lock to protect all of this? */
1125 fkvm_userpoke(struct thread
*td
, struct fkvm_userpoke_args
*uap
)
1127 printf("fkvm_userpoke\n");
1136 fkvm_mem_has_entry(vm_map_entry_t expected_entry
, vm_map_t vm_map
, vm_offset_t vaddr
)
1138 vm_map_entry_t lookup_entry
;
1139 vm_object_t throwaway_object
;
1140 vm_pindex_t throwaway_pindex
;
1141 vm_prot_t throwaway_prot
;
1142 boolean_t throwaway_wired
;
1145 error
= vm_map_lookup(&vm_map
, /* IN/OUT */
1147 VM_PROT_READ
|VM_PROT_WRITE
,
1148 &lookup_entry
, /* OUT */
1149 &throwaway_object
, /* OUT */
1150 &throwaway_pindex
, /* OUT */
1151 &throwaway_prot
, /* OUT */
1152 &throwaway_wired
); /* OUT */
1153 if (error
!= KERN_SUCCESS
)
1155 vm_map_lookup_done(vm_map
, lookup_entry
);
1156 return (lookup_entry
== expected_entry
);
1160 fkvm_guest_check_range(struct guestvm
*guest_vm
, uint64_t start
, uint64_t end
)
1162 vm_map_t guest_vm_map
;
1163 vm_map_entry_t lookup_entry
;
1164 vm_object_t throwaway_object
;
1165 vm_pindex_t throwaway_pindex
;
1166 vm_prot_t throwaway_prot
;
1167 boolean_t throwaway_wired
;
1171 guest_vm_map
= &guest_vm
->sp
->vm_map
;
1173 error
= vm_map_lookup(&guest_vm_map
, /* IN/OUT */
1175 VM_PROT_READ
|VM_PROT_WRITE
,
1176 &lookup_entry
, /* OUT */
1177 &throwaway_object
, /* OUT */
1178 &throwaway_pindex
, /* OUT */
1179 &throwaway_prot
, /* OUT */
1180 &throwaway_wired
); /* OUT */
1181 if (error
!= KERN_SUCCESS
)
1183 vm_map_lookup_done(guest_vm_map
, lookup_entry
);
1186 TODO: We can't actually nest the lookups:
1187 panic: _sx_xlock_hard: recursed on non-recursive sx user map @ ../../../vm/vm_map.c:3115
1188 Therefore, I've moved the lookup_done above for now, but we really need a lock here.
1190 Maybe it's better to use vm_map_lookup_entry directly.
1194 if (fkvm_mem_has_entry(lookup_entry
, guest_vm_map
, end
))
1203 fkvm_get_regs_regs(struct vcpu
*vcpu
, struct kvm_regs
*out
)
1205 out
->rax
= vcpu
->regs
[VCPU_REGS_RAX
];
1206 out
->rbx
= vcpu
->regs
[VCPU_REGS_RBX
];
1207 out
->rcx
= vcpu
->regs
[VCPU_REGS_RCX
];
1208 out
->rdx
= vcpu
->regs
[VCPU_REGS_RDX
];
1209 out
->rsi
= vcpu
->regs
[VCPU_REGS_RSI
];
1210 out
->rdi
= vcpu
->regs
[VCPU_REGS_RDI
];
1211 out
->rsp
= vcpu
->regs
[VCPU_REGS_RSP
];
1212 out
->rbp
= vcpu
->regs
[VCPU_REGS_RBP
];
1213 out
->r8
= vcpu
->regs
[VCPU_REGS_R8
];
1214 out
->r9
= vcpu
->regs
[VCPU_REGS_R9
];
1215 out
->r10
= vcpu
->regs
[VCPU_REGS_R10
];
1216 out
->r11
= vcpu
->regs
[VCPU_REGS_R11
];
1217 out
->r12
= vcpu
->regs
[VCPU_REGS_R12
];
1218 out
->r13
= vcpu
->regs
[VCPU_REGS_R13
];
1219 out
->r14
= vcpu
->regs
[VCPU_REGS_R14
];
1220 out
->r15
= vcpu
->regs
[VCPU_REGS_R15
];
1221 out
->rip
= vcpu
->regs
[VCPU_REGS_RIP
];
1222 out
->rflags
= vcpu
->vmcb
->save
.rflags
;
1226 fkvm_set_regs_regs(struct vcpu
*vcpu
, const struct kvm_regs
*in
)
1228 vcpu
->regs
[VCPU_REGS_RAX
] = in
->rax
;
1229 vcpu
->regs
[VCPU_REGS_RBX
] = in
->rbx
;
1230 vcpu
->regs
[VCPU_REGS_RCX
] = in
->rcx
;
1231 vcpu
->regs
[VCPU_REGS_RDX
] = in
->rdx
;
1232 vcpu
->regs
[VCPU_REGS_RSI
] = in
->rsi
;
1233 vcpu
->regs
[VCPU_REGS_RDI
] = in
->rdi
;
1234 vcpu
->regs
[VCPU_REGS_RSP
] = in
->rsp
;
1235 vcpu
->regs
[VCPU_REGS_RBP
] = in
->rbp
;
1236 vcpu
->regs
[VCPU_REGS_R8
] = in
->r8
;
1237 vcpu
->regs
[VCPU_REGS_R9
] = in
->r9
;
1238 vcpu
->regs
[VCPU_REGS_R10
] = in
->r10
;
1239 vcpu
->regs
[VCPU_REGS_R11
] = in
->r11
;
1240 vcpu
->regs
[VCPU_REGS_R12
] = in
->r12
;
1241 vcpu
->regs
[VCPU_REGS_R13
] = in
->r13
;
1242 vcpu
->regs
[VCPU_REGS_R14
] = in
->r14
;
1243 vcpu
->regs
[VCPU_REGS_R15
] = in
->r15
;
1244 vcpu
->regs
[VCPU_REGS_RIP
] = in
->rip
;
1245 vcpu
->vmcb
->save
.rflags
= in
->rflags
;
1249 fkvm_get_vmcb_dtable(struct vmcb_seg
*vmcb_seg
, struct kvm_dtable
*fkvm_dtable
)
1251 fkvm_dtable
->base
= vmcb_seg
->base
;
1252 fkvm_dtable
->limit
= vmcb_seg
->limit
;
1256 fkvm_set_vmcb_dtable(struct vmcb_seg
*vmcb_seg
, struct kvm_dtable
*fkvm_dtable
)
1258 vmcb_seg
->base
= fkvm_dtable
->base
;
1259 vmcb_seg
->limit
= fkvm_dtable
->limit
;
/*
 * fkvm_get_vmcb_seg - unpack a VMCB segment register into the userland
 * kvm_segment layout, splitting the packed 'attrib' field into its
 * individual flag fields via the VMCB_SELECTOR_* masks/shifts.
 * An all-zero attrib marks the segment unusable.
 * NOTE(review): the branch that would clear 'unusable' when attrib != 0 is
 * not visible in this extraction — confirm against the full source.
 */
1263 fkvm_get_vmcb_seg(struct vmcb_seg
*vmcb_seg
, struct kvm_segment
*fkvm_seg
)
1265 fkvm_seg
->base
= vmcb_seg
->base
;
1266 fkvm_seg
->limit
= vmcb_seg
->limit
;
1267 fkvm_seg
->selector
= vmcb_seg
->selector
;
1269 if (vmcb_seg
->attrib
== 0)
1270 fkvm_seg
->unusable
= 1;
/* Decompose the packed attribute byte. */
1272 fkvm_seg
->type
= (vmcb_seg
->attrib
& VMCB_SELECTOR_TYPE_MASK
);
1273 fkvm_seg
->s
= (vmcb_seg
->attrib
& VMCB_SELECTOR_S_MASK
) >> VMCB_SELECTOR_S_SHIFT
;
1274 fkvm_seg
->dpl
= (vmcb_seg
->attrib
& VMCB_SELECTOR_DPL_MASK
) >> VMCB_SELECTOR_DPL_SHIFT
;
1275 fkvm_seg
->present
= (vmcb_seg
->attrib
& VMCB_SELECTOR_P_MASK
) >> VMCB_SELECTOR_P_SHIFT
;
1276 fkvm_seg
->avl
= (vmcb_seg
->attrib
& VMCB_SELECTOR_AVL_MASK
) >> VMCB_SELECTOR_AVL_SHIFT
;
1277 fkvm_seg
->l
= (vmcb_seg
->attrib
& VMCB_SELECTOR_L_MASK
) >> VMCB_SELECTOR_L_SHIFT
;
1278 fkvm_seg
->db
= (vmcb_seg
->attrib
& VMCB_SELECTOR_DB_MASK
) >> VMCB_SELECTOR_DB_SHIFT
;
1279 fkvm_seg
->g
= (vmcb_seg
->attrib
& VMCB_SELECTOR_G_MASK
) >> VMCB_SELECTOR_G_SHIFT
;
/*
 * fkvm_set_vmcb_seg - pack a userland kvm_segment back into a VMCB segment
 * register, reassembling the individual flag fields into the packed
 * 'attrib' word (inverse of fkvm_get_vmcb_seg).
 * NOTE(review): the body of the 'unusable' branch (presumably attrib = 0)
 * is not visible in this extraction — confirm against the full source.
 */
1284 fkvm_set_vmcb_seg(struct vmcb_seg
*vmcb_seg
, struct kvm_segment
*fkvm_seg
)
1286 vmcb_seg
->base
= fkvm_seg
->base
;
1287 vmcb_seg
->limit
= fkvm_seg
->limit
;
1288 vmcb_seg
->selector
= fkvm_seg
->selector
;
1290 if (fkvm_seg
->unusable
)
/* Rebuild the packed attribute byte; each field is masked to its width. */
1293 vmcb_seg
->attrib
= (fkvm_seg
->type
& VMCB_SELECTOR_TYPE_MASK
);
1294 vmcb_seg
->attrib
|= (fkvm_seg
->s
& 1) << VMCB_SELECTOR_S_SHIFT
;
1295 vmcb_seg
->attrib
|= (fkvm_seg
->dpl
& 3) << VMCB_SELECTOR_DPL_SHIFT
;
1296 vmcb_seg
->attrib
|= (fkvm_seg
->present
& 1) << VMCB_SELECTOR_P_SHIFT
;
1297 vmcb_seg
->attrib
|= (fkvm_seg
->avl
& 1) << VMCB_SELECTOR_AVL_SHIFT
;
1298 vmcb_seg
->attrib
|= (fkvm_seg
->l
& 1) << VMCB_SELECTOR_L_SHIFT
;
1299 vmcb_seg
->attrib
|= (fkvm_seg
->db
& 1) << VMCB_SELECTOR_DB_SHIFT
;
1300 vmcb_seg
->attrib
|= (fkvm_seg
->g
& 1) << VMCB_SELECTOR_G_SHIFT
;
1305 fkvm_get_cr8(struct vcpu
*vcpu
)
1307 // TODO: if cr8 has reserved bits inject GP Fault, return
1309 return (uint64_t) vcpu
->vmcb
->control
.v_tpr
;
1313 fkvm_set_cr8(struct vcpu
*vcpu
, uint64_t cr8
)
1315 // TODO: if cr8 has reserved bits inject GP Fault, return
1317 vcpu
->vmcb
->control
.v_tpr
= (uint8_t) cr8
;
1321 fkvm_get_efer(struct vcpu
*vcpu
)
1323 struct vmcb
*vmcb
= vcpu
->vmcb
;
1325 return vmcb
->save
.efer
& (~EFER_SVME
);
/*
 * fkvm_set_efer - set the guest's EFER, always forcing SVME on.
 * While paging is enabled (CR0.PG set), a change to EFER.LME is illegal
 * and is reported (a GP fault should eventually be injected).
 * NOTE(review): whether the LME-violation path returns before the final
 * EFER write is not visible in this extraction — confirm.
 */
1329 fkvm_set_efer(struct vcpu
*vcpu
, uint64_t efer
)
1331 struct vmcb
*vmcb
= vcpu
->vmcb
;
1332 //TODO: if efer has reserved bits set: inject GP Fault
1334 if (vmcb
->save
.cr0
& CR0_PG
) { //If paging is enabled do not allow changes to LME
1335 if ((vmcb
->save
.efer
& EFER_LME
) != (efer
& EFER_LME
)) {
1336 printf("fkvm_set_efer: attempt to change LME while paging\n");
1337 //TODO: inject GP fault
/* SVME must stay set while the guest runs under SVM. */
1341 vmcb
->save
.efer
= efer
| EFER_SVME
;
1345 fkvm_get_regs_sregs(struct vcpu
*vcpu
, struct kvm_sregs
*out
)
1347 struct vmcb
*vmcb
= vcpu
->vmcb
;
1349 fkvm_get_vmcb_seg(&vmcb
->save
.cs
, &out
->cs
);
1350 fkvm_get_vmcb_seg(&vmcb
->save
.ds
, &out
->ds
);
1351 fkvm_get_vmcb_seg(&vmcb
->save
.es
, &out
->es
);
1352 fkvm_get_vmcb_seg(&vmcb
->save
.fs
, &out
->fs
);
1353 fkvm_get_vmcb_seg(&vmcb
->save
.gs
, &out
->gs
);
1354 fkvm_get_vmcb_seg(&vmcb
->save
.ss
, &out
->ss
);
1355 fkvm_get_vmcb_seg(&vmcb
->save
.tr
, &out
->tr
);
1356 fkvm_get_vmcb_seg(&vmcb
->save
.ldtr
, &out
->ldt
);
1358 fkvm_get_vmcb_dtable(&vmcb
->save
.idtr
, &out
->idt
);
1359 fkvm_get_vmcb_dtable(&vmcb
->save
.gdtr
, &out
->gdt
);
1361 out
->cr2
= vcpu
->cr2
;
1362 out
->cr3
= vcpu
->cr3
;
1364 out
->cr8
= fkvm_get_cr8(vcpu
);
1365 out
->efer
= fkvm_get_efer(vcpu
);
1366 /* TODO: apic_base */
1367 out
->cr0
= vmcb
->save
.cr0
;
1368 out
->cr4
= vmcb
->save
.cr4
;
1369 /* TODO: irq_pending, interrupt_bitmap, irq_summary */
1373 fkvm_set_regs_sregs(struct vcpu
*vcpu
, struct kvm_sregs
*in
)
1375 struct vmcb
*vmcb
= vcpu
->vmcb
;
1377 fkvm_set_vmcb_seg(&vmcb
->save
.cs
, &in
->cs
);
1378 fkvm_set_vmcb_seg(&vmcb
->save
.ds
, &in
->ds
);
1379 fkvm_set_vmcb_seg(&vmcb
->save
.es
, &in
->es
);
1380 fkvm_set_vmcb_seg(&vmcb
->save
.fs
, &in
->fs
);
1381 fkvm_set_vmcb_seg(&vmcb
->save
.gs
, &in
->gs
);
1382 fkvm_set_vmcb_seg(&vmcb
->save
.ss
, &in
->ss
);
1383 fkvm_set_vmcb_seg(&vmcb
->save
.tr
, &in
->tr
);
1384 fkvm_set_vmcb_seg(&vmcb
->save
.ldtr
, &in
->ldt
);
1386 vmcb
->save
.cpl
= (vmcb
->save
.cs
.attrib
>> VMCB_SELECTOR_DPL_SHIFT
) & 3;
1388 fkvm_set_vmcb_dtable(&vmcb
->save
.idtr
, &in
->idt
);
1389 fkvm_set_vmcb_dtable(&vmcb
->save
.gdtr
, &in
->gdt
);
1391 vcpu
->cr2
= in
->cr2
;
1392 vcpu
->cr3
= in
->cr3
;
1394 fkvm_set_cr8(vcpu
, in
->cr8
);
1395 fkvm_set_efer(vcpu
, in
->efer
);
1396 /* TODO: apic_base */
1397 vmcb
->save
.cr0
= in
->cr0
;
1398 vmcb
->save
.cr4
= in
->cr4
;
1399 /* TODO: irq_pending, interrupt_bitmap, irq_summary */
/*
 * fkvm_get_reg_msr - read one virtualized MSR ('index') into *data.
 * Most MSRs are backed by the VMCB save area; the TSC is reported through
 * the VMCB tsc_offset; MTRRs come from the per-vcpu mtrrs cache.
 * Unimplemented MSRs just log and (presumably) report a default.
 * NOTE(review): the switch(index) skeleton (several case labels, braces,
 * breaks and returns) was lost in this extraction; only the per-case
 * payload statements remain below.
 */
1403 fkvm_get_reg_msr(struct vcpu
*vcpu
, uint32_t index
, uint64_t *data
) {
1404 struct vmcb
*vmcb
= vcpu
->vmcb
;
/* TSC case: guest TSC = host tsc + VMCB offset. */
1412 *data
= vmcb
->control
.tsc_offset
+ tsc
;
1417 *data
= vmcb
->save
.star
;
1422 *data
= vmcb
->save
.lstar
;
1427 *data
= vmcb
->save
.cstar
;
1432 *data
= vmcb
->save
.kernel_gs_base
;
1437 *data
= vmcb
->save
.sfmask
;
1441 case MSR_SYSENTER_CS_MSR
: {
1442 *data
= vmcb
->save
.sysenter_cs
;
1446 case MSR_SYSENTER_EIP_MSR
: {
1447 *data
= vmcb
->save
.sysenter_eip
;
1451 case MSR_SYSENTER_ESP_MSR
: {
1452 *data
= vmcb
->save
.sysenter_esp
;
1456 case MSR_DEBUGCTLMSR
: {
1457 printf("unimplemented at %d\n", __LINE__
);
1462 case MSR_PERFEVSEL0
... MSR_PERFEVSEL3
:
1463 case MSR_PERFCTR0
... MSR_PERFCTR3
: {
1464 printf("unimplemented at %d\n", __LINE__
);
/* EFER is reported with SVME hidden (see fkvm_get_efer). */
1470 *data
= fkvm_get_efer(vcpu
);
1474 case MSR_MC0_STATUS
: {
1475 printf("unimplemented at %d\n", __LINE__
);
1480 case MSR_MCG_STATUS
: {
1481 printf("unimplemented at %d\n", __LINE__
);
1487 printf("unimplemented at %d\n", __LINE__
);
1492 //TODO: MSR_IA32_UCODE_REV
1493 //TODO: MSR_IA32_UCODE_WRITE
/* MTRRcap: advertise WC, fixed-range MTRRs and FKVM_MTRR_NVAR variables. */
1496 *data
= MTRR_CAP_WC
| MTRR_CAP_FIXED
| FKVM_MTRR_NVAR
;
1500 case MSR_MTRRdefType
: {
1501 *data
= vcpu
->mtrrs
.default_type
;
1505 case MSR_MTRR64kBase
... (MSR_MTRR64kBase
+ MTRR_N64K
- 1): {
1506 *data
= vcpu
->mtrrs
.mtrr64k
[index
- MSR_MTRR64kBase
];
1510 case MSR_MTRR16kBase
... (MSR_MTRR16kBase
+ MTRR_N16K
- 1): {
1511 *data
= vcpu
->mtrrs
.mtrr16k
[index
- MSR_MTRR16kBase
];
1515 case MSR_MTRR4kBase
... (MSR_MTRR4kBase
+ MTRR_N4K
- 1): {
1516 *data
= vcpu
->mtrrs
.mtrr4k
[index
- MSR_MTRR4kBase
];
1520 case MSR_MTRRVarBase
... (MSR_MTRRVarBase
+ FKVM_MTRR_NVAR
* 2 - 1): {
1521 *data
= vcpu
->mtrrs
.mtrrvar
[index
- MSR_MTRRVarBase
];
1525 case MSR_APICBASE
: {
1526 printf("unimplemented at %d\n", __LINE__
);
1531 case MSR_IA32_MISC_ENABLE
: {
1532 printf("unimplemented at %d\n", __LINE__
);
1537 //TODO: MSR_KVM_WALL_CLOCK
1538 //TODO: MSR_KVM_SYSTEM_TIME
/* default: completely unknown MSR. */
1541 printf("Did not get unimplemented msr: 0x%" PRIx32
"\n", index
);
/*
 * fkvm_get_regs_msrs - fill in the 'data' field of each of the 'nmsrs'
 * caller-provided kvm_msr_entry records, reading each entry's 'index'
 * MSR via fkvm_get_reg_msr.
 */
1549 fkvm_get_regs_msrs(struct vcpu
*vcpu
, uint32_t nmsrs
, struct kvm_msr_entry
*entries
) {
1552 for (i
= 0; i
< nmsrs
; i
++) {
1553 fkvm_get_reg_msr(vcpu
, entries
[i
].index
, &entries
[i
].data
);
/*
 * fkvm_set_reg_msr - write one virtualized MSR ('index') with 'data'.
 * Inverse of fkvm_get_reg_msr: VMCB-backed MSRs are written to the save
 * area, the TSC write is turned into a tsc_offset, EFER goes through
 * fkvm_set_efer (LME/SVME policy), MTRRs go to the per-vcpu cache.
 * NOTE(review): the switch(index) skeleton (case labels, braces, breaks
 * and returns) was lost in this extraction.
 */
1558 fkvm_set_reg_msr(struct vcpu
*vcpu
, uint32_t index
, uint64_t data
) {
1559 struct vmcb
*vmcb
= vcpu
->vmcb
;
/* TSC case: store the requested guest TSC as an offset from host tsc. */
1567 vmcb
->control
.tsc_offset
= data
- tsc
;
1572 vmcb
->save
.star
= data
;
1577 vmcb
->save
.lstar
= data
;
1582 vmcb
->save
.cstar
= data
;
1587 vmcb
->save
.kernel_gs_base
= data
;
1592 vmcb
->save
.sfmask
= data
;
1596 case MSR_SYSENTER_CS_MSR
: {
1597 vmcb
->save
.sysenter_cs
= data
;
1601 case MSR_SYSENTER_EIP_MSR
: {
1602 vmcb
->save
.sysenter_eip
= data
;
1606 case MSR_SYSENTER_ESP_MSR
: {
1607 vmcb
->save
.sysenter_esp
= data
;
1611 case MSR_DEBUGCTLMSR
: {
1612 printf("unimplemented at %d\n", __LINE__
);
1617 case MSR_PERFEVSEL0
... MSR_PERFEVSEL3
:
1618 case MSR_PERFCTR0
... MSR_PERFCTR3
: {
1619 printf("unimplemented at %d\n", __LINE__
);
/* EFER writes are policed by fkvm_set_efer. */
1625 fkvm_set_efer(vcpu
, data
);
1629 case MSR_MC0_STATUS
: {
1630 printf("unimplemented at %d\n", __LINE__
);
1635 case MSR_MCG_STATUS
: {
1636 printf("unimplemented at %d\n", __LINE__
);
1642 printf("unimplemented at %d\n", __LINE__
);
1647 //TODO: MSR_IA32_UCODE_REV
1648 //TODO: MSR_IA32_UCODE_WRITE
1650 case MSR_MTRRdefType
: {
1651 vcpu
->mtrrs
.default_type
= data
;
1655 case MSR_MTRR64kBase
... (MSR_MTRR64kBase
+ MTRR_N64K
- 1): {
1656 vcpu
->mtrrs
.mtrr64k
[index
- MSR_MTRR64kBase
] = data
;
1660 case MSR_MTRR16kBase
... (MSR_MTRR16kBase
+ MTRR_N16K
- 1): {
1661 vcpu
->mtrrs
.mtrr16k
[index
- MSR_MTRR16kBase
] = data
;
1665 case MSR_MTRR4kBase
... (MSR_MTRR4kBase
+ MTRR_N4K
- 1): {
1666 vcpu
->mtrrs
.mtrr4k
[index
- MSR_MTRR4kBase
] = data
;
1670 case MSR_MTRRVarBase
... (MSR_MTRRVarBase
+ FKVM_MTRR_NVAR
* 2 - 1): {
1671 vcpu
->mtrrs
.mtrrvar
[index
- MSR_MTRRVarBase
] = data
;
1675 case MSR_APICBASE
: {
1676 printf("unimplemented at %d\n", __LINE__
);
1681 case MSR_IA32_MISC_ENABLE
: {
1682 printf("unimplemented at %d\n", __LINE__
);
1687 //TODO: MSR_KVM_WALL_CLOCK
1688 //TODO: MSR_KVM_SYSTEM_TIME
/* default: completely unknown MSR. */
1691 printf("Did not set unimplemented msr: 0x%" PRIx32
"\n", index
);
/*
 * fkvm_set_regs_msrs - write each of the 'nmsrs' kvm_msr_entry records
 * into the vcpu, dispatching each (index, data) pair through
 * fkvm_set_reg_msr.
 */
1699 fkvm_set_regs_msrs(struct vcpu
*vcpu
, uint32_t nmsrs
, struct kvm_msr_entry
*entries
) {
1702 for (i
= 0; i
< nmsrs
; i
++) {
1703 fkvm_set_reg_msr(vcpu
, entries
[i
].index
, entries
[i
].data
);
1710 fkvm_get_regs(struct thread
*td
, struct fkvm_get_regs_args
*uap
)
1718 vcpu
= TD_GET_VCPU(td
);
1722 switch (uap
->type
) {
1724 case FKVM_REGS_TYPE_REGS
: {
1725 struct kvm_regs out
;
1726 fkvm_get_regs_regs(vcpu
, &out
);
1727 return copyout(&out
, uap
->regs
, sizeof(out
));
1730 case FKVM_REGS_TYPE_SREGS
: {
1731 struct kvm_sregs out
;
1732 fkvm_get_regs_sregs(vcpu
, &out
);
1733 return copyout(&out
, uap
->regs
, sizeof(out
));
1736 case FKVM_REGS_TYPE_MSRS
: {
1737 struct kvm_msr_entry
*user_entries
;
1738 struct kvm_msr_entry
*entries
;
1741 user_entries
= (struct kvm_msr_entry
*)uap
->regs
;
1743 size
= sizeof(*entries
) * uap
->n
;
1744 entries
= malloc(size
, M_DEVBUF
, M_WAITOK
|M_ZERO
);
1745 if (entries
== NULL
)
1748 error
= copyin(user_entries
, entries
, size
);
1750 printf("FKVM_REGS_TYPE_MSRS: unable to copyin entries\n");
1751 free(entries
, M_DEVBUF
);
1755 fkvm_get_regs_msrs(vcpu
, uap
->n
, entries
);
1757 error
= copyout(user_entries
, entries
, size
);
1759 printf("FKVM_REGS_TYPE_MSRS: unable to copyout entries\n");
1762 free(entries
, M_DEVBUF
);
/*
 * fkvm_set_regs - syscall handler: copy one class of register state
 * (GPRs, system registers, or a list of MSRs) in from the userland buffer
 * uap->regs and apply it to the current thread's vcpu, per uap->type.
 * NOTE(review): several lines (local declarations such as the REGS case's
 * 'struct kvm_regs in;', error checks and returns) were lost in this
 * extraction.
 */
1772 fkvm_set_regs(struct thread
*td
, struct fkvm_set_regs_args
*uap
)
1777 vcpu
= TD_GET_VCPU(td
);
1781 switch (uap
->type
) {
1783 case FKVM_REGS_TYPE_REGS
: {
1785 error
= copyin(uap
->regs
, &in
, sizeof(in
));
1788 fkvm_set_regs_regs(vcpu
, &in
);
1792 case FKVM_REGS_TYPE_SREGS
: {
1793 struct kvm_sregs in
;
1794 error
= copyin(uap
->regs
, &in
, sizeof(in
));
1797 fkvm_set_regs_sregs(vcpu
, &in
);
1801 case FKVM_REGS_TYPE_MSRS
: {
1802 struct kvm_msr_entry
*user_entries
;
1803 struct kvm_msr_entry
*entries
;
1806 user_entries
= (struct kvm_msr_entry
*)uap
->regs
;
1808 size
= sizeof(*entries
) * uap
->n
;
1809 entries
= malloc(size
, M_DEVBUF
, M_WAITOK
|M_ZERO
);
1810 if (entries
== NULL
)
1813 error
= copyin(user_entries
, entries
, size
);
1815 printf("FKVM_REGS_TYPE_MSRS: unable to copyin entries\n");
1816 free(entries
, M_DEVBUF
);
1820 fkvm_set_regs_msrs(vcpu
, uap
->n
, entries
);
1822 free(entries
, M_DEVBUF
);
1831 /* This function can only be called with multiples of page sizes */
1832 /* vaddr as NULL overloads to fkvm_guest_check_range */
/*
 * fkvm_set_user_mem_region - map a range of the calling process's address
 * space (uap->vaddr .. vaddr+size) into the guest's physical address space
 * at uap->guest_pa, by looking up the backing vm_object in the user map
 * and inserting it into the guest vm_map.
 * NOTE(review): the vm_map_insert argument lines and several returns were
 * lost in this extraction.  Also note the range check below uses
 * 'uap->vaddr + uap->size' (one past the end) while start/end above use
 * 'size - 1' — confirm whether that off-by-one is intentional.
 */
1834 fkvm_set_user_mem_region(struct thread
*td
, struct fkvm_set_user_mem_region_args
*uap
)
1836 struct guestvm
*guest_vm
;
1841 struct vmspace
*user_vm_space
;
1842 vm_map_t user_vm_map
;
1844 vm_object_t vm_object
;
1845 vm_pindex_t vm_object_pindex
;
1846 vm_ooffset_t vm_object_offset
;
1847 vm_prot_t throwaway_prot
;
1848 boolean_t throwaway_wired
;
1849 vm_map_entry_t lookup_entry
;
1853 guest_vm
= PROC_GET_GUESTVM(td
->td_proc
);
1854 if (guest_vm
== NULL
) {
1855 printf("PROC_GET_GUESTVM -> NULL\n");
1859 start
= uap
->guest_pa
;
1860 end
= uap
->guest_pa
+ uap
->size
- 1;
1861 printf("start: 0x%" PRIx64
" bytes\n", start
);
1862 printf("end: 0x%" PRIx64
" bytes\n", end
);
/* vaddr == 0 means: just validate the guest range, don't map anything. */
1864 if (uap
->vaddr
== 0)
1865 return fkvm_guest_check_range(guest_vm
, start
, end
);
1867 user_vm_space
= td
->td_proc
->p_vmspace
;
1868 user_vm_map
= &user_vm_space
->vm_map
;
1869 printf("user vm space: %p\n", user_vm_space
);
1870 printf("user vm map: %p\n", user_vm_map
);
1872 error
= vm_map_lookup(&user_vm_map
, /* IN/OUT */
1874 VM_PROT_READ
|VM_PROT_WRITE
,
1875 &lookup_entry
, /* OUT */
1876 &vm_object
, /* OUT */
1877 &vm_object_pindex
, /* OUT */
1878 &throwaway_prot
, /* OUT */
1879 &throwaway_wired
); /* OUT */
1880 if (error
!= KERN_SUCCESS
) {
1881 printf("vm_map_lookup failed: %d\n", error
);
1885 /* TODO: Trust the user that the full region is valid.
1886 * This is very bad. See the note in fkvm_guest_check_range
1887 * on nesting vm lookups. */
1889 if (!fkvm_mem_has_entry(lookup_entry
, user_vm_map
, uap
->vaddr
+ uap
->size
)) {
1890 printf("end of range not contained in same vm map entry as start\n");
1895 printf("vm object: %p\n", vm_object
);
1896 printf(" size: %d pages\n", (int) vm_object
->size
);
1898 vm_object_offset
= IDX_TO_OFF(vm_object_pindex
);
1899 printf("vm_ooffset: 0x%" PRIx64
"\n", vm_object_offset
);
/* Take a reference so the object survives insertion into the guest map. */
1901 vm_object_reference(vm_object
); // TODO: this might be a mem leak
1903 vm_map_lookup_done(user_vm_map
, lookup_entry
);
1905 error
= vm_map_insert(&guest_vm
->sp
->vm_map
,
1910 VM_PROT_ALL
, VM_PROT_ALL
,
1912 if (error
!= KERN_SUCCESS
) {
1913 printf("vm_map_insert failed: %d\n", error
);
1915 case KERN_INVALID_ADDRESS
:
/*
 * fkvm_unset_user_mem_region - remove the guest-physical range
 * [uap->guest_pa, guest_pa + size - 1] from the guest's vm_map.
 */
1928 fkvm_unset_user_mem_region(struct thread
*td
, struct fkvm_unset_user_mem_region_args
*uap
)
1930 struct guestvm
*guest_vm
;
1935 guest_vm
= PROC_GET_GUESTVM(td
->td_proc
);
1936 if (guest_vm
== NULL
) {
1937 printf("PROC_GET_GUESTVM -> NULL\n");
1944 vm_map_t guest_vm_map
;
1948 start
= uap
->guest_pa
;
1949 end
= uap
->guest_pa
+ uap
->size
- 1;
1950 printf("start: 0x%" PRIx64
" bytes\n", start
);
1951 printf("end: 0x%" PRIx64
" bytes\n", end
);
1953 guest_vm_map
= &guest_vm
->sp
->vm_map
;
1955 error
= vm_map_remove(guest_vm_map
, start
, end
);
1956 if (error
!= KERN_SUCCESS
)
/*
 * fkvm_create_vm - syscall handler: allocate a guestvm, create its nested
 * (guest-physical) vmspace, record the nested CR3 (physical address of the
 * vmspace pmap's PML4), and attach the guestvm to the calling process.
 */
1963 fkvm_create_vm(struct thread
*td
, struct fkvm_create_vm_args
*uap
)
1965 struct guestvm
*guest_vm
;
1967 printf("SYSCALL : fkvm_create_vm\n");
1972 /* Allocate Guest VM */
1973 guest_vm
= fkvm_guestvm_alloc();
1975 /* Set up the vm address space */
1976 guest_vm
->sp
= fkvm_make_vmspace();
1977 if (guest_vm
->sp
== NULL
) {
1978 fkvm_guestvm_free(guest_vm
);
/* nCR3 for SVM nested paging: physical address of the guest map's PML4. */
1981 guest_vm
->nested_cr3
= vtophys(vmspace_pmap(guest_vm
->sp
)->pm_pml4
);
1984 printf(" vm space: %p\n", guest_vm
->sp
);
1985 printf(" vm map: %p\n", &guest_vm
->sp
->vm_map
);
1986 printf(" ncr3: 0x%" PRIx64
"\n", guest_vm
->nested_cr3
);
1988 PROC_SET_GUESTVM(td
->td_proc
, guest_vm
);
1990 printf("fkvm_create_vm done\n");
1995 fkvm_destroy_vm(struct guestvm
*guest_vm
)
1997 /* Destroy the VCPUs */
1998 while (guest_vm
->nr_vcpus
> 0) {
1999 guest_vm
->nr_vcpus
--;
2000 fkvm_vcpu_destroy(guest_vm
->vcpus
[guest_vm
->nr_vcpus
]);
2001 guest_vm
->vcpus
[guest_vm
->nr_vcpus
] = NULL
;
2004 /* Destroy the vmspace */
2005 if (guest_vm
->sp
!= NULL
)
2006 fkvm_destroy_vmspace(guest_vm
->sp
);
2008 /* Destroy the Guest VM itself */
2009 fkvm_guestvm_free(guest_vm
);
/*
 * intercept_ioio - decode an SVM IOIO intercept (EXITINFO1 in 'ioio_info',
 * next-rip in 'rip') into the kvm_run.u.io fields so userspace can emulate
 * the port access.  Clears the VMCB interrupt-shadow flag and advances the
 * cached rip past the IN/OUT instruction.
 * NOTE(review): the tail of the function (its return) is not visible in
 * this extraction.
 */
2013 intercept_ioio(struct vcpu
*vcpu
, struct kvm_run
*kvm_run
, uint64_t ioio_info
, uint64_t rip
)
2015 struct vmcb
*vmcb
= vcpu
->vmcb
;
2017 kvm_run
->u
.io
.string
= (ioio_info
& STR_MASK
) >> STR_SHIFT
;
2019 kvm_run
->u
.io
.port
= ioio_info
>> PORT_SHIFT
;
2020 kvm_run
->u
.io
.in
= ioio_info
& TYPE_MASK
;
2022 kvm_run
->u
.io
.size
= (ioio_info
& SIZE_MASK
) >> SIZE_SHIFT
;
2024 /* We need to remove the Interrupt Shadow Flag from the VMCB (see 15.20.5 in AMD_Vol2) */
2025 vmcb
->control
.intr_shadow
= 0;
2027 kvm_run
->u
.io
.rep
= (ioio_info
& REP_MASK
) >> REP_SHIFT
;
2028 /* TODO: Research more into Direction Flag checked in KVM; DF bit in RFLAGS */
2030 /* set the next rip in the VMCB save area for now */
2031 /* TODO: Store rIP in vm_run structure until we absolutely need it */
2032 vcpu
->regs
[VCPU_REGS_RIP
] = rip
;
2038 intercept_shutdown(struct vcpu
*vcpu
)
2040 struct vmcb
*vmcb
= vcpu
->vmcb
;
2041 memset(vmcb
, 0, PAGE_SIZE
);
2042 fkvm_vmcb_init(vmcb
);
/*
 * fkvm_vm_run - syscall handler: the main guest execution loop.
 * Copies the kvm_run control block in from userspace, runs the vcpu with
 * fkvm_vcpu_run, and dispatches on the VMCB exit code.  Exits the kernel
 * loop (and copies kvm_run back out) whenever exit_reason is set to
 * anything other than KVM_EXIT_CONTINUE, or after a bounded number of
 * consecutive in-kernel runs.
 * NOTE(review): many structural lines (declarations such as vcpu/vmcb/
 * num_runs, braces, breaks and returns) were lost in this extraction;
 * only the per-case payload statements remain below.
 */
2046 fkvm_vm_run(struct thread
*td
, struct fkvm_vm_run_args
*uap
)
2049 struct guestvm
*guest_vm
;
2054 struct kvm_run kvm_run
;
2059 vcpu
= TD_GET_VCPU(td
);
2063 guest_vm
= vcpu
->guest_vm
;
2066 error
= copyin(uap
->run
, &kvm_run
, sizeof(struct kvm_run
));
/* Userspace may have updated the TPR while we were out. */
2070 fkvm_set_cr8(vcpu
, kvm_run
.cr8
);
2072 kvm_run
.exit_reason
= KVM_EXIT_CONTINUE
;
2074 while(kvm_run
.exit_reason
== KVM_EXIT_CONTINUE
) {
2075 fkvm_vcpu_run(vcpu
);
2077 switch (vmcb
->control
.exit_code
) {
/* --- guest exception intercepts --- */
2079 case VMCB_EXIT_EXCP_BASE
... (VMCB_EXIT_EXCP_BASE
+ 31): {
2082 excp_vector
= vmcb
->control
.exit_code
- VMCB_EXIT_EXCP_BASE
;
2084 printf("VMCB_EXIT_EXCP_BASE, exception vector: 0x%x\n",
2086 kvm_run
.exit_reason
= KVM_EXIT_UNKNOWN
;
2091 case VMCB_EXIT_INTR
: {
2092 printf("VMCB_EXIT_INTR - nothing to do\n");
2093 /* Handled by host OS already */
2094 kvm_run
.exit_reason
= KVM_EXIT_CONTINUE
;
/* --- nested page fault: try to resolve via the guest vm_map, else MMIO --- */
2098 case VMCB_EXIT_NPF
: {
2099 /* EXITINFO1 contains fault error code */
2100 /* EXITINFO2 contains the guest physical address causing the fault. */
2102 u_int64_t fault_code
;
2103 u_int64_t fault_gpa
;
2105 vm_prot_t fault_type
;
2109 fault_code
= vmcb
->control
.exit_info_1
;
2110 fault_gpa
= vmcb
->control
.exit_info_2
;
2111 kvm_run
.exit_reason
= KVM_EXIT_CONTINUE
;
2114 printf("VMCB_EXIT_NPF:\n");
2115 printf("gpa=0x%" PRIx64
"\n", fault_gpa
);
2116 printf("fault code=0x%" PRIx64
" [P=%x, R/W=%x, U/S=%x, I/D=%x]\n",
2118 (fault_code
& PGEX_P
) != 0,
2119 (fault_code
& PGEX_W
) != 0,
2120 (fault_code
& PGEX_U
) != 0,
2121 (fault_code
& PGEX_I
) != 0);
2123 if (fault_code
& PGEX_W
)
2124 fault_type
= VM_PROT_WRITE
;
2125 else if (fault_code
& PGEX_I
)
2126 fault_type
= VM_PROT_EXECUTE
;
2128 fault_type
= VM_PROT_READ
;
2130 fault_flags
= 0; /* TODO: is that right? */
2131 rc
= vm_fault(&guest_vm
->sp
->vm_map
, (fault_gpa
& (~PAGE_MASK
)), fault_type
, fault_flags
);
/* Unbacked GPA: hand the access to userspace as MMIO. */
2132 if (rc
!= KERN_SUCCESS
) {
2133 printf("vm_fault failed: %d\n", rc
);
2134 kvm_run
.u
.mmio
.fault_gpa
= fault_gpa
;
2135 kvm_run
.u
.mmio
.rip
= vcpu
->regs
[VCPU_REGS_RIP
];
2136 kvm_run
.u
.mmio
.cs_base
= vmcb
->save
.cs
.base
;
2137 kvm_run
.exit_reason
= KVM_EXIT_MMIO
;
2142 case VMCB_EXIT_WRITE_CR8
:
2143 kvm_run
.exit_reason
= KVM_EXIT_SET_TPR
;
2146 kvm_run
.exit_reason
= KVM_EXIT_NMI
;
2149 vcpu
->regs
[VCPU_REGS_RIP
]++; /* skip HLT, opcode F4 */
2150 kvm_run
.exit_reason
= KVM_EXIT_HLT
;
2152 case VMCB_EXIT_SHUTDOWN
:
2153 intercept_shutdown(vcpu
);
2154 kvm_run
.exit_reason
= KVM_EXIT_SHUTDOWN
;
2156 case VMCB_EXIT_IOIO
:
2157 error
= intercept_ioio(vcpu
, &kvm_run
,
2158 vmcb
->control
.exit_info_1
,
2159 vmcb
->control
.exit_info_2
);
2161 kvm_run
.exit_reason
= KVM_EXIT_UNKNOWN
;
2163 kvm_run
.exit_reason
= KVM_EXIT_IO
;
/* --- rdmsr/wrmsr intercept: EXITINFO1 is 1 for wrmsr, 0 for rdmsr --- */
2165 case VMCB_EXIT_MSR
: {
2176 wrmsr
= vmcb
->control
.exit_info_1
;
2177 msr
= (uint32_t) vcpu
->regs
[VCPU_REGS_RCX
];
2179 printf("VMCB_EXIT_MSR:\n"
2180 " %s msr 0x%" PRIx64
"\n",
2181 wrmsr
? "write to" : "read from",
2182 vcpu
->regs
[VCPU_REGS_RCX
]);
2184 if (!wrmsr
) { /* rdmsr */
2185 error
= fkvm_get_reg_msr(vcpu
, msr
, &value
.full
);
2188 kvm_run
.exit_reason
= KVM_EXIT_UNKNOWN
;
2192 vcpu
->regs
[VCPU_REGS_RDX
] = (uint64_t) value
.split
.high
;
2193 vcpu
->regs
[VCPU_REGS_RAX
] = (uint64_t) value
.split
.low
;
2196 value
.split
.high
= (uint32_t) vcpu
->regs
[VCPU_REGS_RDX
];
2197 value
.split
.low
= (uint32_t) vcpu
->regs
[VCPU_REGS_RAX
];
2199 error
= fkvm_set_reg_msr(vcpu
, msr
, value
.full
);
2202 kvm_run
.exit_reason
= KVM_EXIT_UNKNOWN
;
/* rdmsr/wrmsr are both 2-byte opcodes: skip the instruction. */
2208 vcpu
->regs
[VCPU_REGS_RIP
] += 2;
2211 case VMCB_EXIT_CPUID
: {
2212 kvm_run
.u
.cpuid
.fn
= (uint32_t) vcpu
->regs
[VCPU_REGS_RAX
];
2213 kvm_run
.exit_reason
= KVM_EXIT_CPUID
;
2216 case VMCB_EXIT_WBINVD
: {
2217 /* TODO: stop ignoring this intercept when we have more than 1-cpu guests */
2218 vcpu
->regs
[VCPU_REGS_RIP
] += 2;
/* --- intercepts with no handler yet: fall through to "unhandled" --- */
2221 case VMCB_EXIT_READ_CR0
:
2222 case VMCB_EXIT_READ_CR3
:
2223 case VMCB_EXIT_READ_CR4
:
2224 case VMCB_EXIT_READ_CR8
:
2225 case VMCB_EXIT_WRITE_CR0
:
2226 case VMCB_EXIT_WRITE_CR3
:
2227 case VMCB_EXIT_WRITE_CR4
:
2228 case VMCB_EXIT_READ_DR0
:
2229 case VMCB_EXIT_READ_DR1
:
2230 case VMCB_EXIT_READ_DR2
:
2231 case VMCB_EXIT_READ_DR3
:
2232 case VMCB_EXIT_WRITE_DR0
:
2233 case VMCB_EXIT_WRITE_DR1
:
2234 case VMCB_EXIT_WRITE_DR2
:
2235 case VMCB_EXIT_WRITE_DR3
:
2236 case VMCB_EXIT_WRITE_DR5
:
2237 case VMCB_EXIT_WRITE_DR7
:
2239 case VMCB_EXIT_INIT
:
2240 case VMCB_EXIT_VINTR
:
2241 case VMCB_EXIT_CR0_SEL_WRITE
:
2242 case VMCB_EXIT_INVD
:
2243 case VMCB_EXIT_INVLPG
:
2244 case VMCB_EXIT_INVLPGA
:
2245 case VMCB_EXIT_TASK_SWITCH
:
2246 case VMCB_EXIT_VMRUN
:
2247 case VMCB_EXIT_VMMCALL
:
2248 case VMCB_EXIT_VMLOAD
:
2249 case VMCB_EXIT_VMSAVE
:
2250 case VMCB_EXIT_STGI
:
2251 case VMCB_EXIT_CLGI
:
2252 case VMCB_EXIT_SKINIT
:
2253 case VMCB_EXIT_MONITOR
:
2254 case VMCB_EXIT_MWAIT_UNCOND
:
2256 printf("Unhandled vmexit:\n"
2257 " code: 0x%" PRIx64
"\n"
2258 " info1: 0x%" PRIx64
"\n"
2259 " info2: 0x%" PRIx64
"\n",
2260 vmcb
->control
.exit_code
,
2261 vmcb
->control
.exit_info_1
,
2262 vmcb
->control
.exit_info_2
);
2265 kvm_run
.exit_reason
= KVM_EXIT_UNKNOWN
;
/* Bound the number of consecutive in-kernel runs before yielding. */
2269 if (num_runs
== 20) //TODO: make this a #define
2275 /* we're going up to userspace - set the out fields of kvm_run: */
2277 #define IF_MASK 0x00000200
2278 kvm_run
.if_flag
= !!(vcpu
->vmcb
->save
.rflags
& IF_MASK
);
2280 /* TODO: kvm adds a check to see if in-kernel interrupt queues are empty */
2281 kvm_run
.ready_for_interrupt_injection
= kvm_run
.if_flag
&&
2282 !vcpu
->vmcb
->control
.intr_shadow
;
2284 /* TODO kvm_run.ready_for_nmi_injection = ...; */
2286 kvm_run
.cr8
= fkvm_get_cr8(vcpu
);
2289 /* TODO: check copyout ret val */
2290 copyout(&kvm_run
, uap
->run
, sizeof(struct kvm_run
));
2291 // printf("sizeof(struct kvm_run) = %" PRIu64 "\n", sizeof(struct kvm_run));
/*
 * fkvm_create_vcpu - syscall handler: create a vcpu for the calling
 * process's guest VM, register it with the guestvm, and bind it to the
 * calling thread so later syscalls (vm_run, get/set_regs) can find it.
 */
2297 fkvm_create_vcpu(struct thread
*td
, struct fkvm_create_vcpu_args
*uap
)
2299 struct guestvm
*guest_vm
;
2305 guest_vm
= PROC_GET_GUESTVM(td
->td_proc
);
2306 if (guest_vm
== NULL
) {
2307 printf("PROC_GET_GUESTVM -> NULL\n");
2312 printf("fkvm_create_vcpu: td = %p\n", td
);
2313 vcpu
= fkvm_vcpu_create(guest_vm
);
2314 fkvm_guestvm_add_vcpu(guest_vm
, vcpu
);
2316 TD_SET_VCPU(td
, vcpu
);
2317 printf("fkvm_create_vcpu: vcpu = %p\n", vcpu
);
/*
 * fkvm_check_cpu_extension - probe the CPU for usable AMD SVM support via
 * CPUID and the VM_CR MSR.  Returns KERN_SUCCESS when SVM can be enabled,
 * KERN_FAILURE otherwise (unsupported, disabled by BIOS, or locked).
 */
2322 fkvm_check_cpu_extension(void)
2328 printf("fkvm_check_cpu_extension\n");
2330 /* Assumption: the architecture supports the cpuid instruction */
2332 /* Check if CPUID extended function 8000_0001h is supported. */
2333 do_cpuid(0x80000000, regs
);
2334 cpu_exthigh
= regs
[0];
2336 printf("cpu_exthigh = %u\n", cpu_exthigh
);
2338 if(cpu_exthigh
>= 0x80000001) {
2339 /* Execute CPUID extended function 8000_0001h */
2340 do_cpuid(0x80000001, regs
);
2341 printf("EAX = %u\n", regs
[0]);
/* NOTE(review): per AMD APM, the SVM feature flag is
 * CPUID Fn8000_0001 ECX bit 2 (regs[2] & 0x4), not EAX bit 1 as tested
 * here — verify this mask/register against the AMD manual. */
2343 if((regs
[0] & 0x2) == 0) { /* Check SVM bit */
2344 printf("SVM not available\n");
2345 goto fail
; /* SVM not available */
2348 vmcr
= rdmsr(0xc0010114); /* Read VM_CR MSR */
/* NOTE(review): VM_CR bit 3 is LOCK; SVMDIS is bit 4 (0x10) — verify
 * whether 0x8 is the intended bit. */
2349 if((vmcr
& 0x8) == 0) { /* Check SVMDIS bit */
2350 printf("vmcr = %" PRIx64
"\n", vmcr
);
2351 printf("SVM allowed\n");
2352 return KERN_SUCCESS
; /* SVM allowed */
2355 /* Execute CPUID extended function 8000_000ah */
2356 do_cpuid(0x8000000a, regs
);
2357 if((regs
[3] & 0x2) == 0) { /* Check SVM_LOCK bit */
2358 /* SVM disabled at bios; not unlockable.
2359 * User must change a BIOS setting to enable SVM.
2361 printf("EDX = %u\n", regs
[3]);
2362 printf("SVM disabled at bios\n");
2366 * SVM may be unlockable;
2367 * consult the BIOS or TPM to obtain the key.
2369 printf("EDX = %u\n", regs
[3]);
2370 printf("SVM maybe unlockable\n");
2375 return KERN_FAILURE
;
2379 fkvm_proc_exit(void *arg
, struct proc
*p
)
2381 struct guestvm
*guest_vm
;
2383 guest_vm
= PROC_GET_GUESTVM(p
);
2384 if (guest_vm
== NULL
)
2387 fkvm_destroy_vm(guest_vm
);
2388 PROC_SET_GUESTVM(p
, NULL
);
/*
 * fkvm_load - module initialization (run from SYSINIT): verify SVM support,
 * register the process-exit cleanup hook, allocate/initialize the host-save
 * area and the I/O- and MSR-permission bitmaps, enable SVM in EFER, and
 * point MSR_VM_HSAVE_PA at the host save area.
 * NOTE(review): local declarations, the guard against double-load and the
 * allocation error checks were lost in this extraction.
 */
2392 fkvm_load(void *unused
)
2397 printf("fkvm_load\n");
2398 printf("sizeof(struct vmcb) = %" PRIx64
"\n", sizeof(struct vmcb
));
2404 /* check if SVM is supported */
2405 error
= fkvm_check_cpu_extension();
2406 if(error
!= KERN_SUCCESS
) {
2407 printf("ERROR: SVM extension not available\n");
2411 exit_tag
= EVENTHANDLER_REGISTER(process_exit
, fkvm_proc_exit
, NULL
,
2412 EVENTHANDLER_PRI_ANY
);
2414 /* allocate structures */
2415 hsave_area
= fkvm_hsave_area_alloc();
2416 iopm
= fkvm_iopm_alloc();
2417 msrpm
= fkvm_msrpm_alloc();
2419 /* Initialize structures */
2420 fkvm_hsave_area_init(hsave_area
);
2421 fkvm_iopm_init(iopm
);
2422 fkvm_msrpm_init(msrpm
);
2424 /* Enable SVM in EFER */
2425 efer
= rdmsr(MSR_EFER
);
2426 printf("EFER = %" PRIx64
"\n", efer
);
2427 wrmsr(MSR_EFER
, efer
| EFER_SVME
);
2428 efer
= rdmsr(MSR_EFER
);
2429 printf("new EFER = %" PRIx64
"\n", efer
);
2431 /* Write Host save address in MSR_VM_HSAVE_PA */
2432 wrmsr(MSR_VM_HSAVE_PA
, vtophys(hsave_area
));
/* Run fkvm_load during boot/module load at pseudo-device init time. */
2436 SYSINIT(fkvm
, SI_SUB_PSEUDO
, SI_ORDER_MIDDLE
, fkvm_load
, NULL
);
/*
 * fkvm_unload - module teardown (run from SYSUNINIT): deregister the
 * process-exit hook and free the MSR-permission bitmap, I/O-permission
 * bitmap and host-save area.
 * NOTE(review): the not-loaded guard, NULL resets and several braces were
 * lost in this extraction.
 */
2439 fkvm_unload(void *unused
)
2441 printf("fkvm_unload\n");
2444 printf("fkvm_unload: fkvm not loaded");
2448 EVENTHANDLER_DEREGISTER(process_exit
, exit_tag
);
2450 if (msrpm
!= NULL
) {
/* BUG(review): this branch tests msrpm but frees iopm — it should be
 * fkvm_msrpm_free(msrpm).  As written, iopm is freed here and then freed
 * again below (double free), while msrpm leaks. */
2451 fkvm_msrpm_free(iopm
);
2455 fkvm_iopm_free(iopm
);
2458 if (hsave_area
!= NULL
) {
2459 fkvm_hsave_area_free(hsave_area
);
/* Run fkvm_unload at shutdown/module unload, mirroring the SYSINIT above. */
2463 SYSUNINIT(fkvm
, SI_SUB_PSEUDO
, SI_ORDER_MIDDLE
, fkvm_unload
, NULL
);