#include "kvm/kvm-cpu.h"

#include "kvm/symbol.h"
#include "kvm/util.h"
#include "kvm/kvm.h"

#include <asm/msr-index.h>

#include <sys/ioctl.h>
#include <sys/mman.h>
#include <signal.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <stdio.h>

#define PAGE_SIZE (sysconf(_SC_PAGE_SIZE))

extern struct kvm_cpu *kvm_cpus[KVM_NR_CPUS];
extern __thread struct kvm_cpu *current_kvm_cpu;
static inline bool is_in_protected_mode(struct kvm_cpu *vcpu)
{
        /* CR0 bit 0 (PE) is set once the guest has entered protected mode. */
        return vcpu->sregs.cr0 & 0x01;
}
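/*
 * Real-mode addresses are segmented: the flat address is the code segment
 * selector shifted left by four plus the instruction pointer. For example,
 * cs:ip = 0xf000:0xfff0 (the reset vector) maps to the flat address 0xffff0.
 * The helper below applies exactly that calculation when the guest is not
 * yet in protected mode.
 */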
static inline u64 ip_to_flat(struct kvm_cpu *vcpu, u64 ip)
{
        u64 cs;

        /*
         * NOTE! We should take code segment base address into account here.
         * Luckily it's usually zero because Linux uses flat memory model.
         */
        if (is_in_protected_mode(vcpu))
                return ip;

        cs = vcpu->sregs.cs.selector;

        return ip + (cs << 4);
}
static inline u32 selector_to_base(u16 selector)
{
        /*
         * KVM on Intel requires 'base' to be 'selector * 16' in real mode.
         */
        return (u32)selector * 16;
}
static struct kvm_cpu *kvm_cpu__new(struct kvm *kvm)
{
        struct kvm_cpu *vcpu;

        vcpu = calloc(1, sizeof *vcpu);
        if (!vcpu)
                return NULL;

        vcpu->kvm = kvm;

        return vcpu;
}
void kvm_cpu__delete(struct kvm_cpu *vcpu)
{
        free(vcpu->msrs);
        free(vcpu);
}
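/*
 * VCPU bring-up follows the standard KVM sequence: create the vcpu with
 * KVM_CREATE_VCPU, ask the kernel how large the shared 'struct kvm_run'
 * region is with KVM_GET_VCPU_MMAP_SIZE, and mmap() that region from the
 * vcpu fd. If KVM_CAP_COALESCED_MMIO is available, the extension check
 * returns the page offset of the coalesced-MMIO ring inside that same
 * mapping.
 */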
struct kvm_cpu *kvm_cpu__init(struct kvm *kvm, unsigned long cpu_id)
{
        struct kvm_cpu *vcpu;
        int coalesced_offset;
        int mmap_size;

        vcpu = kvm_cpu__new(kvm);
        if (!vcpu)
                return NULL;

        vcpu->cpu_id = cpu_id;

        vcpu->vcpu_fd = ioctl(vcpu->kvm->vm_fd, KVM_CREATE_VCPU, cpu_id);
        if (vcpu->vcpu_fd < 0)
                die_perror("KVM_CREATE_VCPU ioctl");

        mmap_size = ioctl(vcpu->kvm->sys_fd, KVM_GET_VCPU_MMAP_SIZE, 0);
        if (mmap_size < 0)
                die_perror("KVM_GET_VCPU_MMAP_SIZE ioctl");

        vcpu->kvm_run = mmap(NULL, mmap_size, PROT_RW, MAP_SHARED, vcpu->vcpu_fd, 0);
        if (vcpu->kvm_run == MAP_FAILED)
                die("unable to mmap vcpu fd");

        coalesced_offset = ioctl(kvm->sys_fd, KVM_CHECK_EXTENSION, KVM_CAP_COALESCED_MMIO);
        if (coalesced_offset)
                vcpu->ring = (void *)vcpu->kvm_run + (coalesced_offset * PAGE_SIZE);

        vcpu->is_running = true;

        return vcpu;
}
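/*
 * Typical usage, as a sketch only: the VM core creates one vcpu per guest
 * CPU and then drives each one from its own thread, e.g.
 *
 *      struct kvm_cpu *cpu = kvm_cpu__init(kvm, 0);
 *      if (cpu)
 *              kvm_cpu__start(cpu);
 *
 * kvm_cpu__start() blocks in the KVM_RUN loop until the vcpu stops running.
 */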
void kvm_cpu__enable_singlestep(struct kvm_cpu *vcpu)
{
        struct kvm_guest_debug debug = {
                .control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP,
        };

        if (ioctl(vcpu->vcpu_fd, KVM_SET_GUEST_DEBUG, &debug) < 0)
                pr_warning("KVM_SET_GUEST_DEBUG failed");
}
static struct kvm_msrs *kvm_msrs__new(size_t nmsrs)
{
        struct kvm_msrs *msrs = calloc(1, sizeof(*msrs) + (sizeof(struct kvm_msr_entry) * nmsrs));

        if (!msrs)
                die("out of memory");

        return msrs;
}
#define KVM_MSR_ENTRY(_index, _data) \
        (struct kvm_msr_entry) { .index = _index, .data = _data }
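/*
 * 'struct kvm_msrs' is a variable-sized header followed by an array of
 * 'struct kvm_msr_entry'. The setup below fills a generously sized buffer
 * with the MSRs we want reset to zero and hands the whole batch to KVM in
 * a single KVM_SET_MSRS call.
 */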
static void kvm_cpu__setup_msrs(struct kvm_cpu *vcpu)
{
        unsigned long ndx = 0;

        vcpu->msrs = kvm_msrs__new(100);

        vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_CS, 0x0);
        vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_ESP, 0x0);
        vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_EIP, 0x0);

        vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_STAR, 0x0);
        vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_CSTAR, 0x0);
        vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_KERNEL_GS_BASE, 0x0);
        vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_SYSCALL_MASK, 0x0);
        vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_LSTAR, 0x0);

        vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_TSC, 0x0);

        vcpu->msrs->nmsrs = ndx;

        if (ioctl(vcpu->vcpu_fd, KVM_SET_MSRS, vcpu->msrs) < 0)
                die_perror("KVM_SET_MSRS failed");
}
static void kvm_cpu__setup_fpu(struct kvm_cpu *vcpu)
{
        vcpu->fpu = (struct kvm_fpu) {
                .fcw   = 0x37f,         /* x87 control word reset default */
                .mxcsr = 0x1f80,        /* MXCSR reset default: all SSE exceptions masked */
        };

        if (ioctl(vcpu->vcpu_fd, KVM_SET_FPU, &vcpu->fpu) < 0)
                die_perror("KVM_SET_FPU failed");
}
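/*
 * The general-purpose registers are set up for a 16-bit real-mode boot:
 * bit 1 of RFLAGS is architecturally reserved and always reads as one, so
 * 0x2 is the canonical "all flags clear" value, and RIP/RSP are taken from
 * the boot protocol values recorded in struct kvm.
 */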
static void kvm_cpu__setup_regs(struct kvm_cpu *vcpu)
{
        vcpu->regs = (struct kvm_regs) {
                /* We start the guest in 16-bit real mode */
                .rflags = 0x0000000000000002ULL,

                .rip = vcpu->kvm->boot_ip,
                .rsp = vcpu->kvm->boot_sp,
                .rbp = vcpu->kvm->boot_sp,
        };

        if (vcpu->regs.rip > USHRT_MAX)
                die("ip 0x%llx is too high for real mode", (u64) vcpu->regs.rip);

        if (ioctl(vcpu->vcpu_fd, KVM_SET_REGS, &vcpu->regs) < 0)
                die_perror("KVM_SET_REGS failed");
}
static void kvm_cpu__setup_sregs(struct kvm_cpu *vcpu)
{
        if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &vcpu->sregs) < 0)
                die_perror("KVM_GET_SREGS failed");

        vcpu->sregs.cs.selector = vcpu->kvm->boot_selector;
        vcpu->sregs.cs.base = selector_to_base(vcpu->kvm->boot_selector);
        vcpu->sregs.ss.selector = vcpu->kvm->boot_selector;
        vcpu->sregs.ss.base = selector_to_base(vcpu->kvm->boot_selector);
        vcpu->sregs.ds.selector = vcpu->kvm->boot_selector;
        vcpu->sregs.ds.base = selector_to_base(vcpu->kvm->boot_selector);
        vcpu->sregs.es.selector = vcpu->kvm->boot_selector;
        vcpu->sregs.es.base = selector_to_base(vcpu->kvm->boot_selector);
        vcpu->sregs.fs.selector = vcpu->kvm->boot_selector;
        vcpu->sregs.fs.base = selector_to_base(vcpu->kvm->boot_selector);
        vcpu->sregs.gs.selector = vcpu->kvm->boot_selector;
        vcpu->sregs.gs.base = selector_to_base(vcpu->kvm->boot_selector);

        if (ioctl(vcpu->vcpu_fd, KVM_SET_SREGS, &vcpu->sregs) < 0)
                die_perror("KVM_SET_SREGS failed");
}
/*
 * kvm_cpu__reset_vcpu - reset virtual CPU to a known state
 */
void kvm_cpu__reset_vcpu(struct kvm_cpu *vcpu)
{
        kvm_cpu__setup_sregs(vcpu);
        kvm_cpu__setup_regs(vcpu);
        kvm_cpu__setup_fpu(vcpu);
        kvm_cpu__setup_msrs(vcpu);
}
static void print_dtable(const char *name, struct kvm_dtable *dtable)
{
        printf(" %s %016llx %08hx\n",
                name, (u64) dtable->base, (u16) dtable->limit);
}
static void print_segment(const char *name, struct kvm_segment *seg)
{
        printf(" %s %04hx %016llx %08x %02hhx %x %x %x %x %x %x %x\n",
                name, (u16) seg->selector, (u64) seg->base, (u32) seg->limit,
                (u8) seg->type, seg->present, seg->dpl, seg->db, seg->s, seg->l, seg->g, seg->avl);
}
void kvm_cpu__show_registers(struct kvm_cpu *vcpu)
{
        unsigned long cr0, cr2, cr3;
        unsigned long cr4, cr8;
        unsigned long rax, rbx, rcx;
        unsigned long rdx, rsi, rdi;
        unsigned long rbp, r8, r9;
        unsigned long r10, r11, r12;
        unsigned long r13, r14, r15;
        unsigned long rip, rsp;
        struct kvm_sregs sregs;
        unsigned long rflags;
        struct kvm_regs regs;
        int i;

        if (ioctl(vcpu->vcpu_fd, KVM_GET_REGS, &regs) < 0)
                die("KVM_GET_REGS failed");

        rflags = regs.rflags;

        rip = regs.rip; rsp = regs.rsp;
        rax = regs.rax; rbx = regs.rbx; rcx = regs.rcx;
        rdx = regs.rdx; rsi = regs.rsi; rdi = regs.rdi;
        rbp = regs.rbp; r8 = regs.r8; r9 = regs.r9;
        r10 = regs.r10; r11 = regs.r11; r12 = regs.r12;
        r13 = regs.r13; r14 = regs.r14; r15 = regs.r15;

        printf("\n Registers:\n");
        printf( " ----------\n");
        printf(" rip: %016lx rsp: %016lx flags: %016lx\n", rip, rsp, rflags);
        printf(" rax: %016lx rbx: %016lx rcx: %016lx\n", rax, rbx, rcx);
        printf(" rdx: %016lx rsi: %016lx rdi: %016lx\n", rdx, rsi, rdi);
        printf(" rbp: %016lx r8:  %016lx r9:  %016lx\n", rbp, r8, r9);
        printf(" r10: %016lx r11: %016lx r12: %016lx\n", r10, r11, r12);
        printf(" r13: %016lx r14: %016lx r15: %016lx\n", r13, r14, r15);

        if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &sregs) < 0)
                die("KVM_GET_SREGS failed");

        cr0 = sregs.cr0; cr2 = sregs.cr2; cr3 = sregs.cr3;
        cr4 = sregs.cr4; cr8 = sregs.cr8;

        printf(" cr0: %016lx cr2: %016lx cr3: %016lx\n", cr0, cr2, cr3);
        printf(" cr4: %016lx cr8: %016lx\n", cr4, cr8);
        printf("\n Segment registers:\n");
        printf( " ------------------\n");
        printf(" register selector base limit type p dpl db s l g avl\n");
        print_segment("cs ", &sregs.cs);
        print_segment("ss ", &sregs.ss);
        print_segment("ds ", &sregs.ds);
        print_segment("es ", &sregs.es);
        print_segment("fs ", &sregs.fs);
        print_segment("gs ", &sregs.gs);
        print_segment("tr ", &sregs.tr);
        print_segment("ldt", &sregs.ldt);
        print_dtable("gdt", &sregs.gdt);
        print_dtable("idt", &sregs.idt);

        printf("\n APIC:\n");
        printf( " -----\n");
        printf(" efer: %016llx apic base: %016llx nmi: %s\n",
                (u64) sregs.efer, (u64) sregs.apic_base,
                (vcpu->kvm->nmi_disabled ? "disabled" : "enabled"));

        printf("\n Interrupt bitmap:\n");
        printf( " -----------------\n");
        for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++)
                printf(" %016llx", (u64) sregs.interrupt_bitmap[i]);
        printf("\n");
}
#define MAX_SYM_LEN 128
void kvm_cpu__show_code(struct kvm_cpu *vcpu)
{
        unsigned int code_bytes = 64;
        unsigned int code_prologue = code_bytes * 43 / 64;
        unsigned int code_len = code_bytes;
        char sym[MAX_SYM_LEN];
        unsigned char c;
        unsigned int i;
        u8 *ip;

        if (ioctl(vcpu->vcpu_fd, KVM_GET_REGS, &vcpu->regs) < 0)
                die("KVM_GET_REGS failed");

        if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &vcpu->sregs) < 0)
                die("KVM_GET_SREGS failed");

        ip = guest_flat_to_host(vcpu->kvm, ip_to_flat(vcpu, vcpu->regs.rip) - code_prologue);

        printf("\n Code:\n");
        printf( " -----\n");

        symbol__lookup(vcpu->kvm, vcpu->regs.rip, sym, MAX_SYM_LEN);

        printf(" rip: [<%016lx>] %s\n\n", (unsigned long) vcpu->regs.rip, sym);

        for (i = 0; i < code_len; i++, ip++) {
                if (!host_ptr_in_ram(vcpu->kvm, ip))
                        break;

                c = *ip;

                /* Highlight the byte the guest is about to execute. */
                if (ip == guest_flat_to_host(vcpu->kvm, ip_to_flat(vcpu, vcpu->regs.rip)))
                        printf(" <%02x>", c);
                else
                        printf(" %02x", c);
        }

        printf("\n");

        printf("\n Stack:\n");
        printf( " ------\n");
        kvm__dump_mem(vcpu->kvm, vcpu->regs.rsp, 32);
}
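/*
 * Page table dumping below walks the four paging levels by hand, starting
 * from the physical address in CR3 and masking off the low attribute bits
 * of each entry to get the next table's address. If bit 7 (PS) is set in
 * the level-2 entry, that entry maps a large page and there is no level-1
 * table to show.
 */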
void kvm_cpu__show_page_tables(struct kvm_cpu *vcpu)
{
        u64 *pte4;
        u64 *pte3;
        u64 *pte2;
        u64 *pte1;

        if (!is_in_protected_mode(vcpu))
                return;

        if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &vcpu->sregs) < 0)
                die("KVM_GET_SREGS failed");

        pte4 = guest_flat_to_host(vcpu->kvm, vcpu->sregs.cr3);
        if (!host_ptr_in_ram(vcpu->kvm, pte4))
                return;

        pte3 = guest_flat_to_host(vcpu->kvm, (*pte4 & ~0xfff));
        if (!host_ptr_in_ram(vcpu->kvm, pte3))
                return;

        pte2 = guest_flat_to_host(vcpu->kvm, (*pte3 & ~0xfff));
        if (!host_ptr_in_ram(vcpu->kvm, pte2))
                return;

        pte1 = guest_flat_to_host(vcpu->kvm, (*pte2 & ~0xfff));
        if (!host_ptr_in_ram(vcpu->kvm, pte1))
                return;

        printf("Page Tables:\n");
        if (*pte2 & (1 << 7))
                printf(" pte4: %016llx pte3: %016llx"
                        " pte2: %016llx\n",
                        *pte4, *pte3, *pte2);
        else
                printf(" pte4: %016llx pte3: %016llx pte2: %016"
                        "llx pte1: %016llx\n",
                        *pte4, *pte3, *pte2, *pte1);
}
void kvm_cpu__run(struct kvm_cpu *vcpu)
{
        int err;

        err = ioctl(vcpu->vcpu_fd, KVM_RUN, 0);
        if (err && (errno != EINTR && errno != EAGAIN))
                die_perror("KVM_RUN failed");
}
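/*
 * SIGKVMEXIT and SIGKVMPAUSE are delivered to the vcpu threads to kick them
 * out of the blocking KVM_RUN ioctl: the ioctl then returns with EINTR,
 * which kvm_cpu__run() above deliberately tolerates, and the run loop
 * notices the updated is_running/paused state.
 */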
static void kvm_cpu_signal_handler(int signum)
{
        if (signum == SIGKVMEXIT) {
                if (current_kvm_cpu && current_kvm_cpu->is_running) {
                        current_kvm_cpu->is_running = false;
                        pthread_kill(pthread_self(), SIGKVMEXIT);
                }
        } else if (signum == SIGKVMPAUSE) {
                current_kvm_cpu->paused = 1;
        }
}
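/*
 * Coalesced MMIO: KVM batches writes to registered regions into a ring
 * shared through the vcpu mmap instead of exiting for each access. After
 * every KVM_RUN we drain the ring, replaying the recorded writes through
 * the normal MMIO emulation path.
 */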
static void kvm_cpu__handle_coalesced_mmio(struct kvm_cpu *cpu)
{
        if (cpu->ring) {
                while (cpu->ring->first != cpu->ring->last) {
                        struct kvm_coalesced_mmio *m;

                        m = &cpu->ring->coalesced_mmio[cpu->ring->first];
                        kvm__emulate_mmio(cpu->kvm,
                                          m->phys_addr,
                                          &m->data[0],
                                          m->len,
                                          1);
                        cpu->ring->first = (cpu->ring->first + 1) % KVM_COALESCED_MMIO_MAX;
                }
        }
}
void kvm_cpu__reboot(void)
{
        int i;

        for (i = 0; i < KVM_NR_CPUS; i++)
                if (kvm_cpus[i])
                        pthread_kill(kvm_cpus[i]->thread, SIGKVMEXIT);
}
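/*
 * Main vcpu loop: SIGALRM is blocked on this thread so the periodic timer
 * signal used elsewhere in the tool is not delivered here, the exit/pause
 * handlers are installed, and the vcpu state is initialized before entering
 * KVM_RUN. Each exit reason is either handled inline (debug dumps),
 * forwarded to the I/O and MMIO emulation helpers, or treated as fatal;
 * pending coalesced MMIO is drained after every exit.
 */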
int kvm_cpu__start(struct kvm_cpu *cpu)
{
        sigset_t sigset;

        sigemptyset(&sigset);
        sigaddset(&sigset, SIGALRM);

        pthread_sigmask(SIG_BLOCK, &sigset, NULL);

        signal(SIGKVMEXIT, kvm_cpu_signal_handler);
        signal(SIGKVMPAUSE, kvm_cpu_signal_handler);

        kvm_cpu__setup_cpuid(cpu);
        kvm_cpu__reset_vcpu(cpu);

        if (cpu->kvm->single_step)
                kvm_cpu__enable_singlestep(cpu);

        while (cpu->is_running) {
                if (cpu->paused) {
                        kvm__notify_paused();
                        cpu->paused = 0;
                }

                kvm_cpu__run(cpu);

                switch (cpu->kvm_run->exit_reason) {
                case KVM_EXIT_UNKNOWN:
                        break;
                case KVM_EXIT_DEBUG:
                        kvm_cpu__show_registers(cpu);
                        kvm_cpu__show_code(cpu);
                        break;
                case KVM_EXIT_IO: {
                        bool ret;

                        ret = kvm__emulate_io(cpu->kvm,
                                              cpu->kvm_run->io.port,
                                              (u8 *)cpu->kvm_run +
                                              cpu->kvm_run->io.data_offset,
                                              cpu->kvm_run->io.direction,
                                              cpu->kvm_run->io.size,
                                              cpu->kvm_run->io.count);

                        if (!ret)
                                goto panic_kvm;
                        break;
                }
                case KVM_EXIT_MMIO: {
                        bool ret;

                        ret = kvm__emulate_mmio(cpu->kvm,
                                                cpu->kvm_run->mmio.phys_addr,
                                                cpu->kvm_run->mmio.data,
                                                cpu->kvm_run->mmio.len,
                                                cpu->kvm_run->mmio.is_write);

                        if (!ret)
                                goto panic_kvm;
                        break;
                }
                case KVM_EXIT_INTR:
                        if (cpu->is_running)
                                break;
                        goto exit_kvm;
                case KVM_EXIT_SHUTDOWN:
                        goto exit_kvm;
                default:
                        goto panic_kvm;
                }
                kvm_cpu__handle_coalesced_mmio(cpu);
        }

exit_kvm:
        return 0;

panic_kvm:
        return 1;
}