/*
 * Kernel-based Virtual Machine control library
 *
 * This library provides an API to control the kvm hardware virtualization
 * module.
 *
 * Copyright (C) 2006 Qumranet
 *
 * Authors:
 *	Avi Kivity   <avi@qumranet.com>
 *	Yaniv Kamay  <yaniv@qumranet.com>
 *
 * This work is licensed under the GNU LGPL license, version 2.
 */

#include <unistd.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <errno.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/kvm.h>

#include "kvmctl.h"	/* declares kvm_context_t and struct kvm_callbacks */
#define EXPECTED_KVM_API_VERSION 3

#if EXPECTED_KVM_API_VERSION != KVM_API_VERSION
#error libkvm: userspace and kernel version mismatch
#endif

#define PAGE_SIZE 4096ul
/**
 * \brief The KVM context
 *
 * All state bound to one open instance of /dev/kvm.
 */
struct kvm_context {
	/// File descriptor to /dev/kvm
	int fd;
	/// Callbacks that KVM uses to emulate various unvirtualizable functionality
	struct kvm_callbacks *callbacks;
	/// Opaque pointer handed back to every callback
	void *opaque;
	/// A pointer to the memory used as the physical memory for the guest
	void *physical_memory;
};
struct translation_cache {
	unsigned long linear;
	void *physical;
};

static void translation_cache_init(struct translation_cache *tr)
{
	tr->physical = NULL;
}
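/*
 * Translate a guest linear address into a pointer into the guest RAM
 * mapping.  The most recently translated page is cached so that string
 * I/O does not issue one KVM_TRANSLATE ioctl per element.
 */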
static int translate(kvm_context_t kvm, int vcpu, struct translation_cache *tr,
		     unsigned long linear, void **physical)
{
	unsigned long page = linear & ~(PAGE_SIZE - 1);
	unsigned long offset = linear & (PAGE_SIZE - 1);

	if (!(tr->physical && tr->linear == page)) {
		struct kvm_translation kvm_tr;
		int r;

		kvm_tr.linear_address = page;
		kvm_tr.vcpu = vcpu;

		r = ioctl(kvm->fd, KVM_TRANSLATE, &kvm_tr);
		if (r == -1)
			return -errno;
		if (!kvm_tr.valid)
			return -EFAULT;

		tr->linear = page;
		tr->physical = kvm->physical_memory + kvm_tr.physical_address;
	}
	*physical = tr->physical + offset;
	return 0;
}
kvm_context_t kvm_init(struct kvm_callbacks *callbacks,
		       void *opaque)
{
	int fd;
	kvm_context_t kvm;
	int r;

	fd = open("/dev/kvm", O_RDWR);
	if (fd == -1) {
		perror("open /dev/kvm");
		return NULL;
	}
	r = ioctl(fd, KVM_GET_API_VERSION, 0);
	if (r == -1) {
		fprintf(stderr, "kvm kernel version too old\n");
		goto out_close;
	}
	if (r < EXPECTED_KVM_API_VERSION) {
		fprintf(stderr, "kvm kernel version too old\n");
		goto out_close;
	}
	if (r > EXPECTED_KVM_API_VERSION) {
		fprintf(stderr, "kvm userspace version too old\n");
		goto out_close;
	}
	kvm = malloc(sizeof(*kvm));
	if (!kvm)
		goto out_close;
	kvm->fd = fd;
	kvm->callbacks = callbacks;
	kvm->opaque = opaque;
	return kvm;

out_close:
	close(fd);
	return NULL;
}
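/*
 * Typical lifetime of a context (illustrative sketch only; `my_callbacks`,
 * `my_opaque` and the 128 MB size are hypothetical, not part of this
 * library):
 *
 *	void *guest_ram;
 *	kvm_context_t kvm = kvm_init(&my_callbacks, my_opaque);
 *	if (!kvm)
 *		return;
 *	if (kvm_create(kvm, 128 * 1024 * 1024, &guest_ram) == 0)
 *		kvm_run(kvm, 0);	// returns on an unhandled error
 *	kvm_finalize(kvm);
 */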
void kvm_finalize(kvm_context_t kvm)
{
	close(kvm->fd);
	free(kvm);
}
int kvm_create(kvm_context_t kvm, unsigned long memory, void **vm_mem)
{
	unsigned long dosmem = 0xa0000;
	unsigned long exmem = 0xc0000;
	int fd = kvm->fd;
	int r;
	/* Conventional memory below the 640K VGA hole; each region gets
	 * its own slot. */
	struct kvm_memory_region low_memory = {
		.slot = 3,
		.memory_size = memory < dosmem ? memory : dosmem,
		.guest_phys_addr = 0,
	};
	/* Extended memory above 0xc0000. */
	struct kvm_memory_region extended_memory = {
		.slot = 0,
		.memory_size = memory < exmem ? 0 : memory - exmem,
		.guest_phys_addr = exmem,
	};

	/* 640K should be enough. */
	r = ioctl(fd, KVM_SET_MEMORY_REGION, &low_memory);
	if (r == -1) {
		fprintf(stderr, "kvm_create_memory_region: %m\n");
		return -1;
	}
	if (extended_memory.memory_size) {
		r = ioctl(fd, KVM_SET_MEMORY_REGION, &extended_memory);
		if (r == -1) {
			fprintf(stderr, "kvm_create_memory_region: %m\n");
			return -1;
		}
	}

	*vm_mem = mmap(0, memory, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
	if (*vm_mem == MAP_FAILED) {
		fprintf(stderr, "mmap: %m\n");
		return -1;
	}
	kvm->physical_memory = *vm_mem;

	r = ioctl(fd, KVM_CREATE_VCPU, 0);
	if (r == -1) {
		fprintf(stderr, "kvm_create_vcpu: %m\n");
		return -1;
	}
	return 0;
}
void *kvm_create_phys_mem(kvm_context_t kvm, unsigned long phys_start,
			  unsigned long len, int slot, int log, int writable)
{
	void *ptr;
	int r;
	int fd = kvm->fd;
	int prot = PROT_READ;
	struct kvm_memory_region memory = {
		.slot = slot,
		.memory_size = len,
		.guest_phys_addr = phys_start,
		.flags = log ? KVM_MEM_LOG_DIRTY_PAGES : 0,
	};

	r = ioctl(fd, KVM_SET_MEMORY_REGION, &memory);
	if (r == -1)
		return NULL;

	if (writable)
		prot |= PROT_WRITE;

	ptr = mmap(0, len, prot, MAP_SHARED, fd, phys_start);
	if (ptr == MAP_FAILED)
		return NULL;
	return ptr;
}
void kvm_destroy_phys_mem(kvm_context_t kvm, unsigned long phys_start,
			  unsigned long len)
{
	printf("kvm_destroy_phys_mem: implement me\n");
	exit(1);
}
int kvm_get_dirty_pages(kvm_context_t kvm, int slot, void *buf)
{
	int r;
	struct kvm_dirty_log log = {
		.slot = slot,
	};

	log.dirty_bitmap = buf;

	r = ioctl(kvm->fd, KVM_GET_DIRTY_LOG, &log);
	return r;
}
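/*
 * The dirty log is a bitmap with one bit per page of the slot, so a
 * caller might size the buffer like this (sketch; `len` and `slot` are
 * the values previously passed to kvm_create_phys_mem):
 *
 *	void *bitmap = calloc(1, (len / PAGE_SIZE + 7) / 8);
 *	kvm_get_dirty_pages(kvm, slot, bitmap);
 */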
static int more_io(struct kvm_run *run, int first_time)
{
	/* A non-repeating instruction performs exactly one transfer. */
	if (!run->io.rep)
		return first_time;
	else
		return run->io.count != 0;
}
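/*
 * Emulate a port I/O exit.  For ordinary IN/OUT this performs a single
 * transfer through the inb/inw/inl or outb/outw/outl callbacks; for
 * string (INS/OUTS) and REP variants it loops, translating the guest
 * buffer address and stepping rsi/rdi (and rcx for REP) the way the
 * hardware instruction would.
 */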
static int handle_io(kvm_context_t kvm, struct kvm_run *run)
{
	uint16_t addr = run->io.port;
	struct kvm_regs regs;
	int first_time = 1;
	int delta;
	struct translation_cache tr;
	int _in = (run->io.direction == KVM_EXIT_IO_IN);
	int r;

	translation_cache_init(&tr);

	/* String and IN instructions need the guest register file. */
	if (run->io.string || _in) {
		regs.vcpu = run->vcpu;
		r = ioctl(kvm->fd, KVM_GET_REGS, &regs);
		if (r == -1)
			return -errno;
	}

	delta = run->io.string_down ? -run->io.size : run->io.size;

	while (more_io(run, first_time)) {
		void *value_addr;

		if (!run->io.string) {
			if (_in)
				value_addr = &regs.rax;
			else
				value_addr = &run->io.value;
		} else {
			r = translate(kvm, run->vcpu, &tr, run->io.address,
				      &value_addr);
			if (r) {
				fprintf(stderr, "failed translating I/O address %lx\n",
					(unsigned long)run->io.address);
				return r;
			}
		}

		switch (run->io.direction) {
		case KVM_EXIT_IO_IN: {
			switch (run->io.size) {
			case 1: {
				uint8_t value;
				r = kvm->callbacks->inb(kvm->opaque, addr, &value);
				*(uint8_t *)value_addr = value;
				break;
			}
			case 2: {
				uint16_t value;
				r = kvm->callbacks->inw(kvm->opaque, addr, &value);
				*(uint16_t *)value_addr = value;
				break;
			}
			case 4: {
				uint32_t value;
				r = kvm->callbacks->inl(kvm->opaque, addr, &value);
				*(uint32_t *)value_addr = value;
				break;
			}
			default:
				fprintf(stderr, "bad I/O size %d\n", run->io.size);
				return -EMSGSIZE;
			}
			break;
		}
		case KVM_EXIT_IO_OUT:
			switch (run->io.size) {
			case 1:
				r = kvm->callbacks->outb(kvm->opaque, addr,
							 *(uint8_t *)value_addr);
				break;
			case 2:
				r = kvm->callbacks->outw(kvm->opaque, addr,
							 *(uint16_t *)value_addr);
				break;
			case 4:
				r = kvm->callbacks->outl(kvm->opaque, addr,
							 *(uint32_t *)value_addr);
				break;
			default:
				fprintf(stderr, "bad I/O size %d\n", run->io.size);
				return -EMSGSIZE;
			}
			break;
		default:
			fprintf(stderr, "bad I/O direction %d\n", run->io.direction);
			return -EPROTO;
		}

		/* Step the guest-visible state as the instruction would. */
		if (run->io.string) {
			run->io.address += delta;
			switch (run->io.direction) {
			case KVM_EXIT_IO_IN:  regs.rdi += delta; break;
			case KVM_EXIT_IO_OUT: regs.rsi += delta; break;
			}
		}
		if (run->io.rep) {
			--regs.rcx;
			--run->io.count;
		}
		first_time = 0;
		if (r) {
			int savedret = r;

			r = ioctl(kvm->fd, KVM_SET_REGS, &regs);
			if (r == -1)
				return -errno;
			return savedret;
		}
	}

	if (run->io.string || _in) {
		r = ioctl(kvm->fd, KVM_SET_REGS, &regs);
		if (r == -1)
			return -errno;
	}

	run->emulated = 1;
	return 0;
}
int handle_debug(kvm_context_t kvm, struct kvm_run *run)
{
	return kvm->callbacks->debug(kvm->opaque, run->vcpu);
}
int kvm_get_regs(kvm_context_t kvm, int vcpu, struct kvm_regs *regs)
{
	regs->vcpu = vcpu;
	return ioctl(kvm->fd, KVM_GET_REGS, regs);
}

int kvm_set_regs(kvm_context_t kvm, int vcpu, struct kvm_regs *regs)
{
	regs->vcpu = vcpu;
	return ioctl(kvm->fd, KVM_SET_REGS, regs);
}

int kvm_get_sregs(kvm_context_t kvm, int vcpu, struct kvm_sregs *sregs)
{
	sregs->vcpu = vcpu;
	return ioctl(kvm->fd, KVM_GET_SREGS, sregs);
}

int kvm_set_sregs(kvm_context_t kvm, int vcpu, struct kvm_sregs *sregs)
{
	sregs->vcpu = vcpu;
	return ioctl(kvm->fd, KVM_SET_SREGS, sregs);
}
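/*
 * The register accessors are designed for a read-modify-write pattern,
 * e.g. (sketch):
 *
 *	struct kvm_regs regs;
 *	kvm_get_regs(kvm, 0, &regs);
 *	regs.rip = 0xfff0;
 *	kvm_set_regs(kvm, 0, &regs);
 */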
/*
 * Returns the list of MSRs supported by KVM.  The caller must free()
 * the returned list.
 */
struct kvm_msr_list *kvm_get_msr_list(kvm_context_t kvm)
{
	struct kvm_msr_list sizer, *msrs;
	int r, e;

	/* Probe with nmsrs == 0; the kernel fails with E2BIG and fills
	 * in the number of MSRs it wants to report. */
	sizer.nmsrs = 0;
	r = ioctl(kvm->fd, KVM_GET_MSR_INDEX_LIST, &sizer);
	if (r == -1 && errno != E2BIG)
		return NULL;
	msrs = malloc(sizeof *msrs + sizer.nmsrs * sizeof *msrs->indices);
	if (!msrs) {
		errno = ENOMEM;
		return NULL;
	}
	msrs->nmsrs = sizer.nmsrs;
	r = ioctl(kvm->fd, KVM_GET_MSR_INDEX_LIST, msrs);
	if (r == -1) {
		e = errno;
		free(msrs);
		errno = e;
		return NULL;
	}
	return msrs;
}
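/*
 * Example use of the MSR list (sketch):
 *
 *	struct kvm_msr_list *list = kvm_get_msr_list(kvm);
 *	if (list) {
 *		for (int i = 0; i < list->nmsrs; ++i)
 *			printf("msr 0x%x\n", list->indices[i]);
 *		free(list);
 *	}
 */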
int kvm_get_msrs(kvm_context_t kvm, int vcpu, struct kvm_msr_entry *msrs,
		 int n)
{
	struct kvm_msrs *kmsrs = malloc(sizeof *kmsrs + n * sizeof *msrs);
	int r, e;

	if (!kmsrs) {
		errno = ENOMEM;
		return -1;
	}
	kmsrs->nmsrs = n;
	kmsrs->vcpu = vcpu;
	memcpy(kmsrs->entries, msrs, n * sizeof *msrs);
	r = ioctl(kvm->fd, KVM_GET_MSRS, kmsrs);
	e = errno;
	memcpy(msrs, kmsrs->entries, n * sizeof *msrs);
	free(kmsrs);
	errno = e;
	return r;
}
int kvm_set_msrs(kvm_context_t kvm, int vcpu, struct kvm_msr_entry *msrs,
		 int n)
{
	struct kvm_msrs *kmsrs = malloc(sizeof *kmsrs + n * sizeof *msrs);
	int r, e;

	if (!kmsrs) {
		errno = ENOMEM;
		return -1;
	}
	kmsrs->nmsrs = n;
	kmsrs->vcpu = vcpu;
	memcpy(kmsrs->entries, msrs, n * sizeof *msrs);
	r = ioctl(kvm->fd, KVM_SET_MSRS, kmsrs);
	e = errno;
	free(kmsrs);
	errno = e;
	return r;
}
void kvm_show_regs(kvm_context_t kvm, int vcpu)
{
	int fd = kvm->fd;
	struct kvm_regs regs;
	int r;

	regs.vcpu = vcpu;
	r = ioctl(fd, KVM_GET_REGS, &regs);
	if (r == -1) {
		perror("KVM_GET_REGS");
		return;
	}
	fprintf(stderr,
		"rax %016llx rbx %016llx rcx %016llx rdx %016llx\n"
		"rsi %016llx rdi %016llx rsp %016llx rbp %016llx\n"
		"r8  %016llx r9  %016llx r10 %016llx r11 %016llx\n"
		"r12 %016llx r13 %016llx r14 %016llx r15 %016llx\n"
		"rip %016llx rflags %08llx\n",
		regs.rax, regs.rbx, regs.rcx, regs.rdx,
		regs.rsi, regs.rdi, regs.rsp, regs.rbp,
		regs.r8,  regs.r9,  regs.r10, regs.r11,
		regs.r12, regs.r13, regs.r14, regs.r15,
		regs.rip, regs.rflags);
}
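/*
 * CPUID exits are forwarded to the cpuid callback with the guest's
 * rax/rbx/rcx/rdx; bit 12 of the returned edx (the MTRR feature flag)
 * is masked off, since this library does not emulate MTRRs.
 */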
static int handle_cpuid(kvm_context_t kvm, struct kvm_run *run)
{
	struct kvm_regs regs;
	int r;

	kvm_get_regs(kvm, run->vcpu, &regs);
	r = kvm->callbacks->cpuid(kvm->opaque,
				  &regs.rax, &regs.rbx, &regs.rcx, &regs.rdx);
	regs.rdx &= ~(1ull << 12); /* disable mtrr support */
	kvm_set_regs(kvm, run->vcpu, &regs);
	return r;
}
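/*
 * MMIO exits are dispatched to the readb/readw/readl/readq and
 * writeb/writew/writel/writeq callbacks by access width.  For reads,
 * mmio_completed is set so that the kernel can complete the interrupted
 * instruction with the fetched data on the next KVM_RUN.
 */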
static int handle_mmio(kvm_context_t kvm, struct kvm_run *kvm_run)
{
	unsigned long addr = kvm_run->mmio.phys_addr;
	void *data = kvm_run->mmio.data;
	int r = -1;

	if (kvm_run->mmio.is_write) {
		switch (kvm_run->mmio.len) {
		case 1:
			r = kvm->callbacks->writeb(kvm->opaque, addr, *(uint8_t *)data);
			break;
		case 2:
			r = kvm->callbacks->writew(kvm->opaque, addr, *(uint16_t *)data);
			break;
		case 4:
			r = kvm->callbacks->writel(kvm->opaque, addr, *(uint32_t *)data);
			break;
		case 8:
			r = kvm->callbacks->writeq(kvm->opaque, addr, *(uint64_t *)data);
			break;
		}
	} else {
		switch (kvm_run->mmio.len) {
		case 1:
			r = kvm->callbacks->readb(kvm->opaque, addr, (uint8_t *)data);
			break;
		case 2:
			r = kvm->callbacks->readw(kvm->opaque, addr, (uint16_t *)data);
			break;
		case 4:
			r = kvm->callbacks->readl(kvm->opaque, addr, (uint32_t *)data);
			break;
		case 8:
			r = kvm->callbacks->readq(kvm->opaque, addr, (uint64_t *)data);
			break;
		}
		kvm_run->mmio_completed = 1;
	}
	return r;
}
static int handle_io_window(kvm_context_t kvm, struct kvm_run *kvm_run)
{
	return kvm->callbacks->io_window(kvm->opaque);
}

static int handle_halt(kvm_context_t kvm, struct kvm_run *kvm_run)
{
	return kvm->callbacks->halt(kvm->opaque, kvm_run->vcpu);
}

static int handle_shutdown(kvm_context_t kvm, struct kvm_run *kvm_run)
{
	return kvm->callbacks->shutdown(kvm->opaque, kvm_run->vcpu);
}

int try_push_interrupts(kvm_context_t kvm)
{
	return kvm->callbacks->try_push_interrupts(kvm->opaque);
}

static void post_kvm_run(kvm_context_t kvm, struct kvm_run *kvm_run)
{
	kvm->callbacks->post_kvm_run(kvm->opaque, kvm_run);
}

static void pre_kvm_run(kvm_context_t kvm, struct kvm_run *kvm_run)
{
	kvm->callbacks->pre_kvm_run(kvm->opaque, kvm_run);
}
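/*
 * Run the vcpu until an error occurs or a callback returns nonzero.
 * Each iteration pushes any pending interrupt, enters the guest with
 * KVM_RUN, and dispatches the resulting exit to one of the handlers
 * above; a zero handler return resumes the guest.
 */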
int kvm_run(kvm_context_t kvm, int vcpu)
{
	int r;
	int fd = kvm->fd;
	struct kvm_run kvm_run = {
		.vcpu = vcpu,
		.emulated = 0,
		.mmio_completed = 0,
	};

again:
	kvm_run.request_interrupt_window = try_push_interrupts(kvm);
	pre_kvm_run(kvm, &kvm_run);
	r = ioctl(fd, KVM_RUN, &kvm_run);
	post_kvm_run(kvm, &kvm_run);

	kvm_run.emulated = 0;
	kvm_run.mmio_completed = 0;
	if (r == -1 && errno != EINTR) {
		r = -errno;
		printf("kvm_run: %m\n");
		return r;
	}
	if (r == -1) {
		r = handle_io_window(kvm, &kvm_run);
		goto more;
	}
	switch (kvm_run.exit_type) {
	case KVM_EXIT_TYPE_FAIL_ENTRY:
		fprintf(stderr, "kvm_run: failed entry, reason %u\n",
			kvm_run.exit_reason & 0xffff);
		return -ENOEXEC;
	case KVM_EXIT_TYPE_VM_EXIT:
		switch (kvm_run.exit_reason) {
		case KVM_EXIT_UNKNOWN:
			fprintf(stderr, "unhandled vm exit: 0x%x\n",
				kvm_run.hw.hardware_exit_reason);
			kvm_show_regs(kvm, vcpu);
			abort();
			break;
		case KVM_EXIT_EXCEPTION:
			fprintf(stderr, "exception %d (%x)\n",
				kvm_run.ex.exception,
				kvm_run.ex.error_code);
			abort();
			break;
		case KVM_EXIT_IO:
			r = handle_io(kvm, &kvm_run);
			break;
		case KVM_EXIT_CPUID:
			r = handle_cpuid(kvm, &kvm_run);
			break;
		case KVM_EXIT_DEBUG:
			r = handle_debug(kvm, &kvm_run);
			break;
		case KVM_EXIT_MMIO:
			r = handle_mmio(kvm, &kvm_run);
			break;
		case KVM_EXIT_HLT:
			r = handle_halt(kvm, &kvm_run);
			break;
		case KVM_EXIT_IRQ_WINDOW_OPEN:
			break;
		case KVM_EXIT_SHUTDOWN:
			r = handle_shutdown(kvm, &kvm_run);
			break;
		default:
			fprintf(stderr, "unhandled vm exit: 0x%x\n", kvm_run.exit_reason);
			kvm_show_regs(kvm, vcpu);
			abort();
			break;
		}
		break;
	}
more:
	if (!r)
		goto again;
	return r;
}
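/*
 * Inject an external interrupt into the guest.  This is typically called
 * from the try_push_interrupts callback, i.e. once the guest is ready to
 * accept interrupts (cf. the KVM_EXIT_IRQ_WINDOW_OPEN handling above).
 */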
int kvm_inject_irq(kvm_context_t kvm, int vcpu, unsigned irq)
{
	struct kvm_interrupt intr;

	intr.vcpu = vcpu;
	intr.irq = irq;
	return ioctl(kvm->fd, KVM_INTERRUPT, &intr);
}
int kvm_guest_debug(kvm_context_t kvm, int vcpu, struct kvm_debug_guest *dbg)
{
	dbg->vcpu = vcpu;
	return ioctl(kvm->fd, KVM_DEBUG_GUEST, dbg);
}