/*
 * Kernel-based Virtual Machine control library
 *
 * This library provides an API to control the kvm hardware virtualization
 *
 * Copyright (C) 2006 Qumranet
 *
 * Avi Kivity   <avi@qumranet.com>
 * Yaniv Kamay  <yaniv@qumranet.com>
 *
 * This work is licensed under the GNU LGPL license, version 2.
 */

#define __user /* temporary, until installed via make headers_install */

#include <linux/kvm.h>

#define EXPECTED_KVM_API_VERSION 12

#if EXPECTED_KVM_API_VERSION != KVM_API_VERSION
#error libkvm: userspace and kernel version mismatch
#endif

#include <sys/ioctl.h>

#include "libkvm-all.h"

//#define DEBUG_MEMREG
#ifdef DEBUG_MEMREG
#define DPRINTF(fmt, args...) \
    do { fprintf(stderr, "%s:%d " fmt, __func__, __LINE__, ##args); } while (0)
#else
#define DPRINTF(fmt, args...) do {} while (0)
#endif

#define MIN(x,y) ((x) < (y) ? (x) : (y))
#define ALIGN(x, y) (((x)+(y)-1) & ~((y)-1))

int kvm_abi = EXPECTED_KVM_API_VERSION;
static inline void set_gsi(kvm_context_t kvm, unsigned int gsi)
{
    uint32_t *bitmap = kvm->used_gsi_bitmap;

    if (gsi < kvm->max_gsi)
        bitmap[gsi / 32] |= 1U << (gsi % 32);
    else
        DPRINTF("Invalid GSI %u\n", gsi);
}
static inline void clear_gsi(kvm_context_t kvm, unsigned int gsi)
{
    uint32_t *bitmap = kvm->used_gsi_bitmap;

    if (gsi < kvm->max_gsi)
        bitmap[gsi / 32] &= ~(1U << (gsi % 32));
    else
        DPRINTF("Invalid GSI %u\n", gsi);
}
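/*
 * The used_gsi_bitmap packs one bit per GSI into 32-bit words: word
 * gsi/32, bit gsi%32.  A minimal sketch of the same indexing, for
 * illustration only (gsi_is_set is a hypothetical helper, not part of
 * this library):
 *
 *   static inline int gsi_is_set(uint32_t *bitmap, unsigned int gsi)
 *   {
 *       return (bitmap[gsi / 32] >> (gsi % 32)) & 1;
 *   }
 */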
struct slot_info {
    unsigned long phys_addr;
    unsigned long len;
    unsigned long userspace_addr;
    unsigned flags;
    int logging_count;
};

struct slot_info slots[KVM_MAX_NUM_MEM_REGIONS];
static void init_slots(void)
{
    int i;

    for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i)
        slots[i].len = 0;
}
static int get_free_slot(kvm_context_t kvm)
{
    int i;
    int tss_ext;

#if defined(KVM_CAP_SET_TSS_ADDR) && !defined(__s390__)
    tss_ext = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_SET_TSS_ADDR);
#else
    tss_ext = 0;
#endif

    /*
     * On older kernels where the set tss ioctl is not supported we must save
     * slot 0 to hold the extended memory, as the vmx will use the last 3
     * pages of this slot.
     */
    i = tss_ext > 0 ? 0 : 1;

    for (; i < KVM_MAX_NUM_MEM_REGIONS; ++i)
        if (!slots[i].len)
            return i;

    return -1;
}
static void register_slot(int slot, unsigned long phys_addr, unsigned long len,
                          unsigned long userspace_addr, unsigned flags)
{
    slots[slot].phys_addr = phys_addr;
    slots[slot].len = len;
    slots[slot].userspace_addr = userspace_addr;
    slots[slot].flags = flags;
}
static void free_slot(int slot)
{
    slots[slot].len = 0;
    slots[slot].logging_count = 0;
}
static int get_slot(unsigned long phys_addr)
{
    int i;

    for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i) {
        if (slots[i].len && slots[i].phys_addr <= phys_addr &&
            (slots[i].phys_addr + slots[i].len - 1) >= phys_addr)
            return i;
    }
    return -1;
}
/* Returns -1 if this region is not totally contained within any slot,
 * and the number of the containing slot otherwise */
static int get_container_slot(uint64_t phys_addr, unsigned long size)
{
    int i;

    for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i)
        if (slots[i].len && slots[i].phys_addr <= phys_addr &&
            (slots[i].phys_addr + slots[i].len) >= phys_addr + size)
            return i;
    return -1;
}
int kvm_is_containing_region(kvm_context_t kvm, unsigned long phys_addr, unsigned long size)
{
    int slot = get_container_slot(phys_addr, size);

    return slot != -1;
}
/*
 * dirty pages logging control
 */
static int kvm_dirty_pages_log_change(kvm_context_t kvm,
                                      unsigned long phys_addr,
                                      unsigned flags,
                                      unsigned mask)
{
    int r = -1;
    int slot = get_slot(phys_addr);

    if (slot == -1) {
        fprintf(stderr, "BUG: %s: invalid parameters\n", __FUNCTION__);
        return -EINVAL;
    }

    flags = (slots[slot].flags & ~mask) | flags;
    if (flags == slots[slot].flags)
        return 0;
    slots[slot].flags = flags;

    {
        struct kvm_userspace_memory_region mem = {
            .slot = slot,
            .memory_size = slots[slot].len,
            .guest_phys_addr = slots[slot].phys_addr,
            .userspace_addr = slots[slot].userspace_addr,
            .flags = slots[slot].flags,
        };

        DPRINTF("slot %d start %llx len %llx flags %x\n",
                mem.slot, mem.guest_phys_addr, mem.memory_size, mem.flags);
        r = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &mem);
        if (r == -1)
            fprintf(stderr, "%s: %m\n", __FUNCTION__);
    }
    return r;
}
static int kvm_dirty_pages_log_change_all(kvm_context_t kvm,
                                          int (*change)(kvm_context_t kvm,
                                                        uint64_t start,
                                                        uint64_t len))
{
    int i, r;

    for (i = r = 0; i < KVM_MAX_NUM_MEM_REGIONS && r == 0; i++) {
        if (slots[i].len)
            r = change(kvm, slots[i].phys_addr, slots[i].len);
    }
    return r;
}
int kvm_dirty_pages_log_enable_slot(kvm_context_t kvm,
                                    uint64_t phys_addr,
                                    uint64_t len)
{
    int slot = get_slot(phys_addr);

    DPRINTF("start %"PRIx64" len %"PRIx64"\n", phys_addr, len);
    if (slot == -1) {
        fprintf(stderr, "BUG: %s: invalid parameters\n", __func__);
        return -EINVAL;
    }

    if (slots[slot].logging_count++)
        return 0;

    return kvm_dirty_pages_log_change(kvm, slots[slot].phys_addr,
                                      KVM_MEM_LOG_DIRTY_PAGES,
                                      KVM_MEM_LOG_DIRTY_PAGES);
}
int kvm_dirty_pages_log_disable_slot(kvm_context_t kvm,
                                     uint64_t phys_addr,
                                     uint64_t len)
{
    int slot = get_slot(phys_addr);

    if (slot == -1) {
        fprintf(stderr, "BUG: %s: invalid parameters\n", __func__);
        return -EINVAL;
    }

    if (--slots[slot].logging_count)
        return 0;

    return kvm_dirty_pages_log_change(kvm, slots[slot].phys_addr, 0,
                                      KVM_MEM_LOG_DIRTY_PAGES);
}
/*
 * Enable dirty page logging for all memory regions
 */
int kvm_dirty_pages_log_enable_all(kvm_context_t kvm)
{
    if (kvm->dirty_pages_log_all)
        return 0;
    kvm->dirty_pages_log_all = 1;
    return kvm_dirty_pages_log_change_all(kvm,
                                          kvm_dirty_pages_log_enable_slot);
}

/*
 * Enable dirty page logging only for memory regions that were created with
 * dirty logging enabled (disable for all other memory regions).
 */
int kvm_dirty_pages_log_reset(kvm_context_t kvm)
{
    if (!kvm->dirty_pages_log_all)
        return 0;
    kvm->dirty_pages_log_all = 0;
    return kvm_dirty_pages_log_change_all(kvm,
                                          kvm_dirty_pages_log_disable_slot);
}
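/*
 * Typical dirty-logging flow (illustrative sketch only; the my_bitmap
 * buffer and its sizing are assumptions, not part of this file):
 *
 *   kvm_dirty_pages_log_enable_all(kvm);
 *   ... let the guest run ...
 *   kvm_get_dirty_pages(kvm, phys_addr, my_bitmap);  // one bit per page
 *   kvm_dirty_pages_log_reset(kvm);
 */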
kvm_context_t kvm_init(struct kvm_callbacks *callbacks,
                       void *opaque)
{
    int fd;
    kvm_context_t kvm;
    int r, gsi_count;

    fd = open("/dev/kvm", O_RDWR);
    if (fd == -1) {
        perror("open /dev/kvm");
        return NULL;
    }
    r = ioctl(fd, KVM_GET_API_VERSION, 0);
    if (r == -1) {
        fprintf(stderr, "kvm kernel version too old: "
                "KVM_GET_API_VERSION ioctl not supported\n");
        goto out_close;
    }
    if (r < EXPECTED_KVM_API_VERSION) {
        fprintf(stderr, "kvm kernel version too old: "
                "We expect API version %d or newer, but got version %d\n",
                EXPECTED_KVM_API_VERSION, r);
        goto out_close;
    }
    if (r > EXPECTED_KVM_API_VERSION) {
        fprintf(stderr, "kvm userspace version too old\n");
        goto out_close;
    }

    kvm_page_size = getpagesize();
    kvm = malloc(sizeof(*kvm));
    if (!kvm)
        goto out_close;
    memset(kvm, 0, sizeof(*kvm));
    kvm->fd = fd;
    kvm->vm_fd = -1;
    kvm->callbacks = callbacks;
    kvm->opaque = opaque;
    kvm->dirty_pages_log_all = 0;
    kvm->no_irqchip_creation = 0;
    kvm->no_pit_creation = 0;

    gsi_count = kvm_get_gsi_count(kvm);
    if (gsi_count > 0) {
        int gsi_bits, i;

        /* Round up so we can search ints using ffs */
        gsi_bits = ALIGN(gsi_count, 32);
        kvm->used_gsi_bitmap = malloc(gsi_bits / 8);
        if (!kvm->used_gsi_bitmap)
            goto out_close;
        memset(kvm->used_gsi_bitmap, 0, gsi_bits / 8);
        kvm->max_gsi = gsi_bits;

        /* Mark any over-allocated bits as already in use */
        for (i = gsi_count; i < gsi_bits; i++)
            set_gsi(kvm, i);
    }

    return kvm;

out_close:
    close(fd);
    return NULL;
}
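/*
 * Illustrative call order for a minimal user of this library (a sketch;
 * my_callbacks, opaque, ram_size, ram and env are caller-provided names
 * assumed here, and error handling is omitted):
 *
 *   kvm_context_t kvm = kvm_init(&my_callbacks, opaque);
 *   void *ram;
 *   kvm_create(kvm, ram_size, &ram);
 *   kvm_vcpu_context_t vcpu = kvm_create_vcpu(kvm, 0);
 *   for (;;)
 *       kvm_run(vcpu, env);
 */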
void kvm_finalize(kvm_context_t kvm)
{
    if (kvm->vcpu_fd[0] != -1)
        close(kvm->vcpu_fd[0]);
    if (kvm->vm_fd != -1)
        close(kvm->vm_fd);
    close(kvm->fd);
    free(kvm);
}
void kvm_disable_irqchip_creation(kvm_context_t kvm)
{
    kvm->no_irqchip_creation = 1;
}

void kvm_disable_pit_creation(kvm_context_t kvm)
{
    kvm->no_pit_creation = 1;
}
kvm_vcpu_context_t kvm_create_vcpu(kvm_context_t kvm, int id)
{
    long mmap_size;
    int r;
    kvm_vcpu_context_t vcpu_ctx = malloc(sizeof(struct kvm_vcpu_context));

    if (!vcpu_ctx)
        return NULL;

    vcpu_ctx->kvm = kvm;

    r = ioctl(kvm->vm_fd, KVM_CREATE_VCPU, id);
    if (r == -1) {
        fprintf(stderr, "kvm_create_vcpu: %m\n");
        goto err;
    }
    vcpu_ctx->fd = r;

    mmap_size = ioctl(kvm->fd, KVM_GET_VCPU_MMAP_SIZE, 0);
    if (mmap_size == -1) {
        fprintf(stderr, "get vcpu mmap size: %m\n");
        goto err_fd;
    }
    vcpu_ctx->run = mmap(NULL, mmap_size, PROT_READ|PROT_WRITE, MAP_SHARED,
                         vcpu_ctx->fd, 0);
    if (vcpu_ctx->run == MAP_FAILED) {
        fprintf(stderr, "mmap vcpu area: %m\n");
        goto err_fd;
    }
    return vcpu_ctx;

err_fd:
    close(vcpu_ctx->fd);
err:
    free(vcpu_ctx);
    return NULL;
}
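/*
 * The mmap()ed area above is the shared struct kvm_run for this vcpu:
 * after every KVM_RUN ioctl the kernel describes the exit there.  For
 * illustration only:
 *
 *   if (vcpu_ctx->run->exit_reason == KVM_EXIT_IO)
 *       handle_io(vcpu_ctx);
 */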
int kvm_create_vm(kvm_context_t kvm)
{
    int fd = kvm->fd;

#ifdef KVM_CAP_IRQ_ROUTING
    kvm->irq_routes = malloc(sizeof(*kvm->irq_routes));
    if (!kvm->irq_routes)
        return -ENOMEM;
    memset(kvm->irq_routes, 0, sizeof(*kvm->irq_routes));
    kvm->nr_allocated_irq_routes = 0;
#endif

    fd = ioctl(fd, KVM_CREATE_VM, 0);
    if (fd == -1) {
        fprintf(stderr, "kvm_create_vm: %m\n");
        return -1;
    }
    kvm->vm_fd = fd;
    return 0;
}
static int kvm_create_default_phys_mem(kvm_context_t kvm,
                                       unsigned long phys_mem_bytes,
                                       void **vm_mem)
{
#ifdef KVM_CAP_USER_MEMORY
    int r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_USER_MEMORY);
    if (r > 0)
        return 0;
    fprintf(stderr, "Hypervisor too old: KVM_CAP_USER_MEMORY extension not supported\n");
#else
#error Hypervisor too old: KVM_CAP_USER_MEMORY extension not supported
#endif
    return -1;
}
int kvm_check_extension(kvm_context_t kvm, int ext)
{
    int ret;

    ret = ioctl(kvm->fd, KVM_CHECK_EXTENSION, ext);
    if (ret > 0)
        return ret;
    return 0;
}
void kvm_create_irqchip(kvm_context_t kvm)
{
    int r;

    kvm->irqchip_in_kernel = 0;
#ifdef KVM_CAP_IRQCHIP
    if (!kvm->no_irqchip_creation) {
        r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_IRQCHIP);
        if (r > 0) {    /* kernel irqchip supported */
            r = ioctl(kvm->vm_fd, KVM_CREATE_IRQCHIP);
            if (r >= 0) {
                kvm->irqchip_inject_ioctl = KVM_IRQ_LINE;
#if defined(KVM_CAP_IRQ_INJECT_STATUS) && defined(KVM_IRQ_LINE_STATUS)
                r = ioctl(kvm->fd, KVM_CHECK_EXTENSION,
                          KVM_CAP_IRQ_INJECT_STATUS);
                if (r > 0)
                    kvm->irqchip_inject_ioctl = KVM_IRQ_LINE_STATUS;
#endif
                kvm->irqchip_in_kernel = 1;
            } else
                fprintf(stderr, "Create kernel PIC irqchip failed\n");
        }
    }
#endif
}
int kvm_create(kvm_context_t kvm, unsigned long phys_mem_bytes, void **vm_mem)
{
    int r;

    r = kvm_create_vm(kvm);
    if (r < 0)
        return r;
    r = kvm_arch_create(kvm, phys_mem_bytes, vm_mem);
    if (r < 0)
        return r;
    init_slots();
    r = kvm_create_default_phys_mem(kvm, phys_mem_bytes, vm_mem);
    if (r < 0)
        return r;
    kvm_create_irqchip(kvm);

    return 0;
}
void *kvm_create_phys_mem(kvm_context_t kvm, unsigned long phys_start,
                          unsigned long len, int log, int writable)
{
    int r;
    int prot = PROT_READ;
    void *ptr;
    struct kvm_userspace_memory_region memory = {
        .memory_size = len,
        .guest_phys_addr = phys_start,
        .flags = log ? KVM_MEM_LOG_DIRTY_PAGES : 0,
    };

    if (writable)
        prot |= PROT_WRITE;

#if !defined(__s390__)
    ptr = mmap(NULL, len, prot, MAP_ANONYMOUS | MAP_SHARED, -1, 0);
#else
    ptr = mmap(LIBKVM_S390_ORIGIN, len, prot | PROT_EXEC,
               MAP_FIXED | MAP_SHARED | MAP_ANONYMOUS, -1, 0);
#endif
    if (ptr == MAP_FAILED) {
        fprintf(stderr, "%s: %s", __func__, strerror(errno));
        return NULL;
    }

    memory.userspace_addr = (unsigned long)ptr;
    memory.slot = get_free_slot(kvm);
    DPRINTF("slot %d start %llx len %llx flags %x\n",
            memory.slot, memory.guest_phys_addr, memory.memory_size,
            memory.flags);
    r = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &memory);
    if (r == -1) {
        fprintf(stderr, "%s: %s", __func__, strerror(errno));
        return NULL;
    }
    register_slot(memory.slot, memory.guest_phys_addr, memory.memory_size,
                  memory.userspace_addr, memory.flags);

    return ptr;
}
int kvm_register_phys_mem(kvm_context_t kvm,
                          unsigned long phys_start, void *userspace_addr,
                          unsigned long len, int log)
{
    struct kvm_userspace_memory_region memory = {
        .memory_size = len,
        .guest_phys_addr = phys_start,
        .userspace_addr = (unsigned long)(intptr_t)userspace_addr,
        .flags = log ? KVM_MEM_LOG_DIRTY_PAGES : 0,
    };
    int r;

    memory.slot = get_free_slot(kvm);
    DPRINTF("memory: gpa: %llx, size: %llx, uaddr: %llx, slot: %x, flags: %lx\n",
            memory.guest_phys_addr, memory.memory_size,
            memory.userspace_addr, memory.slot, memory.flags);
    r = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &memory);
    if (r == -1) {
        fprintf(stderr, "create_userspace_phys_mem: %s\n", strerror(errno));
        return -1;
    }
    register_slot(memory.slot, memory.guest_phys_addr, memory.memory_size,
                  memory.userspace_addr, memory.flags);
    return 0;
}
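/*
 * Example of backing guest RAM with caller-allocated memory (a sketch;
 * the mmap flags and the ram/len variables are assumptions, not taken
 * from this file):
 *
 *   void *ram = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *                    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 *   kvm_register_phys_mem(kvm, 0, ram, len, 0);
 */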
/* destroy/free a whole slot.
 * phys_start, len and slot are the params passed to kvm_create_phys_mem()
 */
void kvm_destroy_phys_mem(kvm_context_t kvm, unsigned long phys_start,
                          unsigned long len)
{
    int slot;
    int r;
    struct kvm_userspace_memory_region memory = {
        .memory_size = 0,
        .guest_phys_addr = phys_start,
        .userspace_addr = 0,
        .flags = 0,
    };

    slot = get_slot(phys_start);

    if ((slot >= KVM_MAX_NUM_MEM_REGIONS) || (slot == -1)) {
        fprintf(stderr, "BUG: %s: invalid parameters (slot=%d)\n",
                __FUNCTION__, slot);
        return;
    }
    if (phys_start != slots[slot].phys_addr) {
        fprintf(stderr,
                "WARNING: %s: phys_start is 0x%lx expecting 0x%lx\n",
                __FUNCTION__, phys_start, slots[slot].phys_addr);
        phys_start = slots[slot].phys_addr;
    }

    memory.slot = slot;
    DPRINTF("slot %d start %llx len %llx flags %x\n",
            memory.slot, memory.guest_phys_addr, memory.memory_size,
            memory.flags);
    r = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &memory);
    if (r == -1) {
        fprintf(stderr, "destroy_userspace_phys_mem: %s",
                strerror(errno));
        return;
    }

    free_slot(memory.slot);
}
void kvm_unregister_memory_area(kvm_context_t kvm, uint64_t phys_addr, unsigned long size)
{
    int slot = get_container_slot(phys_addr, size);

    if (slot != -1) {
        DPRINTF("Unregistering memory region %llx (%lx)\n", phys_addr, size);
        kvm_destroy_phys_mem(kvm, phys_addr, size);
    }
}
static int kvm_get_map(kvm_context_t kvm, int ioctl_num, int slot, void *buf)
{
    int r;
    struct kvm_dirty_log log = {
        .slot = slot,
    };

    log.dirty_bitmap = buf;

    r = ioctl(kvm->vm_fd, ioctl_num, &log);
    if (r == -1)
        return -errno;
    return 0;
}

int kvm_get_dirty_pages(kvm_context_t kvm, unsigned long phys_addr, void *buf)
{
    int slot;

    slot = get_slot(phys_addr);
    return kvm_get_map(kvm, KVM_GET_DIRTY_LOG, slot, buf);
}
int kvm_get_dirty_pages_range(kvm_context_t kvm, unsigned long phys_addr,
                              unsigned long len, void *buf, void *opaque,
                              int (*cb)(unsigned long start, unsigned long len,
                                        void *bitmap, void *opaque))
{
    int i;
    int r;
    unsigned long end_addr = phys_addr + len;

    for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i) {
        if ((slots[i].len && (uint64_t)slots[i].phys_addr >= phys_addr)
            && ((uint64_t)slots[i].phys_addr + slots[i].len <= end_addr)) {
            r = kvm_get_map(kvm, KVM_GET_DIRTY_LOG, i, buf);
            if (r)
                return r;
            r = cb(slots[i].phys_addr, slots[i].len, buf, opaque);
            if (r)
                return r;
        }
    }
    return 0;
}
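/*
 * The callback above receives one slot's bitmap at a time.  A minimal
 * callback (hypothetical, shown only to document the expected
 * signature; the lowest bit corresponds to the first page of the slot):
 *
 *   static int my_dirty_cb(unsigned long start, unsigned long len,
 *                          void *bitmap, void *opaque)
 *   {
 *       return 0;
 *   }
 */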
#ifdef KVM_CAP_IRQCHIP

int kvm_set_irq_level(kvm_context_t kvm, int irq, int level, int *status)
{
    struct kvm_irq_level event;
    int r;

    if (!kvm->irqchip_in_kernel)
        return 0;

    event.level = level;
    event.irq = irq;
    r = ioctl(kvm->vm_fd, kvm->irqchip_inject_ioctl, &event);
    if (r == -1)
        perror("kvm_set_irq_level");

    if (status) {
#ifdef KVM_CAP_IRQ_INJECT_STATUS
        *status = (kvm->irqchip_inject_ioctl == KVM_IRQ_LINE) ?
            1 : event.status;
#else
        *status = 1;
#endif
    }

    return 1;
}
int kvm_get_irqchip(kvm_context_t kvm, struct kvm_irqchip *chip)
{
    int r;

    if (!kvm->irqchip_in_kernel)
        return 0;
    r = ioctl(kvm->vm_fd, KVM_GET_IRQCHIP, chip);
    if (r == -1) {
        r = -errno;
        perror("kvm_get_irqchip");
    }
    return r;
}

int kvm_set_irqchip(kvm_context_t kvm, struct kvm_irqchip *chip)
{
    int r;

    if (!kvm->irqchip_in_kernel)
        return 0;
    r = ioctl(kvm->vm_fd, KVM_SET_IRQCHIP, chip);
    if (r == -1) {
        r = -errno;
        perror("kvm_set_irqchip");
    }
    return r;
}
static int handle_io(kvm_vcpu_context_t vcpu)
{
    struct kvm_run *run = vcpu->run;
    kvm_context_t kvm = vcpu->kvm;
    uint16_t addr = run->io.port;
    int r;
    int i;
    void *p = (void *)run + run->io.data_offset;

    for (i = 0; i < run->io.count; ++i) {
        switch (run->io.direction) {
        case KVM_EXIT_IO_IN:
            switch (run->io.size) {
            case 1:
                r = kvm->callbacks->inb(kvm->opaque, addr, p);
                break;
            case 2:
                r = kvm->callbacks->inw(kvm->opaque, addr, p);
                break;
            case 4:
                r = kvm->callbacks->inl(kvm->opaque, addr, p);
                break;
            default:
                fprintf(stderr, "bad I/O size %d\n", run->io.size);
                return -EMSGSIZE;
            }
            break;
        case KVM_EXIT_IO_OUT:
            switch (run->io.size) {
            case 1:
                r = kvm->callbacks->outb(kvm->opaque, addr,
                                         *(uint8_t *)p);
                break;
            case 2:
                r = kvm->callbacks->outw(kvm->opaque, addr,
                                         *(uint16_t *)p);
                break;
            case 4:
                r = kvm->callbacks->outl(kvm->opaque, addr,
                                         *(uint32_t *)p);
                break;
            default:
                fprintf(stderr, "bad I/O size %d\n", run->io.size);
                return -EMSGSIZE;
            }
            break;
        default:
            fprintf(stderr, "bad I/O direction %d\n", run->io.direction);
            return -EPROTO;
        }

        p += run->io.size;
    }

    return 0;
}
int handle_debug(kvm_vcpu_context_t vcpu, void *env)
{
#ifdef KVM_CAP_SET_GUEST_DEBUG
    struct kvm_run *run = vcpu->run;
    kvm_context_t kvm = vcpu->kvm;

    return kvm->callbacks->debug(kvm->opaque, env, &run->debug.arch);
#else
    return 0;
#endif
}
int kvm_get_regs(kvm_vcpu_context_t vcpu, struct kvm_regs *regs)
{
    return ioctl(vcpu->fd, KVM_GET_REGS, regs);
}

int kvm_set_regs(kvm_vcpu_context_t vcpu, struct kvm_regs *regs)
{
    return ioctl(vcpu->fd, KVM_SET_REGS, regs);
}

int kvm_get_fpu(kvm_vcpu_context_t vcpu, struct kvm_fpu *fpu)
{
    return ioctl(vcpu->fd, KVM_GET_FPU, fpu);
}

int kvm_set_fpu(kvm_vcpu_context_t vcpu, struct kvm_fpu *fpu)
{
    return ioctl(vcpu->fd, KVM_SET_FPU, fpu);
}

int kvm_get_sregs(kvm_vcpu_context_t vcpu, struct kvm_sregs *sregs)
{
    return ioctl(vcpu->fd, KVM_GET_SREGS, sregs);
}

int kvm_set_sregs(kvm_vcpu_context_t vcpu, struct kvm_sregs *sregs)
{
    return ioctl(vcpu->fd, KVM_SET_SREGS, sregs);
}
#ifdef KVM_CAP_MP_STATE
int kvm_get_mpstate(kvm_vcpu_context_t vcpu, struct kvm_mp_state *mp_state)
{
    int r;

    r = ioctl(vcpu->kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_MP_STATE);
    if (r > 0)
        return ioctl(vcpu->fd, KVM_GET_MP_STATE, mp_state);
    return -ENOSYS;
}

int kvm_set_mpstate(kvm_vcpu_context_t vcpu, struct kvm_mp_state *mp_state)
{
    int r;

    r = ioctl(vcpu->kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_MP_STATE);
    if (r > 0)
        return ioctl(vcpu->fd, KVM_SET_MP_STATE, mp_state);
    return -ENOSYS;
}
#endif
static int handle_mmio(kvm_vcpu_context_t vcpu)
{
    unsigned long addr = vcpu->run->mmio.phys_addr;
    kvm_context_t kvm = vcpu->kvm;
    struct kvm_run *kvm_run = vcpu->run;
    void *data = kvm_run->mmio.data;

    /* hack: Red Hat 7.1 generates these weird accesses. */
    if ((addr > 0xa0000-4 && addr <= 0xa0000) && kvm_run->mmio.len == 3)
        return 0;

    if (kvm_run->mmio.is_write)
        return kvm->callbacks->mmio_write(kvm->opaque, addr, data,
                                          kvm_run->mmio.len);
    else
        return kvm->callbacks->mmio_read(kvm->opaque, addr, data,
                                         kvm_run->mmio.len);
}

int handle_io_window(kvm_context_t kvm)
{
    return kvm->callbacks->io_window(kvm->opaque);
}
int handle_halt(kvm_vcpu_context_t vcpu)
{
    return vcpu->kvm->callbacks->halt(vcpu->kvm->opaque, vcpu);
}

int handle_shutdown(kvm_context_t kvm, void *env)
{
    return kvm->callbacks->shutdown(kvm->opaque, env);
}

int try_push_interrupts(kvm_context_t kvm)
{
    return kvm->callbacks->try_push_interrupts(kvm->opaque);
}

static inline void push_nmi(kvm_context_t kvm)
{
#ifdef KVM_CAP_USER_NMI
    kvm->callbacks->push_nmi(kvm->opaque);
#endif /* KVM_CAP_USER_NMI */
}

void post_kvm_run(kvm_context_t kvm, void *env)
{
    kvm->callbacks->post_kvm_run(kvm->opaque, env);
}

int pre_kvm_run(kvm_context_t kvm, void *env)
{
    return kvm->callbacks->pre_kvm_run(kvm->opaque, env);
}

int kvm_get_interrupt_flag(kvm_vcpu_context_t vcpu)
{
    return vcpu->run->if_flag;
}

int kvm_is_ready_for_interrupt_injection(kvm_vcpu_context_t vcpu)
{
    return vcpu->run->ready_for_interrupt_injection;
}
int kvm_run(kvm_vcpu_context_t vcpu, void *env)
{
    int r;
    int fd = vcpu->fd;
    struct kvm_run *run = vcpu->run;
    kvm_context_t kvm = vcpu->kvm;

again:
    push_nmi(kvm);
#if !defined(__s390__)
    if (!kvm->irqchip_in_kernel)
        run->request_interrupt_window = try_push_interrupts(kvm);
#endif
    r = pre_kvm_run(kvm, env);
    if (r)
        return r;
    r = ioctl(fd, KVM_RUN, 0);

    if (r == -1 && errno != EINTR && errno != EAGAIN) {
        r = -errno;
        post_kvm_run(kvm, env);
        fprintf(stderr, "kvm_run: %s\n", strerror(-r));
        return r;
    }

    post_kvm_run(kvm, env);

#if defined(KVM_CAP_COALESCED_MMIO)
    if (kvm->coalesced_mmio) {
        struct kvm_coalesced_mmio_ring *ring = (void *)run +
            kvm->coalesced_mmio * PAGE_SIZE;
        while (ring->first != ring->last) {
            kvm->callbacks->mmio_write(kvm->opaque,
                ring->coalesced_mmio[ring->first].phys_addr,
                &ring->coalesced_mmio[ring->first].data[0],
                ring->coalesced_mmio[ring->first].len);
            ring->first = (ring->first + 1) % KVM_COALESCED_MMIO_MAX;
        }
    }
#endif

#if !defined(__s390__)
    if (r == -1) {
        r = handle_io_window(kvm);
        goto more;
    }
#endif

    switch (run->exit_reason) {
    case KVM_EXIT_UNKNOWN:
        r = kvm->callbacks->unhandled(kvm, vcpu,
                                      run->hw.hardware_exit_reason);
        break;
    case KVM_EXIT_FAIL_ENTRY:
        r = kvm->callbacks->unhandled(kvm, vcpu,
                                      run->fail_entry.hardware_entry_failure_reason);
        break;
    case KVM_EXIT_EXCEPTION:
        fprintf(stderr, "exception %d (%x)\n",
                run->ex.exception, run->ex.error_code);
        kvm_show_regs(vcpu);
        abort();
        break;
    case KVM_EXIT_IO:
        r = handle_io(vcpu);
        break;
    case KVM_EXIT_DEBUG:
        r = handle_debug(vcpu, env);
        break;
    case KVM_EXIT_MMIO:
        r = handle_mmio(vcpu);
        break;
    case KVM_EXIT_HLT:
        r = handle_halt(vcpu);
        break;
    case KVM_EXIT_IRQ_WINDOW_OPEN:
        break;
    case KVM_EXIT_SHUTDOWN:
        r = handle_shutdown(kvm, env);
        break;
#if defined(__s390__)
    case KVM_EXIT_S390_SIEIC:
        r = kvm->callbacks->s390_handle_intercept(kvm, vcpu, run);
        break;
    case KVM_EXIT_S390_RESET:
        r = kvm->callbacks->s390_handle_reset(kvm, vcpu, run);
        break;
#endif
    default:
        if (kvm_arch_run(vcpu)) {
            fprintf(stderr, "unhandled vm exit: 0x%x\n", run->exit_reason);
            kvm_show_regs(vcpu);
            abort();
        }
        break;
    }

more:
    if (!r)
        goto again;
    return r;
}
int kvm_inject_irq(kvm_vcpu_context_t vcpu, unsigned irq)
{
    struct kvm_interrupt intr;

    intr.irq = irq;
    return ioctl(vcpu->fd, KVM_INTERRUPT, &intr);
}
#ifdef KVM_CAP_SET_GUEST_DEBUG
int kvm_set_guest_debug(kvm_vcpu_context_t vcpu, struct kvm_guest_debug *dbg)
{
    return ioctl(vcpu->fd, KVM_SET_GUEST_DEBUG, dbg);
}
#endif
int kvm_set_signal_mask(kvm_vcpu_context_t vcpu, const sigset_t *sigset)
{
    struct kvm_signal_mask *sigmask;
    int r;

    if (!sigset) {
        r = ioctl(vcpu->fd, KVM_SET_SIGNAL_MASK, NULL);
        if (r == -1)
            r = -errno;
        return r;
    }
    sigmask = malloc(sizeof(*sigmask) + sizeof(*sigset));
    if (!sigmask)
        return -ENOMEM;

    sigmask->len = 8;
    memcpy(sigmask->sigset, sigset, sizeof(*sigset));
    r = ioctl(vcpu->fd, KVM_SET_SIGNAL_MASK, sigmask);
    if (r == -1)
        r = -errno;
    free(sigmask);
    return r;
}
int kvm_irqchip_in_kernel(kvm_context_t kvm)
{
    return kvm->irqchip_in_kernel;
}

int kvm_pit_in_kernel(kvm_context_t kvm)
{
    return kvm->pit_in_kernel;
}
int kvm_has_sync_mmu(kvm_context_t kvm)
{
    int r = 0;

#ifdef KVM_CAP_SYNC_MMU
    r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_SYNC_MMU);
#endif
    return r;
}
int kvm_inject_nmi(kvm_vcpu_context_t vcpu)
{
#ifdef KVM_CAP_USER_NMI
    return ioctl(vcpu->fd, KVM_NMI);
#else
    return -ENOSYS;
#endif
}
int kvm_init_coalesced_mmio(kvm_context_t kvm)
{
    int r = 0;

    kvm->coalesced_mmio = 0;
#ifdef KVM_CAP_COALESCED_MMIO
    r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_COALESCED_MMIO);
    if (r > 0) {
        kvm->coalesced_mmio = r;
        return 0;
    }
#endif
    return r;
}
int kvm_register_coalesced_mmio(kvm_context_t kvm, uint64_t addr, uint32_t size)
{
#ifdef KVM_CAP_COALESCED_MMIO
    struct kvm_coalesced_mmio_zone zone;
    int r;

    if (kvm->coalesced_mmio) {
        zone.addr = addr;
        zone.size = size;

        r = ioctl(kvm->vm_fd, KVM_REGISTER_COALESCED_MMIO, &zone);
        if (r == -1) {
            perror("kvm_register_coalesced_mmio_zone");
            return -errno;
        }
        return 0;
    }
#endif
    return -ENOSYS;
}

int kvm_unregister_coalesced_mmio(kvm_context_t kvm, uint64_t addr, uint32_t size)
{
#ifdef KVM_CAP_COALESCED_MMIO
    struct kvm_coalesced_mmio_zone zone;
    int r;

    if (kvm->coalesced_mmio) {
        zone.addr = addr;
        zone.size = size;

        r = ioctl(kvm->vm_fd, KVM_UNREGISTER_COALESCED_MMIO, &zone);
        if (r == -1) {
            perror("kvm_unregister_coalesced_mmio_zone");
            return -errno;
        }
        DPRINTF("Unregistered coalesced mmio region for %llx (%lx)\n",
                addr, size);
        return 0;
    }
#endif
    return -ENOSYS;
}
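/*
 * Coalesced MMIO batches guest writes to registered zones in a ring
 * page shared with the kernel; kvm_run() drains that ring after each
 * exit.  Illustrative registration for a hypothetical framebuffer
 * range (the addresses are assumptions, not from this file):
 *
 *   kvm_register_coalesced_mmio(kvm, 0xa0000, 0x20000);
 *   ...
 *   kvm_unregister_coalesced_mmio(kvm, 0xa0000, 0x20000);
 */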
#ifdef KVM_CAP_DEVICE_ASSIGNMENT
int kvm_assign_pci_device(kvm_context_t kvm,
                          struct kvm_assigned_pci_dev *assigned_dev)
{
    int ret;

    ret = ioctl(kvm->vm_fd, KVM_ASSIGN_PCI_DEVICE, assigned_dev);
    if (ret < 0)
        return -errno;

    return ret;
}

static int kvm_old_assign_irq(kvm_context_t kvm,
                              struct kvm_assigned_irq *assigned_irq)
{
    int ret;

    ret = ioctl(kvm->vm_fd, KVM_ASSIGN_IRQ, assigned_irq);
    if (ret < 0)
        return -errno;

    return ret;
}

#ifdef KVM_CAP_ASSIGN_DEV_IRQ
int kvm_assign_irq(kvm_context_t kvm,
                   struct kvm_assigned_irq *assigned_irq)
{
    int ret;

    ret = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_ASSIGN_DEV_IRQ);
    if (ret > 0) {
        ret = ioctl(kvm->vm_fd, KVM_ASSIGN_DEV_IRQ, assigned_irq);
        if (ret < 0)
            return -errno;
        return ret;
    }

    return kvm_old_assign_irq(kvm, assigned_irq);
}

int kvm_deassign_irq(kvm_context_t kvm,
                     struct kvm_assigned_irq *assigned_irq)
{
    int ret;

    ret = ioctl(kvm->vm_fd, KVM_DEASSIGN_DEV_IRQ, assigned_irq);
    if (ret < 0)
        return -errno;

    return ret;
}
#else
int kvm_assign_irq(kvm_context_t kvm,
                   struct kvm_assigned_irq *assigned_irq)
{
    return kvm_old_assign_irq(kvm, assigned_irq);
}
#endif

#ifdef KVM_CAP_DEVICE_DEASSIGNMENT
int kvm_deassign_pci_device(kvm_context_t kvm,
                            struct kvm_assigned_pci_dev *assigned_dev)
{
    int ret;

    ret = ioctl(kvm->vm_fd, KVM_DEASSIGN_PCI_DEVICE, assigned_dev);
    if (ret < 0)
        return -errno;

    return ret;
}
#endif
#endif /* KVM_CAP_DEVICE_ASSIGNMENT */
int kvm_destroy_memory_region_works(kvm_context_t kvm)
{
    int ret = 0;

#ifdef KVM_CAP_DESTROY_MEMORY_REGION_WORKS
    ret = ioctl(kvm->fd, KVM_CHECK_EXTENSION,
                KVM_CAP_DESTROY_MEMORY_REGION_WORKS);
    if (ret <= 0)
        ret = 0;
#endif
    return ret;
}
int kvm_reinject_control(kvm_context_t kvm, int pit_reinject)
{
#ifdef KVM_CAP_REINJECT_CONTROL
    int r;
    struct kvm_reinject_control control;

    control.pit_reinject = pit_reinject;

    r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_REINJECT_CONTROL);
    if (r > 0) {
        r = ioctl(kvm->vm_fd, KVM_REINJECT_CONTROL, &control);
        if (r == -1)
            return -errno;
        return 0;
    }
#endif
    return -ENOSYS;
}
int kvm_has_gsi_routing(kvm_context_t kvm)
{
    int r = 0;

#ifdef KVM_CAP_IRQ_ROUTING
    r = kvm_check_extension(kvm, KVM_CAP_IRQ_ROUTING);
#endif
    return r;
}

int kvm_get_gsi_count(kvm_context_t kvm)
{
#ifdef KVM_CAP_IRQ_ROUTING
    return kvm_check_extension(kvm, KVM_CAP_IRQ_ROUTING);
#else
    return -EINVAL;
#endif
}
int kvm_clear_gsi_routes(kvm_context_t kvm)
{
#ifdef KVM_CAP_IRQ_ROUTING
    kvm->irq_routes->nr = 0;
    return 0;
#else
    return -EINVAL;
#endif
}
int kvm_add_routing_entry(kvm_context_t kvm,
                          struct kvm_irq_routing_entry *entry)
{
#ifdef KVM_CAP_IRQ_ROUTING
    struct kvm_irq_routing *z;
    struct kvm_irq_routing_entry *new;
    int n, size;

    if (kvm->irq_routes->nr == kvm->nr_allocated_irq_routes) {
        n = kvm->nr_allocated_irq_routes * 2;
        if (n < 64)
            n = 64;
        size = sizeof(struct kvm_irq_routing);
        size += n * sizeof(*new);
        z = realloc(kvm->irq_routes, size);
        if (!z)
            return -ENOMEM;
        kvm->nr_allocated_irq_routes = n;
        kvm->irq_routes = z;
    }
    n = kvm->irq_routes->nr++;
    new = &kvm->irq_routes->entries[n];
    memset(new, 0, sizeof(*new));
    new->gsi = entry->gsi;
    new->type = entry->type;
    new->flags = entry->flags;
    new->u = entry->u;

    set_gsi(kvm, entry->gsi);

    return 0;
#else
    return -ENOSYS;
#endif
}
int kvm_add_irq_route(kvm_context_t kvm, int gsi, int irqchip, int pin)
{
#ifdef KVM_CAP_IRQ_ROUTING
    struct kvm_irq_routing_entry e;

    e.gsi = gsi;
    e.type = KVM_IRQ_ROUTING_IRQCHIP;
    e.flags = 0;
    e.u.irqchip.irqchip = irqchip;
    e.u.irqchip.pin = pin;
    return kvm_add_routing_entry(kvm, &e);
#else
    return -ENOSYS;
#endif
}
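/*
 * Routes only take effect once committed; an illustrative sequence
 * (the irqchip/pin values here are assumptions, not from this file):
 *
 *   kvm_add_irq_route(kvm, gsi, KVM_IRQCHIP_IOAPIC, pin);
 *   kvm_commit_irq_routes(kvm);
 */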
int kvm_del_routing_entry(kvm_context_t kvm,
                          struct kvm_irq_routing_entry *entry)
{
#ifdef KVM_CAP_IRQ_ROUTING
    struct kvm_irq_routing_entry *e, *p;
    int i, gsi, found = 0;

    gsi = entry->gsi;

    for (i = 0; i < kvm->irq_routes->nr; ++i) {
        e = &kvm->irq_routes->entries[i];
        if (e->type == entry->type && e->gsi == gsi) {
            switch (e->type) {
            case KVM_IRQ_ROUTING_IRQCHIP: {
                if (e->u.irqchip.irqchip ==
                    entry->u.irqchip.irqchip
                    && e->u.irqchip.pin ==
                    entry->u.irqchip.pin) {
                    p = &kvm->irq_routes->
                        entries[--kvm->irq_routes->nr];
                    *e = *p;
                    found = 1;
                }
                break;
            }
            case KVM_IRQ_ROUTING_MSI: {
                if (e->u.msi.address_lo ==
                    entry->u.msi.address_lo
                    && e->u.msi.address_hi ==
                    entry->u.msi.address_hi
                    && e->u.msi.data == entry->u.msi.data) {
                    p = &kvm->irq_routes->
                        entries[--kvm->irq_routes->nr];
                    *e = *p;
                    found = 1;
                }
                break;
            }
            default:
                break;
            }
            if (found) {
                /* If there are no other users of this GSI
                 * mark it available in the bitmap */
                for (i = 0; i < kvm->irq_routes->nr; i++) {
                    e = &kvm->irq_routes->entries[i];
                    if (e->gsi == gsi)
                        break;
                }
                if (i == kvm->irq_routes->nr)
                    clear_gsi(kvm, gsi);

                return 0;
            }
        }
    }
    return -ESRCH;
#else
    return -ENOSYS;
#endif
}
int kvm_del_irq_route(kvm_context_t kvm, int gsi, int irqchip, int pin)
{
#ifdef KVM_CAP_IRQ_ROUTING
    struct kvm_irq_routing_entry e;

    e.gsi = gsi;
    e.type = KVM_IRQ_ROUTING_IRQCHIP;
    e.flags = 0;
    e.u.irqchip.irqchip = irqchip;
    e.u.irqchip.pin = pin;
    return kvm_del_routing_entry(kvm, &e);
#else
    return -ENOSYS;
#endif
}
int kvm_commit_irq_routes(kvm_context_t kvm)
{
#ifdef KVM_CAP_IRQ_ROUTING
    int r;

    kvm->irq_routes->flags = 0;
    r = ioctl(kvm->vm_fd, KVM_SET_GSI_ROUTING, kvm->irq_routes);
    if (r == -1)
        r = -errno;
    return r;
#else
    return -ENOSYS;
#endif
}

int kvm_get_irq_route_gsi(kvm_context_t kvm)
{
    int i, bit;
    uint32_t *buf = kvm->used_gsi_bitmap;

    /* Return the lowest unused GSI in the bitmap */
    for (i = 0; i < kvm->max_gsi / 32; i++) {
        bit = ffs(~buf[i]);
        if (!bit)
            continue;

        return bit - 1 + i * 32;
    }

    return -ENOSPC;
}
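/*
 * Worked example of the search above: if buf[0] == 0x0000ffff (GSIs
 * 0..15 in use), then ~buf[0] == 0xffff0000, ffs() returns 17, and the
 * function reports GSI 16 as the lowest free one.
 */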
#ifdef KVM_CAP_DEVICE_MSIX
int kvm_assign_set_msix_nr(kvm_context_t kvm,
                           struct kvm_assigned_msix_nr *msix_nr)
{
    int ret;

    ret = ioctl(kvm->vm_fd, KVM_ASSIGN_SET_MSIX_NR, msix_nr);
    if (ret < 0)
        return -errno;

    return ret;
}

int kvm_assign_set_msix_entry(kvm_context_t kvm,
                              struct kvm_assigned_msix_entry *entry)
{
    int ret;

    ret = ioctl(kvm->vm_fd, KVM_ASSIGN_SET_MSIX_ENTRY, entry);
    if (ret < 0)
        return -errno;

    return ret;
}
#endif
#if defined(KVM_CAP_IRQFD) && defined(CONFIG_eventfd)

#include <sys/eventfd.h>

static int _kvm_irqfd(kvm_context_t kvm, int fd, int gsi, int flags)
{
    int r;
    struct kvm_irqfd data = {
        .fd    = fd,
        .gsi   = gsi,
        .flags = flags,
    };

    r = ioctl(kvm->vm_fd, KVM_IRQFD, &data);
    if (r == -1)
        r = -errno;
    return r;
}

int kvm_irqfd(kvm_context_t kvm, int gsi, int flags)
{
    int r;
    int fd;

    if (!kvm_check_extension(kvm, KVM_CAP_IRQFD))
        return -ENOSYS;

    fd = eventfd(0, 0);
    if (fd < 0)
        return -errno;

    r = _kvm_irqfd(kvm, fd, gsi, 0);
    if (r < 0) {
        close(fd);
        return r;
    }

    return fd;
}

#else /* KVM_CAP_IRQFD */

int kvm_irqfd(kvm_context_t kvm, int gsi, int flags)
{
    return -ENOSYS;
}

#endif /* KVM_CAP_IRQFD */
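/*
 * Illustrative irqfd usage (a sketch; it assumes the kernel supports
 * KVM_CAP_IRQFD and that gsi was routed via the routing API above):
 *
 *   int fd = kvm_irqfd(kvm, gsi, 0);
 *   if (fd >= 0) {
 *       uint64_t one = 1;
 *       write(fd, &one, sizeof(one));   // injects an interrupt on gsi
 *   }
 */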