kvm: libkvm: add kvm_has_sync_mmu
[kvm-userspace.git] / libkvm / libkvm.c
/*
 * Kernel-based Virtual Machine control library
 *
 * This library provides an API to control the kvm hardware virtualization
 * module.
 *
 * Copyright (C) 2006 Qumranet
 *
 * Authors:
 *
 *  Avi Kivity   <avi@qumranet.com>
 *  Yaniv Kamay  <yaniv@qumranet.com>
 *
 * This work is licensed under the GNU LGPL license, version 2.
 */

#ifndef __user
#define __user /* temporary, until installed via make headers_install */
#endif
#include <linux/kvm.h>

#define EXPECTED_KVM_API_VERSION 12

#if EXPECTED_KVM_API_VERSION != KVM_API_VERSION
#error libkvm: userspace and kernel version mismatch
#endif

#include <unistd.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>	/* intptr_t and the fixed-width types used below */
#include <sys/mman.h>
#include <string.h>
#include <errno.h>
#include <sys/ioctl.h>
#include "libkvm.h"

#if defined(__x86_64__) || defined(__i386__)
#include "kvm-x86.h"
#endif

#if defined(__ia64__)
#include "kvm-ia64.h"
#endif

#if defined(__powerpc__)
#include "kvm-powerpc.h"
#endif

#if defined(__s390__)
#include "kvm-s390.h"
#endif
int kvm_abi = EXPECTED_KVM_API_VERSION;
int kvm_page_size;

struct slot_info {
	unsigned long phys_addr;
	unsigned long len;
	int user_alloc;		/* nonzero if backed by caller-supplied memory */
	unsigned long userspace_addr;
	unsigned flags;
};

struct slot_info slots[KVM_MAX_NUM_MEM_REGIONS];
void init_slots(void)
{
	int i;

	for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i)
		slots[i].len = 0;
}
int get_free_slot(kvm_context_t kvm)
{
	int i;
	int tss_ext;

#if defined(KVM_CAP_SET_TSS_ADDR) && !defined(__s390__)
	tss_ext = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_SET_TSS_ADDR);
#else
	tss_ext = 0;
#endif

	/*
	 * On older kernels where the set-tss ioctl is not supported we must
	 * reserve slot 0 to hold the extended memory, as the vmx code will
	 * use the last 3 pages of this slot.
	 */
	if (tss_ext > 0)
		i = 0;
	else
		i = 1;

	for (; i < KVM_MAX_NUM_MEM_REGIONS; ++i)
		if (!slots[i].len)
			return i;
	return -1;
}
void register_slot(int slot, unsigned long phys_addr, unsigned long len,
		   int user_alloc, unsigned long userspace_addr, unsigned flags)
{
	slots[slot].phys_addr = phys_addr;
	slots[slot].len = len;
	slots[slot].user_alloc = user_alloc;
	slots[slot].userspace_addr = userspace_addr;
	slots[slot].flags = flags;
}

void free_slot(int slot)
{
	slots[slot].len = 0;
}

int get_slot(unsigned long phys_addr)
{
	int i;

	for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i) {
		if (slots[i].len && slots[i].phys_addr <= phys_addr &&
		    (slots[i].phys_addr + slots[i].len - 1) >= phys_addr)
			return i;
	}
	return -1;
}

int get_intersecting_slot(unsigned long phys_addr)
{
	int i;

	for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i)
		if (slots[i].len && slots[i].phys_addr < phys_addr &&
		    (slots[i].phys_addr + slots[i].len) > phys_addr)
			return i;
	return -1;
}
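
/*
 * Semantics sketch (illustrative, not compiled): for a slot registered at
 * phys_addr 0x100000 with len 0x1000,
 *
 *	get_slot(0x100000)              == that slot (first byte)
 *	get_slot(0x100fff)              == that slot (last byte)
 *	get_intersecting_slot(0x100000) == -1 (strictly interior addresses only)
 *	get_intersecting_slot(0x100001) == that slot
 *
 * i.e. get_intersecting_slot() excludes slot boundaries, so callers probing
 * for overlap, such as kvm_create_mem_hole() below, get -1 for an address
 * that merely touches a slot's edge.
 */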
/*
 * dirty pages logging control
 */
static int kvm_dirty_pages_log_change(kvm_context_t kvm,
				      unsigned long phys_addr, __u32 flag)
{
	int r;
	int slot;

	slot = get_slot(phys_addr);
	if (slot == -1) {
		fprintf(stderr, "BUG: %s: invalid parameters\n", __FUNCTION__);
		return 1;
	}
	flag |= slots[slot].flags;
#ifdef KVM_CAP_USER_MEMORY
	if (slots[slot].user_alloc) {
		struct kvm_userspace_memory_region mem = {
			.slot = slot,
			.memory_size = slots[slot].len,
			.guest_phys_addr = slots[slot].phys_addr,
			.userspace_addr = slots[slot].userspace_addr,
			.flags = flag,
		};
		r = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &mem);
	}
#endif
	if (!slots[slot].user_alloc) {
		struct kvm_memory_region mem = {
			.slot = slot,
			.memory_size = slots[slot].len,
			.guest_phys_addr = slots[slot].phys_addr,
			.flags = flag,
		};
		r = ioctl(kvm->vm_fd, KVM_SET_MEMORY_REGION, &mem);
	}
	if (r == -1)
		fprintf(stderr, "%s: %m\n", __FUNCTION__);
	return r;
}
static int kvm_dirty_pages_log_change_all(kvm_context_t kvm, __u32 flag)
{
	int i, r;

	for (i = r = 0; i < KVM_MAX_NUM_MEM_REGIONS && r == 0; i++) {
		if (slots[i].len)
			r = kvm_dirty_pages_log_change(kvm, slots[i].phys_addr,
						       flag);
	}
	return r;
}

/*
 * Enable dirty page logging for all memory regions
 */
int kvm_dirty_pages_log_enable_all(kvm_context_t kvm)
{
	if (kvm->dirty_pages_log_all)
		return 0;
	kvm->dirty_pages_log_all = 1;
	return kvm_dirty_pages_log_change_all(kvm, KVM_MEM_LOG_DIRTY_PAGES);
}

/*
 * Enable dirty page logging only for memory regions that were created with
 * dirty logging enabled (disable for all other memory regions).
 */
int kvm_dirty_pages_log_reset(kvm_context_t kvm)
{
	if (!kvm->dirty_pages_log_all)
		return 0;
	kvm->dirty_pages_log_all = 0;
	return kvm_dirty_pages_log_change_all(kvm, 0);
}
kvm_context_t kvm_init(struct kvm_callbacks *callbacks,
		       void *opaque)
{
	int fd;
	kvm_context_t kvm;
	int r;

	fd = open("/dev/kvm", O_RDWR);
	if (fd == -1) {
		perror("open /dev/kvm");
		return NULL;
	}
	r = ioctl(fd, KVM_GET_API_VERSION, 0);
	if (r == -1) {
		fprintf(stderr, "kvm kernel version too old: "
			"KVM_GET_API_VERSION ioctl not supported\n");
		goto out_close;
	}
	if (r < EXPECTED_KVM_API_VERSION) {
		fprintf(stderr, "kvm kernel version too old: "
			"We expect API version %d or newer, but got "
			"version %d\n",
			EXPECTED_KVM_API_VERSION, r);
		goto out_close;
	}
	if (r > EXPECTED_KVM_API_VERSION) {
		fprintf(stderr, "kvm userspace version too old\n");
		goto out_close;
	}
	kvm_abi = r;
	kvm_page_size = getpagesize();
	kvm = malloc(sizeof(*kvm));
	if (!kvm)
		goto out_close;
	kvm->fd = fd;
	kvm->vm_fd = -1;
	kvm->callbacks = callbacks;
	kvm->opaque = opaque;
	kvm->dirty_pages_log_all = 0;
	kvm->no_irqchip_creation = 0;
	kvm->no_pit_creation = 0;

	return kvm;

out_close:
	close(fd);
	return NULL;
}
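
/*
 * Illustrative call sequence (a sketch, not part of the library; the
 * callback table contents and sizes are whatever the caller supplies):
 *
 *	struct kvm_callbacks my_callbacks = { ... };
 *	void *guest_ram;
 *	kvm_context_t ctx = kvm_init(&my_callbacks, my_opaque);
 *
 *	if (ctx && kvm_create(ctx, 128 * 1024 * 1024, &guest_ram) == 0 &&
 *	    kvm_create_vcpu(ctx, 0) == 0)
 *		kvm_run(ctx, 0);	// loops until a callback reports an error
 */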
void kvm_finalize(kvm_context_t kvm)
{
	if (kvm->vcpu_fd[0] != -1)
		close(kvm->vcpu_fd[0]);
	if (kvm->vm_fd != -1)
		close(kvm->vm_fd);
	close(kvm->fd);
	free(kvm);
}

void kvm_disable_irqchip_creation(kvm_context_t kvm)
{
	kvm->no_irqchip_creation = 1;
}

void kvm_disable_pit_creation(kvm_context_t kvm)
{
	kvm->no_pit_creation = 1;
}
int kvm_create_vcpu(kvm_context_t kvm, int slot)
{
	long mmap_size;
	int r;

	r = ioctl(kvm->vm_fd, KVM_CREATE_VCPU, slot);
	if (r == -1) {
		r = -errno;
		fprintf(stderr, "kvm_create_vcpu: %m\n");
		return r;
	}
	kvm->vcpu_fd[slot] = r;
	mmap_size = ioctl(kvm->fd, KVM_GET_VCPU_MMAP_SIZE, 0);
	if (mmap_size == -1) {
		r = -errno;
		fprintf(stderr, "get vcpu mmap size: %m\n");
		return r;
	}
	kvm->run[slot] = mmap(NULL, mmap_size, PROT_READ|PROT_WRITE, MAP_SHARED,
			      kvm->vcpu_fd[slot], 0);
	if (kvm->run[slot] == MAP_FAILED) {
		r = -errno;
		fprintf(stderr, "mmap vcpu area: %m\n");
		return r;
	}
	return 0;
}
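
/*
 * The mmap()ed region above is the kernel's struct kvm_run for this vcpu;
 * fields such as exit_reason and the io/mmio unions are read from it in
 * place by kvm_run() below, with no extra copies.
 */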
int kvm_create_vm(kvm_context_t kvm)
{
	int fd = kvm->fd;

	kvm->vcpu_fd[0] = -1;

	fd = ioctl(fd, KVM_CREATE_VM, 0);
	if (fd == -1) {
		fprintf(stderr, "kvm_create_vm: %m\n");
		return -1;
	}
	kvm->vm_fd = fd;
	return 0;
}
static int kvm_create_default_phys_mem(kvm_context_t kvm,
				       unsigned long phys_mem_bytes,
				       void **vm_mem)
{
	unsigned long memory = (phys_mem_bytes + PAGE_SIZE - 1) & PAGE_MASK;
	int r;

#ifdef KVM_CAP_USER_MEMORY
	r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_USER_MEMORY);
	if (r > 0)
		return 0;
	else
#endif
	r = kvm_alloc_kernel_memory(kvm, memory, vm_mem);
	if (r < 0)
		return r;

	r = kvm_arch_create_default_phys_mem(kvm, phys_mem_bytes, vm_mem);
	if (r < 0)
		return r;

	return 0;
}
int kvm_check_extension(kvm_context_t kvm, int ext)
{
	int ret;

	ret = ioctl(kvm->fd, KVM_CHECK_EXTENSION, ext);
	if (ret > 0)
		return 1;
	return 0;
}
void kvm_create_irqchip(kvm_context_t kvm)
{
	int r;

	kvm->irqchip_in_kernel = 0;
#ifdef KVM_CAP_IRQCHIP
	if (!kvm->no_irqchip_creation) {
		r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_IRQCHIP);
		if (r > 0) {	/* kernel irqchip supported */
			r = ioctl(kvm->vm_fd, KVM_CREATE_IRQCHIP);
			if (r >= 0)
				kvm->irqchip_in_kernel = 1;
			else
				fprintf(stderr, "Create kernel PIC irqchip failed\n");
		}
	}
#endif
}
int kvm_create(kvm_context_t kvm, unsigned long phys_mem_bytes, void **vm_mem)
{
	int r;

	r = kvm_create_vm(kvm);
	if (r < 0)
		return r;
	r = kvm_arch_create(kvm, phys_mem_bytes, vm_mem);
	if (r < 0)
		return r;
	init_slots();
	r = kvm_create_default_phys_mem(kvm, phys_mem_bytes, vm_mem);
	if (r < 0)
		return r;
	kvm_create_irqchip(kvm);

	return 0;
}
#ifdef KVM_CAP_USER_MEMORY

void *kvm_create_userspace_phys_mem(kvm_context_t kvm, unsigned long phys_start,
				    unsigned long len, int log, int writable)
{
	int r;
	int prot = PROT_READ;
	void *ptr;
	struct kvm_userspace_memory_region memory = {
		.memory_size = len,
		.guest_phys_addr = phys_start,
		.flags = log ? KVM_MEM_LOG_DIRTY_PAGES : 0,
	};

	if (writable)
		prot |= PROT_WRITE;

#if !defined(__s390__)
	ptr = mmap(NULL, len, prot, MAP_ANONYMOUS | MAP_SHARED, -1, 0);
#else
	ptr = mmap(LIBKVM_S390_ORIGIN, len, prot | PROT_EXEC,
		   MAP_FIXED | MAP_SHARED | MAP_ANONYMOUS, -1, 0);
#endif
	if (ptr == MAP_FAILED) {
		fprintf(stderr, "create_userspace_phys_mem: %s\n",
			strerror(errno));
		return NULL;
	}

	memset(ptr, 0, len);

	memory.userspace_addr = (unsigned long)ptr;
	memory.slot = get_free_slot(kvm);
	r = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &memory);
	if (r == -1) {
		fprintf(stderr, "create_userspace_phys_mem: %s\n",
			strerror(errno));
		return NULL;
	}
	register_slot(memory.slot, memory.guest_phys_addr, memory.memory_size,
		      1, memory.userspace_addr, memory.flags);

	return ptr;
}
void kvm_destroy_userspace_phys_mem(kvm_context_t kvm,
				    unsigned long phys_start)
{
	int r;
	struct kvm_userspace_memory_region memory = {
		.memory_size = 0,
		.guest_phys_addr = phys_start,
		.flags = 0,
	};

	memory.userspace_addr = 0;
	memory.slot = get_slot(phys_start);

	if (memory.slot == -1)
		return;

	r = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &memory);
	if (r == -1) {
		fprintf(stderr, "destroy_userspace_phys_mem: %s\n",
			strerror(errno));
		return;
	}

	free_slot(memory.slot);
}

#endif
void *kvm_create_phys_mem(kvm_context_t kvm, unsigned long phys_start,
			  unsigned long len, int log, int writable)
{
#ifdef KVM_CAP_USER_MEMORY
	int r;

	r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_USER_MEMORY);
	if (r > 0)
		return kvm_create_userspace_phys_mem(kvm, phys_start, len,
						     log, writable);
	else
#endif
	return kvm_create_kernel_phys_mem(kvm, phys_start, len,
					  log, writable);
}
int kvm_is_intersecting_mem(kvm_context_t kvm, unsigned long phys_start)
{
	return get_intersecting_slot(phys_start) != -1;
}

int kvm_is_allocated_mem(kvm_context_t kvm, unsigned long phys_start,
			 unsigned long len)
{
	int slot;

	slot = get_slot(phys_start);
	if (slot == -1)
		return 0;
	if (slots[slot].len == len)
		return 1;
	return 0;
}
int kvm_create_mem_hole(kvm_context_t kvm, unsigned long phys_start,
			unsigned long len)
{
#ifdef KVM_CAP_USER_MEMORY
	int slot;
	int r;
	struct kvm_userspace_memory_region rmslot;
	struct kvm_userspace_memory_region newslot1;
	struct kvm_userspace_memory_region newslot2;

	len = (len + PAGE_SIZE - 1) & PAGE_MASK;

	slot = get_intersecting_slot(phys_start);
	/* no need to create a hole, as there is already one */
	if (slot == -1)
		return 0;

	memset(&rmslot, 0, sizeof(struct kvm_userspace_memory_region));
	memset(&newslot1, 0, sizeof(struct kvm_userspace_memory_region));
	memset(&newslot2, 0, sizeof(struct kvm_userspace_memory_region));

	rmslot.guest_phys_addr = slots[slot].phys_addr;
	rmslot.slot = slot;

	newslot1.guest_phys_addr = slots[slot].phys_addr;
	newslot1.memory_size = phys_start - slots[slot].phys_addr;
	newslot1.slot = slot;
	newslot1.userspace_addr = slots[slot].userspace_addr;
	newslot1.flags = slots[slot].flags;

	newslot2.guest_phys_addr = newslot1.guest_phys_addr +
				   newslot1.memory_size + len;
	newslot2.memory_size = slots[slot].phys_addr +
			       slots[slot].len - newslot2.guest_phys_addr;
	newslot2.userspace_addr = newslot1.userspace_addr +
				  newslot1.memory_size;
	newslot2.slot = get_free_slot(kvm);
	newslot2.flags = newslot1.flags;

	r = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &rmslot);
	if (r == -1) {
		fprintf(stderr, "kvm_create_mem_hole: %s\n", strerror(errno));
		return -1;
	}
	free_slot(slot);

	r = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &newslot1);
	if (r == -1) {
		fprintf(stderr, "kvm_create_mem_hole: %s\n", strerror(errno));
		return -1;
	}
	register_slot(newslot1.slot, newslot1.guest_phys_addr,
		      newslot1.memory_size, 1, newslot1.userspace_addr,
		      newslot1.flags);

	r = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &newslot2);
	if (r == -1) {
		fprintf(stderr, "kvm_create_mem_hole: %s\n", strerror(errno));
		return -1;
	}
	register_slot(newslot2.slot, newslot2.guest_phys_addr,
		      newslot2.memory_size, 1, newslot2.userspace_addr,
		      newslot2.flags);
#endif
	return 0;
}
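
/*
 * Worked example (illustrative, assuming PAGE_SIZE is 0x1000): punching a
 * one-page hole at 0x2000 in a slot covering guest [0x0, 0x10000) backed at
 * userspace address U gives
 *
 *	newslot1: guest [0x0,    0x2000),  userspace U
 *	newslot2: guest [0x3000, 0x10000), userspace U + 0x2000
 *
 * i.e. the userspace backing stays contiguous; only the guest-physical range
 * is split around the hole.
 */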
int kvm_register_userspace_phys_mem(kvm_context_t kvm,
				    unsigned long phys_start, void *userspace_addr,
				    unsigned long len, int log)
{
#ifdef KVM_CAP_USER_MEMORY
	struct kvm_userspace_memory_region memory = {
		.memory_size = len,
		.guest_phys_addr = phys_start,
		.userspace_addr = (unsigned long)(intptr_t)userspace_addr,
		.flags = log ? KVM_MEM_LOG_DIRTY_PAGES : 0,
	};
	int r;

	memory.slot = get_free_slot(kvm);
	r = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &memory);
	if (r == -1) {
		fprintf(stderr, "%s: %s\n", __FUNCTION__, strerror(errno));
		return -1;
	}
	register_slot(memory.slot, memory.guest_phys_addr, memory.memory_size,
		      1, memory.userspace_addr, memory.flags);
	return 0;
#else
	return -ENOSYS;
#endif
}
/*
 * destroy/free a whole slot.
 * phys_start, len and slot are the params passed to kvm_create_phys_mem()
 */
void kvm_destroy_phys_mem(kvm_context_t kvm, unsigned long phys_start,
			  unsigned long len)
{
	int slot;

	slot = get_slot(phys_start);

	if (slot < 0 || slot >= KVM_MAX_NUM_MEM_REGIONS) {
		fprintf(stderr, "BUG: %s: invalid parameters (slot=%d)\n",
			__FUNCTION__, slot);
		return;
	}
	if (phys_start != slots[slot].phys_addr) {
		fprintf(stderr,
			"WARNING: %s: phys_start is 0x%lx expecting 0x%lx\n",
			__FUNCTION__, phys_start, slots[slot].phys_addr);
		phys_start = slots[slot].phys_addr;
	}

#ifdef KVM_CAP_USER_MEMORY
	if (ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_USER_MEMORY) > 0)
		kvm_destroy_userspace_phys_mem(kvm, phys_start);
	else
#endif
	kvm_create_kernel_phys_mem(kvm, phys_start, 0, 0, 0);
}
static int kvm_get_map(kvm_context_t kvm, int ioctl_num, int slot, void *buf)
{
	int r;
	struct kvm_dirty_log log = {
		.slot = slot,
	};

	log.dirty_bitmap = buf;

	r = ioctl(kvm->vm_fd, ioctl_num, &log);
	if (r == -1)
		return -errno;
	return 0;
}

int kvm_get_dirty_pages(kvm_context_t kvm, unsigned long phys_addr, void *buf)
{
	int slot;

	slot = get_slot(phys_addr);
	return kvm_get_map(kvm, KVM_GET_DIRTY_LOG, slot, buf);
}

#define ALIGN(x, y)	(((x)+(y)-1) & ~((y)-1))
#define BITMAP_SIZE(m)	(ALIGN(((m)/PAGE_SIZE), sizeof(long) * 8) / 8)
int kvm_get_dirty_pages_range(kvm_context_t kvm, unsigned long phys_addr,
			      unsigned long len, void *buf, void *opaque,
			      int (*cb)(unsigned long start, unsigned long len,
					void *bitmap, void *opaque))
{
	int i;
	int r;
	unsigned long end_addr = phys_addr + len;

	for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i) {
		if ((slots[i].len && slots[i].phys_addr >= phys_addr) &&
		    (slots[i].phys_addr + slots[i].len <= end_addr)) {
			r = kvm_get_map(kvm, KVM_GET_DIRTY_LOG, i, buf);
			if (r)
				return r;
			r = cb(slots[i].phys_addr, slots[i].len, buf, opaque);
			if (r)
				return r;
		}
	}
	return 0;
}
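
/*
 * Usage sketch (illustrative; the callback and RAM_BYTES are the caller's).
 * BITMAP_SIZE() gives the buffer needed for one slot: one bit per page,
 * rounded up to whole longs.
 *
 *	static int dump_dirty(unsigned long start, unsigned long len,
 *			      void *bitmap, void *opaque)
 *	{
 *		unsigned long npages = len / PAGE_SIZE, i;
 *
 *		for (i = 0; i < npages; ++i)
 *			if (((unsigned long *)bitmap)[i / (8 * sizeof(long))]
 *			    & (1ul << (i % (8 * sizeof(long)))))
 *				printf("page 0x%lx dirty\n",
 *				       start + i * PAGE_SIZE);
 *		return 0;
 *	}
 *
 *	char buf[BITMAP_SIZE(RAM_BYTES)];
 *	kvm_get_dirty_pages_range(ctx, 0, RAM_BYTES, buf, NULL, dump_dirty);
 */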
#ifdef KVM_CAP_IRQCHIP

int kvm_set_irq_level(kvm_context_t kvm, int irq, int level)
{
	struct kvm_irq_level event;
	int r;

	if (!kvm->irqchip_in_kernel)
		return 0;
	event.level = level;
	event.irq = irq;
	r = ioctl(kvm->vm_fd, KVM_IRQ_LINE, &event);
	if (r == -1)
		perror("kvm_set_irq_level");
	return 1;
}

int kvm_get_irqchip(kvm_context_t kvm, struct kvm_irqchip *chip)
{
	int r;

	if (!kvm->irqchip_in_kernel)
		return 0;
	r = ioctl(kvm->vm_fd, KVM_GET_IRQCHIP, chip);
	if (r == -1) {
		r = -errno;
		perror("kvm_get_irqchip");
	}
	return r;
}

int kvm_set_irqchip(kvm_context_t kvm, struct kvm_irqchip *chip)
{
	int r;

	if (!kvm->irqchip_in_kernel)
		return 0;
	r = ioctl(kvm->vm_fd, KVM_SET_IRQCHIP, chip);
	if (r == -1) {
		r = -errno;
		perror("kvm_set_irqchip");
	}
	return r;
}

#endif
static int handle_io(kvm_context_t kvm, struct kvm_run *run, int vcpu)
{
	uint16_t addr = run->io.port;
	int r;
	int i;
	void *p = (void *)run + run->io.data_offset;

	for (i = 0; i < run->io.count; ++i) {
		switch (run->io.direction) {
		case KVM_EXIT_IO_IN:
			switch (run->io.size) {
			case 1:
				r = kvm->callbacks->inb(kvm->opaque, addr, p);
				break;
			case 2:
				r = kvm->callbacks->inw(kvm->opaque, addr, p);
				break;
			case 4:
				r = kvm->callbacks->inl(kvm->opaque, addr, p);
				break;
			default:
				fprintf(stderr, "bad I/O size %d\n", run->io.size);
				return -EMSGSIZE;
			}
			break;
		case KVM_EXIT_IO_OUT:
			switch (run->io.size) {
			case 1:
				r = kvm->callbacks->outb(kvm->opaque, addr,
							 *(uint8_t *)p);
				break;
			case 2:
				r = kvm->callbacks->outw(kvm->opaque, addr,
							 *(uint16_t *)p);
				break;
			case 4:
				r = kvm->callbacks->outl(kvm->opaque, addr,
							 *(uint32_t *)p);
				break;
			default:
				fprintf(stderr, "bad I/O size %d\n", run->io.size);
				return -EMSGSIZE;
			}
			break;
		default:
			fprintf(stderr, "bad I/O direction %d\n", run->io.direction);
			return -EPROTO;
		}

		p += run->io.size;
	}

	return 0;
}
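
/*
 * Layout note (illustrative): for a string instruction such as `rep outsw`
 * with count == 2 and size == 2, the kernel packs the data contiguously in
 * the mmap()ed kvm_run area:
 *
 *	run + run->io.data_offset -> | u16 #0 | u16 #1 |
 *
 * which is why p above advances by run->io.size once per iteration.
 */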
int handle_debug(kvm_context_t kvm, int vcpu)
{
	return kvm->callbacks->debug(kvm->opaque, vcpu);
}

int kvm_get_regs(kvm_context_t kvm, int vcpu, struct kvm_regs *regs)
{
	return ioctl(kvm->vcpu_fd[vcpu], KVM_GET_REGS, regs);
}

int kvm_set_regs(kvm_context_t kvm, int vcpu, struct kvm_regs *regs)
{
	return ioctl(kvm->vcpu_fd[vcpu], KVM_SET_REGS, regs);
}

int kvm_get_fpu(kvm_context_t kvm, int vcpu, struct kvm_fpu *fpu)
{
	return ioctl(kvm->vcpu_fd[vcpu], KVM_GET_FPU, fpu);
}

int kvm_set_fpu(kvm_context_t kvm, int vcpu, struct kvm_fpu *fpu)
{
	return ioctl(kvm->vcpu_fd[vcpu], KVM_SET_FPU, fpu);
}

int kvm_get_sregs(kvm_context_t kvm, int vcpu, struct kvm_sregs *sregs)
{
	return ioctl(kvm->vcpu_fd[vcpu], KVM_GET_SREGS, sregs);
}

int kvm_set_sregs(kvm_context_t kvm, int vcpu, struct kvm_sregs *sregs)
{
	return ioctl(kvm->vcpu_fd[vcpu], KVM_SET_SREGS, sregs);
}
#ifdef KVM_CAP_MP_STATE
int kvm_get_mpstate(kvm_context_t kvm, int vcpu, struct kvm_mp_state *mp_state)
{
	int r;

	r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_MP_STATE);
	if (r > 0)
		return ioctl(kvm->vcpu_fd[vcpu], KVM_GET_MP_STATE, mp_state);
	return -ENOSYS;
}

int kvm_set_mpstate(kvm_context_t kvm, int vcpu, struct kvm_mp_state *mp_state)
{
	int r;

	r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_MP_STATE);
	if (r > 0)
		return ioctl(kvm->vcpu_fd[vcpu], KVM_SET_MP_STATE, mp_state);
	return -ENOSYS;
}
#endif
static int handle_mmio(kvm_context_t kvm, struct kvm_run *kvm_run)
{
	unsigned long addr = kvm_run->mmio.phys_addr;
	void *data = kvm_run->mmio.data;

	/* hack: Red Hat 7.1 generates these weird accesses. */
	if ((addr > 0xa0000-4 && addr <= 0xa0000) && kvm_run->mmio.len == 3)
		return 0;

	if (kvm_run->mmio.is_write)
		return kvm->callbacks->mmio_write(kvm->opaque, addr, data,
						  kvm_run->mmio.len);
	else
		return kvm->callbacks->mmio_read(kvm->opaque, addr, data,
						 kvm_run->mmio.len);
}
int handle_io_window(kvm_context_t kvm)
{
	return kvm->callbacks->io_window(kvm->opaque);
}

int handle_halt(kvm_context_t kvm, int vcpu)
{
	return kvm->callbacks->halt(kvm->opaque, vcpu);
}

int handle_shutdown(kvm_context_t kvm, int vcpu)
{
	return kvm->callbacks->shutdown(kvm->opaque, vcpu);
}

int try_push_interrupts(kvm_context_t kvm)
{
	return kvm->callbacks->try_push_interrupts(kvm->opaque);
}

void post_kvm_run(kvm_context_t kvm, int vcpu)
{
	kvm->callbacks->post_kvm_run(kvm->opaque, vcpu);
}

int pre_kvm_run(kvm_context_t kvm, int vcpu)
{
	return kvm->callbacks->pre_kvm_run(kvm->opaque, vcpu);
}

int kvm_get_interrupt_flag(kvm_context_t kvm, int vcpu)
{
	struct kvm_run *run = kvm->run[vcpu];

	return run->if_flag;
}

int kvm_is_ready_for_interrupt_injection(kvm_context_t kvm, int vcpu)
{
	struct kvm_run *run = kvm->run[vcpu];

	return run->ready_for_interrupt_injection;
}
int kvm_run(kvm_context_t kvm, int vcpu)
{
	int r;
	int fd = kvm->vcpu_fd[vcpu];
	struct kvm_run *run = kvm->run[vcpu];

again:
#if !defined(__s390__)
	if (!kvm->irqchip_in_kernel)
		run->request_interrupt_window = try_push_interrupts(kvm);
#endif
	r = pre_kvm_run(kvm, vcpu);
	if (r)
		return r;
	r = ioctl(fd, KVM_RUN, 0);

	if (r == -1 && errno != EINTR && errno != EAGAIN) {
		r = -errno;
		post_kvm_run(kvm, vcpu);
		fprintf(stderr, "kvm_run: %s\n", strerror(-r));
		return r;
	}

	post_kvm_run(kvm, vcpu);

#if defined(KVM_CAP_COALESCED_MMIO)
	if (kvm->coalesced_mmio) {
		struct kvm_coalesced_mmio_ring *ring = (void *)run +
					kvm->coalesced_mmio * PAGE_SIZE;

		while (ring->first != ring->last) {
			kvm->callbacks->mmio_write(kvm->opaque,
				 ring->coalesced_mmio[ring->first].phys_addr,
				&ring->coalesced_mmio[ring->first].data[0],
				 ring->coalesced_mmio[ring->first].len);
			smp_wmb();
			ring->first = (ring->first + 1) %
						KVM_COALESCED_MMIO_MAX;
		}
	}
#endif

#if !defined(__s390__)
	if (r == -1) {
		r = handle_io_window(kvm);
		goto more;
	}
#endif
	switch (run->exit_reason) {
	case KVM_EXIT_UNKNOWN:
		fprintf(stderr, "unhandled vm exit: 0x%x vcpu_id %d\n",
			(unsigned)run->hw.hardware_exit_reason, vcpu);
		kvm_show_regs(kvm, vcpu);
		abort();
		break;
	case KVM_EXIT_FAIL_ENTRY:
		fprintf(stderr, "kvm_run: failed entry, reason %u\n",
			(unsigned)run->fail_entry.hardware_entry_failure_reason & 0xffff);
		return -ENOEXEC;
	case KVM_EXIT_EXCEPTION:
		fprintf(stderr, "exception %d (%x)\n",
			run->ex.exception,
			run->ex.error_code);
		kvm_show_regs(kvm, vcpu);
		kvm_show_code(kvm, vcpu);
		abort();
		break;
	case KVM_EXIT_IO:
		r = handle_io(kvm, run, vcpu);
		break;
	case KVM_EXIT_DEBUG:
		r = handle_debug(kvm, vcpu);
		break;
	case KVM_EXIT_MMIO:
		r = handle_mmio(kvm, run);
		break;
	case KVM_EXIT_HLT:
		r = handle_halt(kvm, vcpu);
		break;
	case KVM_EXIT_IRQ_WINDOW_OPEN:
		break;
	case KVM_EXIT_SHUTDOWN:
		r = handle_shutdown(kvm, vcpu);
		break;
#if defined(__s390__)
	case KVM_EXIT_S390_SIEIC:
		r = kvm->callbacks->s390_handle_intercept(kvm, vcpu, run);
		break;
	case KVM_EXIT_S390_RESET:
		r = kvm->callbacks->s390_handle_reset(kvm, vcpu, run);
		break;
#endif
	default:
		if (kvm_arch_run(run, kvm, vcpu)) {
			fprintf(stderr, "unhandled vm exit: 0x%x\n",
				run->exit_reason);
			kvm_show_regs(kvm, vcpu);
			abort();
		}
		break;
	}
more:
	if (!r)
		goto again;
	return r;
}
int kvm_inject_irq(kvm_context_t kvm, int vcpu, unsigned irq)
{
	struct kvm_interrupt intr;

	intr.irq = irq;
	return ioctl(kvm->vcpu_fd[vcpu], KVM_INTERRUPT, &intr);
}

int kvm_guest_debug(kvm_context_t kvm, int vcpu, struct kvm_debug_guest *dbg)
{
	return ioctl(kvm->vcpu_fd[vcpu], KVM_DEBUG_GUEST, dbg);
}
int kvm_set_signal_mask(kvm_context_t kvm, int vcpu, const sigset_t *sigset)
{
	struct kvm_signal_mask *sigmask;
	int r;

	if (!sigset) {
		r = ioctl(kvm->vcpu_fd[vcpu], KVM_SET_SIGNAL_MASK, NULL);
		if (r == -1)
			r = -errno;
		return r;
	}
	sigmask = malloc(sizeof(*sigmask) + sizeof(*sigset));
	if (!sigmask)
		return -ENOMEM;

	sigmask->len = 8;	/* the kernel's sigset is 8 bytes, not sizeof(sigset_t) */
	memcpy(sigmask->sigset, sigset, sizeof(*sigset));
	r = ioctl(kvm->vcpu_fd[vcpu], KVM_SET_SIGNAL_MASK, sigmask);
	if (r == -1)
		r = -errno;
	free(sigmask);
	return r;
}
int kvm_irqchip_in_kernel(kvm_context_t kvm)
{
	return kvm->irqchip_in_kernel;
}

int kvm_pit_in_kernel(kvm_context_t kvm)
{
	return kvm->pit_in_kernel;
}

int kvm_has_sync_mmu(kvm_context_t kvm)
{
	int r = 0;
#ifdef KVM_CAP_SYNC_MMU
	r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_SYNC_MMU);
#endif
	return r;
}
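
/*
 * Caller-side sketch (illustrative): with a sync MMU the kernel tracks
 * changes to the userspace mapping of guest RAM, so a caller may, for
 * example, release backing pages without confusing KVM:
 *
 *	if (kvm_has_sync_mmu(ctx))
 *		madvise(guest_ram, len, MADV_DONTNEED);
 */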
int kvm_init_coalesced_mmio(kvm_context_t kvm)
{
	int r = 0;

	kvm->coalesced_mmio = 0;
#ifdef KVM_CAP_COALESCED_MMIO
	r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_COALESCED_MMIO);
	if (r > 0) {
		kvm->coalesced_mmio = r;
		return 0;
	}
#endif
	return r;
}
int kvm_register_coalesced_mmio(kvm_context_t kvm, uint64_t addr, uint32_t size)
{
#ifdef KVM_CAP_COALESCED_MMIO
	struct kvm_coalesced_mmio_zone zone;
	int r;

	if (kvm->coalesced_mmio) {
		zone.addr = addr;
		zone.size = size;

		r = ioctl(kvm->vm_fd, KVM_REGISTER_COALESCED_MMIO, &zone);
		if (r == -1) {
			perror("kvm_register_coalesced_mmio_zone");
			return -errno;
		}
		return 0;
	}
#endif
	return -ENOSYS;
}

int kvm_unregister_coalesced_mmio(kvm_context_t kvm, uint64_t addr, uint32_t size)
{
#ifdef KVM_CAP_COALESCED_MMIO
	struct kvm_coalesced_mmio_zone zone;
	int r;

	if (kvm->coalesced_mmio) {
		zone.addr = addr;
		zone.size = size;

		r = ioctl(kvm->vm_fd, KVM_UNREGISTER_COALESCED_MMIO, &zone);
		if (r == -1) {
			perror("kvm_unregister_coalesced_mmio_zone");
			return -errno;
		}
		return 0;
	}
#endif
	return -ENOSYS;
}
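
/*
 * Usage sketch (illustrative): a device model registers its MMIO window
 * once, after kvm_init_coalesced_mmio(); guest writes to the zone are then
 * batched in the ring and replayed by kvm_run() after each exit, e.g. for a
 * hypothetical VGA window:
 *
 *	kvm_register_coalesced_mmio(ctx, 0xa0000, 0x20000);
 */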