kvm: qemu: expose MSI capability to guest
libkvm/libkvm.c
/*
 * Kernel-based Virtual Machine control library
 *
 * This library provides an API to control the kvm hardware virtualization
 * module.
 *
 * Copyright (C) 2006 Qumranet
 *
 * Authors:
 *
 *  Avi Kivity   <avi@qumranet.com>
 *  Yaniv Kamay  <yaniv@qumranet.com>
 *
 * This work is licensed under the GNU LGPL license, version 2.
 */

#ifndef __user
#define __user /* temporary, until installed via make headers_install */
#endif
#include <linux/kvm.h>

#define EXPECTED_KVM_API_VERSION 12

#if EXPECTED_KVM_API_VERSION != KVM_API_VERSION
#error libkvm: userspace and kernel version mismatch
#endif

#include <unistd.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <string.h>
#include <errno.h>
#include <sys/ioctl.h>
#include <inttypes.h>
#include "libkvm.h"
#if defined(__x86_64__) || defined(__i386__)
#include "kvm-x86.h"
#endif

#if defined(__ia64__)
#include "kvm-ia64.h"
#endif

#if defined(__powerpc__)
#include "kvm-powerpc.h"
#endif

#if defined(__s390__)
#include "kvm-s390.h"
#endif
//#define DEBUG_MEMREG
#ifdef DEBUG_MEMREG
#define DPRINTF(fmt, args...) \
        do { fprintf(stderr, "%s:%d " fmt , __func__, __LINE__, ##args); } while (0)
#else
#define DPRINTF(fmt, args...) do {} while (0)
#endif
int kvm_abi = EXPECTED_KVM_API_VERSION;
int kvm_page_size;

struct slot_info {
        unsigned long phys_addr;
        unsigned long len;
        unsigned long userspace_addr;
        unsigned flags;
        int logging_count;
};

struct slot_info slots[KVM_MAX_NUM_MEM_REGIONS];
static void init_slots(void)
{
        int i;

        for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i)
                slots[i].len = 0;
}
static int get_free_slot(kvm_context_t kvm)
{
        int i;
        int tss_ext;

#if defined(KVM_CAP_SET_TSS_ADDR) && !defined(__s390__)
        tss_ext = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_SET_TSS_ADDR);
#else
        tss_ext = 0;
#endif

        /*
         * on older kernels where the set tss ioctl is not supported we must save
         * slot 0 to hold the extended memory, as the vmx will use the last 3
         * pages of this slot.
         */
        if (tss_ext > 0)
                i = 0;
        else
                i = 1;

        for (; i < KVM_MAX_NUM_MEM_REGIONS; ++i)
                if (!slots[i].len)
                        return i;
        return -1;
}
static void register_slot(int slot, unsigned long phys_addr, unsigned long len,
                          unsigned long userspace_addr, unsigned flags)
{
        slots[slot].phys_addr = phys_addr;
        slots[slot].len = len;
        slots[slot].userspace_addr = userspace_addr;
        slots[slot].flags = flags;
}

static void free_slot(int slot)
{
        slots[slot].len = 0;
        slots[slot].logging_count = 0;
}
static int get_slot(unsigned long phys_addr)
{
        int i;

        for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS ; ++i) {
                if (slots[i].len && slots[i].phys_addr <= phys_addr &&
                    (slots[i].phys_addr + slots[i].len-1) >= phys_addr)
                        return i;
        }
        return -1;
}
/* Returns -1 if this region is not totally contained within any slot,
 * and the number of the containing slot otherwise */
static int get_container_slot(uint64_t phys_addr, unsigned long size)
{
        int i;

        for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS ; ++i)
                if (slots[i].len && slots[i].phys_addr <= phys_addr &&
                    (slots[i].phys_addr + slots[i].len) >= phys_addr + size)
                        return i;
        return -1;
}
int kvm_is_containing_region(kvm_context_t kvm, unsigned long phys_addr, unsigned long size)
{
        int slot = get_container_slot(phys_addr, size);
        if (slot == -1)
                return 0;
        return 1;
}
/*
 * dirty pages logging control
 */
static int kvm_dirty_pages_log_change(kvm_context_t kvm,
                                      unsigned long phys_addr,
                                      unsigned flags,
                                      unsigned mask)
{
        int r = -1;
        int slot = get_slot(phys_addr);

        if (slot == -1) {
                fprintf(stderr, "BUG: %s: invalid parameters\n", __FUNCTION__);
                return 1;
        }

        flags = (slots[slot].flags & ~mask) | flags;
        if (flags == slots[slot].flags)
                return 0;
        slots[slot].flags = flags;

        {
                struct kvm_userspace_memory_region mem = {
                        .slot = slot,
                        .memory_size = slots[slot].len,
                        .guest_phys_addr = slots[slot].phys_addr,
                        .userspace_addr = slots[slot].userspace_addr,
                        .flags = slots[slot].flags,
                };

                DPRINTF("slot %d start %llx len %llx flags %x\n",
                        mem.slot,
                        mem.guest_phys_addr,
                        mem.memory_size,
                        mem.flags);
                r = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &mem);
                if (r == -1)
                        fprintf(stderr, "%s: %m\n", __FUNCTION__);
        }
        return r;
}
static int kvm_dirty_pages_log_change_all(kvm_context_t kvm,
                                          int (*change)(kvm_context_t kvm,
                                                        uint64_t start,
                                                        uint64_t len))
{
        int i, r;

        for (i=r=0; i<KVM_MAX_NUM_MEM_REGIONS && r==0; i++) {
                if (slots[i].len)
                        r = change(kvm, slots[i].phys_addr, slots[i].len);
        }
        return r;
}
int kvm_dirty_pages_log_enable_slot(kvm_context_t kvm,
                                    uint64_t phys_addr,
                                    uint64_t len)
{
        int slot = get_slot(phys_addr);

        DPRINTF("start %"PRIx64" len %"PRIx64"\n", phys_addr, len);
        if (slot == -1) {
                fprintf(stderr, "BUG: %s: invalid parameters\n", __func__);
                return -EINVAL;
        }

        if (slots[slot].logging_count++)
                return 0;

        return kvm_dirty_pages_log_change(kvm, slots[slot].phys_addr,
                                          KVM_MEM_LOG_DIRTY_PAGES,
                                          KVM_MEM_LOG_DIRTY_PAGES);
}
int kvm_dirty_pages_log_disable_slot(kvm_context_t kvm,
                                     uint64_t phys_addr,
                                     uint64_t len)
{
        int slot = get_slot(phys_addr);

        if (slot == -1) {
                fprintf(stderr, "BUG: %s: invalid parameters\n", __func__);
                return -EINVAL;
        }

        if (--slots[slot].logging_count)
                return 0;

        return kvm_dirty_pages_log_change(kvm, slots[slot].phys_addr,
                                          0,
                                          KVM_MEM_LOG_DIRTY_PAGES);
}
/**
 * Enable dirty page logging for all memory regions
 */
int kvm_dirty_pages_log_enable_all(kvm_context_t kvm)
{
        if (kvm->dirty_pages_log_all)
                return 0;
        kvm->dirty_pages_log_all = 1;
        return kvm_dirty_pages_log_change_all(kvm,
                                              kvm_dirty_pages_log_enable_slot);
}

/**
 * Enable dirty page logging only for memory regions that were created with
 * dirty logging enabled (disable for all other memory regions).
 */
int kvm_dirty_pages_log_reset(kvm_context_t kvm)
{
        if (!kvm->dirty_pages_log_all)
                return 0;
        kvm->dirty_pages_log_all = 0;
        return kvm_dirty_pages_log_change_all(kvm,
                                              kvm_dirty_pages_log_disable_slot);
}
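/*
 * Usage sketch (illustrative only, not part of the library): how a
 * migration-style pass might drive the dirty logging API above. "ctx",
 * "gpa" and "region_len" are hypothetical caller state; the bitmap sizing
 * follows the BITMAP_SIZE() macro defined later in this file.
 *
 *      void *bitmap = malloc(BITMAP_SIZE(region_len));
 *
 *      kvm_dirty_pages_log_enable_all(ctx);
 *      // ... let the guest run for a while ...
 *      kvm_get_dirty_pages(ctx, gpa, bitmap);  // fetch the slot's dirty bitmap
 *      // ... resend pages whose bit is set ...
 *      kvm_dirty_pages_log_reset(ctx);         // back to per-slot settings
 */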
kvm_context_t kvm_init(struct kvm_callbacks *callbacks,
                       void *opaque)
{
        int fd;
        kvm_context_t kvm;
        int r;

        fd = open("/dev/kvm", O_RDWR);
        if (fd == -1) {
                perror("open /dev/kvm");
                return NULL;
        }
        r = ioctl(fd, KVM_GET_API_VERSION, 0);
        if (r == -1) {
                fprintf(stderr, "kvm kernel version too old: "
                        "KVM_GET_API_VERSION ioctl not supported\n");
                goto out_close;
        }
        if (r < EXPECTED_KVM_API_VERSION) {
                fprintf(stderr, "kvm kernel version too old: "
                        "We expect API version %d or newer, but got "
                        "version %d\n",
                        EXPECTED_KVM_API_VERSION, r);
                goto out_close;
        }
        if (r > EXPECTED_KVM_API_VERSION) {
                fprintf(stderr, "kvm userspace version too old\n");
                goto out_close;
        }
        kvm_abi = r;
        kvm_page_size = getpagesize();
        kvm = malloc(sizeof(*kvm));
        if (kvm == NULL)
                goto out_close;
        memset(kvm, 0, sizeof(*kvm));
        kvm->fd = fd;
        kvm->vm_fd = -1;
        kvm->callbacks = callbacks;
        kvm->opaque = opaque;
        kvm->dirty_pages_log_all = 0;
        kvm->no_irqchip_creation = 0;
        kvm->no_pit_creation = 0;

        return kvm;
 out_close:
        close(fd);
        return NULL;
}
void kvm_finalize(kvm_context_t kvm)
{
        if (kvm->vcpu_fd[0] != -1)
                close(kvm->vcpu_fd[0]);
        if (kvm->vm_fd != -1)
                close(kvm->vm_fd);
        close(kvm->fd);
        free(kvm);
}

void kvm_disable_irqchip_creation(kvm_context_t kvm)
{
        kvm->no_irqchip_creation = 1;
}

void kvm_disable_pit_creation(kvm_context_t kvm)
{
        kvm->no_pit_creation = 1;
}
int kvm_create_vcpu(kvm_context_t kvm, int slot)
{
        long mmap_size;
        int r;

        r = ioctl(kvm->vm_fd, KVM_CREATE_VCPU, slot);
        if (r == -1) {
                r = -errno;
                fprintf(stderr, "kvm_create_vcpu: %m\n");
                return r;
        }
        kvm->vcpu_fd[slot] = r;
        mmap_size = ioctl(kvm->fd, KVM_GET_VCPU_MMAP_SIZE, 0);
        if (mmap_size == -1) {
                r = -errno;
                fprintf(stderr, "get vcpu mmap size: %m\n");
                return r;
        }
        kvm->run[slot] = mmap(NULL, mmap_size, PROT_READ|PROT_WRITE, MAP_SHARED,
                              kvm->vcpu_fd[slot], 0);
        if (kvm->run[slot] == MAP_FAILED) {
                r = -errno;
                fprintf(stderr, "mmap vcpu area: %m\n");
                return r;
        }
        return 0;
}
int kvm_create_vm(kvm_context_t kvm)
{
        int fd = kvm->fd;

#ifdef KVM_CAP_IRQ_ROUTING
        kvm->irq_routes = malloc(sizeof(*kvm->irq_routes));
        if (!kvm->irq_routes)
                return -ENOMEM;
        memset(kvm->irq_routes, 0, sizeof(*kvm->irq_routes));
        kvm->nr_allocated_irq_routes = 0;
#endif

        kvm->vcpu_fd[0] = -1;

        fd = ioctl(fd, KVM_CREATE_VM, 0);
        if (fd == -1) {
                fprintf(stderr, "kvm_create_vm: %m\n");
                return -1;
        }
        kvm->vm_fd = fd;
        return 0;
}
static int kvm_create_default_phys_mem(kvm_context_t kvm,
                                       unsigned long phys_mem_bytes,
                                       void **vm_mem)
{
#ifdef KVM_CAP_USER_MEMORY
        int r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_USER_MEMORY);
        if (r > 0)
                return 0;
        fprintf(stderr, "Hypervisor too old: KVM_CAP_USER_MEMORY extension not supported\n");
#else
#error Hypervisor too old: KVM_CAP_USER_MEMORY extension not supported
#endif
        return -1;
}
int kvm_check_extension(kvm_context_t kvm, int ext)
{
        int ret;

        ret = ioctl(kvm->fd, KVM_CHECK_EXTENSION, ext);
        if (ret > 0)
                return ret;
        return 0;
}
void kvm_create_irqchip(kvm_context_t kvm)
{
        int r;

        kvm->irqchip_in_kernel = 0;
#ifdef KVM_CAP_IRQCHIP
        if (!kvm->no_irqchip_creation) {
                r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_IRQCHIP);
                if (r > 0) {    /* kernel irqchip supported */
                        r = ioctl(kvm->vm_fd, KVM_CREATE_IRQCHIP);
                        if (r >= 0) {
                                kvm->irqchip_inject_ioctl = KVM_IRQ_LINE;
#if defined(KVM_CAP_IRQ_INJECT_STATUS) && defined(KVM_IRQ_LINE_STATUS)
                                r = ioctl(kvm->fd, KVM_CHECK_EXTENSION,
                                          KVM_CAP_IRQ_INJECT_STATUS);
                                if (r > 0)
                                        kvm->irqchip_inject_ioctl = KVM_IRQ_LINE_STATUS;
#endif
                                kvm->irqchip_in_kernel = 1;
                        }
                        else
                                fprintf(stderr, "Create kernel PIC irqchip failed\n");
                }
        }
#endif
}
int kvm_create(kvm_context_t kvm, unsigned long phys_mem_bytes, void **vm_mem)
{
        int r;

        r = kvm_create_vm(kvm);
        if (r < 0)
                return r;
        r = kvm_arch_create(kvm, phys_mem_bytes, vm_mem);
        if (r < 0)
                return r;
        init_slots();
        r = kvm_create_default_phys_mem(kvm, phys_mem_bytes, vm_mem);
        if (r < 0)
                return r;
        kvm_create_irqchip(kvm);

        return 0;
}
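/*
 * Usage sketch (illustrative; "my_callbacks", "my_opaque", "env" and
 * "ram_size" are hypothetical caller state): the bring-up order this API
 * expects is kvm_init() -> kvm_create() -> kvm_create_vcpu() -> kvm_run().
 *
 *      void *ram;
 *      kvm_context_t ctx = kvm_init(&my_callbacks, my_opaque);
 *
 *      if (!ctx || kvm_create(ctx, ram_size, &ram) < 0)
 *              // ... bail out ...
 *      if (kvm_create_vcpu(ctx, 0) < 0)
 *              // ... bail out ...
 *      kvm_run(ctx, 0, env);   // loops until a callback returns nonzero
 */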
void *kvm_create_phys_mem(kvm_context_t kvm, unsigned long phys_start,
                          unsigned long len, int log, int writable)
{
        int r;
        int prot = PROT_READ;
        void *ptr;
        struct kvm_userspace_memory_region memory = {
                .memory_size = len,
                .guest_phys_addr = phys_start,
                .flags = log ? KVM_MEM_LOG_DIRTY_PAGES : 0,
        };

        if (writable)
                prot |= PROT_WRITE;

#if !defined(__s390__)
        ptr = mmap(NULL, len, prot, MAP_ANONYMOUS | MAP_SHARED, -1, 0);
#else
        ptr = mmap(LIBKVM_S390_ORIGIN, len, prot | PROT_EXEC,
                   MAP_FIXED | MAP_SHARED | MAP_ANONYMOUS, -1, 0);
#endif
        if (ptr == MAP_FAILED) {
                fprintf(stderr, "%s: %s", __func__, strerror(errno));
                return 0;
        }

        memset(ptr, 0, len);

        memory.userspace_addr = (unsigned long)ptr;
        memory.slot = get_free_slot(kvm);
        DPRINTF("slot %d start %llx len %llx flags %x\n",
                memory.slot,
                memory.guest_phys_addr,
                memory.memory_size,
                memory.flags);
        r = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &memory);
        if (r == -1) {
                fprintf(stderr, "%s: %s", __func__, strerror(errno));
                return 0;
        }
        register_slot(memory.slot, memory.guest_phys_addr, memory.memory_size,
                      memory.userspace_addr, memory.flags);

        return ptr;
}
int kvm_register_phys_mem(kvm_context_t kvm,
                          unsigned long phys_start, void *userspace_addr,
                          unsigned long len, int log)
{

        struct kvm_userspace_memory_region memory = {
                .memory_size = len,
                .guest_phys_addr = phys_start,
                .userspace_addr = (unsigned long)(intptr_t)userspace_addr,
                .flags = log ? KVM_MEM_LOG_DIRTY_PAGES : 0,
        };
        int r;

        memory.slot = get_free_slot(kvm);
        DPRINTF("memory: gpa: %llx, size: %llx, uaddr: %llx, slot: %x, flags: %lx\n",
                memory.guest_phys_addr, memory.memory_size,
                memory.userspace_addr, memory.slot, memory.flags);
        r = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &memory);
        if (r == -1) {
                fprintf(stderr, "create_userspace_phys_mem: %s\n", strerror(errno));
                return -1;
        }
        register_slot(memory.slot, memory.guest_phys_addr, memory.memory_size,
                      memory.userspace_addr, memory.flags);
        return 0;
}
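/*
 * Usage sketch (illustrative; "ctx" and "len" are hypothetical): registering
 * caller-allocated memory with kvm_register_phys_mem(), e.g. for a region
 * mapped just below 4GB. The mmap() flags mirror what kvm_create_phys_mem()
 * above uses.
 *
 *      void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *                     MAP_ANONYMOUS | MAP_SHARED, -1, 0);
 *      if (p != MAP_FAILED)
 *              kvm_register_phys_mem(ctx, 0x100000000ULL - len, p, len, 0);
 */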
/* destroy/free a whole slot.
 * phys_start, len and slot are the params passed to kvm_create_phys_mem()
 */
void kvm_destroy_phys_mem(kvm_context_t kvm, unsigned long phys_start,
                          unsigned long len)
{
        int slot;
        int r;
        struct kvm_userspace_memory_region memory = {
                .memory_size = 0,
                .guest_phys_addr = phys_start,
                .userspace_addr = 0,
                .flags = 0,
        };

        slot = get_slot(phys_start);

        if ((slot >= KVM_MAX_NUM_MEM_REGIONS) || (slot == -1)) {
                fprintf(stderr, "BUG: %s: invalid parameters (slot=%d)\n",
                        __FUNCTION__, slot);
                return;
        }
        if (phys_start != slots[slot].phys_addr) {
                fprintf(stderr,
                        "WARNING: %s: phys_start is 0x%lx expecting 0x%lx\n",
                        __FUNCTION__, phys_start, slots[slot].phys_addr);
                phys_start = slots[slot].phys_addr;
        }

        memory.slot = slot;
        DPRINTF("slot %d start %llx len %llx flags %x\n",
                memory.slot,
                memory.guest_phys_addr,
                memory.memory_size,
                memory.flags);
        r = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &memory);
        if (r == -1) {
                fprintf(stderr, "destroy_userspace_phys_mem: %s",
                        strerror(errno));
                return;
        }

        free_slot(memory.slot);
}
void kvm_unregister_memory_area(kvm_context_t kvm, uint64_t phys_addr, unsigned long size)
{

        int slot = get_container_slot(phys_addr, size);

        if (slot != -1) {
                DPRINTF("Unregistering memory region %llx (%lx)\n", phys_addr, size);
                kvm_destroy_phys_mem(kvm, phys_addr, size);
                return;
        }
}
static int kvm_get_map(kvm_context_t kvm, int ioctl_num, int slot, void *buf)
{
        int r;
        struct kvm_dirty_log log = {
                .slot = slot,
        };

        log.dirty_bitmap = buf;

        r = ioctl(kvm->vm_fd, ioctl_num, &log);
        if (r == -1)
                return -errno;
        return 0;
}

int kvm_get_dirty_pages(kvm_context_t kvm, unsigned long phys_addr, void *buf)
{
        int slot;

        slot = get_slot(phys_addr);
        return kvm_get_map(kvm, KVM_GET_DIRTY_LOG, slot, buf);
}

#define ALIGN(x, y) (((x)+(y)-1) & ~((y)-1))
#define BITMAP_SIZE(m) (ALIGN(((m)/PAGE_SIZE), sizeof(long) * 8) / 8)
int kvm_get_dirty_pages_range(kvm_context_t kvm, unsigned long phys_addr,
                              unsigned long len, void *buf, void *opaque,
                              int (*cb)(unsigned long start, unsigned long len,
                                        void *bitmap, void *opaque))
{
        int i;
        int r;
        unsigned long end_addr = phys_addr + len;

        for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i) {
                if ((slots[i].len && (uint64_t)slots[i].phys_addr >= phys_addr)
                    && ((uint64_t)slots[i].phys_addr + slots[i].len <= end_addr)) {
                        r = kvm_get_map(kvm, KVM_GET_DIRTY_LOG, i, buf);
                        if (r)
                                return r;
                        r = cb(slots[i].phys_addr, slots[i].len, buf, opaque);
                        if (r)
                                return r;
                }
        }
        return 0;
}
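/*
 * Usage sketch (illustrative): a callback suitable for
 * kvm_get_dirty_pages_range() that counts dirty pages. It assumes the
 * bitmap holds one bit per page, least-significant bit first within each
 * byte, as KVM_GET_DIRTY_LOG produces on little-endian hosts.
 *
 *      static int count_dirty(unsigned long start, unsigned long len,
 *                             void *bitmap, void *opaque)
 *      {
 *              unsigned long i, *count = opaque;
 *
 *              for (i = 0; i < len / kvm_page_size; ++i)
 *                      if (((unsigned char *)bitmap)[i / 8] & (1 << (i % 8)))
 *                              ++*count;
 *              return 0;
 *      }
 */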
#ifdef KVM_CAP_IRQCHIP

int kvm_set_irq_level(kvm_context_t kvm, int irq, int level, int *status)
{
        struct kvm_irq_level event;
        int r;

        if (!kvm->irqchip_in_kernel)
                return 0;
        event.level = level;
        event.irq = irq;
        r = ioctl(kvm->vm_fd, kvm->irqchip_inject_ioctl, &event);
        if (r == -1)
                perror("kvm_set_irq_level");

        if (status) {
#ifdef KVM_CAP_IRQ_INJECT_STATUS
                *status = (kvm->irqchip_inject_ioctl == KVM_IRQ_LINE) ?
                        1 : event.status;
#else
                *status = 1;
#endif
        }

        return 1;
}
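/*
 * Usage sketch (illustrative; "ctx" and "irq" are hypothetical): pulsing an
 * edge-triggered line on the in-kernel irqchip. When the kernel supports
 * KVM_CAP_IRQ_INJECT_STATUS, *status reports whether the interrupt was
 * delivered or coalesced.
 *
 *      int status;
 *
 *      kvm_set_irq_level(ctx, irq, 1, &status);        // raise
 *      kvm_set_irq_level(ctx, irq, 0, NULL);           // lower
 */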
int kvm_get_irqchip(kvm_context_t kvm, struct kvm_irqchip *chip)
{
        int r;

        if (!kvm->irqchip_in_kernel)
                return 0;
        r = ioctl(kvm->vm_fd, KVM_GET_IRQCHIP, chip);
        if (r == -1) {
                r = -errno;
                perror("kvm_get_irqchip");
        }
        return r;
}

int kvm_set_irqchip(kvm_context_t kvm, struct kvm_irqchip *chip)
{
        int r;

        if (!kvm->irqchip_in_kernel)
                return 0;
        r = ioctl(kvm->vm_fd, KVM_SET_IRQCHIP, chip);
        if (r == -1) {
                r = -errno;
                perror("kvm_set_irqchip");
        }
        return r;
}

#endif
static int handle_io(kvm_context_t kvm, struct kvm_run *run, int vcpu)
{
        uint16_t addr = run->io.port;
        int r;
        int i;
        void *p = (void *)run + run->io.data_offset;

        for (i = 0; i < run->io.count; ++i) {
                switch (run->io.direction) {
                case KVM_EXIT_IO_IN:
                        switch (run->io.size) {
                        case 1:
                                r = kvm->callbacks->inb(kvm->opaque, addr, p);
                                break;
                        case 2:
                                r = kvm->callbacks->inw(kvm->opaque, addr, p);
                                break;
                        case 4:
                                r = kvm->callbacks->inl(kvm->opaque, addr, p);
                                break;
                        default:
                                fprintf(stderr, "bad I/O size %d\n", run->io.size);
                                return -EMSGSIZE;
                        }
                        break;
                case KVM_EXIT_IO_OUT:
                        switch (run->io.size) {
                        case 1:
                                r = kvm->callbacks->outb(kvm->opaque, addr,
                                                         *(uint8_t *)p);
                                break;
                        case 2:
                                r = kvm->callbacks->outw(kvm->opaque, addr,
                                                         *(uint16_t *)p);
                                break;
                        case 4:
                                r = kvm->callbacks->outl(kvm->opaque, addr,
                                                         *(uint32_t *)p);
                                break;
                        default:
                                fprintf(stderr, "bad I/O size %d\n", run->io.size);
                                return -EMSGSIZE;
                        }
                        break;
                default:
                        fprintf(stderr, "bad I/O direction %d\n", run->io.direction);
                        return -EPROTO;
                }

                p += run->io.size;
        }

        return 0;
}
int handle_debug(kvm_context_t kvm, int vcpu, void *env)
{
#ifdef KVM_CAP_SET_GUEST_DEBUG
        struct kvm_run *run = kvm->run[vcpu];

        return kvm->callbacks->debug(kvm->opaque, env, &run->debug.arch);
#else
        return 0;
#endif
}

int kvm_get_regs(kvm_context_t kvm, int vcpu, struct kvm_regs *regs)
{
        return ioctl(kvm->vcpu_fd[vcpu], KVM_GET_REGS, regs);
}

int kvm_set_regs(kvm_context_t kvm, int vcpu, struct kvm_regs *regs)
{
        return ioctl(kvm->vcpu_fd[vcpu], KVM_SET_REGS, regs);
}

int kvm_get_fpu(kvm_context_t kvm, int vcpu, struct kvm_fpu *fpu)
{
        return ioctl(kvm->vcpu_fd[vcpu], KVM_GET_FPU, fpu);
}

int kvm_set_fpu(kvm_context_t kvm, int vcpu, struct kvm_fpu *fpu)
{
        return ioctl(kvm->vcpu_fd[vcpu], KVM_SET_FPU, fpu);
}

int kvm_get_sregs(kvm_context_t kvm, int vcpu, struct kvm_sregs *sregs)
{
        return ioctl(kvm->vcpu_fd[vcpu], KVM_GET_SREGS, sregs);
}

int kvm_set_sregs(kvm_context_t kvm, int vcpu, struct kvm_sregs *sregs)
{
        return ioctl(kvm->vcpu_fd[vcpu], KVM_SET_SREGS, sregs);
}
#ifdef KVM_CAP_MP_STATE
int kvm_get_mpstate(kvm_context_t kvm, int vcpu, struct kvm_mp_state *mp_state)
{
        int r;

        r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_MP_STATE);
        if (r > 0)
                return ioctl(kvm->vcpu_fd[vcpu], KVM_GET_MP_STATE, mp_state);
        return -ENOSYS;
}

int kvm_set_mpstate(kvm_context_t kvm, int vcpu, struct kvm_mp_state *mp_state)
{
        int r;

        r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_MP_STATE);
        if (r > 0)
                return ioctl(kvm->vcpu_fd[vcpu], KVM_SET_MP_STATE, mp_state);
        return -ENOSYS;
}
#endif
static int handle_mmio(kvm_context_t kvm, struct kvm_run *kvm_run)
{
        unsigned long addr = kvm_run->mmio.phys_addr;
        void *data = kvm_run->mmio.data;

        /* hack: Red Hat 7.1 generates these weird accesses. */
        if ((addr > 0xa0000-4 && addr <= 0xa0000) && kvm_run->mmio.len == 3)
                return 0;

        if (kvm_run->mmio.is_write)
                return kvm->callbacks->mmio_write(kvm->opaque, addr, data,
                                                  kvm_run->mmio.len);
        else
                return kvm->callbacks->mmio_read(kvm->opaque, addr, data,
                                                 kvm_run->mmio.len);
}
int handle_io_window(kvm_context_t kvm)
{
        return kvm->callbacks->io_window(kvm->opaque);
}

int handle_halt(kvm_context_t kvm, int vcpu)
{
        return kvm->callbacks->halt(kvm->opaque, vcpu);
}

int handle_shutdown(kvm_context_t kvm, void *env)
{
        return kvm->callbacks->shutdown(kvm->opaque, env);
}

int try_push_interrupts(kvm_context_t kvm)
{
        return kvm->callbacks->try_push_interrupts(kvm->opaque);
}

static inline void push_nmi(kvm_context_t kvm)
{
#ifdef KVM_CAP_USER_NMI
        kvm->callbacks->push_nmi(kvm->opaque);
#endif /* KVM_CAP_USER_NMI */
}

void post_kvm_run(kvm_context_t kvm, void *env)
{
        kvm->callbacks->post_kvm_run(kvm->opaque, env);
}

int pre_kvm_run(kvm_context_t kvm, void *env)
{
        return kvm->callbacks->pre_kvm_run(kvm->opaque, env);
}

int kvm_get_interrupt_flag(kvm_context_t kvm, int vcpu)
{
        struct kvm_run *run = kvm->run[vcpu];

        return run->if_flag;
}

int kvm_is_ready_for_interrupt_injection(kvm_context_t kvm, int vcpu)
{
        struct kvm_run *run = kvm->run[vcpu];

        return run->ready_for_interrupt_injection;
}
int kvm_run(kvm_context_t kvm, int vcpu, void *env)
{
        int r;
        int fd = kvm->vcpu_fd[vcpu];
        struct kvm_run *run = kvm->run[vcpu];

again:
        push_nmi(kvm);
#if !defined(__s390__)
        if (!kvm->irqchip_in_kernel)
                run->request_interrupt_window = try_push_interrupts(kvm);
#endif
        r = pre_kvm_run(kvm, env);
        if (r)
                return r;
        r = ioctl(fd, KVM_RUN, 0);

        if (r == -1 && errno != EINTR && errno != EAGAIN) {
                r = -errno;
                post_kvm_run(kvm, env);
                fprintf(stderr, "kvm_run: %s\n", strerror(-r));
                return r;
        }

        post_kvm_run(kvm, env);

#if defined(KVM_CAP_COALESCED_MMIO)
        if (kvm->coalesced_mmio) {
                struct kvm_coalesced_mmio_ring *ring = (void *)run +
                        kvm->coalesced_mmio * PAGE_SIZE;

                while (ring->first != ring->last) {
                        kvm->callbacks->mmio_write(kvm->opaque,
                                 ring->coalesced_mmio[ring->first].phys_addr,
                                &ring->coalesced_mmio[ring->first].data[0],
                                 ring->coalesced_mmio[ring->first].len);
                        smp_wmb();
                        ring->first = (ring->first + 1) %
                                KVM_COALESCED_MMIO_MAX;
                }
        }
#endif

#if !defined(__s390__)
        if (r == -1) {
                r = handle_io_window(kvm);
                goto more;
        }
#endif
        if (1) {
                switch (run->exit_reason) {
                case KVM_EXIT_UNKNOWN:
                        fprintf(stderr, "unhandled vm exit: 0x%x vcpu_id %d\n",
                                (unsigned)run->hw.hardware_exit_reason, vcpu);
                        kvm_show_regs(kvm, vcpu);
                        abort();
                        break;
                case KVM_EXIT_FAIL_ENTRY:
                        fprintf(stderr, "kvm_run: failed entry, reason %u\n",
                                (unsigned)run->fail_entry.hardware_entry_failure_reason & 0xffff);
                        kvm_show_regs(kvm, vcpu);
                        return -ENOEXEC;
                        break;
                case KVM_EXIT_EXCEPTION:
                        fprintf(stderr, "exception %d (%x)\n",
                                run->ex.exception,
                                run->ex.error_code);
                        kvm_show_regs(kvm, vcpu);
                        kvm_show_code(kvm, vcpu);
                        abort();
                        break;
                case KVM_EXIT_IO:
                        r = handle_io(kvm, run, vcpu);
                        break;
                case KVM_EXIT_DEBUG:
                        r = handle_debug(kvm, vcpu, env);
                        break;
                case KVM_EXIT_MMIO:
                        r = handle_mmio(kvm, run);
                        break;
                case KVM_EXIT_HLT:
                        r = handle_halt(kvm, vcpu);
                        break;
                case KVM_EXIT_IRQ_WINDOW_OPEN:
                        break;
                case KVM_EXIT_SHUTDOWN:
                        r = handle_shutdown(kvm, env);
                        break;
#if defined(__s390__)
                case KVM_EXIT_S390_SIEIC:
                        r = kvm->callbacks->s390_handle_intercept(kvm, vcpu,
                                run);
                        break;
                case KVM_EXIT_S390_RESET:
                        r = kvm->callbacks->s390_handle_reset(kvm, vcpu, run);
                        break;
#endif
                default:
                        if (kvm_arch_run(run, kvm, vcpu)) {
                                fprintf(stderr, "unhandled vm exit: 0x%x\n",
                                        run->exit_reason);
                                kvm_show_regs(kvm, vcpu);
                                abort();
                        }
                        break;
                }
        }
more:
        if (!r)
                goto again;
        return r;
}
int kvm_inject_irq(kvm_context_t kvm, int vcpu, unsigned irq)
{
        struct kvm_interrupt intr;

        intr.irq = irq;
        return ioctl(kvm->vcpu_fd[vcpu], KVM_INTERRUPT, &intr);
}

#ifdef KVM_CAP_SET_GUEST_DEBUG
int kvm_set_guest_debug(kvm_context_t kvm, int vcpu, struct kvm_guest_debug *dbg)
{
        return ioctl(kvm->vcpu_fd[vcpu], KVM_SET_GUEST_DEBUG, dbg);
}
#endif

int kvm_set_signal_mask(kvm_context_t kvm, int vcpu, const sigset_t *sigset)
{
        struct kvm_signal_mask *sigmask;
        int r;

        if (!sigset) {
                r = ioctl(kvm->vcpu_fd[vcpu], KVM_SET_SIGNAL_MASK, NULL);
                if (r == -1)
                        r = -errno;
                return r;
        }
        sigmask = malloc(sizeof(*sigmask) + sizeof(*sigset));
        if (!sigmask)
                return -ENOMEM;

        sigmask->len = 8;
        memcpy(sigmask->sigset, sigset, sizeof(*sigset));
        r = ioctl(kvm->vcpu_fd[vcpu], KVM_SET_SIGNAL_MASK, sigmask);
        if (r == -1)
                r = -errno;
        free(sigmask);
        return r;
}
int kvm_irqchip_in_kernel(kvm_context_t kvm)
{
        return kvm->irqchip_in_kernel;
}

int kvm_pit_in_kernel(kvm_context_t kvm)
{
        return kvm->pit_in_kernel;
}

int kvm_has_sync_mmu(kvm_context_t kvm)
{
        int r = 0;
#ifdef KVM_CAP_SYNC_MMU
        r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_SYNC_MMU);
#endif
        return r;
}

int kvm_inject_nmi(kvm_context_t kvm, int vcpu)
{
#ifdef KVM_CAP_USER_NMI
        return ioctl(kvm->vcpu_fd[vcpu], KVM_NMI);
#else
        return -ENOSYS;
#endif
}
int kvm_init_coalesced_mmio(kvm_context_t kvm)
{
        int r = 0;

        kvm->coalesced_mmio = 0;
#ifdef KVM_CAP_COALESCED_MMIO
        r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_COALESCED_MMIO);
        if (r > 0) {
                kvm->coalesced_mmio = r;
                return 0;
        }
#endif
        return r;
}
int kvm_register_coalesced_mmio(kvm_context_t kvm, uint64_t addr, uint32_t size)
{
#ifdef KVM_CAP_COALESCED_MMIO
        struct kvm_coalesced_mmio_zone zone;
        int r;

        if (kvm->coalesced_mmio) {

                zone.addr = addr;
                zone.size = size;

                r = ioctl(kvm->vm_fd, KVM_REGISTER_COALESCED_MMIO, &zone);
                if (r == -1) {
                        perror("kvm_register_coalesced_mmio_zone");
                        return -errno;
                }
                return 0;
        }
#endif
        return -ENOSYS;
}

int kvm_unregister_coalesced_mmio(kvm_context_t kvm, uint64_t addr, uint32_t size)
{
#ifdef KVM_CAP_COALESCED_MMIO
        struct kvm_coalesced_mmio_zone zone;
        int r;

        if (kvm->coalesced_mmio) {

                zone.addr = addr;
                zone.size = size;

                r = ioctl(kvm->vm_fd, KVM_UNREGISTER_COALESCED_MMIO, &zone);
                if (r == -1) {
                        perror("kvm_unregister_coalesced_mmio_zone");
                        return -errno;
                }
                DPRINTF("Unregistered coalesced mmio region for %llx (%lx)\n", addr, size);
                return 0;
        }
#endif
        return -ENOSYS;
}
#ifdef KVM_CAP_DEVICE_ASSIGNMENT
int kvm_assign_pci_device(kvm_context_t kvm,
                          struct kvm_assigned_pci_dev *assigned_dev)
{
        int ret;

        ret = ioctl(kvm->vm_fd, KVM_ASSIGN_PCI_DEVICE, assigned_dev);
        if (ret < 0)
                return -errno;

        return ret;
}

static int kvm_old_assign_irq(kvm_context_t kvm,
                              struct kvm_assigned_irq *assigned_irq)
{
        int ret;

        ret = ioctl(kvm->vm_fd, KVM_ASSIGN_IRQ, assigned_irq);
        if (ret < 0)
                return -errno;

        return ret;
}

#ifdef KVM_CAP_ASSIGN_DEV_IRQ
int kvm_assign_irq(kvm_context_t kvm,
                   struct kvm_assigned_irq *assigned_irq)
{
        int ret;

        ret = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_ASSIGN_DEV_IRQ);
        if (ret > 0) {
                ret = ioctl(kvm->vm_fd, KVM_ASSIGN_DEV_IRQ, assigned_irq);
                if (ret < 0)
                        return -errno;
                return ret;
        }

        return kvm_old_assign_irq(kvm, assigned_irq);
}

int kvm_deassign_irq(kvm_context_t kvm,
                     struct kvm_assigned_irq *assigned_irq)
{
        int ret;

        ret = ioctl(kvm->vm_fd, KVM_DEASSIGN_DEV_IRQ, assigned_irq);
        if (ret < 0)
                return -errno;

        return ret;
}
#else
int kvm_assign_irq(kvm_context_t kvm,
                   struct kvm_assigned_irq *assigned_irq)
{
        return kvm_old_assign_irq(kvm, assigned_irq);
}
#endif
#endif

#ifdef KVM_CAP_DEVICE_DEASSIGNMENT
int kvm_deassign_pci_device(kvm_context_t kvm,
                            struct kvm_assigned_pci_dev *assigned_dev)
{
        int ret;

        ret = ioctl(kvm->vm_fd, KVM_DEASSIGN_PCI_DEVICE, assigned_dev);
        if (ret < 0)
                return -errno;

        return ret;
}
#endif
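/*
 * Usage sketch (illustrative; all field values are hypothetical): assigning
 * a host PCI device and then routing its interrupt to the guest. On kernels
 * with KVM_CAP_ASSIGN_DEV_IRQ, kvm_assign_irq() above uses the newer ioctl;
 * otherwise it falls back to kvm_old_assign_irq().
 *
 *      struct kvm_assigned_pci_dev dev = {
 *              .assigned_dev_id = dev_id,
 *              .busnr = bus,
 *              .devfn = devfn,
 *      };
 *      struct kvm_assigned_irq irq = {
 *              .assigned_dev_id = dev_id,
 *              .host_irq = host_irq,
 *              .guest_irq = guest_irq,
 *      };
 *
 *      if (kvm_assign_pci_device(ctx, &dev) == 0)
 *              kvm_assign_irq(ctx, &irq);
 */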
int kvm_destroy_memory_region_works(kvm_context_t kvm)
{
        int ret = 0;

#ifdef KVM_CAP_DESTROY_MEMORY_REGION_WORKS
        ret = ioctl(kvm->fd, KVM_CHECK_EXTENSION,
                    KVM_CAP_DESTROY_MEMORY_REGION_WORKS);
        if (ret <= 0)
                ret = 0;
#endif
        return ret;
}

int kvm_reinject_control(kvm_context_t kvm, int pit_reinject)
{
#ifdef KVM_CAP_REINJECT_CONTROL
        int r;
        struct kvm_reinject_control control;

        control.pit_reinject = pit_reinject;

        r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_REINJECT_CONTROL);
        if (r > 0) {
                r = ioctl(kvm->vm_fd, KVM_REINJECT_CONTROL, &control);
                if (r == -1)
                        return -errno;
                return r;
        }
#endif
        return -ENOSYS;
}
int kvm_has_gsi_routing(kvm_context_t kvm)
{
        int r = 0;

#ifdef KVM_CAP_IRQ_ROUTING
        r = kvm_check_extension(kvm, KVM_CAP_IRQ_ROUTING);
#endif
        return r;
}

int kvm_get_gsi_count(kvm_context_t kvm)
{
#ifdef KVM_CAP_IRQ_ROUTING
        return kvm_check_extension(kvm, KVM_CAP_IRQ_ROUTING);
#else
        return -EINVAL;
#endif
}

int kvm_clear_gsi_routes(kvm_context_t kvm)
{
#ifdef KVM_CAP_IRQ_ROUTING
        kvm->irq_routes->nr = 0;
        return 0;
#else
        return -EINVAL;
#endif
}
int kvm_add_routing_entry(kvm_context_t kvm,
                          struct kvm_irq_routing_entry *entry)
{
#ifdef KVM_CAP_IRQ_ROUTING
        struct kvm_irq_routing *z;
        struct kvm_irq_routing_entry *new;
        int n, size;

        if (kvm->irq_routes->nr == kvm->nr_allocated_irq_routes) {
                n = kvm->nr_allocated_irq_routes * 2;
                if (n < 64)
                        n = 64;
                size = sizeof(struct kvm_irq_routing);
                size += n * sizeof(*new);
                z = realloc(kvm->irq_routes, size);
                if (!z)
                        return -ENOMEM;
                kvm->nr_allocated_irq_routes = n;
                kvm->irq_routes = z;
        }
        n = kvm->irq_routes->nr++;
        new = &kvm->irq_routes->entries[n];
        memset(new, 0, sizeof(*new));
        new->gsi = entry->gsi;
        new->type = entry->type;
        new->flags = entry->flags;
        new->u = entry->u;

        if (entry->gsi > kvm->max_used_gsi)
                kvm->max_used_gsi = entry->gsi;
        return 0;
#else
        return -ENOSYS;
#endif
}
int kvm_add_irq_route(kvm_context_t kvm, int gsi, int irqchip, int pin)
{
#ifdef KVM_CAP_IRQ_ROUTING
        struct kvm_irq_routing_entry e;

        e.gsi = gsi;
        e.type = KVM_IRQ_ROUTING_IRQCHIP;
        e.flags = 0;
        e.u.irqchip.irqchip = irqchip;
        e.u.irqchip.pin = pin;
        return kvm_add_routing_entry(kvm, &e);
#else
        return -ENOSYS;
#endif
}
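/*
 * Usage sketch (illustrative; "ctx", "addr_lo", "addr_hi" and "data" are
 * hypothetical): installing an MSI route, which is how an MSI capability
 * exposed to the guest gets wired to a GSI. The address/data pair comes
 * from the guest-programmed MSI registers; kvm_get_irq_route_gsi() (defined
 * at the end of this file) picks a free GSI above the IOAPIC pins.
 *
 *      struct kvm_irq_routing_entry e = {
 *              .gsi = kvm_get_irq_route_gsi(ctx),
 *              .type = KVM_IRQ_ROUTING_MSI,
 *              .u.msi.address_lo = addr_lo,
 *              .u.msi.address_hi = addr_hi,
 *              .u.msi.data = data,
 *      };
 *
 *      kvm_add_routing_entry(ctx, &e);
 *      kvm_commit_irq_routes(ctx);     // flush the table to the kernel
 */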
int kvm_del_routing_entry(kvm_context_t kvm,
                          struct kvm_irq_routing_entry *entry)
{
#ifdef KVM_CAP_IRQ_ROUTING
        struct kvm_irq_routing_entry *e, *p;
        int i, found = 0;

        for (i = 0; i < kvm->irq_routes->nr; ++i) {
                e = &kvm->irq_routes->entries[i];
                if (e->type == entry->type
                    && e->gsi == entry->gsi) {
                        switch (e->type)
                        {
                        case KVM_IRQ_ROUTING_IRQCHIP: {
                                if (e->u.irqchip.irqchip ==
                                    entry->u.irqchip.irqchip
                                    && e->u.irqchip.pin ==
                                    entry->u.irqchip.pin) {
                                        p = &kvm->irq_routes->
                                            entries[--kvm->irq_routes->nr];
                                        *e = *p;
                                        found = 1;
                                }
                                break;
                        }
                        case KVM_IRQ_ROUTING_MSI: {
                                if (e->u.msi.address_lo ==
                                    entry->u.msi.address_lo
                                    && e->u.msi.address_hi ==
                                    entry->u.msi.address_hi
                                    && e->u.msi.data == entry->u.msi.data) {
                                        p = &kvm->irq_routes->
                                            entries[--kvm->irq_routes->nr];
                                        *e = *p;
                                        found = 1;
                                }
                                break;
                        }
                        default:
                                break;
                        }
                        if (found)
                                return 0;
                }
        }
        return -ESRCH;
#else
        return -ENOSYS;
#endif
}
int kvm_del_irq_route(kvm_context_t kvm, int gsi, int irqchip, int pin)
{
#ifdef KVM_CAP_IRQ_ROUTING
        struct kvm_irq_routing_entry e;

        e.gsi = gsi;
        e.type = KVM_IRQ_ROUTING_IRQCHIP;
        e.flags = 0;
        e.u.irqchip.irqchip = irqchip;
        e.u.irqchip.pin = pin;
        return kvm_del_routing_entry(kvm, &e);
#else
        return -ENOSYS;
#endif
}
int kvm_commit_irq_routes(kvm_context_t kvm)
{
#ifdef KVM_CAP_IRQ_ROUTING
        int r;

        kvm->irq_routes->flags = 0;
        r = ioctl(kvm->vm_fd, KVM_SET_GSI_ROUTING, kvm->irq_routes);
        if (r == -1)
                r = -errno;
        return r;
#else
        return -ENOSYS;
#endif
}
int kvm_get_irq_route_gsi(kvm_context_t kvm)
{
        if (kvm->max_used_gsi >= KVM_IOAPIC_NUM_PINS) {
                if (kvm->max_used_gsi <= kvm_get_gsi_count(kvm))
                        return kvm->max_used_gsi + 1;
                else
                        return -ENOSPC;
        } else
                return KVM_IOAPIC_NUM_PINS;
}