Revert "support colon in filenames"
[qemu-kvm/fedora.git] / qemu-kvm.c
blob982ad334ddfa93019283e3c6ba0b7a977048a9db
1 /*
2 * qemu/kvm integration
4 * Copyright (C) 2006-2008 Qumranet Technologies
6 * Licensed under the terms of the GNU GPL version 2 or higher.
7 */
8 #include "config.h"
9 #include "config-host.h"
11 #include <assert.h>
12 #include <string.h>
13 #include "hw/hw.h"
14 #include "sysemu.h"
15 #include "qemu-common.h"
16 #include "console.h"
17 #include "block.h"
18 #include "compatfd.h"
19 #include "gdbstub.h"
21 #include "qemu-kvm.h"
22 #include "libkvm-all.h"
23 #include "libkvm.h"
25 #include <pthread.h>
26 #include <sys/utsname.h>
27 #include <sys/syscall.h>
28 #include <sys/mman.h>
29 #include <sys/ioctl.h>
30 #include <signal.h>
32 #define false 0
33 #define true 1
35 #define EXPECTED_KVM_API_VERSION 12
37 #if EXPECTED_KVM_API_VERSION != KVM_API_VERSION
38 #error libkvm: userspace and kernel version mismatch
39 #endif
41 int kvm_allowed = 1;
42 int kvm_irqchip = 1;
43 int kvm_pit = 1;
44 int kvm_pit_reinject = 1;
45 int kvm_nested = 0;
46 kvm_context_t kvm_context;
48 pthread_mutex_t qemu_mutex = PTHREAD_MUTEX_INITIALIZER;
49 pthread_cond_t qemu_vcpu_cond = PTHREAD_COND_INITIALIZER;
50 pthread_cond_t qemu_system_cond = PTHREAD_COND_INITIALIZER;
51 pthread_cond_t qemu_pause_cond = PTHREAD_COND_INITIALIZER;
52 pthread_cond_t qemu_work_cond = PTHREAD_COND_INITIALIZER;
53 __thread CPUState *current_env;
55 static int qemu_system_ready;
57 #define SIG_IPI (SIGRTMIN+4)
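/* SIG_IPI is the realtime signal used to kick a vcpu thread out of
 * KVM_RUN; see the pthread_kill(..., SIG_IPI) calls below. */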
59 pthread_t io_thread;
60 static int io_thread_fd = -1;
61 static int io_thread_sigfd = -1;
63 static CPUState *kvm_debug_cpu_requested;
65 static uint64_t phys_ram_size;
67 /* The list of ioperm_data */
68 static LIST_HEAD(, ioperm_data) ioperm_head;
70 //#define DEBUG_MEMREG
71 #ifdef DEBUG_MEMREG
72 #define DPRINTF(fmt, args...) \
73 do { fprintf(stderr, "%s:%d " fmt , __func__, __LINE__, ##args); } while (0)
74 #else
75 #define DPRINTF(fmt, args...) do {} while (0)
76 #endif
78 #define ALIGN(x, y) (((x)+(y)-1) & ~((y)-1))
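/* Rounds x up to the next multiple of y; y must be a power of two,
 * e.g. ALIGN(33, 32) == 64. */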
80 int kvm_abi = EXPECTED_KVM_API_VERSION;
81 int kvm_page_size;
83 #ifdef KVM_CAP_SET_GUEST_DEBUG
84 static int kvm_debug(void *opaque, void *data,
85 struct kvm_debug_exit_arch *arch_info)
87 int handle = kvm_arch_debug(arch_info);
88 CPUState *env = data;
90 if (handle) {
91 kvm_debug_cpu_requested = env;
92 env->kvm_cpu_state.stopped = 1;
94 return handle;
96 #endif
98 static int kvm_inb(void *opaque, uint16_t addr, uint8_t *data)
100 *data = cpu_inb(0, addr);
101 return 0;
104 static int kvm_inw(void *opaque, uint16_t addr, uint16_t *data)
106 *data = cpu_inw(0, addr);
107 return 0;
110 static int kvm_inl(void *opaque, uint16_t addr, uint32_t *data)
112 *data = cpu_inl(0, addr);
113 return 0;
116 #define PM_IO_BASE 0xb000
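/* Assumed layout of the guest's ACPI PM I/O block: port 0xb2 handled below is
 * the APM/SMI command port, and bit 0 of PM_IO_BASE + 4 (PM1a control, SCI_EN)
 * switches ACPI on or off. */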
118 static int kvm_outb(void *opaque, uint16_t addr, uint8_t data)
120 if (addr == 0xb2) {
121 switch (data) {
122 case 0: {
123 cpu_outb(0, 0xb3, 0);
124 break;
126 case 0xf0: {
127 unsigned x;
129 /* disable acpi */
130 x = cpu_inw(0, PM_IO_BASE + 4);
131 x &= ~1;
132 cpu_outw(0, PM_IO_BASE + 4, x);
133 break;
135 case 0xf1: {
136 unsigned x;
138 /* enable acpi */
139 x = cpu_inw(0, PM_IO_BASE + 4);
140 x |= 1;
141 cpu_outw(0, PM_IO_BASE + 4, x);
142 break;
144 default:
145 break;
147 return 0;
149 cpu_outb(0, addr, data);
150 return 0;
153 static int kvm_outw(void *opaque, uint16_t addr, uint16_t data)
155 cpu_outw(0, addr, data);
156 return 0;
159 static int kvm_outl(void *opaque, uint16_t addr, uint32_t data)
161 cpu_outl(0, addr, data);
162 return 0;
165 int kvm_mmio_read(void *opaque, uint64_t addr, uint8_t *data, int len)
167 cpu_physical_memory_rw(addr, data, len, 0);
168 return 0;
171 int kvm_mmio_write(void *opaque, uint64_t addr, uint8_t *data, int len)
173 cpu_physical_memory_rw(addr, data, len, 1);
174 return 0;
177 static int handle_unhandled(kvm_context_t kvm, kvm_vcpu_context_t vcpu,
178 uint64_t reason)
180 fprintf(stderr, "kvm: unhandled exit %"PRIx64"\n", reason);
181 return -EINVAL;
185 static inline void set_gsi(kvm_context_t kvm, unsigned int gsi)
187 uint32_t *bitmap = kvm->used_gsi_bitmap;
189 if (gsi < kvm->max_gsi)
190 bitmap[gsi / 32] |= 1U << (gsi % 32);
191 else
192 DPRINTF("Invalid GSI %u\n", gsi);
195 static inline void clear_gsi(kvm_context_t kvm, unsigned int gsi)
197 uint32_t *bitmap = kvm->used_gsi_bitmap;
199 if (gsi < kvm->max_gsi)
200 bitmap[gsi / 32] &= ~(1U << (gsi % 32));
201 else
202 DPRINTF("Invalid GSI %u\n", gsi);
205 struct slot_info {
206 unsigned long phys_addr;
207 unsigned long len;
208 unsigned long userspace_addr;
209 unsigned flags;
210 int logging_count;
213 struct slot_info slots[KVM_MAX_NUM_MEM_REGIONS];
215 static void init_slots(void)
217 int i;
219 for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i)
220 slots[i].len = 0;
223 static int get_free_slot(kvm_context_t kvm)
225 int i;
226 int tss_ext;
228 #if defined(KVM_CAP_SET_TSS_ADDR) && !defined(__s390__)
229 tss_ext = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_SET_TSS_ADDR);
230 #else
231 tss_ext = 0;
232 #endif
235 * on older kernels where the set tss ioctl is not supported we must save
236 * slot 0 to hold the extended memory, as the vmx will use the last 3
237 * pages of this slot.
239 if (tss_ext > 0)
240 i = 0;
241 else
242 i = 1;
244 for (; i < KVM_MAX_NUM_MEM_REGIONS; ++i)
245 if (!slots[i].len)
246 return i;
247 return -1;
250 static void register_slot(int slot, unsigned long phys_addr, unsigned long len,
251 unsigned long userspace_addr, unsigned flags)
253 slots[slot].phys_addr = phys_addr;
254 slots[slot].len = len;
255 slots[slot].userspace_addr = userspace_addr;
256 slots[slot].flags = flags;
259 static void free_slot(int slot)
261 slots[slot].len = 0;
262 slots[slot].logging_count = 0;
265 static int get_slot(unsigned long phys_addr)
267 int i;
269 for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS ; ++i) {
270 if (slots[i].len && slots[i].phys_addr <= phys_addr &&
271 (slots[i].phys_addr + slots[i].len-1) >= phys_addr)
272 return i;
274 return -1;
277 /* Returns -1 if the region is not totally contained in any existing slot,
278  * otherwise the number of the containing slot */
279 static int get_container_slot(uint64_t phys_addr, unsigned long size)
281 int i;
283 for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS ; ++i)
284 if (slots[i].len && slots[i].phys_addr <= phys_addr &&
285 (slots[i].phys_addr + slots[i].len) >= phys_addr + size)
286 return i;
287 return -1;
290 int kvm_is_containing_region(kvm_context_t kvm, unsigned long phys_addr, unsigned long size)
292 int slot = get_container_slot(phys_addr, size);
293 if (slot == -1)
294 return 0;
295 return 1;
299 * dirty pages logging control
301 static int kvm_dirty_pages_log_change(kvm_context_t kvm,
302 unsigned long phys_addr,
303 unsigned flags,
304 unsigned mask)
306 int r = -1;
307 int slot = get_slot(phys_addr);
309 if (slot == -1) {
310 fprintf(stderr, "BUG: %s: invalid parameters\n", __FUNCTION__);
311 return 1;
314 flags = (slots[slot].flags & ~mask) | flags;
315 if (flags == slots[slot].flags)
316 return 0;
317 slots[slot].flags = flags;
320 struct kvm_userspace_memory_region mem = {
321 .slot = slot,
322 .memory_size = slots[slot].len,
323 .guest_phys_addr = slots[slot].phys_addr,
324 .userspace_addr = slots[slot].userspace_addr,
325 .flags = slots[slot].flags,
329 DPRINTF("slot %d start %llx len %llx flags %x\n",
330 mem.slot,
331 mem.guest_phys_addr,
332 mem.memory_size,
333 mem.flags);
334 r = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &mem);
335 if (r == -1)
336 fprintf(stderr, "%s: %m\n", __FUNCTION__);
338 return r;
341 static int kvm_dirty_pages_log_change_all(kvm_context_t kvm,
342 int (*change)(kvm_context_t kvm,
343 uint64_t start,
344 uint64_t len))
346 int i, r;
348 for (i=r=0; i<KVM_MAX_NUM_MEM_REGIONS && r==0; i++) {
349 if (slots[i].len)
350 r = change(kvm, slots[i].phys_addr, slots[i].len);
352 return r;
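/* Dirty logging on a slot is reference counted: only the first enable and the
 * last disable below actually toggle KVM_MEM_LOG_DIRTY_PAGES on the slot. */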
355 int kvm_dirty_pages_log_enable_slot(kvm_context_t kvm,
356 uint64_t phys_addr,
357 uint64_t len)
359 int slot = get_slot(phys_addr);
361 DPRINTF("start %"PRIx64" len %"PRIx64"\n", phys_addr, len);
362 if (slot == -1) {
363 fprintf(stderr, "BUG: %s: invalid parameters\n", __func__);
364 return -EINVAL;
367 if (slots[slot].logging_count++)
368 return 0;
370 return kvm_dirty_pages_log_change(kvm, slots[slot].phys_addr,
371 KVM_MEM_LOG_DIRTY_PAGES,
372 KVM_MEM_LOG_DIRTY_PAGES);
375 int kvm_dirty_pages_log_disable_slot(kvm_context_t kvm,
376 uint64_t phys_addr,
377 uint64_t len)
379 int slot = get_slot(phys_addr);
381 if (slot == -1) {
382 fprintf(stderr, "BUG: %s: invalid parameters\n", __func__);
383 return -EINVAL;
386 if (--slots[slot].logging_count)
387 return 0;
389 return kvm_dirty_pages_log_change(kvm, slots[slot].phys_addr,
391 KVM_MEM_LOG_DIRTY_PAGES);
395 * Enable dirty page logging for all memory regions
397 int kvm_dirty_pages_log_enable_all(kvm_context_t kvm)
399 if (kvm->dirty_pages_log_all)
400 return 0;
401 kvm->dirty_pages_log_all = 1;
402 return kvm_dirty_pages_log_change_all(kvm,
403 kvm_dirty_pages_log_enable_slot);
407 * Enable dirty page logging only for memory regions that were created with
408 * dirty logging enabled (disable for all other memory regions).
410 int kvm_dirty_pages_log_reset(kvm_context_t kvm)
412 if (!kvm->dirty_pages_log_all)
413 return 0;
414 kvm->dirty_pages_log_all = 0;
415 return kvm_dirty_pages_log_change_all(kvm,
416 kvm_dirty_pages_log_disable_slot);
420 kvm_context_t kvm_init(void *opaque)
422 int fd;
423 kvm_context_t kvm;
424 int r, gsi_count;
426 fd = open("/dev/kvm", O_RDWR);
427 if (fd == -1) {
428 perror("open /dev/kvm");
429 return NULL;
431 r = ioctl(fd, KVM_GET_API_VERSION, 0);
432 if (r == -1) {
433 fprintf(stderr, "kvm kernel version too old: "
434 "KVM_GET_API_VERSION ioctl not supported\n");
435 goto out_close;
437 if (r < EXPECTED_KVM_API_VERSION) {
438 fprintf(stderr, "kvm kernel version too old: "
439 "We expect API version %d or newer, but got "
440 "version %d\n",
441 EXPECTED_KVM_API_VERSION, r);
442 goto out_close;
444 if (r > EXPECTED_KVM_API_VERSION) {
445 fprintf(stderr, "kvm userspace version too old\n");
446 goto out_close;
448 kvm_abi = r;
449 kvm_page_size = getpagesize();
450 kvm = malloc(sizeof(*kvm));
451 if (kvm == NULL)
452 goto out_close;
453 memset(kvm, 0, sizeof(*kvm));
454 kvm->fd = fd;
455 kvm->vm_fd = -1;
456 kvm->opaque = opaque;
457 kvm->dirty_pages_log_all = 0;
458 kvm->no_irqchip_creation = 0;
459 kvm->no_pit_creation = 0;
461 gsi_count = kvm_get_gsi_count(kvm);
462 if (gsi_count > 0) {
463 int gsi_bits, i;
465 /* Round up so we can search ints using ffs */
466 gsi_bits = ALIGN(gsi_count, 32);
467 kvm->used_gsi_bitmap = malloc(gsi_bits / 8);
468 if (!kvm->used_gsi_bitmap)
469 goto out_close;
470 memset(kvm->used_gsi_bitmap, 0, gsi_bits / 8);
471 kvm->max_gsi = gsi_bits;
473 /* Mark any over-allocated bits as already in use */
474 for (i = gsi_count; i < gsi_bits; i++)
475 set_gsi(kvm, i);
478 return kvm;
479 out_close:
480 close(fd);
481 return NULL;
484 void kvm_finalize(kvm_context_t kvm)
486 /* FIXME
487 if (kvm->vcpu_fd[0] != -1)
488 close(kvm->vcpu_fd[0]);
489 if (kvm->vm_fd != -1)
490 close(kvm->vm_fd);
492 close(kvm->fd);
493 free(kvm);
496 void kvm_disable_irqchip_creation(kvm_context_t kvm)
498 kvm->no_irqchip_creation = 1;
501 void kvm_disable_pit_creation(kvm_context_t kvm)
503 kvm->no_pit_creation = 1;
506 kvm_vcpu_context_t kvm_create_vcpu(kvm_context_t kvm, int id)
508 long mmap_size;
509 int r;
510 kvm_vcpu_context_t vcpu_ctx = malloc(sizeof(struct kvm_vcpu_context));
512 if (!vcpu_ctx) {
513 errno = ENOMEM;
514 return NULL;
517 vcpu_ctx->kvm = kvm;
518 vcpu_ctx->id = id;
520 r = ioctl(kvm->vm_fd, KVM_CREATE_VCPU, id);
521 if (r == -1) {
522 fprintf(stderr, "kvm_create_vcpu: %m\n");
523 goto err;
525 vcpu_ctx->fd = r;
526 mmap_size = ioctl(kvm->fd, KVM_GET_VCPU_MMAP_SIZE, 0);
527 if (mmap_size == -1) {
528 fprintf(stderr, "get vcpu mmap size: %m\n");
529 goto err_fd;
531 vcpu_ctx->run = mmap(NULL, mmap_size, PROT_READ|PROT_WRITE, MAP_SHARED,
532 vcpu_ctx->fd, 0);
533 if (vcpu_ctx->run == MAP_FAILED) {
534 fprintf(stderr, "mmap vcpu area: %m\n");
535 goto err_fd;
537 return vcpu_ctx;
538 err_fd:
539 close(vcpu_ctx->fd);
540 err:
541 free(vcpu_ctx);
542 return NULL;
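/* Illustrative per-vcpu setup, as done later by ap_main_loop() (sketch only,
 * error handling omitted):
 *   vcpu_ctx = kvm_create_vcpu(kvm_context, env->cpu_index);
 *   ...
 *   r = kvm_run(vcpu_ctx, env);
 */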
545 static int kvm_set_boot_vcpu_id(kvm_context_t kvm, uint32_t id)
547 #ifdef KVM_CAP_SET_BOOT_CPU_ID
548 int r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_SET_BOOT_CPU_ID);
549 if (r > 0)
550 return ioctl(kvm->vm_fd, KVM_SET_BOOT_CPU_ID, id);
551 return -ENOSYS;
552 #else
553 return -ENOSYS;
554 #endif
557 int kvm_create_vm(kvm_context_t kvm)
559 int fd = kvm->fd;
561 #ifdef KVM_CAP_IRQ_ROUTING
562 kvm->irq_routes = malloc(sizeof(*kvm->irq_routes));
563 if (!kvm->irq_routes)
564 return -ENOMEM;
565 memset(kvm->irq_routes, 0, sizeof(*kvm->irq_routes));
566 kvm->nr_allocated_irq_routes = 0;
567 #endif
569 fd = ioctl(fd, KVM_CREATE_VM, 0);
570 if (fd == -1) {
571 fprintf(stderr, "kvm_create_vm: %m\n");
572 return -1;
574 kvm->vm_fd = fd;
575 return 0;
578 static int kvm_create_default_phys_mem(kvm_context_t kvm,
579 unsigned long phys_mem_bytes,
580 void **vm_mem)
582 #ifdef KVM_CAP_USER_MEMORY
583 int r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_USER_MEMORY);
584 if (r > 0)
585 return 0;
586 fprintf(stderr, "Hypervisor too old: KVM_CAP_USER_MEMORY extension not supported\n");
587 #else
588 #error Hypervisor too old: KVM_CAP_USER_MEMORY extension not supported
589 #endif
590 return -1;
593 int kvm_check_extension(kvm_context_t kvm, int ext)
595 int ret;
597 ret = ioctl(kvm->fd, KVM_CHECK_EXTENSION, ext);
598 if (ret > 0)
599 return ret;
600 return 0;
603 void kvm_create_irqchip(kvm_context_t kvm)
605 int r;
607 kvm->irqchip_in_kernel = 0;
608 #ifdef KVM_CAP_IRQCHIP
609 if (!kvm->no_irqchip_creation) {
610 r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_IRQCHIP);
611 if (r > 0) { /* kernel irqchip supported */
612 r = ioctl(kvm->vm_fd, KVM_CREATE_IRQCHIP);
613 if (r >= 0) {
614 kvm->irqchip_inject_ioctl = KVM_IRQ_LINE;
615 #if defined(KVM_CAP_IRQ_INJECT_STATUS) && defined(KVM_IRQ_LINE_STATUS)
616 r = ioctl(kvm->fd, KVM_CHECK_EXTENSION,
617 KVM_CAP_IRQ_INJECT_STATUS);
618 if (r > 0)
619 kvm->irqchip_inject_ioctl = KVM_IRQ_LINE_STATUS;
620 #endif
621 kvm->irqchip_in_kernel = 1;
623 else
624 fprintf(stderr, "Create kernel PIC irqchip failed\n");
627 #endif
630 int kvm_create(kvm_context_t kvm, unsigned long phys_mem_bytes, void **vm_mem)
632 int r;
634 r = kvm_create_vm(kvm);
635 if (r < 0)
636 return r;
637 r = kvm_arch_create(kvm, phys_mem_bytes, vm_mem);
638 if (r < 0)
639 return r;
640 init_slots();
641 r = kvm_create_default_phys_mem(kvm, phys_mem_bytes, vm_mem);
642 if (r < 0)
643 return r;
644 kvm_create_irqchip(kvm);
646 return 0;
650 int kvm_register_phys_mem(kvm_context_t kvm,
651 unsigned long phys_start, void *userspace_addr,
652 unsigned long len, int log)
655 struct kvm_userspace_memory_region memory = {
656 .memory_size = len,
657 .guest_phys_addr = phys_start,
658 .userspace_addr = (unsigned long)(intptr_t)userspace_addr,
659 .flags = log ? KVM_MEM_LOG_DIRTY_PAGES : 0,
661 int r;
663 memory.slot = get_free_slot(kvm);
664 DPRINTF("memory: gpa: %llx, size: %llx, uaddr: %llx, slot: %x, flags: %lx\n",
665 memory.guest_phys_addr, memory.memory_size,
666 memory.userspace_addr, memory.slot, memory.flags);
667 r = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &memory);
668 if (r == -1) {
669 fprintf(stderr, "create_userspace_phys_mem: %s\n", strerror(errno));
670 return -1;
672 register_slot(memory.slot, memory.guest_phys_addr, memory.memory_size,
673 memory.userspace_addr, memory.flags);
674 return 0;
678 /* destroy/free a whole slot.
679 * phys_start, len and slot are the params passed to kvm_create_phys_mem()
681 void kvm_destroy_phys_mem(kvm_context_t kvm, unsigned long phys_start,
682 unsigned long len)
684 int slot;
685 int r;
686 struct kvm_userspace_memory_region memory = {
687 .memory_size = 0,
688 .guest_phys_addr = phys_start,
689 .userspace_addr = 0,
690 .flags = 0,
693 slot = get_slot(phys_start);
695 if ((slot >= KVM_MAX_NUM_MEM_REGIONS) || (slot == -1)) {
696 fprintf(stderr, "BUG: %s: invalid parameters (slot=%d)\n",
697 __FUNCTION__, slot);
698 return;
700 if (phys_start != slots[slot].phys_addr) {
701 fprintf(stderr,
702 "WARNING: %s: phys_start is 0x%lx expecting 0x%lx\n",
703 __FUNCTION__, phys_start, slots[slot].phys_addr);
704 phys_start = slots[slot].phys_addr;
707 memory.slot = slot;
708 DPRINTF("slot %d start %llx len %llx flags %x\n",
709 memory.slot,
710 memory.guest_phys_addr,
711 memory.memory_size,
712 memory.flags);
713 r = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &memory);
714 if (r == -1) {
715 fprintf(stderr, "destroy_userspace_phys_mem: %s",
716 strerror(errno));
717 return;
720 free_slot(memory.slot);
723 void kvm_unregister_memory_area(kvm_context_t kvm, uint64_t phys_addr, unsigned long size)
726 int slot = get_container_slot(phys_addr, size);
728 if (slot != -1) {
729 DPRINTF("Unregistering memory region %llx (%lx)\n", phys_addr, size);
730 kvm_destroy_phys_mem(kvm, phys_addr, size);
731 return;
735 static int kvm_get_map(kvm_context_t kvm, int ioctl_num, int slot, void *buf)
737 int r;
738 struct kvm_dirty_log log = {
739 .slot = slot,
742 log.dirty_bitmap = buf;
744 r = ioctl(kvm->vm_fd, ioctl_num, &log);
745 if (r == -1)
746 return -errno;
747 return 0;
750 int kvm_get_dirty_pages(kvm_context_t kvm, unsigned long phys_addr, void *buf)
752 int slot;
754 slot = get_slot(phys_addr);
755 return kvm_get_map(kvm, KVM_GET_DIRTY_LOG, slot, buf);
758 int kvm_get_dirty_pages_range(kvm_context_t kvm, unsigned long phys_addr,
759 unsigned long len, void *opaque,
760 int (*cb)(unsigned long start, unsigned long len,
761 void*bitmap, void *opaque))
763 int i;
764 int r;
765 unsigned long end_addr = phys_addr + len;
766 void *buf;
768 for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i) {
769 if ((slots[i].len && (uint64_t)slots[i].phys_addr >= phys_addr)
770 && ((uint64_t)slots[i].phys_addr + slots[i].len <= end_addr)) {
771 buf = qemu_malloc((slots[i].len / 4096 + 7) / 8 + 2);
772 r = kvm_get_map(kvm, KVM_GET_DIRTY_LOG, i, buf);
773 if (r) {
774 qemu_free(buf);
775 return r;
777 r = cb(slots[i].phys_addr, slots[i].len, buf, opaque);
778 qemu_free(buf);
779 if (r)
780 return r;
783 return 0;
786 #ifdef KVM_CAP_IRQCHIP
788 int kvm_set_irq_level(kvm_context_t kvm, int irq, int level, int *status)
790 struct kvm_irq_level event;
791 int r;
793 if (!kvm->irqchip_in_kernel)
794 return 0;
795 event.level = level;
796 event.irq = irq;
797 r = ioctl(kvm->vm_fd, kvm->irqchip_inject_ioctl, &event);
798 if (r == -1)
799 perror("kvm_set_irq_level");
801 if (status) {
802 #ifdef KVM_CAP_IRQ_INJECT_STATUS
803 *status = (kvm->irqchip_inject_ioctl == KVM_IRQ_LINE) ?
804 1 : event.status;
805 #else
806 *status = 1;
807 #endif
810 return 1;
813 int kvm_get_irqchip(kvm_context_t kvm, struct kvm_irqchip *chip)
815 int r;
817 if (!kvm->irqchip_in_kernel)
818 return 0;
819 r = ioctl(kvm->vm_fd, KVM_GET_IRQCHIP, chip);
820 if (r == -1) {
821 r = -errno;
822 perror("kvm_get_irqchip");
824 return r;
827 int kvm_set_irqchip(kvm_context_t kvm, struct kvm_irqchip *chip)
829 int r;
831 if (!kvm->irqchip_in_kernel)
832 return 0;
833 r = ioctl(kvm->vm_fd, KVM_SET_IRQCHIP, chip);
834 if (r == -1) {
835 r = -errno;
836 perror("kvm_set_irqchip");
838 return r;
841 #endif
843 static int handle_io(kvm_vcpu_context_t vcpu)
845 struct kvm_run *run = vcpu->run;
846 kvm_context_t kvm = vcpu->kvm;
847 uint16_t addr = run->io.port;
848 int r;
849 int i;
850 void *p = (void *)run + run->io.data_offset;
852 for (i = 0; i < run->io.count; ++i) {
853 switch (run->io.direction) {
854 case KVM_EXIT_IO_IN:
855 switch (run->io.size) {
856 case 1:
857 r = kvm_inb(kvm->opaque, addr, p);
858 break;
859 case 2:
860 r = kvm_inw(kvm->opaque, addr, p);
861 break;
862 case 4:
863 r = kvm_inl(kvm->opaque, addr, p);
864 break;
865 default:
866 fprintf(stderr, "bad I/O size %d\n", run->io.size);
867 return -EMSGSIZE;
869 break;
870 case KVM_EXIT_IO_OUT:
871 switch (run->io.size) {
872 case 1:
873 r = kvm_outb(kvm->opaque, addr,
874 *(uint8_t *)p);
875 break;
876 case 2:
877 r = kvm_outw(kvm->opaque, addr,
878 *(uint16_t *)p);
879 break;
880 case 4:
881 r = kvm_outl(kvm->opaque, addr,
882 *(uint32_t *)p);
883 break;
884 default:
885 fprintf(stderr, "bad I/O size %d\n", run->io.size);
886 return -EMSGSIZE;
888 break;
889 default:
890 fprintf(stderr, "bad I/O direction %d\n", run->io.direction);
891 return -EPROTO;
894 p += run->io.size;
897 return 0;
900 int handle_debug(kvm_vcpu_context_t vcpu, void *env)
902 #ifdef KVM_CAP_SET_GUEST_DEBUG
903 struct kvm_run *run = vcpu->run;
904 kvm_context_t kvm = vcpu->kvm;
906 return kvm_debug(kvm->opaque, env, &run->debug.arch);
907 #else
908 return 0;
909 #endif
912 int kvm_get_regs(kvm_vcpu_context_t vcpu, struct kvm_regs *regs)
914 return ioctl(vcpu->fd, KVM_GET_REGS, regs);
917 int kvm_set_regs(kvm_vcpu_context_t vcpu, struct kvm_regs *regs)
919 return ioctl(vcpu->fd, KVM_SET_REGS, regs);
922 int kvm_get_fpu(kvm_vcpu_context_t vcpu, struct kvm_fpu *fpu)
924 return ioctl(vcpu->fd, KVM_GET_FPU, fpu);
927 int kvm_set_fpu(kvm_vcpu_context_t vcpu, struct kvm_fpu *fpu)
929 return ioctl(vcpu->fd, KVM_SET_FPU, fpu);
932 int kvm_get_sregs(kvm_vcpu_context_t vcpu, struct kvm_sregs *sregs)
934 return ioctl(vcpu->fd, KVM_GET_SREGS, sregs);
937 int kvm_set_sregs(kvm_vcpu_context_t vcpu, struct kvm_sregs *sregs)
939 return ioctl(vcpu->fd, KVM_SET_SREGS, sregs);
942 #ifdef KVM_CAP_MP_STATE
943 int kvm_get_mpstate(kvm_vcpu_context_t vcpu, struct kvm_mp_state *mp_state)
945 int r;
947 r = ioctl(vcpu->kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_MP_STATE);
948 if (r > 0)
949 return ioctl(vcpu->fd, KVM_GET_MP_STATE, mp_state);
950 return -ENOSYS;
953 int kvm_set_mpstate(kvm_vcpu_context_t vcpu, struct kvm_mp_state *mp_state)
955 int r;
957 r = ioctl(vcpu->kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_MP_STATE);
958 if (r > 0)
959 return ioctl(vcpu->fd, KVM_SET_MP_STATE, mp_state);
960 return -ENOSYS;
962 #endif
964 static int handle_mmio(kvm_vcpu_context_t vcpu)
966 unsigned long addr = vcpu->run->mmio.phys_addr;
967 kvm_context_t kvm = vcpu->kvm;
968 struct kvm_run *kvm_run = vcpu->run;
969 void *data = kvm_run->mmio.data;
971 /* hack: Red Hat 7.1 generates these weird accesses. */
972 if ((addr > 0xa0000-4 && addr <= 0xa0000) && kvm_run->mmio.len == 3)
973 return 0;
975 if (kvm_run->mmio.is_write)
976 return kvm_mmio_write(kvm->opaque, addr, data,
977 kvm_run->mmio.len);
978 else
979 return kvm_mmio_read(kvm->opaque, addr, data,
980 kvm_run->mmio.len);
983 int handle_io_window(kvm_context_t kvm)
985 return 1;
988 int handle_halt(kvm_vcpu_context_t vcpu)
990 return kvm_arch_halt(vcpu->kvm->opaque, vcpu);
993 int handle_shutdown(kvm_context_t kvm, CPUState *env)
995 /* stop the current vcpu from going back to guest mode */
996 env->kvm_cpu_state.stopped = 1;
998 qemu_system_reset_request();
999 return 1;
1002 static inline void push_nmi(kvm_context_t kvm)
1004 #ifdef KVM_CAP_USER_NMI
1005 kvm_arch_push_nmi(kvm->opaque);
1006 #endif /* KVM_CAP_USER_NMI */
1009 void post_kvm_run(kvm_context_t kvm, CPUState *env)
1011 pthread_mutex_lock(&qemu_mutex);
1012 kvm_arch_post_kvm_run(kvm->opaque, env);
1015 int pre_kvm_run(kvm_context_t kvm, CPUState *env)
1017 kvm_arch_pre_kvm_run(kvm->opaque, env);
1019 if (env->exit_request)
1020 return 1;
1021 pthread_mutex_unlock(&qemu_mutex);
1022 return 0;
1025 int kvm_get_interrupt_flag(kvm_vcpu_context_t vcpu)
1027 return vcpu->run->if_flag;
1030 int kvm_is_ready_for_interrupt_injection(kvm_vcpu_context_t vcpu)
1032 return vcpu->run->ready_for_interrupt_injection;
1035 int kvm_run(kvm_vcpu_context_t vcpu, void *env)
1037 int r;
1038 int fd = vcpu->fd;
1039 struct kvm_run *run = vcpu->run;
1040 kvm_context_t kvm = vcpu->kvm;
1042 again:
1043 push_nmi(kvm);
1044 #if !defined(__s390__)
1045 if (!kvm->irqchip_in_kernel)
1046 run->request_interrupt_window = kvm_arch_try_push_interrupts(env);
1047 #endif
1048 r = pre_kvm_run(kvm, env);
1049 if (r)
1050 return r;
1051 r = ioctl(fd, KVM_RUN, 0);
1053 if (r == -1 && errno != EINTR && errno != EAGAIN) {
1054 r = -errno;
1055 post_kvm_run(kvm, env);
1056 fprintf(stderr, "kvm_run: %s\n", strerror(-r));
1057 return r;
1060 post_kvm_run(kvm, env);
1062 #if defined(KVM_CAP_COALESCED_MMIO)
1063 if (kvm->coalesced_mmio) {
1064 struct kvm_coalesced_mmio_ring *ring = (void *)run +
1065 kvm->coalesced_mmio * PAGE_SIZE;
1066 while (ring->first != ring->last) {
1067 kvm_mmio_write(kvm->opaque,
1068 ring->coalesced_mmio[ring->first].phys_addr,
1069 &ring->coalesced_mmio[ring->first].data[0],
1070 ring->coalesced_mmio[ring->first].len);
1071 smp_wmb();
1072 ring->first = (ring->first + 1) %
1073 KVM_COALESCED_MMIO_MAX;
1076 #endif
1078 #if !defined(__s390__)
1079 if (r == -1) {
1080 r = handle_io_window(kvm);
1081 goto more;
1083 #endif
1084 if (1) {
1085 switch (run->exit_reason) {
1086 case KVM_EXIT_UNKNOWN:
1087 r = handle_unhandled(kvm, vcpu,
1088 run->hw.hardware_exit_reason);
1089 break;
1090 case KVM_EXIT_FAIL_ENTRY:
1091 r = handle_unhandled(kvm, vcpu,
1092 run->fail_entry.hardware_entry_failure_reason);
1093 break;
1094 case KVM_EXIT_EXCEPTION:
1095 fprintf(stderr, "exception %d (%x)\n",
1096 run->ex.exception,
1097 run->ex.error_code);
1098 kvm_show_regs(vcpu);
1099 kvm_show_code(vcpu);
1100 abort();
1101 break;
1102 case KVM_EXIT_IO:
1103 r = handle_io(vcpu);
1104 break;
1105 case KVM_EXIT_DEBUG:
1106 r = handle_debug(vcpu, env);
1107 break;
1108 case KVM_EXIT_MMIO:
1109 r = handle_mmio(vcpu);
1110 break;
1111 case KVM_EXIT_HLT:
1112 r = handle_halt(vcpu);
1113 break;
1114 case KVM_EXIT_IRQ_WINDOW_OPEN:
1115 break;
1116 case KVM_EXIT_SHUTDOWN:
1117 r = handle_shutdown(kvm, env);
1118 break;
1119 #if defined(__s390__)
1120 case KVM_EXIT_S390_SIEIC:
1121 r = kvm_s390_handle_intercept(kvm, vcpu,
1122 run);
1123 break;
1124 case KVM_EXIT_S390_RESET:
1125 r = kvm_s390_handle_reset(kvm, vcpu, run);
1126 break;
1127 #endif
1128 default:
1129 if (kvm_arch_run(vcpu)) {
1130 fprintf(stderr, "unhandled vm exit: 0x%x\n",
1131 run->exit_reason);
1132 kvm_show_regs(vcpu);
1133 abort();
1135 break;
1138 more:
1139 if (!r)
1140 goto again;
1141 return r;
1144 int kvm_inject_irq(kvm_vcpu_context_t vcpu, unsigned irq)
1146 struct kvm_interrupt intr;
1148 intr.irq = irq;
1149 return ioctl(vcpu->fd, KVM_INTERRUPT, &intr);
1152 #ifdef KVM_CAP_SET_GUEST_DEBUG
1153 int kvm_set_guest_debug(kvm_vcpu_context_t vcpu, struct kvm_guest_debug *dbg)
1155 return ioctl(vcpu->fd, KVM_SET_GUEST_DEBUG, dbg);
1157 #endif
1159 int kvm_set_signal_mask(kvm_vcpu_context_t vcpu, const sigset_t *sigset)
1161 struct kvm_signal_mask *sigmask;
1162 int r;
1164 if (!sigset) {
1165 r = ioctl(vcpu->fd, KVM_SET_SIGNAL_MASK, NULL);
1166 if (r == -1)
1167 r = -errno;
1168 return r;
1170 sigmask = malloc(sizeof(*sigmask) + sizeof(*sigset));
1171 if (!sigmask)
1172 return -ENOMEM;
1174 sigmask->len = 8;
1175 memcpy(sigmask->sigset, sigset, sizeof(*sigset));
1176 r = ioctl(vcpu->fd, KVM_SET_SIGNAL_MASK, sigmask);
1177 if (r == -1)
1178 r = -errno;
1179 free(sigmask);
1180 return r;
1183 int kvm_irqchip_in_kernel(kvm_context_t kvm)
1185 return kvm->irqchip_in_kernel;
1188 int kvm_pit_in_kernel(kvm_context_t kvm)
1190 return kvm->pit_in_kernel;
1193 int kvm_has_sync_mmu(void)
1195 int r = 0;
1196 #ifdef KVM_CAP_SYNC_MMU
1197 r = ioctl(kvm_context->fd, KVM_CHECK_EXTENSION, KVM_CAP_SYNC_MMU);
1198 #endif
1199 return r;
1202 int kvm_inject_nmi(kvm_vcpu_context_t vcpu)
1204 #ifdef KVM_CAP_USER_NMI
1205 return ioctl(vcpu->fd, KVM_NMI);
1206 #else
1207 return -ENOSYS;
1208 #endif
1211 int kvm_init_coalesced_mmio(kvm_context_t kvm)
1213 int r = 0;
1214 kvm->coalesced_mmio = 0;
1215 #ifdef KVM_CAP_COALESCED_MMIO
1216 r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_COALESCED_MMIO);
1217 if (r > 0) {
1218 kvm->coalesced_mmio = r;
1219 return 0;
1221 #endif
1222 return r;
1225 int kvm_coalesce_mmio_region(target_phys_addr_t addr, ram_addr_t size)
1227 #ifdef KVM_CAP_COALESCED_MMIO
1228 kvm_context_t kvm = kvm_context;
1229 struct kvm_coalesced_mmio_zone zone;
1230 int r;
1232 if (kvm->coalesced_mmio) {
1234 zone.addr = addr;
1235 zone.size = size;
1237 r = ioctl(kvm->vm_fd, KVM_REGISTER_COALESCED_MMIO, &zone);
1238 if (r == -1) {
1239 perror("kvm_register_coalesced_mmio_zone");
1240 return -errno;
1242 return 0;
1244 #endif
1245 return -ENOSYS;
1248 int kvm_uncoalesce_mmio_region(target_phys_addr_t addr, ram_addr_t size)
1250 #ifdef KVM_CAP_COALESCED_MMIO
1251 kvm_context_t kvm = kvm_context;
1252 struct kvm_coalesced_mmio_zone zone;
1253 int r;
1255 if (kvm->coalesced_mmio) {
1257 zone.addr = addr;
1258 zone.size = size;
1260 r = ioctl(kvm->vm_fd, KVM_UNREGISTER_COALESCED_MMIO, &zone);
1261 if (r == -1) {
1262 perror("kvm_unregister_coalesced_mmio_zone");
1263 return -errno;
1265 DPRINTF("Unregistered coalesced mmio region for %llx (%lx)\n", addr, size);
1266 return 0;
1268 #endif
1269 return -ENOSYS;
1272 #ifdef KVM_CAP_DEVICE_ASSIGNMENT
1273 int kvm_assign_pci_device(kvm_context_t kvm,
1274 struct kvm_assigned_pci_dev *assigned_dev)
1276 int ret;
1278 ret = ioctl(kvm->vm_fd, KVM_ASSIGN_PCI_DEVICE, assigned_dev);
1279 if (ret < 0)
1280 return -errno;
1282 return ret;
1285 static int kvm_old_assign_irq(kvm_context_t kvm,
1286 struct kvm_assigned_irq *assigned_irq)
1288 int ret;
1290 ret = ioctl(kvm->vm_fd, KVM_ASSIGN_IRQ, assigned_irq);
1291 if (ret < 0)
1292 return -errno;
1294 return ret;
1297 #ifdef KVM_CAP_ASSIGN_DEV_IRQ
1298 int kvm_assign_irq(kvm_context_t kvm,
1299 struct kvm_assigned_irq *assigned_irq)
1301 int ret;
1303 ret = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_ASSIGN_DEV_IRQ);
1304 if (ret > 0) {
1305 ret = ioctl(kvm->vm_fd, KVM_ASSIGN_DEV_IRQ, assigned_irq);
1306 if (ret < 0)
1307 return -errno;
1308 return ret;
1311 return kvm_old_assign_irq(kvm, assigned_irq);
1314 int kvm_deassign_irq(kvm_context_t kvm,
1315 struct kvm_assigned_irq *assigned_irq)
1317 int ret;
1319 ret = ioctl(kvm->vm_fd, KVM_DEASSIGN_DEV_IRQ, assigned_irq);
1320 if (ret < 0)
1321 return -errno;
1323 return ret;
1325 #else
1326 int kvm_assign_irq(kvm_context_t kvm,
1327 struct kvm_assigned_irq *assigned_irq)
1329 return kvm_old_assign_irq(kvm, assigned_irq);
1331 #endif
1332 #endif
1334 #ifdef KVM_CAP_DEVICE_DEASSIGNMENT
1335 int kvm_deassign_pci_device(kvm_context_t kvm,
1336 struct kvm_assigned_pci_dev *assigned_dev)
1338 int ret;
1340 ret = ioctl(kvm->vm_fd, KVM_DEASSIGN_PCI_DEVICE, assigned_dev);
1341 if (ret < 0)
1342 return -errno;
1344 return ret;
1346 #endif
1348 int kvm_destroy_memory_region_works(kvm_context_t kvm)
1350 int ret = 0;
1352 #ifdef KVM_CAP_DESTROY_MEMORY_REGION_WORKS
1353 ret = ioctl(kvm->fd, KVM_CHECK_EXTENSION,
1354 KVM_CAP_DESTROY_MEMORY_REGION_WORKS);
1355 if (ret <= 0)
1356 ret = 0;
1357 #endif
1358 return ret;
1361 int kvm_reinject_control(kvm_context_t kvm, int pit_reinject)
1363 #ifdef KVM_CAP_REINJECT_CONTROL
1364 int r;
1365 struct kvm_reinject_control control;
1367 control.pit_reinject = pit_reinject;
1369 r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_REINJECT_CONTROL);
1370 if (r > 0) {
1371 r = ioctl(kvm->vm_fd, KVM_REINJECT_CONTROL, &control);
1372 if (r == -1)
1373 return -errno;
1374 return r;
1376 #endif
1377 return -ENOSYS;
1380 int kvm_has_gsi_routing(kvm_context_t kvm)
1382 int r = 0;
1384 #ifdef KVM_CAP_IRQ_ROUTING
1385 r = kvm_check_extension(kvm, KVM_CAP_IRQ_ROUTING);
1386 #endif
1387 return r;
1390 int kvm_get_gsi_count(kvm_context_t kvm)
1392 #ifdef KVM_CAP_IRQ_ROUTING
1393 return kvm_check_extension(kvm, KVM_CAP_IRQ_ROUTING);
1394 #else
1395 return -EINVAL;
1396 #endif
1399 int kvm_clear_gsi_routes(kvm_context_t kvm)
1401 #ifdef KVM_CAP_IRQ_ROUTING
1402 kvm->irq_routes->nr = 0;
1403 return 0;
1404 #else
1405 return -EINVAL;
1406 #endif
1409 int kvm_add_routing_entry(kvm_context_t kvm,
1410 struct kvm_irq_routing_entry* entry)
1412 #ifdef KVM_CAP_IRQ_ROUTING
1413 struct kvm_irq_routing *z;
1414 struct kvm_irq_routing_entry *new;
1415 int n, size;
1417 if (kvm->irq_routes->nr == kvm->nr_allocated_irq_routes) {
1418 n = kvm->nr_allocated_irq_routes * 2;
1419 if (n < 64)
1420 n = 64;
1421 size = sizeof(struct kvm_irq_routing);
1422 size += n * sizeof(*new);
1423 z = realloc(kvm->irq_routes, size);
1424 if (!z)
1425 return -ENOMEM;
1426 kvm->nr_allocated_irq_routes = n;
1427 kvm->irq_routes = z;
1429 n = kvm->irq_routes->nr++;
1430 new = &kvm->irq_routes->entries[n];
1431 memset(new, 0, sizeof(*new));
1432 new->gsi = entry->gsi;
1433 new->type = entry->type;
1434 new->flags = entry->flags;
1435 new->u = entry->u;
1437 set_gsi(kvm, entry->gsi);
1439 return 0;
1440 #else
1441 return -ENOSYS;
1442 #endif
1445 int kvm_add_irq_route(kvm_context_t kvm, int gsi, int irqchip, int pin)
1447 #ifdef KVM_CAP_IRQ_ROUTING
1448 struct kvm_irq_routing_entry e;
1450 e.gsi = gsi;
1451 e.type = KVM_IRQ_ROUTING_IRQCHIP;
1452 e.flags = 0;
1453 e.u.irqchip.irqchip = irqchip;
1454 e.u.irqchip.pin = pin;
1455 return kvm_add_routing_entry(kvm, &e);
1456 #else
1457 return -ENOSYS;
1458 #endif
1461 int kvm_del_routing_entry(kvm_context_t kvm,
1462 struct kvm_irq_routing_entry* entry)
1464 #ifdef KVM_CAP_IRQ_ROUTING
1465 struct kvm_irq_routing_entry *e, *p;
1466 int i, gsi, found = 0;
1468 gsi = entry->gsi;
1470 for (i = 0; i < kvm->irq_routes->nr; ++i) {
1471 e = &kvm->irq_routes->entries[i];
1472 if (e->type == entry->type
1473 && e->gsi == gsi) {
1474 switch (e->type)
1476 case KVM_IRQ_ROUTING_IRQCHIP: {
1477 if (e->u.irqchip.irqchip ==
1478 entry->u.irqchip.irqchip
1479 && e->u.irqchip.pin ==
1480 entry->u.irqchip.pin) {
1481 p = &kvm->irq_routes->
1482 entries[--kvm->irq_routes->nr];
1483 *e = *p;
1484 found = 1;
1486 break;
1488 case KVM_IRQ_ROUTING_MSI: {
1489 if (e->u.msi.address_lo ==
1490 entry->u.msi.address_lo
1491 && e->u.msi.address_hi ==
1492 entry->u.msi.address_hi
1493 && e->u.msi.data == entry->u.msi.data) {
1494 p = &kvm->irq_routes->
1495 entries[--kvm->irq_routes->nr];
1496 *e = *p;
1497 found = 1;
1499 break;
1501 default:
1502 break;
1504 if (found) {
1505 /* If there are no other users of this GSI
1506 * mark it available in the bitmap */
1507 for (i = 0; i < kvm->irq_routes->nr; i++) {
1508 e = &kvm->irq_routes->entries[i];
1509 if (e->gsi == gsi)
1510 break;
1512 if (i == kvm->irq_routes->nr)
1513 clear_gsi(kvm, gsi);
1515 return 0;
1519 return -ESRCH;
1520 #else
1521 return -ENOSYS;
1522 #endif
1525 int kvm_update_routing_entry(kvm_context_t kvm,
1526 struct kvm_irq_routing_entry* entry,
1527 struct kvm_irq_routing_entry* newentry)
1529 #ifdef KVM_CAP_IRQ_ROUTING
1530 struct kvm_irq_routing_entry *e;
1531 int i;
1533 if (entry->gsi != newentry->gsi ||
1534 entry->type != newentry->type) {
1535 return -EINVAL;
1538 for (i = 0; i < kvm->irq_routes->nr; ++i) {
1539 e = &kvm->irq_routes->entries[i];
1540 if (e->type != entry->type || e->gsi != entry->gsi) {
1541 continue;
1543 switch (e->type) {
1544 case KVM_IRQ_ROUTING_IRQCHIP:
1545 if (e->u.irqchip.irqchip == entry->u.irqchip.irqchip &&
1546 e->u.irqchip.pin == entry->u.irqchip.pin) {
1547 memcpy(&e->u.irqchip, &entry->u.irqchip, sizeof e->u.irqchip);
1548 return 0;
1550 break;
1551 case KVM_IRQ_ROUTING_MSI:
1552 if (e->u.msi.address_lo == entry->u.msi.address_lo &&
1553 e->u.msi.address_hi == entry->u.msi.address_hi &&
1554 e->u.msi.data == entry->u.msi.data) {
1555 memcpy(&e->u.msi, &entry->u.msi, sizeof e->u.msi);
1556 return 0;
1558 break;
1559 default:
1560 break;
1563 return -ESRCH;
1564 #else
1565 return -ENOSYS;
1566 #endif
1569 int kvm_del_irq_route(kvm_context_t kvm, int gsi, int irqchip, int pin)
1571 #ifdef KVM_CAP_IRQ_ROUTING
1572 struct kvm_irq_routing_entry e;
1574 e.gsi = gsi;
1575 e.type = KVM_IRQ_ROUTING_IRQCHIP;
1576 e.flags = 0;
1577 e.u.irqchip.irqchip = irqchip;
1578 e.u.irqchip.pin = pin;
1579 return kvm_del_routing_entry(kvm, &e);
1580 #else
1581 return -ENOSYS;
1582 #endif
1585 int kvm_commit_irq_routes(kvm_context_t kvm)
1587 #ifdef KVM_CAP_IRQ_ROUTING
1588 int r;
1590 kvm->irq_routes->flags = 0;
1591 r = ioctl(kvm->vm_fd, KVM_SET_GSI_ROUTING, kvm->irq_routes);
1592 if (r == -1)
1593 r = -errno;
1594 return r;
1595 #else
1596 return -ENOSYS;
1597 #endif
1600 int kvm_get_irq_route_gsi(kvm_context_t kvm)
1602 int i, bit;
1603 uint32_t *buf = kvm->used_gsi_bitmap;
1605 /* Return the lowest unused GSI in the bitmap */
1606 for (i = 0; i < kvm->max_gsi / 32; i++) {
1607 bit = ffs(~buf[i]);
1608 if (!bit)
1609 continue;
1611 return bit - 1 + i * 32;
1614 return -ENOSPC;
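/* Typical GSI routing flow, shown as a sketch rather than a fixed contract:
 *   gsi = kvm_get_irq_route_gsi(kvm);      pick a free GSI from the bitmap
 *   kvm_add_routing_entry(kvm, &entry);    entry.gsi = gsi, e.g. KVM_IRQ_ROUTING_MSI
 *   kvm_commit_irq_routes(kvm);            push the table via KVM_SET_GSI_ROUTING
 */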
1617 #ifdef KVM_CAP_DEVICE_MSIX
1618 int kvm_assign_set_msix_nr(kvm_context_t kvm,
1619 struct kvm_assigned_msix_nr *msix_nr)
1621 int ret;
1623 ret = ioctl(kvm->vm_fd, KVM_ASSIGN_SET_MSIX_NR, msix_nr);
1624 if (ret < 0)
1625 return -errno;
1627 return ret;
1630 int kvm_assign_set_msix_entry(kvm_context_t kvm,
1631 struct kvm_assigned_msix_entry *entry)
1633 int ret;
1635 ret = ioctl(kvm->vm_fd, KVM_ASSIGN_SET_MSIX_ENTRY, entry);
1636 if (ret < 0)
1637 return -errno;
1639 return ret;
1641 #endif
1643 #if defined(KVM_CAP_IRQFD) && defined(CONFIG_eventfd)
1645 #include <sys/eventfd.h>
1647 static int _kvm_irqfd(kvm_context_t kvm, int fd, int gsi, int flags)
1649 int r;
1650 struct kvm_irqfd data = {
1651 .fd = fd,
1652 .gsi = gsi,
1653 .flags = flags,
1656 r = ioctl(kvm->vm_fd, KVM_IRQFD, &data);
1657 if (r == -1)
1658 r = -errno;
1659 return r;
1662 int kvm_irqfd(kvm_context_t kvm, int gsi, int flags)
1664 int r;
1665 int fd;
1667 if (!kvm_check_extension(kvm, KVM_CAP_IRQFD))
1668 return -ENOENT;
1670 fd = eventfd(0, 0);
1671 if (fd < 0)
1672 return -errno;
1674 r = _kvm_irqfd(kvm, fd, gsi, 0);
1675 if (r < 0) {
1676 close(fd);
1677 return -errno;
1680 return fd;
1683 #else /* KVM_CAP_IRQFD */
1685 int kvm_irqfd(kvm_context_t kvm, int gsi, int flags)
1687 return -ENOSYS;
1690 #endif /* KVM_CAP_IRQFD */
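/* Illustrative irqfd usage, assuming the caller keeps the returned eventfd:
 *   int fd = kvm_irqfd(kvm_context, gsi, 0);
 *   uint64_t one = 1;
 *   write(fd, &one, sizeof(one));    signalling the eventfd injects the GSI in-kernel
 */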
1691 static inline unsigned long kvm_get_thread_id(void)
1693 return syscall(SYS_gettid);
1696 static void qemu_cond_wait(pthread_cond_t *cond)
1698 CPUState *env = cpu_single_env;
1699 static const struct timespec ts = {
1700 .tv_sec = 0,
1701 .tv_nsec = 100000,
1704 pthread_cond_timedwait(cond, &qemu_mutex, &ts);
1705 cpu_single_env = env;
1708 static void sig_ipi_handler(int n)
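/* on_vcpu() runs func(data) on the vcpu thread owning env and waits for it to
 * finish; when called from that thread the function is invoked directly. */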
1712 static void on_vcpu(CPUState *env, void (*func)(void *data), void *data)
1714 struct qemu_work_item wi;
1716 if (env == current_env) {
1717 func(data);
1718 return;
1721 wi.func = func;
1722 wi.data = data;
1723 if (!env->kvm_cpu_state.queued_work_first)
1724 env->kvm_cpu_state.queued_work_first = &wi;
1725 else
1726 env->kvm_cpu_state.queued_work_last->next = &wi;
1727 env->kvm_cpu_state.queued_work_last = &wi;
1728 wi.next = NULL;
1729 wi.done = false;
1731 pthread_kill(env->kvm_cpu_state.thread, SIG_IPI);
1732 while (!wi.done)
1733 qemu_cond_wait(&qemu_work_cond);
1736 static void inject_interrupt(void *data)
1738 cpu_interrupt(current_env, (long)data);
1741 void kvm_inject_interrupt(CPUState *env, int mask)
1743 on_vcpu(env, inject_interrupt, (void *)(long)mask);
1746 void kvm_update_interrupt_request(CPUState *env)
1748 int signal = 0;
1750 if (env) {
1751 if (!current_env || !current_env->kvm_cpu_state.created)
1752 signal = 1;
1754 * Testing for created here is really redundant
1756 if (current_env && current_env->kvm_cpu_state.created &&
1757 env != current_env && !env->kvm_cpu_state.signalled)
1758 signal = 1;
1760 if (signal) {
1761 env->kvm_cpu_state.signalled = 1;
1762 if (env->kvm_cpu_state.thread)
1763 pthread_kill(env->kvm_cpu_state.thread, SIG_IPI);
1768 static void kvm_do_load_registers(void *_env)
1770 CPUState *env = _env;
1772 kvm_arch_load_regs(env);
1775 void kvm_load_registers(CPUState *env)
1777 if (kvm_enabled() && qemu_system_ready)
1778 on_vcpu(env, kvm_do_load_registers, env);
1781 static void kvm_do_save_registers(void *_env)
1783 CPUState *env = _env;
1785 kvm_arch_save_regs(env);
1788 void kvm_save_registers(CPUState *env)
1790 if (kvm_enabled())
1791 on_vcpu(env, kvm_do_save_registers, env);
1794 static void kvm_do_load_mpstate(void *_env)
1796 CPUState *env = _env;
1798 kvm_arch_load_mpstate(env);
1801 void kvm_load_mpstate(CPUState *env)
1803 if (kvm_enabled() && qemu_system_ready)
1804 on_vcpu(env, kvm_do_load_mpstate, env);
1807 static void kvm_do_save_mpstate(void *_env)
1809 CPUState *env = _env;
1811 kvm_arch_save_mpstate(env);
1812 env->halted = (env->mp_state == KVM_MP_STATE_HALTED);
1815 void kvm_save_mpstate(CPUState *env)
1817 if (kvm_enabled())
1818 on_vcpu(env, kvm_do_save_mpstate, env);
1821 int kvm_cpu_exec(CPUState *env)
1823 int r;
1825 r = kvm_run(env->kvm_cpu_state.vcpu_ctx, env);
1826 if (r < 0) {
1827 printf("kvm_run returned %d\n", r);
1828 vm_stop(0);
1831 return 0;
1834 static int is_cpu_stopped(CPUState *env)
1836 return !vm_running || env->kvm_cpu_state.stopped;
1839 static void flush_queued_work(CPUState *env)
1841 struct qemu_work_item *wi;
1843 if (!env->kvm_cpu_state.queued_work_first)
1844 return;
1846 while ((wi = env->kvm_cpu_state.queued_work_first)) {
1847 env->kvm_cpu_state.queued_work_first = wi->next;
1848 wi->func(wi->data);
1849 wi->done = true;
1851 env->kvm_cpu_state.queued_work_last = NULL;
1852 pthread_cond_broadcast(&qemu_work_cond);
1855 static void kvm_main_loop_wait(CPUState *env, int timeout)
1857 struct timespec ts;
1858 int r, e;
1859 siginfo_t siginfo;
1860 sigset_t waitset;
1862 pthread_mutex_unlock(&qemu_mutex);
1864 ts.tv_sec = timeout / 1000;
1865 ts.tv_nsec = (timeout % 1000) * 1000000;
1866 sigemptyset(&waitset);
1867 sigaddset(&waitset, SIG_IPI);
1869 r = sigtimedwait(&waitset, &siginfo, &ts);
1870 e = errno;
1872 pthread_mutex_lock(&qemu_mutex);
1874 if (r == -1 && !(e == EAGAIN || e == EINTR)) {
1875 printf("sigtimedwait: %s\n", strerror(e));
1876 exit(1);
1879 cpu_single_env = env;
1880 flush_queued_work(env);
1882 if (env->kvm_cpu_state.stop) {
1883 env->kvm_cpu_state.stop = 0;
1884 env->kvm_cpu_state.stopped = 1;
1885 pthread_cond_signal(&qemu_pause_cond);
1888 env->kvm_cpu_state.signalled = 0;
1891 static int all_threads_paused(void)
1893 CPUState *penv = first_cpu;
1895 while (penv) {
1896 if (penv->kvm_cpu_state.stop)
1897 return 0;
1898 penv = (CPUState *)penv->next_cpu;
1901 return 1;
1904 static void pause_all_threads(void)
1906 CPUState *penv = first_cpu;
1908 while (penv) {
1909 if (penv != cpu_single_env) {
1910 penv->kvm_cpu_state.stop = 1;
1911 pthread_kill(penv->kvm_cpu_state.thread, SIG_IPI);
1912 } else {
1913 penv->kvm_cpu_state.stop = 0;
1914 penv->kvm_cpu_state.stopped = 1;
1915 cpu_exit(penv);
1917 penv = (CPUState *)penv->next_cpu;
1920 while (!all_threads_paused())
1921 qemu_cond_wait(&qemu_pause_cond);
1924 static void resume_all_threads(void)
1926 CPUState *penv = first_cpu;
1928 assert(!cpu_single_env);
1930 while (penv) {
1931 penv->kvm_cpu_state.stop = 0;
1932 penv->kvm_cpu_state.stopped = 0;
1933 pthread_kill(penv->kvm_cpu_state.thread, SIG_IPI);
1934 penv = (CPUState *)penv->next_cpu;
1938 static void kvm_vm_state_change_handler(void *context, int running, int reason)
1940 if (running)
1941 resume_all_threads();
1942 else
1943 pause_all_threads();
1946 static void setup_kernel_sigmask(CPUState *env)
1948 sigset_t set;
1950 sigemptyset(&set);
1951 sigaddset(&set, SIGUSR2);
1952 sigaddset(&set, SIGIO);
1953 sigaddset(&set, SIGALRM);
1954 sigprocmask(SIG_BLOCK, &set, NULL);
1956 sigprocmask(SIG_BLOCK, NULL, &set);
1957 sigdelset(&set, SIG_IPI);
1959 kvm_set_signal_mask(env->kvm_cpu_state.vcpu_ctx, &set);
1962 static void qemu_kvm_system_reset(void)
1964 CPUState *penv = first_cpu;
1966 pause_all_threads();
1968 qemu_system_reset();
1970 while (penv) {
1971 kvm_arch_cpu_reset(penv);
1972 penv = (CPUState *)penv->next_cpu;
1975 resume_all_threads();
1978 static void process_irqchip_events(CPUState *env)
1980 kvm_arch_process_irqchip_events(env);
1981 if (kvm_arch_has_work(env))
1982 env->halted = 0;
1985 static int kvm_main_loop_cpu(CPUState *env)
1987 setup_kernel_sigmask(env);
1989 pthread_mutex_lock(&qemu_mutex);
1991 kvm_qemu_init_env(env);
1992 #ifdef TARGET_I386
1993 kvm_tpr_vcpu_start(env);
1994 #endif
1996 cpu_single_env = env;
1997 kvm_arch_load_regs(env);
1999 while (1) {
2000 int run_cpu = !is_cpu_stopped(env);
2001 if (run_cpu && !kvm_irqchip_in_kernel(kvm_context)) {
2002 process_irqchip_events(env);
2003 run_cpu = !env->halted;
2005 if (run_cpu) {
2006 kvm_main_loop_wait(env, 0);
2007 kvm_cpu_exec(env);
2008 } else {
2009 kvm_main_loop_wait(env, 1000);
2012 pthread_mutex_unlock(&qemu_mutex);
2013 return 0;
2016 static void *ap_main_loop(void *_env)
2018 CPUState *env = _env;
2019 sigset_t signals;
2020 struct ioperm_data *data = NULL;
2022 current_env = env;
2023 env->thread_id = kvm_get_thread_id();
2024 sigfillset(&signals);
2025 sigprocmask(SIG_BLOCK, &signals, NULL);
2026 env->kvm_cpu_state.vcpu_ctx = kvm_create_vcpu(kvm_context, env->cpu_index);
2028 #ifdef USE_KVM_DEVICE_ASSIGNMENT
2029 /* do ioperm for io ports of assigned devices */
2030 LIST_FOREACH(data, &ioperm_head, entries)
2031 on_vcpu(env, kvm_arch_do_ioperm, data);
2032 #endif
2034 /* signal VCPU creation */
2035 pthread_mutex_lock(&qemu_mutex);
2036 current_env->kvm_cpu_state.created = 1;
2037 pthread_cond_signal(&qemu_vcpu_cond);
2039 /* and wait for machine initialization */
2040 while (!qemu_system_ready)
2041 qemu_cond_wait(&qemu_system_cond);
2042 pthread_mutex_unlock(&qemu_mutex);
2044 kvm_main_loop_cpu(env);
2045 return NULL;
2048 void kvm_init_vcpu(CPUState *env)
2050 pthread_create(&env->kvm_cpu_state.thread, NULL, ap_main_loop, env);
2052 while (env->kvm_cpu_state.created == 0)
2053 qemu_cond_wait(&qemu_vcpu_cond);
2056 int kvm_vcpu_inited(CPUState *env)
2058 return env->kvm_cpu_state.created;
2061 int kvm_init_ap(void)
2063 #ifdef TARGET_I386
2064 kvm_tpr_opt_setup();
2065 #endif
2066 qemu_add_vm_change_state_handler(kvm_vm_state_change_handler, NULL);
2068 signal(SIG_IPI, sig_ipi_handler);
2069 return 0;
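/* qemu_kvm_notify_work() wakes the I/O thread by writing an 8-byte counter
 * value to its eventfd; the read side is drained in io_thread_wakeup() below. */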
2072 void qemu_kvm_notify_work(void)
2074 uint64_t value = 1;
2075 char buffer[8];
2076 size_t offset = 0;
2078 if (io_thread_fd == -1)
2079 return;
2081 memcpy(buffer, &value, sizeof(value));
2083 while (offset < 8) {
2084 ssize_t len;
2086 len = write(io_thread_fd, buffer + offset, 8 - offset);
2087 if (len == -1 && errno == EINTR)
2088 continue;
2090 if (len <= 0)
2091 break;
2093 offset += len;
2096 if (offset != 8)
2097 fprintf(stderr, "failed to notify io thread\n");
2100 /* If we have signalfd, we mask out the signals we want to handle and then
2101 * use signalfd to listen for them. We rely on whatever the current signal
2102 * handler is to dispatch the signals when we receive them.
2105 static void sigfd_handler(void *opaque)
2107 int fd = (unsigned long)opaque;
2108 struct qemu_signalfd_siginfo info;
2109 struct sigaction action;
2110 ssize_t len;
2112 while (1) {
2113 do {
2114 len = read(fd, &info, sizeof(info));
2115 } while (len == -1 && errno == EINTR);
2117 if (len == -1 && errno == EAGAIN)
2118 break;
2120 if (len != sizeof(info)) {
2121 printf("read from sigfd returned %zd: %m\n", len);
2122 return;
2125 sigaction(info.ssi_signo, NULL, &action);
2126 if (action.sa_handler)
2127 action.sa_handler(info.ssi_signo);
2132 /* Used to break IO thread out of select */
2133 static void io_thread_wakeup(void *opaque)
2135 int fd = (unsigned long)opaque;
2136 char buffer[8];
2137 size_t offset = 0;
2139 while (offset < 8) {
2140 ssize_t len;
2142 len = read(fd, buffer + offset, 8 - offset);
2143 if (len == -1 && errno == EINTR)
2144 continue;
2146 if (len <= 0)
2147 break;
2149 offset += len;
2153 int kvm_main_loop(void)
2155 int fds[2];
2156 sigset_t mask;
2157 int sigfd;
2159 io_thread = pthread_self();
2160 qemu_system_ready = 1;
2162 if (qemu_eventfd(fds) == -1) {
2163 fprintf(stderr, "failed to create eventfd\n");
2164 return -errno;
2167 qemu_set_fd_handler2(fds[0], NULL, io_thread_wakeup, NULL,
2168 (void *)(unsigned long)fds[0]);
2170 io_thread_fd = fds[1];
2172 sigemptyset(&mask);
2173 sigaddset(&mask, SIGIO);
2174 sigaddset(&mask, SIGALRM);
2175 sigprocmask(SIG_BLOCK, &mask, NULL);
2177 sigfd = qemu_signalfd(&mask);
2178 if (sigfd == -1) {
2179 fprintf(stderr, "failed to create signalfd\n");
2180 return -errno;
2183 fcntl(sigfd, F_SETFL, O_NONBLOCK);
2185 qemu_set_fd_handler2(sigfd, NULL, sigfd_handler, NULL,
2186 (void *)(unsigned long)sigfd);
2188 pthread_cond_broadcast(&qemu_system_cond);
2190 io_thread_sigfd = sigfd;
2191 cpu_single_env = NULL;
2193 while (1) {
2194 main_loop_wait(1000);
2195 if (qemu_shutdown_requested()) {
2196 if (qemu_no_shutdown()) {
2197 vm_stop(0);
2198 } else
2199 break;
2200 } else if (qemu_powerdown_requested())
2201 qemu_system_powerdown();
2202 else if (qemu_reset_requested())
2203 qemu_kvm_system_reset();
2204 else if (kvm_debug_cpu_requested) {
2205 gdb_set_stop_cpu(kvm_debug_cpu_requested);
2206 vm_stop(EXCP_DEBUG);
2207 kvm_debug_cpu_requested = NULL;
2211 pause_all_threads();
2212 pthread_mutex_unlock(&qemu_mutex);
2214 return 0;
2217 int kvm_qemu_init()
2219 /* Try to initialize kvm */
2220 kvm_context = kvm_init(cpu_single_env);
2221 if (!kvm_context) {
2222 return -1;
2224 pthread_mutex_lock(&qemu_mutex);
2226 return 0;
2229 #ifdef TARGET_I386
2230 static int destroy_region_works = 0;
2231 #endif
2234 #if !defined(TARGET_I386)
2235 int kvm_arch_init_irq_routing(void)
2237 return 0;
2239 #endif
2241 int kvm_qemu_create_context(void)
2243 int r;
2245 if (!kvm_irqchip) {
2246 kvm_disable_irqchip_creation(kvm_context);
2248 if (!kvm_pit) {
2249 kvm_disable_pit_creation(kvm_context);
2251 if (kvm_create(kvm_context, 0, NULL) < 0) {
2252 kvm_finalize(kvm_context);
2253 return -1;
2255 r = kvm_arch_qemu_create_context();
2256 if (r < 0)
2257 kvm_finalize(kvm_context);
2258 if (kvm_pit && !kvm_pit_reinject) {
2259 if (kvm_reinject_control(kvm_context, 0)) {
2260 fprintf(stderr, "failure to disable in-kernel PIT reinjection\n");
2261 return -1;
2264 #ifdef TARGET_I386
2265 destroy_region_works = kvm_destroy_memory_region_works(kvm_context);
2266 #endif
2268 r = kvm_arch_init_irq_routing();
2269 if (r < 0) {
2270 return r;
2273 return 0;
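/* Bring-up order, roughly: kvm_qemu_init() opens /dev/kvm and takes qemu_mutex,
 * kvm_qemu_create_context() creates the VM, in-kernel irqchip/PIT and IRQ
 * routing, and kvm_init_vcpu() then spawns one ap_main_loop() thread per vcpu. */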
2276 #ifdef TARGET_I386
2277 static int must_use_aliases_source(target_phys_addr_t addr)
2279 if (destroy_region_works)
2280 return false;
2281 if (addr == 0xa0000 || addr == 0xa8000)
2282 return true;
2283 return false;
2286 static int must_use_aliases_target(target_phys_addr_t addr)
2288 if (destroy_region_works)
2289 return false;
2290 if (addr >= 0xe0000000 && addr < 0x100000000ull)
2291 return true;
2292 return false;
2295 static struct mapping {
2296 target_phys_addr_t phys;
2297 ram_addr_t ram;
2298 ram_addr_t len;
2299 } mappings[50];
2300 static int nr_mappings;
2302 static struct mapping *find_ram_mapping(ram_addr_t ram_addr)
2304 struct mapping *p;
2306 for (p = mappings; p < mappings + nr_mappings; ++p) {
2307 if (p->ram <= ram_addr && ram_addr < p->ram + p->len) {
2308 return p;
2311 return NULL;
2314 static struct mapping *find_mapping(target_phys_addr_t start_addr)
2316 struct mapping *p;
2318 for (p = mappings; p < mappings + nr_mappings; ++p) {
2319 if (p->phys <= start_addr && start_addr < p->phys + p->len) {
2320 return p;
2323 return NULL;
2326 static void drop_mapping(target_phys_addr_t start_addr)
2328 struct mapping *p = find_mapping(start_addr);
2330 if (p)
2331 *p = mappings[--nr_mappings];
2333 #endif
2335 void kvm_set_phys_mem(target_phys_addr_t start_addr, ram_addr_t size,
2336 ram_addr_t phys_offset)
2338 int r = 0;
2339 unsigned long area_flags;
2340 #ifdef TARGET_I386
2341 struct mapping *p;
2342 #endif
2344 if (start_addr + size > phys_ram_size) {
2345 phys_ram_size = start_addr + size;
2348 phys_offset &= ~IO_MEM_ROM;
2349 area_flags = phys_offset & ~TARGET_PAGE_MASK;
2351 if (area_flags != IO_MEM_RAM) {
2352 #ifdef TARGET_I386
2353 if (must_use_aliases_source(start_addr)) {
2354 kvm_destroy_memory_alias(kvm_context, start_addr);
2355 return;
2357 if (must_use_aliases_target(start_addr))
2358 return;
2359 #endif
2360 while (size > 0) {
2361 p = find_mapping(start_addr);
2362 if (p) {
2363 kvm_unregister_memory_area(kvm_context, p->phys, p->len);
2364 drop_mapping(p->phys);
2366 start_addr += TARGET_PAGE_SIZE;
2367 if (size > TARGET_PAGE_SIZE) {
2368 size -= TARGET_PAGE_SIZE;
2369 } else {
2370 size = 0;
2373 return;
2376 r = kvm_is_containing_region(kvm_context, start_addr, size);
2377 if (r)
2378 return;
2380 if (area_flags >= TLB_MMIO)
2381 return;
2383 #ifdef TARGET_I386
2384 if (must_use_aliases_source(start_addr)) {
2385 p = find_ram_mapping(phys_offset);
2386 if (p) {
2387 kvm_create_memory_alias(kvm_context, start_addr, size,
2388 p->phys + (phys_offset - p->ram));
2390 return;
2392 #endif
2394 r = kvm_register_phys_mem(kvm_context, start_addr,
2395 qemu_get_ram_ptr(phys_offset),
2396 size, 0);
2397 if (r < 0) {
2398 printf("kvm_cpu_register_physical_memory: failed\n");
2399 exit(1);
2402 #ifdef TARGET_I386
2403 drop_mapping(start_addr);
2404 p = &mappings[nr_mappings++];
2405 p->phys = start_addr;
2406 p->ram = phys_offset;
2407 p->len = size;
2408 #endif
2410 return;
2413 int kvm_setup_guest_memory(void *area, unsigned long size)
2415 int ret = 0;
2417 #ifdef MADV_DONTFORK
2418 if (kvm_enabled() && !kvm_has_sync_mmu())
2419 ret = madvise(area, size, MADV_DONTFORK);
2420 #endif
2422 if (ret)
2423 perror ("madvise");
2425 return ret;
2428 int kvm_qemu_check_extension(int ext)
2430 return kvm_check_extension(kvm_context, ext);
2433 int kvm_qemu_init_env(CPUState *cenv)
2435 return kvm_arch_qemu_init_env(cenv);
2438 #ifdef KVM_CAP_SET_GUEST_DEBUG
2439 struct kvm_sw_breakpoint_head kvm_sw_breakpoints =
2440 TAILQ_HEAD_INITIALIZER(kvm_sw_breakpoints);
2442 struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(target_ulong pc)
2444 struct kvm_sw_breakpoint *bp;
2446 TAILQ_FOREACH(bp, &kvm_sw_breakpoints, entry) {
2447 if (bp->pc == pc)
2448 return bp;
2450 return NULL;
2453 struct kvm_set_guest_debug_data {
2454 struct kvm_guest_debug dbg;
2455 int err;
2458 static void kvm_invoke_set_guest_debug(void *data)
2460 struct kvm_set_guest_debug_data *dbg_data = data;
2462 dbg_data->err = kvm_set_guest_debug(cpu_single_env->kvm_cpu_state.vcpu_ctx,
2463 &dbg_data->dbg);
2466 int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap)
2468 struct kvm_set_guest_debug_data data;
2470 data.dbg.control = 0;
2471 if (env->singlestep_enabled)
2472 data.dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
2474 kvm_arch_update_guest_debug(env, &data.dbg);
2475 data.dbg.control |= reinject_trap;
2477 on_vcpu(env, kvm_invoke_set_guest_debug, &data);
2478 return data.err;
2481 int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr,
2482 target_ulong len, int type)
2484 struct kvm_sw_breakpoint *bp;
2485 CPUState *env;
2486 int err;
2488 if (type == GDB_BREAKPOINT_SW) {
2489 bp = kvm_find_sw_breakpoint(addr);
2490 if (bp) {
2491 bp->use_count++;
2492 return 0;
2495 bp = qemu_malloc(sizeof(struct kvm_sw_breakpoint));
2496 if (!bp)
2497 return -ENOMEM;
2499 bp->pc = addr;
2500 bp->use_count = 1;
2501 err = kvm_arch_insert_sw_breakpoint(current_env, bp);
2502 if (err) {
2503 free(bp);
2504 return err;
2507 TAILQ_INSERT_HEAD(&kvm_sw_breakpoints, bp, entry);
2508 } else {
2509 err = kvm_arch_insert_hw_breakpoint(addr, len, type);
2510 if (err)
2511 return err;
2514 for (env = first_cpu; env != NULL; env = env->next_cpu) {
2515 err = kvm_update_guest_debug(env, 0);
2516 if (err)
2517 return err;
2519 return 0;
2522 int kvm_remove_breakpoint(CPUState *current_env, target_ulong addr,
2523 target_ulong len, int type)
2525 struct kvm_sw_breakpoint *bp;
2526 CPUState *env;
2527 int err;
2529 if (type == GDB_BREAKPOINT_SW) {
2530 bp = kvm_find_sw_breakpoint(addr);
2531 if (!bp)
2532 return -ENOENT;
2534 if (bp->use_count > 1) {
2535 bp->use_count--;
2536 return 0;
2539 err = kvm_arch_remove_sw_breakpoint(current_env, bp);
2540 if (err)
2541 return err;
2543 TAILQ_REMOVE(&kvm_sw_breakpoints, bp, entry);
2544 qemu_free(bp);
2545 } else {
2546 err = kvm_arch_remove_hw_breakpoint(addr, len, type);
2547 if (err)
2548 return err;
2551 for (env = first_cpu; env != NULL; env = env->next_cpu) {
2552 err = kvm_update_guest_debug(env, 0);
2553 if (err)
2554 return err;
2556 return 0;
2559 void kvm_remove_all_breakpoints(CPUState *current_env)
2561 struct kvm_sw_breakpoint *bp, *next;
2562 CPUState *env;
2564 TAILQ_FOREACH_SAFE(bp, &kvm_sw_breakpoints, entry, next) {
2565 if (kvm_arch_remove_sw_breakpoint(current_env, bp) != 0) {
2566 /* Try harder to find a CPU that currently sees the breakpoint. */
2567 for (env = first_cpu; env != NULL; env = env->next_cpu) {
2568 if (kvm_arch_remove_sw_breakpoint(env, bp) == 0)
2569 break;
2573 kvm_arch_remove_all_hw_breakpoints();
2575 for (env = first_cpu; env != NULL; env = env->next_cpu)
2576 kvm_update_guest_debug(env, 0);
2579 #else /* !KVM_CAP_SET_GUEST_DEBUG */
2581 int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap)
2583 return -EINVAL;
2586 int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr,
2587 target_ulong len, int type)
2589 return -EINVAL;
2592 int kvm_remove_breakpoint(CPUState *current_env, target_ulong addr,
2593 target_ulong len, int type)
2595 return -EINVAL;
2598 void kvm_remove_all_breakpoints(CPUState *current_env)
2601 #endif /* !KVM_CAP_SET_GUEST_DEBUG */
2604 * dirty pages logging
2606 /* FIXME: use unsigned long pointer instead of unsigned char */
2607 unsigned char *kvm_dirty_bitmap = NULL;
2608 int kvm_physical_memory_set_dirty_tracking(int enable)
2610 int r = 0;
2612 if (!kvm_enabled())
2613 return 0;
2615 if (enable) {
2616 if (!kvm_dirty_bitmap) {
2617 unsigned bitmap_size = BITMAP_SIZE(phys_ram_size);
2618 kvm_dirty_bitmap = qemu_malloc(bitmap_size);
2619 if (kvm_dirty_bitmap == NULL) {
2620 perror("Failed to allocate dirty pages bitmap");
2621 r = -1;
2623 else {
2624 r = kvm_dirty_pages_log_enable_all(kvm_context);
2628 else {
2629 if (kvm_dirty_bitmap) {
2630 r = kvm_dirty_pages_log_reset(kvm_context);
2631 qemu_free(kvm_dirty_bitmap);
2632 kvm_dirty_bitmap = NULL;
2635 return r;
2638 /* get kvm's dirty pages bitmap and update qemu's */
2639 static int kvm_get_dirty_pages_log_range(unsigned long start_addr,
2640 unsigned char *bitmap,
2641 unsigned long offset,
2642 unsigned long mem_size)
2644 unsigned int i, j, n=0;
2645 unsigned char c;
2646 unsigned long page_number, addr, addr1;
2647 ram_addr_t ram_addr;
2648 unsigned int len = ((mem_size/TARGET_PAGE_SIZE) + 7) / 8;
2651 * bitmap-traveling is faster than memory-traveling (for addr...)
2652 * especially when most of the memory is not dirty.
2654 for (i=0; i<len; i++) {
2655 c = bitmap[i];
2656 while (c>0) {
2657 j = ffsl(c) - 1;
2658 c &= ~(1u<<j);
2659 page_number = i * 8 + j;
2660 addr1 = page_number * TARGET_PAGE_SIZE;
2661 addr = offset + addr1;
2662 ram_addr = cpu_get_physical_page_desc(addr);
2663 cpu_physical_memory_set_dirty(ram_addr);
2664 n++;
2667 return 0;
2669 static int kvm_get_dirty_bitmap_cb(unsigned long start, unsigned long len,
2670 void *bitmap, void *opaque)
2672 return kvm_get_dirty_pages_log_range(start, bitmap, start, len);
2676 * get kvm's dirty pages bitmap and update qemu's
2677 * we only care about physical ram, which resides in slots 0 and 3
2679 int kvm_update_dirty_pages_log(void)
2681 int r = 0;
2684 r = kvm_get_dirty_pages_range(kvm_context, 0, -1UL,
2685 NULL,
2686 kvm_get_dirty_bitmap_cb);
2687 return r;
2690 void kvm_qemu_log_memory(target_phys_addr_t start, target_phys_addr_t size,
2691 int log)
2693 if (log)
2694 kvm_dirty_pages_log_enable_slot(kvm_context, start, size);
2695 else {
2696 #ifdef TARGET_I386
2697 if (must_use_aliases_target(start))
2698 return;
2699 #endif
2700 kvm_dirty_pages_log_disable_slot(kvm_context, start, size);
2704 int kvm_get_phys_ram_page_bitmap(unsigned char *bitmap)
2706 unsigned int bsize = BITMAP_SIZE(phys_ram_size);
2707 unsigned int brsize = BITMAP_SIZE(ram_size);
2708 unsigned int extra_pages = (phys_ram_size - ram_size) / TARGET_PAGE_SIZE;
2709 unsigned int extra_bytes = (extra_pages +7)/8;
2710 unsigned int hole_start = BITMAP_SIZE(0xa0000);
2711 unsigned int hole_end = BITMAP_SIZE(0xc0000);
2713 memset(bitmap, 0xFF, brsize + extra_bytes);
2714 memset(bitmap + hole_start, 0, hole_end - hole_start);
2715 memset(bitmap + brsize + extra_bytes, 0, bsize - brsize - extra_bytes);
2717 return 0;
2720 #ifdef KVM_CAP_IRQCHIP
2722 int kvm_set_irq(int irq, int level, int *status)
2724 return kvm_set_irq_level(kvm_context, irq, level, status);
2727 #endif
2729 int qemu_kvm_get_dirty_pages(unsigned long phys_addr, void *buf)
2731 return kvm_get_dirty_pages(kvm_context, phys_addr, buf);
2734 void kvm_mutex_unlock(void)
2736 assert(!cpu_single_env);
2737 pthread_mutex_unlock(&qemu_mutex);
2740 void kvm_mutex_lock(void)
2742 pthread_mutex_lock(&qemu_mutex);
2743 cpu_single_env = NULL;
2746 #ifdef USE_KVM_DEVICE_ASSIGNMENT
2747 void kvm_add_ioperm_data(struct ioperm_data *data)
2749 LIST_INSERT_HEAD(&ioperm_head, data, entries);
2752 void kvm_remove_ioperm_data(unsigned long start_port, unsigned long num)
2754 struct ioperm_data *data;
2756 data = LIST_FIRST(&ioperm_head);
2757 while (data) {
2758 struct ioperm_data *next = LIST_NEXT(data, entries);
2760 if (data->start_port == start_port && data->num == num) {
2761 LIST_REMOVE(data, entries);
2762 qemu_free(data);
2765 data = next;
2769 void kvm_ioperm(CPUState *env, void *data)
2771 if (kvm_enabled() && qemu_system_ready)
2772 on_vcpu(env, kvm_arch_do_ioperm, data);
2775 #endif
2777 int kvm_physical_sync_dirty_bitmap(target_phys_addr_t start_addr, target_phys_addr_t end_addr)
2779 #ifndef TARGET_IA64
2781 #ifdef TARGET_I386
2782 if (must_use_aliases_source(start_addr))
2783 return 0;
2784 #endif
2786 kvm_get_dirty_pages_range(kvm_context, start_addr, end_addr - start_addr,
2787 NULL, kvm_get_dirty_bitmap_cb);
2788 #endif
2789 return 0;
2792 int kvm_log_start(target_phys_addr_t phys_addr, target_phys_addr_t len)
2794 #ifdef TARGET_I386
2795 if (must_use_aliases_source(phys_addr))
2796 return 0;
2797 #endif
2799 #ifndef TARGET_IA64
2800 kvm_qemu_log_memory(phys_addr, len, 1);
2801 #endif
2802 return 0;
2805 int kvm_log_stop(target_phys_addr_t phys_addr, target_phys_addr_t len)
2807 #ifdef TARGET_I386
2808 if (must_use_aliases_source(phys_addr))
2809 return 0;
2810 #endif
2812 #ifndef TARGET_IA64
2813 kvm_qemu_log_memory(phys_addr, len, 0);
2814 #endif
2815 return 0;
2818 void qemu_kvm_cpu_stop(CPUState *env)
2820 if (kvm_enabled())
2821 env->kvm_cpu_state.stopped = 1;
2824 int kvm_set_boot_cpu_id(uint32_t id)
2826 return kvm_set_boot_vcpu_id(kvm_context, id);