kvm: qemu: mark guest mapping as MADV_DONTFORK
[kvm-userspace.git] / qemu / qemu-kvm.c
blob9018b8d43a7a0d3e62250d3ff3c5665570bc8319
1 /*
2 * qemu/kvm integration
4 * Copyright (C) 2006-2008 Qumranet Technologies
6 * Licensed under the terms of the GNU GPL version 2 or higher.
7 */
8 #include "config.h"
9 #include "config-host.h"
/* Global KVM switches; all three start out as 1. */
int kvm_allowed = 1;
int kvm_irqchip = 1;    /* when 0, kvm_qemu_create_context() disables irqchip creation */
int kvm_pit = 1;        /* when 0, kvm_qemu_create_context() disables PIT creation */
#include <assert.h>
#include <string.h>
#include "hw/hw.h"
#include "sysemu.h"
#include "qemu-common.h"
#include "console.h"
#include "block.h"
#include "qemu-kvm.h"
#include <libkvm.h>
#include <pthread.h>
#include <sys/utsname.h>
#include <sys/syscall.h>
#include <sys/mman.h>
#include <time.h>
30 #define bool _Bool
31 #define false 0
32 #define true 1
34 extern void perror(const char *s);
36 kvm_context_t kvm_context;
38 extern int smp_cpus;
40 pthread_mutex_t qemu_mutex = PTHREAD_MUTEX_INITIALIZER;
41 pthread_cond_t qemu_aio_cond = PTHREAD_COND_INITIALIZER;
42 pthread_cond_t qemu_vcpu_cond = PTHREAD_COND_INITIALIZER;
43 pthread_cond_t qemu_system_cond = PTHREAD_COND_INITIALIZER;
44 pthread_cond_t qemu_pause_cond = PTHREAD_COND_INITIALIZER;
45 pthread_cond_t qemu_work_cond = PTHREAD_COND_INITIALIZER;
46 __thread struct vcpu_info *vcpu;
48 static int qemu_system_ready;
50 #define SIG_IPI (SIGRTMIN+4)
/*
 * One unit of work queued for execution on a particular vcpu thread.
 * Items form a singly linked list hanging off struct vcpu_info.
 */
struct qemu_kvm_work_item {
    struct qemu_kvm_work_item *next;    /* next queued item, NULL at the tail */
    void (*func)(void *data);           /* callback run by the vcpu thread */
    void *data;                         /* argument handed to func */
    bool done;                          /* set once func has returned */
};
59 struct vcpu_info {
60 CPUState *env;
61 int sipi_needed;
62 int init;
63 pthread_t thread;
64 int signalled;
65 int stop;
66 int stopped;
67 int created;
68 struct qemu_kvm_work_item *queued_work_first, *queued_work_last;
69 } vcpu_info[256];
/* Thread that runs kvm_main_loop(). */
pthread_t io_thread;

/* Write end used by qemu_kvm_notify_work(); -1 until kvm_main_loop() sets it. */
static int io_thread_fd = -1;

/* signalfd descriptor polled by the I/O thread; -1 until kvm_main_loop() sets it. */
static int io_thread_sigfd = -1;

/* Raised by kvm_debug() and consumed by kvm_main_loop(). */
static int kvm_debug_stop_requested;
/* Return the result of the gettid system call for the calling thread. */
static inline unsigned long kvm_get_thread_id(void)
{
    long tid = syscall(SYS_gettid);

    return tid;
}
82 static void qemu_cond_wait(pthread_cond_t *cond)
84 CPUState *env = cpu_single_env;
85 static const struct timespec ts = {
86 .tv_sec = 0,
87 .tv_nsec = 100000,
90 pthread_cond_timedwait(cond, &qemu_mutex, &ts);
91 /* If we're the I/O thread, some other thread may be waiting for aio
92 * completion */
93 if (!vcpu)
94 qemu_aio_poll();
95 cpu_single_env = env;
98 CPUState *qemu_kvm_cpu_env(int index)
100 return vcpu_info[index].env;
/* SIG_IPI handler: intentionally empty, the signal is only used as a kick. */
static void sig_ipi_handler(int n)
{
}
107 static void on_vcpu(CPUState *env, void (*func)(void *data), void *data)
109 struct vcpu_info *vi = &vcpu_info[env->cpu_index];
110 struct qemu_kvm_work_item wi;
112 if (vi == vcpu) {
113 func(data);
114 return;
117 wi.func = func;
118 wi.data = data;
119 if (!vi->queued_work_first)
120 vi->queued_work_first = &wi;
121 else
122 vi->queued_work_last->next = &wi;
123 vi->queued_work_last = &wi;
124 wi.next = NULL;
125 wi.done = false;
127 pthread_kill(vi->thread, SIG_IPI);
128 while (!wi.done)
129 qemu_cond_wait(&qemu_work_cond);
132 void kvm_update_interrupt_request(CPUState *env)
134 int signal = 0;
136 if (env) {
137 if (!vcpu)
138 signal = 1;
139 if (vcpu && env != vcpu->env && !vcpu_info[env->cpu_index].signalled)
140 signal = 1;
142 if (signal) {
143 vcpu_info[env->cpu_index].signalled = 1;
144 if (vcpu_info[env->cpu_index].thread)
145 pthread_kill(vcpu_info[env->cpu_index].thread, SIG_IPI);
150 void kvm_update_after_sipi(CPUState *env)
152 vcpu_info[env->cpu_index].sipi_needed = 1;
153 kvm_update_interrupt_request(env);
156 void kvm_apic_init(CPUState *env)
158 if (env->cpu_index != 0)
159 vcpu_info[env->cpu_index].init = 1;
160 kvm_update_interrupt_request(env);
163 #include <signal.h>
/* libkvm callback: forwards to the architecture-specific implementation. */
static int try_push_interrupts(void *opaque)
{
    int rc = kvm_arch_try_push_interrupts(opaque);

    return rc;
}
170 static void post_kvm_run(void *opaque, int vcpu)
173 pthread_mutex_lock(&qemu_mutex);
174 kvm_arch_post_kvm_run(opaque, vcpu);
177 static int pre_kvm_run(void *opaque, int vcpu)
179 CPUState *env = qemu_kvm_cpu_env(vcpu);
181 kvm_arch_pre_kvm_run(opaque, vcpu);
183 if (env->interrupt_request & CPU_INTERRUPT_EXIT)
184 return 1;
185 pthread_mutex_unlock(&qemu_mutex);
186 return 0;
189 static void kvm_do_load_registers(void *_env)
191 CPUState *env = _env;
193 kvm_arch_load_regs(env);
196 void kvm_load_registers(CPUState *env)
198 if (kvm_enabled() && qemu_system_ready)
199 on_vcpu(env, kvm_do_load_registers, env);
202 static void kvm_do_save_registers(void *_env)
204 CPUState *env = _env;
206 kvm_arch_save_regs(env);
209 void kvm_save_registers(CPUState *env)
211 if (kvm_enabled())
212 on_vcpu(env, kvm_do_save_registers, env);
215 int kvm_cpu_exec(CPUState *env)
217 int r;
219 r = kvm_run(kvm_context, env->cpu_index);
220 if (r < 0) {
221 printf("kvm_run returned %d\n", r);
222 exit(1);
225 return 0;
228 extern int vm_running;
230 static int has_work(CPUState *env)
232 if (!vm_running || (env && vcpu_info[env->cpu_index].stopped))
233 return 0;
234 if (!env->halted)
235 return 1;
236 return kvm_arch_has_work(env);
239 static void flush_queued_work(CPUState *env)
241 struct vcpu_info *vi = &vcpu_info[env->cpu_index];
242 struct qemu_kvm_work_item *wi;
244 if (!vi->queued_work_first)
245 return;
247 while ((wi = vi->queued_work_first)) {
248 vi->queued_work_first = wi->next;
249 wi->func(wi->data);
250 wi->done = true;
252 vi->queued_work_last = NULL;
253 pthread_cond_broadcast(&qemu_work_cond);
256 static void kvm_main_loop_wait(CPUState *env, int timeout)
258 struct timespec ts;
259 int r, e;
260 siginfo_t siginfo;
261 sigset_t waitset;
263 pthread_mutex_unlock(&qemu_mutex);
265 ts.tv_sec = timeout / 1000;
266 ts.tv_nsec = (timeout % 1000) * 1000000;
267 sigemptyset(&waitset);
268 sigaddset(&waitset, SIG_IPI);
270 r = sigtimedwait(&waitset, &siginfo, &ts);
271 e = errno;
273 pthread_mutex_lock(&qemu_mutex);
275 if (r == -1 && !(e == EAGAIN || e == EINTR)) {
276 printf("sigtimedwait: %s\n", strerror(e));
277 exit(1);
280 cpu_single_env = env;
281 flush_queued_work(env);
283 if (vcpu_info[env->cpu_index].stop) {
284 vcpu_info[env->cpu_index].stop = 0;
285 vcpu_info[env->cpu_index].stopped = 1;
286 pthread_cond_signal(&qemu_pause_cond);
289 vcpu_info[env->cpu_index].signalled = 0;
292 static int all_threads_paused(void)
294 int i;
296 for (i = 0; i < smp_cpus; ++i)
297 if (vcpu_info[i].stop)
298 return 0;
299 return 1;
302 static void pause_all_threads(void)
304 int i;
306 assert(!cpu_single_env);
308 for (i = 0; i < smp_cpus; ++i) {
309 vcpu_info[i].stop = 1;
310 pthread_kill(vcpu_info[i].thread, SIG_IPI);
312 while (!all_threads_paused())
313 qemu_cond_wait(&qemu_pause_cond);
316 static void resume_all_threads(void)
318 int i;
320 assert(!cpu_single_env);
322 for (i = 0; i < smp_cpus; ++i) {
323 vcpu_info[i].stop = 0;
324 vcpu_info[i].stopped = 0;
325 pthread_kill(vcpu_info[i].thread, SIG_IPI);
/*
 * VM run-state callback registered in kvm_init_ap(): resumes the vcpu
 * threads when @running is non-zero and pauses them otherwise.
 * @context is unused.
 */
static void kvm_vm_state_change_handler(void *context, int running)
{
    if (!running) {
        pause_all_threads();
        return;
    }
    resume_all_threads();
}
337 static void update_regs_for_sipi(CPUState *env)
339 kvm_arch_update_regs_for_sipi(env);
340 vcpu_info[env->cpu_index].sipi_needed = 0;
341 vcpu_info[env->cpu_index].init = 0;
344 static void update_regs_for_init(CPUState *env)
346 cpu_reset(env);
347 kvm_arch_load_regs(env);
350 static void setup_kernel_sigmask(CPUState *env)
352 sigset_t set;
354 sigemptyset(&set);
355 sigaddset(&set, SIGUSR2);
356 sigaddset(&set, SIGIO);
357 sigaddset(&set, SIGALRM);
358 sigprocmask(SIG_BLOCK, &set, NULL);
360 sigprocmask(SIG_BLOCK, NULL, &set);
361 sigdelset(&set, SIG_IPI);
363 kvm_set_signal_mask(kvm_context, env->cpu_index, &set);
366 void qemu_kvm_system_reset(void)
368 int i;
370 pause_all_threads();
372 qemu_system_reset();
374 for (i = 0; i < smp_cpus; ++i)
375 kvm_arch_cpu_reset(vcpu_info[i].env);
377 resume_all_threads();
380 static int kvm_main_loop_cpu(CPUState *env)
382 struct vcpu_info *info = &vcpu_info[env->cpu_index];
384 setup_kernel_sigmask(env);
386 pthread_mutex_lock(&qemu_mutex);
387 if (kvm_irqchip_in_kernel(kvm_context))
388 env->halted = 0;
390 kvm_qemu_init_env(env);
391 #ifdef TARGET_I386
392 kvm_tpr_vcpu_start(env);
393 #endif
395 cpu_single_env = env;
396 kvm_load_registers(env);
398 while (1) {
399 while (!has_work(env))
400 kvm_main_loop_wait(env, 1000);
401 if (env->interrupt_request & CPU_INTERRUPT_HARD)
402 env->halted = 0;
403 if (!kvm_irqchip_in_kernel(kvm_context) && info->sipi_needed)
404 update_regs_for_sipi(env);
405 if (!kvm_irqchip_in_kernel(kvm_context) && info->init)
406 update_regs_for_init(env);
407 if (!env->halted && !info->init)
408 kvm_cpu_exec(env);
409 env->interrupt_request &= ~CPU_INTERRUPT_EXIT;
410 kvm_main_loop_wait(env, 0);
412 pthread_mutex_unlock(&qemu_mutex);
413 return 0;
416 static void *ap_main_loop(void *_env)
418 CPUState *env = _env;
419 sigset_t signals;
421 vcpu = &vcpu_info[env->cpu_index];
422 vcpu->env = env;
423 vcpu->env->thread_id = kvm_get_thread_id();
424 sigfillset(&signals);
425 sigprocmask(SIG_BLOCK, &signals, NULL);
426 kvm_create_vcpu(kvm_context, env->cpu_index);
427 kvm_qemu_init_env(env);
429 /* signal VCPU creation */
430 pthread_mutex_lock(&qemu_mutex);
431 vcpu->created = 1;
432 pthread_cond_signal(&qemu_vcpu_cond);
434 /* and wait for machine initialization */
435 while (!qemu_system_ready)
436 qemu_cond_wait(&qemu_system_cond);
437 pthread_mutex_unlock(&qemu_mutex);
439 kvm_main_loop_cpu(env);
440 return NULL;
443 void kvm_init_new_ap(int cpu, CPUState *env)
445 pthread_create(&vcpu_info[cpu].thread, NULL, ap_main_loop, env);
447 while (vcpu_info[cpu].created == 0)
448 qemu_cond_wait(&qemu_vcpu_cond);
451 int kvm_init_ap(void)
453 #ifdef TARGET_I386
454 kvm_tpr_opt_setup();
455 #endif
456 qemu_add_vm_change_state_handler(kvm_vm_state_change_handler, NULL);
458 signal(SIG_IPI, sig_ipi_handler);
459 return 0;
462 void qemu_kvm_notify_work(void)
464 uint64_t value = 1;
465 char buffer[8];
466 size_t offset = 0;
468 if (io_thread_fd == -1)
469 return;
471 memcpy(buffer, &value, sizeof(value));
473 while (offset < 8) {
474 ssize_t len;
476 len = write(io_thread_fd, buffer + offset, 8 - offset);
477 if (len == -1 && errno == EINTR)
478 continue;
480 if (len <= 0)
481 break;
483 offset += len;
486 if (offset != 8)
487 fprintf(stderr, "failed to notify io thread\n");
490 /* If we have signalfd, we mask out the signals we want to handle and then
491 * use signalfd to listen for them. We rely on whatever the current signal
492 * handler is to dispatch the signals when we receive them.
495 static void sigfd_handler(void *opaque)
497 int fd = (unsigned long)opaque;
498 struct signalfd_siginfo info;
499 struct sigaction action;
500 ssize_t len;
502 while (1) {
503 do {
504 len = read(fd, &info, sizeof(info));
505 } while (len == -1 && errno == EINTR);
507 if (len == -1 && errno == EAGAIN)
508 break;
510 if (len != sizeof(info)) {
511 printf("read from sigfd returned %ld: %m\n", len);
512 return;
515 sigaction(info.ssi_signo, NULL, &action);
516 if (action.sa_handler)
517 action.sa_handler(info.ssi_signo);
519 if (info.ssi_signo == SIGUSR2) {
520 pthread_cond_signal(&qemu_aio_cond);
/*
 * Used to break the IO thread out of select: drains the 8-byte value that
 * qemu_kvm_notify_work() wrote.  @opaque carries the descriptor to read
 * from.  Reads interrupted by EINTR are retried; any other failure or
 * end-of-file stops the drain early.
 */
static void io_thread_wakeup(void *opaque)
{
    int fd = (unsigned long)opaque;
    char scratch[8];
    size_t got = 0;

    while (got < 8) {
        ssize_t chunk = read(fd, scratch + got, 8 - got);

        if (chunk == -1 && errno == EINTR)
            continue;
        if (chunk <= 0)
            break;

        got += chunk;
    }
}
546 int kvm_main_loop(void)
548 int fds[2];
549 sigset_t mask;
550 int sigfd;
552 io_thread = pthread_self();
553 qemu_system_ready = 1;
555 if (kvm_eventfd(fds) == -1) {
556 fprintf(stderr, "failed to create eventfd\n");
557 return -errno;
560 qemu_set_fd_handler2(fds[0], NULL, io_thread_wakeup, NULL,
561 (void *)(unsigned long)fds[0]);
563 io_thread_fd = fds[1];
565 sigemptyset(&mask);
566 sigaddset(&mask, SIGIO);
567 sigaddset(&mask, SIGALRM);
568 sigaddset(&mask, SIGUSR2);
569 sigprocmask(SIG_BLOCK, &mask, NULL);
571 sigfd = kvm_signalfd(&mask);
572 if (sigfd == -1) {
573 fprintf(stderr, "failed to create signalfd\n");
574 return -errno;
577 fcntl(sigfd, F_SETFL, O_NONBLOCK);
579 qemu_set_fd_handler2(sigfd, NULL, sigfd_handler, NULL,
580 (void *)(unsigned long)sigfd);
582 pthread_cond_broadcast(&qemu_system_cond);
584 io_thread_sigfd = sigfd;
585 cpu_single_env = NULL;
587 while (1) {
588 main_loop_wait(1000);
589 if (qemu_shutdown_requested())
590 break;
591 else if (qemu_powerdown_requested())
592 qemu_system_powerdown();
593 else if (qemu_reset_requested())
594 qemu_kvm_system_reset();
595 else if (kvm_debug_stop_requested) {
596 vm_stop(EXCP_DEBUG);
597 kvm_debug_stop_requested = 0;
601 pause_all_threads();
602 pthread_mutex_unlock(&qemu_mutex);
604 return 0;
607 static int kvm_debug(void *opaque, int vcpu)
609 kvm_debug_stop_requested = 1;
610 vcpu_info[vcpu].stopped = 1;
611 return 1;
/* libkvm callback: store the byte read with cpu_inb() from port @addr in *data; returns 0. */
static int kvm_inb(void *opaque, uint16_t addr, uint8_t *data)
{
    uint8_t value = cpu_inb(0, addr);

    *data = value;
    return 0;
}
/* libkvm callback: store the 16-bit value read with cpu_inw() from port @addr in *data; returns 0. */
static int kvm_inw(void *opaque, uint16_t addr, uint16_t *data)
{
    uint16_t value = cpu_inw(0, addr);

    *data = value;
    return 0;
}
/* libkvm callback: store the 32-bit value read with cpu_inl() from port @addr in *data; returns 0. */
static int kvm_inl(void *opaque, uint16_t addr, uint32_t *data)
{
    uint32_t value = cpu_inl(0, addr);

    *data = value;
    return 0;
}
#define PM_IO_BASE 0xb000

/*
 * libkvm callback for byte-wide port writes.
 *
 * Writes to port 0xb2 are intercepted and never forwarded:
 *   0x00  writes 0 to port 0xb3;
 *   0xf0  clears bit 0 of the 16-bit register at PM_IO_BASE + 4
 *         (the original comment said "enable acpi", but the bit is cleared,
 *         so this is presumably the ACPI-disable command);
 *   0xf1  sets bit 0 of the same register (ACPI enable, per the original
 *         comment);
 *   any other value is ignored.
 * Every other port is passed straight to cpu_outb().  Always returns 0.
 */
static int kvm_outb(void *opaque, uint16_t addr, uint8_t data)
{
    unsigned pm_ctl;

    if (addr != 0xb2) {
        cpu_outb(0, addr, data);
        return 0;
    }

    switch (data) {
    case 0:
        cpu_outb(0, 0xb3, 0);
        break;
    case 0xf0:
        pm_ctl = cpu_inw(0, PM_IO_BASE + 4);
        pm_ctl &= ~1;
        cpu_outw(0, PM_IO_BASE + 4, pm_ctl);
        break;
    case 0xf1:
        pm_ctl = cpu_inw(0, PM_IO_BASE + 4);
        pm_ctl |= 1;
        cpu_outw(0, PM_IO_BASE + 4, pm_ctl);
        break;
    default:
        break;
    }
    return 0;
}
/* libkvm callback: forward a 16-bit port write to cpu_outw(); always returns 0. */
static int kvm_outw(void *opaque, uint16_t addr, uint16_t data)
{
    cpu_outw(0, addr, data);

    return 0;
}
/* libkvm callback: forward a 32-bit port write to cpu_outl(); always returns 0. */
static int kvm_outl(void *opaque, uint16_t addr, uint32_t data)
{
    cpu_outl(0, addr, data);

    return 0;
}
/*
 * libkvm callback: read @len bytes of guest physical memory at @addr into
 * @data using cpu_physical_memory_rw() (is_write = 0).  Always returns 0.
 */
static int kvm_mmio_read(void *opaque, uint64_t addr, uint8_t *data, int len)
{
    const int is_write = 0;

    cpu_physical_memory_rw(addr, data, len, is_write);
    return 0;
}
/*
 * libkvm callback: write @len bytes from @data to guest physical memory at
 * @addr using cpu_physical_memory_rw() (is_write = 1).  Always returns 0.
 */
static int kvm_mmio_write(void *opaque, uint64_t addr, uint8_t *data, int len)
{
    const int is_write = 1;

    cpu_physical_memory_rw(addr, data, len, is_write);
    return 0;
}
/* libkvm io_window callback: unconditionally returns 1; @opaque is unused. */
static int kvm_io_window(void *opaque)
{
    (void)opaque;

    return 1;
}
/* libkvm halt callback: defers entirely to kvm_arch_halt(). */
static int kvm_halt(void *opaque, int vcpu)
{
    int rc = kvm_arch_halt(opaque, vcpu);

    return rc;
}
704 static int kvm_shutdown(void *opaque, int vcpu)
706 /* stop the current vcpu from going back to guest mode */
707 vcpu_info[cpu_single_env->cpu_index].stopped = 1;
709 qemu_system_reset_request();
710 return 1;
713 static struct kvm_callbacks qemu_kvm_ops = {
714 .debug = kvm_debug,
715 .inb = kvm_inb,
716 .inw = kvm_inw,
717 .inl = kvm_inl,
718 .outb = kvm_outb,
719 .outw = kvm_outw,
720 .outl = kvm_outl,
721 .mmio_read = kvm_mmio_read,
722 .mmio_write = kvm_mmio_write,
723 .halt = kvm_halt,
724 .shutdown = kvm_shutdown,
725 .io_window = kvm_io_window,
726 .try_push_interrupts = try_push_interrupts,
727 .post_kvm_run = post_kvm_run,
728 .pre_kvm_run = pre_kvm_run,
729 #ifdef TARGET_I386
730 .tpr_access = handle_tpr_access,
731 #endif
732 #ifdef TARGET_PPC
733 .powerpc_dcr_read = handle_powerpc_dcr_read,
734 .powerpc_dcr_write = handle_powerpc_dcr_write,
735 #endif
738 int kvm_qemu_init()
740 /* Try to initialize kvm */
741 kvm_context = kvm_init(&qemu_kvm_ops, cpu_single_env);
742 if (!kvm_context) {
743 return -1;
745 pthread_mutex_lock(&qemu_mutex);
747 return 0;
750 int kvm_qemu_create_context(void)
752 int r;
753 if (!kvm_irqchip) {
754 kvm_disable_irqchip_creation(kvm_context);
756 if (!kvm_pit) {
757 kvm_disable_pit_creation(kvm_context);
759 if (kvm_create(kvm_context, phys_ram_size, (void**)&phys_ram_base) < 0) {
760 kvm_qemu_destroy();
761 return -1;
763 r = kvm_arch_qemu_create_context();
764 if(r <0)
765 kvm_qemu_destroy();
766 return 0;
769 void kvm_qemu_destroy(void)
771 kvm_finalize(kvm_context);
774 void kvm_cpu_register_physical_memory(target_phys_addr_t start_addr,
775 unsigned long size,
776 unsigned long phys_offset)
778 #ifdef KVM_CAP_USER_MEMORY
779 int r = 0;
781 r = kvm_check_extension(kvm_context, KVM_CAP_USER_MEMORY);
782 if (r) {
783 if (!(phys_offset & ~TARGET_PAGE_MASK)) {
784 r = kvm_is_allocated_mem(kvm_context, start_addr, size);
785 if (r)
786 return;
787 r = kvm_is_intersecting_mem(kvm_context, start_addr);
788 if (r)
789 kvm_create_mem_hole(kvm_context, start_addr, size);
790 r = kvm_register_userspace_phys_mem(kvm_context, start_addr,
791 phys_ram_base + phys_offset,
792 size, 0);
794 if (phys_offset & IO_MEM_ROM) {
795 phys_offset &= ~IO_MEM_ROM;
796 r = kvm_is_intersecting_mem(kvm_context, start_addr);
797 if (r)
798 kvm_create_mem_hole(kvm_context, start_addr, size);
799 r = kvm_register_userspace_phys_mem(kvm_context, start_addr,
800 phys_ram_base + phys_offset,
801 size, 0);
803 if (r < 0) {
804 printf("kvm_cpu_register_physical_memory: failed\n");
805 exit(1);
807 return;
809 #endif
810 if (phys_offset & IO_MEM_ROM) {
811 phys_offset &= ~IO_MEM_ROM;
812 memcpy(phys_ram_base + start_addr, phys_ram_base + phys_offset, size);
816 int kvm_setup_guest_memory(void *area, unsigned long size)
818 int ret = 0;
820 if (kvm_enabled() && !kvm_has_sync_mmu(kvm_context))
821 ret = madvise(area, size, MADV_DONTFORK);
823 if (ret)
824 perror ("madvise");
826 return ret;
829 int kvm_qemu_check_extension(int ext)
831 return kvm_check_extension(kvm_context, ext);
834 int kvm_qemu_init_env(CPUState *cenv)
836 return kvm_arch_qemu_init_env(cenv);
839 struct kvm_guest_debug_data {
840 struct kvm_debug_guest dbg;
841 int err;
844 void kvm_invoke_guest_debug(void *data)
846 struct kvm_guest_debug_data *dbg_data = data;
848 dbg_data->err = kvm_guest_debug(kvm_context, cpu_single_env->cpu_index,
849 &dbg_data->dbg);
852 int kvm_update_debugger(CPUState *env)
854 struct kvm_guest_debug_data data;
855 int i;
857 memset(data.dbg.breakpoints, 0, sizeof(data.dbg.breakpoints));
859 data.dbg.enabled = 0;
860 if (env->nb_breakpoints || env->singlestep_enabled) {
861 data.dbg.enabled = 1;
862 for (i = 0; i < 4 && i < env->nb_breakpoints; ++i) {
863 data.dbg.breakpoints[i].enabled = 1;
864 data.dbg.breakpoints[i].address = env->breakpoints[i];
866 data.dbg.singlestep = env->singlestep_enabled;
868 on_vcpu(env, kvm_invoke_guest_debug, &data);
869 return data.err;
/*
 * dirty pages logging
 */

/*
 * Buffer handed to kvm_get_dirty_pages_range(); allocated and freed by
 * kvm_physical_memory_set_dirty_tracking().
 * FIXME: use unsigned long pointer instead of unsigned char
 */
unsigned char *kvm_dirty_bitmap = NULL;
878 int kvm_physical_memory_set_dirty_tracking(int enable)
880 int r = 0;
882 if (!kvm_enabled())
883 return 0;
885 if (enable) {
886 if (!kvm_dirty_bitmap) {
887 unsigned bitmap_size = BITMAP_SIZE(phys_ram_size);
888 kvm_dirty_bitmap = qemu_malloc(bitmap_size);
889 if (kvm_dirty_bitmap == NULL) {
890 perror("Failed to allocate dirty pages bitmap");
891 r=-1;
893 else {
894 r = kvm_dirty_pages_log_enable_all(kvm_context);
898 else {
899 if (kvm_dirty_bitmap) {
900 r = kvm_dirty_pages_log_reset(kvm_context);
901 qemu_free(kvm_dirty_bitmap);
902 kvm_dirty_bitmap = NULL;
905 return r;
908 /* get kvm's dirty pages bitmap and update qemu's */
909 int kvm_get_dirty_pages_log_range(unsigned long start_addr,
910 unsigned char *bitmap,
911 unsigned int offset,
912 unsigned long mem_size)
914 unsigned int i, j, n=0;
915 unsigned char c;
916 unsigned page_number, addr, addr1;
917 unsigned int len = ((mem_size/TARGET_PAGE_SIZE) + 7) / 8;
920 * bitmap-traveling is faster than memory-traveling (for addr...)
921 * especially when most of the memory is not dirty.
923 for (i=0; i<len; i++) {
924 c = bitmap[i];
925 while (c>0) {
926 j = ffsl(c) - 1;
927 c &= ~(1u<<j);
928 page_number = i * 8 + j;
929 addr1 = page_number * TARGET_PAGE_SIZE;
930 addr = offset + addr1;
931 cpu_physical_memory_set_dirty(addr);
932 n++;
935 return 0;
/*
 * Callback for kvm_get_dirty_pages_range(): forwards one slot's bitmap to
 * kvm_get_dirty_pages_log_range(), passing @start as both the start address
 * and the offset.  @opaque is unused.
 */
int kvm_get_dirty_bitmap_cb(unsigned long start, unsigned long len,
                            void *bitmap, void *opaque)
{
    unsigned char *bits = bitmap;

    return kvm_get_dirty_pages_log_range(start, bits, start, len);
}
944 * get kvm's dirty pages bitmap and update qemu's
945 * we only care about physical ram, which resides in slots 0 and 3
947 int kvm_update_dirty_pages_log(void)
949 int r = 0;
952 r = kvm_get_dirty_pages_range(kvm_context, 0, phys_ram_size,
953 kvm_dirty_bitmap, NULL,
954 kvm_get_dirty_bitmap_cb);
955 return r;
958 int kvm_get_phys_ram_page_bitmap(unsigned char *bitmap)
960 unsigned int bsize = BITMAP_SIZE(phys_ram_size);
961 unsigned int brsize = BITMAP_SIZE(ram_size);
962 unsigned int extra_pages = (phys_ram_size - ram_size) / TARGET_PAGE_SIZE;
963 unsigned int extra_bytes = (extra_pages +7)/8;
964 unsigned int hole_start = BITMAP_SIZE(0xa0000);
965 unsigned int hole_end = BITMAP_SIZE(0xc0000);
967 memset(bitmap, 0xFF, brsize + extra_bytes);
968 memset(bitmap + hole_start, 0, hole_end - hole_start);
969 memset(bitmap + brsize + extra_bytes, 0, bsize - brsize - extra_bytes);
971 return 0;
974 #ifdef KVM_CAP_IRQCHIP
976 int kvm_set_irq(int irq, int level)
978 return kvm_set_irq_level(kvm_context, irq, level);
981 #endif
/* Hook called before an aio wait; currently does nothing. */
void qemu_kvm_aio_wait_start(void)
{
}
987 void qemu_kvm_aio_wait(void)
989 if (!cpu_single_env) {
990 if (io_thread_sigfd != -1) {
991 fd_set rfds;
992 int ret;
994 FD_ZERO(&rfds);
995 FD_SET(io_thread_sigfd, &rfds);
997 /* this is a rare case where we do want to hold qemu_mutex
998 * while sleeping. We cannot allow anything else to run
999 * right now. */
1000 ret = select(io_thread_sigfd + 1, &rfds, NULL, NULL, NULL);
1001 if (ret > 0 && FD_ISSET(io_thread_sigfd, &rfds))
1002 sigfd_handler((void *)(unsigned long)io_thread_sigfd);
1004 qemu_aio_poll();
1005 } else
1006 qemu_cond_wait(&qemu_aio_cond);
/* Hook called after an aio wait; currently does nothing. */
void qemu_kvm_aio_wait_end(void)
{
}
1013 int qemu_kvm_get_dirty_pages(unsigned long phys_addr, void *buf)
1015 return kvm_get_dirty_pages(kvm_context, phys_addr, buf);
1018 void *kvm_cpu_create_phys_mem(target_phys_addr_t start_addr,
1019 unsigned long size, int log, int writable)
1021 return kvm_create_phys_mem(kvm_context, start_addr, size, log, writable);
1024 void kvm_cpu_destroy_phys_mem(target_phys_addr_t start_addr,
1025 unsigned long size)
1027 kvm_destroy_phys_mem(kvm_context, start_addr, size);
1030 void kvm_mutex_unlock(void)
1032 assert(!cpu_single_env);
1033 pthread_mutex_unlock(&qemu_mutex);
1036 void kvm_mutex_lock(void)
1038 pthread_mutex_lock(&qemu_mutex);
1039 cpu_single_env = NULL;
1042 int qemu_kvm_register_coalesced_mmio(target_phys_addr_t addr, unsigned int size)
1044 return kvm_register_coalesced_mmio(kvm_context, addr, size);
1047 int qemu_kvm_unregister_coalesced_mmio(target_phys_addr_t addr,
1048 unsigned int size)
1050 return kvm_unregister_coalesced_mmio(kvm_context, addr, size);