kvm: qemu: ignore SIG_IPI signals in userspace
[kvm-userspace.git] / qemu / qemu-kvm.c
blob89e267cba0378135c89d5a45ec02113b689f9acb
1 /*
2 * qemu/kvm integration
4 * Copyright (C) 2006-2008 Qumranet Technologies
6 * Licensed under the terms of the GNU GPL version 2 or higher.
7 */
8 #include "config.h"
9 #include "config-host.h"
/* Feature switches; each one is defined as 1. */
/* Not referenced elsewhere in this file; presumably gates KVM use -- confirm against callers. */
11 int kvm_allowed = 1;
/* When zero, kvm_qemu_create_context() disables irqchip creation. */
12 int kvm_irqchip = 1;
/* When zero, kvm_qemu_create_context() disables PIT creation. */
13 int kvm_pit = 1;
15 #include <string.h>
16 #include "hw/hw.h"
17 #include "sysemu.h"
19 #include "qemu-kvm.h"
20 #include <libkvm.h>
21 #include <pthread.h>
22 #include <sys/utsname.h>
23 #include <sys/syscall.h>
/* Local declaration of perror(), used when the dirty bitmap cannot be allocated. */
25 extern void perror(const char *s);
/* Handle returned by kvm_init(); handed to the libkvm calls made in this file. */
27 kvm_context_t kvm_context;
/* Defined elsewhere; bounds the loops over vcpu_info[]. */
29 extern int smp_cpus;
/* Set in kvm_main_loop() on a reset request; consumed and cleared by cpu 0 in kvm_main_loop_cpu(). */
31 static int qemu_kvm_reset_requested;
/* Global lock: taken in kvm_qemu_init(), dropped around signal waits and guest execution. */
33 pthread_mutex_t qemu_mutex = PTHREAD_MUTEX_INITIALIZER;
/* Signalled when SIGUSR2 is processed; waited on in qemu_kvm_aio_wait(). */
34 pthread_cond_t qemu_aio_cond = PTHREAD_COND_INITIALIZER;
/* Per-thread pointer to the caller's vcpu_info slot; assigned only in ap_main_loop(). */
35 __thread struct vcpu_info *vcpu;
/*
 * A pair of complementary signal sets: kvm_add_signal() adds a signal to
 * sigset and removes it from negsigset.
 */
37 struct qemu_kvm_signal_table {
38 sigset_t sigset; /* signals of interest; passed to sigtimedwait() */
39 sigset_t negsigset; /* starts full; loses every signal added to sigset */
/* Filled with SIGIO, SIGALRM, SIGUSR1 and SIGUSR2 by qemu_kvm_init_signal_tables(). */
42 static struct qemu_kvm_signal_table io_signal_table;
/* Filled with SIG_IPI only by qemu_kvm_init_signal_tables(). */
43 static struct qemu_kvm_signal_table vcpu_signal_table;
/* Real-time signal sent with pthread_kill() to kick a vcpu thread. */
45 #define SIG_IPI (SIGRTMIN+4)
/* Per-vcpu bookkeeping, indexed by the CPU's cpu_index. */
47 struct vcpu_info {
48 CPUState *env; /* CPU state for this slot; set in ap_main_loop() */
49 int sipi_needed; /* set by kvm_update_after_sipi(), cleared by update_regs_for_sipi() */
50 int init; /* set by kvm_apic_init() for cpu_index != 0; while set, kvm_main_loop_cpu() skips kvm_cpu_exec() */
51 pthread_t thread; /* thread handle filled in by kvm_init_new_ap() */
52 int signalled; /* a SIG_IPI kick has been sent; cleared in kvm_main_loop_wait() */
53 int stop; /* pause requested by pause_all_threads() */
54 int stopped; /* pause acknowledged in kvm_eat_signal() */
55 } vcpu_info[256];
/* Thread running kvm_main_loop(); assigned at the start of that function. */
57 pthread_t io_thread;
/*
 * Return the kernel thread id of the calling thread, obtained with the
 * raw gettid system call.
 */
static inline unsigned long kvm_get_thread_id(void)
{
    long tid = syscall(SYS_gettid);

    return tid;
}
64 CPUState *qemu_kvm_cpu_env(int index)
66 return vcpu_info[index].env;
/*
 * Handler installed for SIG_IPI by kvm_init_ap().  It deliberately does
 * nothing: delivering the signal is the whole point.
 */
static void sig_ipi_handler(int n)
{
    (void)n;
}
73 void kvm_update_interrupt_request(CPUState *env)
75 int signal = 0;
77 if (env) {
78 if (!vcpu)
79 signal = 1;
80 if (vcpu && env != vcpu->env && !vcpu_info[env->cpu_index].signalled)
81 signal = 1;
83 if (signal) {
84 vcpu_info[env->cpu_index].signalled = 1;
85 if (vcpu_info[env->cpu_index].thread)
86 pthread_kill(vcpu_info[env->cpu_index].thread, SIG_IPI);
91 void kvm_update_after_sipi(CPUState *env)
93 vcpu_info[env->cpu_index].sipi_needed = 1;
94 kvm_update_interrupt_request(env);
97 void kvm_apic_init(CPUState *env)
99 if (env->cpu_index != 0)
100 vcpu_info[env->cpu_index].init = 1;
101 kvm_update_interrupt_request(env);
104 #include <signal.h>
/* libkvm callback: forwards to the architecture-specific implementation. */
static int try_push_interrupts(void *opaque)
{
    int pushed = kvm_arch_try_push_interrupts(opaque);

    return pushed;
}
111 static void post_kvm_run(void *opaque, int vcpu)
114 pthread_mutex_lock(&qemu_mutex);
115 kvm_arch_post_kvm_run(opaque, vcpu);
118 static int pre_kvm_run(void *opaque, int vcpu)
120 CPUState *env = qemu_kvm_cpu_env(vcpu);
122 kvm_arch_pre_kvm_run(opaque, vcpu);
124 if (env->interrupt_request & CPU_INTERRUPT_EXIT)
125 return 1;
126 pthread_mutex_unlock(&qemu_mutex);
127 return 0;
130 void kvm_load_registers(CPUState *env)
132 if (kvm_enabled())
133 kvm_arch_load_regs(env);
136 void kvm_save_registers(CPUState *env)
138 if (kvm_enabled())
139 kvm_arch_save_regs(env);
142 int kvm_cpu_exec(CPUState *env)
144 int r;
146 r = kvm_run(kvm_context, env->cpu_index);
147 if (r < 0) {
148 printf("kvm_run returned %d\n", r);
149 exit(1);
152 return 0;
155 extern int vm_running;
157 static int has_work(CPUState *env)
159 if (!vm_running || (env && vcpu_info[env->cpu_index].stopped))
160 return 0;
161 if (!(env->hflags & HF_HALTED_MASK))
162 return 1;
163 return kvm_arch_has_work(env);
166 static int kvm_process_signal(int si_signo)
168 struct sigaction sa;
170 switch (si_signo) {
171 case SIGUSR2:
172 pthread_cond_signal(&qemu_aio_cond);
173 break;
174 case SIGALRM:
175 case SIGIO:
176 sigaction(si_signo, NULL, &sa);
177 sa.sa_handler(si_signo);
178 break;
181 return 1;
184 static int kvm_eat_signal(struct qemu_kvm_signal_table *waitset, CPUState *env,
185 int timeout)
187 struct timespec ts;
188 int r, e, ret = 0;
189 siginfo_t siginfo;
191 ts.tv_sec = timeout / 1000;
192 ts.tv_nsec = (timeout % 1000) * 1000000;
193 r = sigtimedwait(&waitset->sigset, &siginfo, &ts);
194 if (r == -1 && (errno == EAGAIN || errno == EINTR) && !timeout)
195 return 0;
196 e = errno;
197 pthread_mutex_lock(&qemu_mutex);
198 if (env && vcpu)
199 cpu_single_env = vcpu->env;
200 if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
201 printf("sigtimedwait: %s\n", strerror(e));
202 exit(1);
204 if (r != -1)
205 ret = kvm_process_signal(siginfo.si_signo);
207 if (env && vcpu_info[env->cpu_index].stop) {
208 vcpu_info[env->cpu_index].stop = 0;
209 vcpu_info[env->cpu_index].stopped = 1;
210 pthread_kill(io_thread, SIGUSR1);
212 pthread_mutex_unlock(&qemu_mutex);
214 return ret;
218 static void kvm_eat_signals(CPUState *env, int timeout)
220 int r = 0;
221 struct qemu_kvm_signal_table *waitset = &vcpu_signal_table;
223 while (kvm_eat_signal(waitset, env, 0))
224 r = 1;
225 if (!r && timeout) {
226 r = kvm_eat_signal(waitset, env, timeout);
227 if (r)
228 while (kvm_eat_signal(waitset, env, 0))
233 static void kvm_main_loop_wait(CPUState *env, int timeout)
235 pthread_mutex_unlock(&qemu_mutex);
236 kvm_eat_signals(env, timeout);
237 pthread_mutex_lock(&qemu_mutex);
238 cpu_single_env = env;
239 vcpu_info[env->cpu_index].signalled = 0;
242 static int all_threads_paused(void)
244 int i;
246 for (i = 0; i < smp_cpus; ++i)
247 if (vcpu_info[i].stop)
248 return 0;
249 return 1;
252 static void pause_all_threads(void)
254 int i;
256 for (i = 0; i < smp_cpus; ++i) {
257 vcpu_info[i].stop = 1;
258 pthread_kill(vcpu_info[i].thread, SIG_IPI);
260 while (!all_threads_paused()) {
261 pthread_mutex_unlock(&qemu_mutex);
262 kvm_eat_signal(&io_signal_table, NULL, 1000);
263 pthread_mutex_lock(&qemu_mutex);
264 cpu_single_env = NULL;
268 static void resume_all_threads(void)
270 int i;
272 for (i = 0; i < smp_cpus; ++i) {
273 vcpu_info[i].stop = 0;
274 vcpu_info[i].stopped = 0;
275 pthread_kill(vcpu_info[i].thread, SIG_IPI);
/*
 * VM state-change callback: resume the vcpu threads when the VM starts
 * running, pause them when it stops.
 */
static void kvm_vm_state_change_handler(void *context, int running)
{
    if (!running) {
        pause_all_threads();
        return;
    }
    resume_all_threads();
}
287 static void update_regs_for_sipi(CPUState *env)
289 kvm_arch_update_regs_for_sipi(env);
290 vcpu_info[env->cpu_index].sipi_needed = 0;
291 vcpu_info[env->cpu_index].init = 0;
294 static void update_regs_for_init(CPUState *env)
296 cpu_reset(env);
297 kvm_arch_load_regs(env);
300 static void setup_kernel_sigmask(CPUState *env)
302 sigset_t set;
304 sigprocmask(SIG_BLOCK, NULL, &set);
305 sigdelset(&set, SIG_IPI);
307 kvm_set_signal_mask(kvm_context, env->cpu_index, &set);
310 static int kvm_main_loop_cpu(CPUState *env)
312 struct vcpu_info *info = &vcpu_info[env->cpu_index];
314 setup_kernel_sigmask(env);
315 pthread_mutex_lock(&qemu_mutex);
317 kvm_qemu_init_env(env);
318 env->ready_for_interrupt_injection = 1;
319 #ifdef TARGET_I386
320 kvm_tpr_vcpu_start(env);
321 #endif
323 cpu_single_env = env;
324 while (1) {
325 while (!has_work(env))
326 kvm_main_loop_wait(env, 10);
327 if (env->interrupt_request & CPU_INTERRUPT_HARD)
328 env->hflags &= ~HF_HALTED_MASK;
329 if (!kvm_irqchip_in_kernel(kvm_context) && info->sipi_needed)
330 update_regs_for_sipi(env);
331 if (!kvm_irqchip_in_kernel(kvm_context) && info->init)
332 update_regs_for_init(env);
333 if (!(env->hflags & HF_HALTED_MASK) && !info->init)
334 kvm_cpu_exec(env);
335 env->interrupt_request &= ~CPU_INTERRUPT_EXIT;
336 kvm_main_loop_wait(env, 0);
337 if (qemu_kvm_reset_requested && env->cpu_index == 0) {
338 qemu_kvm_reset_requested = 0;
339 env->interrupt_request = 0;
340 qemu_system_reset();
341 kvm_arch_load_regs(env);
344 pthread_mutex_unlock(&qemu_mutex);
345 return 0;
348 static void *ap_main_loop(void *_env)
350 CPUState *env = _env;
351 sigset_t signals;
353 vcpu = &vcpu_info[env->cpu_index];
354 vcpu->env = env;
355 vcpu->env->thread_id = kvm_get_thread_id();
356 sigfillset(&signals);
357 sigprocmask(SIG_BLOCK, &signals, NULL);
358 kvm_create_vcpu(kvm_context, env->cpu_index);
359 kvm_qemu_init_env(env);
360 if (kvm_irqchip_in_kernel(kvm_context))
361 env->hflags &= ~HF_HALTED_MASK;
362 kvm_main_loop_cpu(env);
363 return NULL;
366 static void qemu_kvm_init_signal_table(struct qemu_kvm_signal_table *sigtab)
368 sigemptyset(&sigtab->sigset);
369 sigfillset(&sigtab->negsigset);
372 static void kvm_add_signal(struct qemu_kvm_signal_table *sigtab, int signum)
374 sigaddset(&sigtab->sigset, signum);
375 sigdelset(&sigtab->negsigset, signum);
378 void kvm_init_new_ap(int cpu, CPUState *env)
380 pthread_create(&vcpu_info[cpu].thread, NULL, ap_main_loop, env);
383 static void qemu_kvm_init_signal_tables(void)
385 qemu_kvm_init_signal_table(&io_signal_table);
386 qemu_kvm_init_signal_table(&vcpu_signal_table);
388 kvm_add_signal(&io_signal_table, SIGIO);
389 kvm_add_signal(&io_signal_table, SIGALRM);
390 kvm_add_signal(&io_signal_table, SIGUSR1);
391 kvm_add_signal(&io_signal_table, SIGUSR2);
393 kvm_add_signal(&vcpu_signal_table, SIG_IPI);
395 sigprocmask(SIG_BLOCK, &io_signal_table.sigset, NULL);
398 int kvm_init_ap(void)
400 CPUState *env = first_cpu;
401 int i;
403 #ifdef TARGET_I386
404 kvm_tpr_opt_setup();
405 #endif
406 qemu_add_vm_change_state_handler(kvm_vm_state_change_handler, NULL);
407 qemu_kvm_init_signal_tables();
409 signal(SIG_IPI, sig_ipi_handler);
410 for (i = 0; i < smp_cpus; ++i) {
411 kvm_init_new_ap(i, env);
412 env = env->next_cpu;
414 return 0;
417 void qemu_kvm_notify_work(void)
419 if (io_thread)
420 pthread_kill(io_thread, SIGUSR1);
424 * The IO thread has all signals that inform machine events
425 * blocked (io_signal_table), so it won't get interrupted
426 * while processing in main_loop_wait().
429 int kvm_main_loop(void)
431 io_thread = pthread_self();
432 pthread_mutex_unlock(&qemu_mutex);
433 while (1) {
434 kvm_eat_signal(&io_signal_table, NULL, 1000);
435 pthread_mutex_lock(&qemu_mutex);
436 cpu_single_env = NULL;
437 main_loop_wait(0);
438 if (qemu_shutdown_requested())
439 break;
440 else if (qemu_powerdown_requested())
441 qemu_system_powerdown();
442 else if (qemu_reset_requested()) {
443 pthread_kill(vcpu_info[0].thread, SIG_IPI);
444 qemu_kvm_reset_requested = 1;
446 pthread_mutex_unlock(&qemu_mutex);
449 pthread_mutex_unlock(&qemu_mutex);
450 return 0;
453 static int kvm_debug(void *opaque, int vcpu)
455 CPUState *env = cpu_single_env;
457 env->exception_index = EXCP_DEBUG;
458 return 1;
/* libkvm callback: read one byte from I/O port `addr` into *data. */
static int kvm_inb(void *opaque, uint16_t addr, uint8_t *data)
{
    uint8_t value = cpu_inb(0, addr);

    *data = value;
    return 0;
}
/* libkvm callback: read a 16-bit value from I/O port `addr` into *data. */
static int kvm_inw(void *opaque, uint16_t addr, uint16_t *data)
{
    uint16_t value = cpu_inw(0, addr);

    *data = value;
    return 0;
}
/* libkvm callback: read a 32-bit value from I/O port `addr` into *data. */
static int kvm_inl(void *opaque, uint16_t addr, uint32_t *data)
{
    uint32_t value = cpu_inl(0, addr);

    *data = value;
    return 0;
}
#define PM_IO_BASE 0xb000

/*
 * libkvm callback: write one byte to I/O port `addr`.
 *
 * Port 0xb2 is intercepted and never forwarded:
 *   0x00  writes 0 to port 0xb3
 *   0xf0  clears bit 0 of the 16-bit register at PM_IO_BASE + 4
 *   0xf1  sets bit 0 of that register
 * Other values written to 0xb2 are ignored.  Every other port is passed to
 * cpu_outb().  Always returns 0.
 */
static int kvm_outb(void *opaque, uint16_t addr, uint8_t data)
{
    unsigned x;

    if (addr != 0xb2) {
        cpu_outb(0, addr, data);
        return 0;
    }

    switch (data) {
    case 0:
        cpu_outb(0, 0xb3, 0);
        break;
    case 0xf0:
        /* disable acpi */
        x = cpu_inw(0, PM_IO_BASE + 4);
        cpu_outw(0, PM_IO_BASE + 4, x & ~1u);
        break;
    case 0xf1:
        /* enable acpi */
        x = cpu_inw(0, PM_IO_BASE + 4);
        cpu_outw(0, PM_IO_BASE + 4, x | 1u);
        break;
    default:
        break;
    }
    return 0;
}
/* libkvm callback: write a 16-bit value to I/O port `addr`. */
static int kvm_outw(void *opaque, uint16_t addr, uint16_t data)
{
    cpu_outw(0, addr, data);

    return 0;
}
/* libkvm callback: write a 32-bit value to I/O port `addr`. */
static int kvm_outl(void *opaque, uint16_t addr, uint32_t data)
{
    cpu_outl(0, addr, data);

    return 0;
}
/* libkvm callback: read `len` bytes of guest-physical memory at `addr`. */
static int kvm_mmio_read(void *opaque, uint64_t addr, uint8_t *data, int len)
{
    const int is_write = 0;

    cpu_physical_memory_rw(addr, data, len, is_write);
    return 0;
}
/* libkvm callback: write `len` bytes to guest-physical memory at `addr`. */
static int kvm_mmio_write(void *opaque, uint64_t addr, uint8_t *data, int len)
{
    const int is_write = 1;

    cpu_physical_memory_rw(addr, data, len, is_write);
    return 0;
}
/* libkvm io_window callback: unconditionally returns 1. */
static int kvm_io_window(void *opaque)
{
    (void)opaque;

    return 1;
}
/* libkvm halt callback: forwards to the architecture-specific handler. */
static int kvm_halt(void *opaque, int vcpu)
{
    int rc = kvm_arch_halt(opaque, vcpu);

    return rc;
}
/* libkvm shutdown callback: request a system reset and return 1. */
static int kvm_shutdown(void *opaque, int vcpu)
{
    qemu_system_reset_request();

    return 1;
}
557 static struct kvm_callbacks qemu_kvm_ops = {
558 .debug = kvm_debug,
559 .inb = kvm_inb,
560 .inw = kvm_inw,
561 .inl = kvm_inl,
562 .outb = kvm_outb,
563 .outw = kvm_outw,
564 .outl = kvm_outl,
565 .mmio_read = kvm_mmio_read,
566 .mmio_write = kvm_mmio_write,
567 .halt = kvm_halt,
568 .shutdown = kvm_shutdown,
569 .io_window = kvm_io_window,
570 .try_push_interrupts = try_push_interrupts,
571 .post_kvm_run = post_kvm_run,
572 .pre_kvm_run = pre_kvm_run,
573 #ifdef TARGET_I386
574 .tpr_access = handle_tpr_access,
575 #endif
576 #ifdef TARGET_PPC
577 .powerpc_dcr_read = handle_powerpc_dcr_read,
578 .powerpc_dcr_write = handle_powerpc_dcr_write,
579 #endif
582 int kvm_qemu_init()
584 /* Try to initialize kvm */
585 kvm_context = kvm_init(&qemu_kvm_ops, cpu_single_env);
586 if (!kvm_context) {
587 return -1;
589 pthread_mutex_lock(&qemu_mutex);
591 return 0;
594 int kvm_qemu_create_context(void)
596 int r;
597 if (!kvm_irqchip) {
598 kvm_disable_irqchip_creation(kvm_context);
600 if (!kvm_pit) {
601 kvm_disable_pit_creation(kvm_context);
603 if (kvm_create(kvm_context, phys_ram_size, (void**)&phys_ram_base) < 0) {
604 kvm_qemu_destroy();
605 return -1;
607 r = kvm_arch_qemu_create_context();
608 if(r <0)
609 kvm_qemu_destroy();
610 return 0;
613 void kvm_qemu_destroy(void)
615 kvm_finalize(kvm_context);
618 void kvm_cpu_register_physical_memory(target_phys_addr_t start_addr,
619 unsigned long size,
620 unsigned long phys_offset)
622 #ifdef KVM_CAP_USER_MEMORY
623 int r = 0;
625 r = kvm_check_extension(kvm_context, KVM_CAP_USER_MEMORY);
626 if (r) {
627 if (!(phys_offset & ~TARGET_PAGE_MASK)) {
628 r = kvm_is_allocated_mem(kvm_context, start_addr, size);
629 if (r)
630 return;
631 r = kvm_is_intersecting_mem(kvm_context, start_addr);
632 if (r)
633 kvm_create_mem_hole(kvm_context, start_addr, size);
634 r = kvm_register_userspace_phys_mem(kvm_context, start_addr,
635 phys_ram_base + phys_offset,
636 size, 0);
638 if (phys_offset & IO_MEM_ROM) {
639 phys_offset &= ~IO_MEM_ROM;
640 r = kvm_is_intersecting_mem(kvm_context, start_addr);
641 if (r)
642 kvm_create_mem_hole(kvm_context, start_addr, size);
643 r = kvm_register_userspace_phys_mem(kvm_context, start_addr,
644 phys_ram_base + phys_offset,
645 size, 0);
647 if (r < 0) {
648 printf("kvm_cpu_register_physical_memory: failed\n");
649 exit(1);
651 return;
653 #endif
654 if (phys_offset & IO_MEM_ROM) {
655 phys_offset &= ~IO_MEM_ROM;
656 memcpy(phys_ram_base + start_addr, phys_ram_base + phys_offset, size);
660 int kvm_qemu_check_extension(int ext)
662 return kvm_check_extension(kvm_context, ext);
665 int kvm_qemu_init_env(CPUState *cenv)
667 return kvm_arch_qemu_init_env(cenv);
670 int kvm_update_debugger(CPUState *env)
672 struct kvm_debug_guest dbg;
673 int i;
675 dbg.enabled = 0;
676 if (env->nb_breakpoints || env->singlestep_enabled) {
677 dbg.enabled = 1;
678 for (i = 0; i < 4 && i < env->nb_breakpoints; ++i) {
679 dbg.breakpoints[i].enabled = 1;
680 dbg.breakpoints[i].address = env->breakpoints[i];
682 dbg.singlestep = env->singlestep_enabled;
684 return kvm_guest_debug(kvm_context, env->cpu_index, &dbg);
689 * dirty pages logging
691 /* FIXME: use unsigned long pointer instead of unsigned char */
692 unsigned char *kvm_dirty_bitmap = NULL;
693 int kvm_physical_memory_set_dirty_tracking(int enable)
695 int r = 0;
697 if (!kvm_enabled())
698 return 0;
700 if (enable) {
701 if (!kvm_dirty_bitmap) {
702 unsigned bitmap_size = BITMAP_SIZE(phys_ram_size);
703 kvm_dirty_bitmap = qemu_malloc(bitmap_size);
704 if (kvm_dirty_bitmap == NULL) {
705 perror("Failed to allocate dirty pages bitmap");
706 r=-1;
708 else {
709 r = kvm_dirty_pages_log_enable_all(kvm_context);
713 else {
714 if (kvm_dirty_bitmap) {
715 r = kvm_dirty_pages_log_reset(kvm_context);
716 qemu_free(kvm_dirty_bitmap);
717 kvm_dirty_bitmap = NULL;
720 return r;
723 /* get kvm's dirty pages bitmap and update qemu's */
724 int kvm_get_dirty_pages_log_range(unsigned long start_addr,
725 unsigned char *bitmap,
726 unsigned int offset,
727 unsigned long mem_size)
729 unsigned int i, j, n=0;
730 unsigned char c;
731 unsigned page_number, addr, addr1;
732 unsigned int len = ((mem_size/TARGET_PAGE_SIZE) + 7) / 8;
735 * bitmap-traveling is faster than memory-traveling (for addr...)
736 * especially when most of the memory is not dirty.
738 for (i=0; i<len; i++) {
739 c = bitmap[i];
740 while (c>0) {
741 j = ffsl(c) - 1;
742 c &= ~(1u<<j);
743 page_number = i * 8 + j;
744 addr1 = page_number * TARGET_PAGE_SIZE;
745 addr = offset + addr1;
746 cpu_physical_memory_set_dirty(addr);
747 n++;
750 return 0;
/*
 * Callback passed to kvm_get_dirty_pages_range(): forwards one region to
 * kvm_get_dirty_pages_log_range(), using `start` as both the start address
 * and the qemu offset.  `opaque` is ignored.
 */
int kvm_get_dirty_bitmap_cb(unsigned long start, unsigned long len,
                            void *bitmap, void *opaque)
{
    unsigned char *bits = bitmap;

    return kvm_get_dirty_pages_log_range(start, bits, start, len);
}
759 * get kvm's dirty pages bitmap and update qemu's
760 * we only care about physical ram, which resides in slots 0 and 3
762 int kvm_update_dirty_pages_log(void)
764 int r = 0;
767 r = kvm_get_dirty_pages_range(kvm_context, 0, phys_ram_size,
768 kvm_dirty_bitmap, NULL,
769 kvm_get_dirty_bitmap_cb);
770 return r;
773 int kvm_get_phys_ram_page_bitmap(unsigned char *bitmap)
775 unsigned int bsize = BITMAP_SIZE(phys_ram_size);
776 unsigned int brsize = BITMAP_SIZE(ram_size);
777 unsigned int extra_pages = (phys_ram_size - ram_size) / TARGET_PAGE_SIZE;
778 unsigned int extra_bytes = (extra_pages +7)/8;
779 unsigned int hole_start = BITMAP_SIZE(0xa0000);
780 unsigned int hole_end = BITMAP_SIZE(0xc0000);
782 memset(bitmap, 0xFF, brsize + extra_bytes);
783 memset(bitmap + hole_start, 0, hole_end - hole_start);
784 memset(bitmap + brsize + extra_bytes, 0, bsize - brsize - extra_bytes);
786 return 0;
#ifdef KVM_CAP_IRQCHIP

/* Set interrupt line `irq` to `level` through kvm_set_irq_level(). */
int kvm_set_irq(int irq, int level)
{
    int rc = kvm_set_irq_level(kvm_context, irq, level);

    return rc;
}

#endif
/* Hook called before an AIO wait; this implementation does nothing. */
void qemu_kvm_aio_wait_start(void)
{
    /* nothing to do */
}
802 void qemu_kvm_aio_wait(void)
804 CPUState *cpu_single = cpu_single_env;
806 if (!cpu_single_env) {
807 pthread_mutex_unlock(&qemu_mutex);
808 kvm_eat_signal(&io_signal_table, NULL, 1000);
809 pthread_mutex_lock(&qemu_mutex);
810 cpu_single_env = NULL;
811 } else {
812 pthread_cond_wait(&qemu_aio_cond, &qemu_mutex);
813 cpu_single_env = cpu_single;
/* Hook called after an AIO wait; this implementation does nothing. */
void qemu_kvm_aio_wait_end(void)
{
    /* nothing to do */
}
821 int qemu_kvm_get_dirty_pages(unsigned long phys_addr, void *buf)
823 return kvm_get_dirty_pages(kvm_context, phys_addr, buf);
826 void *kvm_cpu_create_phys_mem(target_phys_addr_t start_addr,
827 unsigned long size, int log, int writable)
829 return kvm_create_phys_mem(kvm_context, start_addr, size, log, writable);
832 void kvm_cpu_destroy_phys_mem(target_phys_addr_t start_addr,
833 unsigned long size)
835 kvm_destroy_phys_mem(kvm_context, start_addr, size);