kvm: qemu: add cpu_unregister_io_memory and make io mem table index dynamic
[kvm-userspace.git] / libkvm / libkvm-x86.c
blob6dba91df53016818580bb76cde411f44270d4438
1 #include "libkvm.h"
2 #include "kvm-x86.h"
3 #include <errno.h>
4 #include <sys/ioctl.h>
5 #include <string.h>
6 #include <unistd.h>
7 #include <stropts.h>
8 #include <sys/mman.h>
9 #include <stdio.h>
10 #include <errno.h>
11 #include <sys/types.h>
12 #include <sys/stat.h>
13 #include <fcntl.h>
14 #include <stdlib.h>
16 int kvm_alloc_kernel_memory(kvm_context_t kvm, unsigned long memory,
17 void **vm_mem)
19 unsigned long dosmem = 0xa0000;
20 unsigned long exmem = 0xc0000;
21 unsigned long pcimem = 0xe0000000;
22 int r;
23 int tss_ext;
24 struct kvm_memory_region low_memory = {
25 .memory_size = memory < dosmem ? memory : dosmem,
26 .guest_phys_addr = 0,
28 struct kvm_memory_region extended_memory = {
29 .memory_size = memory < exmem ? 0 : memory - exmem,
30 .guest_phys_addr = exmem,
32 struct kvm_memory_region above_4g_memory = {
33 .memory_size = memory < pcimem ? 0 : memory - pcimem,
34 .guest_phys_addr = 0x100000000ULL,
37 #ifdef KVM_CAP_SET_TSS_ADDR
38 tss_ext = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_SET_TSS_ADDR);
39 #else
40 tss_ext = 0;
41 #endif
43 if (memory >= pcimem)
44 extended_memory.memory_size = pcimem - exmem;
46 /* 640K should be enough. */
47 low_memory.slot = get_free_slot(kvm);
48 r = ioctl(kvm->vm_fd, KVM_SET_MEMORY_REGION, &low_memory);
49 if (r == -1) {
50 fprintf(stderr, "kvm_create_memory_region: %m\n");
51 return -1;
53 register_slot(low_memory.slot, low_memory.guest_phys_addr,
54 low_memory.memory_size, 0, 0, 0);
57 if (extended_memory.memory_size) {
58 if (tss_ext > 0)
59 extended_memory.slot = get_free_slot(kvm);
60 else
61 extended_memory.slot = 0;
62 r = ioctl(kvm->vm_fd, KVM_SET_MEMORY_REGION, &extended_memory);
63 if (r == -1) {
64 fprintf(stderr, "kvm_create_memory_region: %m\n");
65 return -1;
67 register_slot(extended_memory.slot,
68 extended_memory.guest_phys_addr,
69 extended_memory.memory_size, 0, 0, 0);
72 if (above_4g_memory.memory_size) {
73 above_4g_memory.slot = get_free_slot(kvm);
74 r = ioctl(kvm->vm_fd, KVM_SET_MEMORY_REGION, &above_4g_memory);
75 if (r == -1) {
76 fprintf(stderr, "kvm_create_memory_region: %m\n");
77 return -1;
79 register_slot(above_4g_memory.slot,
80 above_4g_memory.guest_phys_addr,
81 above_4g_memory.memory_size, 0, 0, 0);
84 *vm_mem = mmap(NULL, memory, PROT_READ|PROT_WRITE, MAP_SHARED, kvm->vm_fd, 0);
86 return 0;
89 int kvm_set_tss_addr(kvm_context_t kvm, unsigned long addr)
91 #ifdef KVM_CAP_SET_TSS_ADDR
92 int r;
94 r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_SET_TSS_ADDR);
95 if (r > 0) {
96 r = ioctl(kvm->vm_fd, KVM_SET_TSS_ADDR, addr);
97 if (r == -1) {
98 fprintf(stderr, "kvm_set_tss_addr: %m\n");
99 return -errno;
101 return 0;
103 #endif
104 return -ENOSYS;
107 static int kvm_init_tss(kvm_context_t kvm)
109 #ifdef KVM_CAP_SET_TSS_ADDR
110 int r;
112 r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_SET_TSS_ADDR);
113 if (r > 0) {
115 * this address is 3 pages before the bios, and the bios should present
116 * as unavaible memory
118 r = kvm_set_tss_addr(kvm, 0xfffbd000);
119 if (r < 0) {
120 printf("kvm_init_tss: unable to set tss addr\n");
121 return r;
125 #endif
126 return 0;
129 int kvm_arch_create_default_phys_mem(kvm_context_t kvm,
130 unsigned long phys_mem_bytes,
131 void **vm_mem)
133 int zfd;
135 zfd = open("/dev/zero", O_RDONLY);
136 if (zfd == -1) {
137 perror("open /dev/zero");
138 return -1;
140 mmap(*vm_mem + 0xa8000, 0x8000, PROT_READ|PROT_WRITE,
141 MAP_PRIVATE|MAP_FIXED, zfd, 0);
142 close(zfd);
144 return 0;
147 int kvm_create_pit(kvm_context_t kvm)
149 #ifdef KVM_CAP_PIT
150 int r;
152 kvm->pit_in_kernel = 0;
153 if (!kvm->no_pit_creation) {
154 r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_PIT);
155 if (r > 0) {
156 r = ioctl(kvm->vm_fd, KVM_CREATE_PIT);
157 if (r >= 0)
158 kvm->pit_in_kernel = 1;
159 else {
160 printf("Create kernel PIC irqchip failed\n");
161 return r;
165 #endif
166 return 0;
169 int kvm_arch_create(kvm_context_t kvm, unsigned long phys_mem_bytes,
170 void **vm_mem)
172 int r = 0;
174 r = kvm_init_tss(kvm);
175 if (r < 0)
176 return r;
178 r = kvm_create_pit(kvm);
179 if (r < 0)
180 return r;
182 return 0;
#ifdef KVM_EXIT_TPR_ACCESS

/* Forward a TPR-access exit to the user-supplied callback. */
static int handle_tpr_access(kvm_context_t kvm, struct kvm_run *run, int vcpu)
{
	return kvm->callbacks->tpr_access(kvm->opaque, vcpu,
					  run->tpr_access.rip,
					  run->tpr_access.is_write);
}

/*
 * Point the kernel at the guest's virtual-APIC page for @vcpu.
 * Returns 0 on success, -errno on failure (message via perror).
 */
int kvm_enable_vapic(kvm_context_t kvm, int vcpu, uint64_t vapic)
{
	struct kvm_vapic_addr va = {
		.vapic_addr = vapic,
	};

	if (ioctl(kvm->vcpu_fd[vcpu], KVM_SET_VAPIC_ADDR, &va) == -1) {
		int ret = -errno;

		perror("kvm_enable_vapic");
		return ret;
	}
	return 0;
}

#endif
213 int kvm_arch_run(struct kvm_run *run,kvm_context_t kvm, int vcpu)
215 int r = 0;
217 switch (run->exit_reason) {
218 #ifdef KVM_EXIT_SET_TPR
219 case KVM_EXIT_SET_TPR:
220 break;
221 #endif
222 #ifdef KVM_EXIT_TPR_ACCESS
223 case KVM_EXIT_TPR_ACCESS:
224 r = handle_tpr_access(kvm, run, vcpu);
225 break;
226 #endif
227 default:
228 r = 1;
229 break;
232 return r;
235 void *kvm_create_kernel_phys_mem(kvm_context_t kvm, unsigned long phys_start,
236 unsigned long len, int log, int writable)
238 int r;
239 int prot = PROT_READ;
240 void *ptr;
241 struct kvm_memory_region memory = {
242 .memory_size = len,
243 .guest_phys_addr = phys_start,
244 .flags = log ? KVM_MEM_LOG_DIRTY_PAGES : 0,
247 memory.slot = get_free_slot(kvm);
248 r = ioctl(kvm->vm_fd, KVM_SET_MEMORY_REGION, &memory);
249 if (r == -1) {
250 fprintf(stderr, "create_kernel_phys_mem: %s", strerror(errno));
251 return 0;
253 register_slot(memory.slot, memory.guest_phys_addr, memory.memory_size,
254 0, 0, memory.flags);
256 if (writable)
257 prot |= PROT_WRITE;
259 ptr = mmap(NULL, len, prot, MAP_SHARED, kvm->vm_fd, phys_start);
260 if (ptr == MAP_FAILED) {
261 fprintf(stderr, "create_kernel_phys_mem: %s", strerror(errno));
262 return 0;
265 return ptr;
#define MAX_ALIAS_SLOTS 4

/* Local bookkeeping for memory aliases registered with the kernel. */
static struct {
	uint64_t start;	/* guest-physical start of the alias */
	uint64_t len;	/* alias length; 0 marks the entry as free */
} kvm_aliases[MAX_ALIAS_SLOTS];

/* Return the slot already tracking an alias at @start, or -1. */
static int get_alias_slot(uint64_t start)
{
	int slot;

	for (slot = 0; slot < MAX_ALIAS_SLOTS; slot++) {
		if (kvm_aliases[slot].start == start)
			return slot;
	}
	return -1;
}

/* Return an unused slot (len == 0), or -1 when the table is full. */
static int get_free_alias_slot(void)
{
	int slot;

	for (slot = 0; slot < MAX_ALIAS_SLOTS; slot++) {
		if (kvm_aliases[slot].len == 0)
			return slot;
	}
	return -1;
}

/* Record (or update) the alias tracked by @slot. */
static void register_alias(int slot, uint64_t start, uint64_t len)
{
	kvm_aliases[slot].start = start;
	kvm_aliases[slot].len = len;
}
299 int kvm_create_memory_alias(kvm_context_t kvm,
300 uint64_t phys_start,
301 uint64_t len,
302 uint64_t target_phys)
304 struct kvm_memory_alias alias = {
305 .flags = 0,
306 .guest_phys_addr = phys_start,
307 .memory_size = len,
308 .target_phys_addr = target_phys,
310 int fd = kvm->vm_fd;
311 int r;
312 int slot;
314 slot = get_alias_slot(phys_start);
315 if (slot < 0)
316 slot = get_free_alias_slot();
317 if (slot < 0)
318 return -EBUSY;
319 alias.slot = slot;
321 r = ioctl(fd, KVM_SET_MEMORY_ALIAS, &alias);
322 if (r == -1)
323 return -errno;
325 register_alias(slot, phys_start, len);
326 return 0;
329 int kvm_destroy_memory_alias(kvm_context_t kvm, uint64_t phys_start)
331 return kvm_create_memory_alias(kvm, phys_start, 0, 0);
#ifdef KVM_CAP_IRQCHIP

/*
 * Read @vcpu's in-kernel local APIC state into *s.  A no-op (returns 0)
 * when the irqchip is emulated in userspace.  Returns -errno on failure.
 */
int kvm_get_lapic(kvm_context_t kvm, int vcpu, struct kvm_lapic_state *s)
{
	int ret;

	if (!kvm->irqchip_in_kernel)
		return 0;
	ret = ioctl(kvm->vcpu_fd[vcpu], KVM_GET_LAPIC, s);
	if (ret == -1) {
		ret = -errno;
		perror("kvm_get_lapic");
	}
	return ret;
}

/*
 * Load *s into @vcpu's in-kernel local APIC.  A no-op (returns 0)
 * when the irqchip is emulated in userspace.  Returns -errno on failure.
 */
int kvm_set_lapic(kvm_context_t kvm, int vcpu, struct kvm_lapic_state *s)
{
	int ret;

	if (!kvm->irqchip_in_kernel)
		return 0;
	ret = ioctl(kvm->vcpu_fd[vcpu], KVM_SET_LAPIC, s);
	if (ret == -1) {
		ret = -errno;
		perror("kvm_set_lapic");
	}
	return ret;
}

#endif
#ifdef KVM_CAP_PIT

/*
 * Read the in-kernel PIT state into *s.  A no-op (returns 0) when the
 * PIT is emulated in userspace.  Returns -errno on ioctl failure.
 */
int kvm_get_pit(kvm_context_t kvm, struct kvm_pit_state *s)
{
	int ret;

	if (!kvm->pit_in_kernel)
		return 0;
	ret = ioctl(kvm->vm_fd, KVM_GET_PIT, s);
	if (ret == -1) {
		ret = -errno;
		perror("kvm_get_pit");
	}
	return ret;
}

/*
 * Load *s into the in-kernel PIT.  A no-op (returns 0) when the PIT
 * is emulated in userspace.  Returns -errno on ioctl failure.
 */
int kvm_set_pit(kvm_context_t kvm, struct kvm_pit_state *s)
{
	int ret;

	if (!kvm->pit_in_kernel)
		return 0;
	ret = ioctl(kvm->vm_fd, KVM_SET_PIT, s);
	if (ret == -1) {
		ret = -errno;
		perror("kvm_set_pit");
	}
	return ret;
}

#endif
394 void kvm_show_code(kvm_context_t kvm, int vcpu)
396 #define CR0_PE_MASK (1ULL<<0)
397 int fd = kvm->vcpu_fd[vcpu];
398 struct kvm_regs regs;
399 struct kvm_sregs sregs;
400 int r;
401 unsigned char code[50];
402 int back_offset;
403 char code_str[sizeof(code) * 3 + 1];
404 unsigned long rip;
406 r = ioctl(fd, KVM_GET_SREGS, &sregs);
407 if (r == -1) {
408 perror("KVM_GET_SREGS");
409 return;
411 if (sregs.cr0 & CR0_PE_MASK)
412 return;
414 r = ioctl(fd, KVM_GET_REGS, &regs);
415 if (r == -1) {
416 perror("KVM_GET_REGS");
417 return;
419 rip = sregs.cs.base + regs.rip;
420 back_offset = regs.rip;
421 if (back_offset > 20)
422 back_offset = 20;
423 memcpy(code, kvm->physical_memory + rip - back_offset, sizeof code);
424 *code_str = 0;
425 for (r = 0; r < sizeof code; ++r) {
426 if (r == back_offset)
427 strcat(code_str, " -->");
428 sprintf(code_str + strlen(code_str), " %02x", code[r]);
430 fprintf(stderr, "code:%s\n", code_str);
435 * Returns available msr list. User must free.
437 struct kvm_msr_list *kvm_get_msr_list(kvm_context_t kvm)
439 struct kvm_msr_list sizer, *msrs;
440 int r, e;
442 sizer.nmsrs = 0;
443 r = ioctl(kvm->fd, KVM_GET_MSR_INDEX_LIST, &sizer);
444 if (r == -1 && errno != E2BIG)
445 return NULL;
446 msrs = malloc(sizeof *msrs + sizer.nmsrs * sizeof *msrs->indices);
447 if (!msrs) {
448 errno = ENOMEM;
449 return NULL;
451 msrs->nmsrs = sizer.nmsrs;
452 r = ioctl(kvm->fd, KVM_GET_MSR_INDEX_LIST, msrs);
453 if (r == -1) {
454 e = errno;
455 free(msrs);
456 errno = e;
457 return NULL;
459 return msrs;
462 int kvm_get_msrs(kvm_context_t kvm, int vcpu, struct kvm_msr_entry *msrs,
463 int n)
465 struct kvm_msrs *kmsrs = malloc(sizeof *kmsrs + n * sizeof *msrs);
466 int r, e;
468 if (!kmsrs) {
469 errno = ENOMEM;
470 return -1;
472 kmsrs->nmsrs = n;
473 memcpy(kmsrs->entries, msrs, n * sizeof *msrs);
474 r = ioctl(kvm->vcpu_fd[vcpu], KVM_GET_MSRS, kmsrs);
475 e = errno;
476 memcpy(msrs, kmsrs->entries, n * sizeof *msrs);
477 free(kmsrs);
478 errno = e;
479 return r;
482 int kvm_set_msrs(kvm_context_t kvm, int vcpu, struct kvm_msr_entry *msrs,
483 int n)
485 struct kvm_msrs *kmsrs = malloc(sizeof *kmsrs + n * sizeof *msrs);
486 int r, e;
488 if (!kmsrs) {
489 errno = ENOMEM;
490 return -1;
492 kmsrs->nmsrs = n;
493 memcpy(kmsrs->entries, msrs, n * sizeof *msrs);
494 r = ioctl(kvm->vcpu_fd[vcpu], KVM_SET_MSRS, kmsrs);
495 e = errno;
496 free(kmsrs);
497 errno = e;
498 return r;
501 static void print_seg(FILE *file, const char *name, struct kvm_segment *seg)
503 fprintf(stderr,
504 "%s %04x (%08llx/%08x p %d dpl %d db %d s %d type %x l %d"
505 " g %d avl %d)\n",
506 name, seg->selector, seg->base, seg->limit, seg->present,
507 seg->dpl, seg->db, seg->s, seg->type, seg->l, seg->g,
508 seg->avl);
511 static void print_dt(FILE *file, const char *name, struct kvm_dtable *dt)
513 fprintf(stderr, "%s %llx/%x\n", name, dt->base, dt->limit);
516 void kvm_show_regs(kvm_context_t kvm, int vcpu)
518 int fd = kvm->vcpu_fd[vcpu];
519 struct kvm_regs regs;
520 struct kvm_sregs sregs;
521 int r;
523 r = ioctl(fd, KVM_GET_REGS, &regs);
524 if (r == -1) {
525 perror("KVM_GET_REGS");
526 return;
528 fprintf(stderr,
529 "rax %016llx rbx %016llx rcx %016llx rdx %016llx\n"
530 "rsi %016llx rdi %016llx rsp %016llx rbp %016llx\n"
531 "r8 %016llx r9 %016llx r10 %016llx r11 %016llx\n"
532 "r12 %016llx r13 %016llx r14 %016llx r15 %016llx\n"
533 "rip %016llx rflags %08llx\n",
534 regs.rax, regs.rbx, regs.rcx, regs.rdx,
535 regs.rsi, regs.rdi, regs.rsp, regs.rbp,
536 regs.r8, regs.r9, regs.r10, regs.r11,
537 regs.r12, regs.r13, regs.r14, regs.r15,
538 regs.rip, regs.rflags);
539 r = ioctl(fd, KVM_GET_SREGS, &sregs);
540 if (r == -1) {
541 perror("KVM_GET_SREGS");
542 return;
544 print_seg(stderr, "cs", &sregs.cs);
545 print_seg(stderr, "ds", &sregs.ds);
546 print_seg(stderr, "es", &sregs.es);
547 print_seg(stderr, "ss", &sregs.ss);
548 print_seg(stderr, "fs", &sregs.fs);
549 print_seg(stderr, "gs", &sregs.gs);
550 print_seg(stderr, "tr", &sregs.tr);
551 print_seg(stderr, "ldt", &sregs.ldt);
552 print_dt(stderr, "gdt", &sregs.gdt);
553 print_dt(stderr, "idt", &sregs.idt);
554 fprintf(stderr, "cr0 %llx cr2 %llx cr3 %llx cr4 %llx cr8 %llx"
555 " efer %llx\n",
556 sregs.cr0, sregs.cr2, sregs.cr3, sregs.cr4, sregs.cr8,
557 sregs.efer);
560 uint64_t kvm_get_apic_base(kvm_context_t kvm, int vcpu)
562 struct kvm_run *run = kvm->run[vcpu];
564 return run->apic_base;
567 void kvm_set_cr8(kvm_context_t kvm, int vcpu, uint64_t cr8)
569 struct kvm_run *run = kvm->run[vcpu];
571 run->cr8 = cr8;
574 __u64 kvm_get_cr8(kvm_context_t kvm, int vcpu)
576 return kvm->run[vcpu]->cr8;
579 int kvm_setup_cpuid(kvm_context_t kvm, int vcpu, int nent,
580 struct kvm_cpuid_entry *entries)
582 struct kvm_cpuid *cpuid;
583 int r;
585 cpuid = malloc(sizeof(*cpuid) + nent * sizeof(*entries));
586 if (!cpuid)
587 return -ENOMEM;
589 cpuid->nent = nent;
590 memcpy(cpuid->entries, entries, nent * sizeof(*entries));
591 r = ioctl(kvm->vcpu_fd[vcpu], KVM_SET_CPUID, cpuid);
593 free(cpuid);
594 return r;
597 int kvm_set_shadow_pages(kvm_context_t kvm, unsigned int nrshadow_pages)
599 #ifdef KVM_CAP_MMU_SHADOW_CACHE_CONTROL
600 int r;
602 r = ioctl(kvm->fd, KVM_CHECK_EXTENSION,
603 KVM_CAP_MMU_SHADOW_CACHE_CONTROL);
604 if (r > 0) {
605 r = ioctl(kvm->vm_fd, KVM_SET_NR_MMU_PAGES, nrshadow_pages);
606 if (r == -1) {
607 fprintf(stderr, "kvm_set_shadow_pages: %m\n");
608 return -errno;
610 return 0;
612 #endif
613 return -1;
616 int kvm_get_shadow_pages(kvm_context_t kvm, unsigned int *nrshadow_pages)
618 #ifdef KVM_CAP_MMU_SHADOW_CACHE_CONTROL
619 int r;
621 r = ioctl(kvm->fd, KVM_CHECK_EXTENSION,
622 KVM_CAP_MMU_SHADOW_CACHE_CONTROL);
623 if (r > 0) {
624 *nrshadow_pages = ioctl(kvm->vm_fd, KVM_GET_NR_MMU_PAGES);
625 return 0;
627 #endif
628 return -1;
#ifdef KVM_CAP_VAPIC

/*
 * Enable or disable TPR-access exit reporting for @vcpu.
 * Returns 0 on success, -ENOSYS when the kernel lacks KVM_CAP_VAPIC,
 * -errno when the ioctl fails.
 */
static int tpr_access_reporting(kvm_context_t kvm, int vcpu, int enabled)
{
	struct kvm_tpr_access_ctl tac = {
		.enabled = enabled,
	};
	int ret;

	ret = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_VAPIC);
	if (ret <= 0)
		return -ENOSYS;
	ret = ioctl(kvm->vcpu_fd[vcpu], KVM_TPR_ACCESS_REPORTING, &tac);
	if (ret == -1) {
		ret = -errno;
		perror("KVM_TPR_ACCESS_REPORTING");
		return ret;
	}
	return 0;
}

/* Turn TPR-access reporting on for @vcpu. */
int kvm_enable_tpr_access_reporting(kvm_context_t kvm, int vcpu)
{
	return tpr_access_reporting(kvm, vcpu, 1);
}

/* Turn TPR-access reporting off for @vcpu. */
int kvm_disable_tpr_access_reporting(kvm_context_t kvm, int vcpu)
{
	return tpr_access_reporting(kvm, vcpu, 0);
}

#endif