/*
 * Kernel-based Virtual Machine control library
 *
 * This library provides an API to control the kvm hardware virtualization
 * module.
 *
 * Copyright (C) 2006 Qumranet
 *
 * Authors:
 *
 *      Avi Kivity <avi@qumranet.com>
 *      Yaniv Kamay <yaniv@qumranet.com>
 *
 * This work is licensed under the GNU LGPL license, version 2.
 */
#include <unistd.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>    /* uint8_t ... uint64_t, used by the I/O handlers */
#include <sys/ioctl.h> /* ioctl() */
#include <sys/mman.h>
#include <string.h>
#include <errno.h>
#include "kvmctl.h"
#define EXPECTED_KVM_API_VERSION 3

#if EXPECTED_KVM_API_VERSION != KVM_API_VERSION
#error libkvm: userspace and kernel version mismatch
#endif

#define PAGE_SIZE 4096ul
/**
 * \brief The KVM context
 *
 * The verbose KVM context
 */
struct kvm_context {
        /// File descriptor to /dev/kvm
        int fd;
        /// Callbacks that KVM uses to emulate various unvirtualizable functionality
        struct kvm_callbacks *callbacks;
        void *opaque;
        /// A pointer to the memory used as the physical memory for the guest
        void *physical_memory;
};
struct translation_cache {
        unsigned long linear;
        void *physical;
};
static void translation_cache_init(struct translation_cache *tr)
{
        tr->physical = 0;
}
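
/*
 * Translate a guest-linear address to a host pointer via KVM_TRANSLATE,
 * caching the last translated page so repeated accesses within one page
 * (e.g. a string I/O run) avoid extra ioctls.
 */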
static int translate(kvm_context_t kvm, int vcpu, struct translation_cache *tr,
                     unsigned long linear, void **physical)
{
        unsigned long page = linear & ~(PAGE_SIZE-1);
        unsigned long offset = linear & (PAGE_SIZE-1);

        if (!(tr->physical && tr->linear == page)) {
                struct kvm_translation kvm_tr;
                int r;

                kvm_tr.linear_address = page;
                kvm_tr.vcpu = vcpu;

                r = ioctl(kvm->fd, KVM_TRANSLATE, &kvm_tr);
                if (r == -1)
                        return -errno;

                if (!kvm_tr.valid)
                        return -EFAULT;

                tr->linear = page;
                tr->physical = kvm->physical_memory + kvm_tr.physical_address;
        }
        *physical = tr->physical + offset;
        return 0;
}
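
/*
 * Open /dev/kvm, verify that the kernel's reported API version matches
 * the version this library was built against, and allocate the context.
 */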
kvm_context_t kvm_init(struct kvm_callbacks *callbacks,
                       void *opaque)
{
        int fd;
        kvm_context_t kvm;
        int r;

        fd = open("/dev/kvm", O_RDWR);
        if (fd == -1) {
                perror("open /dev/kvm");
                return NULL;
        }
        r = ioctl(fd, KVM_GET_API_VERSION, 0);
        if (r == -1) {
                fprintf(stderr, "kvm kernel version too old\n");
                goto out_close;
        }
        if (r < EXPECTED_KVM_API_VERSION) {
                fprintf(stderr, "kvm kernel version too old\n");
                goto out_close;
        }
        if (r > EXPECTED_KVM_API_VERSION) {
                fprintf(stderr, "kvm userspace version too old\n");
                goto out_close;
        }
        kvm = malloc(sizeof(*kvm));
        if (!kvm)
                goto out_close;
        kvm->fd = fd;
        kvm->callbacks = callbacks;
        kvm->opaque = opaque;
        return kvm;
out_close:
        close(fd);
        return NULL;
}
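
/*
 * Typical usage (a minimal sketch; `my_callbacks` is a caller-supplied
 * struct kvm_callbacks, the 128 MB size is arbitrary, and error handling
 * is elided):
 *
 *      void *vm_mem;
 *      kvm_context_t kvm = kvm_init(&my_callbacks, NULL);
 *
 *      if (kvm && kvm_create(kvm, 128 * 1024 * 1024, &vm_mem) == 0) {
 *              // ... load a guest image into vm_mem, set up registers ...
 *              kvm_run(kvm, 0);
 *      }
 */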
void kvm_finalize(kvm_context_t kvm)
{
        close(kvm->fd);
        free(kvm);
}
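
/*
 * Create the VM: register guest physical memory with the kernel in two
 * slots mirroring the classic PC layout (conventional memory below
 * 0xa0000, extended memory from 0xc0000 up, leaving a hole at
 * 0xa0000-0xc0000 for the VGA window), map the memory into our address
 * space, and create vcpu 0.
 */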
int kvm_create(kvm_context_t kvm, unsigned long memory, void **vm_mem)
{
        unsigned long dosmem = 0xa0000;
        unsigned long exmem = 0xc0000;
        int fd = kvm->fd;
        int r;
        struct kvm_memory_region low_memory = {
                .slot = 3,
                .memory_size = memory < dosmem ? memory : dosmem,
                .guest_phys_addr = 0,
        };
        struct kvm_memory_region extended_memory = {
                .slot = 0,
                .memory_size = memory < exmem ? 0 : memory - exmem,
                .guest_phys_addr = exmem,
        };

        /* 640K should be enough. */
        r = ioctl(fd, KVM_SET_MEMORY_REGION, &low_memory);
        if (r == -1) {
                fprintf(stderr, "kvm_create_memory_region: %m\n");
                return -1;
        }
        if (extended_memory.memory_size) {
                r = ioctl(fd, KVM_SET_MEMORY_REGION, &extended_memory);
                if (r == -1) {
                        fprintf(stderr, "kvm_create_memory_region: %m\n");
                        return -1;
                }
        }

        *vm_mem = mmap(0, memory, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
        if (*vm_mem == MAP_FAILED) {
                fprintf(stderr, "mmap: %m\n");
                return -1;
        }
        kvm->physical_memory = *vm_mem;

        r = ioctl(fd, KVM_CREATE_VCPU, 0);
        if (r == -1) {
                fprintf(stderr, "kvm_create_vcpu: %m\n");
                return -1;
        }
        return 0;
}
void *kvm_create_phys_mem(kvm_context_t kvm, unsigned long phys_start,
                          unsigned long len, int slot, int log, int writable)
{
        void *ptr;
        int r;
        int fd = kvm->fd;
        int prot = PROT_READ;
        struct kvm_memory_region memory = {
                .slot = slot,
                .memory_size = len,
                .guest_phys_addr = phys_start,
                .flags = log ? KVM_MEM_LOG_DIRTY_PAGES : 0,
        };

        r = ioctl(fd, KVM_SET_MEMORY_REGION, &memory);
        if (r == -1)
                return 0;

        if (writable)
                prot |= PROT_WRITE;

        ptr = mmap(0, len, prot, MAP_SHARED, fd, phys_start);
        if (ptr == MAP_FAILED)
                return 0;
        return ptr;
}
void kvm_destroy_phys_mem(kvm_context_t kvm, unsigned long phys_start,
                          unsigned long len)
{
        printf("kvm_destroy_phys_mem: implement me\n");
        exit(1);
}
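
/*
 * Fetch the dirty-page bitmap for a memory slot created with
 * KVM_MEM_LOG_DIRTY_PAGES.  `buf` receives one bit per guest page and
 * must be large enough to cover the whole slot.
 */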
int kvm_get_dirty_pages(kvm_context_t kvm, int slot, void *buf)
{
        int r;
        struct kvm_dirty_log log = {
                .slot = slot,
        };

        log.dirty_bitmap = buf;

        r = ioctl(kvm->fd, KVM_GET_DIRTY_LOG, &log);
        if (r == -1)
                return -errno;
        return 0;
}
static int more_io(struct kvm_run *run, int first_time)
{
        if (!run->io.rep)
                return first_time;
        else
                return run->io.count != 0;
}
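
/*
 * Emulate a port I/O exit: iterate over the (possibly repeated string)
 * operation, translating guest string addresses to host pointers and
 * dispatching each element to the inb/outb-family callbacks, then write
 * the updated register state back to the kernel.
 */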
static int handle_io(kvm_context_t kvm, struct kvm_run *run)
{
        uint16_t addr = run->io.port;
        struct kvm_regs regs;
        int first_time = 1;
        int delta;
        struct translation_cache tr;
        int _in = (run->io.direction == KVM_EXIT_IO_IN);
        int r;

        translation_cache_init(&tr);

        if (run->io.string || _in) {
                regs.vcpu = run->vcpu;
                r = ioctl(kvm->fd, KVM_GET_REGS, &regs);
                if (r == -1)
                        return -errno;
        }

        delta = run->io.string_down ? -run->io.size : run->io.size;

        while (more_io(run, first_time)) {
                void *value_addr;

                if (!run->io.string) {
                        if (_in)
                                value_addr = &regs.rax;
                        else
                                value_addr = &run->io.value;
                } else {
                        r = translate(kvm, run->vcpu, &tr, run->io.address,
                                      &value_addr);
                        if (r) {
                                fprintf(stderr,
                                        "failed translating I/O address %llx\n",
                                        (unsigned long long)run->io.address);
                                return r;
                        }
                }

                switch (run->io.direction) {
                case KVM_EXIT_IO_IN: {
                        switch (run->io.size) {
                        case 1: {
                                uint8_t value;
                                r = kvm->callbacks->inb(kvm->opaque, addr, &value);
                                *(uint8_t *)value_addr = value;
                                break;
                        }
                        case 2: {
                                uint16_t value;
                                r = kvm->callbacks->inw(kvm->opaque, addr, &value);
                                *(uint16_t *)value_addr = value;
                                break;
                        }
                        case 4: {
                                uint32_t value;
                                r = kvm->callbacks->inl(kvm->opaque, addr, &value);
                                *(uint32_t *)value_addr = value;
                                break;
                        }
                        default:
                                fprintf(stderr, "bad I/O size %d\n", run->io.size);
                                return -EMSGSIZE;
                        }
                        break;
                }
                case KVM_EXIT_IO_OUT:
                        switch (run->io.size) {
                        case 1:
                                r = kvm->callbacks->outb(kvm->opaque, addr,
                                                         *(uint8_t *)value_addr);
                                break;
                        case 2:
                                r = kvm->callbacks->outw(kvm->opaque, addr,
                                                         *(uint16_t *)value_addr);
                                break;
                        case 4:
                                r = kvm->callbacks->outl(kvm->opaque, addr,
                                                         *(uint32_t *)value_addr);
                                break;
                        default:
                                fprintf(stderr, "bad I/O size %d\n", run->io.size);
                                return -EMSGSIZE;
                        }
                        break;
                default:
                        fprintf(stderr, "bad I/O direction %d\n", run->io.direction);
                        return -EPROTO;
                }
                if (run->io.string) {
                        run->io.address += delta;
                        switch (run->io.direction) {
                        case KVM_EXIT_IO_IN:  regs.rdi += delta; break;
                        case KVM_EXIT_IO_OUT: regs.rsi += delta; break;
                        }
                        if (run->io.rep) {
                                --regs.rcx;
                                --run->io.count;
                        }
                }
                first_time = 0;
                if (r) {
                        int savedret = r;
                        r = ioctl(kvm->fd, KVM_SET_REGS, &regs);
                        if (r == -1)
                                return -errno;

                        return savedret;
                }
        }

        if (run->io.string || _in) {
                r = ioctl(kvm->fd, KVM_SET_REGS, &regs);
                if (r == -1)
                        return -errno;
        }

        run->emulated = 1;
        return 0;
}
int handle_debug(kvm_context_t kvm, struct kvm_run *run)
{
        return kvm->callbacks->debug(kvm->opaque, run->vcpu);
}
int kvm_get_regs(kvm_context_t kvm, int vcpu, struct kvm_regs *regs)
{
        regs->vcpu = vcpu;
        return ioctl(kvm->fd, KVM_GET_REGS, regs);
}

int kvm_set_regs(kvm_context_t kvm, int vcpu, struct kvm_regs *regs)
{
        regs->vcpu = vcpu;
        return ioctl(kvm->fd, KVM_SET_REGS, regs);
}

int kvm_get_sregs(kvm_context_t kvm, int vcpu, struct kvm_sregs *sregs)
{
        sregs->vcpu = vcpu;
        return ioctl(kvm->fd, KVM_GET_SREGS, sregs);
}

int kvm_set_sregs(kvm_context_t kvm, int vcpu, struct kvm_sregs *sregs)
{
        sregs->vcpu = vcpu;
        return ioctl(kvm->fd, KVM_SET_SREGS, sregs);
}
/**
 * Returns the list of MSRs supported by the kernel.  The caller must free
 * the returned list.
 */
struct kvm_msr_list *kvm_get_msr_list(kvm_context_t kvm)
{
        struct kvm_msr_list sizer, *msrs;
        int r, e;

        /* The first call, with nmsrs = 0, fails with E2BIG but fills in
         * the count, which sizes the real buffer. */
        sizer.nmsrs = 0;
        r = ioctl(kvm->fd, KVM_GET_MSR_INDEX_LIST, &sizer);
        if (r == -1 && errno != E2BIG)
                return 0;
        msrs = malloc(sizeof *msrs + sizer.nmsrs * sizeof *msrs->indices);
        if (!msrs) {
                errno = ENOMEM;
                return 0;
        }
        msrs->nmsrs = sizer.nmsrs;
        r = ioctl(kvm->fd, KVM_GET_MSR_INDEX_LIST, msrs);
        if (r == -1) {
                e = errno;
                free(msrs);
                errno = e;
                return 0;
        }
        return msrs;
}
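
/*
 * Read `n` MSRs on a vcpu: the caller fills in msrs[i].index, and on
 * success the kernel fills in the corresponding data fields.  Returns
 * the raw ioctl result (-1 with errno set on failure).
 */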
int kvm_get_msrs(kvm_context_t kvm, int vcpu, struct kvm_msr_entry *msrs,
                 int n)
{
        struct kvm_msrs *kmsrs = malloc(sizeof *kmsrs + n * sizeof *msrs);
        int r, e;

        if (!kmsrs) {
                errno = ENOMEM;
                return -1;
        }
        kmsrs->vcpu = vcpu;
        kmsrs->nmsrs = n;
        memcpy(kmsrs->entries, msrs, n * sizeof *msrs);
        r = ioctl(kvm->fd, KVM_GET_MSRS, kmsrs);
        e = errno;
        memcpy(msrs, kmsrs->entries, n * sizeof *msrs);
        free(kmsrs);
        errno = e;
        return r;
}
int kvm_set_msrs(kvm_context_t kvm, int vcpu, struct kvm_msr_entry *msrs,
                 int n)
{
        struct kvm_msrs *kmsrs = malloc(sizeof *kmsrs + n * sizeof *msrs);
        int r, e;

        if (!kmsrs) {
                errno = ENOMEM;
                return -1;
        }
        kmsrs->vcpu = vcpu;
        kmsrs->nmsrs = n;
        memcpy(kmsrs->entries, msrs, n * sizeof *msrs);
        r = ioctl(kvm->fd, KVM_SET_MSRS, kmsrs);
        e = errno;
        free(kmsrs);
        errno = e;
        return r;
}
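
/*
 * Example (a sketch; assumes a valid `kvm` context and vcpu 0, and elides
 * error handling):
 *
 *      struct kvm_msr_list *list = kvm_get_msr_list(kvm);
 *      struct kvm_msr_entry *entries = calloc(list->nmsrs, sizeof *entries);
 *      int i;
 *
 *      for (i = 0; i < list->nmsrs; ++i)
 *              entries[i].index = list->indices[i];
 *      kvm_get_msrs(kvm, 0, entries, list->nmsrs);
 *      free(entries);
 *      free(list);
 */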
void kvm_show_regs(kvm_context_t kvm, int vcpu)
{
        int fd = kvm->fd;
        struct kvm_regs regs;
        int r;

        regs.vcpu = vcpu;
        r = ioctl(fd, KVM_GET_REGS, &regs);
        if (r == -1) {
                perror("KVM_GET_REGS");
                return;
        }
        fprintf(stderr,
                "rax %016llx rbx %016llx rcx %016llx rdx %016llx\n"
                "rsi %016llx rdi %016llx rsp %016llx rbp %016llx\n"
                "r8  %016llx r9  %016llx r10 %016llx r11 %016llx\n"
                "r12 %016llx r13 %016llx r14 %016llx r15 %016llx\n"
                "rip %016llx rflags %08llx\n",
                regs.rax, regs.rbx, regs.rcx, regs.rdx,
                regs.rsi, regs.rdi, regs.rsp, regs.rbp,
                regs.r8,  regs.r9,  regs.r10, regs.r11,
                regs.r12, regs.r13, regs.r14, regs.r15,
                regs.rip, regs.rflags);
}
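
/*
 * Emulate a CPUID exit: pass the register file to the cpuid callback and,
 * for leaf 1, mask the MTRR feature bit (EDX bit 12), since MTRRs are not
 * virtualized here.
 */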
static int handle_cpuid(kvm_context_t kvm, struct kvm_run *run)
{
        struct kvm_regs regs;
        uint32_t orig_eax;
        int r;

        kvm_get_regs(kvm, run->vcpu, &regs);
        orig_eax = regs.rax;
        r = kvm->callbacks->cpuid(kvm->opaque,
                                  &regs.rax, &regs.rbx, &regs.rcx, &regs.rdx);
        if (orig_eax == 1)
                regs.rdx &= ~(1ull << 12); /* disable mtrr support */
        kvm_set_regs(kvm, run->vcpu, &regs);
        run->emulated = 1;
        return r;
}
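
/*
 * Emulate an MMIO exit: dispatch the access to the read/write callback
 * matching its width (1, 2, 4 or 8 bytes); for reads, mark the
 * transaction completed so the kernel can feed the data back to the
 * guest instruction.
 */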
static int handle_mmio(kvm_context_t kvm, struct kvm_run *kvm_run)
{
        unsigned long addr = kvm_run->mmio.phys_addr;
        void *data = kvm_run->mmio.data;
        int r = -1;

        if (kvm_run->mmio.is_write) {
                switch (kvm_run->mmio.len) {
                case 1:
                        r = kvm->callbacks->writeb(kvm->opaque, addr, *(uint8_t *)data);
                        break;
                case 2:
                        r = kvm->callbacks->writew(kvm->opaque, addr, *(uint16_t *)data);
                        break;
                case 4:
                        r = kvm->callbacks->writel(kvm->opaque, addr, *(uint32_t *)data);
                        break;
                case 8:
                        r = kvm->callbacks->writeq(kvm->opaque, addr, *(uint64_t *)data);
                        break;
                }
        } else {
                switch (kvm_run->mmio.len) {
                case 1:
                        r = kvm->callbacks->readb(kvm->opaque, addr, (uint8_t *)data);
                        break;
                case 2:
                        r = kvm->callbacks->readw(kvm->opaque, addr, (uint16_t *)data);
                        break;
                case 4:
                        r = kvm->callbacks->readl(kvm->opaque, addr, (uint32_t *)data);
                        break;
                case 8:
                        r = kvm->callbacks->readq(kvm->opaque, addr, (uint64_t *)data);
                        break;
                }
                kvm_run->mmio_completed = 1;
        }
        return r;
}
static int handle_io_window(kvm_context_t kvm, struct kvm_run *kvm_run)
{
        return kvm->callbacks->io_window(kvm->opaque);
}

static int handle_halt(kvm_context_t kvm, struct kvm_run *kvm_run)
{
        return kvm->callbacks->halt(kvm->opaque, kvm_run->vcpu);
}

static int handle_shutdown(kvm_context_t kvm, struct kvm_run *kvm_run)
{
        return kvm->callbacks->shutdown(kvm->opaque, kvm_run->vcpu);
}

int try_push_interrupts(kvm_context_t kvm)
{
        return kvm->callbacks->try_push_interrupts(kvm->opaque);
}

static void post_kvm_run(kvm_context_t kvm, struct kvm_run *kvm_run)
{
        kvm->callbacks->post_kvm_run(kvm->opaque, kvm_run);
}

static void pre_kvm_run(kvm_context_t kvm, struct kvm_run *kvm_run)
{
        kvm->callbacks->pre_kvm_run(kvm->opaque, kvm_run);
}
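
/*
 * Main vcpu loop: enter the guest via KVM_RUN and dispatch each exit
 * (I/O, MMIO, CPUID, debug, HLT, ...) to its handler, re-entering the
 * guest until a handler returns nonzero or entry fails.
 */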
int kvm_run(kvm_context_t kvm, int vcpu)
{
        int r;
        int fd = kvm->fd;
        struct kvm_run kvm_run = {
                .vcpu = vcpu,
                .emulated = 0,
                .mmio_completed = 0,
        };

again:
        kvm_run.request_interrupt_window = try_push_interrupts(kvm);
        pre_kvm_run(kvm, &kvm_run);
        r = ioctl(fd, KVM_RUN, &kvm_run);
        post_kvm_run(kvm, &kvm_run);

        kvm_run.emulated = 0;
        kvm_run.mmio_completed = 0;
        if (r == -1 && errno != EINTR) {
                r = -errno;
                printf("kvm_run: %m\n");
                return r;
        }
        if (r == -1) {
                r = handle_io_window(kvm, &kvm_run);
                goto more;
        }
        switch (kvm_run.exit_type) {
        case KVM_EXIT_TYPE_FAIL_ENTRY:
                fprintf(stderr, "kvm_run: failed entry, reason %u\n",
                        kvm_run.exit_reason & 0xffff);
                return -ENOEXEC;
                break;
        case KVM_EXIT_TYPE_VM_EXIT:
                switch (kvm_run.exit_reason) {
                case KVM_EXIT_UNKNOWN:
                        fprintf(stderr, "unhandled vm exit: 0x%x\n",
                                kvm_run.hw.hardware_exit_reason);
                        kvm_show_regs(kvm, vcpu);
                        abort();
                        break;
                case KVM_EXIT_EXCEPTION:
                        fprintf(stderr, "exception %d (%x)\n",
                                kvm_run.ex.exception,
                                kvm_run.ex.error_code);
                        abort();
                        break;
                case KVM_EXIT_IO:
                        r = handle_io(kvm, &kvm_run);
                        break;
                case KVM_EXIT_CPUID:
                        r = handle_cpuid(kvm, &kvm_run);
                        break;
                case KVM_EXIT_DEBUG:
                        r = handle_debug(kvm, &kvm_run);
                        break;
                case KVM_EXIT_MMIO:
                        r = handle_mmio(kvm, &kvm_run);
                        break;
                case KVM_EXIT_HLT:
                        r = handle_halt(kvm, &kvm_run);
                        break;
                case KVM_EXIT_IRQ_WINDOW_OPEN:
                        break;
                case KVM_EXIT_SHUTDOWN:
                        r = handle_shutdown(kvm, &kvm_run);
                        break;
                default:
                        fprintf(stderr, "unhandled vm exit: 0x%x\n", kvm_run.exit_reason);
                        kvm_show_regs(kvm, vcpu);
                        abort();
                        break;
                }
        }
more:
        if (!r)
                goto again;
        return r;
}
int kvm_inject_irq(kvm_context_t kvm, int vcpu, unsigned irq)
{
        struct kvm_interrupt intr;

        intr.vcpu = vcpu;
        intr.irq = irq;
        return ioctl(kvm->fd, KVM_INTERRUPT, &intr);
}
int kvm_guest_debug(kvm_context_t kvm, int vcpu, struct kvm_debug_guest *dbg)
{
        dbg->vcpu = vcpu;

        return ioctl(kvm->fd, KVM_DEBUG_GUEST, dbg);
}