support add-cow file format
[qemu/robert.git] / target-ppc / kvm.c
blob21f35af762b1500bea12b35c469b55806533ad50
/*
 * PowerPC implementation of KVM hooks
 *
 * Copyright IBM Corp. 2007
 * Copyright (C) 2011 Freescale Semiconductor, Inc.
 *
 * Authors:
 *  Jerone Young <jyoung5@us.ibm.com>
 *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
 *  Hollis Blanchard <hollisb@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */
17 #include <sys/types.h>
18 #include <sys/ioctl.h>
19 #include <sys/mman.h>
21 #include <linux/kvm.h>
23 #include "qemu-common.h"
24 #include "qemu-timer.h"
25 #include "sysemu.h"
26 #include "kvm.h"
27 #include "kvm_ppc.h"
28 #include "cpu.h"
29 #include "device_tree.h"
/* Uncomment to route dprintf() debug messages to stderr. */
//#define DEBUG_KVM

#ifdef DEBUG_KVM
#define dprintf(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
/* Debugging disabled: dprintf() expands to an empty statement. */
#define dprintf(fmt, ...) \
    do { } while (0)
#endif
41 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
42 KVM_CAP_LAST_INFO
/* Cached results of kvm_check_extension() for the PPC capabilities this
 * file cares about; filled in by kvm_arch_init(). */
static int cap_interrupt_unset = false;
static int cap_interrupt_level = false;
static int cap_segstate;
static int cap_booke_sregs;
50 /* XXX We have a race condition where we actually have a level triggered
51 * interrupt, but the infrastructure can't expose that yet, so the guest
52 * takes but ignores it, goes to sleep and never gets notified that there's
53 * still an interrupt pending.
55 * As a quick workaround, let's just wake up again 20 ms after we injected
56 * an interrupt. That way we can assure that we're always reinjecting
57 * interrupts in case the guest swallowed them.
59 static QEMUTimer *idle_timer;
/* Timer callback: kick the CPU passed in as the opaque timer argument. */
static void kvm_kick_env(void *env)
{
    qemu_cpu_kick(env);
}
66 int kvm_arch_init(KVMState *s)
68 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
69 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
70 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
71 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
73 if (!cap_interrupt_level) {
74 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
75 "VM to stall at times!\n");
78 return 0;
81 static int kvm_arch_sync_sregs(CPUState *cenv)
83 struct kvm_sregs sregs;
84 int ret;
86 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
87 /* What we're really trying to say is "if we're on BookE, we use
88 the native PVR for now". This is the only sane way to check
89 it though, so we potentially confuse users that they can run
90 BookE guests on BookS. Let's hope nobody dares enough :) */
91 return 0;
92 } else {
93 if (!cap_segstate) {
94 fprintf(stderr, "kvm error: missing PVR setting capability\n");
95 return -ENOSYS;
99 ret = kvm_vcpu_ioctl(cenv, KVM_GET_SREGS, &sregs);
100 if (ret) {
101 return ret;
104 sregs.pvr = cenv->spr[SPR_PVR];
105 return kvm_vcpu_ioctl(cenv, KVM_SET_SREGS, &sregs);
108 int kvm_arch_init_vcpu(CPUState *cenv)
110 int ret;
112 ret = kvm_arch_sync_sregs(cenv);
113 if (ret) {
114 return ret;
117 idle_timer = qemu_new_timer_ns(vm_clock, kvm_kick_env, cenv);
119 return ret;
122 void kvm_arch_reset_vcpu(CPUState *env)
126 int kvm_arch_put_registers(CPUState *env, int level)
128 struct kvm_regs regs;
129 int ret;
130 int i;
132 ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);
133 if (ret < 0)
134 return ret;
136 regs.ctr = env->ctr;
137 regs.lr = env->lr;
138 regs.xer = env->xer;
139 regs.msr = env->msr;
140 regs.pc = env->nip;
142 regs.srr0 = env->spr[SPR_SRR0];
143 regs.srr1 = env->spr[SPR_SRR1];
145 regs.sprg0 = env->spr[SPR_SPRG0];
146 regs.sprg1 = env->spr[SPR_SPRG1];
147 regs.sprg2 = env->spr[SPR_SPRG2];
148 regs.sprg3 = env->spr[SPR_SPRG3];
149 regs.sprg4 = env->spr[SPR_SPRG4];
150 regs.sprg5 = env->spr[SPR_SPRG5];
151 regs.sprg6 = env->spr[SPR_SPRG6];
152 regs.sprg7 = env->spr[SPR_SPRG7];
154 regs.pid = env->spr[SPR_BOOKE_PID];
156 for (i = 0;i < 32; i++)
157 regs.gpr[i] = env->gpr[i];
159 ret = kvm_vcpu_ioctl(env, KVM_SET_REGS, &regs);
160 if (ret < 0)
161 return ret;
163 return ret;
166 int kvm_arch_get_registers(CPUState *env)
168 struct kvm_regs regs;
169 struct kvm_sregs sregs;
170 uint32_t cr;
171 int i, ret;
173 ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);
174 if (ret < 0)
175 return ret;
177 cr = regs.cr;
178 for (i = 7; i >= 0; i--) {
179 env->crf[i] = cr & 15;
180 cr >>= 4;
183 env->ctr = regs.ctr;
184 env->lr = regs.lr;
185 env->xer = regs.xer;
186 env->msr = regs.msr;
187 env->nip = regs.pc;
189 env->spr[SPR_SRR0] = regs.srr0;
190 env->spr[SPR_SRR1] = regs.srr1;
192 env->spr[SPR_SPRG0] = regs.sprg0;
193 env->spr[SPR_SPRG1] = regs.sprg1;
194 env->spr[SPR_SPRG2] = regs.sprg2;
195 env->spr[SPR_SPRG3] = regs.sprg3;
196 env->spr[SPR_SPRG4] = regs.sprg4;
197 env->spr[SPR_SPRG5] = regs.sprg5;
198 env->spr[SPR_SPRG6] = regs.sprg6;
199 env->spr[SPR_SPRG7] = regs.sprg7;
201 env->spr[SPR_BOOKE_PID] = regs.pid;
203 for (i = 0;i < 32; i++)
204 env->gpr[i] = regs.gpr[i];
206 if (cap_booke_sregs) {
207 ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
208 if (ret < 0) {
209 return ret;
212 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
213 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
214 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
215 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
216 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
217 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
218 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
219 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
220 env->spr[SPR_DECR] = sregs.u.e.dec;
221 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
222 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
223 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
226 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
227 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
228 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
229 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
230 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
231 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
234 if (sregs.u.e.features & KVM_SREGS_E_64) {
235 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
238 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
239 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
242 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
243 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
244 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
245 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
246 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
247 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
248 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
249 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
250 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
251 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
252 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
253 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
254 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
255 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
256 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
257 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
258 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
260 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
261 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
262 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
263 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
266 if (sregs.u.e.features & KVM_SREGS_E_PM) {
267 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
270 if (sregs.u.e.features & KVM_SREGS_E_PC) {
271 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
272 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
276 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
277 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
278 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
279 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
280 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
281 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
282 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
283 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
284 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
285 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
286 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
289 if (sregs.u.e.features & KVM_SREGS_EXP) {
290 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
293 if (sregs.u.e.features & KVM_SREGS_E_PD) {
294 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
295 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
298 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
299 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
300 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
301 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
303 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
304 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
305 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
310 if (cap_segstate) {
311 ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
312 if (ret < 0) {
313 return ret;
316 ppc_store_sdr1(env, sregs.u.s.sdr1);
318 /* Sync SLB */
319 #ifdef TARGET_PPC64
320 for (i = 0; i < 64; i++) {
321 ppc_store_slb(env, sregs.u.s.ppc64.slb[i].slbe,
322 sregs.u.s.ppc64.slb[i].slbv);
324 #endif
326 /* Sync SRs */
327 for (i = 0; i < 16; i++) {
328 env->sr[i] = sregs.u.s.ppc32.sr[i];
331 /* Sync BATs */
332 for (i = 0; i < 8; i++) {
333 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
334 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
335 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
336 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
340 return 0;
343 int kvmppc_set_interrupt(CPUState *env, int irq, int level)
345 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
347 if (irq != PPC_INTERRUPT_EXT) {
348 return 0;
351 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
352 return 0;
355 kvm_vcpu_ioctl(env, KVM_INTERRUPT, &virq);
357 return 0;
/* Select the interrupt input pin constant that matches the build target. */
#if defined(TARGET_PPCEMB)
#define PPC_INPUT_INT PPC40x_INPUT_INT
#elif defined(TARGET_PPC64)
#define PPC_INPUT_INT PPC970_INPUT_INT
#else
#define PPC_INPUT_INT PPC6xx_INPUT_INT
#endif
368 void kvm_arch_pre_run(CPUState *env, struct kvm_run *run)
370 int r;
371 unsigned irq;
373 /* PowerPC Qemu tracks the various core input pins (interrupt, critical
374 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
375 if (!cap_interrupt_level &&
376 run->ready_for_interrupt_injection &&
377 (env->interrupt_request & CPU_INTERRUPT_HARD) &&
378 (env->irq_input_state & (1<<PPC_INPUT_INT)))
380 /* For now KVM disregards the 'irq' argument. However, in the
381 * future KVM could cache it in-kernel to avoid a heavyweight exit
382 * when reading the UIC.
384 irq = KVM_INTERRUPT_SET;
386 dprintf("injected interrupt %d\n", irq);
387 r = kvm_vcpu_ioctl(env, KVM_INTERRUPT, &irq);
388 if (r < 0)
389 printf("cpu %d fail inject %x\n", env->cpu_index, irq);
391 /* Always wake up soon in case the interrupt was level based */
392 qemu_mod_timer(idle_timer, qemu_get_clock_ns(vm_clock) +
393 (get_ticks_per_sec() / 50));
396 /* We don't know if there are more interrupts pending after this. However,
397 * the guest will return to userspace in the course of handling this one
398 * anyways, so we will get a chance to deliver the rest. */
401 void kvm_arch_post_run(CPUState *env, struct kvm_run *run)
405 int kvm_arch_process_async_events(CPUState *env)
407 return 0;
410 static int kvmppc_handle_halt(CPUState *env)
412 if (!(env->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
413 env->halted = 1;
414 env->exception_index = EXCP_HLT;
417 return 0;
420 /* map dcr access to existing qemu dcr emulation */
421 static int kvmppc_handle_dcr_read(CPUState *env, uint32_t dcrn, uint32_t *data)
423 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
424 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
426 return 0;
429 static int kvmppc_handle_dcr_write(CPUState *env, uint32_t dcrn, uint32_t data)
431 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
432 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
434 return 0;
437 int kvm_arch_handle_exit(CPUState *env, struct kvm_run *run)
439 int ret;
441 switch (run->exit_reason) {
442 case KVM_EXIT_DCR:
443 if (run->dcr.is_write) {
444 dprintf("handle dcr write\n");
445 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
446 } else {
447 dprintf("handle dcr read\n");
448 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
450 break;
451 case KVM_EXIT_HLT:
452 dprintf("handle halt\n");
453 ret = kvmppc_handle_halt(env);
454 break;
455 default:
456 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
457 ret = -1;
458 break;
461 return ret;
/*
 * Find the first line of /proc/cpuinfo that starts with `field` and copy it
 * into `value`.
 *
 * field: prefix to look for at the start of a line.
 * value: destination buffer of `len` bytes; left untouched when no line
 *        matches.
 * len:   size of `value` in bytes; must be positive.
 *
 * The copy is truncated to len - 1 characters if necessary and is always
 * NUL-terminated. The copied line keeps whatever trailing newline fgets()
 * stored.
 *
 * Returns 0 when a matching line was copied, -1 when `len` is not positive,
 * the file cannot be opened, or no line matches.
 */
static int read_cpuinfo(const char *field, char *value, int len)
{
    FILE *f;
    int ret = -1;
    size_t field_len = strlen(field);
    char line[512];

    if (len <= 0) {
        /* No room for even the terminator. */
        return -1;
    }

    f = fopen("/proc/cpuinfo", "r");
    if (!f) {
        return -1;
    }

    while (fgets(line, sizeof(line), f)) {
        if (!strncmp(line, field, field_len)) {
            /* strncpy() does not terminate on truncation, so copy at most
             * len - 1 bytes and terminate explicitly. */
            strncpy(value, line, (size_t)len - 1);
            value[len - 1] = '\0';
            ret = 0;
            break;
        }
    }

    fclose(f);

    return ret;
}
/*
 * Determine the host time base frequency from the "timebase" line of
 * /proc/cpuinfo.
 *
 * Returns the parsed value, or get_ticks_per_sec() when the line is missing,
 * has no ':' separator, contains no number after the separator, or the
 * number does not fit in 32 bits.
 */
uint32_t kvmppc_get_tbfreq(void)
{
    char line[512];
    char *ns;
    char *end;
    unsigned long long freq;
    uint32_t retval = get_ticks_per_sec();

    if (read_cpuinfo("timebase", line, sizeof(line))) {
        return retval;
    }

    ns = strchr(line, ':');
    if (!ns) {
        return retval;
    }

    ns++;

    /* strtoull() instead of atoi(): lets us notice "no digits" (end == ns)
     * and values that cannot be represented in the 32-bit return type. */
    freq = strtoull(ns, &end, 10);
    if (end == ns || freq > 0xffffffffULL) {
        return retval;
    }

    return (uint32_t)freq;
}
512 int kvmppc_get_hypercall(CPUState *env, uint8_t *buf, int buf_len)
514 uint32_t *hc = (uint32_t*)buf;
516 struct kvm_ppc_pvinfo pvinfo;
518 if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
519 !kvm_vm_ioctl(env->kvm_state, KVM_PPC_GET_PVINFO, &pvinfo)) {
520 memcpy(buf, pvinfo.hcall, buf_len);
522 return 0;
526 * Fallback to always fail hypercalls:
528 * li r3, -1
529 * nop
530 * nop
531 * nop
534 hc[0] = 0x3860ffff;
535 hc[1] = 0x60000000;
536 hc[2] = 0x60000000;
537 hc[3] = 0x60000000;
539 return 0;
542 bool kvm_arch_stop_on_emulation_error(CPUState *env)
544 return true;
547 int kvm_arch_on_sigbus_vcpu(CPUState *env, int code, void *addr)
549 return 1;
/* Process-wide SIGBUS hook: performs no handling and always returns 1. */
int kvm_arch_on_sigbus(int code, void *addr)
{
    return 1;
}