/*
 * PowerPC implementation of KVM hooks
 *
 * Copyright IBM Corp. 2007
 * Copyright (C) 2011 Freescale Semiconductor, Inc.
 *
 * Authors:
 *  Jerone Young <jyoung5@us.ibm.com>
 *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
 *  Hollis Blanchard <hollisb@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include <sys/types.h>
#include <sys/ioctl.h>

#include <linux/kvm.h>

#include "qemu-common.h"
#include "qemu-timer.h"
#include "kvm.h"
#include "kvm_ppc.h"
#include "cpu.h"
#include "device_tree.h"

#ifdef DEBUG_KVM
#define dprintf(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define dprintf(fmt, ...) \
    do { } while (0)
#endif

const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
    KVM_CAP_LAST_INFO
};
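
/*
 * Capability flags, probed once from the host kernel in kvm_arch_init()
 * and consulted on the hot paths below.
 */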
static int cap_interrupt_unset = false;
static int cap_interrupt_level = false;
static int cap_segstate;
#ifdef KVM_CAP_PPC_BOOKE_SREGS
static int cap_booke_sregs;
#endif

/* XXX We have a race condition where we actually have a level triggered
 *     interrupt, but the infrastructure can't expose that yet, so the guest
 *     takes but ignores it, goes to sleep and never gets notified that there's
 *     still an interrupt pending.
 *
 *     As a quick workaround, let's just wake up again 20 ms after we injected
 *     an interrupt. That way we can assure that we're always reinjecting
 *     interrupts in case the guest swallowed them.
 */
static QEMUTimer *idle_timer;

static void kvm_kick_env(void *env)
{
    qemu_cpu_kick(env);
}

int kvm_arch_init(KVMState *s)
{
#ifdef KVM_CAP_PPC_UNSET_IRQ
    cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
#endif
#ifdef KVM_CAP_PPC_IRQ_LEVEL
    cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
#endif
#ifdef KVM_CAP_PPC_SEGSTATE
    cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
#endif
#ifdef KVM_CAP_PPC_BOOKE_SREGS
    cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
#endif

    if (!cap_interrupt_level) {
        fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
                        "VM to stall at times!\n");
    }

    return 0;
}
static int kvm_arch_sync_sregs(CPUState *cenv)
{
    struct kvm_sregs sregs;
    int ret;

    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
        /* What we're really trying to say is "if we're on BookE, we use
           the native PVR for now". This is the only sane way to check
           it though, so we potentially confuse users that they can run
           BookE guests on BookS. Let's hope nobody dares enough :) */
        return 0;
    } else {
        if (!cap_segstate) {
            fprintf(stderr, "kvm error: missing PVR setting capability\n");
            return -ENOSYS;
        }
    }

#if !defined(CONFIG_KVM_PPC_PVR)
    fprintf(stderr, "kvm error: missing PVR setting capability\n");
    return -ENOSYS;
#endif

    ret = kvm_vcpu_ioctl(cenv, KVM_GET_SREGS, &sregs);
    if (ret) {
        return ret;
    }

#ifdef CONFIG_KVM_PPC_PVR
    sregs.pvr = cenv->spr[SPR_PVR];
#endif

    return kvm_vcpu_ioctl(cenv, KVM_SET_SREGS, &sregs);
}

int kvm_arch_init_vcpu(CPUState *cenv)
{
    int ret;

    ret = kvm_arch_sync_sregs(cenv);
    if (ret) {
        return ret;
    }

    idle_timer = qemu_new_timer_ns(vm_clock, kvm_kick_env, cenv);

    return ret;
}

void kvm_arch_reset_vcpu(CPUState *env)
{
}
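
/*
 * Push QEMU's CPU state into the kernel: the GPRs plus the SPRs that
 * struct kvm_regs covers (SRR0/1, the SPRGs and the BookE PID).
 */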
int kvm_arch_put_registers(CPUState *env, int level)
{
    struct kvm_regs regs;
    int ret;
    int i;

    ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);
    if (ret < 0)
        return ret;

    regs.ctr = env->ctr;
    regs.lr  = env->lr;
    regs.xer = env->xer;
    regs.msr = env->msr;
    regs.pc = env->nip;

    regs.srr0 = env->spr[SPR_SRR0];
    regs.srr1 = env->spr[SPR_SRR1];

    regs.sprg0 = env->spr[SPR_SPRG0];
    regs.sprg1 = env->spr[SPR_SPRG1];
    regs.sprg2 = env->spr[SPR_SPRG2];
    regs.sprg3 = env->spr[SPR_SPRG3];
    regs.sprg4 = env->spr[SPR_SPRG4];
    regs.sprg5 = env->spr[SPR_SPRG5];
    regs.sprg6 = env->spr[SPR_SPRG6];
    regs.sprg7 = env->spr[SPR_SPRG7];

    regs.pid = env->spr[SPR_BOOKE_PID];

    for (i = 0; i < 32; i++)
        regs.gpr[i] = env->gpr[i];

    ret = kvm_vcpu_ioctl(env, KVM_SET_REGS, &regs);
    if (ret < 0)
        return ret;

    return ret;
}
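
/*
 * Fetch the vcpu state from the kernel: struct kvm_regs first, then
 * whichever sregs layout (BookE or book3s) the host advertises.
 */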
int kvm_arch_get_registers(CPUState *env)
{
    struct kvm_regs regs;
    struct kvm_sregs sregs;
    uint32_t cr;
    int i, ret;

    ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);
    if (ret < 0)
        return ret;

    cr = regs.cr;
    for (i = 7; i >= 0; i--) {
        env->crf[i] = cr & 15;
        cr >>= 4;
    }

    env->ctr = regs.ctr;
    env->lr = regs.lr;
    env->xer = regs.xer;
    env->msr = regs.msr;
    env->nip = regs.pc;

    env->spr[SPR_SRR0] = regs.srr0;
    env->spr[SPR_SRR1] = regs.srr1;

    env->spr[SPR_SPRG0] = regs.sprg0;
    env->spr[SPR_SPRG1] = regs.sprg1;
    env->spr[SPR_SPRG2] = regs.sprg2;
    env->spr[SPR_SPRG3] = regs.sprg3;
    env->spr[SPR_SPRG4] = regs.sprg4;
    env->spr[SPR_SPRG5] = regs.sprg5;
    env->spr[SPR_SPRG6] = regs.sprg6;
    env->spr[SPR_SPRG7] = regs.sprg7;

    env->spr[SPR_BOOKE_PID] = regs.pid;

    for (i = 0; i < 32; i++)
        env->gpr[i] = regs.gpr[i];

#ifdef KVM_CAP_PPC_BOOKE_SREGS
    if (cap_booke_sregs) {
        ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
        if (ret < 0) {
            return ret;
        }

        if (sregs.u.e.features & KVM_SREGS_E_BASE) {
            env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
            env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
            env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
            env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
            env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
            env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
            env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
            env->spr[SPR_DECR] = sregs.u.e.dec;
            env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
            env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
            env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
        }

        if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
            env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
            env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
            env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
            env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
            env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_64) {
            env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
            env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
        }

        if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
            env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
            env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
            env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
            env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
            env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
            env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
            env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
            env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
            env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
            env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
            env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
            env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
            env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
            env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
            env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
            env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];

            if (sregs.u.e.features & KVM_SREGS_E_SPE) {
                env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
                env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
                env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
            }

            if (sregs.u.e.features & KVM_SREGS_E_PM) {
                env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
            }

            if (sregs.u.e.features & KVM_SREGS_E_PC) {
                env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
                env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
            }
        }

        if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
            env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
            env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
            env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
            env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
            env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
            env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
            env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
            env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
            env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
            env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
        }

        if (sregs.u.e.features & KVM_SREGS_EXP) {
            env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_PD) {
            env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
            env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
        }

        if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
            env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
            env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
            env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;

            if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
                env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
                env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
            }
        }
    }
#endif
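
    /* Book3S style sregs: SDR1, the SLB, segment registers and BATs. */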
#ifdef KVM_CAP_PPC_SEGSTATE
    if (cap_segstate) {
        ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
        if (ret < 0) {
            return ret;
        }

        ppc_store_sdr1(env, sregs.u.s.sdr1);

        /* Sync SLB */
#ifdef TARGET_PPC64
        for (i = 0; i < 64; i++) {
            ppc_store_slb(env, sregs.u.s.ppc64.slb[i].slbe,
                               sregs.u.s.ppc64.slb[i].slbv);
        }
#endif

        /* Sync SRs */
        for (i = 0; i < 16; i++) {
            env->sr[i] = sregs.u.s.ppc32.sr[i];
        }

        /* Sync BATs */
        for (i = 0; i < 8; i++) {
            env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
            env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
            env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
            env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
        }
    }
#endif

    return 0;
}
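
/*
 * Raise or clear the vcpu's external interrupt line. This only works as a
 * real level-triggered line when the host has KVM_CAP_PPC_IRQ_LEVEL and
 * KVM_CAP_PPC_UNSET_IRQ; otherwise kvm_arch_pre_run() below falls back to
 * re-injecting the interrupt by hand.
 */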
int kvmppc_set_interrupt(CPUState *env, int irq, int level)
{
    unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;

    if (irq != PPC_INTERRUPT_EXT) {
        return 0;
    }

    if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
        return 0;
    }

    kvm_vcpu_ioctl(env, KVM_INTERRUPT, &virq);

    return 0;
}
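
/* Map the generic external interrupt pin onto the right per-family input. */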
#if defined(TARGET_PPCEMB)
#define PPC_INPUT_INT PPC40x_INPUT_INT
#elif defined(TARGET_PPC64)
#define PPC_INPUT_INT PPC970_INPUT_INT
#else
#define PPC_INPUT_INT PPC6xx_INPUT_INT
#endif

void kvm_arch_pre_run(CPUState *env, struct kvm_run *run)
{
    int r;
    unsigned irq;

    /* PowerPC Qemu tracks the various core input pins (interrupt, critical
     * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
    if (!cap_interrupt_level &&
        run->ready_for_interrupt_injection &&
        (env->interrupt_request & CPU_INTERRUPT_HARD) &&
        (env->irq_input_state & (1 << PPC_INPUT_INT)))
    {
        /* For now KVM disregards the 'irq' argument. However, in the
         * future KVM could cache it in-kernel to avoid a heavyweight exit
         * when reading the UIC.
         */
        irq = KVM_INTERRUPT_SET;

        dprintf("injected interrupt %d\n", irq);
        r = kvm_vcpu_ioctl(env, KVM_INTERRUPT, &irq);
        if (r < 0)
            printf("cpu %d fail inject %x\n", env->cpu_index, irq);

        /* Always wake up soon in case the interrupt was level based */
        qemu_mod_timer(idle_timer, qemu_get_clock_ns(vm_clock) +
                       (get_ticks_per_sec() / 50));
    }

    /* We don't know if there are more interrupts pending after this. However,
     * the guest will return to userspace in the course of handling this one
     * anyways, so we will get a chance to deliver the rest. */
}

void kvm_arch_post_run(CPUState *env, struct kvm_run *run)
{
}

int kvm_arch_process_async_events(CPUState *env)
{
    return 0;
}
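
/*
 * The guest went idle: if external interrupts are enabled and none is
 * pending, park the vcpu on the QEMU side with EXCP_HLT.
 */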
static int kvmppc_handle_halt(CPUState *env)
{
    if (!(env->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
        env->halted = 1;
        env->exception_index = EXCP_HLT;
    }

    return 0;
}

/* map dcr access to existing qemu dcr emulation */
static int kvmppc_handle_dcr_read(CPUState *env, uint32_t dcrn, uint32_t *data)
{
    if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
        fprintf(stderr, "Read from unhandled DCR (0x%x)\n", dcrn);

    return 0;
}

static int kvmppc_handle_dcr_write(CPUState *env, uint32_t dcrn, uint32_t data)
{
    if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
        fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);

    return 0;
}
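
/*
 * Top-level exit dispatcher. Only DCR accesses and halt exits are expected
 * here; anything else is reported as an unknown exit reason.
 */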
int kvm_arch_handle_exit(CPUState *env, struct kvm_run *run)
{
    int ret;

    switch (run->exit_reason) {
    case KVM_EXIT_DCR:
        if (run->dcr.is_write) {
            dprintf("handle dcr write\n");
            ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
        } else {
            dprintf("handle dcr read\n");
            ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
        }
        break;
    case KVM_EXIT_HLT:
        dprintf("handle halt\n");
        ret = kvmppc_handle_halt(env);
        break;
    default:
        fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
        ret = -1;
        break;
    }

    return ret;
}

static int read_cpuinfo(const char *field, char *value, int len)
{
    FILE *f;
    int ret = 0;
    int field_len = strlen(field);
    char line[512];

    f = fopen("/proc/cpuinfo", "r");
    if (!f) {
        return 0;
    }

    do {
        if (!fgets(line, sizeof(line), f)) {
            break;
        }
        if (!strncmp(line, field, field_len)) {
            strncpy(value, line, len);
            ret = 1;
            break;
        }
    } while (*line);

    fclose(f);

    return ret;
}
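
/*
 * Timebase frequency of the host, derived from the "timebase" line in
 * /proc/cpuinfo, with get_ticks_per_sec() as the default value.
 */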
uint32_t kvmppc_get_tbfreq(void)
{
    char line[512];
    char *ns;
    uint32_t retval = get_ticks_per_sec();

    if (read_cpuinfo("timebase", line, sizeof(line))) {
        return retval;
    }

    if (!(ns = strchr(line, ':'))) {
        return retval;
    }

    ns++;

    retval = atoi(ns);
    return retval;
}
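
/*
 * Fill buf with the instructions a guest should use to make a hypercall.
 * Prefer the sequence the kernel reports via KVM_PPC_GET_PVINFO; otherwise
 * hand back a stub that simply fails the hypercall.
 */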
int kvmppc_get_hypercall(CPUState *env, uint8_t *buf, int buf_len)
{
    uint32_t *hc = (uint32_t *)buf;

#ifdef KVM_CAP_PPC_GET_PVINFO
    struct kvm_ppc_pvinfo pvinfo;

    if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
        !kvm_vm_ioctl(env->kvm_state, KVM_PPC_GET_PVINFO, &pvinfo)) {
        memcpy(buf, pvinfo.hcall, buf_len);

        return 0;
    }
#endif

    /*
     * Fallback to always fail hypercalls:
     *
     *     li r3, -1
     *     nop
     *     nop
     *     nop
     */

    hc[0] = 0x3860ffff;
    hc[1] = 0x60000000;
    hc[2] = 0x60000000;
    hc[3] = 0x60000000;

    return 0;
}

bool kvm_arch_stop_on_emulation_error(CPUState *env)
{
    return true;
}

int kvm_arch_on_sigbus_vcpu(CPUState *env, int code, void *addr)
{
    return 1;
}

int kvm_arch_on_sigbus(int code, void *addr)
{
    return 1;
}