block-migration: fix pending() and iterate() return values
[qemu/pbrook.git] / target-ppc / kvm.c
blob2c64c634f13f05edc0072da724176d3ee91b02af
1 /*
2 * PowerPC implementation of KVM hooks
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
7 * Authors:
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
17 #include <dirent.h>
18 #include <sys/types.h>
19 #include <sys/ioctl.h>
20 #include <sys/mman.h>
21 #include <sys/vfs.h>
23 #include <linux/kvm.h>
25 #include "qemu-common.h"
26 #include "qemu/timer.h"
27 #include "sysemu/sysemu.h"
28 #include "sysemu/kvm.h"
29 #include "kvm_ppc.h"
30 #include "cpu.h"
31 #include "sysemu/cpus.h"
32 #include "sysemu/device_tree.h"
33 #include "hw/sysbus.h"
34 #include "hw/spapr.h"
36 #include "hw/sysbus.h"
37 #include "hw/spapr.h"
38 #include "hw/spapr_vio.h"
/* Define DEBUG_KVM to have dprintf() emit its trace messages on stderr. */
//#define DEBUG_KVM

#ifndef DEBUG_KVM
/* Tracing disabled: dprintf() expands to an empty statement. */
#define dprintf(fmt, ...) \
    do { } while (0)
#else
#define dprintf(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#endif

/* Directory holding the host's per-CPU device tree nodes. */
#define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
52 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
53 KVM_CAP_LAST_INFO
56 static int cap_interrupt_unset = false;
57 static int cap_interrupt_level = false;
58 static int cap_segstate;
59 static int cap_booke_sregs;
60 static int cap_ppc_smt;
61 static int cap_ppc_rma;
62 static int cap_spapr_tce;
63 static int cap_hior;
65 /* XXX We have a race condition where we actually have a level triggered
66 * interrupt, but the infrastructure can't expose that yet, so the guest
67 * takes but ignores it, goes to sleep and never gets notified that there's
68 * still an interrupt pending.
70 * As a quick workaround, let's just wake up again 20 ms after we injected
71 * an interrupt. That way we can assure that we're always reinjecting
72 * interrupts in case the guest swallowed them.
74 static QEMUTimer *idle_timer;
76 static void kvm_kick_cpu(void *opaque)
78 PowerPCCPU *cpu = opaque;
80 qemu_cpu_kick(CPU(cpu));
83 int kvm_arch_init(KVMState *s)
85 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
86 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
87 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
88 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
89 cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
90 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
91 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
92 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
94 if (!cap_interrupt_level) {
95 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
96 "VM to stall at times!\n");
99 return 0;
102 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
104 CPUPPCState *cenv = &cpu->env;
105 CPUState *cs = CPU(cpu);
106 struct kvm_sregs sregs;
107 int ret;
109 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
110 /* What we're really trying to say is "if we're on BookE, we use
111 the native PVR for now". This is the only sane way to check
112 it though, so we potentially confuse users that they can run
113 BookE guests on BookS. Let's hope nobody dares enough :) */
114 return 0;
115 } else {
116 if (!cap_segstate) {
117 fprintf(stderr, "kvm error: missing PVR setting capability\n");
118 return -ENOSYS;
122 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
123 if (ret) {
124 return ret;
127 sregs.pvr = cenv->spr[SPR_PVR];
128 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
131 /* Set up a shared TLB array with KVM */
132 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
134 CPUPPCState *env = &cpu->env;
135 CPUState *cs = CPU(cpu);
136 struct kvm_book3e_206_tlb_params params = {};
137 struct kvm_config_tlb cfg = {};
138 struct kvm_enable_cap encap = {};
139 unsigned int entries = 0;
140 int ret, i;
142 if (!kvm_enabled() ||
143 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
144 return 0;
147 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
149 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
150 params.tlb_sizes[i] = booke206_tlb_size(env, i);
151 params.tlb_ways[i] = booke206_tlb_ways(env, i);
152 entries += params.tlb_sizes[i];
155 assert(entries == env->nb_tlb);
156 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
158 env->tlb_dirty = true;
160 cfg.array = (uintptr_t)env->tlb.tlbm;
161 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
162 cfg.params = (uintptr_t)&params;
163 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
165 encap.cap = KVM_CAP_SW_TLB;
166 encap.args[0] = (uintptr_t)&cfg;
168 ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &encap);
169 if (ret < 0) {
170 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
171 __func__, strerror(-ret));
172 return ret;
175 env->kvm_sw_tlb = true;
176 return 0;
180 #if defined(TARGET_PPC64)
181 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
182 struct kvm_ppc_smmu_info *info)
184 CPUPPCState *env = &cpu->env;
185 CPUState *cs = CPU(cpu);
187 memset(info, 0, sizeof(*info));
189 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
190 * need to "guess" what the supported page sizes are.
192 * For that to work we make a few assumptions:
194 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
195 * KVM which only supports 4K and 16M pages, but supports them
196 * regardless of the backing store characteritics. We also don't
197 * support 1T segments.
199 * This is safe as if HV KVM ever supports that capability or PR
200 * KVM grows supports for more page/segment sizes, those versions
201 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
202 * will not hit this fallback
204 * - Else we are running HV KVM. This means we only support page
205 * sizes that fit in the backing store. Additionally we only
206 * advertize 64K pages if the processor is ARCH 2.06 and we assume
207 * P7 encodings for the SLB and hash table. Here too, we assume
208 * support for any newer processor will mean a kernel that
209 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
210 * this fallback.
212 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
213 /* No flags */
214 info->flags = 0;
215 info->slb_size = 64;
217 /* Standard 4k base page size segment */
218 info->sps[0].page_shift = 12;
219 info->sps[0].slb_enc = 0;
220 info->sps[0].enc[0].page_shift = 12;
221 info->sps[0].enc[0].pte_enc = 0;
223 /* Standard 16M large page size segment */
224 info->sps[1].page_shift = 24;
225 info->sps[1].slb_enc = SLB_VSID_L;
226 info->sps[1].enc[0].page_shift = 24;
227 info->sps[1].enc[0].pte_enc = 0;
228 } else {
229 int i = 0;
231 /* HV KVM has backing store size restrictions */
232 info->flags = KVM_PPC_PAGE_SIZES_REAL;
234 if (env->mmu_model & POWERPC_MMU_1TSEG) {
235 info->flags |= KVM_PPC_1T_SEGMENTS;
238 if (env->mmu_model == POWERPC_MMU_2_06) {
239 info->slb_size = 32;
240 } else {
241 info->slb_size = 64;
244 /* Standard 4k base page size segment */
245 info->sps[i].page_shift = 12;
246 info->sps[i].slb_enc = 0;
247 info->sps[i].enc[0].page_shift = 12;
248 info->sps[i].enc[0].pte_enc = 0;
249 i++;
251 /* 64K on MMU 2.06 */
252 if (env->mmu_model == POWERPC_MMU_2_06) {
253 info->sps[i].page_shift = 16;
254 info->sps[i].slb_enc = 0x110;
255 info->sps[i].enc[0].page_shift = 16;
256 info->sps[i].enc[0].pte_enc = 1;
257 i++;
260 /* Standard 16M large page size segment */
261 info->sps[i].page_shift = 24;
262 info->sps[i].slb_enc = SLB_VSID_L;
263 info->sps[i].enc[0].page_shift = 24;
264 info->sps[i].enc[0].pte_enc = 0;
268 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
270 CPUState *cs = CPU(cpu);
271 int ret;
273 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
274 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
275 if (ret == 0) {
276 return;
280 kvm_get_fallback_smmu_info(cpu, info);
283 static long getrampagesize(void)
285 struct statfs fs;
286 int ret;
288 if (!mem_path) {
289 /* guest RAM is backed by normal anonymous pages */
290 return getpagesize();
293 do {
294 ret = statfs(mem_path, &fs);
295 } while (ret != 0 && errno == EINTR);
297 if (ret != 0) {
298 fprintf(stderr, "Couldn't statfs() memory path: %s\n",
299 strerror(errno));
300 exit(1);
303 #define HUGETLBFS_MAGIC 0x958458f6
305 if (fs.f_type != HUGETLBFS_MAGIC) {
306 /* Explicit mempath, but it's ordinary pages */
307 return getpagesize();
310 /* It's hugepage, return the huge page size */
311 return fs.f_bsize;
314 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
316 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
317 return true;
320 return (1ul << shift) <= rampgsize;
323 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
325 static struct kvm_ppc_smmu_info smmu_info;
326 static bool has_smmu_info;
327 CPUPPCState *env = &cpu->env;
328 long rampagesize;
329 int iq, ik, jq, jk;
331 /* We only handle page sizes for 64-bit server guests for now */
332 if (!(env->mmu_model & POWERPC_MMU_64)) {
333 return;
336 /* Collect MMU info from kernel if not already */
337 if (!has_smmu_info) {
338 kvm_get_smmu_info(cpu, &smmu_info);
339 has_smmu_info = true;
342 rampagesize = getrampagesize();
344 /* Convert to QEMU form */
345 memset(&env->sps, 0, sizeof(env->sps));
347 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
348 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
349 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
351 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
352 ksps->page_shift)) {
353 continue;
355 qsps->page_shift = ksps->page_shift;
356 qsps->slb_enc = ksps->slb_enc;
357 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
358 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
359 ksps->enc[jk].page_shift)) {
360 continue;
362 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
363 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
364 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
365 break;
368 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
369 break;
372 env->slb_nr = smmu_info.slb_size;
373 if (smmu_info.flags & KVM_PPC_1T_SEGMENTS) {
374 env->mmu_model |= POWERPC_MMU_1TSEG;
375 } else {
376 env->mmu_model &= ~POWERPC_MMU_1TSEG;
379 #else /* defined (TARGET_PPC64) */
381 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
385 #endif /* !defined (TARGET_PPC64) */
387 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
389 return cpu->cpu_index;
392 int kvm_arch_init_vcpu(CPUState *cs)
394 PowerPCCPU *cpu = POWERPC_CPU(cs);
395 CPUPPCState *cenv = &cpu->env;
396 int ret;
398 /* Gather server mmu info from KVM and update the CPU state */
399 kvm_fixup_page_sizes(cpu);
401 /* Synchronize sregs with kvm */
402 ret = kvm_arch_sync_sregs(cpu);
403 if (ret) {
404 return ret;
407 idle_timer = qemu_new_timer_ns(vm_clock, kvm_kick_cpu, cpu);
409 /* Some targets support access to KVM's guest TLB. */
410 switch (cenv->mmu_model) {
411 case POWERPC_MMU_BOOKE206:
412 ret = kvm_booke206_tlb_init(cpu);
413 break;
414 default:
415 break;
418 return ret;
421 void kvm_arch_reset_vcpu(CPUState *cpu)
425 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
427 CPUPPCState *env = &cpu->env;
428 CPUState *cs = CPU(cpu);
429 struct kvm_dirty_tlb dirty_tlb;
430 unsigned char *bitmap;
431 int ret;
433 if (!env->kvm_sw_tlb) {
434 return;
437 bitmap = g_malloc((env->nb_tlb + 7) / 8);
438 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
440 dirty_tlb.bitmap = (uintptr_t)bitmap;
441 dirty_tlb.num_dirty = env->nb_tlb;
443 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
444 if (ret) {
445 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
446 __func__, strerror(-ret));
449 g_free(bitmap);
452 int kvm_arch_put_registers(CPUState *cs, int level)
454 PowerPCCPU *cpu = POWERPC_CPU(cs);
455 CPUPPCState *env = &cpu->env;
456 struct kvm_regs regs;
457 int ret;
458 int i;
460 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
461 if (ret < 0) {
462 return ret;
465 regs.ctr = env->ctr;
466 regs.lr = env->lr;
467 regs.xer = env->xer;
468 regs.msr = env->msr;
469 regs.pc = env->nip;
471 regs.srr0 = env->spr[SPR_SRR0];
472 regs.srr1 = env->spr[SPR_SRR1];
474 regs.sprg0 = env->spr[SPR_SPRG0];
475 regs.sprg1 = env->spr[SPR_SPRG1];
476 regs.sprg2 = env->spr[SPR_SPRG2];
477 regs.sprg3 = env->spr[SPR_SPRG3];
478 regs.sprg4 = env->spr[SPR_SPRG4];
479 regs.sprg5 = env->spr[SPR_SPRG5];
480 regs.sprg6 = env->spr[SPR_SPRG6];
481 regs.sprg7 = env->spr[SPR_SPRG7];
483 regs.pid = env->spr[SPR_BOOKE_PID];
485 for (i = 0;i < 32; i++)
486 regs.gpr[i] = env->gpr[i];
488 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
489 if (ret < 0)
490 return ret;
492 if (env->tlb_dirty) {
493 kvm_sw_tlb_put(cpu);
494 env->tlb_dirty = false;
497 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
498 struct kvm_sregs sregs;
500 sregs.pvr = env->spr[SPR_PVR];
502 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
504 /* Sync SLB */
505 #ifdef TARGET_PPC64
506 for (i = 0; i < 64; i++) {
507 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
508 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
510 #endif
512 /* Sync SRs */
513 for (i = 0; i < 16; i++) {
514 sregs.u.s.ppc32.sr[i] = env->sr[i];
517 /* Sync BATs */
518 for (i = 0; i < 8; i++) {
519 /* Beware. We have to swap upper and lower bits here */
520 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
521 | env->DBAT[1][i];
522 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
523 | env->IBAT[1][i];
526 ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
527 if (ret) {
528 return ret;
532 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
533 uint64_t hior = env->spr[SPR_HIOR];
534 struct kvm_one_reg reg = {
535 .id = KVM_REG_PPC_HIOR,
536 .addr = (uintptr_t) &hior,
539 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
540 if (ret) {
541 return ret;
545 return ret;
548 int kvm_arch_get_registers(CPUState *cs)
550 PowerPCCPU *cpu = POWERPC_CPU(cs);
551 CPUPPCState *env = &cpu->env;
552 struct kvm_regs regs;
553 struct kvm_sregs sregs;
554 uint32_t cr;
555 int i, ret;
557 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
558 if (ret < 0)
559 return ret;
561 cr = regs.cr;
562 for (i = 7; i >= 0; i--) {
563 env->crf[i] = cr & 15;
564 cr >>= 4;
567 env->ctr = regs.ctr;
568 env->lr = regs.lr;
569 env->xer = regs.xer;
570 env->msr = regs.msr;
571 env->nip = regs.pc;
573 env->spr[SPR_SRR0] = regs.srr0;
574 env->spr[SPR_SRR1] = regs.srr1;
576 env->spr[SPR_SPRG0] = regs.sprg0;
577 env->spr[SPR_SPRG1] = regs.sprg1;
578 env->spr[SPR_SPRG2] = regs.sprg2;
579 env->spr[SPR_SPRG3] = regs.sprg3;
580 env->spr[SPR_SPRG4] = regs.sprg4;
581 env->spr[SPR_SPRG5] = regs.sprg5;
582 env->spr[SPR_SPRG6] = regs.sprg6;
583 env->spr[SPR_SPRG7] = regs.sprg7;
585 env->spr[SPR_BOOKE_PID] = regs.pid;
587 for (i = 0;i < 32; i++)
588 env->gpr[i] = regs.gpr[i];
590 if (cap_booke_sregs) {
591 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
592 if (ret < 0) {
593 return ret;
596 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
597 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
598 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
599 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
600 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
601 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
602 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
603 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
604 env->spr[SPR_DECR] = sregs.u.e.dec;
605 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
606 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
607 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
610 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
611 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
612 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
613 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
614 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
615 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
618 if (sregs.u.e.features & KVM_SREGS_E_64) {
619 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
622 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
623 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
626 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
627 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
628 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
629 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
630 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
631 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
632 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
633 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
634 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
635 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
636 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
637 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
638 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
639 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
640 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
641 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
642 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
644 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
645 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
646 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
647 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
650 if (sregs.u.e.features & KVM_SREGS_E_PM) {
651 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
654 if (sregs.u.e.features & KVM_SREGS_E_PC) {
655 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
656 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
660 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
661 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
662 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
663 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
664 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
665 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
666 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
667 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
668 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
669 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
670 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
673 if (sregs.u.e.features & KVM_SREGS_EXP) {
674 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
677 if (sregs.u.e.features & KVM_SREGS_E_PD) {
678 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
679 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
682 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
683 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
684 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
685 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
687 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
688 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
689 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
694 if (cap_segstate) {
695 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
696 if (ret < 0) {
697 return ret;
700 ppc_store_sdr1(env, sregs.u.s.sdr1);
702 /* Sync SLB */
703 #ifdef TARGET_PPC64
704 for (i = 0; i < 64; i++) {
705 ppc_store_slb(env, sregs.u.s.ppc64.slb[i].slbe,
706 sregs.u.s.ppc64.slb[i].slbv);
708 #endif
710 /* Sync SRs */
711 for (i = 0; i < 16; i++) {
712 env->sr[i] = sregs.u.s.ppc32.sr[i];
715 /* Sync BATs */
716 for (i = 0; i < 8; i++) {
717 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
718 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
719 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
720 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
724 return 0;
727 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
729 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
731 if (irq != PPC_INTERRUPT_EXT) {
732 return 0;
735 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
736 return 0;
739 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
741 return 0;
/* Select the external-interrupt input pin constant for the build target. */
#ifdef TARGET_PPCEMB
#  define PPC_INPUT_INT PPC40x_INPUT_INT
#elif defined(TARGET_PPC64)
#  define PPC_INPUT_INT PPC970_INPUT_INT
#else
#  define PPC_INPUT_INT PPC6xx_INPUT_INT
#endif
752 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
754 PowerPCCPU *cpu = POWERPC_CPU(cs);
755 CPUPPCState *env = &cpu->env;
756 int r;
757 unsigned irq;
759 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
760 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
761 if (!cap_interrupt_level &&
762 run->ready_for_interrupt_injection &&
763 (env->interrupt_request & CPU_INTERRUPT_HARD) &&
764 (env->irq_input_state & (1<<PPC_INPUT_INT)))
766 /* For now KVM disregards the 'irq' argument. However, in the
767 * future KVM could cache it in-kernel to avoid a heavyweight exit
768 * when reading the UIC.
770 irq = KVM_INTERRUPT_SET;
772 dprintf("injected interrupt %d\n", irq);
773 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
774 if (r < 0) {
775 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
778 /* Always wake up soon in case the interrupt was level based */
779 qemu_mod_timer(idle_timer, qemu_get_clock_ns(vm_clock) +
780 (get_ticks_per_sec() / 50));
783 /* We don't know if there are more interrupts pending after this. However,
784 * the guest will return to userspace in the course of handling this one
785 * anyways, so we will get a chance to deliver the rest. */
788 void kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
792 int kvm_arch_process_async_events(CPUState *cs)
794 PowerPCCPU *cpu = POWERPC_CPU(cs);
795 return cpu->env.halted;
798 static int kvmppc_handle_halt(CPUPPCState *env)
800 if (!(env->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
801 env->halted = 1;
802 env->exception_index = EXCP_HLT;
805 return 0;
808 /* map dcr access to existing qemu dcr emulation */
809 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
811 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
812 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
814 return 0;
817 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
819 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
820 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
822 return 0;
825 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
827 PowerPCCPU *cpu = POWERPC_CPU(cs);
828 CPUPPCState *env = &cpu->env;
829 int ret;
831 switch (run->exit_reason) {
832 case KVM_EXIT_DCR:
833 if (run->dcr.is_write) {
834 dprintf("handle dcr write\n");
835 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
836 } else {
837 dprintf("handle dcr read\n");
838 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
840 break;
841 case KVM_EXIT_HLT:
842 dprintf("handle halt\n");
843 ret = kvmppc_handle_halt(env);
844 break;
845 #ifdef CONFIG_PSERIES
846 case KVM_EXIT_PAPR_HCALL:
847 dprintf("handle PAPR hypercall\n");
848 run->papr_hcall.ret = spapr_hypercall(cpu,
849 run->papr_hcall.nr,
850 run->papr_hcall.args);
851 ret = 0;
852 break;
853 #endif
854 case KVM_EXIT_EPR:
855 dprintf("handle epr\n");
856 run->epr.epr = ldl_phys(env->mpic_iack);
857 ret = 0;
858 break;
859 default:
860 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
861 ret = -1;
862 break;
865 return ret;
/* Find the first line of /proc/cpuinfo that starts with 'field' and copy it
 * (whole line) into 'value', which holds 'len' bytes.
 * Returns 0 when a line was found, -1 otherwise (including open failure). */
static int read_cpuinfo(const char *field, char *value, int len)
{
    char line[512];
    int prefix_len = strlen(field);
    int found = -1;
    FILE *fp = fopen("/proc/cpuinfo", "r");

    if (fp == NULL) {
        return -1;
    }

    while (fgets(line, sizeof(line), fp)) {
        if (strncmp(line, field, prefix_len) == 0) {
            pstrcpy(value, len, line);
            found = 0;
            break;
        }
        /* Stop on an empty buffer, as the original loop condition did. */
        if (!*line) {
            break;
        }
    }

    fclose(fp);

    return found;
}
/* Return the host timebase frequency parsed from the "timebase" line of
 * /proc/cpuinfo, or get_ticks_per_sec() when that line is missing or has
 * no ':' separator. */
uint32_t kvmppc_get_tbfreq(void)
{
    char line[512];
    char *colon;
    uint32_t fallback = get_ticks_per_sec();

    if (read_cpuinfo("timebase", line, sizeof(line)) != 0) {
        return fallback;
    }

    colon = strchr(line, ':');
    if (colon == NULL) {
        return fallback;
    }

    /* The number follows the colon. */
    return atoi(colon + 1);
}
916 /* Try to find a device tree node for a CPU with clock-frequency property */
917 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
919 struct dirent *dirp;
920 DIR *dp;
922 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
923 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
924 return -1;
927 buf[0] = '\0';
928 while ((dirp = readdir(dp)) != NULL) {
929 FILE *f;
930 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
931 dirp->d_name);
932 f = fopen(buf, "r");
933 if (f) {
934 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
935 fclose(f);
936 break;
938 buf[0] = '\0';
940 closedir(dp);
941 if (buf[0] == '\0') {
942 printf("Unknown host!\n");
943 return -1;
946 return 0;
949 /* Read a CPU node property from the host device tree that's a single
950 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
951 * (can't find or open the property, or doesn't understand the
952 * format) */
953 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
955 char buf[PATH_MAX];
956 union {
957 uint32_t v32;
958 uint64_t v64;
959 } u;
960 FILE *f;
961 int len;
963 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
964 return -1;
967 strncat(buf, "/", sizeof(buf) - strlen(buf));
968 strncat(buf, propname, sizeof(buf) - strlen(buf));
970 f = fopen(buf, "rb");
971 if (!f) {
972 return -1;
975 len = fread(&u, 1, sizeof(u), f);
976 fclose(f);
977 switch (len) {
978 case 4:
979 /* property is a 32-bit quantity */
980 return be32_to_cpu(u.v32);
981 case 8:
982 return be64_to_cpu(u.v64);
985 return 0;
/* Host CPU "clock-frequency" device tree property. */
uint64_t kvmppc_get_clockfreq(void)
{
    uint64_t freq = kvmppc_read_int_cpu_dt("clock-frequency");

    return freq;
}
/* Host CPU "ibm,vmx" device tree property, truncated to 32 bits. */
uint32_t kvmppc_get_vmx(void)
{
    uint32_t vmx = kvmppc_read_int_cpu_dt("ibm,vmx");

    return vmx;
}
/* Host CPU "ibm,dfp" device tree property, truncated to 32 bits. */
uint32_t kvmppc_get_dfp(void)
{
    uint32_t dfp = kvmppc_read_int_cpu_dt("ibm,dfp");

    return dfp;
}
1003 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1005 PowerPCCPU *cpu = ppc_env_get_cpu(env);
1006 CPUState *cs = CPU(cpu);
1008 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1009 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1010 return 0;
1013 return 1;
1016 int kvmppc_get_hasidle(CPUPPCState *env)
1018 struct kvm_ppc_pvinfo pvinfo;
1020 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1021 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1022 return 1;
1025 return 0;
1028 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1030 uint32_t *hc = (uint32_t*)buf;
1031 struct kvm_ppc_pvinfo pvinfo;
1033 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1034 memcpy(buf, pvinfo.hcall, buf_len);
1035 return 0;
1039 * Fallback to always fail hypercalls:
1041 * li r3, -1
1042 * nop
1043 * nop
1044 * nop
1047 hc[0] = 0x3860ffff;
1048 hc[1] = 0x60000000;
1049 hc[2] = 0x60000000;
1050 hc[3] = 0x60000000;
1052 return 0;
1055 void kvmppc_set_papr(PowerPCCPU *cpu)
1057 CPUPPCState *env = &cpu->env;
1058 CPUState *cs = CPU(cpu);
1059 struct kvm_enable_cap cap = {};
1060 int ret;
1062 cap.cap = KVM_CAP_PPC_PAPR;
1063 ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);
1065 if (ret) {
1066 cpu_abort(env, "This KVM version does not support PAPR\n");
1070 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
1072 CPUPPCState *env = &cpu->env;
1073 CPUState *cs = CPU(cpu);
1074 struct kvm_enable_cap cap = {};
1075 int ret;
1077 cap.cap = KVM_CAP_PPC_EPR;
1078 cap.args[0] = mpic_proxy;
1079 ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);
1081 if (ret && mpic_proxy) {
1082 cpu_abort(env, "This KVM version does not support EPR\n");
1086 int kvmppc_smt_threads(void)
1088 return cap_ppc_smt ? cap_ppc_smt : 1;
1091 #ifdef TARGET_PPC64
1092 off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
1094 void *rma;
1095 off_t size;
1096 int fd;
1097 struct kvm_allocate_rma ret;
1098 MemoryRegion *rma_region;
1100 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
1101 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
1102 * not necessary on this hardware
1103 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
1105 * FIXME: We should allow the user to force contiguous RMA
1106 * allocation in the cap_ppc_rma==1 case.
1108 if (cap_ppc_rma < 2) {
1109 return 0;
1112 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
1113 if (fd < 0) {
1114 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
1115 strerror(errno));
1116 return -1;
1119 size = MIN(ret.rma_size, 256ul << 20);
1121 rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1122 if (rma == MAP_FAILED) {
1123 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
1124 return -1;
1127 rma_region = g_new(MemoryRegion, 1);
1128 memory_region_init_ram_ptr(rma_region, name, size, rma);
1129 vmstate_register_ram_global(rma_region);
1130 memory_region_add_subregion(sysmem, 0, rma_region);
1132 return size;
1135 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
1137 if (cap_ppc_rma >= 2) {
1138 return current_size;
1140 return MIN(current_size,
1141 getrampagesize() << (hash_shift - 7));
1143 #endif
1145 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
1147 struct kvm_create_spapr_tce args = {
1148 .liobn = liobn,
1149 .window_size = window_size,
1151 long len;
1152 int fd;
1153 void *table;
1155 /* Must set fd to -1 so we don't try to munmap when called for
1156 * destroying the table, which the upper layers -will- do
1158 *pfd = -1;
1159 if (!cap_spapr_tce) {
1160 return NULL;
1163 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
1164 if (fd < 0) {
1165 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
1166 liobn);
1167 return NULL;
1170 len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(sPAPRTCE);
1171 /* FIXME: round this up to page size */
1173 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1174 if (table == MAP_FAILED) {
1175 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
1176 liobn);
1177 close(fd);
1178 return NULL;
1181 *pfd = fd;
1182 return table;
1185 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t window_size)
1187 long len;
1189 if (fd < 0) {
1190 return -1;
1193 len = (window_size / SPAPR_TCE_PAGE_SIZE)*sizeof(sPAPRTCE);
1194 if ((munmap(table, len) < 0) ||
1195 (close(fd) < 0)) {
1196 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
1197 strerror(errno));
1198 /* Leak the table */
1201 return 0;
1204 int kvmppc_reset_htab(int shift_hint)
1206 uint32_t shift = shift_hint;
1208 if (!kvm_enabled()) {
1209 /* Full emulation, tell caller to allocate htab itself */
1210 return 0;
1212 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
1213 int ret;
1214 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
1215 if (ret == -ENOTTY) {
1216 /* At least some versions of PR KVM advertise the
1217 * capability, but don't implement the ioctl(). Oops.
1218 * Return 0 so that we allocate the htab in qemu, as is
1219 * correct for PR. */
1220 return 0;
1221 } else if (ret < 0) {
1222 return ret;
1224 return shift;
1227 /* We have a kernel that predates the htab reset calls. For PR
1228 * KVM, we need to allocate the htab ourselves, for an HV KVM of
1229 * this era, it has allocated a 16MB fixed size hash table
1230 * already. Kernels of this era have the GET_PVINFO capability
1231 * only on PR, so we use this hack to determine the right
1232 * answer */
1233 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
1234 /* PR - tell caller to allocate htab */
1235 return 0;
1236 } else {
1237 /* HV - assume 16MB kernel allocated htab */
1238 return 24;
1242 static inline uint32_t mfpvr(void)
1244 uint32_t pvr;
1246 asm ("mfpvr %0"
1247 : "=r"(pvr));
1248 return pvr;
/* Set (on == true) or clear (on == false) the bits of 'flags' in *word. */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    *word = on ? (*word | flags) : (*word & ~flags);
}
1260 static void kvmppc_host_cpu_initfn(Object *obj)
1262 PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(obj);
1264 assert(kvm_enabled());
1266 if (pcc->info->pvr != mfpvr()) {
1267 fprintf(stderr, "Your host CPU is unsupported.\n"
1268 "Please choose a supported model instead, see -cpu ?.\n");
1269 exit(1);
1273 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
1275 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
1276 uint32_t host_pvr = mfpvr();
1277 PowerPCCPUClass *pvr_pcc;
1278 ppc_def_t *spec;
1279 uint32_t vmx = kvmppc_get_vmx();
1280 uint32_t dfp = kvmppc_get_dfp();
1282 spec = g_malloc0(sizeof(*spec));
1284 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
1285 if (pvr_pcc != NULL) {
1286 memcpy(spec, pvr_pcc->info, sizeof(*spec));
1288 pcc->info = spec;
1289 /* Override the display name for -cpu ? and QMP */
1290 pcc->info->name = "host";
1292 /* Now fix up the spec with information we can query from the host */
1294 if (vmx != -1) {
1295 /* Only override when we know what the host supports */
1296 alter_insns(&spec->insns_flags, PPC_ALTIVEC, vmx > 0);
1297 alter_insns(&spec->insns_flags2, PPC2_VSX, vmx > 1);
1299 if (dfp != -1) {
1300 /* Only override when we know what the host supports */
1301 alter_insns(&spec->insns_flags2, PPC2_DFP, dfp);
1305 int kvmppc_fixup_cpu(PowerPCCPU *cpu)
1307 CPUState *cs = CPU(cpu);
1308 int smt;
1310 /* Adjust cpu index for SMT */
1311 smt = kvmppc_smt_threads();
1312 cs->cpu_index = (cs->cpu_index / smp_threads) * smt
1313 + (cs->cpu_index % smp_threads);
1315 return 0;
1319 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
1321 return true;
1324 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
1326 return 1;
/* No PowerPC-specific SIGBUS handling: always returns 1. */
int kvm_arch_on_sigbus(int code, void *addr)
{
    const int result = 1;

    return result;
}
1334 static const TypeInfo kvm_host_cpu_type_info = {
1335 .name = TYPE_HOST_POWERPC_CPU,
1336 .parent = TYPE_POWERPC_CPU,
1337 .instance_init = kvmppc_host_cpu_initfn,
1338 .class_init = kvmppc_host_cpu_class_init,
1341 static void kvm_ppc_register_types(void)
1343 type_register_static(&kvm_host_cpu_type_info);
1346 type_init(kvm_ppc_register_types)