/*
 * PowerPC implementation of KVM hooks
 *
 * Copyright IBM Corp. 2007
 * Copyright (C) 2011 Freescale Semiconductor, Inc.
 *
 *  Jerone Young <jyoung5@us.ibm.com>
 *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
 *  Hollis Blanchard <hollisb@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */
#include "qemu/osdep.h"

#include <sys/ioctl.h>

#include <linux/kvm.h>

#include "qemu-common.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "cpu-models.h"
#include "qemu/timer.h"
#include "sysemu/sysemu.h"
#include "sysemu/hw_accel.h"
#include "sysemu/cpus.h"
#include "sysemu/device_tree.h"
#include "mmu-hash64.h"

#include "hw/sysbus.h"
#include "hw/ppc/spapr.h"
#include "hw/ppc/spapr_vio.h"
#include "hw/ppc/spapr_cpu_core.h"
#include "hw/ppc/ppc.h"
#include "sysemu/watchdog.h"
#include "exec/gdbstub.h"
#include "exec/memattrs.h"
#include "exec/ram_addr.h"
#include "sysemu/hostmem.h"
#include "qemu/cutils.h"
#include "qemu/mmap-alloc.h"
#include "sysemu/kvm_int.h"
#ifdef DEBUG_KVM
#define DPRINTF(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif

#define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
    KVM_CAP_LAST_INFO
};

static int cap_interrupt_unset = false;
static int cap_interrupt_level = false;
static int cap_segstate;
static int cap_booke_sregs;
static int cap_ppc_smt;
static int cap_ppc_smt_possible;
static int cap_spapr_tce;
static int cap_spapr_tce_64;
static int cap_spapr_multitce;
static int cap_spapr_vfio;
static int cap_hior;
static int cap_one_reg;
static int cap_epr;
static int cap_ppc_watchdog;
static int cap_papr;
static int cap_htab_fd;
static int cap_fixup_hcalls;
static int cap_htm;             /* Hardware transactional memory support */
static int cap_mmu_radix;
static int cap_mmu_hash_v3;
static int cap_resize_hpt;
static int cap_ppc_pvr_compat;
static int cap_ppc_safe_cache;
static int cap_ppc_safe_bounds_check;
static int cap_ppc_safe_indirect_branch;

static uint32_t debug_inst_opcode;

/* XXX We have a race condition where we actually have a level triggered
 *     interrupt, but the infrastructure can't expose that yet, so the guest
 *     takes but ignores it, goes to sleep and never gets notified that there's
 *     still an interrupt pending.
 *
 *     As a quick workaround, let's just wake up again 20 ms after we injected
 *     an interrupt. That way we can assure that we're always reinjecting
 *     interrupts in case the guest swallowed them.
 */
static QEMUTimer *idle_timer;
static void kvm_kick_cpu(void *opaque)
{
    PowerPCCPU *cpu = opaque;

    qemu_cpu_kick(CPU(cpu));
}
/* Check whether we are running with KVM-PR (instead of KVM-HV). This
 * should only be used for fallback tests - generally we should use
 * explicit capabilities for the features we want, rather than
 * assuming what is/isn't available depending on the KVM variant. */
static bool kvmppc_is_pr(KVMState *ks)
{
    /* Assume KVM-PR if the GET_PVINFO capability is available */
    return kvm_vm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
}
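/*
 * Illustrative note (not part of the upstream text): callers that really do
 * have to guess the KVM flavour can use this helper roughly like
 *
 *     if (kvmppc_is_pr(kvm_state)) {
 *         // PR KVM: QEMU usually has to provide the resource itself
 *     }
 *
 * which is how kvmppc_reset_htab() further down falls back when the hash
 * table allocation ioctl is unavailable.
 */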
static int kvm_ppc_register_host_cpu_type(MachineState *ms);
static void kvmppc_get_cpu_characteristics(KVMState *s);
int kvm_arch_init(MachineState *ms, KVMState *s)
{
    cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
    cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
    cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
    cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
    cap_ppc_smt_possible = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT_POSSIBLE);
    cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
    cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
    cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
    cap_spapr_vfio = kvm_vm_check_extension(s, KVM_CAP_SPAPR_TCE_VFIO);
    cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
    cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
    cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
    cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
    /* Note: we don't set cap_papr here, because this capability is
     * only activated after this by kvmppc_set_papr() */
    cap_htab_fd = kvm_vm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
    cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
    cap_ppc_smt = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT);
    cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
    cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
    cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
    cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
    kvmppc_get_cpu_characteristics(s);
    /*
     * Note: setting it to false because there is no such capability
     * in KVM at this moment.
     *
     * TODO: call kvm_vm_check_extension() with the right capability
     * after the kernel starts implementing it.
     */
    cap_ppc_pvr_compat = false;

    if (!cap_interrupt_level) {
        fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
                        "VM to stall at times!\n");
    }

    kvm_ppc_register_host_cpu_type(ms);

    return 0;
}
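/*
 * Added commentary (not upstream text) on the probing pattern above:
 * kvm_check_extension() asks about a capability of the KVM module as a
 * whole, while kvm_vm_check_extension() issues the same query against this
 * VM's fd, which matters for capabilities whose answer can depend on the VM
 * itself (for instance how many SMT threads the KVM flavour in use exposes).
 * The returned int is either a boolean or a capability-specific value such
 * as a bitmap of supported modes.
 */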
int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
{
    return 0;
}
static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *cenv = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_sregs sregs;
    int ret;

    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
        /* What we're really trying to say is "if we're on BookE, we use
           the native PVR for now". This is the only sane way to check
           it though, so we potentially confuse users that they can run
           BookE guests on BookS. Let's hope nobody dares enough :) */
        return 0;
    }

    if (!cap_segstate) {
        fprintf(stderr, "kvm error: missing PVR setting capability\n");
        return -ENOSYS;
    }

    ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
    if (ret) {
        return ret;
    }

    sregs.pvr = cenv->spr[SPR_PVR];
    return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
}
/* Set up a shared TLB array with KVM */
static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_book3e_206_tlb_params params = {};
    struct kvm_config_tlb cfg = {};
    unsigned int entries = 0;
    int ret, i;

    if (!kvm_enabled() ||
        !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
        return 0;
    }

    assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);

    for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
        params.tlb_sizes[i] = booke206_tlb_size(env, i);
        params.tlb_ways[i] = booke206_tlb_ways(env, i);
        entries += params.tlb_sizes[i];
    }

    assert(entries == env->nb_tlb);
    assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));

    env->tlb_dirty = true;

    cfg.array = (uintptr_t)env->tlb.tlbm;
    cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
    cfg.params = (uintptr_t)&params;
    cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;

    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
    if (ret < 0) {
        fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
                __func__, strerror(-ret));
        return ret;
    }

    env->kvm_sw_tlb = true;
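    /*
     * Added commentary (not upstream text): with KVM_CAP_SW_TLB enabled,
     * cfg.array points straight at QEMU's own MAS-format TLB array, so the
     * kernel and QEMU share one copy of the guest TLB.  QEMU only has to
     * tell KVM which entries it dirtied, which kvm_sw_tlb_put() below does
     * by handing an all-ones bitmap to the KVM_DIRTY_TLB ioctl.
     */
    return 0;
}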
#if defined(TARGET_PPC64)
static void kvm_get_smmu_info(struct kvm_ppc_smmu_info *info, Error **errp)
{
    int ret;

    assert(kvm_state != NULL);

    if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
        error_setg(errp, "KVM doesn't expose the MMU features it supports");
        error_append_hint(errp, "Consider switching to a newer KVM\n");
        return;
    }

    ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_SMMU_INFO, info);
    if (ret == 0) {
        return;
    }

    error_setg_errno(errp, -ret,
                     "KVM failed to provide the MMU features it supports");
}
struct ppc_radix_page_info *kvm_get_radix_page_info(void)
{
    KVMState *s = KVM_STATE(current_machine->accelerator);
    struct ppc_radix_page_info *radix_page_info;
    struct kvm_ppc_rmmu_info rmmu_info;
    int i;

    if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) {
        return NULL;
    }
    if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) {
        return NULL;
    }
    radix_page_info = g_malloc0(sizeof(*radix_page_info));
    radix_page_info->count = 0;
    for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
        if (rmmu_info.ap_encodings[i]) {
            radix_page_info->entries[i] = rmmu_info.ap_encodings[i];
            radix_page_info->count++;
        }
    }
    return radix_page_info;
}
target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu,
                                     bool radix, bool gtse,
                                     uint64_t proc_tbl)
{
    CPUState *cs = CPU(cpu);
    int ret;
    uint64_t flags = 0;
    struct kvm_ppc_mmuv3_cfg cfg = {
        .process_table = proc_tbl,
    };

    if (radix) {
        flags |= KVM_PPC_MMUV3_RADIX;
    }
    if (gtse) {
        flags |= KVM_PPC_MMUV3_GTSE;
    }
    cfg.flags = flags;
    ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg);
    if (ret) {
        return H_NOT_AVAILABLE;
    }
    return H_SUCCESS;
}
bool kvmppc_hpt_needs_host_contiguous_pages(void)
{
    static struct kvm_ppc_smmu_info smmu_info;

    if (!kvm_enabled()) {
        return false;
    }

    kvm_get_smmu_info(&smmu_info, &error_fatal);
    return !!(smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL);
}
void kvm_check_mmu(PowerPCCPU *cpu, Error **errp)
{
    struct kvm_ppc_smmu_info smmu_info;
    int iq, ik, jq, jk;
    Error *local_err = NULL;

    /* For now, we only have anything to check on hash64 MMUs */
    if (!cpu->hash64_opts || !kvm_enabled()) {
        return;
    }

    kvm_get_smmu_info(&smmu_info, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }

    if (ppc_hash64_has(cpu, PPC_HASH64_1TSEG)
        && !(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
        error_setg(errp,
                   "KVM does not support 1TiB segments which guest expects");
        return;
    }

    if (smmu_info.slb_size < cpu->hash64_opts->slb_size) {
        error_setg(errp, "KVM only supports %u SLB entries, but guest needs %u",
                   smmu_info.slb_size, cpu->hash64_opts->slb_size);
        return;
    }

    /*
     * Verify that every pagesize supported by the cpu model is
     * supported by KVM with the same encodings
     */
    for (iq = 0; iq < ARRAY_SIZE(cpu->hash64_opts->sps); iq++) {
        PPCHash64SegmentPageSizes *qsps = &cpu->hash64_opts->sps[iq];
        struct kvm_ppc_one_seg_page_size *ksps;

        for (ik = 0; ik < ARRAY_SIZE(smmu_info.sps); ik++) {
            if (qsps->page_shift == smmu_info.sps[ik].page_shift) {
                break;
            }
        }
        if (ik >= ARRAY_SIZE(smmu_info.sps)) {
            error_setg(errp, "KVM doesn't support base page shift %u",
                       qsps->page_shift);
            return;
        }

        ksps = &smmu_info.sps[ik];
        if (ksps->slb_enc != qsps->slb_enc) {
            error_setg(errp,
                       "KVM uses SLB encoding 0x%x for page shift %u, but guest expects 0x%x",
                       ksps->slb_enc, ksps->page_shift, qsps->slb_enc);
            return;
        }

        for (jq = 0; jq < ARRAY_SIZE(qsps->enc); jq++) {
            for (jk = 0; jk < ARRAY_SIZE(ksps->enc); jk++) {
                if (qsps->enc[jq].page_shift == ksps->enc[jk].page_shift) {
                    break;
                }
            }
            if (jk >= ARRAY_SIZE(ksps->enc)) {
                error_setg(errp, "KVM doesn't support page shift %u/%u",
                           qsps->enc[jq].page_shift, qsps->page_shift);
                return;
            }
            if (qsps->enc[jq].pte_enc != ksps->enc[jk].pte_enc) {
                error_setg(errp,
                           "KVM uses PTE encoding 0x%x for page shift %u/%u, but guest expects 0x%x",
                           ksps->enc[jk].pte_enc, qsps->enc[jq].page_shift,
                           qsps->page_shift, qsps->enc[jq].pte_enc);
                return;
            }
        }
    }

    if (ppc_hash64_has(cpu, PPC_HASH64_CI_LARGEPAGE)) {
        /* Mostly what guest pagesizes we can use are related to the
         * host pages used to map guest RAM, which is handled in the
         * platform code. Cache-Inhibited largepages (64k) however are
         * used for I/O, so if they're mapped to the host at all it
         * will be a normal mapping, not a special hugepage one used
         * for RAM. */
        if (getpagesize() < 0x10000) {
            error_setg(errp,
                       "KVM can't supply 64kiB CI pages, which guest expects");
        }
    }
}
#endif /* !defined (TARGET_PPC64) */
unsigned long kvm_arch_vcpu_id(CPUState *cpu)
{
    return POWERPC_CPU(cpu)->vcpu_id;
}
/* e500 supports 2 h/w breakpoints and 2 watchpoints.
 * book3s supports only 1 watchpoint, so array size
 * of 4 is sufficient for now.
 */
#define MAX_HW_BKPTS 4

static struct HWBreakpoint {
    target_ulong addr;
    int type;
} hw_debug_points[MAX_HW_BKPTS];

static CPUWatchpoint hw_watchpoint;

/* Default there is no breakpoint and watchpoint supported */
static int max_hw_breakpoint;
static int max_hw_watchpoint;
static int nb_hw_breakpoint;
static int nb_hw_watchpoint;
static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
{
    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
        max_hw_breakpoint = 2;
        max_hw_watchpoint = 2;
    }

    if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
        fprintf(stderr, "Error initializing h/w breakpoints\n");
        return;
    }
}
int kvm_arch_init_vcpu(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *cenv = &cpu->env;
    int ret;

    /* Synchronize sregs with kvm */
    ret = kvm_arch_sync_sregs(cpu);
    if (ret) {
        if (ret == -EINVAL) {
            error_report("Register sync failed... If you're using kvm-hv.ko,"
                         " only \"-cpu host\" is possible");
        }
        return ret;
    }

    idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);

    switch (cenv->mmu_model) {
    case POWERPC_MMU_BOOKE206:
        /* This target supports access to KVM's guest TLB */
        ret = kvm_booke206_tlb_init(cpu);
        break;
    case POWERPC_MMU_2_07:
        if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
            /* KVM-HV has transactional memory on POWER8 also without the
             * KVM_CAP_PPC_HTM extension, so enable it here instead as
             * long as it's available to userspace on the host. */
            if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
                cap_htm = true;
            }
        }
        break;
    default:
        break;
    }

    kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
    kvmppc_hw_debug_points_init(cenv);

    return ret;
}
static void kvm_sw_tlb_put(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_dirty_tlb dirty_tlb;
    unsigned char *bitmap;
    int ret;

    if (!env->kvm_sw_tlb) {
        return;
    }

    bitmap = g_malloc((env->nb_tlb + 7) / 8);
    memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);

    dirty_tlb.bitmap = (uintptr_t)bitmap;
    dirty_tlb.num_dirty = env->nb_tlb;

    ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
    if (ret) {
        fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
                __func__, strerror(-ret));
    }

    g_free(bitmap);
}
static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    union {
        uint32_t u32;
        uint64_t u64;
    } val;
    struct kvm_one_reg reg = {
        .id = id,
        .addr = (uintptr_t) &val,
    };
    int ret;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret != 0) {
        trace_kvm_failed_spr_get(spr, strerror(errno));
    } else {
        switch (id & KVM_REG_SIZE_MASK) {
        case KVM_REG_SIZE_U32:
            env->spr[spr] = val.u32;
            break;

        case KVM_REG_SIZE_U64:
            env->spr[spr] = val.u64;
            break;

        default:
            /* Don't handle this size yet */
            abort();
        }
    }
}
static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    union {
        uint32_t u32;
        uint64_t u64;
    } val;
    struct kvm_one_reg reg = {
        .id = id,
        .addr = (uintptr_t) &val,
    };
    int ret;

    switch (id & KVM_REG_SIZE_MASK) {
    case KVM_REG_SIZE_U32:
        val.u32 = env->spr[spr];
        break;

    case KVM_REG_SIZE_U64:
        val.u64 = env->spr[spr];
        break;

    default:
        /* Don't handle this size yet */
        abort();
    }

    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret != 0) {
        trace_kvm_failed_spr_set(spr, strerror(errno));
    }
}
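/*
 * Added commentary (not upstream text): the two helpers above rely on the
 * ONE_REG id itself encoding the register width.  Masking the id with
 * KVM_REG_SIZE_MASK tells us whether KVM transfers 4 or 8 bytes for it, so
 * the same union-backed buffer serves both sizes, e.g. roughly
 *
 *     kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
 *
 * as done from kvm_arch_put_registers() further down.
 */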
static int kvm_put_fp(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int i;
    int ret;

    if (env->insns_flags & PPC_FLOAT) {
        uint64_t fpscr = env->fpscr;
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            uint64_t vsr[2];

#ifdef HOST_WORDS_BIGENDIAN
            vsr[0] = float64_val(env->fpr[i]);
            vsr[1] = env->vsr[i];
#else
            vsr[0] = env->vsr[i];
            vsr[1] = float64_val(env->fpr[i]);
#endif
            reg.addr = (uintptr_t) &vsr;
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
                        i, strerror(errno));
                return ret;
            }
        }
    }

    if (env->insns_flags & PPC_ALTIVEC) {
        reg.id = KVM_REG_PPC_VSCR;
        reg.addr = (uintptr_t)&env->vscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            reg.id = KVM_REG_PPC_VR(i);
            reg.addr = (uintptr_t)&env->avr[i];
            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
                return ret;
            }
        }
    }

    return 0;
}
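/*
 * Added commentary (not upstream text): KVM's VSR ONE_REG view is a 128-bit
 * value per register.  In the loop above and its counterpart below, vsr[0]
 * and vsr[1] are arranged so that the FPR lands in the doubleword where the
 * host byte order expects it, which is why the HOST_WORDS_BIGENDIAN cases
 * swap the two halves.
 */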
static int kvm_get_fp(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int i;
    int ret;

    if (env->insns_flags & PPC_FLOAT) {
        uint64_t fpscr;
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
            return ret;
        }
        env->fpscr = fpscr;

        for (i = 0; i < 32; i++) {
            uint64_t vsr[2];

            reg.addr = (uintptr_t) &vsr;
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to get %s%d from KVM: %s\n",
                        vsx ? "VSR" : "FPR", i, strerror(errno));
                return ret;
            }

#ifdef HOST_WORDS_BIGENDIAN
            env->fpr[i] = vsr[0];
            if (vsx) {
                env->vsr[i] = vsr[1];
            }
#else
            env->fpr[i] = vsr[1];
            if (vsx) {
                env->vsr[i] = vsr[0];
            }
#endif
        }
    }

    if (env->insns_flags & PPC_ALTIVEC) {
        reg.id = KVM_REG_PPC_VSCR;
        reg.addr = (uintptr_t)&env->vscr;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            reg.id = KVM_REG_PPC_VR(i);
            reg.addr = (uintptr_t)&env->avr[i];
            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to get VR%d from KVM: %s\n",
                        i, strerror(errno));
                return ret;
            }
        }
    }

    return 0;
}

#if defined(TARGET_PPC64)
static int kvm_get_vpa(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
    struct kvm_one_reg reg;
    int ret;

    reg.id = KVM_REG_PPC_VPA_ADDR;
    reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
        return ret;
    }

    assert((uintptr_t)&spapr_cpu->slb_shadow_size
           == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8));
    reg.id = KVM_REG_PPC_VPA_SLB;
    reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
                strerror(errno));
        return ret;
    }

    assert((uintptr_t)&spapr_cpu->dtl_size
           == ((uintptr_t)&spapr_cpu->dtl_addr + 8));
    reg.id = KVM_REG_PPC_VPA_DTL;
    reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
                strerror(errno));
        return ret;
    }

    return 0;
}
static int kvm_put_vpa(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
    struct kvm_one_reg reg;
    int ret;

    /* SLB shadow or DTL can't be registered unless a master VPA is
     * registered. That means when restoring state, if a VPA *is*
     * registered, we need to set that up first. If not, we need to
     * deregister the others before deregistering the master VPA */
    assert(spapr_cpu->vpa_addr
           || !(spapr_cpu->slb_shadow_addr || spapr_cpu->dtl_addr));

    if (spapr_cpu->vpa_addr) {
        reg.id = KVM_REG_PPC_VPA_ADDR;
        reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
            return ret;
        }
    }

    assert((uintptr_t)&spapr_cpu->slb_shadow_size
           == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8));
    reg.id = KVM_REG_PPC_VPA_SLB;
    reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
        return ret;
    }

    assert((uintptr_t)&spapr_cpu->dtl_size
           == ((uintptr_t)&spapr_cpu->dtl_addr + 8));
    reg.id = KVM_REG_PPC_VPA_DTL;
    reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
                strerror(errno));
        return ret;
    }

    if (!spapr_cpu->vpa_addr) {
        reg.id = KVM_REG_PPC_VPA_ADDR;
        reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
            return ret;
        }
    }

    return 0;
}
#endif /* TARGET_PPC64 */
int kvmppc_put_books_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    struct kvm_sregs sregs;
    int i;

    sregs.pvr = env->spr[SPR_PVR];

    if (cpu->vhyp) {
        PPCVirtualHypervisorClass *vhc =
            PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
        sregs.u.s.sdr1 = vhc->encode_hpt_for_kvm_pr(cpu->vhyp);
    } else {
        sregs.u.s.sdr1 = env->spr[SPR_SDR1];
    }

    for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
        sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
        if (env->slb[i].esid & SLB_ESID_V) {
            sregs.u.s.ppc64.slb[i].slbe |= i;
        }
        sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
    }

    for (i = 0; i < 16; i++) {
        sregs.u.s.ppc32.sr[i] = env->sr[i];
    }

    for (i = 0; i < 8; i++) {
        /* Beware. We have to swap upper and lower bits here */
        sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
            | env->DBAT[1][i];
        sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
            | env->IBAT[1][i];
    }

    return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
}
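/*
 * Added commentary (not upstream text): note the SLB encoding above.  KVM's
 * sregs SLB slots carry the entry index in the low bits of the ESID word,
 * which is why valid entries get "slbe |= i" before being handed to
 * KVM_SET_SREGS; kvmppc_get_books_sregs() below strips those bits again with
 * "rb & ~0xfffULL" when restoring the entries.
 */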
int kvm_arch_put_registers(CPUState *cs, int level)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_regs regs;
    int ret;
    int i;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    regs.xer = cpu_read_xer(env);

    regs.srr0 = env->spr[SPR_SRR0];
    regs.srr1 = env->spr[SPR_SRR1];

    regs.sprg0 = env->spr[SPR_SPRG0];
    regs.sprg1 = env->spr[SPR_SPRG1];
    regs.sprg2 = env->spr[SPR_SPRG2];
    regs.sprg3 = env->spr[SPR_SPRG3];
    regs.sprg4 = env->spr[SPR_SPRG4];
    regs.sprg5 = env->spr[SPR_SPRG5];
    regs.sprg6 = env->spr[SPR_SPRG6];
    regs.sprg7 = env->spr[SPR_SPRG7];

    regs.pid = env->spr[SPR_BOOKE_PID];

    for (i = 0; i < 32; i++) {
        regs.gpr[i] = env->gpr[i];
    }

    regs.cr = 0;
    for (i = 0; i < 8; i++) {
        regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
    }

    ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    if (env->tlb_dirty) {
        kvm_sw_tlb_put(cpu);
        env->tlb_dirty = false;
    }

    if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
        ret = kvmppc_put_books_sregs(cpu);
        if (ret < 0) {
            return ret;
        }
    }

    if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
        kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
    }

    /* We deliberately ignore errors here, for kernels which have
     * the ONE_REG calls, but don't support the specific
     * registers, there's a reasonable chance things will still
     * work, at least until we try to migrate. */
    for (i = 0; i < 1024; i++) {
        uint64_t id = env->spr_cb[i].one_reg_id;

        if (id != 0) {
            kvm_put_one_spr(cs, id, i);
        }
    }

#ifdef TARGET_PPC64
    for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
        kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
    }
    for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
        kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
    }
    kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
    kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
    kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
    kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
    kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
    kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
    kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
    kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
    kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
    kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);

    if (kvm_put_vpa(cs) < 0) {
        DPRINTF("Warning: Unable to set VPA information to KVM\n");
    }

    kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
#endif /* TARGET_PPC64 */

    return ret;
}
static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
{
    env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
}
static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    struct kvm_sregs sregs;
    int ret;

    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
    if (ret < 0) {
        return ret;
    }

    if (sregs.u.e.features & KVM_SREGS_E_BASE) {
        env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
        env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
        env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
        env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
        env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
        env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
        env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
        env->spr[SPR_DECR] = sregs.u.e.dec;
        env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
        env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
        env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
    }

    if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
        env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
        env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
        env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
        env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
        env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
    }

    if (sregs.u.e.features & KVM_SREGS_E_64) {
        env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
    }

    if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
        env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
    }

    if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
        env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
        kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0);
        env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
        kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1);
        env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
        kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2);
        env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
        kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3);
        env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
        kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4);
        env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
        kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5);
        env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
        kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6);
        env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
        kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7);
        env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
        kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8);
        env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
        kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9);
        env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
        kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10);
        env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
        kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11);
        env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
        kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12);
        env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
        kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13);
        env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
        kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14);
        env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
        kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15);

        if (sregs.u.e.features & KVM_SREGS_E_SPE) {
            env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
            kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32);
            env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
            kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33);
            env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
            kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34);
        }

        if (sregs.u.e.features & KVM_SREGS_E_PM) {
            env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
            kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35);
        }

        if (sregs.u.e.features & KVM_SREGS_E_PC) {
            env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
            kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36);
            env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
            kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
        }
    }

    if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
        env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
        env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
        env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
        env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
        env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
        env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
        env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
        env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
        env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
        env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
    }

    if (sregs.u.e.features & KVM_SREGS_EXP) {
        env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
    }

    if (sregs.u.e.features & KVM_SREGS_E_PD) {
        env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
        env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
    }

    if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
        env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
        env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
        env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;

        if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
            env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
            env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
        }
    }

    return 0;
}
static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    struct kvm_sregs sregs;
    int ret;
    int i;

    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
    if (ret < 0) {
        return ret;
    }

    ppc_store_sdr1(env, sregs.u.s.sdr1);

    /*
     * The packed SLB array we get from KVM_GET_SREGS only contains
     * information about valid entries. So we flush our internal copy
     * to get rid of stale ones, then put all valid SLB entries back
     * in.
     */
    memset(env->slb, 0, sizeof(env->slb));
    for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
        target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
        target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;

        /*
         * Only restore valid entries
         */
        if (rb & SLB_ESID_V) {
            ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
        }
    }

    for (i = 0; i < 16; i++) {
        env->sr[i] = sregs.u.s.ppc32.sr[i];
    }

    for (i = 0; i < 8; i++) {
        env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
        env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
        env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
        env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
    }

    return 0;
}
int kvm_arch_get_registers(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_regs regs;
    uint32_t cr;
    int i, ret;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    cr = regs.cr;
    for (i = 7; i >= 0; i--) {
        env->crf[i] = cr & 15;
        cr >>= 4;
    }

    env->ctr = regs.ctr;
    cpu_write_xer(env, regs.xer);
    env->msr = regs.msr;

    env->spr[SPR_SRR0] = regs.srr0;
    env->spr[SPR_SRR1] = regs.srr1;

    env->spr[SPR_SPRG0] = regs.sprg0;
    env->spr[SPR_SPRG1] = regs.sprg1;
    env->spr[SPR_SPRG2] = regs.sprg2;
    env->spr[SPR_SPRG3] = regs.sprg3;
    env->spr[SPR_SPRG4] = regs.sprg4;
    env->spr[SPR_SPRG5] = regs.sprg5;
    env->spr[SPR_SPRG6] = regs.sprg6;
    env->spr[SPR_SPRG7] = regs.sprg7;

    env->spr[SPR_BOOKE_PID] = regs.pid;

    for (i = 0; i < 32; i++) {
        env->gpr[i] = regs.gpr[i];
    }

    if (cap_booke_sregs) {
        ret = kvmppc_get_booke_sregs(cpu);
        if (ret < 0) {
            return ret;
        }
    }

    if (cap_segstate) {
        ret = kvmppc_get_books_sregs(cpu);
        if (ret < 0) {
            return ret;
        }
    }

    if (cap_hior) {
        kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
    }

    /* We deliberately ignore errors here, for kernels which have
     * the ONE_REG calls, but don't support the specific
     * registers, there's a reasonable chance things will still
     * work, at least until we try to migrate. */
    for (i = 0; i < 1024; i++) {
        uint64_t id = env->spr_cb[i].one_reg_id;

        if (id != 0) {
            kvm_get_one_spr(cs, id, i);
        }
    }

#ifdef TARGET_PPC64
    for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
        kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
    }
    for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
        kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
    }
    kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
    kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
    kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
    kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
    kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
    kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
    kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
    kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
    kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
    kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);

    if (kvm_get_vpa(cs) < 0) {
        DPRINTF("Warning: Unable to get VPA information from KVM\n");
    }

    kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
#endif

    return 0;
}
int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
{
    unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;

    if (irq != PPC_INTERRUPT_EXT) {
        return 0;
    }

    if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
        return 0;
    }

    kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);

    return 0;
}
#if defined(TARGET_PPC64)
#define PPC_INPUT_INT PPC970_INPUT_INT
#else
#define PPC_INPUT_INT PPC6xx_INPUT_INT
#endif

void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    unsigned irq;
    int r;

    qemu_mutex_lock_iothread();

    /* PowerPC QEMU tracks the various core input pins (interrupt, critical
     * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
    if (!cap_interrupt_level &&
        run->ready_for_interrupt_injection &&
        (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
        (env->irq_input_state & (1 << PPC_INPUT_INT)))
    {
        /* For now KVM disregards the 'irq' argument. However, in the
         * future KVM could cache it in-kernel to avoid a heavyweight exit
         * when reading the UIC.
         */
        irq = KVM_INTERRUPT_SET;

        DPRINTF("injected interrupt %d\n", irq);
        r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
        if (r < 0) {
            printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
        }

        /* Always wake up soon in case the interrupt was level based */
        timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
                  (NANOSECONDS_PER_SECOND / 50));
    }

    /* We don't know if there are more interrupts pending after this. However,
     * the guest will return to userspace in the course of handling this one
     * anyways, so we will get a chance to deliver the rest. */

    qemu_mutex_unlock_iothread();
}
MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
{
    return MEMTXATTRS_UNSPECIFIED;
}

int kvm_arch_process_async_events(CPUState *cs)
{
    return cs->halted;
}
static int kvmppc_handle_halt(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;

    if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
        cs->halted = 1;
        cs->exception_index = EXCP_HLT;
    }

    return 0;
}
/* map dcr access to existing qemu dcr emulation */
static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
{
    if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
        fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);

    return 0;
}

static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
{
    if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
        fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);

    return 0;
}
int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
{
    /* Mixed endian case is not handled */
    uint32_t sc = debug_inst_opcode;

    if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
                            sizeof(sc), 0) ||
        cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
        return -EINVAL;
    }

    return 0;
}

int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
{
    uint32_t sc;

    if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
        sc != debug_inst_opcode ||
        cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
                            sizeof(sc), 1)) {
        return -EINVAL;
    }

    return 0;
}
static int find_hw_breakpoint(target_ulong addr, int type)
{
    int n;

    assert((nb_hw_breakpoint + nb_hw_watchpoint)
           <= ARRAY_SIZE(hw_debug_points));

    for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
        if (hw_debug_points[n].addr == addr &&
            hw_debug_points[n].type == type) {
            return n;
        }
    }

    return -1;
}

static int find_hw_watchpoint(target_ulong addr, int *flag)
{
    int n;

    n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
    if (n >= 0) {
        *flag = BP_MEM_ACCESS;
        return n;
    }

    n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
    if (n >= 0) {
        *flag = BP_MEM_WRITE;
        return n;
    }

    n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
    if (n >= 0) {
        *flag = BP_MEM_READ;
        return n;
    }

    return -1;
}
int kvm_arch_insert_hw_breakpoint(target_ulong addr,
                                  target_ulong len, int type)
{
    if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
        return -ENOBUFS;
    }

    hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
    hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;

    switch (type) {
    case GDB_BREAKPOINT_HW:
        if (nb_hw_breakpoint >= max_hw_breakpoint) {
            return -ENOBUFS;
        }

        if (find_hw_breakpoint(addr, type) >= 0) {
            return -EEXIST;
        }

        nb_hw_breakpoint++;
        break;

    case GDB_WATCHPOINT_WRITE:
    case GDB_WATCHPOINT_READ:
    case GDB_WATCHPOINT_ACCESS:
        if (nb_hw_watchpoint >= max_hw_watchpoint) {
            return -ENOBUFS;
        }

        if (find_hw_breakpoint(addr, type) >= 0) {
            return -EEXIST;
        }

        nb_hw_watchpoint++;
        break;

    default:
        return -ENOSYS;
    }

    return 0;
}
int kvm_arch_remove_hw_breakpoint(target_ulong addr,
                                  target_ulong len, int type)
{
    int n;

    n = find_hw_breakpoint(addr, type);
    if (n < 0) {
        return -ENOENT;
    }

    switch (type) {
    case GDB_BREAKPOINT_HW:
        nb_hw_breakpoint--;
        break;

    case GDB_WATCHPOINT_WRITE:
    case GDB_WATCHPOINT_READ:
    case GDB_WATCHPOINT_ACCESS:
        nb_hw_watchpoint--;
        break;

    default:
        return -ENOSYS;
    }
    hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];

    return 0;
}

void kvm_arch_remove_all_hw_breakpoints(void)
{
    nb_hw_breakpoint = nb_hw_watchpoint = 0;
}
void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
{
    int n;

    /* Software Breakpoint updates */
    if (kvm_sw_breakpoints_active(cs)) {
        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
    }

    assert((nb_hw_breakpoint + nb_hw_watchpoint)
           <= ARRAY_SIZE(hw_debug_points));
    assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));

    if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
        memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
        for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
            switch (hw_debug_points[n].type) {
            case GDB_BREAKPOINT_HW:
                dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
                break;
            case GDB_WATCHPOINT_WRITE:
                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
                break;
            case GDB_WATCHPOINT_READ:
                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
                break;
            case GDB_WATCHPOINT_ACCESS:
                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
                                       KVMPPC_DEBUG_WATCH_READ;
                break;
            default:
                cpu_abort(cs, "Unsupported breakpoint type\n");
            }
            dbg->arch.bp[n].addr = hw_debug_points[n].addr;
        }
    }
}
static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;
    struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
    int handle = 0;
    int n;
    int flag = 0;

    if (cs->singlestep_enabled) {
        handle = 1;
    } else if (arch_info->status) {
        if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
            if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
                n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
                if (n >= 0) {
                    handle = 1;
                }
            } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
                                            KVMPPC_DEBUG_WATCH_WRITE)) {
                n = find_hw_watchpoint(arch_info->address, &flag);
                if (n >= 0) {
                    handle = 1;
                    cs->watchpoint_hit = &hw_watchpoint;
                    hw_watchpoint.vaddr = hw_debug_points[n].addr;
                    hw_watchpoint.flags = flag;
                }
            }
        }
    } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
        handle = 1;
    } else {
        /* QEMU is not able to handle the debug exception, so inject
         * a program exception into the guest;
         * yes, a program exception and NOT a debug exception!
         * When QEMU is using debug resources then the debug exception must
         * always be set. To achieve this we set MSR_DE and also set
         * MSRP_DEP so the guest cannot change MSR_DE.
         * When emulating debug resources for the guest we want the guest
         * to control MSR_DE (enable/disable the debug interrupt on demand).
         * Supporting both configurations at the same time is NOT possible.
         * So the result is that we cannot share debug resources
         * between QEMU and the guest on BOOKE architecture.
         * In the current design QEMU gets the priority over the guest;
         * this means that if QEMU is using debug resources then the guest
         * cannot use them.
         * For software breakpoints QEMU uses a privileged instruction, so
         * there cannot be any reason that we end up here for a guest-set
         * debug exception; the only possibility is that the guest executed
         * a privileged / illegal instruction and that's why we are
         * injecting a program interrupt.
         */
        cpu_synchronize_state(cs);
        /* env->nip is PC, so increment this by 4 to use
         * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4.
         */
        env->nip += 4;
        cs->exception_index = POWERPC_EXCP_PROGRAM;
        env->error_code = POWERPC_EXCP_INVAL;
        ppc_cpu_do_interrupt(cs);
    }

    return handle;
}
int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    int ret;

    qemu_mutex_lock_iothread();

    switch (run->exit_reason) {
    case KVM_EXIT_DCR:
        if (run->dcr.is_write) {
            DPRINTF("handle dcr write\n");
            ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
        } else {
            DPRINTF("handle dcr read\n");
            ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
        }
        break;
    case KVM_EXIT_HLT:
        DPRINTF("handle halt\n");
        ret = kvmppc_handle_halt(cpu);
        break;
#if defined(TARGET_PPC64)
    case KVM_EXIT_PAPR_HCALL:
        DPRINTF("handle PAPR hypercall\n");
        run->papr_hcall.ret = spapr_hypercall(cpu,
                                              run->papr_hcall.nr,
                                              run->papr_hcall.args);
        ret = 0;
        break;
#endif
    case KVM_EXIT_EPR:
        DPRINTF("handle epr\n");
        run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
        ret = 0;
        break;
    case KVM_EXIT_WATCHDOG:
        DPRINTF("handle watchdog expiry\n");
        watchdog_perform_action();
        ret = 0;
        break;

    case KVM_EXIT_DEBUG:
        DPRINTF("handle debug exception\n");
        if (kvm_handle_debug(cpu, run)) {
            ret = EXCP_DEBUG;
            break;
        }
        /* re-enter, this exception was guest-internal */
        ret = 0;
        break;

    default:
        fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
        ret = -1;
        break;
    }

    qemu_mutex_unlock_iothread();
    return ret;
}
int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
{
    CPUState *cs = CPU(cpu);
    uint32_t bits = tsr_bits;
    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_OR_TSR,
        .addr = (uintptr_t) &bits,
    };

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
}

int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
{
    CPUState *cs = CPU(cpu);
    uint32_t bits = tsr_bits;
    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_CLEAR_TSR,
        .addr = (uintptr_t) &bits,
    };

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
}

int kvmppc_set_tcr(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;
    uint32_t tcr = env->spr[SPR_BOOKE_TCR];

    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_TCR,
        .addr = (uintptr_t) &tcr,
    };

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
}
int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    int ret;

    if (!kvm_enabled()) {
        return -1;
    }

    if (!cap_ppc_watchdog) {
        printf("warning: KVM does not support watchdog");
        return -1;
    }

    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
    if (ret < 0) {
        fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
                __func__, strerror(-ret));
        return ret;
    }

    return ret;
}
static int read_cpuinfo(const char *field, char *value, int len)
{
    FILE *f;
    int ret = 1;
    int field_len = strlen(field);
    char line[512];

    f = fopen("/proc/cpuinfo", "r");
    if (!f) {
        return ret;
    }

    do {
        if (!fgets(line, sizeof(line), f)) {
            break;
        }
        if (!strncmp(line, field, field_len)) {
            pstrcpy(value, len, line);
            ret = 0;
            break;
        }
    } while (*line);

    fclose(f);

    return ret;
}
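/*
 * Added commentary (not upstream text): on a Power host, /proc/cpuinfo
 * typically carries a line of roughly this shape, which read_cpuinfo()
 * copies out and kvmppc_get_tbfreq() then parses after the ':' separator:
 *
 *     timebase        : 512000000
 */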
uint32_t kvmppc_get_tbfreq(void)
{
    char line[512];
    char *ns;
    uint32_t retval = NANOSECONDS_PER_SECOND;

    if (read_cpuinfo("timebase", line, sizeof(line))) {
        return retval;
    }

    if (!(ns = strchr(line, ':'))) {
        return retval;
    }

    ns++;

    return atoi(ns);
}
bool kvmppc_get_host_serial(char **value)
{
    return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
                               NULL);
}

bool kvmppc_get_host_model(char **value)
{
    return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
}
/* Try to find a device tree node for a CPU with clock-frequency property */
static int kvmppc_find_cpu_dt(char *buf, int buf_len)
{
    struct dirent *dirp;
    DIR *dp;

    if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
        printf("Can't open directory " PROC_DEVTREE_CPU "\n");
        return -1;
    }

    buf[0] = '\0';
    while ((dirp = readdir(dp)) != NULL) {
        FILE *f;

        snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
                 dirp->d_name);
        f = fopen(buf, "r");
        if (f) {
            snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
            fclose(f);
            break;
        }
    }
    closedir(dp);

    if (buf[0] == '\0') {
        printf("Unknown host!\n");
        return -1;
    }

    return 0;
}
static uint64_t kvmppc_read_int_dt(const char *filename)
{
    union {
        uint32_t v32;
        uint64_t v64;
    } u;
    FILE *f;
    int len;

    f = fopen(filename, "rb");
    if (!f) {
        return -1;
    }

    len = fread(&u, 1, sizeof(u), f);
    fclose(f);
    switch (len) {
    case 4:
        /* property is a 32-bit quantity */
        return be32_to_cpu(u.v32);
    case 8:
        return be64_to_cpu(u.v64);
    }

    return 0;
}
/* Read a CPU node property from the host device tree that's a single
 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
 * (can't find or open the property, or doesn't understand the
 * format) */
static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
{
    char buf[PATH_MAX], *tmp;
    uint64_t val;

    if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
        return -1;
    }

    tmp = g_strdup_printf("%s/%s", buf, propname);
    val = kvmppc_read_int_dt(tmp);
    g_free(tmp);

    return val;
}

uint64_t kvmppc_get_clockfreq(void)
{
    return kvmppc_read_int_cpu_dt("clock-frequency");
}
static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
{
    PowerPCCPU *cpu = ppc_env_get_cpu(env);
    CPUState *cs = CPU(cpu);

    if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
        !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
        return 0;
    }

    return 1;
}
int kvmppc_get_hasidle(CPUPPCState *env)
{
    struct kvm_ppc_pvinfo pvinfo;

    if (!kvmppc_get_pvinfo(env, &pvinfo) &&
        (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
        return 1;
    }

    return 0;
}
int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
{
    uint32_t *hc = (uint32_t *)buf;
    struct kvm_ppc_pvinfo pvinfo;

    if (!kvmppc_get_pvinfo(env, &pvinfo)) {
        memcpy(buf, pvinfo.hcall, buf_len);
        return 0;
    }

    /*
     * Fallback to always fail hypercalls regardless of endianness:
     *
     *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
     *     li r3, -1
     *     b .+8       (becomes nop in wrong endian)
     *     bswap32(li r3, -1)
     */

    hc[0] = cpu_to_be32(0x08000048);
    hc[1] = cpu_to_be32(0x3860ffff);
    hc[2] = cpu_to_be32(0x48000008);
    hc[3] = cpu_to_be32(bswap32(0x3860ffff));

    return 0;
}
static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
{
    return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
}

void kvmppc_enable_logical_ci_hcalls(void)
{
    /*
     * FIXME: it would be nice if we could detect the cases where
     * we're using a device which requires the in kernel
     * implementation of these hcalls, but the kernel lacks them and
     * produce a warning.
     */
    kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
    kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
}

void kvmppc_enable_set_mode_hcall(void)
{
    kvmppc_enable_hcall(kvm_state, H_SET_MODE);
}

void kvmppc_enable_clear_ref_mod_hcalls(void)
{
    kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
    kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
}
void kvmppc_set_papr(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    int ret;

    if (!kvm_enabled()) {
        return;
    }

    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
    if (ret) {
        error_report("This vCPU type or KVM version does not support PAPR");
        exit(1);
    }

    /* Update the capability flag so we sync the right information
     * with kvm */
    cap_papr = 1;
}
int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
{
    return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
}
void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
{
    CPUState *cs = CPU(cpu);
    int ret;

    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
    if (ret && mpic_proxy) {
        error_report("This KVM version does not support EPR");
        exit(1);
    }
}
int kvmppc_smt_threads(void)
{
    return cap_ppc_smt ? cap_ppc_smt : 1;
}

int kvmppc_set_smt_threads(int smt)
{
    int ret;

    ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SMT, 0, smt, 0);
    if (!ret) {
        cap_ppc_smt = smt;
    }
    return ret;
}
void kvmppc_hint_smt_possible(Error **errp)
{
    int i;
    GString *g;
    char *s;

    assert(kvm_enabled());
    if (cap_ppc_smt_possible) {
        g = g_string_new("Available VSMT modes:");
        for (i = 63; i >= 0; i--) {
            if ((1UL << i) & cap_ppc_smt_possible) {
                g_string_append_printf(g, " %lu", (1UL << i));
            }
        }
        s = g_string_free(g, false);
        error_append_hint(errp, "%s.\n", s);
        g_free(s);
    } else {
        error_append_hint(errp,
                          "This KVM seems to be too old to support VSMT.\n");
    }
}
uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
{
    struct kvm_ppc_smmu_info info;
    long rampagesize, best_page_shift;
    int i;

    /* Find the largest hardware supported page size that's less than
     * or equal to the (logical) backing page size of guest RAM */
    kvm_get_smmu_info(&info, &error_fatal);
    rampagesize = qemu_getrampagesize();
    best_page_shift = 0;

    for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
        struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];

        if (!sps->page_shift) {
            continue;
        }

        if ((sps->page_shift > best_page_shift)
            && ((1UL << sps->page_shift) <= rampagesize)) {
            best_page_shift = sps->page_shift;
        }
    }

    return MIN(current_size,
               1ULL << (best_page_shift + hash_shift - 7));
}
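/*
 * Added commentary (not upstream text): the clamp above reads as "one
 * 128-byte PTEG per RMA page".  A hash table of 2^hash_shift bytes holds
 * 2^(hash_shift - 7) PTEGs, and multiplying by the 2^best_page_shift page
 * size gives exactly the 1ULL << (best_page_shift + hash_shift - 7) bound
 * that the current RMA size is limited to.
 */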
bool kvmppc_spapr_use_multitce(void)
{
    return cap_spapr_multitce;
}

int kvmppc_spapr_enable_inkernel_multitce(void)
{
    int ret;

    ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
                            H_PUT_TCE_INDIRECT, 1);
    if (!ret) {
        ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
                                H_STUFF_TCE, 1);
    }

    return ret;
}
void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
                              uint64_t bus_offset, uint32_t nb_table,
                              int *pfd, bool need_vfio)
{
    long len;
    int fd;
    void *table;

    /* Must set fd to -1 so we don't try to munmap when called for
     * destroying the table, which the upper layers -will- do
     */
    *pfd = -1;
    if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
        return NULL;
    }

    if (cap_spapr_tce_64) {
        struct kvm_create_spapr_tce_64 args = {
            .liobn = liobn,
            .page_shift = page_shift,
            .offset = bus_offset >> page_shift,
            .size = nb_table,
            .flags = 0,
        };
        fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
        if (fd < 0) {
            fprintf(stderr,
                    "KVM: Failed to create TCE64 table for liobn 0x%x\n",
                    liobn);
            return NULL;
        }
    } else if (cap_spapr_tce) {
        uint64_t window_size = (uint64_t) nb_table << page_shift;
        struct kvm_create_spapr_tce args = {
            .liobn = liobn,
            .window_size = window_size,
        };
        if ((window_size != args.window_size) || bus_offset) {
            return NULL;
        }
        fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
        if (fd < 0) {
            fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
                    liobn);
            return NULL;
        }
    } else {
        return NULL;
    }

    len = nb_table * sizeof(uint64_t);
    /* FIXME: round this up to page size */

    table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (table == MAP_FAILED) {
        fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
                liobn);
        close(fd);
        return NULL;
    }

    *pfd = fd;
    return table;
}
int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
{
    long len;

    if (fd < 0) {
        return -1;
    }

    len = nb_table * sizeof(uint64_t);
    if ((munmap(table, len) < 0) ||
        (close(fd) < 0)) {
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
                strerror(errno));
        /* Leak the table */
    }

    return 0;
}
int kvmppc_reset_htab(int shift_hint)
{
    uint32_t shift = shift_hint;

    if (!kvm_enabled()) {
        /* Full emulation, tell caller to allocate htab itself */
        return 0;
    }
    if (kvm_vm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
        int ret;

        ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
        if (ret == -ENOTTY) {
            /* At least some versions of PR KVM advertise the
             * capability, but don't implement the ioctl(). Oops.
             * Return 0 so that we allocate the htab in qemu, as is
             * correct for PR. */
            return 0;
        } else if (ret < 0) {
            return ret;
        }
        return shift;
    }

    /* We have a kernel that predates the htab reset calls. For PR
     * KVM, we need to allocate the htab ourselves, for an HV KVM of
     * this era, it has allocated a 16MB fixed size hash table already. */
    if (kvmppc_is_pr(kvm_state)) {
        /* PR - tell caller to allocate htab */
        return 0;
    } else {
        /* HV - assume 16MB kernel allocated htab */
        return 24;
    }
}
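/*
 * Added commentary (not upstream text): callers read kvmppc_reset_htab()'s
 * result as "0 = QEMU must allocate the hash table itself" versus
 * "> 0 = the kernel owns an HPT of 2^N bytes" (the 24 above corresponds to
 * the assumed 16 MiB table), with negative values reporting an ioctl error.
 */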
static inline uint32_t mfpvr(void)
{
    uint32_t pvr;

    asm ("mfpvr %0" : "=r"(pvr));
    return pvr;
}

static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    if (on) {
        *word |= flags;
    } else {
        *word &= ~flags;
    }
}
static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
{
    PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
    uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
    uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");

    /* Now fix up the class with information we can query from the host */
    pcc->pvr = mfpvr();

    alter_insns(&pcc->insns_flags, PPC_ALTIVEC,
                qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_ALTIVEC);
    alter_insns(&pcc->insns_flags2, PPC2_VSX,
                qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_VSX);
    alter_insns(&pcc->insns_flags2, PPC2_DFP,
                qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_DFP);

    if (dcache_size != -1) {
        pcc->l1_dcache_size = dcache_size;
    }

    if (icache_size != -1) {
        pcc->l1_icache_size = icache_size;
    }

#if defined(TARGET_PPC64)
    pcc->radix_page_info = kvm_get_radix_page_info();

    if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) {
        /*
         * POWER9 DD1 has some bugs which make it not really ISA 3.00
         * compliant. More importantly, advertising ISA 3.00
         * architected mode may prevent guests from activating
         * necessary DD1 workarounds.
         */
        pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07
                                | PCR_COMPAT_2_06 | PCR_COMPAT_2_05);
    }
#endif /* defined(TARGET_PPC64) */
}
bool kvmppc_has_cap_epr(void)
{
    return cap_epr;
}

bool kvmppc_has_cap_fixup_hcalls(void)
{
    return cap_fixup_hcalls;
}

bool kvmppc_has_cap_htm(void)
{
    return cap_htm;
}

bool kvmppc_has_cap_mmu_radix(void)
{
    return cap_mmu_radix;
}

bool kvmppc_has_cap_mmu_hash_v3(void)
{
    return cap_mmu_hash_v3;
}

static bool kvmppc_power8_host(void)
{
    bool ret = false;
#ifdef TARGET_PPC64
    {
        uint32_t base_pvr = CPU_POWERPC_POWER_SERVER_MASK & mfpvr();

        ret = (base_pvr == CPU_POWERPC_POWER8E_BASE) ||
              (base_pvr == CPU_POWERPC_POWER8NVL_BASE) ||
              (base_pvr == CPU_POWERPC_POWER8_BASE);
    }
#endif /* TARGET_PPC64 */
    return ret;
}
2348 static int parse_cap_ppc_safe_cache(struct kvm_ppc_cpu_char c
)
2350 bool l1d_thread_priv_req
= !kvmppc_power8_host();
2352 if (~c
.behaviour
& c
.behaviour_mask
& H_CPU_BEHAV_L1D_FLUSH_PR
) {
2354 } else if ((!l1d_thread_priv_req
||
2355 c
.character
& c
.character_mask
& H_CPU_CHAR_L1D_THREAD_PRIV
) &&
2356 (c
.character
& c
.character_mask
2357 & (H_CPU_CHAR_L1D_FLUSH_ORI30
| H_CPU_CHAR_L1D_FLUSH_TRIG2
))) {
2364 static int parse_cap_ppc_safe_bounds_check(struct kvm_ppc_cpu_char c
)
2366 if (~c
.behaviour
& c
.behaviour_mask
& H_CPU_BEHAV_BNDS_CHK_SPEC_BAR
) {
2368 } else if (c
.character
& c
.character_mask
& H_CPU_CHAR_SPEC_BAR_ORI31
) {
2375 static int parse_cap_ppc_safe_indirect_branch(struct kvm_ppc_cpu_char c
)
2377 if (c
.character
& c
.character_mask
& H_CPU_CHAR_CACHE_COUNT_DIS
) {
2378 return SPAPR_CAP_FIXED_CCD
;
2379 } else if (c
.character
& c
.character_mask
& H_CPU_CHAR_BCCTRL_SERIALISED
) {
2380 return SPAPR_CAP_FIXED_IBS
;
static void kvmppc_get_cpu_characteristics(KVMState *s)
{
    struct kvm_ppc_cpu_char c;
    int ret;

    /* Assume broken */
    cap_ppc_safe_cache = 0;
    cap_ppc_safe_bounds_check = 0;
    cap_ppc_safe_indirect_branch = 0;

    ret = kvm_vm_check_extension(s, KVM_CAP_PPC_GET_CPU_CHAR);
    if (!ret) {
        return;
    }
    ret = kvm_vm_ioctl(s, KVM_PPC_GET_CPU_CHAR, &c);
    if (ret < 0) {
        return;
    }

    cap_ppc_safe_cache = parse_cap_ppc_safe_cache(c);
    cap_ppc_safe_bounds_check = parse_cap_ppc_safe_bounds_check(c);
    cap_ppc_safe_indirect_branch = parse_cap_ppc_safe_indirect_branch(c);
}

int kvmppc_get_cap_safe_cache(void)
{
    return cap_ppc_safe_cache;
}

int kvmppc_get_cap_safe_bounds_check(void)
{
    return cap_ppc_safe_bounds_check;
}

int kvmppc_get_cap_safe_indirect_branch(void)
{
    return cap_ppc_safe_indirect_branch;
}

bool kvmppc_has_cap_spapr_vfio(void)
{
    return cap_spapr_vfio;
}

PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
{
    uint32_t host_pvr = mfpvr();
    PowerPCCPUClass *pvr_pcc;

    pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
    if (pvr_pcc == NULL) {
        pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
    }

    return pvr_pcc;
}

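/*
 * Register the TYPE_HOST_POWERPC_CPU type, using the CPU class that best
 * matches the host PVR as its parent, and make it the default CPU type
 * for the sPAPR machine.
 */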
static int kvm_ppc_register_host_cpu_type(MachineState *ms)
{
    TypeInfo type_info = {
        .name = TYPE_HOST_POWERPC_CPU,
        .class_init = kvmppc_host_cpu_class_init,
    };
    MachineClass *mc = MACHINE_GET_CLASS(ms);
    PowerPCCPUClass *pvr_pcc;
    ObjectClass *oc;
    DeviceClass *dc;
    int i;

    pvr_pcc = kvm_ppc_get_host_cpu_class();
    if (pvr_pcc == NULL) {
        return -1;
    }
    type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
    type_register(&type_info);
    if (object_dynamic_cast(OBJECT(ms), TYPE_SPAPR_MACHINE)) {
        /* override TCG default cpu type with 'host' cpu model */
        mc->default_cpu_type = TYPE_HOST_POWERPC_CPU;
    }

    oc = object_class_by_name(type_info.name);

    /*
     * Update generic CPU family class alias (e.g. on a POWER8NVL host,
     * we want "POWER8" to be a "family" alias that points to the current
     * host CPU type, too)
     */
    dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
    for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
        if (strcasecmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
            char *suffix;

            ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
            suffix = strstr(ppc_cpu_aliases[i].model, POWERPC_CPU_TYPE_SUFFIX);
            if (suffix) {
                *suffix = 0;
            }
            break;
        }
    }

    return 0;
}

int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
{
    struct kvm_rtas_token_args args = {
        .token = token,
    };

    if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
        return -ENOENT;
    }

    strncpy(args.name, function, sizeof(args.name));

    return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
}

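/*
 * Open a file descriptor onto the guest's hash page table via
 * KVM_PPC_GET_HTAB_FD.  Reading the fd streams HPT entries out of the
 * kernel; opening it for write allows entries to be pushed back in.
 */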
int kvmppc_get_htab_fd(bool write, uint64_t index, Error **errp)
{
    struct kvm_get_htab_fd s = {
        .flags = write ? KVM_GET_HTAB_WRITE : 0,
        .start_index = index,
    };
    int ret;

    if (!cap_htab_fd) {
        error_setg(errp, "KVM version doesn't support %s the HPT",
                   write ? "writing" : "reading");
        return -ENOTSUP;
    }

    ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
    if (ret < 0) {
        error_setg(errp, "Unable to open fd for %s HPT %s KVM: %s",
                   write ? "writing" : "reading", write ? "to" : "from",
                   strerror(errno));
        return -errno;
    }

    return ret;
}

int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
{
    int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    uint8_t buf[bufsize];
    ssize_t rc;

    do {
        rc = read(fd, buf, bufsize);
        if (rc < 0) {
            fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
                    strerror(errno));
            return rc;
        } else if (rc) {
            uint8_t *buffer = buf;
            ssize_t n = rc;

            while (n) {
                struct kvm_get_htab_header *head =
                    (struct kvm_get_htab_header *) buffer;
                size_t chunksize = sizeof(*head) +
                    HASH_PTE_SIZE_64 * head->n_valid;

                qemu_put_be32(f, head->index);
                qemu_put_be16(f, head->n_valid);
                qemu_put_be16(f, head->n_invalid);
                qemu_put_buffer(f, (void *)(head + 1),
                                HASH_PTE_SIZE_64 * head->n_valid);

                buffer += chunksize;
                n -= chunksize;
            }
        }
    } while ((rc != 0)
             && ((max_ns < 0)
                 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime)
                     < max_ns)));

    return (rc == 0) ? 1 : 0;
}

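/*
 * Inverse of kvmppc_save_htab(): rebuild one chunk of the hash page table
 * on the destination by reading the PTEs from the migration stream and
 * writing a kvm_get_htab_header plus PTE payload into a write-mode
 * htab fd.
 */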
int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
                           uint16_t n_valid, uint16_t n_invalid)
{
    struct kvm_get_htab_header *buf;
    size_t chunksize = sizeof(*buf) + n_valid * HASH_PTE_SIZE_64;
    ssize_t rc;

    buf = alloca(chunksize);
    buf->index = index;
    buf->n_valid = n_valid;
    buf->n_invalid = n_invalid;

    qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64 * n_valid);

    rc = write(fd, buf, chunksize);
    if (rc < 0) {
        fprintf(stderr, "Error writing KVM hash table: %s\n",
                strerror(errno));
        return rc;
    }
    if (rc != chunksize) {
        /* We should never get a short write on a single chunk */
        fprintf(stderr, "Short write, restoring KVM hash table\n");
        return -1;
    }
    return 0;
}

bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
{
    return true;
}

void kvm_arch_init_irq_routing(KVMState *s)
{
}

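/*
 * kvmppc_read_hptes()/kvmppc_write_hpte() give the rest of QEMU access to
 * individual HPT entries held inside KVM, for example when QEMU needs to
 * walk or patch the guest hash table itself, by going through a temporary
 * htab fd rather than mapping the table directly.
 */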
void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
{
    int fd, rc;
    int i;

    fd = kvmppc_get_htab_fd(false, ptex, &error_abort);

    i = 0;
    while (i < n) {
        struct kvm_get_htab_header *hdr;
        int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
        char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];

        rc = read(fd, buf, sizeof(buf));
        if (rc < 0) {
            hw_error("kvmppc_read_hptes: Unable to read HPTEs");
        }

        hdr = (struct kvm_get_htab_header *)buf;
        while ((i < n) && ((char *)hdr < (buf + rc))) {
            int invalid = hdr->n_invalid, valid = hdr->n_valid;

            if (hdr->index != (ptex + i)) {
                hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
                         " != (%"HWADDR_PRIu" + %d", hdr->index, ptex, i);
            }

            if (n - i < valid) {
                valid = n - i;
            }
            memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * valid);
            i += valid;

            if ((n - i) < invalid) {
                invalid = n - i;
            }
            memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
            i += invalid;

            hdr = (struct kvm_get_htab_header *)
                ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
        }
    }

    close(fd);
}

void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
{
    int fd, rc;
    struct {
        struct kvm_get_htab_header hdr;
        uint64_t pte0;
        uint64_t pte1;
    } buf;

    fd = kvmppc_get_htab_fd(true, 0 /* Ignored */, &error_abort);

    buf.hdr.n_valid = 1;
    buf.hdr.n_invalid = 0;
    buf.hdr.index = ptex;
    buf.pte0 = cpu_to_be64(pte0);
    buf.pte1 = cpu_to_be64(pte1);

    rc = write(fd, &buf, sizeof(buf));
    if (rc != sizeof(buf)) {
        hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
    }

    close(fd);
}

int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
                             uint64_t address, uint32_t data, PCIDevice *dev)
{
    return 0;
}

int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
                                int vector, PCIDevice *dev)
{
    return 0;
}

int kvm_arch_release_virq_post(int virq)
{
    return 0;
}

int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    return data & 0xffff;
}

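/*
 * Enable in-kernel handling of the H_RANDOM hypercall, so that the kernel
 * can serve it from the host hardware RNG, provided it advertises
 * KVM_CAP_PPC_HWRNG.
 */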
int kvmppc_enable_hwrng(void)
{
    if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
        return -1;
    }

    return kvmppc_enable_hcall(kvm_state, H_RANDOM);
}

void kvmppc_check_papr_resize_hpt(Error **errp)
{
    if (!kvm_enabled()) {
        return; /* No KVM, we're good */
    }

    if (cap_resize_hpt) {
        return; /* Kernel has explicit support, we're good */
    }

    /* Otherwise fallback on looking for PR KVM */
    if (kvmppc_is_pr(kvm_state)) {
        return;
    }

    error_setg(errp,
               "Hash page table resizing not available with this KVM version");
}

int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift)
{
    CPUState *cs = CPU(cpu);
    struct kvm_ppc_resize_hpt rhpt = {
        .flags = flags,
        .shift = shift,
    };

    if (!cap_resize_hpt) {
        return -ENOSYS;
    }

    return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
}

int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift)
{
    CPUState *cs = CPU(cpu);
    struct kvm_ppc_resize_hpt rhpt = {
        .flags = flags,
        .shift = shift,
    };

    if (!cap_resize_hpt) {
        return -ENOSYS;
    }

    return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt);
}

/*
 * This is a helper function to detect a post migration scenario
 * in which a guest, running as KVM-HV, freezes in cpu_post_load because
 * the guest kernel can't handle a PVR value other than the actual host
 * PVR in KVM_SET_SREGS, even if pvr_match() returns true.
 *
 * If we don't have cap_ppc_pvr_compat and we're not running in PR
 * (so, we're HV), return true. The workaround itself is done in
 * cpu_post_load.
 *
 * The order here is important: we'll only check for KVM PR as a
 * fallback if the guest kernel can't handle the situation itself.
 * We want to avoid querying the running KVM type at the QEMU level
 * as much as possible.
 */
bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);

    if (!kvm_enabled()) {
        return false;
    }

    if (cap_ppc_pvr_compat) {
        return false;
    }

    return !kvmppc_is_pr(cs->kvm_state);
}

void kvmppc_set_reg_ppc_online(PowerPCCPU *cpu, unsigned int online)
{
    CPUState *cs = CPU(cpu);

    if (kvm_enabled()) {
        kvm_set_one_reg(cs, KVM_REG_PPC_ONLINE, &online);
    }
}