1 #include "qemu/osdep.h"
2 #include "qemu/cutils.h"
3 #include "qapi/error.h"
4 #include "sysemu/hw_accel.h"
5 #include "sysemu/runstate.h"
7 #include "qemu/main-loop.h"
8 #include "qemu/module.h"
9 #include "qemu/error-report.h"
10 #include "exec/exec-all.h"
11 #include "exec/tb-flush.h"
12 #include "helper_regs.h"
13 #include "hw/ppc/ppc.h"
14 #include "hw/ppc/spapr.h"
15 #include "hw/ppc/spapr_cpu_core.h"
16 #include "mmu-hash64.h"
17 #include "cpu-models.h"
20 #include "hw/ppc/fdt.h"
21 #include "hw/ppc/spapr_ovec.h"
22 #include "hw/ppc/spapr_numa.h"
23 #include "mmu-book3s-v3.h"
24 #include "hw/mem/memory-device.h"
26 bool is_ram_address(SpaprMachineState
*spapr
, hwaddr addr
)
28 MachineState
*machine
= MACHINE(spapr
);
29 DeviceMemoryState
*dms
= machine
->device_memory
;
31 if (addr
< machine
->ram_size
) {
34 if ((addr
>= dms
->base
)
35 && ((addr
- dms
->base
) < memory_region_size(&dms
->mr
))) {
42 /* Convert a return code from the KVM ioctl()s implementing resize HPT
43 * into a PAPR hypercall return code */
44 static target_ulong
resize_hpt_convert_rc(int ret
)
47 return H_LONG_BUSY_ORDER_100_SEC
;
48 } else if (ret
>= 10000) {
49 return H_LONG_BUSY_ORDER_10_SEC
;
50 } else if (ret
>= 1000) {
51 return H_LONG_BUSY_ORDER_1_SEC
;
52 } else if (ret
>= 100) {
53 return H_LONG_BUSY_ORDER_100_MSEC
;
54 } else if (ret
>= 10) {
55 return H_LONG_BUSY_ORDER_10_MSEC
;
57 return H_LONG_BUSY_ORDER_1_MSEC
;
80 static target_ulong
h_resize_hpt_prepare(PowerPCCPU
*cpu
,
81 SpaprMachineState
*spapr
,
85 target_ulong flags
= args
[0];
87 uint64_t current_ram_size
;
90 if (spapr
->resize_hpt
== SPAPR_RESIZE_HPT_DISABLED
) {
94 if (!spapr
->htab_shift
) {
95 /* Radix guest, no HPT */
96 return H_NOT_AVAILABLE
;
99 trace_spapr_h_resize_hpt_prepare(flags
, shift
);
105 if (shift
&& ((shift
< 18) || (shift
> 46))) {
109 current_ram_size
= MACHINE(spapr
)->ram_size
+ get_plugged_memory_size();
111 /* We only allow the guest to allocate an HPT one order above what
112 * we'd normally give them (to stop a small guest claiming a huge
113 * chunk of resources in the HPT) */
114 if (shift
> (spapr_hpt_shift_for_ramsize(current_ram_size
) + 1)) {
118 rc
= kvmppc_resize_hpt_prepare(cpu
, flags
, shift
);
120 return resize_hpt_convert_rc(rc
);
127 return softmmu_resize_hpt_prepare(cpu
, spapr
, shift
);
130 static void do_push_sregs_to_kvm_pr(CPUState
*cs
, run_on_cpu_data data
)
134 cpu_synchronize_state(cs
);
136 ret
= kvmppc_put_books_sregs(POWERPC_CPU(cs
));
138 error_report("failed to push sregs to KVM: %s", strerror(-ret
));
143 void push_sregs_to_kvm_pr(SpaprMachineState
*spapr
)
148 * This is a hack for the benefit of KVM PR - it abuses the SDR1
149 * slot in kvm_sregs to communicate the userspace address of the
152 if (!kvm_enabled() || !spapr
->htab
) {
157 run_on_cpu(cs
, do_push_sregs_to_kvm_pr
, RUN_ON_CPU_NULL
);
161 static target_ulong
h_resize_hpt_commit(PowerPCCPU
*cpu
,
162 SpaprMachineState
*spapr
,
166 target_ulong flags
= args
[0];
167 target_ulong shift
= args
[1];
170 if (spapr
->resize_hpt
== SPAPR_RESIZE_HPT_DISABLED
) {
174 if (!spapr
->htab_shift
) {
175 /* Radix guest, no HPT */
176 return H_NOT_AVAILABLE
;
179 trace_spapr_h_resize_hpt_commit(flags
, shift
);
181 rc
= kvmppc_resize_hpt_commit(cpu
, flags
, shift
);
183 rc
= resize_hpt_convert_rc(rc
);
184 if (rc
== H_SUCCESS
) {
185 /* Need to set the new htab_shift in the machine state */
186 spapr
->htab_shift
= shift
;
195 return softmmu_resize_hpt_commit(cpu
, spapr
, flags
, shift
);
200 static target_ulong
h_set_sprg0(PowerPCCPU
*cpu
, SpaprMachineState
*spapr
,
201 target_ulong opcode
, target_ulong
*args
)
203 cpu_synchronize_state(CPU(cpu
));
204 cpu
->env
.spr
[SPR_SPRG0
] = args
[0];
209 static target_ulong
h_set_dabr(PowerPCCPU
*cpu
, SpaprMachineState
*spapr
,
210 target_ulong opcode
, target_ulong
*args
)
212 if (!ppc_has_spr(cpu
, SPR_DABR
)) {
213 return H_HARDWARE
; /* DABR register not available */
215 cpu_synchronize_state(CPU(cpu
));
217 if (ppc_has_spr(cpu
, SPR_DABRX
)) {
218 cpu
->env
.spr
[SPR_DABRX
] = 0x3; /* Use Problem and Privileged state */
219 } else if (!(args
[0] & 0x4)) { /* Breakpoint Translation set? */
220 return H_RESERVED_DABR
;
223 cpu
->env
.spr
[SPR_DABR
] = args
[0];
227 static target_ulong
h_set_xdabr(PowerPCCPU
*cpu
, SpaprMachineState
*spapr
,
228 target_ulong opcode
, target_ulong
*args
)
230 target_ulong dabrx
= args
[1];
232 if (!ppc_has_spr(cpu
, SPR_DABR
) || !ppc_has_spr(cpu
, SPR_DABRX
)) {
236 if ((dabrx
& ~0xfULL
) != 0 || (dabrx
& H_DABRX_HYPERVISOR
) != 0
237 || (dabrx
& (H_DABRX_KERNEL
| H_DABRX_USER
)) == 0) {
241 cpu_synchronize_state(CPU(cpu
));
242 cpu
->env
.spr
[SPR_DABRX
] = dabrx
;
243 cpu
->env
.spr
[SPR_DABR
] = args
[0];
248 static target_ulong
h_page_init(PowerPCCPU
*cpu
, SpaprMachineState
*spapr
,
249 target_ulong opcode
, target_ulong
*args
)
251 target_ulong flags
= args
[0];
252 hwaddr dst
= args
[1];
253 hwaddr src
= args
[2];
254 hwaddr len
= TARGET_PAGE_SIZE
;
255 uint8_t *pdst
, *psrc
;
256 target_long ret
= H_SUCCESS
;
258 if (flags
& ~(H_ICACHE_SYNCHRONIZE
| H_ICACHE_INVALIDATE
259 | H_COPY_PAGE
| H_ZERO_PAGE
)) {
260 qemu_log_mask(LOG_UNIMP
, "h_page_init: Bad flags (" TARGET_FMT_lx
"\n",
265 /* Map-in destination */
266 if (!is_ram_address(spapr
, dst
) || (dst
& ~TARGET_PAGE_MASK
) != 0) {
269 pdst
= cpu_physical_memory_map(dst
, &len
, true);
270 if (!pdst
|| len
!= TARGET_PAGE_SIZE
) {
274 if (flags
& H_COPY_PAGE
) {
275 /* Map-in source, copy to destination, and unmap source again */
276 if (!is_ram_address(spapr
, src
) || (src
& ~TARGET_PAGE_MASK
) != 0) {
280 psrc
= cpu_physical_memory_map(src
, &len
, false);
281 if (!psrc
|| len
!= TARGET_PAGE_SIZE
) {
285 memcpy(pdst
, psrc
, len
);
286 cpu_physical_memory_unmap(psrc
, len
, 0, len
);
287 } else if (flags
& H_ZERO_PAGE
) {
288 memset(pdst
, 0, len
); /* Just clear the destination page */
291 if (kvm_enabled() && (flags
& H_ICACHE_SYNCHRONIZE
) != 0) {
292 kvmppc_dcbst_range(cpu
, pdst
, len
);
294 if (flags
& (H_ICACHE_SYNCHRONIZE
| H_ICACHE_INVALIDATE
)) {
296 kvmppc_icbi_range(cpu
, pdst
, len
);
303 cpu_physical_memory_unmap(pdst
, TARGET_PAGE_SIZE
, 1, len
);
307 #define FLAGS_REGISTER_VPA 0x0000200000000000ULL
308 #define FLAGS_REGISTER_DTL 0x0000400000000000ULL
309 #define FLAGS_REGISTER_SLBSHADOW 0x0000600000000000ULL
310 #define FLAGS_DEREGISTER_VPA 0x0000a00000000000ULL
311 #define FLAGS_DEREGISTER_DTL 0x0000c00000000000ULL
312 #define FLAGS_DEREGISTER_SLBSHADOW 0x0000e00000000000ULL
314 static target_ulong
register_vpa(PowerPCCPU
*cpu
, target_ulong vpa
)
316 CPUState
*cs
= CPU(cpu
);
317 CPUPPCState
*env
= &cpu
->env
;
318 SpaprCpuState
*spapr_cpu
= spapr_cpu_state(cpu
);
323 hcall_dprintf("Can't cope with registering a VPA at logical 0\n");
327 if (vpa
% env
->dcache_line_size
) {
330 /* FIXME: bounds check the address */
332 size
= lduw_be_phys(cs
->as
, vpa
+ 0x4);
334 if (size
< VPA_MIN_SIZE
) {
338 /* VPA is not allowed to cross a page boundary */
339 if ((vpa
/ 4096) != ((vpa
+ size
- 1) / 4096)) {
343 spapr_cpu
->vpa_addr
= vpa
;
345 tmp
= ldub_phys(cs
->as
, spapr_cpu
->vpa_addr
+ VPA_SHARED_PROC_OFFSET
);
346 tmp
|= VPA_SHARED_PROC_VAL
;
347 stb_phys(cs
->as
, spapr_cpu
->vpa_addr
+ VPA_SHARED_PROC_OFFSET
, tmp
);
352 static target_ulong
deregister_vpa(PowerPCCPU
*cpu
, target_ulong vpa
)
354 SpaprCpuState
*spapr_cpu
= spapr_cpu_state(cpu
);
356 if (spapr_cpu
->slb_shadow_addr
) {
360 if (spapr_cpu
->dtl_addr
) {
364 spapr_cpu
->vpa_addr
= 0;
368 static target_ulong
register_slb_shadow(PowerPCCPU
*cpu
, target_ulong addr
)
370 SpaprCpuState
*spapr_cpu
= spapr_cpu_state(cpu
);
374 hcall_dprintf("Can't cope with SLB shadow at logical 0\n");
378 size
= ldl_be_phys(CPU(cpu
)->as
, addr
+ 0x4);
383 if ((addr
/ 4096) != ((addr
+ size
- 1) / 4096)) {
387 if (!spapr_cpu
->vpa_addr
) {
391 spapr_cpu
->slb_shadow_addr
= addr
;
392 spapr_cpu
->slb_shadow_size
= size
;
397 static target_ulong
deregister_slb_shadow(PowerPCCPU
*cpu
, target_ulong addr
)
399 SpaprCpuState
*spapr_cpu
= spapr_cpu_state(cpu
);
401 spapr_cpu
->slb_shadow_addr
= 0;
402 spapr_cpu
->slb_shadow_size
= 0;
406 static target_ulong
register_dtl(PowerPCCPU
*cpu
, target_ulong addr
)
408 SpaprCpuState
*spapr_cpu
= spapr_cpu_state(cpu
);
412 hcall_dprintf("Can't cope with DTL at logical 0\n");
416 size
= ldl_be_phys(CPU(cpu
)->as
, addr
+ 0x4);
422 if (!spapr_cpu
->vpa_addr
) {
426 spapr_cpu
->dtl_addr
= addr
;
427 spapr_cpu
->dtl_size
= size
;
432 static target_ulong
deregister_dtl(PowerPCCPU
*cpu
, target_ulong addr
)
434 SpaprCpuState
*spapr_cpu
= spapr_cpu_state(cpu
);
436 spapr_cpu
->dtl_addr
= 0;
437 spapr_cpu
->dtl_size
= 0;
442 static target_ulong
h_register_vpa(PowerPCCPU
*cpu
, SpaprMachineState
*spapr
,
443 target_ulong opcode
, target_ulong
*args
)
445 target_ulong flags
= args
[0];
446 target_ulong procno
= args
[1];
447 target_ulong vpa
= args
[2];
448 target_ulong ret
= H_PARAMETER
;
451 tcpu
= spapr_find_cpu(procno
);
457 case FLAGS_REGISTER_VPA
:
458 ret
= register_vpa(tcpu
, vpa
);
461 case FLAGS_DEREGISTER_VPA
:
462 ret
= deregister_vpa(tcpu
, vpa
);
465 case FLAGS_REGISTER_SLBSHADOW
:
466 ret
= register_slb_shadow(tcpu
, vpa
);
469 case FLAGS_DEREGISTER_SLBSHADOW
:
470 ret
= deregister_slb_shadow(tcpu
, vpa
);
473 case FLAGS_REGISTER_DTL
:
474 ret
= register_dtl(tcpu
, vpa
);
477 case FLAGS_DEREGISTER_DTL
:
478 ret
= deregister_dtl(tcpu
, vpa
);
485 static target_ulong
h_cede(PowerPCCPU
*cpu
, SpaprMachineState
*spapr
,
486 target_ulong opcode
, target_ulong
*args
)
488 CPUPPCState
*env
= &cpu
->env
;
489 CPUState
*cs
= CPU(cpu
);
490 SpaprCpuState
*spapr_cpu
= spapr_cpu_state(cpu
);
492 env
->msr
|= (1ULL << MSR_EE
);
493 hreg_compute_hflags(env
);
494 ppc_maybe_interrupt(env
);
496 if (spapr_cpu
->prod
) {
497 spapr_cpu
->prod
= false;
501 if (!cpu_has_work(cs
)) {
503 cs
->exception_index
= EXCP_HLT
;
504 cs
->exit_request
= 1;
505 ppc_maybe_interrupt(env
);
512 * Confer to self, aka join. Cede could use the same pattern as well, if
513 * EXCP_HLT can be changed to EXCP_HALTED.
515 static target_ulong
h_confer_self(PowerPCCPU
*cpu
)
517 CPUState
*cs
= CPU(cpu
);
518 SpaprCpuState
*spapr_cpu
= spapr_cpu_state(cpu
);
520 if (spapr_cpu
->prod
) {
521 spapr_cpu
->prod
= false;
525 cs
->exception_index
= EXCP_HALTED
;
526 cs
->exit_request
= 1;
527 ppc_maybe_interrupt(&cpu
->env
);
532 static target_ulong
h_join(PowerPCCPU
*cpu
, SpaprMachineState
*spapr
,
533 target_ulong opcode
, target_ulong
*args
)
535 CPUPPCState
*env
= &cpu
->env
;
537 bool last_unjoined
= true;
539 if (env
->msr
& (1ULL << MSR_EE
)) {
544 * Must not join the last CPU running. Interestingly, no such restriction
545 * for H_CONFER-to-self, but that is probably not intended to be used
546 * when H_JOIN is available.
549 PowerPCCPU
*c
= POWERPC_CPU(cs
);
550 CPUPPCState
*e
= &c
->env
;
555 /* Don't have a way to indicate joined, so use halted && MSR[EE]=0 */
556 if (!cs
->halted
|| (e
->msr
& (1ULL << MSR_EE
))) {
557 last_unjoined
= false;
565 return h_confer_self(cpu
);
568 static target_ulong
h_confer(PowerPCCPU
*cpu
, SpaprMachineState
*spapr
,
569 target_ulong opcode
, target_ulong
*args
)
571 target_long target
= args
[0];
572 uint32_t dispatch
= args
[1];
573 CPUState
*cs
= CPU(cpu
);
574 SpaprCpuState
*spapr_cpu
;
577 * -1 means confer to all other CPUs without dispatch counter check,
578 * otherwise it's a targeted confer.
581 PowerPCCPU
*target_cpu
= spapr_find_cpu(target
);
582 uint32_t target_dispatch
;
589 * target == self is a special case, we wait until prodded, without
590 * dispatch counter check.
592 if (cpu
== target_cpu
) {
593 return h_confer_self(cpu
);
596 spapr_cpu
= spapr_cpu_state(target_cpu
);
597 if (!spapr_cpu
->vpa_addr
|| ((dispatch
& 1) == 0)) {
601 target_dispatch
= ldl_be_phys(cs
->as
,
602 spapr_cpu
->vpa_addr
+ VPA_DISPATCH_COUNTER
);
603 if (target_dispatch
!= dispatch
) {
608 * The targeted confer does not do anything special beyond yielding
609 * the current vCPU, but even this should be better than nothing.
610 * At least for single-threaded tcg, it gives the target a chance to
611 * run before we run again. Multi-threaded tcg does not really do
612 * anything with EXCP_YIELD yet.
616 cs
->exception_index
= EXCP_YIELD
;
617 cs
->exit_request
= 1;
623 static target_ulong
h_prod(PowerPCCPU
*cpu
, SpaprMachineState
*spapr
,
624 target_ulong opcode
, target_ulong
*args
)
626 target_long target
= args
[0];
629 SpaprCpuState
*spapr_cpu
;
631 tcpu
= spapr_find_cpu(target
);
637 spapr_cpu
= spapr_cpu_state(tcpu
);
638 spapr_cpu
->prod
= true;
640 ppc_maybe_interrupt(&cpu
->env
);
646 static target_ulong
h_rtas(PowerPCCPU
*cpu
, SpaprMachineState
*spapr
,
647 target_ulong opcode
, target_ulong
*args
)
649 target_ulong rtas_r3
= args
[0];
650 uint32_t token
= rtas_ld(rtas_r3
, 0);
651 uint32_t nargs
= rtas_ld(rtas_r3
, 1);
652 uint32_t nret
= rtas_ld(rtas_r3
, 2);
654 return spapr_rtas_call(cpu
, spapr
, token
, nargs
, rtas_r3
+ 12,
655 nret
, rtas_r3
+ 12 + 4*nargs
);
658 static target_ulong
h_logical_load(PowerPCCPU
*cpu
, SpaprMachineState
*spapr
,
659 target_ulong opcode
, target_ulong
*args
)
661 CPUState
*cs
= CPU(cpu
);
662 target_ulong size
= args
[0];
663 target_ulong addr
= args
[1];
667 args
[0] = ldub_phys(cs
->as
, addr
);
670 args
[0] = lduw_phys(cs
->as
, addr
);
673 args
[0] = ldl_phys(cs
->as
, addr
);
676 args
[0] = ldq_phys(cs
->as
, addr
);
682 static target_ulong
h_logical_store(PowerPCCPU
*cpu
, SpaprMachineState
*spapr
,
683 target_ulong opcode
, target_ulong
*args
)
685 CPUState
*cs
= CPU(cpu
);
687 target_ulong size
= args
[0];
688 target_ulong addr
= args
[1];
689 target_ulong val
= args
[2];
693 stb_phys(cs
->as
, addr
, val
);
696 stw_phys(cs
->as
, addr
, val
);
699 stl_phys(cs
->as
, addr
, val
);
702 stq_phys(cs
->as
, addr
, val
);
708 static target_ulong
h_logical_memop(PowerPCCPU
*cpu
, SpaprMachineState
*spapr
,
709 target_ulong opcode
, target_ulong
*args
)
711 CPUState
*cs
= CPU(cpu
);
713 target_ulong dst
= args
[0]; /* Destination address */
714 target_ulong src
= args
[1]; /* Source address */
715 target_ulong esize
= args
[2]; /* Element size (0=1,1=2,2=4,3=8) */
716 target_ulong count
= args
[3]; /* Element count */
717 target_ulong op
= args
[4]; /* 0 = copy, 1 = invert */
719 unsigned int mask
= (1 << esize
) - 1;
720 int step
= 1 << esize
;
722 if (count
> 0x80000000) {
726 if ((dst
& mask
) || (src
& mask
) || (op
> 1)) {
730 if (dst
>= src
&& dst
< (src
+ (count
<< esize
))) {
731 dst
= dst
+ ((count
- 1) << esize
);
732 src
= src
+ ((count
- 1) << esize
);
739 tmp
= ldub_phys(cs
->as
, src
);
742 tmp
= lduw_phys(cs
->as
, src
);
745 tmp
= ldl_phys(cs
->as
, src
);
748 tmp
= ldq_phys(cs
->as
, src
);
758 stb_phys(cs
->as
, dst
, tmp
);
761 stw_phys(cs
->as
, dst
, tmp
);
764 stl_phys(cs
->as
, dst
, tmp
);
767 stq_phys(cs
->as
, dst
, tmp
);
777 static target_ulong
h_logical_icbi(PowerPCCPU
*cpu
, SpaprMachineState
*spapr
,
778 target_ulong opcode
, target_ulong
*args
)
780 /* Nothing to do on emulation, KVM will trap this in the kernel */
784 static target_ulong
h_logical_dcbf(PowerPCCPU
*cpu
, SpaprMachineState
*spapr
,
785 target_ulong opcode
, target_ulong
*args
)
787 /* Nothing to do on emulation, KVM will trap this in the kernel */
791 static target_ulong
h_set_mode_resource_le(PowerPCCPU
*cpu
,
792 SpaprMachineState
*spapr
,
805 case H_SET_MODE_ENDIAN_BIG
:
806 spapr_set_all_lpcrs(0, LPCR_ILE
);
807 spapr_pci_switch_vga(spapr
, true);
810 case H_SET_MODE_ENDIAN_LITTLE
:
811 spapr_set_all_lpcrs(LPCR_ILE
, LPCR_ILE
);
812 spapr_pci_switch_vga(spapr
, false);
816 return H_UNSUPPORTED_FLAG
;
819 static target_ulong
h_set_mode_resource_addr_trans_mode(PowerPCCPU
*cpu
,
824 PowerPCCPUClass
*pcc
= POWERPC_CPU_GET_CLASS(cpu
);
826 if (!(pcc
->insns_flags2
& PPC2_ISA207S
)) {
837 /* AIL=1 is reserved in POWER8/POWER9/POWER10 */
838 return H_UNSUPPORTED_FLAG
;
841 if (mflags
== 2 && (pcc
->insns_flags2
& PPC2_ISA310
)) {
842 /* AIL=2 is reserved in POWER10 (ISA v3.1) */
843 return H_UNSUPPORTED_FLAG
;
846 spapr_set_all_lpcrs(mflags
<< LPCR_AIL_SHIFT
, LPCR_AIL
);
851 static target_ulong
h_set_mode(PowerPCCPU
*cpu
, SpaprMachineState
*spapr
,
852 target_ulong opcode
, target_ulong
*args
)
854 target_ulong resource
= args
[1];
855 target_ulong ret
= H_P2
;
858 case H_SET_MODE_RESOURCE_LE
:
859 ret
= h_set_mode_resource_le(cpu
, spapr
, args
[0], args
[2], args
[3]);
861 case H_SET_MODE_RESOURCE_ADDR_TRANS_MODE
:
862 ret
= h_set_mode_resource_addr_trans_mode(cpu
, args
[0],
870 static target_ulong
h_clean_slb(PowerPCCPU
*cpu
, SpaprMachineState
*spapr
,
871 target_ulong opcode
, target_ulong
*args
)
873 qemu_log_mask(LOG_UNIMP
, "Unimplemented SPAPR hcall 0x"TARGET_FMT_lx
"%s\n",
874 opcode
, " (H_CLEAN_SLB)");
878 static target_ulong
h_invalidate_pid(PowerPCCPU
*cpu
, SpaprMachineState
*spapr
,
879 target_ulong opcode
, target_ulong
*args
)
881 qemu_log_mask(LOG_UNIMP
, "Unimplemented SPAPR hcall 0x"TARGET_FMT_lx
"%s\n",
882 opcode
, " (H_INVALIDATE_PID)");
886 static void spapr_check_setup_free_hpt(SpaprMachineState
*spapr
,
887 uint64_t patbe_old
, uint64_t patbe_new
)
891 * HASH->HASH || RADIX->RADIX || NOTHING->RADIX : Do Nothing
892 * HASH->RADIX : Free HPT
893 * RADIX->HASH : Allocate HPT
894 * NOTHING->HASH : Allocate HPT
895 * Note: NOTHING implies the case where we said the guest could choose
896 * later and so assumed radix and now it's called H_REG_PROC_TBL
899 if ((patbe_old
& PATE1_GR
) == (patbe_new
& PATE1_GR
)) {
900 /* We assume RADIX, so this catches all the "Do Nothing" cases */
901 } else if (!(patbe_old
& PATE1_GR
)) {
902 /* HASH->RADIX : Free HPT */
903 spapr_free_hpt(spapr
);
904 } else if (!(patbe_new
& PATE1_GR
)) {
905 /* RADIX->HASH || NOTHING->HASH : Allocate HPT */
906 spapr_setup_hpt(spapr
);
911 #define FLAGS_MASK 0x01FULL
912 #define FLAG_MODIFY 0x10
913 #define FLAG_REGISTER 0x08
914 #define FLAG_RADIX 0x04
915 #define FLAG_HASH_PROC_TBL 0x02
916 #define FLAG_GTSE 0x01
918 static target_ulong
h_register_process_table(PowerPCCPU
*cpu
,
919 SpaprMachineState
*spapr
,
923 target_ulong flags
= args
[0];
924 target_ulong proc_tbl
= args
[1];
925 target_ulong page_size
= args
[2];
926 target_ulong table_size
= args
[3];
927 target_ulong update_lpcr
= 0;
928 target_ulong table_byte_size
;
931 if (flags
& ~FLAGS_MASK
) { /* Check no reserved bits are set */
934 if (flags
& FLAG_MODIFY
) {
935 if (flags
& FLAG_REGISTER
) {
936 /* Check process table alignment */
937 table_byte_size
= 1ULL << (table_size
+ 12);
938 if (proc_tbl
& (table_byte_size
- 1)) {
939 qemu_log_mask(LOG_GUEST_ERROR
,
940 "%s: process table not properly aligned: proc_tbl 0x"
941 TARGET_FMT_lx
" proc_tbl_size 0x"TARGET_FMT_lx
"\n",
942 __func__
, proc_tbl
, table_byte_size
);
944 if (flags
& FLAG_RADIX
) { /* Register new RADIX process table */
945 if (proc_tbl
& 0xfff || proc_tbl
>> 60) {
947 } else if (page_size
) {
949 } else if (table_size
> 24) {
952 cproc
= PATE1_GR
| proc_tbl
| table_size
;
953 } else { /* Register new HPT process table */
954 if (flags
& FLAG_HASH_PROC_TBL
) { /* Hash with Segment Tables */
955 /* TODO - Not Supported */
956 /* Technically caused by flag bits => H_PARAMETER */
958 } else { /* Hash with SLB */
959 if (proc_tbl
>> 38) {
961 } else if (page_size
& ~0x7) {
963 } else if (table_size
> 24) {
967 cproc
= (proc_tbl
<< 25) | page_size
<< 5 | table_size
;
970 } else { /* Deregister current process table */
972 * Set to benign value: (current GR) | 0. This allows
973 * deregistration in KVM to succeed even if the radix bit
974 * in flags doesn't match the radix bit in the old PATE.
976 cproc
= spapr
->patb_entry
& PATE1_GR
;
978 } else { /* Maintain current registration */
979 if (!(flags
& FLAG_RADIX
) != !(spapr
->patb_entry
& PATE1_GR
)) {
980 /* Technically caused by flag bits => H_PARAMETER */
981 return H_PARAMETER
; /* Existing Process Table Mismatch */
983 cproc
= spapr
->patb_entry
;
986 /* Check if we need to set up or free the HPT */
987 spapr_check_setup_free_hpt(spapr
, spapr
->patb_entry
, cproc
);
989 spapr
->patb_entry
= cproc
; /* Save new process table */
991 /* Update the UPRT, HR and GTSE bits in the LPCR for all cpus */
992 if (flags
& FLAG_RADIX
) /* Radix must use process tables, also set HR */
993 update_lpcr
|= (LPCR_UPRT
| LPCR_HR
);
994 else if (flags
& FLAG_HASH_PROC_TBL
) /* Hash with process tables */
995 update_lpcr
|= LPCR_UPRT
;
996 if (flags
& FLAG_GTSE
) /* Guest translation shootdown enable */
997 update_lpcr
|= LPCR_GTSE
;
999 spapr_set_all_lpcrs(update_lpcr
, LPCR_UPRT
| LPCR_HR
| LPCR_GTSE
);
1001 if (kvm_enabled()) {
1002 return kvmppc_configure_v3_mmu(cpu
, flags
& FLAG_RADIX
,
1003 flags
& FLAG_GTSE
, cproc
);
1008 #define H_SIGNAL_SYS_RESET_ALL -1
1009 #define H_SIGNAL_SYS_RESET_ALLBUTSELF -2
1011 static target_ulong
h_signal_sys_reset(PowerPCCPU
*cpu
,
1012 SpaprMachineState
*spapr
,
1013 target_ulong opcode
, target_ulong
*args
)
1015 target_long target
= args
[0];
1020 if (target
< H_SIGNAL_SYS_RESET_ALLBUTSELF
) {
1025 PowerPCCPU
*c
= POWERPC_CPU(cs
);
1027 if (target
== H_SIGNAL_SYS_RESET_ALLBUTSELF
) {
1032 run_on_cpu(cs
, spapr_do_system_reset_on_cpu
, RUN_ON_CPU_NULL
);
1038 cs
= CPU(spapr_find_cpu(target
));
1040 run_on_cpu(cs
, spapr_do_system_reset_on_cpu
, RUN_ON_CPU_NULL
);
1047 /* Returns either a logical PVR or zero if none was found */
1048 static uint32_t cas_check_pvr(PowerPCCPU
*cpu
, uint32_t max_compat
,
1049 target_ulong
*addr
, bool *raw_mode_supported
)
1051 bool explicit_match
= false; /* Matched the CPU's real PVR */
1052 uint32_t best_compat
= 0;
1056 * We scan the supplied table of PVRs looking for two things
1057 * 1. Is our real CPU PVR in the list?
1058 * 2. What's the "best" listed logical PVR
1060 for (i
= 0; i
< 512; ++i
) {
1061 uint32_t pvr
, pvr_mask
;
1063 pvr_mask
= ldl_be_phys(&address_space_memory
, *addr
);
1064 pvr
= ldl_be_phys(&address_space_memory
, *addr
+ 4);
1067 if (~pvr_mask
& pvr
) {
1068 break; /* Terminator record */
1071 if ((cpu
->env
.spr
[SPR_PVR
] & pvr_mask
) == (pvr
& pvr_mask
)) {
1072 explicit_match
= true;
1074 if (ppc_check_compat(cpu
, pvr
, best_compat
, max_compat
)) {
1080 *raw_mode_supported
= explicit_match
;
1082 /* Parsing finished */
1083 trace_spapr_cas_pvr(cpu
->compat_pvr
, explicit_match
, best_compat
);
1089 target_ulong
do_client_architecture_support(PowerPCCPU
*cpu
,
1090 SpaprMachineState
*spapr
,
1092 target_ulong fdt_bufsize
)
1094 target_ulong ov_table
; /* Working address in data buffer */
1096 SpaprOptionVector
*ov1_guest
, *ov5_guest
;
1098 bool raw_mode_supported
= false;
1102 uint32_t max_compat
= spapr
->max_compat_pvr
;
1104 /* CAS is supposed to be called early when only the boot vCPU is active. */
1106 if (cs
== CPU(cpu
)) {
1110 warn_report("guest has multiple active vCPUs at CAS, which is not allowed");
1111 return H_MULTI_THREADS_ACTIVE
;
1115 cas_pvr
= cas_check_pvr(cpu
, max_compat
, &vec
, &raw_mode_supported
);
1116 if (!cas_pvr
&& (!raw_mode_supported
|| max_compat
)) {
1118 * We couldn't find a suitable compatibility mode, and either
1119 * the guest doesn't support "raw" mode for this CPU, or "raw"
1120 * mode is disabled because a maximum compat mode is set.
1122 error_report("Couldn't negotiate a suitable PVR during CAS");
1127 if (cpu
->compat_pvr
!= cas_pvr
) {
1128 Error
*local_err
= NULL
;
1130 if (ppc_set_compat_all(cas_pvr
, &local_err
) < 0) {
1131 /* We failed to set compat mode (likely because we are running with KVM PR),
1132 * but maybe we can fall back to raw mode if the guest supports it.
1134 if (!raw_mode_supported
) {
1135 error_report_err(local_err
);
1138 error_free(local_err
);
1142 /* For future use: here @ov_table points to the first option vector */
1145 ov1_guest
= spapr_ovec_parse_vector(ov_table
, 1);
1147 warn_report("guest didn't provide option vector 1");
1150 ov5_guest
= spapr_ovec_parse_vector(ov_table
, 5);
1152 spapr_ovec_cleanup(ov1_guest
);
1153 warn_report("guest didn't provide option vector 5");
1156 if (spapr_ovec_test(ov5_guest
, OV5_MMU_BOTH
)) {
1157 error_report("guest requested hash and radix MMU, which is invalid.");
1160 if (spapr_ovec_test(ov5_guest
, OV5_XIVE_BOTH
)) {
1161 error_report("guest requested an invalid interrupt mode");
1165 guest_radix
= spapr_ovec_test(ov5_guest
, OV5_MMU_RADIX_300
);
1167 guest_xive
= spapr_ovec_test(ov5_guest
, OV5_XIVE_EXPLOIT
);
1170 * HPT resizing is a bit of a special case, because when enabled
1171 * we assume an HPT guest will support it until it says it
1172 * doesn't, instead of assuming it won't support it until it says
1173 * it does. Strictly speaking that approach could break for
1174 * guests which don't make a CAS call, but those are so old we
1175 * don't care about them. Without that assumption we'd have to
1176 * make at least a temporary allocation of an HPT sized for max
1177 * memory, which could be impossibly difficult under KVM HV if
1180 if (!guest_radix
&& !spapr_ovec_test(ov5_guest
, OV5_HPT_RESIZE
)) {
1181 int maxshift
= spapr_hpt_shift_for_ramsize(MACHINE(spapr
)->maxram_size
);
1183 if (spapr
->resize_hpt
== SPAPR_RESIZE_HPT_REQUIRED
) {
1185 "h_client_architecture_support: Guest doesn't support HPT resizing, but resize-hpt=required");
1189 if (spapr
->htab_shift
< maxshift
) {
1190 /* Guest doesn't know about HPT resizing, so we
1191 * pre-emptively resize for the maximum permitted RAM. At
1192 * the point this is called, nothing should have been
1193 * entered into the existing HPT */
1194 spapr_reallocate_hpt(spapr
, maxshift
, &error_fatal
);
1195 push_sregs_to_kvm_pr(spapr
);
1199 /* NOTE: there are actually a number of ov5 bits where input from the
1200 * guest is always zero, and the platform/QEMU enables them independently
1201 * of guest input. To model these properly we'd want some sort of mask,
1202 * but since they only currently apply to memory migration as defined
1203 * by LoPAPR 1.1, 14.5.4.8, which QEMU doesn't implement, we don't need
1204 * to worry about this for now.
1207 /* full range of negotiated ov5 capabilities */
1208 spapr_ovec_intersect(spapr
->ov5_cas
, spapr
->ov5
, ov5_guest
);
1209 spapr_ovec_cleanup(ov5_guest
);
1211 spapr_check_mmu_mode(guest_radix
);
1213 spapr
->cas_pre_isa3_guest
= !spapr_ovec_test(ov1_guest
, OV1_PPC_3_00
);
1214 spapr_ovec_cleanup(ov1_guest
);
1217 * Check for NUMA affinity conditions now that we know which NUMA
1218 * affinity the guest will use.
1220 spapr_numa_associativity_check(spapr
);
1223 * Ensure the guest asks for an interrupt mode we support;
1224 * otherwise terminate the boot.
1227 if (!spapr
->irq
->xive
) {
1229 "Guest requested unavailable interrupt mode (XIVE), try the ic-mode=xive or ic-mode=dual machine property");
1233 if (!spapr
->irq
->xics
) {
1235 "Guest requested unavailable interrupt mode (XICS), either don't set the ic-mode machine property or try ic-mode=xics or ic-mode=dual");
1240 spapr_irq_update_active_intc(spapr
);
1243 * Process all pending hot-plug/unplug requests now. An updated full
1244 * rendered FDT will be returned to the guest.
1246 spapr_drc_reset_all(spapr
);
1247 spapr_clear_pending_hotplug_events(spapr
);
1250 * If spapr_machine_reset() did not set up an HPT but one is necessary
1251 * (because the guest isn't going to use radix) then set it up here.
1253 if ((spapr
->patb_entry
& PATE1_GR
) && !guest_radix
) {
1254 /* legacy hash or new hash: */
1255 spapr_setup_hpt(spapr
);
1258 fdt
= spapr_build_fdt(spapr
, spapr
->vof
!= NULL
, fdt_bufsize
);
1259 g_free(spapr
->fdt_blob
);
1260 spapr
->fdt_size
= fdt_totalsize(fdt
);
1261 spapr
->fdt_initial_size
= spapr
->fdt_size
;
1262 spapr
->fdt_blob
= fdt
;
1265 * Set the machine->fdt pointer again since we just freed
1266 * it above (by freeing spapr->fdt_blob). We set this
1267 * pointer to enable support for the 'dumpdtb' QMP/HMP
1270 MACHINE(spapr
)->fdt
= fdt
;
1275 static target_ulong
h_client_architecture_support(PowerPCCPU
*cpu
,
1276 SpaprMachineState
*spapr
,
1277 target_ulong opcode
,
1280 target_ulong vec
= ppc64_phys_to_real(args
[0]);
1281 target_ulong fdt_buf
= args
[1];
1282 target_ulong fdt_bufsize
= args
[2];
1284 SpaprDeviceTreeUpdateHeader hdr
= { .version_id
= 1 };
1286 if (fdt_bufsize
< sizeof(hdr
)) {
1287 error_report("SLOF provided insufficient CAS buffer "
1288 TARGET_FMT_lu
" (min: %zu)", fdt_bufsize
, sizeof(hdr
));
1292 fdt_bufsize
-= sizeof(hdr
);
1294 ret
= do_client_architecture_support(cpu
, spapr
, vec
, fdt_bufsize
);
1295 if (ret
== H_SUCCESS
) {
1296 _FDT((fdt_pack(spapr
->fdt_blob
)));
1297 spapr
->fdt_size
= fdt_totalsize(spapr
->fdt_blob
);
1298 spapr
->fdt_initial_size
= spapr
->fdt_size
;
1300 cpu_physical_memory_write(fdt_buf
, &hdr
, sizeof(hdr
));
1301 cpu_physical_memory_write(fdt_buf
+ sizeof(hdr
), spapr
->fdt_blob
,
1303 trace_spapr_cas_continue(spapr
->fdt_size
+ sizeof(hdr
));
1309 target_ulong
spapr_vof_client_architecture_support(MachineState
*ms
,
1311 target_ulong ovec_addr
)
1313 SpaprMachineState
*spapr
= SPAPR_MACHINE(ms
);
1315 target_ulong ret
= do_client_architecture_support(POWERPC_CPU(cs
), spapr
,
1316 ovec_addr
, FDT_MAX_SIZE
);
1319 * This adds stdout and generates phandles for boottime and CAS FDTs.
1320 * It is alright to update the FDT here as do_client_architecture_support()
1323 spapr_vof_client_dt_finalize(spapr
, spapr
->fdt_blob
);
1328 static target_ulong
h_get_cpu_characteristics(PowerPCCPU
*cpu
,
1329 SpaprMachineState
*spapr
,
1330 target_ulong opcode
,
1333 uint64_t characteristics
= H_CPU_CHAR_HON_BRANCH_HINTS
&
1334 ~H_CPU_CHAR_THR_RECONF_TRIG
;
1335 uint64_t behaviour
= H_CPU_BEHAV_FAVOUR_SECURITY
;
1336 uint8_t safe_cache
= spapr_get_cap(spapr
, SPAPR_CAP_CFPC
);
1337 uint8_t safe_bounds_check
= spapr_get_cap(spapr
, SPAPR_CAP_SBBC
);
1338 uint8_t safe_indirect_branch
= spapr_get_cap(spapr
, SPAPR_CAP_IBS
);
1339 uint8_t count_cache_flush_assist
= spapr_get_cap(spapr
,
1340 SPAPR_CAP_CCF_ASSIST
);
1342 switch (safe_cache
) {
1343 case SPAPR_CAP_WORKAROUND
:
1344 characteristics
|= H_CPU_CHAR_L1D_FLUSH_ORI30
;
1345 characteristics
|= H_CPU_CHAR_L1D_FLUSH_TRIG2
;
1346 characteristics
|= H_CPU_CHAR_L1D_THREAD_PRIV
;
1347 behaviour
|= H_CPU_BEHAV_L1D_FLUSH_PR
;
1349 case SPAPR_CAP_FIXED
:
1350 behaviour
|= H_CPU_BEHAV_NO_L1D_FLUSH_ENTRY
;
1351 behaviour
|= H_CPU_BEHAV_NO_L1D_FLUSH_UACCESS
;
1353 default: /* broken */
1354 assert(safe_cache
== SPAPR_CAP_BROKEN
);
1355 behaviour
|= H_CPU_BEHAV_L1D_FLUSH_PR
;
1359 switch (safe_bounds_check
) {
1360 case SPAPR_CAP_WORKAROUND
:
1361 characteristics
|= H_CPU_CHAR_SPEC_BAR_ORI31
;
1362 behaviour
|= H_CPU_BEHAV_BNDS_CHK_SPEC_BAR
;
1364 case SPAPR_CAP_FIXED
:
1366 default: /* broken */
1367 assert(safe_bounds_check
== SPAPR_CAP_BROKEN
);
1368 behaviour
|= H_CPU_BEHAV_BNDS_CHK_SPEC_BAR
;
1372 switch (safe_indirect_branch
) {
1373 case SPAPR_CAP_FIXED_NA
:
1375 case SPAPR_CAP_FIXED_CCD
:
1376 characteristics
|= H_CPU_CHAR_CACHE_COUNT_DIS
;
1378 case SPAPR_CAP_FIXED_IBS
:
1379 characteristics
|= H_CPU_CHAR_BCCTRL_SERIALISED
;
1381 case SPAPR_CAP_WORKAROUND
:
1382 behaviour
|= H_CPU_BEHAV_FLUSH_COUNT_CACHE
;
1383 if (count_cache_flush_assist
) {
1384 characteristics
|= H_CPU_CHAR_BCCTR_FLUSH_ASSIST
;
1387 default: /* broken */
1388 assert(safe_indirect_branch
== SPAPR_CAP_BROKEN
);
1392 args
[0] = characteristics
;
1393 args
[1] = behaviour
;
1397 static target_ulong
h_update_dt(PowerPCCPU
*cpu
, SpaprMachineState
*spapr
,
1398 target_ulong opcode
, target_ulong
*args
)
1400 target_ulong dt
= ppc64_phys_to_real(args
[0]);
1401 struct fdt_header hdr
= { 0 };
1403 SpaprMachineClass
*smc
= SPAPR_MACHINE_GET_CLASS(spapr
);
1406 cpu_physical_memory_read(dt
, &hdr
, sizeof(hdr
));
1407 cb
= fdt32_to_cpu(hdr
.totalsize
);
1409 if (!smc
->update_dt_enabled
) {
1413 /* Check that the fdt did not grow out of proportion */
1414 if (cb
> spapr
->fdt_initial_size
* 2) {
1415 trace_spapr_update_dt_failed_size(spapr
->fdt_initial_size
, cb
,
1416 fdt32_to_cpu(hdr
.magic
));
1420 fdt
= g_malloc0(cb
);
1421 cpu_physical_memory_read(dt
, fdt
, cb
);
1423 /* Check the fdt consistency */
1424 if (fdt_check_full(fdt
, cb
)) {
1425 trace_spapr_update_dt_failed_check(spapr
->fdt_initial_size
, cb
,
1426 fdt32_to_cpu(hdr
.magic
));
1430 g_free(spapr
->fdt_blob
);
1431 spapr
->fdt_size
= cb
;
1432 spapr
->fdt_blob
= fdt
;
1433 trace_spapr_update_dt(cb
);
1438 static spapr_hcall_fn papr_hypercall_table
[(MAX_HCALL_OPCODE
/ 4) + 1];
1439 static spapr_hcall_fn kvmppc_hypercall_table
[KVMPPC_HCALL_MAX
- KVMPPC_HCALL_BASE
+ 1];
1440 static spapr_hcall_fn svm_hypercall_table
[(SVM_HCALL_MAX
- SVM_HCALL_BASE
) / 4 + 1];
1442 void spapr_register_hypercall(target_ulong opcode
, spapr_hcall_fn fn
)
1444 spapr_hcall_fn
*slot
;
1446 if (opcode
<= MAX_HCALL_OPCODE
) {
1447 assert((opcode
& 0x3) == 0);
1449 slot
= &papr_hypercall_table
[opcode
/ 4];
1450 } else if (opcode
>= SVM_HCALL_BASE
&& opcode
<= SVM_HCALL_MAX
) {
1451 /* we only have SVM-related hcall numbers assigned in multiples of 4 */
1452 assert((opcode
& 0x3) == 0);
1454 slot
= &svm_hypercall_table
[(opcode
- SVM_HCALL_BASE
) / 4];
1456 assert((opcode
>= KVMPPC_HCALL_BASE
) && (opcode
<= KVMPPC_HCALL_MAX
));
1458 slot
= &kvmppc_hypercall_table
[opcode
- KVMPPC_HCALL_BASE
];
1465 target_ulong
spapr_hypercall(PowerPCCPU
*cpu
, target_ulong opcode
,
1468 SpaprMachineState
*spapr
= SPAPR_MACHINE(qdev_get_machine());
1470 if ((opcode
<= MAX_HCALL_OPCODE
)
1471 && ((opcode
& 0x3) == 0)) {
1472 spapr_hcall_fn fn
= papr_hypercall_table
[opcode
/ 4];
1475 return fn(cpu
, spapr
, opcode
, args
);
1477 } else if ((opcode
>= SVM_HCALL_BASE
) &&
1478 (opcode
<= SVM_HCALL_MAX
)) {
1479 spapr_hcall_fn fn
= svm_hypercall_table
[(opcode
- SVM_HCALL_BASE
) / 4];
1482 return fn(cpu
, spapr
, opcode
, args
);
1484 } else if ((opcode
>= KVMPPC_HCALL_BASE
) &&
1485 (opcode
<= KVMPPC_HCALL_MAX
)) {
1486 spapr_hcall_fn fn
= kvmppc_hypercall_table
[opcode
- KVMPPC_HCALL_BASE
];
1489 return fn(cpu
, spapr
, opcode
, args
);
1493 qemu_log_mask(LOG_UNIMP
, "Unimplemented SPAPR hcall 0x" TARGET_FMT_lx
"\n",
1499 #define PRTS_MASK 0x1f
1501 static target_ulong
h_set_ptbl(PowerPCCPU
*cpu
,
1502 SpaprMachineState
*spapr
,
1503 target_ulong opcode
,
1506 target_ulong ptcr
= args
[0];
1508 if (!spapr_get_cap(spapr
, SPAPR_CAP_NESTED_KVM_HV
)) {
1512 if ((ptcr
& PRTS_MASK
) + 12 - 4 > 12) {
1516 spapr
->nested_ptcr
= ptcr
; /* Save new partition table */
1521 static target_ulong
h_tlb_invalidate(PowerPCCPU
*cpu
,
1522 SpaprMachineState
*spapr
,
1523 target_ulong opcode
,
1527 * The spapr virtual hypervisor nested HV implementation retains no L2
1528 * translation state except for TLB. And the TLB is always invalidated
1529 * across L1<->L2 transitions, so nothing is required here.
1535 static target_ulong
h_copy_tofrom_guest(PowerPCCPU
*cpu
,
1536 SpaprMachineState
*spapr
,
1537 target_ulong opcode
,
1541 * This HCALL is not required, L1 KVM will take a slow path and walk the
1542 * page tables manually to do the data copy.
1548 * When this handler returns, the environment is switched to the L2 guest
1549 * and TCG begins running that. spapr_exit_nested() performs the switch from
1550 * L2 back to L1 and returns from the H_ENTER_NESTED hcall.
1552 static target_ulong
h_enter_nested(PowerPCCPU
*cpu
,
1553 SpaprMachineState
*spapr
,
1554 target_ulong opcode
,
1557 PowerPCCPUClass
*pcc
= POWERPC_CPU_GET_CLASS(cpu
);
1558 CPUState
*cs
= CPU(cpu
);
1559 CPUPPCState
*env
= &cpu
->env
;
1560 SpaprCpuState
*spapr_cpu
= spapr_cpu_state(cpu
);
1561 target_ulong hv_ptr
= args
[0];
1562 target_ulong regs_ptr
= args
[1];
1563 target_ulong hdec
, now
= cpu_ppc_load_tbl(env
);
1564 target_ulong lpcr
, lpcr_mask
;
1565 struct kvmppc_hv_guest_state
*hvstate
;
1566 struct kvmppc_hv_guest_state hv_state
;
1567 struct kvmppc_pt_regs
*regs
;
1572 if (spapr
->nested_ptcr
== 0) {
1573 return H_NOT_AVAILABLE
;
1576 len
= sizeof(*hvstate
);
1577 hvstate
= address_space_map(CPU(cpu
)->as
, hv_ptr
, &len
, false,
1578 MEMTXATTRS_UNSPECIFIED
);
1579 if (len
!= sizeof(*hvstate
)) {
1580 address_space_unmap(CPU(cpu
)->as
, hvstate
, len
, 0, false);
1584 memcpy(&hv_state
, hvstate
, len
);
1586 address_space_unmap(CPU(cpu
)->as
, hvstate
, len
, len
, false);
1589 * We accept versions 1 and 2. Version 2 fields are unused because TCG
1590 * does not implement DAWR*.
1592 if (hv_state
.version
> HV_GUEST_STATE_VERSION
) {
1596 spapr_cpu
->nested_host_state
= g_try_new(CPUPPCState
, 1);
1597 if (!spapr_cpu
->nested_host_state
) {
1601 memcpy(spapr_cpu
->nested_host_state
, env
, sizeof(CPUPPCState
));
1603 len
= sizeof(*regs
);
1604 regs
= address_space_map(CPU(cpu
)->as
, regs_ptr
, &len
, false,
1605 MEMTXATTRS_UNSPECIFIED
);
1606 if (!regs
|| len
!= sizeof(*regs
)) {
1607 address_space_unmap(CPU(cpu
)->as
, regs
, len
, 0, false);
1608 g_free(spapr_cpu
->nested_host_state
);
1612 len
= sizeof(env
->gpr
);
1613 assert(len
== sizeof(regs
->gpr
));
1614 memcpy(env
->gpr
, regs
->gpr
, len
);
1616 env
->lr
= regs
->link
;
1617 env
->ctr
= regs
->ctr
;
1618 cpu_write_xer(env
, regs
->xer
);
1621 for (i
= 7; i
>= 0; i
--) {
1622 env
->crf
[i
] = cr
& 15;
1626 env
->msr
= regs
->msr
;
1627 env
->nip
= regs
->nip
;
1629 address_space_unmap(CPU(cpu
)->as
, regs
, len
, len
, false);
1631 env
->cfar
= hv_state
.cfar
;
1633 assert(env
->spr
[SPR_LPIDR
] == 0);
1634 env
->spr
[SPR_LPIDR
] = hv_state
.lpid
;
1636 lpcr_mask
= LPCR_DPFD
| LPCR_ILE
| LPCR_AIL
| LPCR_LD
| LPCR_MER
;
1637 lpcr
= (env
->spr
[SPR_LPCR
] & ~lpcr_mask
) | (hv_state
.lpcr
& lpcr_mask
);
1638 lpcr
|= LPCR_HR
| LPCR_UPRT
| LPCR_GTSE
| LPCR_HVICE
| LPCR_HDICE
;
1639 lpcr
&= ~LPCR_LPES0
;
1640 env
->spr
[SPR_LPCR
] = lpcr
& pcc
->lpcr_mask
;
1642 env
->spr
[SPR_PCR
] = hv_state
.pcr
;
1643 /* hv_state.amor is not used */
1644 env
->spr
[SPR_DPDES
] = hv_state
.dpdes
;
1645 env
->spr
[SPR_HFSCR
] = hv_state
.hfscr
;
1646 hdec
= hv_state
.hdec_expiry
- now
;
1647 spapr_cpu
->nested_tb_offset
= hv_state
.tb_offset
;
1648 /* TCG does not implement DAWR*, CIABR, PURR, SPURR, IC, VTB, HEIR SPRs*/
1649 env
->spr
[SPR_SRR0
] = hv_state
.srr0
;
1650 env
->spr
[SPR_SRR1
] = hv_state
.srr1
;
1651 env
->spr
[SPR_SPRG0
] = hv_state
.sprg
[0];
1652 env
->spr
[SPR_SPRG1
] = hv_state
.sprg
[1];
1653 env
->spr
[SPR_SPRG2
] = hv_state
.sprg
[2];
1654 env
->spr
[SPR_SPRG3
] = hv_state
.sprg
[3];
1655 env
->spr
[SPR_BOOKS_PID
] = hv_state
.pidr
;
1656 env
->spr
[SPR_PPR
] = hv_state
.ppr
;
1658 cpu_ppc_hdecr_init(env
);
1659 cpu_ppc_store_hdecr(env
, hdec
);
1662 * The hv_state.vcpu_token is not needed. It is used by the KVM
1663 * implementation to remember which L2 vCPU last ran on which physical
1664 * CPU so as to invalidate process scope translations if it is moved
1665 * between physical CPUs. For now TLBs are always flushed on L1<->L2
1666 * transitions so this is not a problem.
1668 * Could validate that the same vcpu_token does not attempt to run on
1669 * different L1 vCPUs at the same time, but that would be a L1 KVM bug
1670 * and it's not obviously worth a new data structure to do it.
1673 env
->tb_env
->tb_offset
+= spapr_cpu
->nested_tb_offset
;
1674 spapr_cpu
->in_nested
= true;
1676 hreg_compute_hflags(env
);
1677 ppc_maybe_interrupt(env
);
1679 env
->reserve_addr
= -1; /* Reset the reservation */
1682 * The spapr hcall helper sets env->gpr[3] to the return value, but at
1683 * this point the L1 is not returning from the hcall but rather we
1684 * start running the L2, so r3 must not be clobbered, so return env->gpr[3]
1685 * to leave it unchanged.
1690 void spapr_exit_nested(PowerPCCPU
*cpu
, int excp
)
1692 CPUState
*cs
= CPU(cpu
);
1693 CPUPPCState
*env
= &cpu
->env
;
1694 SpaprCpuState
*spapr_cpu
= spapr_cpu_state(cpu
);
1695 target_ulong r3_return
= env
->excp_vectors
[excp
]; /* hcall return value */
1696 target_ulong hv_ptr
= spapr_cpu
->nested_host_state
->gpr
[4];
1697 target_ulong regs_ptr
= spapr_cpu
->nested_host_state
->gpr
[5];
1698 struct kvmppc_hv_guest_state
*hvstate
;
1699 struct kvmppc_pt_regs
*regs
;
1704 assert(spapr_cpu
->in_nested
);
1706 cpu_ppc_hdecr_exit(env
);
1708 len
= sizeof(*hvstate
);
1709 hvstate
= address_space_map(CPU(cpu
)->as
, hv_ptr
, &len
, true,
1710 MEMTXATTRS_UNSPECIFIED
);
1711 if (len
!= sizeof(*hvstate
)) {
1712 address_space_unmap(CPU(cpu
)->as
, hvstate
, len
, 0, true);
1713 r3_return
= H_PARAMETER
;
1714 goto out_restore_l1
;
1717 hvstate
->cfar
= env
->cfar
;
1718 hvstate
->lpcr
= env
->spr
[SPR_LPCR
];
1719 hvstate
->pcr
= env
->spr
[SPR_PCR
];
1720 hvstate
->dpdes
= env
->spr
[SPR_DPDES
];
1721 hvstate
->hfscr
= env
->spr
[SPR_HFSCR
];
1723 if (excp
== POWERPC_EXCP_HDSI
) {
1724 hvstate
->hdar
= env
->spr
[SPR_HDAR
];
1725 hvstate
->hdsisr
= env
->spr
[SPR_HDSISR
];
1726 hvstate
->asdr
= env
->spr
[SPR_ASDR
];
1727 } else if (excp
== POWERPC_EXCP_HISI
) {
1728 hvstate
->asdr
= env
->spr
[SPR_ASDR
];
1731 /* HEIR should be implemented for HV mode and saved here. */
1732 hvstate
->srr0
= env
->spr
[SPR_SRR0
];
1733 hvstate
->srr1
= env
->spr
[SPR_SRR1
];
1734 hvstate
->sprg
[0] = env
->spr
[SPR_SPRG0
];
1735 hvstate
->sprg
[1] = env
->spr
[SPR_SPRG1
];
1736 hvstate
->sprg
[2] = env
->spr
[SPR_SPRG2
];
1737 hvstate
->sprg
[3] = env
->spr
[SPR_SPRG3
];
1738 hvstate
->pidr
= env
->spr
[SPR_BOOKS_PID
];
1739 hvstate
->ppr
= env
->spr
[SPR_PPR
];
1741 /* Is it okay to specify write length larger than actual data written? */
1742 address_space_unmap(CPU(cpu
)->as
, hvstate
, len
, len
, true);
1744 len
= sizeof(*regs
);
1745 regs
= address_space_map(CPU(cpu
)->as
, regs_ptr
, &len
, true,
1746 MEMTXATTRS_UNSPECIFIED
);
1747 if (!regs
|| len
!= sizeof(*regs
)) {
1748 address_space_unmap(CPU(cpu
)->as
, regs
, len
, 0, true);
1750 goto out_restore_l1
;
1753 len
= sizeof(env
->gpr
);
1754 assert(len
== sizeof(regs
->gpr
));
1755 memcpy(regs
->gpr
, env
->gpr
, len
);
1757 regs
->link
= env
->lr
;
1758 regs
->ctr
= env
->ctr
;
1759 regs
->xer
= cpu_read_xer(env
);
1762 for (i
= 0; i
< 8; i
++) {
1763 cr
|= (env
->crf
[i
] & 15) << (4 * (7 - i
));
1767 if (excp
== POWERPC_EXCP_MCHECK
||
1768 excp
== POWERPC_EXCP_RESET
||
1769 excp
== POWERPC_EXCP_SYSCALL
) {
1770 regs
->nip
= env
->spr
[SPR_SRR0
];
1771 regs
->msr
= env
->spr
[SPR_SRR1
] & env
->msr_mask
;
1773 regs
->nip
= env
->spr
[SPR_HSRR0
];
1774 regs
->msr
= env
->spr
[SPR_HSRR1
] & env
->msr_mask
;
1777 /* Is it okay to specify write length larger than actual data written? */
1778 address_space_unmap(CPU(cpu
)->as
, regs
, len
, len
, true);
1781 memcpy(env
->gpr
, spapr_cpu
->nested_host_state
->gpr
, sizeof(env
->gpr
));
1782 env
->lr
= spapr_cpu
->nested_host_state
->lr
;
1783 env
->ctr
= spapr_cpu
->nested_host_state
->ctr
;
1784 memcpy(env
->crf
, spapr_cpu
->nested_host_state
->crf
, sizeof(env
->crf
));
1785 env
->cfar
= spapr_cpu
->nested_host_state
->cfar
;
1786 env
->xer
= spapr_cpu
->nested_host_state
->xer
;
1787 env
->so
= spapr_cpu
->nested_host_state
->so
;
1788 env
->ov
= spapr_cpu
->nested_host_state
->ov
;
1789 env
->ov32
= spapr_cpu
->nested_host_state
->ov32
;
1790 env
->ca32
= spapr_cpu
->nested_host_state
->ca32
;
1791 env
->msr
= spapr_cpu
->nested_host_state
->msr
;
1792 env
->nip
= spapr_cpu
->nested_host_state
->nip
;
1794 assert(env
->spr
[SPR_LPIDR
] != 0);
1795 env
->spr
[SPR_LPCR
] = spapr_cpu
->nested_host_state
->spr
[SPR_LPCR
];
1796 env
->spr
[SPR_LPIDR
] = spapr_cpu
->nested_host_state
->spr
[SPR_LPIDR
];
1797 env
->spr
[SPR_PCR
] = spapr_cpu
->nested_host_state
->spr
[SPR_PCR
];
1798 env
->spr
[SPR_DPDES
] = 0;
1799 env
->spr
[SPR_HFSCR
] = spapr_cpu
->nested_host_state
->spr
[SPR_HFSCR
];
1800 env
->spr
[SPR_SRR0
] = spapr_cpu
->nested_host_state
->spr
[SPR_SRR0
];
1801 env
->spr
[SPR_SRR1
] = spapr_cpu
->nested_host_state
->spr
[SPR_SRR1
];
1802 env
->spr
[SPR_SPRG0
] = spapr_cpu
->nested_host_state
->spr
[SPR_SPRG0
];
1803 env
->spr
[SPR_SPRG1
] = spapr_cpu
->nested_host_state
->spr
[SPR_SPRG1
];
1804 env
->spr
[SPR_SPRG2
] = spapr_cpu
->nested_host_state
->spr
[SPR_SPRG2
];
1805 env
->spr
[SPR_SPRG3
] = spapr_cpu
->nested_host_state
->spr
[SPR_SPRG3
];
1806 env
->spr
[SPR_BOOKS_PID
] = spapr_cpu
->nested_host_state
->spr
[SPR_BOOKS_PID
];
1807 env
->spr
[SPR_PPR
] = spapr_cpu
->nested_host_state
->spr
[SPR_PPR
];
1810 * Return the interrupt vector address from H_ENTER_NESTED to the L1
1813 env
->gpr
[3] = r3_return
;
1815 env
->tb_env
->tb_offset
-= spapr_cpu
->nested_tb_offset
;
1816 spapr_cpu
->in_nested
= false;
1818 hreg_compute_hflags(env
);
1819 ppc_maybe_interrupt(env
);
1821 env
->reserve_addr
= -1; /* Reset the reservation */
1823 g_free(spapr_cpu
->nested_host_state
);
1824 spapr_cpu
->nested_host_state
= NULL
;
1827 static void hypercall_register_nested(void)
1829 spapr_register_hypercall(KVMPPC_H_SET_PARTITION_TABLE
, h_set_ptbl
);
1830 spapr_register_hypercall(KVMPPC_H_ENTER_NESTED
, h_enter_nested
);
1831 spapr_register_hypercall(KVMPPC_H_TLB_INVALIDATE
, h_tlb_invalidate
);
1832 spapr_register_hypercall(KVMPPC_H_COPY_TOFROM_GUEST
, h_copy_tofrom_guest
);
/*
 * Variant of hypercall_register_softmmu() that registers nothing.
 *
 * NOTE(review): a second definition later in this file registers
 * placeholder handlers instead; presumably a preprocessor conditional not
 * visible in this chunk selects between the two.
 */
static void hypercall_register_softmmu(void)
{
    /* DO NOTHING */
}
1840 void spapr_exit_nested(PowerPCCPU
*cpu
, int excp
)
1842 g_assert_not_reached();
1845 static target_ulong
h_softmmu(PowerPCCPU
*cpu
, SpaprMachineState
*spapr
,
1846 target_ulong opcode
, target_ulong
*args
)
1848 g_assert_not_reached();
/*
 * Variant of hypercall_register_nested() that registers nothing.
 *
 * NOTE(review): a second definition earlier in this file registers the
 * nested-HV handlers; presumably a preprocessor conditional not visible in
 * this chunk selects between the two.
 */
static void hypercall_register_nested(void)
{
    /* DO NOTHING */
}
1856 static void hypercall_register_softmmu(void)
1859 spapr_register_hypercall(H_ENTER
, h_softmmu
);
1860 spapr_register_hypercall(H_REMOVE
, h_softmmu
);
1861 spapr_register_hypercall(H_PROTECT
, h_softmmu
);
1862 spapr_register_hypercall(H_READ
, h_softmmu
);
1865 spapr_register_hypercall(H_BULK_REMOVE
, h_softmmu
);
1869 static void hypercall_register_types(void)
1871 hypercall_register_softmmu();
1873 /* hcall-hpt-resize */
1874 spapr_register_hypercall(H_RESIZE_HPT_PREPARE
, h_resize_hpt_prepare
);
1875 spapr_register_hypercall(H_RESIZE_HPT_COMMIT
, h_resize_hpt_commit
);
1878 spapr_register_hypercall(H_REGISTER_VPA
, h_register_vpa
);
1879 spapr_register_hypercall(H_CEDE
, h_cede
);
1880 spapr_register_hypercall(H_CONFER
, h_confer
);
1881 spapr_register_hypercall(H_PROD
, h_prod
);
1884 spapr_register_hypercall(H_JOIN
, h_join
);
1886 spapr_register_hypercall(H_SIGNAL_SYS_RESET
, h_signal_sys_reset
);
1888 /* processor register resource access h-calls */
1889 spapr_register_hypercall(H_SET_SPRG0
, h_set_sprg0
);
1890 spapr_register_hypercall(H_SET_DABR
, h_set_dabr
);
1891 spapr_register_hypercall(H_SET_XDABR
, h_set_xdabr
);
1892 spapr_register_hypercall(H_PAGE_INIT
, h_page_init
);
1893 spapr_register_hypercall(H_SET_MODE
, h_set_mode
);
1895 /* In Memory Table MMU h-calls */
1896 spapr_register_hypercall(H_CLEAN_SLB
, h_clean_slb
);
1897 spapr_register_hypercall(H_INVALIDATE_PID
, h_invalidate_pid
);
1898 spapr_register_hypercall(H_REGISTER_PROC_TBL
, h_register_process_table
);
1900 /* hcall-get-cpu-characteristics */
1901 spapr_register_hypercall(H_GET_CPU_CHARACTERISTICS
,
1902 h_get_cpu_characteristics
);
1904 /* "debugger" hcalls (also used by SLOF). Note: We do -not- differenciate
1905 * here between the "CI" and the "CACHE" variants, they will use whatever
1906 * mapping attributes qemu is using. When using KVM, the kernel will
1907 * enforce the attributes more strongly
1909 spapr_register_hypercall(H_LOGICAL_CI_LOAD
, h_logical_load
);
1910 spapr_register_hypercall(H_LOGICAL_CI_STORE
, h_logical_store
);
1911 spapr_register_hypercall(H_LOGICAL_CACHE_LOAD
, h_logical_load
);
1912 spapr_register_hypercall(H_LOGICAL_CACHE_STORE
, h_logical_store
);
1913 spapr_register_hypercall(H_LOGICAL_ICBI
, h_logical_icbi
);
1914 spapr_register_hypercall(H_LOGICAL_DCBF
, h_logical_dcbf
);
1915 spapr_register_hypercall(KVMPPC_H_LOGICAL_MEMOP
, h_logical_memop
);
1917 /* qemu/KVM-PPC specific hcalls */
1918 spapr_register_hypercall(KVMPPC_H_RTAS
, h_rtas
);
1920 /* ibm,client-architecture-support support */
1921 spapr_register_hypercall(KVMPPC_H_CAS
, h_client_architecture_support
);
1923 spapr_register_hypercall(KVMPPC_H_UPDATE_DT
, h_update_dt
);
1925 hypercall_register_nested();
1928 type_init(hypercall_register_types
)