// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2017 - Linaro Ltd
 * Author: Jintack Lim <jintack.lim@linaro.org>
 */

#include <linux/kvm_host.h>

#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
struct s1_walk_info {
	/* ... */
	enum trans_regime	regime;
	unsigned int		max_oa_bits;
	/* ... */
};

struct s1_walk_result {
static void fail_s1_walk(struct s1_walk_result *wr, u8 fst, bool ptw, bool s2)
{
	wr->fst		= fst;
	wr->ptw		= ptw;
	wr->s2		= s2;
	wr->failed	= true;
}

#define S1_MMU_DISABLED		(-127)

static int get_ia_size(struct s1_walk_info *wi)
{
	return 64 - wi->txsz;
}
/* Return true if the IPA is out of the OA range */
static bool check_output_size(u64 ipa, struct s1_walk_info *wi)
{
	return wi->max_oa_bits < 48 && (ipa & GENMASK_ULL(47, wi->max_oa_bits));
}
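/*
 * Worked example (illustrative, not from the original source): with
 * wi->max_oa_bits == 40, GENMASK_ULL(47, 40) covers PA bits [47:40],
 * so an IPA such as 0x100_0000_0000 (bit 40 set) is reported as
 * exceeding the configured output size, while anything below 1TB
 * passes the check.
 */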
/* Return the translation regime that applies to an AT instruction */
static enum trans_regime compute_translation_regime(struct kvm_vcpu *vcpu, u32 op)
{
	/*
	 * We only get here from guest EL2, so the translation
	 * regime AT applies to is solely defined by {E2H,TGE}.
	 */
	switch (op) {
	case OP_AT_S1E2R:
	case OP_AT_S1E2W:
	case OP_AT_S1E2A:
		return vcpu_el2_e2h_is_set(vcpu) ? TR_EL20 : TR_EL2;
	default:
		return (vcpu_el2_e2h_is_set(vcpu) &&
			vcpu_el2_tge_is_set(vcpu)) ? TR_EL20 : TR_EL10;
	}
}
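/*
 * Summary of the mapping above (added for illustration):
 *
 *	AT target	E2H	TGE	regime
 *	AT S1E2*	0	x	TR_EL2
 *	AT S1E2*	1	x	TR_EL20
 *	other AT	1	1	TR_EL20
 *	other AT	otherwise	TR_EL10
 */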
static bool s1pie_enabled(struct kvm_vcpu *vcpu, enum trans_regime regime)
{
	if (!kvm_has_s1pie(vcpu->kvm))
		return false;

	switch (regime) {
	case TR_EL2:
	case TR_EL20:
		return vcpu_read_sys_reg(vcpu, TCR2_EL2) & TCR2_EL2_PIE;
	case TR_EL10:
		return	(__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TCR2En) &&
			(__vcpu_sys_reg(vcpu, TCR2_EL1) & TCR2_EL1x_PIE);
	}
}
static void compute_s1poe(struct kvm_vcpu *vcpu, struct s1_walk_info *wi)
{
	u64 val;

	if (!kvm_has_s1poe(vcpu->kvm)) {
		wi->poe = wi->e0poe = false;
		return;
	}

	switch (wi->regime) {
	case TR_EL2:
	case TR_EL20:
		val = vcpu_read_sys_reg(vcpu, TCR2_EL2);
		wi->poe = val & TCR2_EL2_POE;
		wi->e0poe = (wi->regime == TR_EL20) && (val & TCR2_EL2_E0POE);
		break;
	case TR_EL10:
		if (!(__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TCR2En)) {
			wi->poe = wi->e0poe = false;
			return;
		}

		val = __vcpu_sys_reg(vcpu, TCR2_EL1);
		wi->poe = val & TCR2_EL1x_POE;
		wi->e0poe = val & TCR2_EL1x_E0POE;
	}
}
static int setup_s1_walk(struct kvm_vcpu *vcpu, u32 op, struct s1_walk_info *wi,
			 struct s1_walk_result *wr, u64 va)
{
	u64 hcr, sctlr, tcr, tg, ps, ia_bits, ttbr;
	unsigned int stride, x;
	bool va55, tbi, lva, as_el0;

	hcr = __vcpu_sys_reg(vcpu, HCR_EL2);

	wi->regime = compute_translation_regime(vcpu, op);
	as_el0 = (op == OP_AT_S1E0R || op == OP_AT_S1E0W);
	wi->pan = (op == OP_AT_S1E1RP || op == OP_AT_S1E1WP) &&
		  (*vcpu_cpsr(vcpu) & PSR_PAN_BIT);

	va55 = va & BIT(55);

	if (wi->regime == TR_EL2 && va55)
		goto addrsz;

	wi->s2 = wi->regime == TR_EL10 && (hcr & (HCR_VM | HCR_DC));

	switch (wi->regime) {
	case TR_EL10:
		sctlr	= vcpu_read_sys_reg(vcpu, SCTLR_EL1);
		tcr	= vcpu_read_sys_reg(vcpu, TCR_EL1);
		ttbr	= (va55 ?
			   vcpu_read_sys_reg(vcpu, TTBR1_EL1) :
			   vcpu_read_sys_reg(vcpu, TTBR0_EL1));
		break;
	case TR_EL2:
	case TR_EL20:
		sctlr	= vcpu_read_sys_reg(vcpu, SCTLR_EL2);
		tcr	= vcpu_read_sys_reg(vcpu, TCR_EL2);
		ttbr	= (va55 ?
			   vcpu_read_sys_reg(vcpu, TTBR1_EL2) :
			   vcpu_read_sys_reg(vcpu, TTBR0_EL2));
		break;
	}

	tbi = (wi->regime == TR_EL2 ?
	       FIELD_GET(TCR_EL2_TBI, tcr) :
	       (va55 ?
		FIELD_GET(TCR_TBI1, tcr) :
		FIELD_GET(TCR_TBI0, tcr)));

	if (!tbi && (u64)sign_extend64(va, 55) != va)
		goto addrsz;

	va = (u64)sign_extend64(va, 55);
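	/*
	 * Worked example (illustrative, not from the original source):
	 * with TBI disabled, va = 0x12ff_8000_0000_0000 has bit 55 set
	 * but a non-matching tag byte, so sign_extend64(va, 55) yields
	 * 0xffff_8000_0000_0000 != va and the address is rejected.
	 * With TBI enabled, the tag byte is simply replaced by copies
	 * of bit 55 before the walk starts.
	 */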
	/* Let's put the MMU disabled case aside immediately */
	switch (wi->regime) {
	case TR_EL10:
		/*
		 * If dealing with the EL1&0 translation regime, 3 things
		 * can disable the S1 translation:
		 *
		 * - HCR_EL2.DC = 1
		 * - HCR_EL2.{E2H,TGE} = {0,1}
		 * - SCTLR_EL1.M = 0
		 *
		 * The TGE part is interesting. If we have decided that this
		 * is EL1&0, then it means that either {E2H,TGE} == {1,0} or
		 * {0,x}, and we only need to test for TGE == 1.
		 */
		if (hcr & (HCR_DC | HCR_TGE)) {
			wr->level = S1_MMU_DISABLED;
			break;
		}
		fallthrough;
	case TR_EL2:
	case TR_EL20:
		if (!(sctlr & SCTLR_ELx_M))
			wr->level = S1_MMU_DISABLED;
		break;
	}

	if (wr->level == S1_MMU_DISABLED) {
		if (va >= BIT(kvm_get_pa_bits(vcpu->kvm)))
			goto addrsz;

		wr->pa = va;
		return 0;
	}

	wi->be = sctlr & SCTLR_ELx_EE;

	wi->hpd = kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, HPDS, IMP);
	wi->hpd &= (wi->regime == TR_EL2 ?
		    FIELD_GET(TCR_EL2_HPD, tcr) :
		    (va55 ?
		     FIELD_GET(TCR_HPD1, tcr) :
		     FIELD_GET(TCR_HPD0, tcr)));
	wi->hpd |= s1pie_enabled(vcpu, wi->regime);

	/* Do we have POE? */
	compute_s1poe(vcpu, wi);

	wi->hpd |= (wi->poe || wi->e0poe);

	/* Someone was silly enough to encode TG0/TG1 differently */
	if (va55) {
		wi->txsz = FIELD_GET(TCR_T1SZ_MASK, tcr);
		tg = FIELD_GET(TCR_TG1_MASK, tcr);

		switch (tg << TCR_TG1_SHIFT) {
		case TCR_TG1_4K:
			wi->pgshift = 12;	 break;
		case TCR_TG1_16K:
			wi->pgshift = 14;	 break;
		case TCR_TG1_64K:
		default:	    /* IMPDEF: treat any other value as 64k */
			wi->pgshift = 16;	 break;
		}
	} else {
		wi->txsz = FIELD_GET(TCR_T0SZ_MASK, tcr);
		tg = FIELD_GET(TCR_TG0_MASK, tcr);

		switch (tg << TCR_TG0_SHIFT) {
		case TCR_TG0_4K:
			wi->pgshift = 12;	 break;
		case TCR_TG0_16K:
			wi->pgshift = 14;	 break;
		case TCR_TG0_64K:
		default:	    /* IMPDEF: treat any other value as 64k */
			wi->pgshift = 16;	 break;
		}
	}
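	/*
	 * For reference (illustrative): a 4K granule gives pgshift = 12,
	 * 16K gives 14 and 64K gives 16. With 8-byte descriptors this
	 * means 512, 2048 or 8192 entries per table, i.e. 9, 11 or 13
	 * bits of VA resolved per level (the "stride" computed below).
	 */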
	/* R_PLCGL, R_YXNYW */
	if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR2_EL1, ST, 48_47)) {
		if (wi->txsz > 39)
			goto transfault_l0;
	} else {
		if (wi->txsz > 48 || (BIT(wi->pgshift) == SZ_64K && wi->txsz > 47))
			goto transfault_l0;
	}

	/* R_GTJBY, R_SXWGM */
	switch (BIT(wi->pgshift)) {
	case SZ_4K:
		lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN4, 52_BIT);
		lva &= tcr & (wi->regime == TR_EL2 ? TCR_EL2_DS : TCR_DS);
		break;
	case SZ_16K:
		lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN16, 52_BIT);
		lva &= tcr & (wi->regime == TR_EL2 ? TCR_EL2_DS : TCR_DS);
		break;
	case SZ_64K:
		lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, VARange, 52);
		break;
	}

	if ((lva && wi->txsz < 12) || (!lva && wi->txsz < 16))
		goto transfault_l0;

	ia_bits = get_ia_size(wi);

	/* R_YYVYV, I_THCZK */
	if ((!va55 && va > GENMASK(ia_bits - 1, 0)) ||
	    (va55 && va < GENMASK(63, ia_bits)))
		goto transfault_l0;

	if (wi->regime != TR_EL2 &&
	    (tcr & (va55 ? TCR_EPD1_MASK : TCR_EPD0_MASK)))
		goto transfault_l0;

	/* R_BNDVG and following statements */
	if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, E0PD, IMP) &&
	    as_el0 && (tcr & (va55 ? TCR_E0PD1 : TCR_E0PD0)))
		goto transfault_l0;

	/* AArch64.S1StartLevel() */
	stride = wi->pgshift - 3;
	wi->sl = 3 - (((ia_bits - 1) - wi->pgshift) / stride);
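	/*
	 * Worked example (illustrative): for a 4K granule the stride is
	 * 9. With ia_bits = 48, sl = 3 - ((47 - 12) / 9) = 3 - 3 = 0,
	 * i.e. the walk starts at level 0. With ia_bits = 39,
	 * sl = 3 - 2 = 1, the usual 3-level starting point for 39-bit
	 * VAs.
	 */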
	ps = (wi->regime == TR_EL2 ?
	      FIELD_GET(TCR_EL2_PS_MASK, tcr) : FIELD_GET(TCR_IPS_MASK, tcr));

	wi->max_oa_bits = min(get_kvm_ipa_limit(), ps_to_output_size(ps));

	/* Compute minimal alignment */
	x = 3 + ia_bits - ((3 - wi->sl) * stride + wi->pgshift);
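	/*
	 * Worked example (illustrative): with ia_bits = 48, sl = 0 and
	 * a 4K granule, x = 3 + 48 - (3 * 9 + 12) = 12, i.e. the
	 * level-0 table holds 512 entries and TTBR must be 4K-aligned.
	 * With ia_bits = 40 the level-0 table only needs 2 entries, and
	 * x drops to 4 (16-byte alignment).
	 */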
	wi->baddr = ttbr & TTBRx_EL1_BADDR;

	if (check_output_size(wi->baddr, wi))
		goto addrsz;

	wi->baddr &= GENMASK_ULL(wi->max_oa_bits - 1, x);

	return 0;

addrsz:				/* Address Size Fault level 0 */
	fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(0), false, false);
	return -EFAULT;

transfault_l0:			/* Translation Fault level 0 */
	fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(0), false, false);
	return -EFAULT;
}
static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
		   struct s1_walk_result *wr, u64 va)
{
	u64 va_top, va_bottom, baddr, desc;
	int level, stride, ret;

	level = wi->sl;
	stride = wi->pgshift - 3;
	baddr = wi->baddr;

	va_top = get_ia_size(wi) - 1;

	while (1) {
		u64 index, ipa;

		va_bottom = (3 - level) * stride + wi->pgshift;
		index = (va & GENMASK_ULL(va_top, va_bottom)) >> (va_bottom - 3);
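		/*
		 * Worked example (illustrative): at level 0 with a 4K
		 * granule, va_bottom = 3 * 9 + 12 = 39, so the mask
		 * isolates VA[47:39] and the ">> (va_bottom - 3)" turns
		 * it straight into a byte offset (entry number * 8)
		 * into the table.
		 */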
		ipa = baddr | index;

		if (wi->s2) {
			struct kvm_s2_trans s2_trans = {};

			ret = kvm_walk_nested_s2(vcpu, ipa, &s2_trans);
			if (ret) {
				fail_s1_walk(wr,
					     (s2_trans.esr & ~ESR_ELx_FSC_LEVEL) | level,
					     true, true);
				return ret;
			}

			if (!kvm_s2_trans_readable(&s2_trans)) {
				fail_s1_walk(wr, ESR_ELx_FSC_PERM_L(level),
					     true, true);

				return -EPERM;
			}

			ipa = kvm_s2_trans_output(&s2_trans);
		}

		ret = kvm_read_guest(vcpu->kvm, ipa, &desc, sizeof(desc));
		if (ret) {
			fail_s1_walk(wr, ESR_ELx_FSC_SEA_TTW(level),
				     true, false);
			return ret;
		}

		if (wi->be)
			desc = be64_to_cpu((__force __be64)desc);
		else
			desc = le64_to_cpu((__force __le64)desc);

		/* Invalid descriptor */
		if (!(desc & BIT(0)))
			goto transfault;

		/* Block mapping, check validity down the line */
		if (!(desc & BIT(1)))
			break;

		/* Page mapping */
		if (level == 3)
			break;

		/* Table handling */
		if (!wi->hpd) {
			wr->APTable  |= FIELD_GET(S1_TABLE_AP, desc);
			wr->UXNTable |= FIELD_GET(PMD_TABLE_UXN, desc);
			wr->PXNTable |= FIELD_GET(PMD_TABLE_PXN, desc);
		}

		baddr = desc & GENMASK_ULL(47, wi->pgshift);

		/* Check for out-of-range OA */
		if (check_output_size(baddr, wi))
			goto addrsz;

		/* Prepare for next round */
		va_top = va_bottom - 1;
		level++;
	}

	/* Block mapping, check the validity of the level */
	if (!(desc & BIT(1))) {
		bool valid_block = false;

		switch (BIT(wi->pgshift)) {
		case SZ_4K:
			valid_block = level == 1 || level == 2;
			break;
		case SZ_16K:
		case SZ_64K:
			valid_block = level == 2;
			break;
		}

		if (!valid_block)
			goto transfault;
	}

	if (check_output_size(desc & GENMASK(47, va_bottom), wi))
		goto addrsz;

	va_bottom += contiguous_bit_shift(desc, wi, level);

	wr->failed = false;
	wr->level = level;
	wr->desc = desc;
	wr->pa = desc & GENMASK(47, va_bottom);
	wr->pa |= va & GENMASK_ULL(va_bottom - 1, 0);

	return 0;

addrsz:
	fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(level), true, false);
	return -EINVAL;
transfault:
	fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(level), true, false);
	return -ENOENT;
}
static void __mmu_config_save(struct mmu_config *config)
{
	config->ttbr0	= read_sysreg_el1(SYS_TTBR0);
	config->ttbr1	= read_sysreg_el1(SYS_TTBR1);
	config->tcr	= read_sysreg_el1(SYS_TCR);
	config->mair	= read_sysreg_el1(SYS_MAIR);
	if (cpus_have_final_cap(ARM64_HAS_TCR2)) {
		config->tcr2	= read_sysreg_el1(SYS_TCR2);
		if (cpus_have_final_cap(ARM64_HAS_S1PIE)) {
			config->pir	= read_sysreg_el1(SYS_PIR);
			config->pire0	= read_sysreg_el1(SYS_PIRE0);
		}
		if (system_supports_poe()) {
			config->por_el1	= read_sysreg_el1(SYS_POR);
			config->por_el0	= read_sysreg_s(SYS_POR_EL0);
		}
	}
	config->sctlr	= read_sysreg_el1(SYS_SCTLR);
	config->vttbr	= read_sysreg(vttbr_el2);
	config->vtcr	= read_sysreg(vtcr_el2);
	config->hcr	= read_sysreg(hcr_el2);
}
static void __mmu_config_restore(struct mmu_config *config)
{
	write_sysreg(config->hcr,	hcr_el2);

	/*
	 * ARM errata 1165522 and 1530923 require TGE to be 1 before
	 * we update the guest state.
	 */
	asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT));

	write_sysreg_el1(config->ttbr0,	SYS_TTBR0);
	write_sysreg_el1(config->ttbr1,	SYS_TTBR1);
	write_sysreg_el1(config->tcr,	SYS_TCR);
	write_sysreg_el1(config->mair,	SYS_MAIR);
	if (cpus_have_final_cap(ARM64_HAS_TCR2)) {
		write_sysreg_el1(config->tcr2, SYS_TCR2);
		if (cpus_have_final_cap(ARM64_HAS_S1PIE)) {
			write_sysreg_el1(config->pir, SYS_PIR);
			write_sysreg_el1(config->pire0, SYS_PIRE0);
		}
		if (system_supports_poe()) {
			write_sysreg_el1(config->por_el1, SYS_POR);
			write_sysreg_s(config->por_el0, SYS_POR_EL0);
		}
	}
	write_sysreg_el1(config->sctlr,	SYS_SCTLR);
	write_sysreg(config->vttbr,	vttbr_el2);
	write_sysreg(config->vtcr,	vtcr_el2);
}
static bool at_s1e1p_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	u64 host_pan;
	bool fail;

	host_pan = read_sysreg_s(SYS_PSTATE_PAN);
	write_sysreg_s(*vcpu_cpsr(vcpu) & PSTATE_PAN, SYS_PSTATE_PAN);

	switch (op) {
	case OP_AT_S1E1RP:
		fail = __kvm_at(OP_AT_S1E1RP, vaddr);
		break;
	case OP_AT_S1E1WP:
		fail = __kvm_at(OP_AT_S1E1WP, vaddr);
		break;
	}

	write_sysreg_s(host_pan, SYS_PSTATE_PAN);

	return fail;
}
#define MEMATTR(ic, oc)		(MEMATTR_##oc << 4 | MEMATTR_##ic)
#define MEMATTR_NC		0b0100
#define MEMATTR_Wt		0b1000
#define MEMATTR_Wb		0b1100
#define MEMATTR_WbRaWa		0b1111

#define MEMATTR_IS_DEVICE(m)	(((m) & GENMASK(7, 4)) == 0)
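/*
 * Expansion examples (illustrative): MEMATTR(NC, NC) is 0x44,
 * MEMATTR(WbRaWa, WbRaWa) is 0xff (Normal Write-Back, Read/Write
 * allocate), and MEMATTR(Wb, Wt) is 0x8c (outer Write-Through,
 * inner Write-Back). Any value with a zero upper nibble encodes
 * Device memory, which is what MEMATTR_IS_DEVICE() tests.
 */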
static u8 s2_memattr_to_attr(u8 memattr)
{
	switch (memattr) {
	case 0b0100:
		return MEMATTR(Wb, Wb);
	case 0b0101:
		return MEMATTR(NC, NC);
	case 0b0110:
		return MEMATTR(Wt, NC);
	case 0b0111:
		return MEMATTR(Wb, NC);
	case 0b1000:
		/* Reserved, assume NC */
		return MEMATTR(NC, NC);
	case 0b1001:
		return MEMATTR(NC, Wt);
	case 0b1010:
		return MEMATTR(Wt, Wt);
	case 0b1011:
		return MEMATTR(Wb, Wt);
	case 0b1100:
		/* Reserved, assume NC */
		return MEMATTR(NC, NC);
	case 0b1101:
		return MEMATTR(NC, Wb);
	case 0b1110:
		return MEMATTR(Wt, Wb);
	case 0b1111:
		return MEMATTR(Wb, Wb);
	}
}
static u8 combine_s1_s2_attr(u8 s1, u8 s2)
{
	u8 final;

	/* Upgrade transient s1 to non-transient to simplify things */
	switch (s1) {
	case 0b0001 ... 0b0011:	/* Normal, Write-Through Transient */
		s1 = MEMATTR_Wt | (s1 & GENMASK(1,0));
		break;
	case 0b0101 ... 0b0111:	/* Normal, Write-Back Transient */
		s1 = MEMATTR_Wb | (s1 & GENMASK(1,0));
		break;
	}

	/* S2CombineS1AttrHints() */
	if ((s1 & GENMASK(3, 2)) == MEMATTR_NC ||
	    (s2 & GENMASK(3, 2)) == MEMATTR_NC)
		final = MEMATTR_NC;
	else if ((s1 & GENMASK(3, 2)) == MEMATTR_Wt ||
		 (s2 & GENMASK(3, 2)) == MEMATTR_Wt)
		final = MEMATTR_Wt;
	else
		final = MEMATTR_Wb;

	if (final != MEMATTR_NC) {
		/* Inherit RaWa hints from S1 */
		switch (s1 & GENMASK(3, 2)) {
		/* ... */
			final |= s1 & GENMASK(1, 0);
		}
	}

	return final;
}
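/*
 * Worked example (illustrative): s1 = MEMATTR_WbRaWa (0b1111) and
 * s2 = MEMATTR_Wt (0b1000). Neither is Non-cacheable and s2 is
 * Write-Through, so the combined type is Write-Through; the RaWa
 * hints are then inherited from s1, giving 0b1011.
 */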
#define ATTR_NSH	0b00
#define ATTR_RSV	0b01
#define ATTR_OSH	0b10
#define ATTR_ISH	0b11
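/*
 * Note (added for clarity): these are the descriptor SH[1:0]
 * shareability encodings. compute_sh() below maps Device and
 * Normal Non-cacheable memory to Outer Shareable, and combine_sh()
 * picks the wider of the two stages: OSH wins over ISH, which wins
 * over NSH.
 */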
static u8 compute_sh(u8 attr, u64 desc)
{
	u8 sh;

	/* Any form of device, as well as NC has SH[1:0]=0b10 */
	if (MEMATTR_IS_DEVICE(attr) || attr == MEMATTR(NC, NC))
		return ATTR_OSH;

	sh = FIELD_GET(PTE_SHARED, desc);
	if (sh == ATTR_RSV)		/* Reserved, mapped to NSH */
		sh = ATTR_NSH;

	return sh;
}
static u8 combine_sh(u8 s1_sh, u8 s2_sh)
{
	if (s1_sh == ATTR_OSH || s2_sh == ATTR_OSH)
		return ATTR_OSH;
	if (s1_sh == ATTR_ISH || s2_sh == ATTR_ISH)
		return ATTR_ISH;

	return ATTR_NSH;
}
static u64 compute_par_s12(struct kvm_vcpu *vcpu, u64 s1_par,
			   struct kvm_s2_trans *tr)
{
	u8 s1_parattr, s2_memattr, final_attr;
	u64 par;

	/* If S2 has failed to translate, report the damage */
	if (tr->esr) {
		par = SYS_PAR_EL1_RES1;
		par |= SYS_PAR_EL1_F;
		par |= SYS_PAR_EL1_S;
		par |= FIELD_PREP(SYS_PAR_EL1_FST, tr->esr);
		return par;
	}

	s1_parattr = FIELD_GET(SYS_PAR_EL1_ATTR, s1_par);
	s2_memattr = FIELD_GET(GENMASK(5, 2), tr->desc);

	if (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_FWB) {
		if (!kvm_has_feat(vcpu->kvm, ID_AA64PFR2_EL1, MTEPERM, IMP))
			s2_memattr &= ~BIT(3);

		/* Combination of R_VRJSW and R_RHWZM */
		switch (s2_memattr) {
		/* ... */
			if (MEMATTR_IS_DEVICE(s1_parattr))
				final_attr = s1_parattr;
			else
				final_attr = MEMATTR(NC, NC);
			break;
		/* ... */
			final_attr = MEMATTR(WbRaWa, WbRaWa);
			break;
		/* ... */
			/* Preserve S1 attribute */
			final_attr = s1_parattr;
			break;
		/* ... */
			/* Reserved, do something non-silly */
			final_attr = s1_parattr;
			break;
		default:
			/* MemAttr[2]=0, Device from S2 */
			final_attr = (s2_memattr & GENMASK(1, 0)) << 2;
		}
	} else {
		/* Combination of R_HMNDG, R_TNHFM and R_GQFSF */
		u8 s2_parattr = s2_memattr_to_attr(s2_memattr);

		if (MEMATTR_IS_DEVICE(s1_parattr) ||
		    MEMATTR_IS_DEVICE(s2_parattr)) {
			final_attr = min(s1_parattr, s2_parattr);
		} else {
			/* At this stage, this is memory vs memory */
			final_attr  = combine_s1_s2_attr(s1_parattr & 0xf,
							 s2_parattr & 0xf);
			final_attr |= combine_s1_s2_attr(s1_parattr >> 4,
							 s2_parattr >> 4) << 4;
		}
	}

	if ((__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_CD) &&
	    !MEMATTR_IS_DEVICE(final_attr))
		final_attr = MEMATTR(NC, NC);

	par  = FIELD_PREP(SYS_PAR_EL1_ATTR, final_attr);
	par |= tr->output & GENMASK(47, 12);
	par |= FIELD_PREP(SYS_PAR_EL1_SH,
			  combine_sh(FIELD_GET(SYS_PAR_EL1_SH, s1_par),
				     compute_sh(final_attr, tr->desc)));

	return par;
}
static u64 compute_par_s1(struct kvm_vcpu *vcpu, struct s1_walk_result *wr,
			  enum trans_regime regime)
{
	u64 par;

	if (wr->failed) {
		par = SYS_PAR_EL1_RES1;
		par |= SYS_PAR_EL1_F;
		par |= FIELD_PREP(SYS_PAR_EL1_FST, wr->fst);
		par |= wr->ptw ? SYS_PAR_EL1_PTW : 0;
		par |= wr->s2 ? SYS_PAR_EL1_S : 0;
	} else if (wr->level == S1_MMU_DISABLED) {
		/* MMU off or HCR_EL2.DC == 1 */
		par  = SYS_PAR_EL1_NSE;
		par |= wr->pa & GENMASK_ULL(47, 12);

		if (regime == TR_EL10 &&
		    (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_DC)) {
			par |= FIELD_PREP(SYS_PAR_EL1_ATTR,
					  MEMATTR(WbRaWa, WbRaWa));
			par |= FIELD_PREP(SYS_PAR_EL1_SH, ATTR_NSH);
		} else {
			par |= FIELD_PREP(SYS_PAR_EL1_ATTR, 0); /* nGnRnE */
			par |= FIELD_PREP(SYS_PAR_EL1_SH, ATTR_OSH);
		}
	} else {
		u64 mair, sctlr;
		u8 sh;

		par = SYS_PAR_EL1_NSE;

		mair = (regime == TR_EL10 ?
			vcpu_read_sys_reg(vcpu, MAIR_EL1) :
			vcpu_read_sys_reg(vcpu, MAIR_EL2));

		mair >>= FIELD_GET(PTE_ATTRINDX_MASK, wr->desc) * 8;
		mair &= 0xff;

		sctlr = (regime == TR_EL10 ?
			 vcpu_read_sys_reg(vcpu, SCTLR_EL1) :
			 vcpu_read_sys_reg(vcpu, SCTLR_EL2));

		/* Force NC for memory if SCTLR_ELx.C is clear */
		if (!(sctlr & SCTLR_EL1_C) && !MEMATTR_IS_DEVICE(mair))
			mair = MEMATTR(NC, NC);

		par |= FIELD_PREP(SYS_PAR_EL1_ATTR, mair);
		par |= wr->pa & GENMASK_ULL(47, 12);

		sh = compute_sh(mair, wr->desc);
		par |= FIELD_PREP(SYS_PAR_EL1_SH, sh);
	}

	return par;
}
static bool pan3_enabled(struct kvm_vcpu *vcpu, enum trans_regime regime)
{
	u64 sctlr;

	if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, PAN, PAN3))
		return false;

	if (s1pie_enabled(vcpu, regime))
		return true;

	if (regime == TR_EL10)
		sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
	else
		sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL2);

	return sctlr & SCTLR_EL1_EPAN;
}
static void compute_s1_direct_permissions(struct kvm_vcpu *vcpu,
					  struct s1_walk_info *wi,
					  struct s1_walk_result *wr)
{
	bool wxn;

	/* Non-hierarchical part of AArch64.S1DirectBasePermissions() */
	if (wi->regime != TR_EL2) {
		switch (FIELD_GET(PTE_USER | PTE_RDONLY, wr->desc)) {
		case 0b00:
			wr->pr = wr->pw = true;
			wr->ur = wr->uw = false;
			break;
		case 0b01:
			wr->pr = wr->pw = wr->ur = wr->uw = true;
			break;
		case 0b10:
			wr->pr = true;
			wr->pw = wr->ur = wr->uw = false;
			break;
		case 0b11:
			wr->pr = wr->ur = true;
			wr->pw = wr->uw = false;
			break;
		}

		/* We don't use px for anything yet, but hey... */
		wr->px = !((wr->desc & PTE_PXN) || wr->uw);
		wr->ux = !(wr->desc & PTE_UXN);
	} else {
		wr->ur = wr->uw = wr->ux = false;

		if (!(wr->desc & PTE_RDONLY)) {
			wr->pr = wr->pw = true;
		} else {
			wr->pr = true;
			wr->pw = false;
		}

		wr->px = !(wr->desc & PTE_UXN);
	}

	switch (wi->regime) {
	case TR_EL2:
	case TR_EL20:
		wxn = (vcpu_read_sys_reg(vcpu, SCTLR_EL2) & SCTLR_ELx_WXN);
		break;
	case TR_EL10:
		wxn = (__vcpu_sys_reg(vcpu, SCTLR_EL1) & SCTLR_ELx_WXN);
		break;
	}

	wr->pwxn = wr->uwxn = wxn;
}
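/*
 * Direct permission summary (added for clarity, derived from the
 * switch above): the descriptor's {RDONLY,USER} AP bits map to
 *   00 -> priv RW, no unprivileged access
 *   01 -> priv RW, unpriv RW
 *   10 -> priv RO, no unprivileged access
 *   11 -> priv RO, unpriv RO
 * with PXN/UXN handled right after (note that an unprivileged-writable
 * page is never privileged-executable).
 */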
static void compute_s1_hierarchical_permissions(struct kvm_vcpu *vcpu,
						struct s1_walk_info *wi,
						struct s1_walk_result *wr)
{
	/* Hierarchical part of AArch64.S1DirectBasePermissions() */
	if (wi->regime != TR_EL2) {
		switch (wr->APTable) {
		case 0b00:
			break;
		case 0b01:
			wr->ur = wr->uw = false;
			break;
		case 0b10:
			wr->pw = wr->uw = false;
			break;
		case 0b11:
			wr->pw = wr->ur = wr->uw = false;
			break;
		}

		wr->px &= !wr->PXNTable;
		wr->ux &= !wr->UXNTable;
	} else {
		if (wr->APTable & BIT(1))
			wr->pw = false;

		wr->px &= !wr->UXNTable;
	}
}
#define perm_idx(v, r, i)	((vcpu_read_sys_reg((v), (r)) >> ((i) * 4)) & 0xf)
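/*
 * Worked example (illustrative): perm_idx(vcpu, PIR_EL1, 3) extracts
 * bits [15:12] of PIR_EL1, i.e. the 4-bit permission encoding that
 * applies to pages whose PTE permission index is 3.
 */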
#define set_priv_perms(wr, r, w, x)	\
#define set_unpriv_perms(wr, r, w, x)	\
#define set_priv_wxn(wr, v)		\
#define set_unpriv_wxn(wr, v)		\

/* Similar to AArch64.S1IndirectBasePermissions(), without GCS */
#define set_perms(w, wr, ip)		\
		set_ ## w ## _perms((wr), false, false, false);	\
		set_ ## w ## _perms((wr), true , false, false);	\
		set_ ## w ## _perms((wr), false, false, true );	\
		set_ ## w ## _perms((wr), true , false, true );	\
		set_ ## w ## _perms((wr), false, false, false);	\
		set_ ## w ## _perms((wr), true , true , false);	\
		set_ ## w ## _perms((wr), true , true , true );	\
		set_ ## w ## _perms((wr), true , true , true );	\
		set_ ## w ## _perms((wr), true , false, false);	\
		set_ ## w ## _perms((wr), true , false, false);	\
		set_ ## w ## _perms((wr), true , false, true );	\
		set_ ## w ## _perms((wr), false, false, false);	\
		set_ ## w ## _perms((wr), true , true , false);	\
		set_ ## w ## _perms((wr), false, false, false);	\
		set_ ## w ## _perms((wr), true , true , true );	\
		set_ ## w ## _perms((wr), false, false, false);	\
		set_ ## w ## _wxn((wr), ((ip) == 0b0110));	\
static void compute_s1_indirect_permissions(struct kvm_vcpu *vcpu,
					    struct s1_walk_info *wi,
					    struct s1_walk_result *wr)
{
	u8 up, pp, idx;

	idx = pte_pi_index(wr->desc);

	switch (wi->regime) {
	case TR_EL10:
		pp = perm_idx(vcpu, PIR_EL1, idx);
		up = perm_idx(vcpu, PIRE0_EL1, idx);
		break;
	case TR_EL20:
		pp = perm_idx(vcpu, PIR_EL2, idx);
		up = perm_idx(vcpu, PIRE0_EL2, idx);
		break;
	case TR_EL2:
		pp = perm_idx(vcpu, PIR_EL2, idx);
		up = 0;
		break;
	}

	set_perms(priv, wr, pp);

	if (wi->regime != TR_EL2)
		set_perms(unpriv, wr, up);
	else
		set_unpriv_perms(wr, false, false, false);

	wr->pov = wi->poe && !(pp & BIT(3));
	wr->uov = wi->e0poe && !(up & BIT(3));

	if (wr->px && wr->uw) {
		set_priv_perms(wr, false, false, false);
		set_unpriv_perms(wr, false, false, false);
	}
}
static void compute_s1_overlay_permissions(struct kvm_vcpu *vcpu,
					   struct s1_walk_info *wi,
					   struct s1_walk_result *wr)
{
	u8 idx, pov_perms, uov_perms;

	idx = FIELD_GET(PTE_PO_IDX_MASK, wr->desc);

	switch (wi->regime) {
	case TR_EL10:
		pov_perms = perm_idx(vcpu, POR_EL1, idx);
		uov_perms = perm_idx(vcpu, POR_EL0, idx);
		break;
	case TR_EL20:
		pov_perms = perm_idx(vcpu, POR_EL2, idx);
		uov_perms = perm_idx(vcpu, POR_EL0, idx);
		break;
	case TR_EL2:
		pov_perms = perm_idx(vcpu, POR_EL2, idx);
		uov_perms = 0;
		break;
	}

	if (pov_perms & ~POE_RXW)
		pov_perms = POE_NONE;

	if (wi->poe && wr->pov) {
		wr->pr &= pov_perms & POE_R;
		wr->px &= pov_perms & POE_X;
		wr->pw &= pov_perms & POE_W;
	}

	if (uov_perms & ~POE_RXW)
		uov_perms = POE_NONE;

	if (wi->e0poe && wr->uov) {
		wr->ur &= uov_perms & POE_R;
		wr->ux &= uov_perms & POE_X;
		wr->uw &= uov_perms & POE_W;
	}
}
static void compute_s1_permissions(struct kvm_vcpu *vcpu,
				   struct s1_walk_info *wi,
				   struct s1_walk_result *wr)
{
	bool pan;

	if (!s1pie_enabled(vcpu, wi->regime))
		compute_s1_direct_permissions(vcpu, wi, wr);
	else
		compute_s1_indirect_permissions(vcpu, wi, wr);

	if (!wi->hpd)
		compute_s1_hierarchical_permissions(vcpu, wi, wr);

	if (wi->poe || wi->e0poe)
		compute_s1_overlay_permissions(vcpu, wi, wr);

	if (wr->pwxn) {
		if (!wr->pov && wr->pw)
			wr->px = false;
		if (wr->pov && wr->px)
			wr->pw = false;
	}

	if (wr->uwxn) {
		if (!wr->uov && wr->uw)
			wr->ux = false;
		if (wr->uov && wr->ux)
			wr->uw = false;
	}

	pan = wi->pan && (wr->ur || wr->uw ||
			  (pan3_enabled(vcpu, wi->regime) && wr->ux));
	wr->pw &= !pan;
	wr->pr &= !pan;
}
static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	struct s1_walk_result wr = {};
	struct s1_walk_info wi = {};
	bool perm_fail = false;
	int ret, idx;

	ret = setup_s1_walk(vcpu, op, &wi, &wr, vaddr);
	if (ret)
		goto compute_par;

	if (wr.level == S1_MMU_DISABLED)
		goto compute_par;

	idx = srcu_read_lock(&vcpu->kvm->srcu);

	ret = walk_s1(vcpu, &wi, &wr, vaddr);

	srcu_read_unlock(&vcpu->kvm->srcu, idx);

	if (ret)
		goto compute_par;

	compute_s1_permissions(vcpu, &wi, &wr);

	/* ... set perm_fail from the AT op and the computed permissions ... */

	if (perm_fail)
		fail_s1_walk(&wr, ESR_ELx_FSC_PERM_L(wr.level), false, false);

compute_par:
	return compute_par_s1(vcpu, &wr, wi.regime);
}
/*
 * Return the PAR_EL1 value as the result of a valid translation.
 *
 * If the translation is unsuccessful, the value may only contain
 * PAR_EL1.F, and cannot be taken at face value. It isn't an
 * indication of the translation having failed, only that the fast
 * path did not succeed, *unless* it indicates a S1 permission fault.
 */
static u64 __kvm_at_s1e01_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	struct mmu_config config;
	struct kvm_s2_mmu *mmu;
	bool fail;
	u64 par;

	par = SYS_PAR_EL1_F;

	/*
	 * We've trapped, so everything is live on the CPU. As we will
	 * be switching contexts behind everybody's back, disable
	 * interrupts while holding the mmu lock.
	 */
	guard(write_lock_irqsave)(&vcpu->kvm->mmu_lock);

	/*
	 * If HCR_EL2.{E2H,TGE} == {1,1}, the MMU context is already
	 * the right one (as we trapped from vEL2). If not, save the
	 * full MMU context.
	 */
	if (vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu))
		goto skip_mmu_switch;

	/*
	 * Obtaining the S2 MMU for a L2 is horribly racy, and we may not
	 * find it (recycled by another vcpu, for example). When this
	 * happens, admit defeat immediately and use the SW (slow) path.
	 */
	mmu = lookup_s2_mmu(vcpu);
	if (!mmu)
		return par;

	__mmu_config_save(&config);

	write_sysreg_el1(vcpu_read_sys_reg(vcpu, TTBR0_EL1),	SYS_TTBR0);
	write_sysreg_el1(vcpu_read_sys_reg(vcpu, TTBR1_EL1),	SYS_TTBR1);
	write_sysreg_el1(vcpu_read_sys_reg(vcpu, TCR_EL1),	SYS_TCR);
	write_sysreg_el1(vcpu_read_sys_reg(vcpu, MAIR_EL1),	SYS_MAIR);
	if (kvm_has_tcr2(vcpu->kvm)) {
		write_sysreg_el1(vcpu_read_sys_reg(vcpu, TCR2_EL1), SYS_TCR2);
		if (kvm_has_s1pie(vcpu->kvm)) {
			write_sysreg_el1(vcpu_read_sys_reg(vcpu, PIR_EL1), SYS_PIR);
			write_sysreg_el1(vcpu_read_sys_reg(vcpu, PIRE0_EL1), SYS_PIRE0);
		}
		if (kvm_has_s1poe(vcpu->kvm)) {
			write_sysreg_el1(vcpu_read_sys_reg(vcpu, POR_EL1), SYS_POR);
			write_sysreg_s(vcpu_read_sys_reg(vcpu, POR_EL0), SYS_POR_EL0);
		}
	}
	write_sysreg_el1(vcpu_read_sys_reg(vcpu, SCTLR_EL1),	SYS_SCTLR);
	__load_stage2(mmu, mmu->arch);

skip_mmu_switch:
	/* Clear TGE, enable S2 translation, we're rolling */
	write_sysreg((config.hcr & ~HCR_TGE) | HCR_VM, hcr_el2);
	isb();

	switch (op) {
	case OP_AT_S1E1RP:
	case OP_AT_S1E1WP:
		fail = at_s1e1p_fast(vcpu, op, vaddr);
		break;
	case OP_AT_S1E1R:
		fail = __kvm_at(OP_AT_S1E1R, vaddr);
		break;
	case OP_AT_S1E1W:
		fail = __kvm_at(OP_AT_S1E1W, vaddr);
		break;
	case OP_AT_S1E0R:
		fail = __kvm_at(OP_AT_S1E0R, vaddr);
		break;
	case OP_AT_S1E0W:
		fail = __kvm_at(OP_AT_S1E0W, vaddr);
		break;
	case OP_AT_S1E1A:
		fail = __kvm_at(OP_AT_S1E1A, vaddr);
		break;
	default:
		WARN_ON_ONCE(1);
		fail = true;
		break;
	}

	if (!fail)
		par = read_sysreg_par();

	if (!(vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu)))
		__mmu_config_restore(&config);

	return par;
}
static bool par_check_s1_perm_fault(u64 par)
{
	u8 fst = FIELD_GET(SYS_PAR_EL1_FST, par);

	return  ((fst & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_PERM &&
		 !(par & SYS_PAR_EL1_S));
}
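/*
 * Worked example (illustrative): a PAR_EL1.FST of 0x0f is a level-3
 * permission fault (0x0f masked by ESR_ELx_FSC_TYPE gives
 * ESR_ELx_FSC_PERM), so provided PAR_EL1.S is clear (the fault came
 * from stage 1) the helper returns true and the slow path is skipped.
 */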
void __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	u64 par = __kvm_at_s1e01_fast(vcpu, op, vaddr);

	/*
	 * If PAR_EL1 reports that AT failed on a S1 permission fault, we
	 * know for sure that the PTW was able to walk the S1 tables and
	 * there's nothing else to do.
	 *
	 * If AT failed for any other reason, then we must walk the guest S1
	 * to emulate the instruction.
	 */
	if ((par & SYS_PAR_EL1_F) && !par_check_s1_perm_fault(par))
		par = handle_at_slow(vcpu, op, vaddr);

	vcpu_write_sys_reg(vcpu, par, PAR_EL1);
}
void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	u64 par;

	/*
	 * We've trapped, so everything is live on the CPU. As we will be
	 * switching context behind everybody's back, disable interrupts...
	 */
	scoped_guard(write_lock_irqsave, &vcpu->kvm->mmu_lock) {
		u64 val, hcr;
		bool fail;

		val = hcr = read_sysreg(hcr_el2);
		val &= ~HCR_TGE;
		val |= HCR_VM;

		if (!vcpu_el2_e2h_is_set(vcpu))
			val |= HCR_NV | HCR_NV1;

		write_sysreg(val, hcr_el2);
		isb();

		par = SYS_PAR_EL1_F;

		switch (op) {
		case OP_AT_S1E2R:
			fail = __kvm_at(OP_AT_S1E1R, vaddr);
			break;
		case OP_AT_S1E2W:
			fail = __kvm_at(OP_AT_S1E1W, vaddr);
			break;
		case OP_AT_S1E2A:
			fail = __kvm_at(OP_AT_S1E1A, vaddr);
			break;
		default:
			WARN_ON_ONCE(1);
			fail = true;
		}

		if (!fail)
			par = read_sysreg_par();

		write_sysreg(hcr, hcr_el2);
		isb();
	}

	/* We failed the translation, let's replay it in slow motion */
	if ((par & SYS_PAR_EL1_F) && !par_check_s1_perm_fault(par))
		par = handle_at_slow(vcpu, op, vaddr);

	vcpu_write_sys_reg(vcpu, par, PAR_EL1);
}
void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	struct kvm_s2_trans out = {};
	u64 ipa, par;
	bool write;
	int ret;

	/* Do the stage-1 translation */
	switch (op) {
	/* ... map the S12E* op onto its S1E* equivalent and set 'write' ... */
	}

	__kvm_at_s1e01(vcpu, op, vaddr);
	par = vcpu_read_sys_reg(vcpu, PAR_EL1);
	if (par & SYS_PAR_EL1_F)
		return;

	/*
	 * If we only have a single stage of translation (E2H=0 or
	 * TGE=1), exit early. Same thing if {VM,DC}=={0,0}.
	 */
	if (!vcpu_el2_e2h_is_set(vcpu) || vcpu_el2_tge_is_set(vcpu) ||
	    !(vcpu_read_sys_reg(vcpu, HCR_EL2) & (HCR_VM | HCR_DC)))
		return;

	/* Do the stage-2 translation */
	ipa = (par & GENMASK_ULL(47, 12)) | (vaddr & GENMASK_ULL(11, 0));

	ret = kvm_walk_nested_s2(vcpu, ipa, &out);
	if (ret < 0)
		return;

	/* Check the access permission */
	if (!out.esr &&
	    ((!write && !out.readable) || (write && !out.writable)))
		out.esr = ESR_ELx_FSC_PERM_L(out.level & 0x3);

	par = compute_par_s12(vcpu, par, &out);
	vcpu_write_sys_reg(vcpu, par, PAR_EL1);
}