/*
 *  x86 exception helpers - sysemu code
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/cpu_ldst.h"
#include "exec/exec-all.h"
#include "exec/page-protection.h"
#include "tcg/helper-tcg.h"

typedef struct TranslateParams {
    target_ulong addr;
    target_ulong cr3;
    int pg_mode;
    int mmu_idx;
    int ptw_idx;
    MMUAccessType access_type;
} TranslateParams;

typedef struct TranslateResult {
    hwaddr paddr;
    int prot;
    int page_size;
} TranslateResult;

typedef enum TranslateFaultStage2 {
    S2_NONE,
    S2_GPA,
    S2_GPT,
} TranslateFaultStage2;

typedef struct TranslateFault {
    int exception_index;
    int error_code;
    target_ulong cr2;
    TranslateFaultStage2 stage2;
} TranslateFault;

typedef struct PTETranslate {
    CPUX86State *env;
    TranslateFault *err;
    int ptw_idx;
    void *haddr;
    hwaddr gaddr;
} PTETranslate;
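
/*
 * Set up *inout for accessing the page-table page that contains the entry
 * at guest-physical address addr: probe it through the page-walk MMU index
 * so that haddr caches a host pointer when possible, and report a stage-2
 * fault through inout->err if the nested walk fails.
 */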
static bool ptw_translate(PTETranslate *inout, hwaddr addr, uint64_t ra)
{
    CPUTLBEntryFull *full;
    int flags;

    inout->gaddr = addr;
    flags = probe_access_full(inout->env, addr, 0, MMU_DATA_STORE,
                              inout->ptw_idx, true, &inout->haddr, &full, ra);

    if (unlikely(flags & TLB_INVALID_MASK)) {
        TranslateFault *err = inout->err;

        assert(inout->ptw_idx == MMU_NESTED_IDX);
        *err = (TranslateFault){
            .error_code = inout->env->error_code,
            .cr2 = addr,
            .stage2 = S2_GPT,
        };
        return false;
    }
    return true;
}
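
/*
 * Load a 32-bit or 64-bit page-table entry, using the cached host pointer
 * when available and falling back to a load through the page-walk MMU index.
 */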
static inline uint32_t ptw_ldl(const PTETranslate *in, uint64_t ra)
{
    if (likely(in->haddr)) {
        return ldl_p(in->haddr);
    }
    return cpu_ldl_mmuidx_ra(in->env, in->gaddr, in->ptw_idx, ra);
}

static inline uint64_t ptw_ldq(const PTETranslate *in, uint64_t ra)
{
    if (likely(in->haddr)) {
        return ldq_p(in->haddr);
    }
    return cpu_ldq_mmuidx_ra(in->env, in->gaddr, in->ptw_idx, ra);
}

/*
 * Note that we can use a 32-bit cmpxchg for all page table entries,
 * even 64-bit ones, because PG_PRESENT_MASK, PG_ACCESSED_MASK and
 * PG_DIRTY_MASK are all in the low 32 bits.
 */
static bool ptw_setl_slow(const PTETranslate *in, uint32_t old, uint32_t new)
{
    uint32_t cmp;

    /* Does x86 really perform a rmw cycle on mmio for ptw? */
    start_exclusive();
    cmp = cpu_ldl_mmuidx_ra(in->env, in->gaddr, in->ptw_idx, 0);
    if (cmp == old) {
        cpu_stl_mmuidx_ra(in->env, in->gaddr, new, in->ptw_idx, 0);
    }
    end_exclusive();
    return cmp == old;
}

static inline bool ptw_setl(const PTETranslate *in, uint32_t old, uint32_t set)
{
    if (set & ~old) {
        uint32_t new = old | set;
        if (likely(in->haddr)) {
            old = cpu_to_le32(old);
            new = cpu_to_le32(new);
            return qatomic_cmpxchg((uint32_t *)in->haddr, old, new) == old;
        }
        return ptw_setl_slow(in, old, new);
    }
    return true;
}
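
/*
 * Walk the stage-1 page tables for the access described by *in.  On success
 * fill *out with the physical address, protections and page size; on failure
 * fill *err and return false.
 */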
static bool mmu_translate(CPUX86State *env, const TranslateParams *in,
                          TranslateResult *out, TranslateFault *err,
                          uint64_t ra)
{
    const target_ulong addr = in->addr;
    const int pg_mode = in->pg_mode;
    const bool is_user = is_mmu_index_user(in->mmu_idx);
    const MMUAccessType access_type = in->access_type;
    uint64_t ptep, pte, rsvd_mask;
    PTETranslate pte_trans = {
        .env = env,
        .err = err,
        .ptw_idx = in->ptw_idx,
    };
    hwaddr pte_addr, paddr;
    uint32_t pkr;
    int page_size;
    int error_code;
    int prot;

 restart_all:
    rsvd_mask = ~MAKE_64BIT_MASK(0, env_archcpu(env)->phys_bits);
    rsvd_mask &= PG_ADDRESS_MASK;
    if (!(pg_mode & PG_MODE_NXE)) {
        rsvd_mask |= PG_NX_MASK;
    }

    if (pg_mode & PG_MODE_PAE) {
#ifdef TARGET_X86_64
        if (pg_mode & PG_MODE_LMA) {
            if (pg_mode & PG_MODE_LA57) {
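                /*
                 * Page table level 5
                 */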
                pte_addr = (in->cr3 & ~0xfff) + (((addr >> 48) & 0x1ff) << 3);
                if (!ptw_translate(&pte_trans, pte_addr, ra)) {
                    return false;
                }
            restart_5:
                pte = ptw_ldq(&pte_trans, ra);
                if (!(pte & PG_PRESENT_MASK)) {
                    goto do_fault;
                }
                if (pte & (rsvd_mask | PG_PSE_MASK)) {
                    goto do_fault_rsvd;
                }
                if (!ptw_setl(&pte_trans, pte, PG_ACCESSED_MASK)) {
                    goto restart_5;
                }
                ptep = pte ^ PG_NX_MASK;
            } else {
                pte = in->cr3;
                ptep = PG_NX_MASK | PG_USER_MASK | PG_RW_MASK;
            }
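
            /*
             * Page table level 4
             */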
            pte_addr = (pte & PG_ADDRESS_MASK) + (((addr >> 39) & 0x1ff) << 3);
            if (!ptw_translate(&pte_trans, pte_addr, ra)) {
                return false;
            }
        restart_4:
            pte = ptw_ldq(&pte_trans, ra);
            if (!(pte & PG_PRESENT_MASK)) {
                goto do_fault;
            }
            if (pte & (rsvd_mask | PG_PSE_MASK)) {
                goto do_fault_rsvd;
            }
            if (!ptw_setl(&pte_trans, pte, PG_ACCESSED_MASK)) {
                goto restart_4;
            }
            ptep &= pte ^ PG_NX_MASK;
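
            /*
             * Page table level 3
             */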
            pte_addr = (pte & PG_ADDRESS_MASK) + (((addr >> 30) & 0x1ff) << 3);
            if (!ptw_translate(&pte_trans, pte_addr, ra)) {
                return false;
            }
        restart_3_lma:
            pte = ptw_ldq(&pte_trans, ra);
            if (!(pte & PG_PRESENT_MASK)) {
                goto do_fault;
            }
            if (pte & rsvd_mask) {
                goto do_fault_rsvd;
            }
            if (!ptw_setl(&pte_trans, pte, PG_ACCESSED_MASK)) {
                goto restart_3_lma;
            }
            ptep &= pte ^ PG_NX_MASK;
            if (pte & PG_PSE_MASK) {
                /* large page */
                page_size = 1024 * 1024 * 1024;
                goto do_check_protect;
            }
        } else
#endif
        {
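            /*
             * Page table level 3
             */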
            pte_addr = (in->cr3 & 0xffffffe0ULL) + ((addr >> 27) & 0x18);
            if (!ptw_translate(&pte_trans, pte_addr, ra)) {
                return false;
            }
            rsvd_mask |= PG_HI_USER_MASK;
        restart_3_nolma:
            pte = ptw_ldq(&pte_trans, ra);
            if (!(pte & PG_PRESENT_MASK)) {
                goto do_fault;
            }
            if (pte & (rsvd_mask | PG_NX_MASK)) {
                goto do_fault_rsvd;
            }
            if (!ptw_setl(&pte_trans, pte, PG_ACCESSED_MASK)) {
                goto restart_3_nolma;
            }
            ptep = PG_NX_MASK | PG_USER_MASK | PG_RW_MASK;
        }
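
        /*
         * Page table level 2
         */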
        pte_addr = (pte & PG_ADDRESS_MASK) + (((addr >> 21) & 0x1ff) << 3);
        if (!ptw_translate(&pte_trans, pte_addr, ra)) {
            return false;
        }
    restart_2_pae:
        pte = ptw_ldq(&pte_trans, ra);
        if (!(pte & PG_PRESENT_MASK)) {
            goto do_fault;
        }
        if (pte & rsvd_mask) {
            goto do_fault_rsvd;
        }
        if (pte & PG_PSE_MASK) {
            /* large page */
            page_size = 2048 * 1024;
            ptep &= pte ^ PG_NX_MASK;
            goto do_check_protect;
        }
        if (!ptw_setl(&pte_trans, pte, PG_ACCESSED_MASK)) {
            goto restart_2_pae;
        }
        ptep &= pte ^ PG_NX_MASK;
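
        /*
         * Page table level 1
         */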
        pte_addr = (pte & PG_ADDRESS_MASK) + (((addr >> 12) & 0x1ff) << 3);
        if (!ptw_translate(&pte_trans, pte_addr, ra)) {
            return false;
        }
        pte = ptw_ldq(&pte_trans, ra);
        if (!(pte & PG_PRESENT_MASK)) {
            goto do_fault;
        }
        if (pte & rsvd_mask) {
            goto do_fault_rsvd;
        }
        /* combine pde and pte nx, user and rw protections */
        ptep &= pte ^ PG_NX_MASK;
        page_size = 4096;
    } else {
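        /*
         * Page table level 2
         */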
        pte_addr = (in->cr3 & 0xfffff000ULL) + ((addr >> 20) & 0xffc);
        if (!ptw_translate(&pte_trans, pte_addr, ra)) {
            return false;
        }
    restart_2_nopae:
        pte = ptw_ldl(&pte_trans, ra);
        if (!(pte & PG_PRESENT_MASK)) {
            goto do_fault;
        }
        ptep = pte | PG_NX_MASK;

        /* if PSE bit is set, then we use a 4MB page */
        if ((pte & PG_PSE_MASK) && (pg_mode & PG_MODE_PSE)) {
            page_size = 4096 * 1024;
            /*
             * Bits 20-13 provide bits 39-32 of the address, bit 21 is reserved.
             * Leave bits 20-13 in place for setting accessed/dirty bits below.
             */
            pte = (uint32_t)pte | ((pte & 0x1fe000LL) << (32 - 13));
            rsvd_mask = 0x200000;
            goto do_check_protect_pse36;
        }
        if (!ptw_setl(&pte_trans, pte, PG_ACCESSED_MASK)) {
            goto restart_2_nopae;
        }
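
        /*
         * Page table level 1
         */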
        pte_addr = (pte & ~0xfffu) + ((addr >> 10) & 0xffc);
        if (!ptw_translate(&pte_trans, pte_addr, ra)) {
            return false;
        }
        pte = ptw_ldl(&pte_trans, ra);
        if (!(pte & PG_PRESENT_MASK)) {
            goto do_fault;
        }
        /* combine pde and pte user and rw protections */
        ptep &= pte | PG_NX_MASK;
        page_size = 4096;
        rsvd_mask = 0;
    }
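
    /*
     * Check reserved bits and access rights on the final mapping.
     */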
 do_check_protect:
    rsvd_mask |= (page_size - 1) & PG_ADDRESS_MASK & ~PG_PSE_PAT_MASK;
 do_check_protect_pse36:
    if (pte & rsvd_mask) {
        goto do_fault_rsvd;
    }
    ptep ^= PG_NX_MASK;

    /* can the page be put in the TLB? prot will tell us */
    if (is_user && !(ptep & PG_USER_MASK)) {
        goto do_fault_protect;
    }

    prot = 0;
    if (!is_mmu_index_smap(in->mmu_idx) || !(ptep & PG_USER_MASK)) {
        prot |= PAGE_READ;
        if ((ptep & PG_RW_MASK) || !(is_user || (pg_mode & PG_MODE_WP))) {
            prot |= PAGE_WRITE;
        }
    }
    if (!(ptep & PG_NX_MASK) &&
        (is_user ||
         !((pg_mode & PG_MODE_SMEP) && (ptep & PG_USER_MASK)))) {
        prot |= PAGE_EXEC;
    }
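
    /*
     * Apply protection keys (PKRU for user pages, PKRS for supervisor pages)
     * on top of the page-table permissions.
     */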
    if (ptep & PG_USER_MASK) {
        pkr = pg_mode & PG_MODE_PKE ? env->pkru : 0;
    } else {
        pkr = pg_mode & PG_MODE_PKS ? env->pkrs : 0;
    }
    if (pkr) {
        uint32_t pk = (pte & PG_PKRU_MASK) >> PG_PKRU_BIT;
        uint32_t pkr_ad = (pkr >> pk * 2) & 1;
        uint32_t pkr_wd = (pkr >> pk * 2) & 2;
        uint32_t pkr_prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;

        if (pkr_ad) {
            pkr_prot &= ~(PAGE_READ | PAGE_WRITE);
        } else if (pkr_wd && (is_user || (pg_mode & PG_MODE_WP))) {
            pkr_prot &= ~PAGE_WRITE;
        }
        if ((pkr_prot & (1 << access_type)) == 0) {
            goto do_fault_pk_protect;
        }
        prot &= pkr_prot;
    }

    if ((prot & (1 << access_type)) == 0) {
        goto do_fault_protect;
    }
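
    /*
     * The access is permitted; atomically set the accessed bit (and the
     * dirty bit for stores), restarting the walk if the entry changed.
     */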
    {
        uint32_t set = PG_ACCESSED_MASK;
        if (access_type == MMU_DATA_STORE) {
            set |= PG_DIRTY_MASK;
        } else if (!(pte & PG_DIRTY_MASK)) {
            /*
             * Only set write access if already dirty...
             * otherwise wait for dirty access.
             */
            prot &= ~PAGE_WRITE;
        }
        if (!ptw_setl(&pte_trans, pte, set)) {
            /*
             * We can arrive here from any of 3 levels and 2 formats.
             * The only safe thing is to restart the entire lookup.
             */
            goto restart_all;
        }
    }

    /* merge offset within page */
    paddr = (pte & PG_ADDRESS_MASK & ~(page_size - 1)) | (addr & (page_size - 1));

    /*
     * Note that NPT is walked (for both paging structures and final guest
     * addresses) using the address with the A20 bit set.
     */
    if (in->ptw_idx == MMU_NESTED_IDX) {
        CPUTLBEntryFull *full;
        int flags, nested_page_size;

        flags = probe_access_full(env, paddr, 0, access_type,
                                  MMU_NESTED_IDX, true,
                                  &pte_trans.haddr, &full, 0);
        if (unlikely(flags & TLB_INVALID_MASK)) {
            *err = (TranslateFault){
                .error_code = env->error_code,
                .cr2 = paddr,
                .stage2 = S2_GPA,
            };
            return false;
        }

        /* Merge stage1 & stage2 protection bits. */
        prot &= full->prot;

        /* Re-verify resulting protection. */
        if ((prot & (1 << access_type)) == 0) {
            goto do_fault_protect;
        }

        /* Merge stage1 & stage2 addresses to final physical address. */
        nested_page_size = 1 << full->lg_page_size;
        paddr = (full->phys_addr & ~(nested_page_size - 1))
              | (paddr & (nested_page_size - 1));

        /*
         * Use the larger of stage1 & stage2 page sizes, so that
         * invalidation works.
         */
        if (nested_page_size > page_size) {
            page_size = nested_page_size;
        }
    }

    out->paddr = paddr & x86_get_a20_mask(env);
    out->prot = prot;
    out->page_size = page_size;
    return true;
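
    /*
     * Fault exits: build the page-fault error code from the fault kind,
     * privilege level and access type, then report it through *err.
     */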
 do_fault_rsvd:
    error_code = PG_ERROR_RSVD_MASK;
    goto do_fault_cont;
 do_fault_protect:
    error_code = PG_ERROR_P_MASK;
    goto do_fault_cont;
 do_fault_pk_protect:
    assert(access_type != MMU_INST_FETCH);
    error_code = PG_ERROR_PK_MASK | PG_ERROR_P_MASK;
    goto do_fault_cont;
 do_fault:
    error_code = 0;
 do_fault_cont:
    if (is_user) {
        error_code |= PG_ERROR_U_MASK;
    }
    switch (access_type) {
    case MMU_DATA_LOAD:
        break;
    case MMU_DATA_STORE:
        error_code |= PG_ERROR_W_MASK;
        break;
    case MMU_INST_FETCH:
        if (pg_mode & (PG_MODE_NXE | PG_MODE_SMEP)) {
            error_code |= PG_ERROR_I_D_MASK;
        }
        break;
    }
    *err = (TranslateFault){
        .exception_index = EXCP0E_PAGE,
        .error_code = error_code,
        .cr2 = addr,
    };
    return false;
}
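
/*
 * Report a fault taken during the nested (stage-2) walk as an SVM nested
 * page fault VMEXIT, with the guest-physical address in EXITINFO2.
 */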
static G_NORETURN void raise_stage2(CPUX86State *env, TranslateFault *err,
                                    uintptr_t retaddr)
{
    uint64_t exit_info_1 = err->error_code;

    switch (err->stage2) {
    case S2_GPT:
        exit_info_1 |= SVM_NPTEXIT_GPT;
        break;
    case S2_GPA:
        exit_info_1 |= SVM_NPTEXIT_GPA;
        break;
    default:
        g_assert_not_reached();
    }

    x86_stq_phys(env_cpu(env),
                 env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2),
                 err->cr2);
    cpu_vmexit(env, SVM_EXIT_NPF, exit_info_1, retaddr);
}
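
/*
 * Dispatch on mmu_idx: physical accesses need no translation, nested
 * accesses walk the stage-2 tables only, and everything else goes through
 * the normal stage-1 walk when paging is enabled.
 */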
static bool get_physical_address(CPUX86State *env, vaddr addr,
                                 MMUAccessType access_type, int mmu_idx,
                                 TranslateResult *out, TranslateFault *err,
                                 uint64_t ra)
{
    TranslateParams in;
    bool use_stage2 = env->hflags2 & HF2_NPT_MASK;

    in.addr = addr;
    in.access_type = access_type;

    switch (mmu_idx) {
    case MMU_PHYS_IDX:
        break;

    case MMU_NESTED_IDX:
        if (likely(use_stage2)) {
            in.cr3 = env->nested_cr3;
            in.pg_mode = env->nested_pg_mode;
            in.mmu_idx =
                env->nested_pg_mode & PG_MODE_LMA ? MMU_USER64_IDX : MMU_USER32_IDX;
            in.ptw_idx = MMU_PHYS_IDX;

            if (!mmu_translate(env, &in, out, err, ra)) {
                err->stage2 = S2_GPA;
                return false;
            }
            return true;
        }
        break;

    default:
        if (is_mmu_index_32(mmu_idx)) {
            addr = (uint32_t)addr;
        }

        if (likely(env->cr[0] & CR0_PG_MASK)) {
            in.cr3 = env->cr[3];
            in.mmu_idx = mmu_idx;
            in.ptw_idx = use_stage2 ? MMU_NESTED_IDX : MMU_PHYS_IDX;
            in.pg_mode = get_pg_mode(env);

            if (in.pg_mode & PG_MODE_LMA) {
                /* test virtual address sign extension */
                int shift = in.pg_mode & PG_MODE_LA57 ? 56 : 47;
                int64_t sext = (int64_t)addr >> shift;
                if (sext != 0 && sext != -1) {
                    *err = (TranslateFault){
                        .exception_index = EXCP0D_GPF,
                        .cr2 = addr,
                    };
                    return false;
                }
            }
            return mmu_translate(env, &in, out, err, ra);
        }
        break;
    }

    /* No translation needed. */
    out->paddr = addr & x86_get_a20_mask(env);
    out->prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
    out->page_size = TARGET_PAGE_SIZE;
    return true;
}
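
/*
 * TCG tlb_fill hook: translate the access and install the page in the TLB,
 * or deliver the resulting page fault (or nested fault) to the guest.
 */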
bool x86_cpu_tlb_fill(CPUState *cs, vaddr addr, int size,
                      MMUAccessType access_type, int mmu_idx,
                      bool probe, uintptr_t retaddr)
{
    CPUX86State *env = cpu_env(cs);
    TranslateResult out;
    TranslateFault err;

    if (get_physical_address(env, addr, access_type, mmu_idx, &out, &err,
                             retaddr)) {
        /*
         * Even if 4MB pages, we map only one 4KB page in the cache to
         * avoid filling it too fast.
         */
        assert(out.prot & (1 << access_type));
        tlb_set_page_with_attrs(cs, addr & TARGET_PAGE_MASK,
                                out.paddr & TARGET_PAGE_MASK,
                                cpu_get_mem_attrs(env),
                                out.prot, mmu_idx, out.page_size);
        return true;
    }

    if (probe) {
        /* This will be used if recursing for stage2 translation. */
        env->error_code = err.error_code;
        return false;
    }

    if (err.stage2 != S2_NONE) {
        raise_stage2(env, &err, retaddr);
    }

    if (env->intercept_exceptions & (1 << err.exception_index)) {
        /* cr2 is not modified in case of exceptions */
        x86_stq_phys(cs, env->vm_vmcb +
                     offsetof(struct vmcb, control.exit_info_2),
                     err.cr2);
    } else {
        env->cr[2] = err.cr2;
    }
    raise_exception_err_ra(env, err.exception_index, err.error_code, retaddr);
}
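
/*
 * TCG do_unaligned_access hook: forward the faulting access to the common
 * x86 handler, which raises the architectural exception.
 */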
G_NORETURN void x86_cpu_do_unaligned_access(CPUState *cs, vaddr vaddr,
                                            MMUAccessType access_type,
                                            int mmu_idx, uintptr_t retaddr)
{
    X86CPU *cpu = X86_CPU(cs);

    handle_unaligned_access(&cpu->env, vaddr, access_type, retaddr);
}