/*
 *  x86 exception helpers - sysemu code
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/cpu_ldst.h"
#include "exec/exec-all.h"
#include "tcg/helper-tcg.h"
typedef struct TranslateParams {
    target_ulong addr;
    target_ulong cr3;
    int pg_mode;
    int mmu_idx;
    int ptw_idx;
    MMUAccessType access_type;
} TranslateParams;

typedef struct TranslateResult {
    hwaddr paddr;
    int prot;
    int page_size;
} TranslateResult;

typedef enum TranslateFaultStage2 {
    S2_NONE,
    S2_GPA,
    S2_GPT,
} TranslateFaultStage2;

typedef struct TranslateFault {
    int exception_index;
    int error_code;
    target_ulong cr2;
    TranslateFaultStage2 stage2;
} TranslateFault;

typedef struct PTETranslate {
    CPUX86State *env;
    TranslateFault *err;
    int ptw_idx;
    uint64_t gaddr;
    void *haddr;
} PTETranslate;
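
/*
 * Translate the address of a page table entry itself: probe the
 * page-table-walk MMU index so the entry can be accessed through a host
 * pointer (haddr) when possible, recording a stage-2 fault on failure.
 */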
static bool ptw_translate(PTETranslate *inout, hwaddr addr)
{
    CPUTLBEntryFull *full;
    int flags;

    inout->gaddr = addr;
    flags = probe_access_full(inout->env, addr, 0, MMU_DATA_STORE,
                              inout->ptw_idx, true, &inout->haddr, &full, 0);

    if (unlikely(flags & TLB_INVALID_MASK)) {
        TranslateFault *err = inout->err;

        assert(inout->ptw_idx == MMU_NESTED_IDX);
        *err = (TranslateFault){
            .error_code = inout->env->error_code,
            .cr2 = addr,
            .stage2 = S2_GPT,
        };
        return false;
    }
    return true;
}
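
/*
 * Load a page table entry: directly via the host pointer obtained by
 * ptw_translate() when available, otherwise through the walk MMU index.
 */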
static inline uint32_t ptw_ldl(const PTETranslate *in)
{
    if (likely(in->haddr)) {
        return ldl_p(in->haddr);
    }
    return cpu_ldl_mmuidx_ra(in->env, in->gaddr, in->ptw_idx, 0);
}
static inline uint64_t ptw_ldq(const PTETranslate *in)
{
    if (likely(in->haddr)) {
        return ldq_p(in->haddr);
    }
    return cpu_ldq_mmuidx_ra(in->env, in->gaddr, in->ptw_idx, 0);
}
/*
 * Note that we can use a 32-bit cmpxchg for all page table entries,
 * even 64-bit ones, because PG_PRESENT_MASK, PG_ACCESSED_MASK and
 * PG_DIRTY_MASK are all in the low 32 bits.
 */
static bool ptw_setl_slow(const PTETranslate *in, uint32_t old, uint32_t new)
{
    uint32_t cmp;

    /* Does x86 really perform a rmw cycle on mmio for ptw? */
    cmp = cpu_ldl_mmuidx_ra(in->env, in->gaddr, in->ptw_idx, 0);
    if (cmp == old) {
        cpu_stl_mmuidx_ra(in->env, in->gaddr, new, in->ptw_idx, 0);
    }
    return cmp == old;
}
static inline bool ptw_setl(const PTETranslate *in, uint32_t old, uint32_t set)
{
    if (set & ~old) {
        uint32_t new = old | set;
        if (likely(in->haddr)) {
            old = cpu_to_le32(old);
            new = cpu_to_le32(new);
            return qatomic_cmpxchg((uint32_t *)in->haddr, old, new) == old;
        }
        return ptw_setl_slow(in, old, new);
    }
    return true;
}
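
/*
 * Perform the guest page table walk described by @in, filling @out with
 * the physical address, protection bits and page size on success, or
 * @err with the fault details on failure.
 */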
static bool mmu_translate(CPUX86State *env, const TranslateParams *in,
                          TranslateResult *out, TranslateFault *err)
{
    const int32_t a20_mask = x86_get_a20_mask(env);
    const target_ulong addr = in->addr;
    const int pg_mode = in->pg_mode;
    const bool is_user = (in->mmu_idx == MMU_USER_IDX);
    const MMUAccessType access_type = in->access_type;
    uint64_t ptep, pte, rsvd_mask;
    PTETranslate pte_trans = {
        .env = env,
        .err = err,
        .ptw_idx = in->ptw_idx,
    };
    hwaddr pte_addr, paddr;
    uint32_t pkr;
    int page_size;
    int error_code;
    int prot;

 restart_all:
    rsvd_mask = ~MAKE_64BIT_MASK(0, env_archcpu(env)->phys_bits);
    rsvd_mask &= PG_ADDRESS_MASK;
    if (!(pg_mode & PG_MODE_NXE)) {
        rsvd_mask |= PG_NX_MASK;
    }
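
    /*
     * Two paging formats follow: the PAE/long-mode format with 64-bit
     * entries (up to 5 levels), and the legacy 32-bit two-level format.
     */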
    if (pg_mode & PG_MODE_PAE) {
#ifdef TARGET_X86_64
        if (pg_mode & PG_MODE_LMA) {
            if (pg_mode & PG_MODE_LA57) {
                pte_addr = ((in->cr3 & ~0xfff) +
                            (((addr >> 48) & 0x1ff) << 3)) & a20_mask;
                if (!ptw_translate(&pte_trans, pte_addr)) {
                    return false;
                }
            restart_5:
                pte = ptw_ldq(&pte_trans);
                if (!(pte & PG_PRESENT_MASK)) {
                    goto do_fault;
                }
                if (pte & (rsvd_mask | PG_PSE_MASK)) {
                    goto do_fault_rsvd;
                }
                if (!ptw_setl(&pte_trans, pte, PG_ACCESSED_MASK)) {
                    goto restart_5;
                }
                ptep = pte ^ PG_NX_MASK;
            } else {
                pte = in->cr3;
                ptep = PG_NX_MASK | PG_USER_MASK | PG_RW_MASK;
            }
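
            /* Page table level 4: indexed by bits 47:39 of the address. */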
            pte_addr = ((pte & PG_ADDRESS_MASK) +
                        (((addr >> 39) & 0x1ff) << 3)) & a20_mask;
            if (!ptw_translate(&pte_trans, pte_addr)) {
                return false;
            }
        restart_4:
            pte = ptw_ldq(&pte_trans);
            if (!(pte & PG_PRESENT_MASK)) {
                goto do_fault;
            }
            if (pte & (rsvd_mask | PG_PSE_MASK)) {
                goto do_fault_rsvd;
            }
            if (!ptw_setl(&pte_trans, pte, PG_ACCESSED_MASK)) {
                goto restart_4;
            }
            ptep &= pte ^ PG_NX_MASK;
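
            /* Page table level 3: bits 38:30; PSE here means a 1 GiB page. */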
            pte_addr = ((pte & PG_ADDRESS_MASK) +
                        (((addr >> 30) & 0x1ff) << 3)) & a20_mask;
            if (!ptw_translate(&pte_trans, pte_addr)) {
                return false;
            }
        restart_3_lma:
            pte = ptw_ldq(&pte_trans);
            if (!(pte & PG_PRESENT_MASK)) {
                goto do_fault;
            }
            if (pte & rsvd_mask) {
                goto do_fault_rsvd;
            }
            if (!ptw_setl(&pte_trans, pte, PG_ACCESSED_MASK)) {
                goto restart_3_lma;
            }
            ptep &= pte ^ PG_NX_MASK;
            if (pte & PG_PSE_MASK) {
                page_size = 1024 * 1024 * 1024;
                goto do_check_protect;
            }
        } else
#endif
        {
            pte_addr = ((in->cr3 & ~0x1f) + ((addr >> 27) & 0x18)) & a20_mask;
            if (!ptw_translate(&pte_trans, pte_addr)) {
                return false;
            }
            rsvd_mask |= PG_HI_USER_MASK;
        restart_3_nolma:
            pte = ptw_ldq(&pte_trans);
            if (!(pte & PG_PRESENT_MASK)) {
                goto do_fault;
            }
            if (pte & (rsvd_mask | PG_NX_MASK)) {
                goto do_fault_rsvd;
            }
            if (!ptw_setl(&pte_trans, pte, PG_ACCESSED_MASK)) {
                goto restart_3_nolma;
            }
            ptep = PG_NX_MASK | PG_USER_MASK | PG_RW_MASK;
        }
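
        /* Page table level 2: bits 29:21; PSE here means a 2 MiB page. */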
        pte_addr = ((pte & PG_ADDRESS_MASK) +
                    (((addr >> 21) & 0x1ff) << 3)) & a20_mask;
        if (!ptw_translate(&pte_trans, pte_addr)) {
            return false;
        }
    restart_2_pae:
        pte = ptw_ldq(&pte_trans);
        if (!(pte & PG_PRESENT_MASK)) {
            goto do_fault;
        }
        if (pte & rsvd_mask) {
            goto do_fault_rsvd;
        }
        if (pte & PG_PSE_MASK) {
            page_size = 2048 * 1024;
            ptep &= pte ^ PG_NX_MASK;
            goto do_check_protect;
        }
        if (!ptw_setl(&pte_trans, pte, PG_ACCESSED_MASK)) {
            goto restart_2_pae;
        }
        ptep &= pte ^ PG_NX_MASK;
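
        /* Page table level 1: bits 20:12 select the final 4 KiB PTE. */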
        pte_addr = ((pte & PG_ADDRESS_MASK) +
                    (((addr >> 12) & 0x1ff) << 3)) & a20_mask;
        if (!ptw_translate(&pte_trans, pte_addr)) {
            return false;
        }
        pte = ptw_ldq(&pte_trans);
        if (!(pte & PG_PRESENT_MASK)) {
            goto do_fault;
        }
        if (pte & rsvd_mask) {
            goto do_fault_rsvd;
        }
        /* combine pde and pte nx, user and rw protections */
        ptep &= pte ^ PG_NX_MASK;
        page_size = 4096;
    } else {
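        /* Legacy 32-bit paging: two-level walk with 32-bit entries. */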
        pte_addr = ((in->cr3 & ~0xfff) + ((addr >> 20) & 0xffc)) & a20_mask;
        if (!ptw_translate(&pte_trans, pte_addr)) {
            return false;
        }
    restart_2_nopae:
        pte = ptw_ldl(&pte_trans);
        if (!(pte & PG_PRESENT_MASK)) {
            goto do_fault;
        }
        ptep = pte | PG_NX_MASK;

        /* if PSE bit is set, then we use a 4MB page */
        if ((pte & PG_PSE_MASK) && (pg_mode & PG_MODE_PSE)) {
            page_size = 4096 * 1024;
            /*
             * Bits 20-13 provide bits 39-32 of the address, bit 21 is reserved.
             * Leave bits 20-13 in place for setting accessed/dirty bits below.
             */
            pte = (uint32_t)pte | ((pte & 0x1fe000LL) << (32 - 13));
            rsvd_mask = 0x200000;
            goto do_check_protect_pse36;
        }
        if (!ptw_setl(&pte_trans, pte, PG_ACCESSED_MASK)) {
            goto restart_2_nopae;
        }
        pte_addr = ((pte & ~0xfffu) + ((addr >> 10) & 0xffc)) & a20_mask;
        if (!ptw_translate(&pte_trans, pte_addr)) {
            return false;
        }
        pte = ptw_ldl(&pte_trans);
        if (!(pte & PG_PRESENT_MASK)) {
            goto do_fault;
        }
        /* combine pde and pte user and rw protections */
        ptep &= pte | PG_NX_MASK;
        page_size = 4096;
    }
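
    /*
     * Both walk formats converge here: check reserved bits, then compute
     * the page protection from the accumulated ptep bits.
     */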
 do_check_protect:
    rsvd_mask |= (page_size - 1) & PG_ADDRESS_MASK & ~PG_PSE_PAT_MASK;
 do_check_protect_pse36:
    if (pte & rsvd_mask) {
        goto do_fault_rsvd;
    }
    ptep ^= PG_NX_MASK;

    /* can the page be put in the TLB?  prot will tell us */
    if (is_user && !(ptep & PG_USER_MASK)) {
        goto do_fault_protect;
    }
    prot = 0;
    if (in->mmu_idx != MMU_KSMAP_IDX || !(ptep & PG_USER_MASK)) {
        prot |= PAGE_READ;
        if ((ptep & PG_RW_MASK) || !(is_user || (pg_mode & PG_MODE_WP))) {
            prot |= PAGE_WRITE;
        }
    }
    if (!(ptep & PG_NX_MASK) &&
        (is_user ||
         !((pg_mode & PG_MODE_SMEP) && (ptep & PG_USER_MASK)))) {
        prot |= PAGE_EXEC;
    }
    if (ptep & PG_USER_MASK) {
        pkr = pg_mode & PG_MODE_PKE ? env->pkru : 0;
    } else {
        pkr = pg_mode & PG_MODE_PKS ? env->pkrs : 0;
    }
    if (pkr) {
        uint32_t pk = (pte & PG_PKRU_MASK) >> PG_PKRU_BIT;
        uint32_t pkr_ad = (pkr >> pk * 2) & 1;
        uint32_t pkr_wd = (pkr >> pk * 2) & 2;
        uint32_t pkr_prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;

        if (pkr_ad) {
            pkr_prot &= ~(PAGE_READ | PAGE_WRITE);
        } else if (pkr_wd && (is_user || (pg_mode & PG_MODE_WP))) {
            pkr_prot &= ~PAGE_WRITE;
        }
        if ((pkr_prot & (1 << access_type)) == 0) {
            goto do_fault_pk_protect;
        }
        prot &= pkr_prot;
    }
    if ((prot & (1 << access_type)) == 0) {
        goto do_fault_protect;
    }

    {
        uint32_t set = PG_ACCESSED_MASK;
        if (access_type == MMU_DATA_STORE) {
            set |= PG_DIRTY_MASK;
        } else if (!(pte & PG_DIRTY_MASK)) {
            /*
             * Only set write access if already dirty...
             * otherwise wait for dirty access.
             */
            prot &= ~PAGE_WRITE;
        }
        if (!ptw_setl(&pte_trans, pte, set)) {
            /*
             * We can arrive here from any of 3 levels and 2 formats.
             * The only safe thing is to restart the entire lookup.
             */
            goto restart_all;
        }
    }
    /* align to page_size */
    paddr = (pte & a20_mask & PG_ADDRESS_MASK & ~(page_size - 1))
          | (addr & (page_size - 1));
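
    /*
     * With nested paging, the guest-physical address computed above must
     * itself be translated through the stage-2 (NPT) tables.
     */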
    if (in->ptw_idx == MMU_NESTED_IDX) {
        CPUTLBEntryFull *full;
        int flags, nested_page_size;

        flags = probe_access_full(env, paddr, 0, access_type,
                                  MMU_NESTED_IDX, true,
                                  &pte_trans.haddr, &full, 0);
        if (unlikely(flags & TLB_INVALID_MASK)) {
            *err = (TranslateFault){
                .error_code = env->error_code,
                .cr2 = paddr,
                .stage2 = S2_GPA,
            };
            return false;
        }

        /* Merge stage1 & stage2 protection bits. */
        prot &= full->prot;

        /* Re-verify resulting protection. */
        if ((prot & (1 << access_type)) == 0) {
            goto do_fault_protect;
        }

        /* Merge stage1 & stage2 addresses to final physical address. */
        nested_page_size = 1 << full->lg_page_size;
        paddr = (full->phys_addr & ~(nested_page_size - 1))
              | (paddr & (nested_page_size - 1));

        /*
         * Use the larger of stage1 & stage2 page sizes, so that
         * invalidation works.
         */
        if (nested_page_size > page_size) {
            page_size = nested_page_size;
        }
    }
    out->paddr = paddr;
    out->prot = prot;
    out->page_size = page_size;
    return true;
 do_fault_rsvd:
    error_code = PG_ERROR_RSVD_MASK;
    goto do_fault_cont;
 do_fault_protect:
    error_code = PG_ERROR_P_MASK;
    goto do_fault_cont;
 do_fault_pk_protect:
    assert(access_type != MMU_INST_FETCH);
    error_code = PG_ERROR_PK_MASK | PG_ERROR_P_MASK;
    goto do_fault_cont;
 do_fault:
    error_code = 0;
 do_fault_cont:
    if (is_user) {
        error_code |= PG_ERROR_U_MASK;
    }
    switch (access_type) {
    case MMU_DATA_LOAD:
        break;
    case MMU_DATA_STORE:
        error_code |= PG_ERROR_W_MASK;
        break;
    case MMU_INST_FETCH:
        if (pg_mode & (PG_MODE_NXE | PG_MODE_SMEP)) {
            error_code |= PG_ERROR_I_D_MASK;
        }
        break;
    }
    *err = (TranslateFault){
        .exception_index = EXCP0E_PAGE,
        .error_code = error_code,
        .cr2 = addr,
    };
    return false;
}
static G_NORETURN void raise_stage2(CPUX86State *env, TranslateFault *err,
                                    uintptr_t retaddr)
{
    uint64_t exit_info_1 = err->error_code;

    switch (err->stage2) {
    case S2_GPT:
        exit_info_1 |= SVM_NPTEXIT_GPT;
        break;
    case S2_GPA:
        exit_info_1 |= SVM_NPTEXIT_GPA;
        break;
    default:
        g_assert_not_reached();
    }

    x86_stq_phys(env_cpu(env),
                 env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2),
                 err->cr2);
    cpu_vmexit(env, SVM_EXIT_NPF, exit_info_1, retaddr);
}
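
/*
 * Select the translation regime for @mmu_idx: physical (no translation),
 * nested-only, or full guest paging via mmu_translate().
 */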
static bool get_physical_address(CPUX86State *env, vaddr addr,
                                 MMUAccessType access_type, int mmu_idx,
                                 TranslateResult *out, TranslateFault *err)
{
    TranslateParams in;
    bool use_stage2 = env->hflags2 & HF2_NPT_MASK;

    in.addr = addr;
    in.access_type = access_type;

    switch (mmu_idx) {
    case MMU_PHYS_IDX:
        break;

    case MMU_NESTED_IDX:
        if (likely(use_stage2)) {
            in.cr3 = env->nested_cr3;
            in.pg_mode = env->nested_pg_mode;
            in.mmu_idx = MMU_USER_IDX;
            in.ptw_idx = MMU_PHYS_IDX;

            if (!mmu_translate(env, &in, out, err)) {
                err->stage2 = S2_GPA;
                return false;
            }
            return true;
        }
        break;
    default:
        if (likely(env->cr[0] & CR0_PG_MASK)) {
            in.cr3 = env->cr[3];
            in.mmu_idx = mmu_idx;
            in.ptw_idx = use_stage2 ? MMU_NESTED_IDX : MMU_PHYS_IDX;
            in.pg_mode = get_pg_mode(env);

            if (in.pg_mode & PG_MODE_LMA) {
                /* test virtual address sign extension */
                int shift = in.pg_mode & PG_MODE_LA57 ? 56 : 47;
                int64_t sext = (int64_t)addr >> shift;
                if (sext != 0 && sext != -1) {
                    *err = (TranslateFault){
                        .exception_index = EXCP0D_GPF,
                        .error_code = 0,
                        .cr2 = addr,
                    };
                    return false;
                }
            }
            return mmu_translate(env, &in, out, err);
        }
        break;
    }
    /* Translation disabled. */
    out->paddr = addr & x86_get_a20_mask(env);
#ifdef TARGET_X86_64
    if (!(env->hflags & HF_LMA_MASK)) {
        /* Without long mode we can only address 32bits in real mode */
        out->paddr = (uint32_t)out->paddr;
    }
#endif
    out->prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
    out->page_size = TARGET_PAGE_SIZE;
    return true;
}
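
/*
 * TCG tlb_fill hook: translate @addr and install the mapping in the TLB,
 * or deliver the fault (stage-2 vmexit, #PF or #GP) at @retaddr.
 */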
bool x86_cpu_tlb_fill(CPUState *cs, vaddr addr, int size,
                      MMUAccessType access_type, int mmu_idx,
                      bool probe, uintptr_t retaddr)
{
    CPUX86State *env = cs->env_ptr;
    TranslateResult out;
    TranslateFault err;

    if (get_physical_address(env, addr, access_type, mmu_idx, &out, &err)) {
        /*
         * Even if 4MB pages, we map only one 4KB page in the cache to
         * avoid filling it too fast.
         */
        assert(out.prot & (1 << access_type));
        tlb_set_page_with_attrs(cs, addr & TARGET_PAGE_MASK,
                                out.paddr & TARGET_PAGE_MASK,
                                cpu_get_mem_attrs(env),
                                out.prot, mmu_idx, out.page_size);
        return true;
    }
    if (probe) {
        /* This will be used if recursing for stage2 translation. */
        env->error_code = err.error_code;
        return false;
    }

    if (err.stage2 != S2_NONE) {
        raise_stage2(env, &err, retaddr);
    }

    if (env->intercept_exceptions & (1 << err.exception_index)) {
        /* cr2 is not modified in case of exceptions */
        x86_stq_phys(cs, env->vm_vmcb +
                     offsetof(struct vmcb, control.exit_info_2),
                     err.cr2);
    } else {
        env->cr[2] = err.cr2;
    }
    raise_exception_err_ra(env, err.exception_index, err.error_code, retaddr);
}
G_NORETURN void x86_cpu_do_unaligned_access(CPUState *cs, vaddr vaddr,
                                            MMUAccessType access_type,
                                            int mmu_idx, uintptr_t retaddr)
{
    X86CPU *cpu = X86_CPU(cs);

    handle_unaligned_access(&cpu->env, vaddr, access_type, retaddr);
}