/* SPDX-License-Identifier: GPL-2.0 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kvm_host.h>
#include "x86.h"
#include "kvm_cache_regs.h"
#include "kvm_emulate.h"
#include "smm.h"
#include "cpuid.h"
#include "trace.h"
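
/*
 * Layout checks for the SMRAM state-save image.  KVM reads and writes the
 * 512-byte save area at SMBASE + 0xFE00, so every field of the 32-bit and
 * 64-bit state structures must land on its architectural offset; the macros
 * below express those offsets relative to 0xFE00.
 */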
#define CHECK_SMRAM32_OFFSET(field, offset) \
        ASSERT_STRUCT_OFFSET(struct kvm_smram_state_32, field, offset - 0xFE00)

#define CHECK_SMRAM64_OFFSET(field, offset) \
        ASSERT_STRUCT_OFFSET(struct kvm_smram_state_64, field, offset - 0xFE00)

static void check_smram_offsets(void)
{
        /* 32 bit SMRAM image */
        CHECK_SMRAM32_OFFSET(reserved1, 0xFE00);
        CHECK_SMRAM32_OFFSET(smbase, 0xFEF8);
        CHECK_SMRAM32_OFFSET(smm_revision, 0xFEFC);
        CHECK_SMRAM32_OFFSET(io_inst_restart, 0xFF00);
        CHECK_SMRAM32_OFFSET(auto_hlt_restart, 0xFF02);
        CHECK_SMRAM32_OFFSET(io_restart_rdi, 0xFF04);
        CHECK_SMRAM32_OFFSET(io_restart_rcx, 0xFF08);
        CHECK_SMRAM32_OFFSET(io_restart_rsi, 0xFF0C);
        CHECK_SMRAM32_OFFSET(io_restart_rip, 0xFF10);
        CHECK_SMRAM32_OFFSET(cr4, 0xFF14);
        CHECK_SMRAM32_OFFSET(reserved2, 0xFF18);
        CHECK_SMRAM32_OFFSET(int_shadow, 0xFF1A);
        CHECK_SMRAM32_OFFSET(reserved3, 0xFF1B);
        CHECK_SMRAM32_OFFSET(ds, 0xFF2C);
        CHECK_SMRAM32_OFFSET(fs, 0xFF38);
        CHECK_SMRAM32_OFFSET(gs, 0xFF44);
        CHECK_SMRAM32_OFFSET(idtr, 0xFF50);
        CHECK_SMRAM32_OFFSET(tr, 0xFF5C);
        CHECK_SMRAM32_OFFSET(gdtr, 0xFF6C);
        CHECK_SMRAM32_OFFSET(ldtr, 0xFF78);
        CHECK_SMRAM32_OFFSET(es, 0xFF84);
        CHECK_SMRAM32_OFFSET(cs, 0xFF90);
        CHECK_SMRAM32_OFFSET(ss, 0xFF9C);
        CHECK_SMRAM32_OFFSET(es_sel, 0xFFA8);
        CHECK_SMRAM32_OFFSET(cs_sel, 0xFFAC);
        CHECK_SMRAM32_OFFSET(ss_sel, 0xFFB0);
        CHECK_SMRAM32_OFFSET(ds_sel, 0xFFB4);
        CHECK_SMRAM32_OFFSET(fs_sel, 0xFFB8);
        CHECK_SMRAM32_OFFSET(gs_sel, 0xFFBC);
        CHECK_SMRAM32_OFFSET(ldtr_sel, 0xFFC0);
        CHECK_SMRAM32_OFFSET(tr_sel, 0xFFC4);
        CHECK_SMRAM32_OFFSET(dr7, 0xFFC8);
        CHECK_SMRAM32_OFFSET(dr6, 0xFFCC);
        CHECK_SMRAM32_OFFSET(gprs, 0xFFD0);
        CHECK_SMRAM32_OFFSET(eip, 0xFFF0);
        CHECK_SMRAM32_OFFSET(eflags, 0xFFF4);
        CHECK_SMRAM32_OFFSET(cr3, 0xFFF8);
        CHECK_SMRAM32_OFFSET(cr0, 0xFFFC);

        /* 64 bit SMRAM image */
        CHECK_SMRAM64_OFFSET(es, 0xFE00);
        CHECK_SMRAM64_OFFSET(cs, 0xFE10);
        CHECK_SMRAM64_OFFSET(ss, 0xFE20);
        CHECK_SMRAM64_OFFSET(ds, 0xFE30);
        CHECK_SMRAM64_OFFSET(fs, 0xFE40);
        CHECK_SMRAM64_OFFSET(gs, 0xFE50);
        CHECK_SMRAM64_OFFSET(gdtr, 0xFE60);
        CHECK_SMRAM64_OFFSET(ldtr, 0xFE70);
        CHECK_SMRAM64_OFFSET(idtr, 0xFE80);
        CHECK_SMRAM64_OFFSET(tr, 0xFE90);
        CHECK_SMRAM64_OFFSET(io_restart_rip, 0xFEA0);
        CHECK_SMRAM64_OFFSET(io_restart_rcx, 0xFEA8);
        CHECK_SMRAM64_OFFSET(io_restart_rsi, 0xFEB0);
        CHECK_SMRAM64_OFFSET(io_restart_rdi, 0xFEB8);
        CHECK_SMRAM64_OFFSET(io_restart_dword, 0xFEC0);
        CHECK_SMRAM64_OFFSET(reserved1, 0xFEC4);
        CHECK_SMRAM64_OFFSET(io_inst_restart, 0xFEC8);
        CHECK_SMRAM64_OFFSET(auto_hlt_restart, 0xFEC9);
        CHECK_SMRAM64_OFFSET(amd_nmi_mask, 0xFECA);
        CHECK_SMRAM64_OFFSET(int_shadow, 0xFECB);
        CHECK_SMRAM64_OFFSET(reserved2, 0xFECC);
        CHECK_SMRAM64_OFFSET(efer, 0xFED0);
        CHECK_SMRAM64_OFFSET(svm_guest_flag, 0xFED8);
        CHECK_SMRAM64_OFFSET(svm_guest_vmcb_gpa, 0xFEE0);
        CHECK_SMRAM64_OFFSET(svm_guest_virtual_int, 0xFEE8);
        CHECK_SMRAM64_OFFSET(reserved3, 0xFEF0);
        CHECK_SMRAM64_OFFSET(smm_revison, 0xFEFC);
        CHECK_SMRAM64_OFFSET(smbase, 0xFF00);
        CHECK_SMRAM64_OFFSET(reserved4, 0xFF04);
        CHECK_SMRAM64_OFFSET(ssp, 0xFF18);
        CHECK_SMRAM64_OFFSET(svm_guest_pat, 0xFF20);
        CHECK_SMRAM64_OFFSET(svm_host_efer, 0xFF28);
        CHECK_SMRAM64_OFFSET(svm_host_cr4, 0xFF30);
        CHECK_SMRAM64_OFFSET(svm_host_cr3, 0xFF38);
        CHECK_SMRAM64_OFFSET(svm_host_cr0, 0xFF40);
        CHECK_SMRAM64_OFFSET(cr4, 0xFF48);
        CHECK_SMRAM64_OFFSET(cr3, 0xFF50);
        CHECK_SMRAM64_OFFSET(cr0, 0xFF58);
        CHECK_SMRAM64_OFFSET(dr7, 0xFF60);
        CHECK_SMRAM64_OFFSET(dr6, 0xFF68);
        CHECK_SMRAM64_OFFSET(rflags, 0xFF70);
        CHECK_SMRAM64_OFFSET(rip, 0xFF78);
        CHECK_SMRAM64_OFFSET(gprs, 0xFF80);

        BUILD_BUG_ON(sizeof(union kvm_smram) != 512);
}

#undef CHECK_SMRAM64_OFFSET
#undef CHECK_SMRAM32_OFFSET
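
/*
 * Flip the vCPU's SMM state: on entry only HF_SMM_MASK is set; on exit both
 * SMM hflags are cleared, a latched INIT or SMI is reprocessed via
 * KVM_REQ_EVENT, and the PDPTRs are marked stale so they are reloaded from
 * guest memory.  The MMU context is reset for both directions.
 */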
void kvm_smm_changed(struct kvm_vcpu *vcpu, bool entering_smm)
{
        trace_kvm_smm_transition(vcpu->vcpu_id, vcpu->arch.smbase, entering_smm);

        if (entering_smm) {
                vcpu->arch.hflags |= HF_SMM_MASK;
        } else {
                vcpu->arch.hflags &= ~(HF_SMM_MASK | HF_SMM_INSIDE_NMI_MASK);

                /* Process a latched INIT or SMI, if any. */
                kvm_make_request(KVM_REQ_EVENT, vcpu);

                /*
                 * Even if KVM_SET_SREGS2 loaded PDPTRs out of band,
                 * on SMM exit we still need to reload them from
                 * guest memory
                 */
                vcpu->arch.pdptrs_from_userspace = false;
        }

        kvm_mmu_reset_context(vcpu);
}
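
/* Record a pending SMI and request that event injection be re-evaluated. */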
void process_smi(struct kvm_vcpu *vcpu)
{
        vcpu->arch.smi_pending = true;
        kvm_make_request(KVM_REQ_EVENT, vcpu);
}
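
/*
 * Pack a kvm_segment's attribute bits into a 32-bit SMRAM segment "flags"
 * value, mirroring the layout of a descriptor's second dword
 * (G, D/B, L, AVL, P, DPL, S, type).
 */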
static u32 enter_smm_get_segment_flags(struct kvm_segment *seg)
{
        u32 flags = 0;

        flags |= seg->g       << 23;
        flags |= seg->db      << 22;
        flags |= seg->l       << 21;
        flags |= seg->avl     << 20;
        flags |= seg->present << 15;
        flags |= seg->dpl     << 13;
        flags |= seg->s       << 12;
        flags |= seg->type    << 8;

        return flags;
}
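
/* Save one segment register into the 32-bit SMRAM image; the selector is stored separately. */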
static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu,
                                  struct kvm_smm_seg_state_32 *state,
                                  u32 *selector, int n)
{
        struct kvm_segment seg;

        kvm_get_segment(vcpu, &seg, n);
        *selector = seg.selector;
        state->base = seg.base;
        state->limit = seg.limit;
        state->flags = enter_smm_get_segment_flags(&seg);
}
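
/* Save one segment register into the 64-bit SMRAM image; attributes are the packed flags >> 8. */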
static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu,
                                  struct kvm_smm_seg_state_64 *state,
                                  int n)
{
        struct kvm_segment seg;

        kvm_get_segment(vcpu, &seg, n);
        state->selector = seg.selector;
        state->attributes = enter_smm_get_segment_flags(&seg) >> 8;
        state->limit = seg.limit;
        state->base = seg.base;
}
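
/*
 * Build the legacy 32-bit SMRAM state-save image from the current vCPU state,
 * using SMM revision 0x00020000.
 */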
static void enter_smm_save_state_32(struct kvm_vcpu *vcpu,
                                    struct kvm_smram_state_32 *smram)
{
        struct desc_ptr dt;
        int i;

        smram->cr0     = kvm_read_cr0(vcpu);
        smram->cr3     = kvm_read_cr3(vcpu);
        smram->eflags  = kvm_get_rflags(vcpu);
        smram->eip     = kvm_rip_read(vcpu);

        for (i = 0; i < 8; i++)
                smram->gprs[i] = kvm_register_read_raw(vcpu, i);

        smram->dr6     = (u32)vcpu->arch.dr6;
        smram->dr7     = (u32)vcpu->arch.dr7;

        enter_smm_save_seg_32(vcpu, &smram->tr, &smram->tr_sel, VCPU_SREG_TR);
        enter_smm_save_seg_32(vcpu, &smram->ldtr, &smram->ldtr_sel, VCPU_SREG_LDTR);

        kvm_x86_call(get_gdt)(vcpu, &dt);
        smram->gdtr.base = dt.address;
        smram->gdtr.limit = dt.size;

        kvm_x86_call(get_idt)(vcpu, &dt);
        smram->idtr.base = dt.address;
        smram->idtr.limit = dt.size;

        enter_smm_save_seg_32(vcpu, &smram->es, &smram->es_sel, VCPU_SREG_ES);
        enter_smm_save_seg_32(vcpu, &smram->cs, &smram->cs_sel, VCPU_SREG_CS);
        enter_smm_save_seg_32(vcpu, &smram->ss, &smram->ss_sel, VCPU_SREG_SS);

        enter_smm_save_seg_32(vcpu, &smram->ds, &smram->ds_sel, VCPU_SREG_DS);
        enter_smm_save_seg_32(vcpu, &smram->fs, &smram->fs_sel, VCPU_SREG_FS);
        enter_smm_save_seg_32(vcpu, &smram->gs, &smram->gs_sel, VCPU_SREG_GS);

        smram->cr4 = kvm_read_cr4(vcpu);
        smram->smm_revision = 0x00020000;
        smram->smbase = vcpu->arch.smbase;

        smram->int_shadow = kvm_x86_call(get_interrupt_shadow)(vcpu);
}
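
/*
 * Build the 64-bit SMRAM state-save image from the current vCPU state; the
 * GPRs are stored in reverse order and the SMM revision is 0x00020064.
 */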
static void enter_smm_save_state_64(struct kvm_vcpu *vcpu,
                                    struct kvm_smram_state_64 *smram)
{
        struct desc_ptr dt;
        int i;

        for (i = 0; i < 16; i++)
                smram->gprs[15 - i] = kvm_register_read_raw(vcpu, i);

        smram->rip    = kvm_rip_read(vcpu);
        smram->rflags = kvm_get_rflags(vcpu);

        smram->dr6 = vcpu->arch.dr6;
        smram->dr7 = vcpu->arch.dr7;

        smram->cr0 = kvm_read_cr0(vcpu);
        smram->cr3 = kvm_read_cr3(vcpu);
        smram->cr4 = kvm_read_cr4(vcpu);

        smram->smbase = vcpu->arch.smbase;
        smram->smm_revison = 0x00020064;

        smram->efer = vcpu->arch.efer;

        enter_smm_save_seg_64(vcpu, &smram->tr, VCPU_SREG_TR);

        kvm_x86_call(get_idt)(vcpu, &dt);
        smram->idtr.limit = dt.size;
        smram->idtr.base = dt.address;

        enter_smm_save_seg_64(vcpu, &smram->ldtr, VCPU_SREG_LDTR);

        kvm_x86_call(get_gdt)(vcpu, &dt);
        smram->gdtr.limit = dt.size;
        smram->gdtr.base = dt.address;

        enter_smm_save_seg_64(vcpu, &smram->es, VCPU_SREG_ES);
        enter_smm_save_seg_64(vcpu, &smram->cs, VCPU_SREG_CS);
        enter_smm_save_seg_64(vcpu, &smram->ss, VCPU_SREG_SS);
        enter_smm_save_seg_64(vcpu, &smram->ds, VCPU_SREG_DS);
        enter_smm_save_seg_64(vcpu, &smram->fs, VCPU_SREG_FS);
        enter_smm_save_seg_64(vcpu, &smram->gs, VCPU_SREG_GS);

        smram->int_shadow = kvm_x86_call(get_interrupt_shadow)(vcpu);
}
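
/*
 * Emulate entry to SMM: save the current state into a zeroed SMRAM image,
 * write it to guest memory at SMBASE + 0xFE00, and then reset the vCPU to
 * the architectural SMM startup state (CS base = SMBASE, RIP = 0x8000,
 * 4GiB segment limits, paging and protection disabled).  Any failure kills
 * the VM, since its state would otherwise be undefined.
 */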
void enter_smm(struct kvm_vcpu *vcpu)
{
        struct kvm_segment cs, ds;
        struct desc_ptr dt;
        unsigned long cr0;
        union kvm_smram smram;

        check_smram_offsets();

        memset(smram.bytes, 0, sizeof(smram.bytes));

        if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
                enter_smm_save_state_64(vcpu, &smram.smram64);
        else
                enter_smm_save_state_32(vcpu, &smram.smram32);

        /*
         * Give enter_smm() a chance to make ISA-specific changes to the vCPU
         * state (e.g. leave guest mode) after we've saved the state into the
         * SMM state-save area.
         *
         * Kill the VM in the unlikely case of failure, because the VM
         * can be in undefined state in this case.
         */
        if (kvm_x86_call(enter_smm)(vcpu, &smram))
                goto error;

        kvm_smm_changed(vcpu, true);

        if (kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, &smram, sizeof(smram)))
                goto error;

        if (kvm_x86_call(get_nmi_mask)(vcpu))
                vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
        else
                kvm_x86_call(set_nmi_mask)(vcpu, true);

        kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
        kvm_rip_write(vcpu, 0x8000);

        kvm_x86_call(set_interrupt_shadow)(vcpu, 0);

        cr0 = vcpu->arch.cr0 & ~(X86_CR0_PE | X86_CR0_EM | X86_CR0_TS | X86_CR0_PG);
        kvm_x86_call(set_cr0)(vcpu, cr0);

        kvm_x86_call(set_cr4)(vcpu, 0);

        /* Undocumented: IDT limit is set to zero on entry to SMM. */
        dt.address = dt.size = 0;
        kvm_x86_call(set_idt)(vcpu, &dt);

        if (WARN_ON_ONCE(kvm_set_dr(vcpu, 7, DR7_FIXED_1)))
                goto error;

        cs.selector = (vcpu->arch.smbase >> 4) & 0xffff;
        cs.base = vcpu->arch.smbase;

        ds.selector = 0;
        ds.base = 0;

        cs.limit    = ds.limit = 0xffffffff;
        cs.type     = ds.type = 0x3;
        cs.dpl      = ds.dpl = 0;
        cs.db       = ds.db = 0;
        cs.s        = ds.s = 1;
        cs.l        = ds.l = 0;
        cs.g        = ds.g = 1;
        cs.avl      = ds.avl = 0;
        cs.present  = ds.present = 1;
        cs.unusable = ds.unusable = 0;
        cs.padding  = ds.padding = 0;

        kvm_set_segment(vcpu, &cs, VCPU_SREG_CS);
        kvm_set_segment(vcpu, &ds, VCPU_SREG_DS);
        kvm_set_segment(vcpu, &ds, VCPU_SREG_ES);
        kvm_set_segment(vcpu, &ds, VCPU_SREG_FS);
        kvm_set_segment(vcpu, &ds, VCPU_SREG_GS);
        kvm_set_segment(vcpu, &ds, VCPU_SREG_SS);

        if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
                if (kvm_x86_call(set_efer)(vcpu, 0))
                        goto error;

        kvm_update_cpuid_runtime(vcpu);
        kvm_mmu_reset_context(vcpu);
        return;

error:
        kvm_vm_dead(vcpu->kvm);
}
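
/* Inverse of enter_smm_get_segment_flags(): unpack the saved attribute bits. */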
static void rsm_set_desc_flags(struct kvm_segment *desc, u32 flags)
{
        desc->g    = (flags >> 23) & 1;
        desc->db   = (flags >> 22) & 1;
        desc->l    = (flags >> 21) & 1;
        desc->avl  = (flags >> 20) & 1;
        desc->present = (flags >> 15) & 1;
        desc->dpl  = (flags >> 13) & 3;
        desc->s    = (flags >> 12) & 1;
        desc->type = (flags >> 8) & 15;

        desc->unusable = !desc->present;
}
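
/* Restore one segment register from the 32-bit SMRAM image on RSM. */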
static int rsm_load_seg_32(struct kvm_vcpu *vcpu,
                           const struct kvm_smm_seg_state_32 *state,
                           u16 selector, int n)
{
        struct kvm_segment desc;

        desc.selector = selector;
        desc.base = state->base;
        desc.limit = state->limit;
        rsm_set_desc_flags(&desc, state->flags);
        kvm_set_segment(vcpu, &desc, n);
        return X86EMUL_CONTINUE;
}
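
/* Restore one segment register from the 64-bit SMRAM image on RSM. */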
static int rsm_load_seg_64(struct kvm_vcpu *vcpu,
                           const struct kvm_smm_seg_state_64 *state,
                           int n)
{
        struct kvm_segment desc;

        desc.selector = state->selector;
        rsm_set_desc_flags(&desc, state->attributes << 8);
        desc.limit = state->limit;
        desc.base = state->base;
        kvm_set_segment(vcpu, &desc, n);
        return X86EMUL_CONTINUE;
}
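
/*
 * Reload CR3/CR4/CR0 from the SMRAM image in an order the architecture
 * accepts: CR3 first (with the PCID bits stripped if CR4.PCIDE will be set),
 * then CR4 without PCIDE, then CR0, and finally CR4.PCIDE plus the saved
 * PCID, which cannot be enabled while EFER.LMA=0.
 */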
static int rsm_enter_protected_mode(struct kvm_vcpu *vcpu,
                                    u64 cr0, u64 cr3, u64 cr4)
{
        int bad;
        u64 pcid;

        /* In order to later set CR4.PCIDE, CR3[11:0] must be zero. */
        pcid = 0;
        if (cr4 & X86_CR4_PCIDE) {
                pcid = cr3 & 0xfff;
                cr3 &= ~0xfff;
        }

        bad = kvm_set_cr3(vcpu, cr3);
        if (bad)
                return X86EMUL_UNHANDLEABLE;

        /*
         * First enable PAE, long mode needs it before CR0.PG = 1 is set.
         * Then enable protected mode. However, PCID cannot be enabled
         * if EFER.LMA=0, so set it separately.
         */
        bad = kvm_set_cr4(vcpu, cr4 & ~X86_CR4_PCIDE);
        if (bad)
                return X86EMUL_UNHANDLEABLE;

        bad = kvm_set_cr0(vcpu, cr0);
        if (bad)
                return X86EMUL_UNHANDLEABLE;

        if (cr4 & X86_CR4_PCIDE) {
                bad = kvm_set_cr4(vcpu, cr4);
                if (bad)
                        return X86EMUL_UNHANDLEABLE;
                if (pcid) {
                        bad = kvm_set_cr3(vcpu, cr3 | pcid);
                        if (bad)
                                return X86EMUL_UNHANDLEABLE;
                }
        }

        return X86EMUL_CONTINUE;
}
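
/* Restore vCPU state from the legacy 32-bit SMRAM image on RSM. */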
static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
                             const struct kvm_smram_state_32 *smstate)
{
        struct kvm_vcpu *vcpu = ctxt->vcpu;
        struct desc_ptr dt;
        int i, r;

        ctxt->eflags = smstate->eflags | X86_EFLAGS_FIXED;
        ctxt->_eip = smstate->eip;

        for (i = 0; i < 8; i++)
                *reg_write(ctxt, i) = smstate->gprs[i];

        if (kvm_set_dr(vcpu, 6, smstate->dr6))
                return X86EMUL_UNHANDLEABLE;
        if (kvm_set_dr(vcpu, 7, smstate->dr7))
                return X86EMUL_UNHANDLEABLE;

        rsm_load_seg_32(vcpu, &smstate->tr, smstate->tr_sel, VCPU_SREG_TR);
        rsm_load_seg_32(vcpu, &smstate->ldtr, smstate->ldtr_sel, VCPU_SREG_LDTR);

        dt.address = smstate->gdtr.base;
        dt.size = smstate->gdtr.limit;
        kvm_x86_call(set_gdt)(vcpu, &dt);

        dt.address = smstate->idtr.base;
        dt.size = smstate->idtr.limit;
        kvm_x86_call(set_idt)(vcpu, &dt);

        rsm_load_seg_32(vcpu, &smstate->es, smstate->es_sel, VCPU_SREG_ES);
        rsm_load_seg_32(vcpu, &smstate->cs, smstate->cs_sel, VCPU_SREG_CS);
        rsm_load_seg_32(vcpu, &smstate->ss, smstate->ss_sel, VCPU_SREG_SS);

        rsm_load_seg_32(vcpu, &smstate->ds, smstate->ds_sel, VCPU_SREG_DS);
        rsm_load_seg_32(vcpu, &smstate->fs, smstate->fs_sel, VCPU_SREG_FS);
        rsm_load_seg_32(vcpu, &smstate->gs, smstate->gs_sel, VCPU_SREG_GS);

        vcpu->arch.smbase = smstate->smbase;

        r = rsm_enter_protected_mode(vcpu, smstate->cr0,
                                     smstate->cr3, smstate->cr4);

        if (r != X86EMUL_CONTINUE)
                return r;

        kvm_x86_call(set_interrupt_shadow)(vcpu, 0);
        ctxt->interruptibility = (u8)smstate->int_shadow;

        return r;
}
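
/* Restore vCPU state from the 64-bit SMRAM image on RSM. */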
static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
                             const struct kvm_smram_state_64 *smstate)
{
        struct kvm_vcpu *vcpu = ctxt->vcpu;
        struct desc_ptr dt;
        int i, r;

        for (i = 0; i < 16; i++)
                *reg_write(ctxt, i) = smstate->gprs[15 - i];

        ctxt->_eip = smstate->rip;
        ctxt->eflags = smstate->rflags | X86_EFLAGS_FIXED;

        if (kvm_set_dr(vcpu, 6, smstate->dr6))
                return X86EMUL_UNHANDLEABLE;
        if (kvm_set_dr(vcpu, 7, smstate->dr7))
                return X86EMUL_UNHANDLEABLE;

        vcpu->arch.smbase = smstate->smbase;

        if (kvm_set_msr(vcpu, MSR_EFER, smstate->efer & ~EFER_LMA))
                return X86EMUL_UNHANDLEABLE;

        rsm_load_seg_64(vcpu, &smstate->tr, VCPU_SREG_TR);

        dt.size = smstate->idtr.limit;
        dt.address = smstate->idtr.base;
        kvm_x86_call(set_idt)(vcpu, &dt);

        rsm_load_seg_64(vcpu, &smstate->ldtr, VCPU_SREG_LDTR);

        dt.size = smstate->gdtr.limit;
        dt.address = smstate->gdtr.base;
        kvm_x86_call(set_gdt)(vcpu, &dt);

        r = rsm_enter_protected_mode(vcpu, smstate->cr0, smstate->cr3, smstate->cr4);
        if (r != X86EMUL_CONTINUE)
                return r;

        rsm_load_seg_64(vcpu, &smstate->es, VCPU_SREG_ES);
        rsm_load_seg_64(vcpu, &smstate->cs, VCPU_SREG_CS);
        rsm_load_seg_64(vcpu, &smstate->ss, VCPU_SREG_SS);
        rsm_load_seg_64(vcpu, &smstate->ds, VCPU_SREG_DS);
        rsm_load_seg_64(vcpu, &smstate->fs, VCPU_SREG_FS);
        rsm_load_seg_64(vcpu, &smstate->gs, VCPU_SREG_GS);

        kvm_x86_call(set_interrupt_shadow)(vcpu, 0);
        ctxt->interruptibility = (u8)smstate->int_shadow;

        return X86EMUL_CONTINUE;
}
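
/*
 * Emulate RSM: read the SMRAM image back from SMBASE + 0xFE00, clear the SMM
 * hflags (and the NMI mask, unless the SMI was taken from within an NMI
 * handler), step the vCPU back toward real mode so that CR0/CR3/CR4/EFER can
 * be reloaded safely, then restore the saved 64-bit or 32-bit image.
 * Returns X86EMUL_CONTINUE on success, X86EMUL_UNHANDLEABLE on failure.
 */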
int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)
{
        struct kvm_vcpu *vcpu = ctxt->vcpu;
        unsigned long cr0;
        union kvm_smram smram;
        u64 smbase;
        int ret;

        smbase = vcpu->arch.smbase;

        ret = kvm_vcpu_read_guest(vcpu, smbase + 0xfe00, smram.bytes, sizeof(smram));
        if (ret < 0)
                return X86EMUL_UNHANDLEABLE;

        if ((vcpu->arch.hflags & HF_SMM_INSIDE_NMI_MASK) == 0)
                kvm_x86_call(set_nmi_mask)(vcpu, false);

        kvm_smm_changed(vcpu, false);

        /*
         * Get back to real mode, to prepare a safe state in which to load
         * CR0/CR3/CR4/EFER. It's all a bit more complicated if the vCPU
         * supports long mode.
         */
        if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) {
                struct kvm_segment cs_desc;
                unsigned long cr4;

                /* Zero CR4.PCIDE before CR0.PG. */
                cr4 = kvm_read_cr4(vcpu);
                if (cr4 & X86_CR4_PCIDE)
                        kvm_set_cr4(vcpu, cr4 & ~X86_CR4_PCIDE);

                /* A 32-bit code segment is required to clear EFER.LMA. */
                memset(&cs_desc, 0, sizeof(cs_desc));
                cs_desc.type = 0xb;
                cs_desc.s = cs_desc.g = cs_desc.present = 1;
                kvm_set_segment(vcpu, &cs_desc, VCPU_SREG_CS);
        }

        /* For the 64-bit case, this will clear EFER.LMA. */
        cr0 = kvm_read_cr0(vcpu);
        if (cr0 & X86_CR0_PE)
                kvm_set_cr0(vcpu, cr0 & ~(X86_CR0_PG | X86_CR0_PE));

        if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) {
                unsigned long cr4, efer;

                /* Clear CR4.PAE before clearing EFER.LME. */
                cr4 = kvm_read_cr4(vcpu);
                if (cr4 & X86_CR4_PAE)
                        kvm_set_cr4(vcpu, cr4 & ~X86_CR4_PAE);

                /* And finally go back to 32-bit mode. */
                efer = 0;
                kvm_set_msr(vcpu, MSR_EFER, efer);
        }

        /*
         * FIXME: When resuming L2 (a.k.a. guest mode), the transition to guest
         * mode should happen _after_ loading state from SMRAM. However, KVM
         * piggybacks the nested VM-Enter flows (which is wrong for many other
         * reasons), and so nSVM/nVMX would clobber state that is loaded from
         * SMRAM and from the VMCS/VMCB.
         */
        if (kvm_x86_call(leave_smm)(vcpu, &smram))
                return X86EMUL_UNHANDLEABLE;

        if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
                ret = rsm_load_state_64(ctxt, &smram.smram64);
        else
                ret = rsm_load_state_32(ctxt, &smram.smram32);

        /*
         * If RSM fails and triggers shutdown, architecturally the shutdown
         * occurs *before* the transition to guest mode. But due to KVM's
         * flawed handling of RSM to L2 (see above), the vCPU may already be
         * in_guest_mode(). Force the vCPU out of guest mode before delivering
         * the shutdown, so that L1 enters shutdown instead of seeing a VM-Exit
         * that architecturally shouldn't be possible.
         */
        if (ret != X86EMUL_CONTINUE && is_guest_mode(vcpu))
                kvm_leave_nested(vcpu);

        return ret;
}