/*
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 * Copyright(C) 2015 Intel Corporation.
 *
 * Authors:
 *   Yaniv Kamay <yaniv@qumranet.com>
 *   Avi Kivity <avi@qumranet.com>
 *   Marcelo Tosatti <mtosatti@redhat.com>
 *   Paolo Bonzini <pbonzini@redhat.com>
 *   Xiao Guangrong <guangrong.xiao@linux.intel.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 */

#include <linux/kvm_host.h>
#include <asm/mtrr.h>

#include "cpuid.h"
#include "mmu.h"

#define IA32_MTRR_DEF_TYPE_E		(1ULL << 11)
#define IA32_MTRR_DEF_TYPE_FE		(1ULL << 10)
#define IA32_MTRR_DEF_TYPE_TYPE_MASK	(0xff)
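
/*
 * IA32_MTRR_DEF_TYPE layout (Intel SDM): bit 11 (E) enables the MTRRs as a
 * whole, bit 10 (FE) additionally enables the fixed-range MTRRs, and bits
 * 7:0 hold the default memory type for addresses not covered by any MTRR.
 */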

static bool msr_mtrr_valid(unsigned msr)
{
	switch (msr) {
	case 0x200 ... 0x200 + 2 * KVM_NR_VAR_MTRR - 1:
	case MSR_MTRRfix64K_00000:
	case MSR_MTRRfix16K_80000:
	case MSR_MTRRfix16K_A0000:
	case MSR_MTRRfix4K_C0000:
	case MSR_MTRRfix4K_C8000:
	case MSR_MTRRfix4K_D0000:
	case MSR_MTRRfix4K_D8000:
	case MSR_MTRRfix4K_E0000:
	case MSR_MTRRfix4K_E8000:
	case MSR_MTRRfix4K_F0000:
	case MSR_MTRRfix4K_F8000:
	case MSR_MTRRdefType:
	case MSR_IA32_CR_PAT:
		return true;
	}
	return false;
}

static bool valid_pat_type(unsigned t)
{
	return t < 8 && (1 << t) & 0xf3;	/* 0, 1, 4, 5, 6, 7 */
}

static bool valid_mtrr_type(unsigned t)
{
	return t < 8 && (1 << t) & 0x73;	/* 0, 1, 4, 5, 6 */
}

bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
	int i;
	u64 mask;

	if (!msr_mtrr_valid(msr))
		return false;

	if (msr == MSR_IA32_CR_PAT) {
		for (i = 0; i < 8; i++)
			if (!valid_pat_type((data >> (i * 8)) & 0xff))
				return false;
		return true;
	} else if (msr == MSR_MTRRdefType) {
		if (data & ~0xcff)
			return false;
		return valid_mtrr_type(data & 0xff);
	} else if (msr >= MSR_MTRRfix64K_00000 && msr <= MSR_MTRRfix4K_F8000) {
		for (i = 0; i < 8; i++)
			if (!valid_mtrr_type((data >> (i * 8)) & 0xff))
				return false;
		return true;
	}

	/* variable MTRRs */
	WARN_ON(!(msr >= 0x200 && msr < 0x200 + 2 * KVM_NR_VAR_MTRR));

	mask = (~0ULL) << cpuid_maxphyaddr(vcpu);
	if ((msr & 1) == 0) {
		/* MTRR base */
		if (!valid_mtrr_type(data & 0xff))
			return false;
		mask |= 0xf00;
	} else
		/* MTRR mask */
		mask |= 0x7ff;
	if (data & mask) {
		kvm_inject_gp(vcpu, 0);
		return false;
	}

	return true;
}
EXPORT_SYMBOL_GPL(kvm_mtrr_valid);

static bool mtrr_is_enabled(struct kvm_mtrr *mtrr_state)
{
	return !!(mtrr_state->deftype & IA32_MTRR_DEF_TYPE_E);
}

static bool fixed_mtrr_is_enabled(struct kvm_mtrr *mtrr_state)
{
	return !!(mtrr_state->deftype & IA32_MTRR_DEF_TYPE_FE);
}

static u8 mtrr_default_type(struct kvm_mtrr *mtrr_state)
{
	return mtrr_state->deftype & IA32_MTRR_DEF_TYPE_TYPE_MASK;
}

static u8 mtrr_disabled_type(struct kvm_vcpu *vcpu)
{
	/*
	 * Intel SDM 11.11.2.2: all MTRRs are disabled when
	 * IA32_MTRR_DEF_TYPE.E bit is cleared, and the UC
	 * memory type is applied to all of physical memory.
	 *
	 * However, virtual machines can be run with CPUID such that
	 * there are no MTRRs.  In that case, the firmware will never
	 * enable MTRRs and it is obviously undesirable to run the
	 * guest entirely with UC memory, so we use WB instead.
	 */
	if (guest_cpuid_has(vcpu, X86_FEATURE_MTRR))
		return MTRR_TYPE_UNCACHABLE;
	else
		return MTRR_TYPE_WRBACK;
}

/*
 * Three terms are used in the following code:
 * - segment: an address range covered by one kind of fixed MTRR.
 * - unit: one MSR entry within a segment.
 * - range: the part of a unit that is covered by a single memory cache type.
 */
struct fixed_mtrr_segment {
	u64 start;
	u64 end;
	int range_shift;
	/* the start position in kvm_mtrr.fixed_ranges[]. */
	int range_start;
};

static struct fixed_mtrr_segment fixed_seg_table[] = {
	/* MSR_MTRRfix64K_00000, 1 unit. 64K fixed mtrr. */
	{
		.start = 0x0, .end = 0x80000,
		.range_shift = 16, /* 64K */
		.range_start = 0,
	},
	/*
	 * MSR_MTRRfix16K_80000 ... MSR_MTRRfix16K_A0000, 2 units,
	 * 16K fixed mtrr.
	 */
	{
		.start = 0x80000, .end = 0xc0000,
		.range_shift = 14, /* 16K */
		.range_start = 8,
	},
	/*
	 * MSR_MTRRfix4K_C0000 ... MSR_MTRRfix4K_F8000, 8 units,
	 * 4K fixed MTRR.
	 */
	{
		.start = 0xc0000, .end = 0x100000,
		.range_shift = 12, /* 4K */
		.range_start = 24,
	}
};

/*
 * The size of a unit is the area covered by one MSR; one MSR entry contains
 * 8 ranges, so the unit size is always 8 * 2^range_shift.
 */
static u64 fixed_mtrr_seg_unit_size(int seg)
{
	return 8 << fixed_seg_table[seg].range_shift;
}
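
/*
 * For example, MSR_MTRRfix16K_A0000 is unit 1 of segment 1: its unit size
 * is 8 << 14 = 128K, so it covers [0xa0000, 0xc0000) and its eight ranges
 * occupy kvm_mtrr.fixed_ranges[16..23].
 */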

static bool fixed_msr_to_seg_unit(u32 msr, int *seg, int *unit)
{
	switch (msr) {
	case MSR_MTRRfix64K_00000:
		*seg = 0;
		*unit = 0;
		break;
	case MSR_MTRRfix16K_80000 ... MSR_MTRRfix16K_A0000:
		*seg = 1;
		*unit = array_index_nospec(
			msr - MSR_MTRRfix16K_80000,
			MSR_MTRRfix16K_A0000 - MSR_MTRRfix16K_80000 + 1);
		break;
	case MSR_MTRRfix4K_C0000 ... MSR_MTRRfix4K_F8000:
		*seg = 2;
		*unit = array_index_nospec(
			msr - MSR_MTRRfix4K_C0000,
			MSR_MTRRfix4K_F8000 - MSR_MTRRfix4K_C0000 + 1);
		break;
	default:
		return false;
	}

	return true;
}

static void fixed_mtrr_seg_unit_range(int seg, int unit, u64 *start, u64 *end)
{
	struct fixed_mtrr_segment *mtrr_seg = &fixed_seg_table[seg];
	u64 unit_size = fixed_mtrr_seg_unit_size(seg);

	*start = mtrr_seg->start + unit * unit_size;
	*end = *start + unit_size;
	WARN_ON(*end > mtrr_seg->end);
}

static int fixed_mtrr_seg_unit_range_index(int seg, int unit)
{
	struct fixed_mtrr_segment *mtrr_seg = &fixed_seg_table[seg];

	WARN_ON(mtrr_seg->start + unit * fixed_mtrr_seg_unit_size(seg)
		> mtrr_seg->end);

	/* each unit has 8 ranges. */
	return mtrr_seg->range_start + 8 * unit;
}

static int fixed_mtrr_seg_end_range_index(int seg)
{
	struct fixed_mtrr_segment *mtrr_seg = &fixed_seg_table[seg];
	int n;

	n = (mtrr_seg->end - mtrr_seg->start) >> mtrr_seg->range_shift;
	return mtrr_seg->range_start + n - 1;
}

static bool fixed_msr_to_range(u32 msr, u64 *start, u64 *end)
{
	int seg, unit;

	if (!fixed_msr_to_seg_unit(msr, &seg, &unit))
		return false;

	fixed_mtrr_seg_unit_range(seg, unit, start, end);
	return true;
}

static int fixed_msr_to_range_index(u32 msr)
{
	int seg, unit;

	if (!fixed_msr_to_seg_unit(msr, &seg, &unit))
		return -1;

	return fixed_mtrr_seg_unit_range_index(seg, unit);
}

static int fixed_mtrr_addr_to_seg(u64 addr)
{
	struct fixed_mtrr_segment *mtrr_seg;
	int seg, seg_num = ARRAY_SIZE(fixed_seg_table);

	for (seg = 0; seg < seg_num; seg++) {
		mtrr_seg = &fixed_seg_table[seg];
		if (mtrr_seg->start <= addr && addr < mtrr_seg->end)
			return seg;
	}

	return -1;
}

static int fixed_mtrr_addr_seg_to_range_index(u64 addr, int seg)
{
	struct fixed_mtrr_segment *mtrr_seg;
	int index;

	mtrr_seg = &fixed_seg_table[seg];
	index = mtrr_seg->range_start;
	index += (addr - mtrr_seg->start) >> mtrr_seg->range_shift;
	return index;
}

static u64 fixed_mtrr_range_end_addr(int seg, int index)
{
	struct fixed_mtrr_segment *mtrr_seg = &fixed_seg_table[seg];
	int pos = index - mtrr_seg->range_start;

	return mtrr_seg->start + ((pos + 1) << mtrr_seg->range_shift);
}

static void var_mtrr_range(struct kvm_mtrr_range *range, u64 *start, u64 *end)
{
	u64 mask;

	*start = range->base & PAGE_MASK;

	mask = range->mask & PAGE_MASK;
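
	/*
	 * Example: base = 0x80000000 and an (extended) mask with only the
	 * low 31 bits clear give ~mask = 0x7fffffff, so the computation
	 * below yields *end = 0x100000000, i.e. a 2G range starting at 2G.
	 */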
	/* This cannot overflow because writing to the reserved bits of
	 * variable MTRRs causes a #GP.
	 */
	*end = (*start | ~mask) + 1;
}

static void update_mtrr(struct kvm_vcpu *vcpu, u32 msr)
{
	struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state;
	gfn_t start, end;
	int index;

	if (msr == MSR_IA32_CR_PAT || !tdp_enabled ||
	      !kvm_arch_has_noncoherent_dma(vcpu->kvm))
		return;

	if (!mtrr_is_enabled(mtrr_state) && msr != MSR_MTRRdefType)
		return;

	if (fixed_msr_to_range(msr, &start, &end)) {
		if (!fixed_mtrr_is_enabled(mtrr_state))
			return;
	} else if (msr == MSR_MTRRdefType) {
		start = 0x0;
		end = ~0ULL;
	} else {
		/* variable range MTRRs. */
		index = (msr - 0x200) / 2;
		var_mtrr_range(&mtrr_state->var_ranges[index], &start, &end);
	}

	kvm_zap_gfn_range(vcpu->kvm, gpa_to_gfn(start), gpa_to_gfn(end));
}

static bool var_mtrr_range_is_valid(struct kvm_mtrr_range *range)
{
	return (range->mask & (1 << 11)) != 0;
}

static void set_var_mtrr_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
	struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state;
	struct kvm_mtrr_range *tmp, *cur;
	int index, is_mtrr_mask;
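
	/*
	 * Variable MTRRs are exposed as (MTRRphysBase, MTRRphysMask) MSR
	 * pairs starting at 0x200, so an even offset from 0x200 selects a
	 * base register and an odd offset selects the matching mask register.
	 */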
	index = (msr - 0x200) / 2;
	is_mtrr_mask = msr - 0x200 - 2 * index;
	cur = &mtrr_state->var_ranges[index];

	/* remove the entry if it's in the list. */
	if (var_mtrr_range_is_valid(cur))
		list_del(&mtrr_state->var_ranges[index].node);

	/* Extend the mask with all 1 bits to the left, since those
	 * bits must implicitly be 0.  The bits are then cleared
	 * when reading them.
	 */
	if (!is_mtrr_mask)
		cur->base = data;
	else
		cur->mask = data | (-1LL << cpuid_maxphyaddr(vcpu));

	/* add it to the list if it's enabled. */
	if (var_mtrr_range_is_valid(cur)) {
		list_for_each_entry(tmp, &mtrr_state->head, node)
			if (cur->base >= tmp->base)
				break;
		list_add_tail(&cur->node, &tmp->node);
	}
}

int kvm_mtrr_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
	int index;

	if (!kvm_mtrr_valid(vcpu, msr, data))
		return 1;

	index = fixed_msr_to_range_index(msr);
	if (index >= 0)
		*(u64 *)&vcpu->arch.mtrr_state.fixed_ranges[index] = data;
	else if (msr == MSR_MTRRdefType)
		vcpu->arch.mtrr_state.deftype = data;
	else if (msr == MSR_IA32_CR_PAT)
		vcpu->arch.pat = data;
	else
		set_var_mtrr_msr(vcpu, msr, data);

	update_mtrr(vcpu, msr);
	return 0;
}

int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
{
	int index;

	/* MSR_MTRRcap is a readonly MSR. */
	if (msr == MSR_MTRRcap) {
		/*
		 * SMRR = 0
		 * WC = 1
		 * FIX = 1
		 * VCNT = KVM_NR_VAR_MTRR
		 */
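		/*
		 * 0x500 sets bit 10 (WC supported) and bit 8 (fixed-range
		 * MTRRs supported); the low byte reports the number of
		 * variable-range MTRRs.
		 */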
		*pdata = 0x500 | KVM_NR_VAR_MTRR;
		return 0;
	}

	if (!msr_mtrr_valid(msr))
		return 1;

	index = fixed_msr_to_range_index(msr);
	if (index >= 0)
		*pdata = *(u64 *)&vcpu->arch.mtrr_state.fixed_ranges[index];
	else if (msr == MSR_MTRRdefType)
		*pdata = vcpu->arch.mtrr_state.deftype;
	else if (msr == MSR_IA32_CR_PAT)
		*pdata = vcpu->arch.pat;
	else {	/* Variable MTRRs */
		int is_mtrr_mask;

		index = (msr - 0x200) / 2;
		is_mtrr_mask = msr - 0x200 - 2 * index;
		if (!is_mtrr_mask)
			*pdata = vcpu->arch.mtrr_state.var_ranges[index].base;
		else
			*pdata = vcpu->arch.mtrr_state.var_ranges[index].mask;

		*pdata &= (1ULL << cpuid_maxphyaddr(vcpu)) - 1;
	}

	return 0;
}

void kvm_vcpu_mtrr_init(struct kvm_vcpu *vcpu)
{
	INIT_LIST_HEAD(&vcpu->arch.mtrr_state.head);
}

struct mtrr_iter {
	/* input fields. */
	struct kvm_mtrr *mtrr_state;
	u64 start;
	u64 end;

	/* output fields. */
	int mem_type;
	/* mtrr is completely disabled? */
	bool mtrr_disabled;
	/* [start, end) is not fully covered in MTRRs? */
	bool partial_map;

	/* private fields. */
	union {
		/* used for fixed MTRRs. */
		struct {
			int index;
			int seg;
		};
		/* used for var MTRRs. */
		struct {
			struct kvm_mtrr_range *range;
			/* max address has been covered in var MTRRs. */
			u64 start_max;
		};
	};

	bool fixed;
};
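
/*
 * A lookup walks the fixed ranges first when the fixed MTRRs are enabled
 * and the start address lies below 1M, then continues with the variable
 * ranges on kvm_mtrr.head; partial_map records any part of [start, end)
 * that is not covered by an MTRR.
 */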

static bool mtrr_lookup_fixed_start(struct mtrr_iter *iter)
{
	int seg, index;

	if (!fixed_mtrr_is_enabled(iter->mtrr_state))
		return false;

	seg = fixed_mtrr_addr_to_seg(iter->start);
	if (seg < 0)
		return false;

	iter->fixed = true;
	index = fixed_mtrr_addr_seg_to_range_index(iter->start, seg);
	iter->index = index;
	iter->seg = seg;
	return true;
}

static bool match_var_range(struct mtrr_iter *iter,
			    struct kvm_mtrr_range *range)
{
	u64 start, end;

	var_mtrr_range(range, &start, &end);
	if (!(start >= iter->end || end <= iter->start)) {
		iter->range = range;

		/*
		 * This function is called while walking kvm_mtrr.head, and
		 * range has the minimum base address that overlaps
		 * [iter->start_max, iter->end).
		 */
		iter->partial_map |= iter->start_max < start;

		/* update the max address that has been covered. */
		iter->start_max = max(iter->start_max, end);
		return true;
	}

	return false;
}

static void __mtrr_lookup_var_next(struct mtrr_iter *iter)
{
	struct kvm_mtrr *mtrr_state = iter->mtrr_state;

	list_for_each_entry_continue(iter->range, &mtrr_state->head, node)
		if (match_var_range(iter, iter->range))
			return;

	iter->range = NULL;
	iter->partial_map |= iter->start_max < iter->end;
}

static void mtrr_lookup_var_start(struct mtrr_iter *iter)
{
	struct kvm_mtrr *mtrr_state = iter->mtrr_state;

	iter->fixed = false;
	iter->start_max = iter->start;
	iter->range = NULL;
	iter->range = list_prepare_entry(iter->range, &mtrr_state->head, node);

	__mtrr_lookup_var_next(iter);
}

static void mtrr_lookup_fixed_next(struct mtrr_iter *iter)
{
	/* terminate the lookup. */
	if (fixed_mtrr_range_end_addr(iter->seg, iter->index) >= iter->end) {
		iter->fixed = false;
		iter->range = NULL;
		return;
	}

	iter->index++;

	/* we have looked up all fixed MTRRs. */
	if (iter->index >= ARRAY_SIZE(iter->mtrr_state->fixed_ranges))
		return mtrr_lookup_var_start(iter);

	/* switch to the next segment. */
	if (iter->index > fixed_mtrr_seg_end_range_index(iter->seg))
		iter->seg++;
}

static void mtrr_lookup_var_next(struct mtrr_iter *iter)
{
	__mtrr_lookup_var_next(iter);
}

static void mtrr_lookup_start(struct mtrr_iter *iter)
{
	if (!mtrr_is_enabled(iter->mtrr_state)) {
		iter->mtrr_disabled = true;
		return;
	}

	if (!mtrr_lookup_fixed_start(iter))
		mtrr_lookup_var_start(iter);
}

static void mtrr_lookup_init(struct mtrr_iter *iter,
			     struct kvm_mtrr *mtrr_state, u64 start, u64 end)
{
	iter->mtrr_state = mtrr_state;
	iter->start = start;
	iter->end = end;
	iter->mtrr_disabled = false;
	iter->partial_map = false;
	iter->fixed = false;
	iter->range = NULL;

	mtrr_lookup_start(iter);
}

static bool mtrr_lookup_okay(struct mtrr_iter *iter)
{
	if (iter->fixed) {
		iter->mem_type = iter->mtrr_state->fixed_ranges[iter->index];
		return true;
	}

	if (iter->range) {
		iter->mem_type = iter->range->base & 0xff;
		return true;
	}

	return false;
}

static void mtrr_lookup_next(struct mtrr_iter *iter)
{
	if (iter->fixed)
		mtrr_lookup_fixed_next(iter);
	else
		mtrr_lookup_var_next(iter);
}
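
/*
 * Walk every memory type that the MTRRs assign to the gpa range
 * [_gpa_start_, _gpa_end_); iter.mem_type holds the type of the current
 * range, and iter.mtrr_disabled/iter.partial_map can be inspected after
 * the loop.
 */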
#define mtrr_for_each_mem_type(_iter_, _mtrr_, _gpa_start_, _gpa_end_) \
	for (mtrr_lookup_init(_iter_, _mtrr_, _gpa_start_, _gpa_end_); \
	     mtrr_lookup_okay(_iter_); mtrr_lookup_next(_iter_))

u8 kvm_mtrr_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
{
	struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state;
	struct mtrr_iter iter;
	u64 start, end;
	int type = -1;
	const int wt_wb_mask = (1 << MTRR_TYPE_WRBACK)
			       | (1 << MTRR_TYPE_WRTHROUGH);

	start = gfn_to_gpa(gfn);
	end = start + PAGE_SIZE;

	mtrr_for_each_mem_type(&iter, mtrr_state, start, end) {
		int curr_type = iter.mem_type;

		/*
		 * Please refer to Intel SDM Volume 3: 11.11.4.1 MTRR
		 * Precedences.
		 */

		if (type == -1) {
			type = curr_type;
			continue;
		}

		/*
		 * If two or more variable memory ranges match and the
		 * memory types are identical, then that memory type is
		 * used.
		 */
		if (type == curr_type)
			continue;

		/*
		 * If two or more variable memory ranges match and one of
		 * the memory types is UC, the UC memory type is used.
		 */
		if (curr_type == MTRR_TYPE_UNCACHABLE)
			return MTRR_TYPE_UNCACHABLE;

		/*
		 * If two or more variable memory ranges match and the
		 * memory types are WT and WB, the WT memory type is used.
		 */
		if (((1 << type) & wt_wb_mask) &&
		      ((1 << curr_type) & wt_wb_mask)) {
			type = MTRR_TYPE_WRTHROUGH;
			continue;
		}

		/*
		 * For overlaps not defined by the above rules, processor
		 * behavior is undefined.
		 */

		/* We use WB for this undefined behavior. :( */
		return MTRR_TYPE_WRBACK;
	}

	if (iter.mtrr_disabled)
		return mtrr_disabled_type(vcpu);

	/* not contained in any MTRRs. */
	if (type == -1)
		return mtrr_default_type(mtrr_state);

	/*
	 * We only check one page, so a page partially covered by
	 * MTRRs is impossible.
	 */
	WARN_ON(iter.partial_map);

	return type;
}
EXPORT_SYMBOL_GPL(kvm_mtrr_get_guest_memory_type);
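
/*
 * Check whether the gfn range [gfn, gfn + page_num) is covered by a single
 * memory type, e.g. so that the MMU can decide whether the whole range may
 * safely be mapped as one large page.
 */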
bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn,
					  int page_num)
{
	struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state;
	struct mtrr_iter iter;
	u64 start, end;
	int type = -1;

	start = gfn_to_gpa(gfn);
	end = gfn_to_gpa(gfn + page_num);
	mtrr_for_each_mem_type(&iter, mtrr_state, start, end) {
		if (type == -1) {
			type = iter.mem_type;
			continue;
		}

		if (type != iter.mem_type)
			return false;
	}

	if (iter.mtrr_disabled)
		return true;

	if (!iter.partial_map)
		return true;

	if (type == -1)
		return true;

	return type == mtrr_default_type(mtrr_state);
}