/*
 * Kernel-based Virtual Machine -- Performance Monitoring Unit support
 *
 * Copyright 2011 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Avi Kivity   <avi@redhat.com>
 *   Gleb Natapov <gleb@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 */
#include <linux/types.h>
#include <linux/kvm_host.h>
#include <linux/perf_event.h>
#include <asm/perf_event.h>
#include "x86.h"
#include "cpuid.h"
#include "lapic.h"
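
/*
 * Table mapping Intel architectural events (event select + unit mask, as
 * enumerated by CPUID 0x0A) to the kernel's generic perf hardware events.
 */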
static struct kvm_arch_event_perf_mapping {
	u8 eventsel;
	u8 unit_mask;
	unsigned event_type;
} arch_events[] = {
	/* Index must match CPUID 0x0A.EBX bit vector */
	[0] = { 0x3c, 0x00, PERF_COUNT_HW_CPU_CYCLES },
	[1] = { 0xc0, 0x00, PERF_COUNT_HW_INSTRUCTIONS },
	[2] = { 0x3c, 0x01, PERF_COUNT_HW_BUS_CYCLES },
	[3] = { 0x2e, 0x4f, PERF_COUNT_HW_CACHE_REFERENCES },
	[4] = { 0x2e, 0x41, PERF_COUNT_HW_CACHE_MISSES },
	[5] = { 0xc4, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
	[6] = { 0xc5, 0x00, PERF_COUNT_HW_BRANCH_MISSES },
	[7] = { 0x00, 0x30, PERF_COUNT_HW_REF_CPU_CYCLES },
};

/* mapping between fixed pmc index and arch_events array */
static int fixed_pmc_events[] = {1, 0, 7};

static bool pmc_is_gp(struct kvm_pmc *pmc)
{
	return pmc->type == KVM_PMC_GP;
}

static inline u64 pmc_bitmask(struct kvm_pmc *pmc)
{
	struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu;

	return pmu->counter_bitmask[pmc->type];
}

static inline bool pmc_enabled(struct kvm_pmc *pmc)
{
	struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu;

	return test_bit(pmc->idx, (unsigned long *)&pmu->global_ctrl);
}
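
/*
 * Resolve an MSR number to the corresponding counter structure: general
 * purpose counters are addressed relative to a base MSR, fixed counters
 * relative to MSR_CORE_PERF_FIXED_CTR0.  NULL means the MSR does not
 * belong to a counter handled by this vPMU.
 */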
static inline struct kvm_pmc *get_gp_pmc(struct kvm_pmu *pmu, u32 msr,
					 u32 base)
{
	if (msr >= base && msr < base + pmu->nr_arch_gp_counters)
		return &pmu->gp_counters[msr - base];

	return NULL;
}

static inline struct kvm_pmc *get_fixed_pmc(struct kvm_pmu *pmu, u32 msr)
{
	int base = MSR_CORE_PERF_FIXED_CTR0;

	if (msr >= base && msr < base + pmu->nr_arch_fixed_counters)
		return &pmu->fixed_counters[msr - base];

	return NULL;
}

static inline struct kvm_pmc *get_fixed_pmc_idx(struct kvm_pmu *pmu, int idx)
{
	return get_fixed_pmc(pmu, MSR_CORE_PERF_FIXED_CTR0 + idx);
}
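
/*
 * Translate a counter's bit index in MSR_CORE_PERF_GLOBAL_CTRL into the
 * counter structure: indexes below INTEL_PMC_IDX_FIXED are general
 * purpose counters, the rest are fixed counters.
 */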
static struct kvm_pmc *global_idx_to_pmc(struct kvm_pmu *pmu, int idx)
{
	if (idx < INTEL_PMC_IDX_FIXED)
		return get_gp_pmc(pmu, MSR_P6_EVNTSEL0 + idx, MSR_P6_EVNTSEL0);
	else
		return get_fixed_pmc_idx(pmu, idx - INTEL_PMC_IDX_FIXED);
}

void kvm_deliver_pmi(struct kvm_vcpu *vcpu)
{
	if (vcpu->arch.apic)
		kvm_apic_local_deliver(vcpu->arch.apic, APIC_LVTPC);
}

static void trigger_pmi(struct irq_work *irq_work)
{
	struct kvm_pmu *pmu = container_of(irq_work, struct kvm_pmu,
			irq_work);
	struct kvm_vcpu *vcpu = container_of(pmu, struct kvm_vcpu,
			arch.pmu);

	kvm_deliver_pmi(vcpu);
}
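
/*
 * Host perf overflow callbacks, invoked from host NMI/IRQ context.  They
 * only flag the counter in reprogram_pmi and global_status and raise
 * KVM_REQ_PMU; the heavier work (reprogramming, PMI injection into the
 * guest) is deferred to vcpu context.
 */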
static void kvm_perf_overflow(struct perf_event *perf_event,
			      struct perf_sample_data *data,
			      struct pt_regs *regs)
{
	struct kvm_pmc *pmc = perf_event->overflow_handler_context;
	struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu;

	if (!test_and_set_bit(pmc->idx, (unsigned long *)&pmu->reprogram_pmi)) {
		__set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
		kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
	}
}

static void kvm_perf_overflow_intr(struct perf_event *perf_event,
		struct perf_sample_data *data, struct pt_regs *regs)
{
	struct kvm_pmc *pmc = perf_event->overflow_handler_context;
	struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu;

	if (!test_and_set_bit(pmc->idx, (unsigned long *)&pmu->reprogram_pmi)) {
		__set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
		kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
		/*
		 * Inject PMI. If vcpu was in guest mode during the NMI, the
		 * PMI can be injected on guest mode re-entry. Otherwise we
		 * can't be sure that the vcpu wasn't executing a hlt
		 * instruction at the time of the vmexit and is not going to
		 * re-enter guest mode until woken up. So we should wake it,
		 * but this is impossible from NMI context. Do it from irq
		 * work instead.
		 */
		if (!kvm_is_in_guest())
			irq_work_queue(&pmc->vcpu->arch.pmu.irq_work);
		else
			kvm_make_request(KVM_REQ_PMI, pmc->vcpu);
	}
}
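
/*
 * Return the current value of a counter: the locally accumulated count
 * plus whatever the backing host perf event has counted since the
 * counter was last programmed, truncated to the counter width.
 */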
static u64 read_pmc(struct kvm_pmc *pmc)
{
	u64 counter, enabled, running;

	counter = pmc->counter;

	if (pmc->perf_event)
		counter += perf_event_read_value(pmc->perf_event,
						 &enabled, &running);

	/* FIXME: Scaling needed? */

	return counter & pmc_bitmask(pmc);
}

static void stop_counter(struct kvm_pmc *pmc)
{
	if (pmc->perf_event) {
		pmc->counter = read_pmc(pmc);
		perf_event_release_kernel(pmc->perf_event);
		pmc->perf_event = NULL;
	}
}
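
/*
 * (Re)create the host perf event backing a guest counter.  The sample
 * period is chosen so that the host event overflows exactly when the
 * guest counter would wrap.
 */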
static void reprogram_counter(struct kvm_pmc *pmc, u32 type,
		unsigned config, bool exclude_user, bool exclude_kernel,
		bool intr, bool in_tx, bool in_tx_cp)
{
	struct perf_event *event;
	struct perf_event_attr attr = {
		.type = type,
		.size = sizeof(attr),
		.pinned = true,
		.exclude_idle = true,
		.exclude_host = 1,
		.exclude_user = exclude_user,
		.exclude_kernel = exclude_kernel,
		.config = config,
	};

	if (in_tx)
		attr.config |= HSW_IN_TX;
	if (in_tx_cp)
		attr.config |= HSW_IN_TX_CHECKPOINTED;

	attr.sample_period = (-pmc->counter) & pmc_bitmask(pmc);

	event = perf_event_create_kernel_counter(&attr, -1, current,
						 intr ? kvm_perf_overflow_intr :
						 kvm_perf_overflow, pmc);
	if (IS_ERR(event)) {
		printk_once("kvm: pmu event creation failed %ld\n",
				PTR_ERR(event));
		return;
	}

	pmc->perf_event = event;
	clear_bit(pmc->idx, (unsigned long *)&pmc->vcpu->arch.pmu.reprogram_pmi);
}
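
/*
 * Look up the generic perf event for an event select / unit mask pair,
 * but only if the guest's CPUID reports that architectural event as
 * available.  Returns PERF_COUNT_HW_MAX if there is no usable match.
 */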
static unsigned find_arch_event(struct kvm_pmu *pmu, u8 event_select,
		u8 unit_mask)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(arch_events); i++)
		if (arch_events[i].eventsel == event_select
				&& arch_events[i].unit_mask == unit_mask
				&& (pmu->available_event_types & (1 << i)))
			break;

	if (i == ARRAY_SIZE(arch_events))
		return PERF_COUNT_HW_MAX;

	return arch_events[i].event_type;
}
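
/*
 * Program a general purpose counter from its event select MSR.  Plain
 * event select / unit mask pairs are mapped to generic perf hardware
 * events; anything with edge/invert/cmask or TSX bits set is passed to
 * perf as a raw event.
 */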
static void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
{
	unsigned config, type = PERF_TYPE_RAW;
	u8 event_select, unit_mask;

	if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL)
		printk_once("kvm pmu: pin control bit is ignored\n");

	pmc->eventsel = eventsel;

	stop_counter(pmc);

	if (!(eventsel & ARCH_PERFMON_EVENTSEL_ENABLE) || !pmc_enabled(pmc))
		return;

	event_select = eventsel & ARCH_PERFMON_EVENTSEL_EVENT;
	unit_mask = (eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;

	if (!(eventsel & (ARCH_PERFMON_EVENTSEL_EDGE |
			  ARCH_PERFMON_EVENTSEL_INV |
			  ARCH_PERFMON_EVENTSEL_CMASK |
			  HSW_IN_TX |
			  HSW_IN_TX_CHECKPOINTED))) {
		config = find_arch_event(&pmc->vcpu->arch.pmu, event_select,
				unit_mask);
		if (config != PERF_COUNT_HW_MAX)
			type = PERF_TYPE_HARDWARE;
	}

	if (type == PERF_TYPE_RAW)
		config = eventsel & X86_RAW_EVENT_MASK;

	reprogram_counter(pmc, type, config,
			!(eventsel & ARCH_PERFMON_EVENTSEL_USR),
			!(eventsel & ARCH_PERFMON_EVENTSEL_OS),
			eventsel & ARCH_PERFMON_EVENTSEL_INT,
			(eventsel & HSW_IN_TX),
			(eventsel & HSW_IN_TX_CHECKPOINTED));
}
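
/*
 * Program a fixed counter from its 4-bit control field: bits 0-1 select
 * the privilege levels to count, bit 3 requests a PMI on overflow.
 */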
static void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 en_pmi, int idx)
{
	unsigned en = en_pmi & 0x3;
	bool pmi = en_pmi & 0x8;

	stop_counter(pmc);

	if (!en || !pmc_enabled(pmc))
		return;

	reprogram_counter(pmc, PERF_TYPE_HARDWARE,
			arch_events[fixed_pmc_events[idx]].event_type,
			!(en & 0x2), /* exclude user */
			!(en & 0x1), /* exclude kernel */
			pmi, false, false);
}

static inline u8 fixed_en_pmi(u64 ctrl, int idx)
{
	return (ctrl >> (idx * 4)) & 0xf;
}
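
/*
 * Apply a new MSR_CORE_PERF_FIXED_CTR_CTRL value, reprogramming only the
 * fixed counters whose control field actually changed.
 */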
static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data)
{
	int i;

	for (i = 0; i < pmu->nr_arch_fixed_counters; i++) {
		u8 en_pmi = fixed_en_pmi(data, i);
		struct kvm_pmc *pmc = get_fixed_pmc_idx(pmu, i);

		if (fixed_en_pmi(pmu->fixed_ctr_ctrl, i) == en_pmi)
			continue;

		reprogram_fixed_counter(pmc, en_pmi, i);
	}

	pmu->fixed_ctr_ctrl = data;
}
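
/*
 * Reprogram the counter identified by its global (MSR_CORE_PERF_GLOBAL_CTRL)
 * bit index, using its currently configured event.
 */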
static void reprogram_idx(struct kvm_pmu *pmu, int idx)
{
	struct kvm_pmc *pmc = global_idx_to_pmc(pmu, idx);

	if (!pmc)
		return;

	if (pmc_is_gp(pmc))
		reprogram_gp_counter(pmc, pmc->eventsel);
	else {
		int fidx = idx - INTEL_PMC_IDX_FIXED;

		reprogram_fixed_counter(pmc,
				fixed_en_pmi(pmu->fixed_ctr_ctrl, fidx), fidx);
	}
}
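
/*
 * MSR_CORE_PERF_GLOBAL_CTRL changed: reprogram every counter whose
 * enable bit flipped.
 */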
static void global_ctrl_changed(struct kvm_pmu *pmu, u64 data)
{
	int bit;
	u64 diff = pmu->global_ctrl ^ data;

	pmu->global_ctrl = data;

	for_each_set_bit(bit, (unsigned long *)&diff, X86_PMC_IDX_MAX)
		reprogram_idx(pmu, bit);
}
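
/* Return true if the MSR is handled by the virtual PMU. */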
bool kvm_pmu_msr(struct kvm_vcpu *vcpu, u32 msr)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	int ret;

	switch (msr) {
	case MSR_CORE_PERF_FIXED_CTR_CTRL:
	case MSR_CORE_PERF_GLOBAL_STATUS:
	case MSR_CORE_PERF_GLOBAL_CTRL:
	case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
		ret = pmu->version > 1;
		break;
	default:
		ret = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)
			|| get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0)
			|| get_fixed_pmc(pmu, msr);
		break;
	}

	return ret;
}

int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_pmc *pmc;

	switch (index) {
	case MSR_CORE_PERF_FIXED_CTR_CTRL:
		*data = pmu->fixed_ctr_ctrl;
		return 0;
	case MSR_CORE_PERF_GLOBAL_STATUS:
		*data = pmu->global_status;
		return 0;
	case MSR_CORE_PERF_GLOBAL_CTRL:
		*data = pmu->global_ctrl;
		return 0;
	case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
		*data = pmu->global_ovf_ctrl;
		return 0;
	default:
		if ((pmc = get_gp_pmc(pmu, index, MSR_IA32_PERFCTR0)) ||
				(pmc = get_fixed_pmc(pmu, index))) {
			*data = read_pmc(pmc);
			return 0;
		} else if ((pmc = get_gp_pmc(pmu, index, MSR_P6_EVNTSEL0))) {
			*data = pmc->eventsel;
			return 0;
		}
	}

	return 1;
}
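
/*
 * Handle a guest (or host-initiated) write to a PMU MSR.  Returns 0 on
 * success and non-zero if the MSR is not handled or the value is
 * rejected (reserved bits set, read-only MSR).
 */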
int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_pmc *pmc;
	u32 index = msr_info->index;
	u64 data = msr_info->data;

	switch (index) {
	case MSR_CORE_PERF_FIXED_CTR_CTRL:
		if (pmu->fixed_ctr_ctrl == data)
			return 0;
		if (!(data & 0xfffffffffffff444ull)) {
			reprogram_fixed_counters(pmu, data);
			return 0;
		}
		break;
	case MSR_CORE_PERF_GLOBAL_STATUS:
		if (msr_info->host_initiated) {
			pmu->global_status = data;
			return 0;
		}
		break; /* RO MSR */
	case MSR_CORE_PERF_GLOBAL_CTRL:
		if (pmu->global_ctrl == data)
			return 0;
		if (!(data & pmu->global_ctrl_mask)) {
			global_ctrl_changed(pmu, data);
			return 0;
		}
		break;
	case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
		if (!(data & (pmu->global_ctrl_mask & ~(3ull << 62)))) {
			if (!msr_info->host_initiated)
				pmu->global_status &= ~data;
			pmu->global_ovf_ctrl = data;
			return 0;
		}
		break;
	default:
		if ((pmc = get_gp_pmc(pmu, index, MSR_IA32_PERFCTR0)) ||
				(pmc = get_fixed_pmc(pmu, index))) {
			if (!msr_info->host_initiated)
				data = (s64)(s32)data;
			pmc->counter += data - read_pmc(pmc);
			return 0;
		} else if ((pmc = get_gp_pmc(pmu, index, MSR_P6_EVNTSEL0))) {
			if (data == pmc->eventsel)
				return 0;
			if (!(data & pmu->reserved_bits)) {
				reprogram_gp_counter(pmc, data);
				return 0;
			}
		}
	}

	return 1;
}
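
/*
 * RDPMC emulation: in the guest's ECX value, bit 30 selects the fixed
 * counter set, bit 31 requests "fast" (32-bit) mode, and the low bits
 * are the counter index.
 */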
int kvm_pmu_check_pmc(struct kvm_vcpu *vcpu, unsigned pmc)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	bool fixed = pmc & (1u << 30);

	pmc &= ~(3u << 30);
	return (!fixed && pmc >= pmu->nr_arch_gp_counters) ||
		(fixed && pmc >= pmu->nr_arch_fixed_counters);
}

int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	bool fast_mode = pmc & (1u << 31);
	bool fixed = pmc & (1u << 30);
	struct kvm_pmc *counters;
	u64 ctr;

	pmc &= ~(3u << 30);
	if (!fixed && pmc >= pmu->nr_arch_gp_counters)
		return 1;
	if (fixed && pmc >= pmu->nr_arch_fixed_counters)
		return 1;
	counters = fixed ? pmu->fixed_counters : pmu->gp_counters;
	ctr = read_pmc(&counters[pmc]);
	if (fast_mode)
		ctr = (u32)ctr;
	*data = ctr;

	return 0;
}
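
/*
 * Refresh the vPMU configuration from guest CPUID leaf 0x0A: number and
 * width of general purpose and fixed counters, available architectural
 * events and the PMU version.  The TSX (HSW_IN_TX*) event select bits
 * are only permitted if the guest is exposed to HLE/RTM.
 */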
void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_cpuid_entry2 *entry;
	union cpuid10_eax eax;
	union cpuid10_edx edx;

	pmu->nr_arch_gp_counters = 0;
	pmu->nr_arch_fixed_counters = 0;
	pmu->counter_bitmask[KVM_PMC_GP] = 0;
	pmu->counter_bitmask[KVM_PMC_FIXED] = 0;
	pmu->version = 0;
	pmu->reserved_bits = 0xffffffff00200000ull;

	entry = kvm_find_cpuid_entry(vcpu, 0xa, 0);
	if (!entry)
		return;
	eax.full = entry->eax;
	edx.full = entry->edx;

	pmu->version = eax.split.version_id;
	if (!pmu->version)
		return;

	pmu->nr_arch_gp_counters = min_t(int, eax.split.num_counters,
					INTEL_PMC_MAX_GENERIC);
	pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << eax.split.bit_width) - 1;
	pmu->available_event_types = ~entry->ebx &
					((1ull << eax.split.mask_length) - 1);

	if (pmu->version == 1) {
		pmu->nr_arch_fixed_counters = 0;
	} else {
		pmu->nr_arch_fixed_counters =
			min_t(int, edx.split.num_counters_fixed,
				INTEL_PMC_MAX_FIXED);
		pmu->counter_bitmask[KVM_PMC_FIXED] =
			((u64)1 << edx.split.bit_width_fixed) - 1;
	}

	pmu->global_ctrl = ((1 << pmu->nr_arch_gp_counters) - 1) |
		(((1ull << pmu->nr_arch_fixed_counters) - 1) << INTEL_PMC_IDX_FIXED);
	pmu->global_ctrl_mask = ~pmu->global_ctrl;

	entry = kvm_find_cpuid_entry(vcpu, 7, 0);
	if (entry &&
	    (boot_cpu_has(X86_FEATURE_HLE) || boot_cpu_has(X86_FEATURE_RTM)) &&
	    (entry->ebx & (X86_FEATURE_HLE|X86_FEATURE_RTM)))
		pmu->reserved_bits ^= HSW_IN_TX|HSW_IN_TX_CHECKPOINTED;
}

void kvm_pmu_init(struct kvm_vcpu *vcpu)
{
	int i;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;

	memset(pmu, 0, sizeof(*pmu));
	for (i = 0; i < INTEL_PMC_MAX_GENERIC; i++) {
		pmu->gp_counters[i].type = KVM_PMC_GP;
		pmu->gp_counters[i].vcpu = vcpu;
		pmu->gp_counters[i].idx = i;
	}
	for (i = 0; i < INTEL_PMC_MAX_FIXED; i++) {
		pmu->fixed_counters[i].type = KVM_PMC_FIXED;
		pmu->fixed_counters[i].vcpu = vcpu;
		pmu->fixed_counters[i].idx = i + INTEL_PMC_IDX_FIXED;
	}
	init_irq_work(&pmu->irq_work, trigger_pmi);
	kvm_pmu_cpuid_update(vcpu);
}

void kvm_pmu_reset(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	int i;

	irq_work_sync(&pmu->irq_work);
	for (i = 0; i < INTEL_PMC_MAX_GENERIC; i++) {
		struct kvm_pmc *pmc = &pmu->gp_counters[i];

		stop_counter(pmc);
		pmc->counter = pmc->eventsel = 0;
	}

	for (i = 0; i < INTEL_PMC_MAX_FIXED; i++)
		stop_counter(&pmu->fixed_counters[i]);

	pmu->fixed_ctr_ctrl = pmu->global_ctrl = pmu->global_status =
		pmu->global_ovf_ctrl = 0;
}

void kvm_pmu_destroy(struct kvm_vcpu *vcpu)
{
	kvm_pmu_reset(vcpu);
}
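
/*
 * Process pending counter reprogramming requests (KVM_REQ_PMU) raised by
 * the overflow callbacks, in vcpu context.
 */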
void kvm_handle_pmu_event(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	u64 bitmask;
	int bit;

	bitmask = pmu->reprogram_pmi;

	for_each_set_bit(bit, (unsigned long *)&bitmask, X86_PMC_IDX_MAX) {
		struct kvm_pmc *pmc = global_idx_to_pmc(pmu, bit);

		if (unlikely(!pmc || !pmc->perf_event)) {
			clear_bit(bit, (unsigned long *)&pmu->reprogram_pmi);
			continue;
		}

		reprogram_idx(pmu, bit);
	}
}