/*
 * Kernel-based Virtual Machine -- Performance Monitoring Unit support
 *
 * Copyright 2011 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Avi Kivity   <avi@redhat.com>
 *   Gleb Natapov <gleb@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2. See
 * the COPYING file in the top-level directory.
 */

#include <linux/types.h>
#include <linux/kvm_host.h>
#include <linux/perf_event.h>
#include "x86.h"
#include "cpuid.h"
#include "lapic.h"

static struct kvm_arch_event_perf_mapping {
        u8 eventsel;
        u8 unit_mask;
        unsigned event_type;
} arch_events[] = {
        /* Index must match CPUID 0x0A.EBX bit vector */
        [0] = { 0x3c, 0x00, PERF_COUNT_HW_CPU_CYCLES },
        [1] = { 0xc0, 0x00, PERF_COUNT_HW_INSTRUCTIONS },
        [2] = { 0x3c, 0x01, PERF_COUNT_HW_BUS_CYCLES },
        [3] = { 0x2e, 0x4f, PERF_COUNT_HW_CACHE_REFERENCES },
        [4] = { 0x2e, 0x41, PERF_COUNT_HW_CACHE_MISSES },
        [5] = { 0xc4, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
        [6] = { 0xc5, 0x00, PERF_COUNT_HW_BRANCH_MISSES },
        [7] = { 0x00, 0x30, PERF_COUNT_HW_REF_CPU_CYCLES },
};

/* mapping between fixed pmc index and arch_events array */
int fixed_pmc_events[] = {1, 0, 7};
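
/*
 * Decoded against the table above: fixed counter 0 counts instructions
 * (arch_events[1]), fixed counter 1 counts core cycles (arch_events[0]),
 * and fixed counter 2 counts reference cycles (arch_events[7]).
 */
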
static bool pmc_is_gp(struct kvm_pmc *pmc)
{
        return pmc->type == KVM_PMC_GP;
}

static inline u64 pmc_bitmask(struct kvm_pmc *pmc)
{
        struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu;

        return pmu->counter_bitmask[pmc->type];
}

static inline bool pmc_enabled(struct kvm_pmc *pmc)
{
        struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu;

        return test_bit(pmc->idx, (unsigned long *)&pmu->global_ctrl);
}

static inline struct kvm_pmc *get_gp_pmc(struct kvm_pmu *pmu, u32 msr,
                                         u32 base)
{
        if (msr >= base && msr < base + pmu->nr_arch_gp_counters)
                return &pmu->gp_counters[msr - base];

        return NULL;
}

static inline struct kvm_pmc *get_fixed_pmc(struct kvm_pmu *pmu, u32 msr)
{
        int base = MSR_CORE_PERF_FIXED_CTR0;

        if (msr >= base && msr < base + pmu->nr_arch_fixed_counters)
                return &pmu->fixed_counters[msr - base];

        return NULL;
}

static inline struct kvm_pmc *get_fixed_pmc_idx(struct kvm_pmu *pmu, int idx)
{
        return get_fixed_pmc(pmu, MSR_CORE_PERF_FIXED_CTR0 + idx);
}

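/*
 * A PMC's global index is its bit position in global_ctrl/global_status:
 * general-purpose counters occupy the low bits, fixed counters start at
 * INTEL_PMC_IDX_FIXED (bit 32 on Intel).
 */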
static struct kvm_pmc *global_idx_to_pmc(struct kvm_pmu *pmu, int idx)
{
        if (idx < INTEL_PMC_IDX_FIXED)
                return get_gp_pmc(pmu, MSR_P6_EVNTSEL0 + idx, MSR_P6_EVNTSEL0);
        else
                return get_fixed_pmc_idx(pmu, idx - INTEL_PMC_IDX_FIXED);
}

void kvm_deliver_pmi(struct kvm_vcpu *vcpu)
{
        if (vcpu->arch.apic)
                kvm_apic_local_deliver(vcpu->arch.apic, APIC_LVTPC);
}

static void trigger_pmi(struct irq_work *irq_work)
{
        struct kvm_pmu *pmu = container_of(irq_work, struct kvm_pmu,
                        irq_work);
        struct kvm_vcpu *vcpu = container_of(pmu, struct kvm_vcpu,
                        arch.pmu);

        kvm_deliver_pmi(vcpu);
}

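/*
 * Two overflow callbacks: kvm_perf_overflow only latches the overflow in
 * global_status (used when the guest did not set the eventsel INT bit);
 * kvm_perf_overflow_intr additionally injects a PMI. Both are called by
 * perf, typically from NMI context, so real work is deferred.
 */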
static void kvm_perf_overflow(struct perf_event *perf_event,
                              struct perf_sample_data *data,
                              struct pt_regs *regs)
{
        struct kvm_pmc *pmc = perf_event->overflow_handler_context;
        struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu;

        __set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
}

static void kvm_perf_overflow_intr(struct perf_event *perf_event,
                struct perf_sample_data *data, struct pt_regs *regs)
{
        struct kvm_pmc *pmc = perf_event->overflow_handler_context;
        struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu;

        if (!test_and_set_bit(pmc->idx, (unsigned long *)&pmu->reprogram_pmi)) {
                kvm_perf_overflow(perf_event, data, regs);
                kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
                /*
                 * Inject PMI. If the vcpu was in guest mode during the NMI,
                 * the PMI can be injected on guest mode re-entry. Otherwise
                 * we can't be sure that the vcpu wasn't executing a hlt
                 * instruction at the time of the vmexit, in which case it
                 * won't re-enter guest mode until woken up. So we should
                 * wake it, but this is impossible from NMI context. Do it
                 * from irq work instead.
                 */
                if (!kvm_is_in_guest())
                        irq_work_queue(&pmc->vcpu->arch.pmu.irq_work);
                else
                        kvm_make_request(KVM_REQ_PMI, pmc->vcpu);
        }
}

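/*
 * The architectural value of a counter is the snapshot saved in
 * pmc->counter plus whatever the backing perf event has accumulated since
 * the counter was last (re)programmed, truncated to the counter width the
 * guest was given.
 */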
static u64 read_pmc(struct kvm_pmc *pmc)
{
        u64 counter, enabled, running;

        counter = pmc->counter;

        if (pmc->perf_event)
                counter += perf_event_read_value(pmc->perf_event,
                                                 &enabled, &running);

        /* FIXME: Scaling needed? */

        return counter & pmc_bitmask(pmc);
}

static void stop_counter(struct kvm_pmc *pmc)
{
        if (pmc->perf_event) {
                pmc->counter = read_pmc(pmc);
                perf_event_release_kernel(pmc->perf_event);
                pmc->perf_event = NULL;
        }
}

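/*
 * sample_period below is the number of increments left before the guest
 * counter wraps: programming perf to fire after (-counter) & bitmask
 * events makes the host overflow coincide with the guest's architectural
 * overflow.
 */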
static void reprogram_counter(struct kvm_pmc *pmc, u32 type,
                unsigned config, bool exclude_user, bool exclude_kernel,
                bool intr)
{
        struct perf_event *event;
        struct perf_event_attr attr = {
                .type = type,
                .size = sizeof(attr),
                .pinned = true,
                .exclude_idle = true,
                .exclude_host = 1,
                .exclude_user = exclude_user,
                .exclude_kernel = exclude_kernel,
                .config = config,
        };

        attr.sample_period = (-pmc->counter) & pmc_bitmask(pmc);

        event = perf_event_create_kernel_counter(&attr, -1, current,
                                                 intr ? kvm_perf_overflow_intr :
                                                 kvm_perf_overflow, pmc);
        if (IS_ERR(event)) {
                printk_once("kvm: pmu event creation failed %ld\n",
                                PTR_ERR(event));
                return;
        }

        pmc->perf_event = event;
        clear_bit(pmc->idx, (unsigned long *)&pmc->vcpu->arch.pmu.reprogram_pmi);
}

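/*
 * Match a guest event_select/unit_mask pair against the architectural
 * event table; only events the guest was told are available (CPUID
 * 0x0A.EBX) qualify. PERF_COUNT_HW_MAX signals "no match", in which case
 * the caller falls back to a raw event.
 */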
static unsigned find_arch_event(struct kvm_pmu *pmu, u8 event_select,
                u8 unit_mask)
{
        int i;

        for (i = 0; i < ARRAY_SIZE(arch_events); i++)
                if (arch_events[i].eventsel == event_select
                                && arch_events[i].unit_mask == unit_mask
                                && (pmu->available_event_types & (1 << i)))
                        break;

        if (i == ARRAY_SIZE(arch_events))
                return PERF_COUNT_HW_MAX;

        return arch_events[i].event_type;
}

static void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
{
        unsigned config, type = PERF_TYPE_RAW;
        u8 event_select, unit_mask;

        if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL)
                printk_once("kvm pmu: pin control bit is ignored\n");

        pmc->eventsel = eventsel;

        stop_counter(pmc);

        if (!(eventsel & ARCH_PERFMON_EVENTSEL_ENABLE) || !pmc_enabled(pmc))
                return;

        event_select = eventsel & ARCH_PERFMON_EVENTSEL_EVENT;
        unit_mask = (eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;

        if (!(eventsel & (ARCH_PERFMON_EVENTSEL_EDGE |
                          ARCH_PERFMON_EVENTSEL_INV |
                          ARCH_PERFMON_EVENTSEL_CMASK))) {
                config = find_arch_event(&pmc->vcpu->arch.pmu, event_select,
                                unit_mask);
                if (config != PERF_COUNT_HW_MAX)
                        type = PERF_TYPE_HARDWARE;
        }

        if (type == PERF_TYPE_RAW)
                config = eventsel & X86_RAW_EVENT_MASK;

        reprogram_counter(pmc, type, config,
                        !(eventsel & ARCH_PERFMON_EVENTSEL_USR),
                        !(eventsel & ARCH_PERFMON_EVENTSEL_OS),
                        eventsel & ARCH_PERFMON_EVENTSEL_INT);
}

static void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 en_pmi, int idx)
{
        unsigned en = en_pmi & 0x3;
        bool pmi = en_pmi & 0x8;

        stop_counter(pmc);

        if (!en || !pmc_enabled(pmc))
                return;

        reprogram_counter(pmc, PERF_TYPE_HARDWARE,
                        arch_events[fixed_pmc_events[idx]].event_type,
                        !(en & 0x2), /* exclude user */
                        !(en & 0x1), /* exclude kernel */
                        pmi);
}

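/*
 * MSR_CORE_PERF_FIXED_CTR_CTRL packs one 4-bit field per fixed counter:
 * bits 0-1 select the ring levels to count (OS/USR enable), bit 3
 * requests a PMI on overflow. fixed_en_pmi() extracts the field for
 * counter idx.
 */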
static inline u8 fixed_en_pmi(u64 ctrl, int idx)
{
        return (ctrl >> (idx * 4)) & 0xf;
}

static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data)
{
        int i;

        for (i = 0; i < pmu->nr_arch_fixed_counters; i++) {
                u8 en_pmi = fixed_en_pmi(data, i);
                struct kvm_pmc *pmc = get_fixed_pmc_idx(pmu, i);

                if (fixed_en_pmi(pmu->fixed_ctr_ctrl, i) == en_pmi)
                        continue;

                reprogram_fixed_counter(pmc, en_pmi, i);
        }

        pmu->fixed_ctr_ctrl = data;
}

static void reprogram_idx(struct kvm_pmu *pmu, int idx)
{
        struct kvm_pmc *pmc = global_idx_to_pmc(pmu, idx);

        if (!pmc)
                return;

        if (pmc_is_gp(pmc))
                reprogram_gp_counter(pmc, pmc->eventsel);
        else {
                int fidx = idx - INTEL_PMC_IDX_FIXED;

                reprogram_fixed_counter(pmc,
                                fixed_en_pmi(pmu->fixed_ctr_ctrl, fidx), fidx);
        }
}

static void global_ctrl_changed(struct kvm_pmu *pmu, u64 data)
{
        int bit;
        u64 diff = pmu->global_ctrl ^ data;

        pmu->global_ctrl = data;

        for_each_set_bit(bit, (unsigned long *)&diff, X86_PMC_IDX_MAX)
                reprogram_idx(pmu, bit);
}

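/*
 * The global control/status MSRs only exist from architectural PMU
 * version 2 onwards; everything else is recognized by matching against
 * the counter and event-select MSR ranges.
 */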
bool kvm_pmu_msr(struct kvm_vcpu *vcpu, u32 msr)
{
        struct kvm_pmu *pmu = &vcpu->arch.pmu;
        int ret;

        switch (msr) {
        case MSR_CORE_PERF_FIXED_CTR_CTRL:
        case MSR_CORE_PERF_GLOBAL_STATUS:
        case MSR_CORE_PERF_GLOBAL_CTRL:
        case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
                ret = pmu->version > 1;
                break;
        default:
                ret = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)
                        || get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0)
                        || get_fixed_pmc(pmu, msr);
                break;
        }

        return ret;
}

int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data)
{
        struct kvm_pmu *pmu = &vcpu->arch.pmu;
        struct kvm_pmc *pmc;

        switch (index) {
        case MSR_CORE_PERF_FIXED_CTR_CTRL:
                *data = pmu->fixed_ctr_ctrl;
                return 0;
        case MSR_CORE_PERF_GLOBAL_STATUS:
                *data = pmu->global_status;
                return 0;
        case MSR_CORE_PERF_GLOBAL_CTRL:
                *data = pmu->global_ctrl;
                return 0;
        case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
                *data = pmu->global_ovf_ctrl;
                return 0;
        default:
                if ((pmc = get_gp_pmc(pmu, index, MSR_IA32_PERFCTR0)) ||
                                (pmc = get_fixed_pmc(pmu, index))) {
                        *data = read_pmc(pmc);
                        return 0;
                } else if ((pmc = get_gp_pmc(pmu, index, MSR_P6_EVNTSEL0))) {
                        *data = pmc->eventsel;
                        return 0;
                }
        }

        return 1;
}

int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data)
{
        struct kvm_pmu *pmu = &vcpu->arch.pmu;
        struct kvm_pmc *pmc;

        switch (index) {
        case MSR_CORE_PERF_FIXED_CTR_CTRL:
                if (pmu->fixed_ctr_ctrl == data)
                        return 0;
                if (!(data & 0xfffffffffffff444ull)) {
                        reprogram_fixed_counters(pmu, data);
                        return 0;
                }
                break;
        case MSR_CORE_PERF_GLOBAL_STATUS:
                break; /* RO MSR */
        case MSR_CORE_PERF_GLOBAL_CTRL:
                if (pmu->global_ctrl == data)
                        return 0;
                if (!(data & pmu->global_ctrl_mask)) {
                        global_ctrl_changed(pmu, data);
                        return 0;
                }
                break;
        case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
                if (!(data & (pmu->global_ctrl_mask & ~(3ull << 62)))) {
                        pmu->global_status &= ~data;
                        pmu->global_ovf_ctrl = data;
                        return 0;
                }
                break;
        default:
                if ((pmc = get_gp_pmc(pmu, index, MSR_IA32_PERFCTR0)) ||
                                (pmc = get_fixed_pmc(pmu, index))) {
                        data = (s64)(s32)data;
                        pmc->counter += data - read_pmc(pmc);
                        return 0;
                } else if ((pmc = get_gp_pmc(pmu, index, MSR_P6_EVNTSEL0))) {
                        if (data == pmc->eventsel)
                                return 0;
                        if (!(data & 0xffffffff00200000ull)) {
                                reprogram_gp_counter(pmc, data);
                                return 0;
                        }
                }
        }

        return 1;
}

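/*
 * RDPMC emulation. ECX bit 30 selects the fixed counter space, bit 31
 * ("fast" mode) asks for only the low 32 bits of the counter; the
 * remaining low bits index the counter within the selected space.
 */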
int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data)
{
        struct kvm_pmu *pmu = &vcpu->arch.pmu;
        bool fast_mode = pmc & (1u << 31);
        bool fixed = pmc & (1u << 30);
        struct kvm_pmc *counters;
        u64 ctr;

        pmc &= ~(3u << 30);
        if (!fixed && pmc >= pmu->nr_arch_gp_counters)
                return 1;
        if (fixed && pmc >= pmu->nr_arch_fixed_counters)
                return 1;
        counters = fixed ? pmu->fixed_counters : pmu->gp_counters;
        ctr = read_pmc(&counters[pmc]);
        if (fast_mode)
                ctr = (u32)ctr;
        *data = ctr;

        return 0;
}

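/*
 * Refresh the PMU model from the guest's CPUID leaf 0xA: EAX holds the
 * version (bits 0-7), number of GP counters (8-15), counter width (16-23)
 * and the length of the EBX event-availability vector (24-31); EDX holds
 * the number (bits 0-4) and width (5-12) of fixed counters.
 */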
void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu)
{
        struct kvm_pmu *pmu = &vcpu->arch.pmu;
        struct kvm_cpuid_entry2 *entry;
        unsigned bitmap_len;

        pmu->nr_arch_gp_counters = 0;
        pmu->nr_arch_fixed_counters = 0;
        pmu->counter_bitmask[KVM_PMC_GP] = 0;
        pmu->counter_bitmask[KVM_PMC_FIXED] = 0;
        pmu->version = 0;

        entry = kvm_find_cpuid_entry(vcpu, 0xa, 0);
        if (!entry)
                return;

        pmu->version = entry->eax & 0xff;
        if (!pmu->version)
                return;

        pmu->nr_arch_gp_counters = min((int)(entry->eax >> 8) & 0xff,
                        INTEL_PMC_MAX_GENERIC);
        pmu->counter_bitmask[KVM_PMC_GP] =
                ((u64)1 << ((entry->eax >> 16) & 0xff)) - 1;
        bitmap_len = (entry->eax >> 24) & 0xff;
        pmu->available_event_types = ~entry->ebx & ((1ull << bitmap_len) - 1);

        if (pmu->version == 1) {
                pmu->nr_arch_fixed_counters = 0;
        } else {
                pmu->nr_arch_fixed_counters = min((int)(entry->edx & 0x1f),
                                INTEL_PMC_MAX_FIXED);
                pmu->counter_bitmask[KVM_PMC_FIXED] =
                        ((u64)1 << ((entry->edx >> 5) & 0xff)) - 1;
        }

        pmu->global_ctrl = ((1 << pmu->nr_arch_gp_counters) - 1) |
                (((1ull << pmu->nr_arch_fixed_counters) - 1) << INTEL_PMC_IDX_FIXED);
        pmu->global_ctrl_mask = ~pmu->global_ctrl;
}

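/*
 * Counter structures get their global index once at init time:
 * gp_counters[i].idx = i and fixed_counters[i].idx = i +
 * INTEL_PMC_IDX_FIXED, matching the bit layout of global_ctrl,
 * global_status and reprogram_pmi.
 */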
void kvm_pmu_init(struct kvm_vcpu *vcpu)
{
        int i;
        struct kvm_pmu *pmu = &vcpu->arch.pmu;

        memset(pmu, 0, sizeof(*pmu));
        for (i = 0; i < INTEL_PMC_MAX_GENERIC; i++) {
                pmu->gp_counters[i].type = KVM_PMC_GP;
                pmu->gp_counters[i].vcpu = vcpu;
                pmu->gp_counters[i].idx = i;
        }
        for (i = 0; i < INTEL_PMC_MAX_FIXED; i++) {
                pmu->fixed_counters[i].type = KVM_PMC_FIXED;
                pmu->fixed_counters[i].vcpu = vcpu;
                pmu->fixed_counters[i].idx = i + INTEL_PMC_IDX_FIXED;
        }
        init_irq_work(&pmu->irq_work, trigger_pmi);
        kvm_pmu_cpuid_update(vcpu);
}

void kvm_pmu_reset(struct kvm_vcpu *vcpu)
{
        struct kvm_pmu *pmu = &vcpu->arch.pmu;
        int i;

        irq_work_sync(&pmu->irq_work);
        for (i = 0; i < INTEL_PMC_MAX_GENERIC; i++) {
                struct kvm_pmc *pmc = &pmu->gp_counters[i];

                stop_counter(pmc);
                pmc->counter = pmc->eventsel = 0;
        }

        for (i = 0; i < INTEL_PMC_MAX_FIXED; i++)
                stop_counter(&pmu->fixed_counters[i]);

        pmu->fixed_ctr_ctrl = pmu->global_ctrl = pmu->global_status =
                pmu->global_ovf_ctrl = 0;
}

void kvm_pmu_destroy(struct kvm_vcpu *vcpu)
{
        kvm_pmu_reset(vcpu);
}

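/*
 * Runs in vcpu context in response to KVM_REQ_PMU: replay the counter
 * reprogramming that kvm_perf_overflow_intr recorded in reprogram_pmi but
 * could not perform from NMI context.
 */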
void kvm_handle_pmu_event(struct kvm_vcpu *vcpu)
{
        struct kvm_pmu *pmu = &vcpu->arch.pmu;
        u64 bitmask;
        int bit;

        bitmask = pmu->reprogram_pmi;

        for_each_set_bit(bit, (unsigned long *)&bitmask, X86_PMC_IDX_MAX) {
                struct kvm_pmc *pmc = global_idx_to_pmc(pmu, bit);

                if (unlikely(!pmc || !pmc->perf_event)) {
                        clear_bit(bit, (unsigned long *)&pmu->reprogram_pmi);
                        continue;
                }

                reprogram_idx(pmu, bit);
        }
}