/*
 * Kernel-based Virtual Machine -- Performance Monitoring Unit support
 *
 * Copyright 2011 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Avi Kivity   <avi@redhat.com>
 *   Gleb Natapov <gleb@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2. See
 * the COPYING file in the top-level directory.
 */
#include <linux/types.h>
#include <linux/kvm_host.h>
#include <linux/perf_event.h>
#include "x86.h"
#include "cpuid.h"
#include "lapic.h"
static struct kvm_arch_event_perf_mapping {
        u8 eventsel;
        u8 unit_mask;
        unsigned event_type;
} arch_events[] = {
        /* Index must match CPUID 0x0A.EBX bit vector */
        [0] = { 0x3c, 0x00, PERF_COUNT_HW_CPU_CYCLES },
        [1] = { 0xc0, 0x00, PERF_COUNT_HW_INSTRUCTIONS },
        [2] = { 0x3c, 0x01, PERF_COUNT_HW_BUS_CYCLES },
        [3] = { 0x2e, 0x4f, PERF_COUNT_HW_CACHE_REFERENCES },
        [4] = { 0x2e, 0x41, PERF_COUNT_HW_CACHE_MISSES },
        [5] = { 0xc4, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
        [6] = { 0xc5, 0x00, PERF_COUNT_HW_BRANCH_MISSES },
};
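/*
 * Intel fixed counter 0 counts retired instructions, fixed counter 1
 * unhalted core cycles and fixed counter 2 unhalted reference cycles.
 * The table below points each fixed counter at the closest entry in
 * arch_events[]; reference cycles have no exact generic perf event, so
 * the bus-cycles event stands in for them.
 */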
/* mapping between fixed pmc index and arch_events array */
int fixed_pmc_events[] = {1, 0, 2};
static bool pmc_is_gp(struct kvm_pmc *pmc)
{
        return pmc->type == KVM_PMC_GP;
}
static inline u64 pmc_bitmask(struct kvm_pmc *pmc)
{
        struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu;

        return pmu->counter_bitmask[pmc->type];
}
static inline bool pmc_enabled(struct kvm_pmc *pmc)
{
        struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu;

        return test_bit(pmc->idx, (unsigned long *)&pmu->global_ctrl);
}
static inline struct kvm_pmc *get_gp_pmc(struct kvm_pmu *pmu, u32 msr,
                                         u32 base)
{
        if (msr >= base && msr < base + pmu->nr_arch_gp_counters)
                return &pmu->gp_counters[msr - base];

        return NULL;
}
static inline struct kvm_pmc *get_fixed_pmc(struct kvm_pmu *pmu, u32 msr)
{
        int base = MSR_CORE_PERF_FIXED_CTR0;

        if (msr >= base && msr < base + pmu->nr_arch_fixed_counters)
                return &pmu->fixed_counters[msr - base];

        return NULL;
}
static inline struct kvm_pmc *get_fixed_pmc_idx(struct kvm_pmu *pmu, int idx)
{
        return get_fixed_pmc(pmu, MSR_CORE_PERF_FIXED_CTR0 + idx);
}
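/*
 * Counters share one global index space that mirrors the bit layout of
 * MSR_CORE_PERF_GLOBAL_CTRL/STATUS: general-purpose counter i sits at bit
 * i, fixed counter i at bit X86_PMC_IDX_FIXED + i (i.e. 32 + i).
 */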
static struct kvm_pmc *global_idx_to_pmc(struct kvm_pmu *pmu, int idx)
{
        if (idx < X86_PMC_IDX_FIXED)
                return get_gp_pmc(pmu, MSR_P6_EVNTSEL0 + idx, MSR_P6_EVNTSEL0);

        return get_fixed_pmc_idx(pmu, idx - X86_PMC_IDX_FIXED);
}
void kvm_deliver_pmi(struct kvm_vcpu *vcpu)
{
        if (vcpu->arch.apic)
                kvm_apic_local_deliver(vcpu->arch.apic, APIC_LVTPC);
}
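/*
 * irq_work callback. The perf overflow handler runs in NMI context, from
 * which a halted vcpu cannot be woken; kvm_perf_overflow_intr() therefore
 * queues this work item, and the PMI is delivered from here instead.
 */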
static void trigger_pmi(struct irq_work *irq_work)
{
        struct kvm_pmu *pmu = container_of(irq_work, struct kvm_pmu,
                        irq_work);
        struct kvm_vcpu *vcpu = container_of(pmu, struct kvm_vcpu,
                        arch.pmu);

        kvm_deliver_pmi(vcpu);
}
static void kvm_perf_overflow(struct perf_event *perf_event,
                              struct perf_sample_data *data,
                              struct pt_regs *regs)
{
        struct kvm_pmc *pmc = perf_event->overflow_handler_context;
        struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu;

        __set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
}
static void kvm_perf_overflow_intr(struct perf_event *perf_event,
                struct perf_sample_data *data, struct pt_regs *regs)
{
        struct kvm_pmc *pmc = perf_event->overflow_handler_context;
        struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu;

        if (!test_and_set_bit(pmc->idx, (unsigned long *)&pmu->reprogram_pmi)) {
                kvm_perf_overflow(perf_event, data, regs);
                kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
                /*
                 * Inject PMI. If the vcpu was in guest mode during the NMI,
                 * the PMI can be injected on guest-mode re-entry. Otherwise
                 * we cannot be sure the vcpu wasn't executing a hlt
                 * instruction at the time of the vmexit, so it may not
                 * re-enter guest mode until woken up. We should wake it, but
                 * that is impossible from NMI context; do it from irq work
                 * instead.
                 */
                if (!kvm_is_in_guest())
                        irq_work_queue(&pmc->vcpu->arch.pmu.irq_work);
                else
                        kvm_make_request(KVM_REQ_PMI, pmc->vcpu);
        }
}
static u64 read_pmc(struct kvm_pmc *pmc)
{
        u64 counter, enabled, running;

        counter = pmc->counter;

        if (pmc->perf_event)
                counter += perf_event_read_value(pmc->perf_event,
                                                 &enabled, &running);

        /* FIXME: Scaling needed? */

        return counter & pmc_bitmask(pmc);
}
static void stop_counter(struct kvm_pmc *pmc)
{
        if (pmc->perf_event) {
                pmc->counter = read_pmc(pmc);
                perf_event_release_kernel(pmc->perf_event);
                pmc->perf_event = NULL;
        }
}
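/*
 * Back a guest counter with a host perf event. Guest counters count up
 * and interrupt on overflow, so the host sample period is the distance
 * from the current guest value to the overflow point: e.g. for a 48-bit
 * counter holding 0xffffffffff00, (-counter) & pmc_bitmask(pmc) = 0x100
 * host events remain until the guest counter wraps.
 */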
static void reprogram_counter(struct kvm_pmc *pmc, u32 type,
                unsigned config, bool exclude_user, bool exclude_kernel,
                bool intr)
{
        struct perf_event *event;
        struct perf_event_attr attr = {
                .type = type,
                .size = sizeof(attr),
                .pinned = true,
                .exclude_idle = true,
                .exclude_host = 1,
                .exclude_user = exclude_user,
                .exclude_kernel = exclude_kernel,
                .config = config,
        };

        attr.sample_period = (-pmc->counter) & pmc_bitmask(pmc);

        event = perf_event_create_kernel_counter(&attr, -1, current,
                                                 intr ? kvm_perf_overflow_intr :
                                                 kvm_perf_overflow, pmc);
        if (IS_ERR(event)) {
                printk_once("kvm: pmu event creation failed %ld\n",
                                PTR_ERR(event));
                return;
        }

        pmc->perf_event = event;
        clear_bit(pmc->idx, (unsigned long *)&pmc->vcpu->arch.pmu.reprogram_pmi);
}
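/*
 * Map a raw (event select, unit mask) pair to a generic perf hardware
 * event. A match counts only if the guest's CPUID 0x0A.EBX bitmap left
 * the event available (see kvm_pmu_cpuid_update() below). Returns
 * PERF_COUNT_HW_MAX if there is no usable mapping, in which case the
 * caller falls back to a raw event.
 */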
static unsigned find_arch_event(struct kvm_pmu *pmu, u8 event_select,
                u8 unit_mask)
{
        int i;

        for (i = 0; i < ARRAY_SIZE(arch_events); i++)
                if (arch_events[i].eventsel == event_select
                    && arch_events[i].unit_mask == unit_mask
                    && (pmu->available_event_types & (1 << i)))
                        break;

        if (i == ARRAY_SIZE(arch_events))
                return PERF_COUNT_HW_MAX;

        return arch_events[i].event_type;
}
static void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
{
        unsigned config, type = PERF_TYPE_RAW;
        u8 event_select, unit_mask;

        pmc->eventsel = eventsel;

        stop_counter(pmc);

        if (!(eventsel & ARCH_PERFMON_EVENTSEL_ENABLE) || !pmc_enabled(pmc))
                return;

        event_select = eventsel & ARCH_PERFMON_EVENTSEL_EVENT;
        unit_mask = (eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;

        /*
         * Test the full eventsel here: event_select holds only the low 8
         * bits, so checking it against EDGE/INV/CMASK (bits 18, 23 and
         * 24-31) would always pass.
         */
        if (!(eventsel & (ARCH_PERFMON_EVENTSEL_EDGE |
                          ARCH_PERFMON_EVENTSEL_INV |
                          ARCH_PERFMON_EVENTSEL_CMASK))) {
                config = find_arch_event(&pmc->vcpu->arch.pmu, event_select,
                                unit_mask);
                if (config != PERF_COUNT_HW_MAX)
                        type = PERF_TYPE_HARDWARE;
        }

        if (type == PERF_TYPE_RAW)
                config = eventsel & X86_RAW_EVENT_MASK;

        reprogram_counter(pmc, type, config,
                        !(eventsel & ARCH_PERFMON_EVENTSEL_USR),
                        !(eventsel & ARCH_PERFMON_EVENTSEL_OS),
                        eventsel & ARCH_PERFMON_EVENTSEL_INT);
}
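/*
 * Each fixed counter has a 4-bit field in MSR_CORE_PERF_FIXED_CTR_CTRL:
 * bits 0-1 select the ring level (1 = OS, 2 = user, 3 = both) and bit 3
 * enables the PMI on overflow, so e.g. 0xb counts in both rings and
 * raises a PMI when the counter wraps. en_pmi below is one such field,
 * extracted by fixed_en_pmi().
 */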
static void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 en_pmi, int idx)
{
        unsigned en = en_pmi & 0x3;
        bool pmi = en_pmi & 0x8;

        stop_counter(pmc);

        if (!en || !pmc_enabled(pmc))
                return;

        reprogram_counter(pmc, PERF_TYPE_HARDWARE,
                        arch_events[fixed_pmc_events[idx]].event_type,
                        !(en & 0x2), /* exclude user */
                        !(en & 0x1), /* exclude kernel */
                        pmi);
}
static inline u8 fixed_en_pmi(u64 ctrl, int idx)
{
        return (ctrl >> (idx * 4)) & 0xf;
}
static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data)
{
        int i;

        for (i = 0; i < pmu->nr_arch_fixed_counters; i++) {
                u8 en_pmi = fixed_en_pmi(data, i);
                struct kvm_pmc *pmc = get_fixed_pmc_idx(pmu, i);

                if (fixed_en_pmi(pmu->fixed_ctr_ctrl, i) == en_pmi)
                        continue;

                reprogram_fixed_counter(pmc, en_pmi, i);
        }

        pmu->fixed_ctr_ctrl = data;
}
static void reprogram_idx(struct kvm_pmu *pmu, int idx)
{
        struct kvm_pmc *pmc = global_idx_to_pmc(pmu, idx);

        if (!pmc)
                return;

        if (pmc_is_gp(pmc))
                reprogram_gp_counter(pmc, pmc->eventsel);
        else {
                int fidx = idx - X86_PMC_IDX_FIXED;

                reprogram_fixed_counter(pmc,
                                fixed_en_pmi(pmu->fixed_ctr_ctrl, fidx), fidx);
        }
}
static void global_ctrl_changed(struct kvm_pmu *pmu, u64 data)
{
        int bit;
        u64 diff = pmu->global_ctrl ^ data;

        pmu->global_ctrl = data;

        for_each_set_bit(bit, (unsigned long *)&diff, X86_PMC_IDX_MAX)
                reprogram_idx(pmu, bit);
}
bool kvm_pmu_msr(struct kvm_vcpu *vcpu, u32 msr)
{
        struct kvm_pmu *pmu = &vcpu->arch.pmu;
        int ret;

        switch (msr) {
        case MSR_CORE_PERF_FIXED_CTR_CTRL:
        case MSR_CORE_PERF_GLOBAL_STATUS:
        case MSR_CORE_PERF_GLOBAL_CTRL:
        case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
                ret = pmu->version > 1;
                break;
        default:
                ret = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)
                        || get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0)
                        || get_fixed_pmc(pmu, msr);
                break;
        }

        return ret;
}
int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data)
{
        struct kvm_pmu *pmu = &vcpu->arch.pmu;
        struct kvm_pmc *pmc;

        switch (index) {
        case MSR_CORE_PERF_FIXED_CTR_CTRL:
                *data = pmu->fixed_ctr_ctrl;
                return 0;
        case MSR_CORE_PERF_GLOBAL_STATUS:
                *data = pmu->global_status;
                return 0;
        case MSR_CORE_PERF_GLOBAL_CTRL:
                *data = pmu->global_ctrl;
                return 0;
        case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
                *data = pmu->global_ovf_ctrl;
                return 0;
        default:
                if ((pmc = get_gp_pmc(pmu, index, MSR_IA32_PERFCTR0)) ||
                    (pmc = get_fixed_pmc(pmu, index))) {
                        *data = read_pmc(pmc);
                        return 0;
                } else if ((pmc = get_gp_pmc(pmu, index, MSR_P6_EVNTSEL0))) {
                        *data = pmc->eventsel;
                        return 0;
                }
        }

        return 1;
}
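/*
 * Writes are accepted only with all reserved bits clear. For
 * MSR_CORE_PERF_FIXED_CTR_CTRL the mask 0xfffffffffffff444 leaves bits 0,
 * 1 and 3 of the three 4-bit fixed-counter fields writable; for an
 * event-select MSR, 0xffffffff00200000 rejects the upper half and bit 21.
 * A nonzero return tells the caller the write was not handled.
 */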
int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data)
{
        struct kvm_pmu *pmu = &vcpu->arch.pmu;
        struct kvm_pmc *pmc;

        switch (index) {
        case MSR_CORE_PERF_FIXED_CTR_CTRL:
                if (pmu->fixed_ctr_ctrl == data)
                        return 0;
                if (!(data & 0xfffffffffffff444)) {
                        reprogram_fixed_counters(pmu, data);
                        return 0;
                }
                break;
        case MSR_CORE_PERF_GLOBAL_STATUS:
                break; /* RO MSR */
        case MSR_CORE_PERF_GLOBAL_CTRL:
                if (pmu->global_ctrl == data)
                        return 0;
                if (!(data & pmu->global_ctrl_mask)) {
                        global_ctrl_changed(pmu, data);
                        return 0;
                }
                break;
        case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
                if (!(data & (pmu->global_ctrl_mask & ~(3ull << 62)))) {
                        pmu->global_status &= ~data;
                        pmu->global_ovf_ctrl = data;
                        return 0;
                }
                break;
        default:
                if ((pmc = get_gp_pmc(pmu, index, MSR_IA32_PERFCTR0)) ||
                    (pmc = get_fixed_pmc(pmu, index))) {
                        data = (s64)(s32)data;
                        pmc->counter += data - read_pmc(pmc);
                        return 0;
                } else if ((pmc = get_gp_pmc(pmu, index, MSR_P6_EVNTSEL0))) {
                        if (data == pmc->eventsel)
                                return 0;
                        if (!(data & 0xffffffff00200000ull)) {
                                reprogram_gp_counter(pmc, data);
                                return 0;
                        }
                }
        }

        return 1;
}
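/*
 * RDPMC emulation. The guest's ECX value selects the counter: bit 30
 * chooses fixed (1) versus general-purpose (0) counters and bit 31
 * requests "fast" mode, which returns only the low 32 bits. ECX =
 * 0x40000001, for example, reads fixed counter 1 in full.
 */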
int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data)
{
        struct kvm_pmu *pmu = &vcpu->arch.pmu;
        bool fast_mode = pmc & (1u << 31);
        bool fixed = pmc & (1u << 30);
        struct kvm_pmc *counters;
        u64 ctr;

        pmc &= (3u << 30) - 1;
        if (!fixed && pmc >= pmu->nr_arch_gp_counters)
                return 1;
        if (fixed && pmc >= pmu->nr_arch_fixed_counters)
                return 1;
        counters = fixed ? pmu->fixed_counters : pmu->gp_counters;
        ctr = read_pmc(&counters[pmc]);
        if (fast_mode)
                ctr = (u32)ctr;
        *data = ctr;

        return 0;
}
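/*
 * Derive the virtual PMU's capabilities from the guest's CPUID leaf 0x0A:
 * EAX[7:0] is the architectural PMU version, EAX[15:8] the number of GP
 * counters, EAX[23:16] their bit width, EAX[31:24] the length of the EBX
 * bitmap (where a set bit marks an event as *not* available). From
 * version 2 on, EDX[4:0] gives the number of fixed counters and
 * EDX[12:5] their bit width.
 */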
void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu)
{
        struct kvm_pmu *pmu = &vcpu->arch.pmu;
        struct kvm_cpuid_entry2 *entry;
        unsigned bitmap_len;

        pmu->nr_arch_gp_counters = 0;
        pmu->nr_arch_fixed_counters = 0;
        pmu->counter_bitmask[KVM_PMC_GP] = 0;
        pmu->counter_bitmask[KVM_PMC_FIXED] = 0;
        pmu->version = 0;

        entry = kvm_find_cpuid_entry(vcpu, 0xa, 0);
        if (!entry)
                return;

        pmu->version = entry->eax & 0xff;
        if (!pmu->version)
                return;

        pmu->nr_arch_gp_counters = min((int)(entry->eax >> 8) & 0xff,
                        X86_PMC_MAX_GENERIC);
        pmu->counter_bitmask[KVM_PMC_GP] =
                ((u64)1 << ((entry->eax >> 16) & 0xff)) - 1;
        bitmap_len = (entry->eax >> 24) & 0xff;
        pmu->available_event_types = ~entry->ebx & ((1ull << bitmap_len) - 1);

        if (pmu->version == 1) {
                pmu->global_ctrl = (1 << pmu->nr_arch_gp_counters) - 1;
                return;
        }

        pmu->nr_arch_fixed_counters = min((int)(entry->edx & 0x1f),
                        X86_PMC_MAX_FIXED);
        pmu->counter_bitmask[KVM_PMC_FIXED] =
                ((u64)1 << ((entry->edx >> 5) & 0xff)) - 1;
        pmu->global_ctrl_mask = ~(((1 << pmu->nr_arch_gp_counters) - 1)
                | (((1ull << pmu->nr_arch_fixed_counters) - 1)
                        << X86_PMC_IDX_FIXED));
}
void kvm_pmu_init(struct kvm_vcpu *vcpu)
{
        int i;
        struct kvm_pmu *pmu = &vcpu->arch.pmu;

        memset(pmu, 0, sizeof(*pmu));
        for (i = 0; i < X86_PMC_MAX_GENERIC; i++) {
                pmu->gp_counters[i].type = KVM_PMC_GP;
                pmu->gp_counters[i].vcpu = vcpu;
                pmu->gp_counters[i].idx = i;
        }
        for (i = 0; i < X86_PMC_MAX_FIXED; i++) {
                pmu->fixed_counters[i].type = KVM_PMC_FIXED;
                pmu->fixed_counters[i].vcpu = vcpu;
                pmu->fixed_counters[i].idx = i + X86_PMC_IDX_FIXED;
        }
        init_irq_work(&pmu->irq_work, trigger_pmi);
        kvm_pmu_cpuid_update(vcpu);
}
void kvm_pmu_reset(struct kvm_vcpu *vcpu)
{
        struct kvm_pmu *pmu = &vcpu->arch.pmu;
        int i;

        irq_work_sync(&pmu->irq_work);
        for (i = 0; i < X86_PMC_MAX_GENERIC; i++) {
                struct kvm_pmc *pmc = &pmu->gp_counters[i];

                stop_counter(pmc);
                pmc->counter = pmc->eventsel = 0;
        }

        for (i = 0; i < X86_PMC_MAX_FIXED; i++)
                stop_counter(&pmu->fixed_counters[i]);

        pmu->fixed_ctr_ctrl = pmu->global_ctrl = pmu->global_status =
                pmu->global_ovf_ctrl = 0;
}
void kvm_pmu_destroy(struct kvm_vcpu *vcpu)
{
        kvm_pmu_reset(vcpu);
}
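/*
 * KVM_REQ_PMU handler, run from the vcpu thread before guest entry. The
 * NMI-context overflow handler only flags counters in reprogram_pmi; the
 * reprogramming itself (which may sleep when creating the host perf
 * event) happens here. Bits whose counter or perf event has meanwhile
 * gone away are simply cleared.
 */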
void kvm_handle_pmu_event(struct kvm_vcpu *vcpu)
{
        struct kvm_pmu *pmu = &vcpu->arch.pmu;
        u64 bitmask;
        int bit;

        bitmask = pmu->reprogram_pmi;

        for_each_set_bit(bit, (unsigned long *)&bitmask, X86_PMC_IDX_MAX) {
                struct kvm_pmc *pmc = global_idx_to_pmc(pmu, bit);

                if (unlikely(!pmc || !pmc->perf_event)) {
                        clear_bit(bit, (unsigned long *)&pmu->reprogram_pmi);
                        continue;
                }

                reprogram_idx(pmu, bit);
        }
}