/*
 * Kernel-based Virtual Machine -- Performance Monitoring Unit support
 *
 * Copyright 2011 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Avi Kivity   <avi@redhat.com>
 *   Gleb Natapov <gleb@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */
15 #include <linux/types.h>
16 #include <linux/kvm_host.h>
17 #include <linux/perf_event.h>
22 static struct kvm_arch_event_perf_mapping
{
28 /* Index must match CPUID 0x0A.EBX bit vector */
29 [0] = { 0x3c, 0x00, PERF_COUNT_HW_CPU_CYCLES
},
30 [1] = { 0xc0, 0x00, PERF_COUNT_HW_INSTRUCTIONS
},
31 [2] = { 0x3c, 0x01, PERF_COUNT_HW_BUS_CYCLES
},
32 [3] = { 0x2e, 0x4f, PERF_COUNT_HW_CACHE_REFERENCES
},
33 [4] = { 0x2e, 0x41, PERF_COUNT_HW_CACHE_MISSES
},
34 [5] = { 0xc4, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS
},
35 [6] = { 0xc5, 0x00, PERF_COUNT_HW_BRANCH_MISSES
},
36 [7] = { 0x00, 0x30, PERF_COUNT_HW_REF_CPU_CYCLES
},
/*
 * mapping between fixed pmc index and arch_events array
 *
 * Fixed counter 0 uses arch_events[1] (instructions), fixed counter 1
 * uses arch_events[0] (cpu cycles) and fixed counter 2 uses
 * arch_events[7] (reference cpu cycles).
 */
int fixed_pmc_events[] = {1, 0, 7};
42 static bool pmc_is_gp(struct kvm_pmc
*pmc
)
44 return pmc
->type
== KVM_PMC_GP
;
47 static inline u64
pmc_bitmask(struct kvm_pmc
*pmc
)
49 struct kvm_pmu
*pmu
= &pmc
->vcpu
->arch
.pmu
;
51 return pmu
->counter_bitmask
[pmc
->type
];
54 static inline bool pmc_enabled(struct kvm_pmc
*pmc
)
56 struct kvm_pmu
*pmu
= &pmc
->vcpu
->arch
.pmu
;
57 return test_bit(pmc
->idx
, (unsigned long *)&pmu
->global_ctrl
);
60 static inline struct kvm_pmc
*get_gp_pmc(struct kvm_pmu
*pmu
, u32 msr
,
63 if (msr
>= base
&& msr
< base
+ pmu
->nr_arch_gp_counters
)
64 return &pmu
->gp_counters
[msr
- base
];
68 static inline struct kvm_pmc
*get_fixed_pmc(struct kvm_pmu
*pmu
, u32 msr
)
70 int base
= MSR_CORE_PERF_FIXED_CTR0
;
71 if (msr
>= base
&& msr
< base
+ pmu
->nr_arch_fixed_counters
)
72 return &pmu
->fixed_counters
[msr
- base
];
76 static inline struct kvm_pmc
*get_fixed_pmc_idx(struct kvm_pmu
*pmu
, int idx
)
78 return get_fixed_pmc(pmu
, MSR_CORE_PERF_FIXED_CTR0
+ idx
);
81 static struct kvm_pmc
*global_idx_to_pmc(struct kvm_pmu
*pmu
, int idx
)
83 if (idx
< INTEL_PMC_IDX_FIXED
)
84 return get_gp_pmc(pmu
, MSR_P6_EVNTSEL0
+ idx
, MSR_P6_EVNTSEL0
);
86 return get_fixed_pmc_idx(pmu
, idx
- INTEL_PMC_IDX_FIXED
);
89 void kvm_deliver_pmi(struct kvm_vcpu
*vcpu
)
92 kvm_apic_local_deliver(vcpu
->arch
.apic
, APIC_LVTPC
);
95 static void trigger_pmi(struct irq_work
*irq_work
)
97 struct kvm_pmu
*pmu
= container_of(irq_work
, struct kvm_pmu
,
99 struct kvm_vcpu
*vcpu
= container_of(pmu
, struct kvm_vcpu
,
102 kvm_deliver_pmi(vcpu
);
105 static void kvm_perf_overflow(struct perf_event
*perf_event
,
106 struct perf_sample_data
*data
,
107 struct pt_regs
*regs
)
109 struct kvm_pmc
*pmc
= perf_event
->overflow_handler_context
;
110 struct kvm_pmu
*pmu
= &pmc
->vcpu
->arch
.pmu
;
111 __set_bit(pmc
->idx
, (unsigned long *)&pmu
->global_status
);
114 static void kvm_perf_overflow_intr(struct perf_event
*perf_event
,
115 struct perf_sample_data
*data
, struct pt_regs
*regs
)
117 struct kvm_pmc
*pmc
= perf_event
->overflow_handler_context
;
118 struct kvm_pmu
*pmu
= &pmc
->vcpu
->arch
.pmu
;
119 if (!test_and_set_bit(pmc
->idx
, (unsigned long *)&pmu
->reprogram_pmi
)) {
120 kvm_perf_overflow(perf_event
, data
, regs
);
121 kvm_make_request(KVM_REQ_PMU
, pmc
->vcpu
);
123 * Inject PMI. If vcpu was in a guest mode during NMI PMI
124 * can be ejected on a guest mode re-entry. Otherwise we can't
125 * be sure that vcpu wasn't executing hlt instruction at the
126 * time of vmexit and is not going to re-enter guest mode until,
127 * woken up. So we should wake it, but this is impossible from
128 * NMI context. Do it from irq work instead.
130 if (!kvm_is_in_guest())
131 irq_work_queue(&pmc
->vcpu
->arch
.pmu
.irq_work
);
133 kvm_make_request(KVM_REQ_PMI
, pmc
->vcpu
);
137 static u64
read_pmc(struct kvm_pmc
*pmc
)
139 u64 counter
, enabled
, running
;
141 counter
= pmc
->counter
;
144 counter
+= perf_event_read_value(pmc
->perf_event
,
147 /* FIXME: Scaling needed? */
149 return counter
& pmc_bitmask(pmc
);
152 static void stop_counter(struct kvm_pmc
*pmc
)
154 if (pmc
->perf_event
) {
155 pmc
->counter
= read_pmc(pmc
);
156 perf_event_release_kernel(pmc
->perf_event
);
157 pmc
->perf_event
= NULL
;
161 static void reprogram_counter(struct kvm_pmc
*pmc
, u32 type
,
162 unsigned config
, bool exclude_user
, bool exclude_kernel
,
163 bool intr
, bool in_tx
, bool in_tx_cp
)
165 struct perf_event
*event
;
166 struct perf_event_attr attr
= {
168 .size
= sizeof(attr
),
170 .exclude_idle
= true,
172 .exclude_user
= exclude_user
,
173 .exclude_kernel
= exclude_kernel
,
177 attr
.config
|= HSW_IN_TX
;
179 attr
.config
|= HSW_IN_TX_CHECKPOINTED
;
181 attr
.sample_period
= (-pmc
->counter
) & pmc_bitmask(pmc
);
183 event
= perf_event_create_kernel_counter(&attr
, -1, current
,
184 intr
? kvm_perf_overflow_intr
:
185 kvm_perf_overflow
, pmc
);
187 printk_once("kvm: pmu event creation failed %ld\n",
192 pmc
->perf_event
= event
;
193 clear_bit(pmc
->idx
, (unsigned long*)&pmc
->vcpu
->arch
.pmu
.reprogram_pmi
);
196 static unsigned find_arch_event(struct kvm_pmu
*pmu
, u8 event_select
,
201 for (i
= 0; i
< ARRAY_SIZE(arch_events
); i
++)
202 if (arch_events
[i
].eventsel
== event_select
203 && arch_events
[i
].unit_mask
== unit_mask
204 && (pmu
->available_event_types
& (1 << i
)))
207 if (i
== ARRAY_SIZE(arch_events
))
208 return PERF_COUNT_HW_MAX
;
210 return arch_events
[i
].event_type
;
213 static void reprogram_gp_counter(struct kvm_pmc
*pmc
, u64 eventsel
)
215 unsigned config
, type
= PERF_TYPE_RAW
;
216 u8 event_select
, unit_mask
;
218 if (eventsel
& ARCH_PERFMON_EVENTSEL_PIN_CONTROL
)
219 printk_once("kvm pmu: pin control bit is ignored\n");
221 pmc
->eventsel
= eventsel
;
225 if (!(eventsel
& ARCH_PERFMON_EVENTSEL_ENABLE
) || !pmc_enabled(pmc
))
228 event_select
= eventsel
& ARCH_PERFMON_EVENTSEL_EVENT
;
229 unit_mask
= (eventsel
& ARCH_PERFMON_EVENTSEL_UMASK
) >> 8;
231 if (!(eventsel
& (ARCH_PERFMON_EVENTSEL_EDGE
|
232 ARCH_PERFMON_EVENTSEL_INV
|
233 ARCH_PERFMON_EVENTSEL_CMASK
|
235 HSW_IN_TX_CHECKPOINTED
))) {
236 config
= find_arch_event(&pmc
->vcpu
->arch
.pmu
, event_select
,
238 if (config
!= PERF_COUNT_HW_MAX
)
239 type
= PERF_TYPE_HARDWARE
;
242 if (type
== PERF_TYPE_RAW
)
243 config
= eventsel
& X86_RAW_EVENT_MASK
;
245 reprogram_counter(pmc
, type
, config
,
246 !(eventsel
& ARCH_PERFMON_EVENTSEL_USR
),
247 !(eventsel
& ARCH_PERFMON_EVENTSEL_OS
),
248 eventsel
& ARCH_PERFMON_EVENTSEL_INT
,
249 (eventsel
& HSW_IN_TX
),
250 (eventsel
& HSW_IN_TX_CHECKPOINTED
));
253 static void reprogram_fixed_counter(struct kvm_pmc
*pmc
, u8 en_pmi
, int idx
)
255 unsigned en
= en_pmi
& 0x3;
256 bool pmi
= en_pmi
& 0x8;
260 if (!en
|| !pmc_enabled(pmc
))
263 reprogram_counter(pmc
, PERF_TYPE_HARDWARE
,
264 arch_events
[fixed_pmc_events
[idx
]].event_type
,
265 !(en
& 0x2), /* exclude user */
266 !(en
& 0x1), /* exclude kernel */
270 static inline u8
fixed_en_pmi(u64 ctrl
, int idx
)
272 return (ctrl
>> (idx
* 4)) & 0xf;
275 static void reprogram_fixed_counters(struct kvm_pmu
*pmu
, u64 data
)
279 for (i
= 0; i
< pmu
->nr_arch_fixed_counters
; i
++) {
280 u8 en_pmi
= fixed_en_pmi(data
, i
);
281 struct kvm_pmc
*pmc
= get_fixed_pmc_idx(pmu
, i
);
283 if (fixed_en_pmi(pmu
->fixed_ctr_ctrl
, i
) == en_pmi
)
286 reprogram_fixed_counter(pmc
, en_pmi
, i
);
289 pmu
->fixed_ctr_ctrl
= data
;
292 static void reprogram_idx(struct kvm_pmu
*pmu
, int idx
)
294 struct kvm_pmc
*pmc
= global_idx_to_pmc(pmu
, idx
);
300 reprogram_gp_counter(pmc
, pmc
->eventsel
);
302 int fidx
= idx
- INTEL_PMC_IDX_FIXED
;
303 reprogram_fixed_counter(pmc
,
304 fixed_en_pmi(pmu
->fixed_ctr_ctrl
, fidx
), fidx
);
308 static void global_ctrl_changed(struct kvm_pmu
*pmu
, u64 data
)
311 u64 diff
= pmu
->global_ctrl
^ data
;
313 pmu
->global_ctrl
= data
;
315 for_each_set_bit(bit
, (unsigned long *)&diff
, X86_PMC_IDX_MAX
)
316 reprogram_idx(pmu
, bit
);
319 bool kvm_pmu_msr(struct kvm_vcpu
*vcpu
, u32 msr
)
321 struct kvm_pmu
*pmu
= &vcpu
->arch
.pmu
;
325 case MSR_CORE_PERF_FIXED_CTR_CTRL
:
326 case MSR_CORE_PERF_GLOBAL_STATUS
:
327 case MSR_CORE_PERF_GLOBAL_CTRL
:
328 case MSR_CORE_PERF_GLOBAL_OVF_CTRL
:
329 ret
= pmu
->version
> 1;
332 ret
= get_gp_pmc(pmu
, msr
, MSR_IA32_PERFCTR0
)
333 || get_gp_pmc(pmu
, msr
, MSR_P6_EVNTSEL0
)
334 || get_fixed_pmc(pmu
, msr
);
340 int kvm_pmu_get_msr(struct kvm_vcpu
*vcpu
, u32 index
, u64
*data
)
342 struct kvm_pmu
*pmu
= &vcpu
->arch
.pmu
;
346 case MSR_CORE_PERF_FIXED_CTR_CTRL
:
347 *data
= pmu
->fixed_ctr_ctrl
;
349 case MSR_CORE_PERF_GLOBAL_STATUS
:
350 *data
= pmu
->global_status
;
352 case MSR_CORE_PERF_GLOBAL_CTRL
:
353 *data
= pmu
->global_ctrl
;
355 case MSR_CORE_PERF_GLOBAL_OVF_CTRL
:
356 *data
= pmu
->global_ovf_ctrl
;
359 if ((pmc
= get_gp_pmc(pmu
, index
, MSR_IA32_PERFCTR0
)) ||
360 (pmc
= get_fixed_pmc(pmu
, index
))) {
361 *data
= read_pmc(pmc
);
363 } else if ((pmc
= get_gp_pmc(pmu
, index
, MSR_P6_EVNTSEL0
))) {
364 *data
= pmc
->eventsel
;
371 int kvm_pmu_set_msr(struct kvm_vcpu
*vcpu
, struct msr_data
*msr_info
)
373 struct kvm_pmu
*pmu
= &vcpu
->arch
.pmu
;
375 u32 index
= msr_info
->index
;
376 u64 data
= msr_info
->data
;
379 case MSR_CORE_PERF_FIXED_CTR_CTRL
:
380 if (pmu
->fixed_ctr_ctrl
== data
)
382 if (!(data
& 0xfffffffffffff444ull
)) {
383 reprogram_fixed_counters(pmu
, data
);
387 case MSR_CORE_PERF_GLOBAL_STATUS
:
388 if (msr_info
->host_initiated
) {
389 pmu
->global_status
= data
;
393 case MSR_CORE_PERF_GLOBAL_CTRL
:
394 if (pmu
->global_ctrl
== data
)
396 if (!(data
& pmu
->global_ctrl_mask
)) {
397 global_ctrl_changed(pmu
, data
);
401 case MSR_CORE_PERF_GLOBAL_OVF_CTRL
:
402 if (!(data
& (pmu
->global_ctrl_mask
& ~(3ull<<62)))) {
403 if (!msr_info
->host_initiated
)
404 pmu
->global_status
&= ~data
;
405 pmu
->global_ovf_ctrl
= data
;
410 if ((pmc
= get_gp_pmc(pmu
, index
, MSR_IA32_PERFCTR0
)) ||
411 (pmc
= get_fixed_pmc(pmu
, index
))) {
412 if (!msr_info
->host_initiated
)
413 data
= (s64
)(s32
)data
;
414 pmc
->counter
+= data
- read_pmc(pmc
);
416 } else if ((pmc
= get_gp_pmc(pmu
, index
, MSR_P6_EVNTSEL0
))) {
417 if (data
== pmc
->eventsel
)
419 if (!(data
& pmu
->reserved_bits
)) {
420 reprogram_gp_counter(pmc
, data
);
428 int kvm_pmu_read_pmc(struct kvm_vcpu
*vcpu
, unsigned pmc
, u64
*data
)
430 struct kvm_pmu
*pmu
= &vcpu
->arch
.pmu
;
431 bool fast_mode
= pmc
& (1u << 31);
432 bool fixed
= pmc
& (1u << 30);
433 struct kvm_pmc
*counters
;
437 if (!fixed
&& pmc
>= pmu
->nr_arch_gp_counters
)
439 if (fixed
&& pmc
>= pmu
->nr_arch_fixed_counters
)
441 counters
= fixed
? pmu
->fixed_counters
: pmu
->gp_counters
;
442 ctr
= read_pmc(&counters
[pmc
]);
450 void kvm_pmu_cpuid_update(struct kvm_vcpu
*vcpu
)
452 struct kvm_pmu
*pmu
= &vcpu
->arch
.pmu
;
453 struct kvm_cpuid_entry2
*entry
;
456 pmu
->nr_arch_gp_counters
= 0;
457 pmu
->nr_arch_fixed_counters
= 0;
458 pmu
->counter_bitmask
[KVM_PMC_GP
] = 0;
459 pmu
->counter_bitmask
[KVM_PMC_FIXED
] = 0;
461 pmu
->reserved_bits
= 0xffffffff00200000ull
;
463 entry
= kvm_find_cpuid_entry(vcpu
, 0xa, 0);
467 pmu
->version
= entry
->eax
& 0xff;
471 pmu
->nr_arch_gp_counters
= min((int)(entry
->eax
>> 8) & 0xff,
472 INTEL_PMC_MAX_GENERIC
);
473 pmu
->counter_bitmask
[KVM_PMC_GP
] =
474 ((u64
)1 << ((entry
->eax
>> 16) & 0xff)) - 1;
475 bitmap_len
= (entry
->eax
>> 24) & 0xff;
476 pmu
->available_event_types
= ~entry
->ebx
& ((1ull << bitmap_len
) - 1);
478 if (pmu
->version
== 1) {
479 pmu
->nr_arch_fixed_counters
= 0;
481 pmu
->nr_arch_fixed_counters
= min((int)(entry
->edx
& 0x1f),
482 INTEL_PMC_MAX_FIXED
);
483 pmu
->counter_bitmask
[KVM_PMC_FIXED
] =
484 ((u64
)1 << ((entry
->edx
>> 5) & 0xff)) - 1;
487 pmu
->global_ctrl
= ((1 << pmu
->nr_arch_gp_counters
) - 1) |
488 (((1ull << pmu
->nr_arch_fixed_counters
) - 1) << INTEL_PMC_IDX_FIXED
);
489 pmu
->global_ctrl_mask
= ~pmu
->global_ctrl
;
491 entry
= kvm_find_cpuid_entry(vcpu
, 7, 0);
493 (boot_cpu_has(X86_FEATURE_HLE
) || boot_cpu_has(X86_FEATURE_RTM
)) &&
494 (entry
->ebx
& (X86_FEATURE_HLE
|X86_FEATURE_RTM
)))
495 pmu
->reserved_bits
^= HSW_IN_TX
|HSW_IN_TX_CHECKPOINTED
;
498 void kvm_pmu_init(struct kvm_vcpu
*vcpu
)
501 struct kvm_pmu
*pmu
= &vcpu
->arch
.pmu
;
503 memset(pmu
, 0, sizeof(*pmu
));
504 for (i
= 0; i
< INTEL_PMC_MAX_GENERIC
; i
++) {
505 pmu
->gp_counters
[i
].type
= KVM_PMC_GP
;
506 pmu
->gp_counters
[i
].vcpu
= vcpu
;
507 pmu
->gp_counters
[i
].idx
= i
;
509 for (i
= 0; i
< INTEL_PMC_MAX_FIXED
; i
++) {
510 pmu
->fixed_counters
[i
].type
= KVM_PMC_FIXED
;
511 pmu
->fixed_counters
[i
].vcpu
= vcpu
;
512 pmu
->fixed_counters
[i
].idx
= i
+ INTEL_PMC_IDX_FIXED
;
514 init_irq_work(&pmu
->irq_work
, trigger_pmi
);
515 kvm_pmu_cpuid_update(vcpu
);
518 void kvm_pmu_reset(struct kvm_vcpu
*vcpu
)
520 struct kvm_pmu
*pmu
= &vcpu
->arch
.pmu
;
523 irq_work_sync(&pmu
->irq_work
);
524 for (i
= 0; i
< INTEL_PMC_MAX_GENERIC
; i
++) {
525 struct kvm_pmc
*pmc
= &pmu
->gp_counters
[i
];
527 pmc
->counter
= pmc
->eventsel
= 0;
530 for (i
= 0; i
< INTEL_PMC_MAX_FIXED
; i
++)
531 stop_counter(&pmu
->fixed_counters
[i
]);
533 pmu
->fixed_ctr_ctrl
= pmu
->global_ctrl
= pmu
->global_status
=
534 pmu
->global_ovf_ctrl
= 0;
/*
 * Tear down the pmu of @vcpu.  Resetting the pmu stops every counter and
 * so releases all perf events it holds.
 */
void kvm_pmu_destroy(struct kvm_vcpu *vcpu)
{
	kvm_pmu_reset(vcpu);
}
542 void kvm_handle_pmu_event(struct kvm_vcpu
*vcpu
)
544 struct kvm_pmu
*pmu
= &vcpu
->arch
.pmu
;
548 bitmask
= pmu
->reprogram_pmi
;
550 for_each_set_bit(bit
, (unsigned long *)&bitmask
, X86_PMC_IDX_MAX
) {
551 struct kvm_pmc
*pmc
= global_idx_to_pmc(pmu
, bit
);
553 if (unlikely(!pmc
|| !pmc
->perf_event
)) {
554 clear_bit(bit
, (unsigned long *)&pmu
->reprogram_pmi
);
558 reprogram_idx(pmu
, bit
);