/*
 * Kernel-based Virtual Machine -- Performance Monitoring Unit support
 *
 * Copyright 2011 Red Hat, Inc. and/or its affiliates.
 *
 * Avi Kivity   <avi@redhat.com>
 * Gleb Natapov <gleb@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2. See
 * the COPYING file in the top-level directory.
 */

#include <linux/types.h>
#include <linux/kvm_host.h>
#include <linux/perf_event.h>
#include <asm/perf_event.h>
#include "x86.h"
#include "cpuid.h"
#include "lapic.h"

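/*
 * Map each Intel architectural event (eventsel/unit-mask pair from the
 * SDM) to the generic perf hardware event the host scheduler understands.
 */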
static struct kvm_arch_event_perf_mapping {
        u8 eventsel;
        u8 unit_mask;
        unsigned event_type;
} arch_events[] = {
        /* Index must match CPUID 0x0A.EBX bit vector */
        [0] = { 0x3c, 0x00, PERF_COUNT_HW_CPU_CYCLES },
        [1] = { 0xc0, 0x00, PERF_COUNT_HW_INSTRUCTIONS },
        [2] = { 0x3c, 0x01, PERF_COUNT_HW_BUS_CYCLES },
        [3] = { 0x2e, 0x4f, PERF_COUNT_HW_CACHE_REFERENCES },
        [4] = { 0x2e, 0x41, PERF_COUNT_HW_CACHE_MISSES },
        [5] = { 0xc4, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
        [6] = { 0xc5, 0x00, PERF_COUNT_HW_BRANCH_MISSES },
        [7] = { 0x00, 0x30, PERF_COUNT_HW_REF_CPU_CYCLES },
};

/* mapping between fixed pmc index and arch_events array */
int fixed_pmc_events[] = {1, 0, 7};

static bool pmc_is_gp(struct kvm_pmc *pmc)
{
        return pmc->type == KVM_PMC_GP;
}

static inline u64 pmc_bitmask(struct kvm_pmc *pmc)
{
        struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu;

        return pmu->counter_bitmask[pmc->type];
}

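/*
 * A counter is live only if its enable bit is set in the guest's
 * IA32_PERF_GLOBAL_CTRL; pmc->idx indexes that global bit vector.
 */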
static inline bool pmc_enabled(struct kvm_pmc *pmc)
{
        struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu;

        return test_bit(pmc->idx, (unsigned long *)&pmu->global_ctrl);
}

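/*
 * MSR-to-pmc lookup helpers: a counter MSR maps to a pmc by its offset
 * from the block's base MSR, provided it falls within the range that
 * CPUID advertised to the guest.
 */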
static inline struct kvm_pmc *get_gp_pmc(struct kvm_pmu *pmu, u32 msr,
                                         u32 base)
{
        if (msr >= base && msr < base + pmu->nr_arch_gp_counters)
                return &pmu->gp_counters[msr - base];

        return NULL;
}

static inline struct kvm_pmc *get_fixed_pmc(struct kvm_pmu *pmu, u32 msr)
{
        int base = MSR_CORE_PERF_FIXED_CTR0;

        if (msr >= base && msr < base + pmu->nr_arch_fixed_counters)
                return &pmu->fixed_counters[msr - base];

        return NULL;
}

static inline struct kvm_pmc *get_fixed_pmc_idx(struct kvm_pmu *pmu, int idx)
{
        return get_fixed_pmc(pmu, MSR_CORE_PERF_FIXED_CTR0 + idx);
}

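/*
 * Bits 0..31 of the global index space name the general-purpose
 * counters; fixed counters start at INTEL_PMC_IDX_FIXED (32), matching
 * the bit layout of IA32_PERF_GLOBAL_CTRL/STATUS.
 */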
static struct kvm_pmc *global_idx_to_pmc(struct kvm_pmu *pmu, int idx)
{
        if (idx < INTEL_PMC_IDX_FIXED)
                return get_gp_pmc(pmu, MSR_P6_EVNTSEL0 + idx, MSR_P6_EVNTSEL0);
        else
                return get_fixed_pmc_idx(pmu, idx - INTEL_PMC_IDX_FIXED);
}

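/* PMIs are delivered to the guest through the local APIC's LVTPC entry. */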
void kvm_deliver_pmi(struct kvm_vcpu *vcpu)
{
        if (vcpu->arch.apic)
                kvm_apic_local_deliver(vcpu->arch.apic, APIC_LVTPC);
}

static void trigger_pmi(struct irq_work *irq_work)
{
        struct kvm_pmu *pmu = container_of(irq_work, struct kvm_pmu,
                        irq_work);
        struct kvm_vcpu *vcpu = container_of(pmu, struct kvm_vcpu,
                        arch.pmu);

        kvm_deliver_pmi(vcpu);
}

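/*
 * Host-side overflow callbacks. These can run in NMI context, so they
 * only mark the counter in reprogram_pmi and set its overflow bit in
 * the guest's GLOBAL_STATUS; KVM_REQ_PMU defers the real work to
 * kvm_handle_pmu_event() on the vcpu thread.
 */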
static void kvm_perf_overflow(struct perf_event *perf_event,
                              struct perf_sample_data *data,
                              struct pt_regs *regs)
{
        struct kvm_pmc *pmc = perf_event->overflow_handler_context;
        struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu;

        if (!test_and_set_bit(pmc->idx, (unsigned long *)&pmu->reprogram_pmi)) {
                __set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
                kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
        }
}

static void kvm_perf_overflow_intr(struct perf_event *perf_event,
                struct perf_sample_data *data, struct pt_regs *regs)
{
        struct kvm_pmc *pmc = perf_event->overflow_handler_context;
        struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu;

        if (!test_and_set_bit(pmc->idx, (unsigned long *)&pmu->reprogram_pmi)) {
                __set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
                kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
                /*
                 * Inject PMI. If the vcpu was in guest mode during the NMI,
                 * the PMI can be injected on the next guest-mode entry.
                 * Otherwise we cannot be sure the vcpu was not executing a
                 * hlt instruction at the time of the vmexit, in which case
                 * it will not re-enter guest mode until woken up. So we
                 * should wake it, but that is impossible from NMI context;
                 * do it from irq work instead.
                 */
                if (!kvm_is_in_guest())
                        irq_work_queue(&pmc->vcpu->arch.pmu.irq_work);
                else
                        kvm_make_request(KVM_REQ_PMI, pmc->vcpu);
        }
}

static u64 read_pmc(struct kvm_pmc *pmc)
{
        u64 counter, enabled, running;

        counter = pmc->counter;

        if (pmc->perf_event)
                counter += perf_event_read_value(pmc->perf_event,
                                                 &enabled, &running);

        /* FIXME: Scaling needed? */

        return counter & pmc_bitmask(pmc);
}

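/*
 * Fold the host event's accumulated count into the software counter,
 * then release the host event so it can be recreated with new settings.
 */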
static void stop_counter(struct kvm_pmc *pmc)
{
        if (pmc->perf_event) {
                pmc->counter = read_pmc(pmc);
                perf_event_release_kernel(pmc->perf_event);
                pmc->perf_event = NULL;
        }
}

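/*
 * Back a guest counter with a freshly created host perf event. The
 * sample period is the distance from the current guest count to the
 * counter's wrap point, so the host event overflows exactly when the
 * guest counter would.
 */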
static void reprogram_counter(struct kvm_pmc *pmc, u32 type,
                unsigned config, bool exclude_user, bool exclude_kernel,
                bool intr, bool in_tx, bool in_tx_cp)
{
        struct perf_event *event;
        struct perf_event_attr attr = {
                .type = type,
                .size = sizeof(attr),
                .pinned = true,
                .exclude_idle = true,
                .exclude_host = 1,
                .exclude_user = exclude_user,
                .exclude_kernel = exclude_kernel,
                .config = config,
        };

        if (in_tx)
                attr.config |= HSW_IN_TX;
        if (in_tx_cp)
                attr.config |= HSW_IN_TX_CHECKPOINTED;

        attr.sample_period = (-pmc->counter) & pmc_bitmask(pmc);

        event = perf_event_create_kernel_counter(&attr, -1, current,
                                                 intr ? kvm_perf_overflow_intr :
                                                 kvm_perf_overflow, pmc);
        if (IS_ERR(event)) {
                printk_once("kvm: pmu event creation failed %ld\n",
                                PTR_ERR(event));
                return;
        }

        pmc->perf_event = event;
        clear_bit(pmc->idx, (unsigned long *)&pmc->vcpu->arch.pmu.reprogram_pmi);
}

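/*
 * Translate a raw eventsel/unit-mask pair into a generic perf event id,
 * but only if CPUID 0x0A.EBX did not mark that architectural event as
 * unavailable to the guest.
 */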
static unsigned find_arch_event(struct kvm_pmu *pmu, u8 event_select,
                u8 unit_mask)
{
        int i;

        for (i = 0; i < ARRAY_SIZE(arch_events); i++)
                if (arch_events[i].eventsel == event_select
                                && arch_events[i].unit_mask == unit_mask
                                && (pmu->available_event_types & (1 << i)))
                        break;

        if (i == ARRAY_SIZE(arch_events))
                return PERF_COUNT_HW_MAX;

        return arch_events[i].event_type;
}

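/*
 * React to a guest write of an EVNTSEL MSR: tear down the old host
 * event and, if the counter is enabled, create a new one. Plain
 * eventsel/umask encodings are mapped to generic perf events; anything
 * using edge/invert/cmask modifiers falls back to a raw event.
 */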
static void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
{
        unsigned config, type = PERF_TYPE_RAW;
        u8 event_select, unit_mask;

        if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL)
                printk_once("kvm pmu: pin control bit is ignored\n");

        pmc->eventsel = eventsel;

        stop_counter(pmc);

        if (!(eventsel & ARCH_PERFMON_EVENTSEL_ENABLE) || !pmc_enabled(pmc))
                return;

        event_select = eventsel & ARCH_PERFMON_EVENTSEL_EVENT;
        unit_mask = (eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;

        if (!(eventsel & (ARCH_PERFMON_EVENTSEL_EDGE |
                          ARCH_PERFMON_EVENTSEL_INV |
                          ARCH_PERFMON_EVENTSEL_CMASK |
                          HSW_IN_TX |
                          HSW_IN_TX_CHECKPOINTED))) {
                config = find_arch_event(&pmc->vcpu->arch.pmu, event_select,
                                unit_mask);
                if (config != PERF_COUNT_HW_MAX)
                        type = PERF_TYPE_HARDWARE;
        }

        if (type == PERF_TYPE_RAW)
                config = eventsel & X86_RAW_EVENT_MASK;

        reprogram_counter(pmc, type, config,
                        !(eventsel & ARCH_PERFMON_EVENTSEL_USR),
                        !(eventsel & ARCH_PERFMON_EVENTSEL_OS),
                        eventsel & ARCH_PERFMON_EVENTSEL_INT,
                        (eventsel & HSW_IN_TX),
                        (eventsel & HSW_IN_TX_CHECKPOINTED));
}

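/*
 * Each fixed counter is controlled by a 4-bit field in
 * MSR_CORE_PERF_FIXED_CTR_CTRL: bits 0-1 select ring 0 / ring 3
 * counting, bit 3 enables the PMI on overflow.
 */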
static void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 en_pmi, int idx)
{
        unsigned en = en_pmi & 0x3;
        bool pmi = en_pmi & 0x8;

        stop_counter(pmc);

        if (!en || !pmc_enabled(pmc))
                return;

        reprogram_counter(pmc, PERF_TYPE_HARDWARE,
                        arch_events[fixed_pmc_events[idx]].event_type,
                        !(en & 0x2), /* exclude user */
                        !(en & 0x1), /* exclude kernel */
                        pmi, false, false);
}

static inline u8 fixed_en_pmi(u64 ctrl, int idx)
{
        return (ctrl >> (idx * 4)) & 0xf;
}

static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data)
{
        int i;

        for (i = 0; i < pmu->nr_arch_fixed_counters; i++) {
                u8 en_pmi = fixed_en_pmi(data, i);
                struct kvm_pmc *pmc = get_fixed_pmc_idx(pmu, i);

                if (fixed_en_pmi(pmu->fixed_ctr_ctrl, i) == en_pmi)
                        continue;

                reprogram_fixed_counter(pmc, en_pmi, i);
        }

        pmu->fixed_ctr_ctrl = data;
}

static void reprogram_idx(struct kvm_pmu *pmu, int idx)
{
        struct kvm_pmc *pmc = global_idx_to_pmc(pmu, idx);

        if (!pmc)
                return;

        if (pmc_is_gp(pmc))
                reprogram_gp_counter(pmc, pmc->eventsel);
        else {
                int fidx = idx - INTEL_PMC_IDX_FIXED;

                reprogram_fixed_counter(pmc,
                                fixed_en_pmi(pmu->fixed_ctr_ctrl, fidx), fidx);
        }
}

static void global_ctrl_changed(struct kvm_pmu *pmu, u64 data)
{
        int bit;
        u64 diff = pmu->global_ctrl ^ data;

        pmu->global_ctrl = data;

        for_each_set_bit(bit, (unsigned long *)&diff, X86_PMC_IDX_MAX)
                reprogram_idx(pmu, bit);
}

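/*
 * Return true if this MSR belongs to the virtual PMU. The global
 * control MSRs only exist from architectural PMU version 2 onward.
 */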
bool kvm_pmu_msr(struct kvm_vcpu *vcpu, u32 msr)
{
        struct kvm_pmu *pmu = &vcpu->arch.pmu;
        int ret;

        switch (msr) {
        case MSR_CORE_PERF_FIXED_CTR_CTRL:
        case MSR_CORE_PERF_GLOBAL_STATUS:
        case MSR_CORE_PERF_GLOBAL_CTRL:
        case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
                ret = pmu->version > 1;
                break;
        default:
                ret = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)
                        || get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0)
                        || get_fixed_pmc(pmu, msr);
                break;
        }

        return ret;
}

int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data)
{
        struct kvm_pmu *pmu = &vcpu->arch.pmu;
        struct kvm_pmc *pmc;

        switch (index) {
        case MSR_CORE_PERF_FIXED_CTR_CTRL:
                *data = pmu->fixed_ctr_ctrl;
                return 0;
        case MSR_CORE_PERF_GLOBAL_STATUS:
                *data = pmu->global_status;
                return 0;
        case MSR_CORE_PERF_GLOBAL_CTRL:
                *data = pmu->global_ctrl;
                return 0;
        case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
                *data = pmu->global_ovf_ctrl;
                return 0;
        default:
                if ((pmc = get_gp_pmc(pmu, index, MSR_IA32_PERFCTR0)) ||
                                (pmc = get_fixed_pmc(pmu, index))) {
                        *data = read_pmc(pmc);
                        return 0;
                } else if ((pmc = get_gp_pmc(pmu, index, MSR_P6_EVNTSEL0))) {
                        *data = pmc->eventsel;
                        return 0;
                }
        }

        return 1;
}

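/*
 * Handle a guest (or host-initiated) write to a PMU MSR. Writes that
 * touch reserved bits are refused by returning 1, which the caller
 * turns into a #GP for the guest.
 */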
int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
        struct kvm_pmu *pmu = &vcpu->arch.pmu;
        struct kvm_pmc *pmc;
        u32 index = msr_info->index;
        u64 data = msr_info->data;

        switch (index) {
        case MSR_CORE_PERF_FIXED_CTR_CTRL:
                if (pmu->fixed_ctr_ctrl == data)
                        return 0;
                if (!(data & 0xfffffffffffff444ull)) {
                        reprogram_fixed_counters(pmu, data);
                        return 0;
                }
                break;
        case MSR_CORE_PERF_GLOBAL_STATUS:
                if (msr_info->host_initiated) {
                        pmu->global_status = data;
                        return 0;
                }
                break; /* RO MSR */
        case MSR_CORE_PERF_GLOBAL_CTRL:
                if (pmu->global_ctrl == data)
                        return 0;
                if (!(data & pmu->global_ctrl_mask)) {
                        global_ctrl_changed(pmu, data);
                        return 0;
                }
                break;
        case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
                if (!(data & (pmu->global_ctrl_mask & ~(3ull << 62)))) {
                        if (!msr_info->host_initiated)
                                pmu->global_status &= ~data;
                        pmu->global_ovf_ctrl = data;
                        return 0;
                }
                break;
        default:
                if ((pmc = get_gp_pmc(pmu, index, MSR_IA32_PERFCTR0)) ||
                                (pmc = get_fixed_pmc(pmu, index))) {
                        if (!msr_info->host_initiated)
                                data = (s64)(s32)data;
                        pmc->counter += data - read_pmc(pmc);
                        return 0;
                } else if ((pmc = get_gp_pmc(pmu, index, MSR_P6_EVNTSEL0))) {
                        if (data == pmc->eventsel)
                                return 0;
                        if (!(data & pmu->reserved_bits)) {
                                reprogram_gp_counter(pmc, data);
                                return 0;
                        }
                }
        }

        return 1;
}

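/*
 * RDPMC ECX encoding: bit 30 selects the fixed counter block and
 * bit 31 requests "fast" mode, which truncates the result to 32 bits.
 */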
int kvm_pmu_check_pmc(struct kvm_vcpu *vcpu, unsigned pmc)
{
        struct kvm_pmu *pmu = &vcpu->arch.pmu;
        bool fixed = pmc & (1u << 30);

        pmc &= ~(3u << 30);

        return (!fixed && pmc >= pmu->nr_arch_gp_counters) ||
                (fixed && pmc >= pmu->nr_arch_fixed_counters);
}

int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data)
{
        struct kvm_pmu *pmu = &vcpu->arch.pmu;
        bool fast_mode = pmc & (1u << 31);
        bool fixed = pmc & (1u << 30);
        struct kvm_pmc *counters;
        u64 ctr;

        pmc &= ~(3u << 30);
        if (!fixed && pmc >= pmu->nr_arch_gp_counters)
                return 1;
        if (fixed && pmc >= pmu->nr_arch_fixed_counters)
                return 1;
        counters = fixed ? pmu->fixed_counters : pmu->gp_counters;
        ctr = read_pmc(&counters[pmc]);
        if (fast_mode)
                ctr = (u32)ctr;
        *data = ctr;

        return 0;
}

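/*
 * Re-size the virtual PMU from the guest's CPUID leaf 0xA: number and
 * width of GP and fixed counters, PMU version, and which architectural
 * events are available. Haswell TSX bits in the event selector are
 * only un-reserved if the guest is shown HLE or RTM support.
 */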
void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu)
{
        struct kvm_pmu *pmu = &vcpu->arch.pmu;
        struct kvm_cpuid_entry2 *entry;
        unsigned bitmap_len;

        pmu->nr_arch_gp_counters = 0;
        pmu->nr_arch_fixed_counters = 0;
        pmu->counter_bitmask[KVM_PMC_GP] = 0;
        pmu->counter_bitmask[KVM_PMC_FIXED] = 0;
        pmu->version = 0;
        pmu->reserved_bits = 0xffffffff00200000ull;

        entry = kvm_find_cpuid_entry(vcpu, 0xa, 0);
        if (!entry)
                return;

        pmu->version = entry->eax & 0xff;
        if (!pmu->version)
                return;

        pmu->nr_arch_gp_counters = min((int)(entry->eax >> 8) & 0xff,
                        INTEL_PMC_MAX_GENERIC);
        pmu->counter_bitmask[KVM_PMC_GP] =
                ((u64)1 << ((entry->eax >> 16) & 0xff)) - 1;
        bitmap_len = (entry->eax >> 24) & 0xff;
        pmu->available_event_types = ~entry->ebx & ((1ull << bitmap_len) - 1);

        if (pmu->version == 1) {
                pmu->nr_arch_fixed_counters = 0;
        } else {
                pmu->nr_arch_fixed_counters = min((int)(entry->edx & 0x1f),
                                INTEL_PMC_MAX_FIXED);
                pmu->counter_bitmask[KVM_PMC_FIXED] =
                        ((u64)1 << ((entry->edx >> 5) & 0xff)) - 1;
        }

        pmu->global_ctrl = ((1 << pmu->nr_arch_gp_counters) - 1) |
                (((1ull << pmu->nr_arch_fixed_counters) - 1) << INTEL_PMC_IDX_FIXED);
        pmu->global_ctrl_mask = ~pmu->global_ctrl;

        entry = kvm_find_cpuid_entry(vcpu, 7, 0);
        if (entry &&
            (boot_cpu_has(X86_FEATURE_HLE) || boot_cpu_has(X86_FEATURE_RTM)) &&
            (entry->ebx & (X86_FEATURE_HLE|X86_FEATURE_RTM)))
                pmu->reserved_bits ^= HSW_IN_TX|HSW_IN_TX_CHECKPOINTED;
}

void kvm_pmu_init(struct kvm_vcpu *vcpu)
{
        int i;
        struct kvm_pmu *pmu = &vcpu->arch.pmu;

        memset(pmu, 0, sizeof(*pmu));
        for (i = 0; i < INTEL_PMC_MAX_GENERIC; i++) {
                pmu->gp_counters[i].type = KVM_PMC_GP;
                pmu->gp_counters[i].vcpu = vcpu;
                pmu->gp_counters[i].idx = i;
        }
        for (i = 0; i < INTEL_PMC_MAX_FIXED; i++) {
                pmu->fixed_counters[i].type = KVM_PMC_FIXED;
                pmu->fixed_counters[i].vcpu = vcpu;
                pmu->fixed_counters[i].idx = i + INTEL_PMC_IDX_FIXED;
        }
        init_irq_work(&pmu->irq_work, trigger_pmi);
        kvm_pmu_cpuid_update(vcpu);
}

void kvm_pmu_reset(struct kvm_vcpu *vcpu)
{
        struct kvm_pmu *pmu = &vcpu->arch.pmu;
        int i;

        irq_work_sync(&pmu->irq_work);
        for (i = 0; i < INTEL_PMC_MAX_GENERIC; i++) {
                struct kvm_pmc *pmc = &pmu->gp_counters[i];

                stop_counter(pmc);
                pmc->counter = pmc->eventsel = 0;
        }

        for (i = 0; i < INTEL_PMC_MAX_FIXED; i++)
                stop_counter(&pmu->fixed_counters[i]);

        pmu->fixed_ctr_ctrl = pmu->global_ctrl = pmu->global_status =
                pmu->global_ovf_ctrl = 0;
}

void kvm_pmu_destroy(struct kvm_vcpu *vcpu)
{
        kvm_pmu_reset(vcpu);
}

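/*
 * Called on KVM_REQ_PMU from the vcpu thread: reprogram every counter
 * whose overflow handler flagged it in reprogram_pmi.
 */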
void kvm_handle_pmu_event(struct kvm_vcpu *vcpu)
{
        struct kvm_pmu *pmu = &vcpu->arch.pmu;
        u64 bitmask;
        int bit;

        bitmask = pmu->reprogram_pmi;

        for_each_set_bit(bit, (unsigned long *)&bitmask, X86_PMC_IDX_MAX) {
                struct kvm_pmc *pmc = global_idx_to_pmc(pmu, bit);

                if (unlikely(!pmc || !pmc->perf_event)) {
                        clear_bit(bit, (unsigned long *)&pmu->reprogram_pmi);
                        continue;
                }

                reprogram_idx(pmu, bit);
        }
}