/*
 * Kernel-based Virtual Machine -- Performance Monitoring Unit support
 *
 * Copyright 2015 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Avi Kivity   <avi@redhat.com>
 *   Gleb Natapov <gleb@redhat.com>
 *   Wei Huang    <wei@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */
16 #include <linux/types.h>
17 #include <linux/kvm_host.h>
18 #include <linux/perf_event.h>
19 #include <asm/perf_event.h>
/* NOTE:
 * - Each perf counter is defined as "struct kvm_pmc";
 * - There are two types of perf counters: general purpose (gp) and fixed.
 *   gp counters are stored in gp_counters[] and fixed counters are stored
 *   in fixed_counters[] respectively. Both of them are part of "struct
 *   kvm_pmu";
 * - pmu.c understands the difference between gp counters and fixed counters.
 *   However AMD doesn't support fixed-counters;
 * - There are three types of index to access perf counters (PMC):
 *     1. MSR (named msr): For example Intel has MSR_IA32_PERFCTRn and AMD
 *        has MSR_K7_PERFCTRn.
 *     2. MSR Index (named idx): This normally is used by RDPMC instruction.
 *        For instance AMD RDPMC instruction uses 0000_0003h in ECX to access
 *        C001_0007h (MSR_K7_PERFCTR3). Intel has a similar mechanism, except
 *        that it also supports fixed counters. idx can be used as an index
 *        to gp and fixed counters.
 *     3. Global PMC Index (named pmc): pmc is an index specific to PMU
 *        code. Each pmc, stored in kvm_pmc.idx field, is unique across
 *        all perf counters (both gp and fixed). The mapping relationship
 *        between pmc and perf counters is as the following:
 *        * Intel: [0 .. INTEL_PMC_MAX_GENERIC-1] <=> gp counters
 *                 [INTEL_PMC_IDX_FIXED .. INTEL_PMC_IDX_FIXED + 2] <=> fixed
 *        * AMD:   [0 .. AMD64_NUM_COUNTERS-1] <=> gp counters
 */
50 static void kvm_pmi_trigger_fn(struct irq_work
*irq_work
)
52 struct kvm_pmu
*pmu
= container_of(irq_work
, struct kvm_pmu
, irq_work
);
53 struct kvm_vcpu
*vcpu
= pmu_to_vcpu(pmu
);
55 kvm_pmu_deliver_pmi(vcpu
);
58 static void kvm_perf_overflow(struct perf_event
*perf_event
,
59 struct perf_sample_data
*data
,
62 struct kvm_pmc
*pmc
= perf_event
->overflow_handler_context
;
63 struct kvm_pmu
*pmu
= pmc_to_pmu(pmc
);
65 if (!test_and_set_bit(pmc
->idx
,
66 (unsigned long *)&pmu
->reprogram_pmi
)) {
67 __set_bit(pmc
->idx
, (unsigned long *)&pmu
->global_status
);
68 kvm_make_request(KVM_REQ_PMU
, pmc
->vcpu
);
72 static void kvm_perf_overflow_intr(struct perf_event
*perf_event
,
73 struct perf_sample_data
*data
,
76 struct kvm_pmc
*pmc
= perf_event
->overflow_handler_context
;
77 struct kvm_pmu
*pmu
= pmc_to_pmu(pmc
);
79 if (!test_and_set_bit(pmc
->idx
,
80 (unsigned long *)&pmu
->reprogram_pmi
)) {
81 __set_bit(pmc
->idx
, (unsigned long *)&pmu
->global_status
);
82 kvm_make_request(KVM_REQ_PMU
, pmc
->vcpu
);
85 * Inject PMI. If vcpu was in a guest mode during NMI PMI
86 * can be ejected on a guest mode re-entry. Otherwise we can't
87 * be sure that vcpu wasn't executing hlt instruction at the
88 * time of vmexit and is not going to re-enter guest mode until
89 * woken up. So we should wake it, but this is impossible from
90 * NMI context. Do it from irq work instead.
92 if (!kvm_is_in_guest())
93 irq_work_queue(&pmc_to_pmu(pmc
)->irq_work
);
95 kvm_make_request(KVM_REQ_PMI
, pmc
->vcpu
);
99 static void pmc_reprogram_counter(struct kvm_pmc
*pmc
, u32 type
,
100 unsigned config
, bool exclude_user
,
101 bool exclude_kernel
, bool intr
,
102 bool in_tx
, bool in_tx_cp
)
104 struct perf_event
*event
;
105 struct perf_event_attr attr
= {
107 .size
= sizeof(attr
),
109 .exclude_idle
= true,
111 .exclude_user
= exclude_user
,
112 .exclude_kernel
= exclude_kernel
,
117 attr
.config
|= HSW_IN_TX
;
119 attr
.config
|= HSW_IN_TX_CHECKPOINTED
;
121 attr
.sample_period
= (-pmc
->counter
) & pmc_bitmask(pmc
);
123 event
= perf_event_create_kernel_counter(&attr
, -1, current
,
124 intr
? kvm_perf_overflow_intr
:
125 kvm_perf_overflow
, pmc
);
127 printk_once("kvm_pmu: event creation failed %ld\n",
132 pmc
->perf_event
= event
;
133 clear_bit(pmc
->idx
, (unsigned long*)&pmc_to_pmu(pmc
)->reprogram_pmi
);
136 void reprogram_gp_counter(struct kvm_pmc
*pmc
, u64 eventsel
)
138 unsigned config
, type
= PERF_TYPE_RAW
;
139 u8 event_select
, unit_mask
;
141 if (eventsel
& ARCH_PERFMON_EVENTSEL_PIN_CONTROL
)
142 printk_once("kvm pmu: pin control bit is ignored\n");
144 pmc
->eventsel
= eventsel
;
146 pmc_stop_counter(pmc
);
148 if (!(eventsel
& ARCH_PERFMON_EVENTSEL_ENABLE
) || !pmc_is_enabled(pmc
))
151 event_select
= eventsel
& ARCH_PERFMON_EVENTSEL_EVENT
;
152 unit_mask
= (eventsel
& ARCH_PERFMON_EVENTSEL_UMASK
) >> 8;
154 if (!(eventsel
& (ARCH_PERFMON_EVENTSEL_EDGE
|
155 ARCH_PERFMON_EVENTSEL_INV
|
156 ARCH_PERFMON_EVENTSEL_CMASK
|
158 HSW_IN_TX_CHECKPOINTED
))) {
159 config
= kvm_x86_ops
->pmu_ops
->find_arch_event(pmc_to_pmu(pmc
),
162 if (config
!= PERF_COUNT_HW_MAX
)
163 type
= PERF_TYPE_HARDWARE
;
166 if (type
== PERF_TYPE_RAW
)
167 config
= eventsel
& X86_RAW_EVENT_MASK
;
169 pmc_reprogram_counter(pmc
, type
, config
,
170 !(eventsel
& ARCH_PERFMON_EVENTSEL_USR
),
171 !(eventsel
& ARCH_PERFMON_EVENTSEL_OS
),
172 eventsel
& ARCH_PERFMON_EVENTSEL_INT
,
173 (eventsel
& HSW_IN_TX
),
174 (eventsel
& HSW_IN_TX_CHECKPOINTED
));
176 EXPORT_SYMBOL_GPL(reprogram_gp_counter
);
178 void reprogram_fixed_counter(struct kvm_pmc
*pmc
, u8 ctrl
, int idx
)
180 unsigned en_field
= ctrl
& 0x3;
181 bool pmi
= ctrl
& 0x8;
183 pmc_stop_counter(pmc
);
185 if (!en_field
|| !pmc_is_enabled(pmc
))
188 pmc_reprogram_counter(pmc
, PERF_TYPE_HARDWARE
,
189 kvm_x86_ops
->pmu_ops
->find_fixed_event(idx
),
190 !(en_field
& 0x2), /* exclude user */
191 !(en_field
& 0x1), /* exclude kernel */
194 EXPORT_SYMBOL_GPL(reprogram_fixed_counter
);
196 void reprogram_counter(struct kvm_pmu
*pmu
, int pmc_idx
)
198 struct kvm_pmc
*pmc
= kvm_x86_ops
->pmu_ops
->pmc_idx_to_pmc(pmu
, pmc_idx
);
204 reprogram_gp_counter(pmc
, pmc
->eventsel
);
206 int idx
= pmc_idx
- INTEL_PMC_IDX_FIXED
;
207 u8 ctrl
= fixed_ctrl_field(pmu
->fixed_ctr_ctrl
, idx
);
209 reprogram_fixed_counter(pmc
, ctrl
, idx
);
212 EXPORT_SYMBOL_GPL(reprogram_counter
);
214 void kvm_pmu_handle_event(struct kvm_vcpu
*vcpu
)
216 struct kvm_pmu
*pmu
= vcpu_to_pmu(vcpu
);
220 bitmask
= pmu
->reprogram_pmi
;
222 for_each_set_bit(bit
, (unsigned long *)&bitmask
, X86_PMC_IDX_MAX
) {
223 struct kvm_pmc
*pmc
= kvm_x86_ops
->pmu_ops
->pmc_idx_to_pmc(pmu
, bit
);
225 if (unlikely(!pmc
|| !pmc
->perf_event
)) {
226 clear_bit(bit
, (unsigned long *)&pmu
->reprogram_pmi
);
230 reprogram_counter(pmu
, bit
);
234 /* check if idx is a valid index to access PMU */
235 int kvm_pmu_is_valid_msr_idx(struct kvm_vcpu
*vcpu
, unsigned idx
)
237 return kvm_x86_ops
->pmu_ops
->is_valid_msr_idx(vcpu
, idx
);
240 int kvm_pmu_rdpmc(struct kvm_vcpu
*vcpu
, unsigned idx
, u64
*data
)
242 bool fast_mode
= idx
& (1u << 31);
246 pmc
= kvm_x86_ops
->pmu_ops
->msr_idx_to_pmc(vcpu
, idx
);
250 ctr_val
= pmc_read_counter(pmc
);
252 ctr_val
= (u32
)ctr_val
;
258 void kvm_pmu_deliver_pmi(struct kvm_vcpu
*vcpu
)
261 kvm_apic_local_deliver(vcpu
->arch
.apic
, APIC_LVTPC
);
264 bool kvm_pmu_is_valid_msr(struct kvm_vcpu
*vcpu
, u32 msr
)
266 return kvm_x86_ops
->pmu_ops
->is_valid_msr(vcpu
, msr
);
269 int kvm_pmu_get_msr(struct kvm_vcpu
*vcpu
, u32 msr
, u64
*data
)
271 return kvm_x86_ops
->pmu_ops
->get_msr(vcpu
, msr
, data
);
274 int kvm_pmu_set_msr(struct kvm_vcpu
*vcpu
, struct msr_data
*msr_info
)
276 return kvm_x86_ops
->pmu_ops
->set_msr(vcpu
, msr_info
);
279 /* refresh PMU settings. This function generally is called when underlying
280 * settings are changed (such as changes of PMU CPUID by guest VMs), which
281 * should rarely happen.
283 void kvm_pmu_refresh(struct kvm_vcpu
*vcpu
)
285 kvm_x86_ops
->pmu_ops
->refresh(vcpu
);
288 void kvm_pmu_reset(struct kvm_vcpu
*vcpu
)
290 struct kvm_pmu
*pmu
= vcpu_to_pmu(vcpu
);
292 irq_work_sync(&pmu
->irq_work
);
293 kvm_x86_ops
->pmu_ops
->reset(vcpu
);
296 void kvm_pmu_init(struct kvm_vcpu
*vcpu
)
298 struct kvm_pmu
*pmu
= vcpu_to_pmu(vcpu
);
300 memset(pmu
, 0, sizeof(*pmu
));
301 kvm_x86_ops
->pmu_ops
->init(vcpu
);
302 init_irq_work(&pmu
->irq_work
, kvm_pmi_trigger_fn
);
303 kvm_pmu_refresh(vcpu
);
306 void kvm_pmu_destroy(struct kvm_vcpu
*vcpu
)