#ifdef CONFIG_CPU_SUP_AMD

static DEFINE_RAW_SPINLOCK(amd_nb_lock);

static __initconst u64 amd_hw_cache_event_ids
                                [PERF_COUNT_HW_CACHE_MAX]
                                [PERF_COUNT_HW_CACHE_OP_MAX]
                                [PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D) ] = {
        [ C(OP_READ) ] = {
                [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses         */
                [ C(RESULT_MISS)   ] = 0x0041, /* Data Cache Misses           */
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system  */
                [ C(RESULT_MISS)   ] = 0,
        },
        [ C(OP_PREFETCH) ] = {
                [ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts   */
                [ C(RESULT_MISS)   ] = 0x0167, /* Data Prefetcher :cancelled  */
        },
 },
 [ C(L1I) ] = {
        [ C(OP_READ) ] = {
                [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches   */
                [ C(RESULT_MISS)   ] = 0x0081, /* Instruction cache misses    */
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = -1,
                [ C(RESULT_MISS)   ] = -1,
        },
        [ C(OP_PREFETCH) ] = {
                [ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
                [ C(RESULT_MISS)   ] = 0,
        },
 },
 [ C(LL) ] = {
        [ C(OP_READ) ] = {
                [ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
                [ C(RESULT_MISS)   ] = 0x037E, /* L2 Cache Misses :IC+DC      */
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback           */
                [ C(RESULT_MISS)   ] = 0,
        },
        [ C(OP_PREFETCH) ] = {
                [ C(RESULT_ACCESS) ] = 0,
                [ C(RESULT_MISS)   ] = 0,
        },
 },
 [ C(DTLB) ] = {
        [ C(OP_READ) ] = {
                [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses         */
                [ C(RESULT_MISS)   ] = 0x0046, /* L1 DTLB and L2 DTLB Miss    */
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = 0,
                [ C(RESULT_MISS)   ] = 0,
        },
        [ C(OP_PREFETCH) ] = {
                [ C(RESULT_ACCESS) ] = 0,
                [ C(RESULT_MISS)   ] = 0,
        },
 },
 [ C(ITLB) ] = {
        [ C(OP_READ) ] = {
                [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fetches         */
                [ C(RESULT_MISS)   ] = 0x0085, /* Instr. fetch ITLB misses    */
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = -1,
                [ C(RESULT_MISS)   ] = -1,
        },
        [ C(OP_PREFETCH) ] = {
                [ C(RESULT_ACCESS) ] = -1,
                [ C(RESULT_MISS)   ] = -1,
        },
 },
 [ C(BPU) ] = {
        [ C(OP_READ) ] = {
                [ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr.       */
                [ C(RESULT_MISS)   ] = 0x00c3, /* Retired Mispredicted BI     */
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = -1,
                [ C(RESULT_MISS)   ] = -1,
        },
        [ C(OP_PREFETCH) ] = {
                [ C(RESULT_ACCESS) ] = -1,
                [ C(RESULT_MISS)   ] = -1,
        },
 },
};
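
/*
 * Worked example (illustrative note, not part of the original table): a
 * generic PERF_TYPE_HW_CACHE event encodes its config as
 * (cache_id) | (op_id << 8) | (result_id << 16), so an L1 data-cache read
 * miss is
 *
 *      config = C(L1D) | (C(OP_READ) << 8) | (C(RESULT_MISS) << 16)
 *
 * which the table above resolves to event code 0x0041 ("Data Cache
 * Misses"). Entries of 0 are treated as unsupported and -1 as invalid
 * combinations, so such generic events are rejected at setup time.
 */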

/*
 * AMD Performance Monitor K7 and later.
 */
static const u64 amd_perfmon_event_map[] =
{
  [PERF_COUNT_HW_CPU_CYCLES]            = 0x0076,
  [PERF_COUNT_HW_INSTRUCTIONS]          = 0x00c0,
  [PERF_COUNT_HW_CACHE_REFERENCES]      = 0x0080,
  [PERF_COUNT_HW_CACHE_MISSES]          = 0x0081,
  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]   = 0x00c4,
  [PERF_COUNT_HW_BRANCH_MISSES]         = 0x00c5,
};

static u64 amd_pmu_event_map(int hw_event)
{
        return amd_perfmon_event_map[hw_event];
}
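
/*
 * Worked example (illustrative): a generic PERF_COUNT_HW_CPU_CYCLES event
 * is mapped by amd_pmu_event_map() to 0x0076 ("CPU Clocks not Halted");
 * the core x86 code then adds the unit-mask/flag bits before writing the
 * event-select MSR.
 */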

static u64 amd_pmu_raw_event(u64 hw_event)
{
#define K7_EVNTSEL_EVENT_MASK   0xF000000FFULL
#define K7_EVNTSEL_UNIT_MASK    0x00000FF00ULL
#define K7_EVNTSEL_EDGE_MASK    0x000040000ULL
#define K7_EVNTSEL_INV_MASK     0x000800000ULL
#define K7_EVNTSEL_REG_MASK     0x0FF000000ULL

#define K7_EVNTSEL_MASK                 \
        (K7_EVNTSEL_EVENT_MASK |        \
         K7_EVNTSEL_UNIT_MASK  |        \
         K7_EVNTSEL_EDGE_MASK  |        \
         K7_EVNTSEL_INV_MASK   |        \
         K7_EVNTSEL_REG_MASK)

        return hw_event & K7_EVNTSEL_MASK;
}
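
/*
 * Worked example (illustrative note): OR-ing the five masks above gives
 * K7_EVNTSEL_MASK == 0xFFF84FFFFULL, i.e. the event-select bits (0-7 and
 * 32-35), unit mask (8-15), edge detect (18), invert (23) and counter
 * mask (24-31) are kept, while the USR/OS, interrupt-enable and
 * counter-enable bits are stripped from a raw config. For instance a raw
 * value of 0x530076 (event 0x76 with USR, OS, INT and EN set) is reduced
 * to 0x76; the stripped bits are re-applied by the core code from the
 * event attributes.
 */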

/*
 * AMD64 events are detected based on their event codes.
 */
static inline int amd_is_nb_event(struct hw_perf_event *hwc)
{
        return (hwc->config & 0xe0) == 0xe0;
}

static inline int amd_has_nb(struct cpu_hw_events *cpuc)
{
        struct amd_nb *nb = cpuc->amd_nb;

        return nb && nb->nb_id != -1;
}
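
/*
 * Illustrative check (not part of the original file): CPU cycles
 * (event 0x76) gives (0x76 & 0xe0) == 0x60, so it is not treated as a
 * NorthBridge event, whereas an NB event code such as 0xe0 gives
 * (0xe0 & 0xe0) == 0xe0 and therefore goes through the shared counter
 * allocation below.
 */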

static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
                                      struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;
        struct amd_nb *nb = cpuc->amd_nb;
        int i;

        /*
         * only care about NB events
         */
        if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc)))
                return;

        /*
         * need to scan whole list because event may not have
         * been assigned during scheduling
         *
         * no race condition possible because event can only
         * be removed on one CPU at a time AND PMU is disabled
         * when we come here
         */
        for (i = 0; i < x86_pmu.num_events; i++) {
                if (nb->owners[i] == event) {
                        cmpxchg(nb->owners + i, event, NULL);
                        break;
                }
        }
}
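
/*
 * Note on the release path above (illustrative): cmpxchg(nb->owners + i,
 * event, NULL) clears the slot only if this event still owns it, so an
 * owner installed concurrently by another core of the same NB is never
 * overwritten; at worst the cmpxchg is a no-op.
 */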

/*
 * AMD64 NorthBridge events need special treatment because
 * counter access needs to be synchronized across all cores
 * of a package. Refer to BKDG section 3.12.
 *
 * NB events are events measuring L3 cache and HyperTransport
 * traffic. They are identified by an event code >= 0xe0.
 * They measure events on the NorthBridge, which is shared
 * by all cores on a package. NB events are counted on a
 * shared set of counters. When an NB event is programmed
 * in a counter, the data actually comes from a shared
 * counter. Thus, access to those counters needs to be
 * synchronized.
 *
 * We implement the synchronization such that no two cores
 * can be measuring NB events using the same counters. Thus,
 * we maintain a per-NB allocation table. The available slot
 * is propagated using the event_constraint structure.
 *
 * We provide only one choice for each NB event based on
 * the fact that only NB events have restrictions. Consequently,
 * if a counter is available, there is a guarantee the NB event
 * will be assigned to it. If no slot is available, an empty
 * constraint is returned and scheduling will eventually fail
 * for this event.
 *
 * Note that all cores attached to the same NB compete for the same
 * counters to host NB events; this is why we use atomic ops. Some
 * multi-chip CPUs may have more than one NB.
 *
 * Given that resources are allocated (cmpxchg), they must be
 * eventually freed for others to use. This is accomplished by
 * calling amd_put_event_constraints().
 *
 * Non-NB events are not impacted by this restriction.
 */
static struct event_constraint *
amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;
        struct amd_nb *nb = cpuc->amd_nb;
        struct perf_event *old = NULL;
        int max = x86_pmu.num_events;
        int i, j, k = -1;

        /*
         * if not NB event or no NB, then no constraints
         */
        if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc)))
                return &unconstrained;

        /*
         * detect if already present, if so reuse
         *
         * cannot merge with actual allocation
         * because of possible holes
         *
         * event can already be present yet not assigned (in hwc->idx)
         * because of successive calls to x86_schedule_events() from
         * hw_perf_group_sched_in() without hw_perf_enable()
         */
        for (i = 0; i < max; i++) {
                /*
                 * keep track of first free slot
                 */
                if (k == -1 && !nb->owners[i])
                        k = i;

                /* already present, reuse */
                if (nb->owners[i] == event)
                        goto done;
        }
        /*
         * not present, so grab a new slot
         * starting either at:
         */
        if (hwc->idx != -1) {
                /* previous assignment */
                i = hwc->idx;
        } else if (k != -1) {
                /* start from free slot found */
                i = k;
        } else {
                /*
                 * event not found, no slot found in
                 * first pass, try again from the
                 * beginning
                 */
                i = 0;
        }
        j = i;
        do {
                old = cmpxchg(nb->owners + i, NULL, event);
                if (!old)
                        break;
                if (++i == max)
                        i = 0;
        } while (i != j);
done:
        if (!old)
                return &nb->event_constraints[i];

        return &emptyconstraint;
}
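
/*
 * Illustrative walkthrough of the allocation scheme above (not from the
 * original file), assuming 4 counters per NB:
 *
 *   - core 0 schedules an NB event with hwc->idx == -1 and finds slot 0
 *     free, so cmpxchg(nb->owners + 0, NULL, event) succeeds and the
 *     returned constraint restricts the event to counter 0;
 *   - core 1, on the same NB, races for a slot: its cmpxchg on slot 0 now
 *     returns core 0's event, so it advances (wrapping at max) until a
 *     free slot is claimed, e.g. counter 1;
 *   - if every owners[] slot is already taken by other cores, the
 *     do/while loop returns to its starting point and &emptyconstraint
 *     is handed back, so scheduling of that event fails on this core.
 */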

static struct amd_nb *amd_alloc_nb(int cpu, int nb_id)
{
        struct amd_nb *nb;
        int i;

        nb = kmalloc(sizeof(struct amd_nb), GFP_KERNEL);
        if (!nb)
                return NULL;

        memset(nb, 0, sizeof(*nb));
        nb->nb_id = nb_id;

        /*
         * initialize all possible NB constraints
         */
        for (i = 0; i < x86_pmu.num_events; i++) {
                __set_bit(i, nb->event_constraints[i].idxmsk);
                nb->event_constraints[i].weight = 1;
        }
        return nb;
}
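
/*
 * Illustrative note (not from the original file): each
 * nb->event_constraints[i] ends up with exactly bit i set in its idxmsk
 * and weight 1, so the constraint handed out for shared slot i pins the
 * NB event to hardware counter i and nothing else.
 */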

static int amd_pmu_cpu_prepare(int cpu)
{
        struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);

        WARN_ON_ONCE(cpuc->amd_nb);

        if (boot_cpu_data.x86_max_cores < 2)
                return NOTIFY_OK;

        cpuc->amd_nb = amd_alloc_nb(cpu, -1);
        if (!cpuc->amd_nb)
                return NOTIFY_BAD;

        return NOTIFY_OK;
}

static void amd_pmu_cpu_starting(int cpu)
{
        struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
        struct amd_nb *nb;
        int i, nb_id;

        if (boot_cpu_data.x86_max_cores < 2)
                return;

        nb_id = amd_get_nb_id(cpu);
        WARN_ON_ONCE(nb_id == BAD_APICID);

        raw_spin_lock(&amd_nb_lock);

        for_each_online_cpu(i) {
                nb = per_cpu(cpu_hw_events, i).amd_nb;
                if (WARN_ON_ONCE(!nb))
                        continue;

                if (nb->nb_id == nb_id) {
                        kfree(cpuc->amd_nb);
                        cpuc->amd_nb = nb;
                        break;
                }
        }

        cpuc->amd_nb->nb_id = nb_id;
        cpuc->amd_nb->refcnt++;

        raw_spin_unlock(&amd_nb_lock);
}
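
/*
 * Illustrative walkthrough (not from the original file): on a 4-core
 * node, the first core to come online keeps the amd_nb it allocated in
 * amd_pmu_cpu_prepare() and stamps it with its nb_id; each later core
 * finds that structure in the loop above, frees its own private copy and
 * shares the existing one, leaving refcnt == 4 once all cores are up.
 */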

static void amd_pmu_cpu_dead(int cpu)
{
        struct cpu_hw_events *cpuhw;

        if (boot_cpu_data.x86_max_cores < 2)
                return;

        cpuhw = &per_cpu(cpu_hw_events, cpu);

        raw_spin_lock(&amd_nb_lock);

        if (cpuhw->amd_nb) {
                struct amd_nb *nb = cpuhw->amd_nb;

                if (nb->nb_id == -1 || --nb->refcnt == 0)
                        kfree(nb);

                cpuhw->amd_nb = NULL;
        }

        raw_spin_unlock(&amd_nb_lock);
}
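
/*
 * Illustrative note (not from the original file): the shared amd_nb is
 * only kfree()d here when the last core of the node goes offline
 * (--refcnt reaches 0) or when the structure was never attached to a
 * node (nb_id == -1); otherwise the remaining cores keep using it.
 */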

static __initconst struct x86_pmu amd_pmu = {
        .name                   = "AMD",
        .handle_irq             = x86_pmu_handle_irq,
        .disable_all            = x86_pmu_disable_all,
        .enable_all             = x86_pmu_enable_all,
        .enable                 = x86_pmu_enable_event,
        .disable                = x86_pmu_disable_event,
        .eventsel               = MSR_K7_EVNTSEL0,
        .perfctr                = MSR_K7_PERFCTR0,
        .event_map              = amd_pmu_event_map,
        .raw_event              = amd_pmu_raw_event,
        .max_events             = ARRAY_SIZE(amd_perfmon_event_map),
        .num_events             = 4,
        .event_bits             = 48,
        .event_mask             = (1ULL << 48) - 1,
        .apic                   = 1,
        /* use highest bit to detect overflow */
        .max_period             = (1ULL << 47) - 1,
        .get_event_constraints  = amd_get_event_constraints,
        .put_event_constraints  = amd_put_event_constraints,

        .cpu_prepare            = amd_pmu_cpu_prepare,
        .cpu_starting           = amd_pmu_cpu_starting,
        .cpu_dead               = amd_pmu_cpu_dead,
};
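
/*
 * Illustrative note (not from the original file): with 48-bit counters
 * (event_mask == 2^48 - 1), capping max_period at 2^47 - 1 guarantees
 * that a counter programmed with -period still has bit 47 set, so the
 * "highest bit" overflow test mentioned above works: that bit only
 * clears once the counter has wrapped past zero.
 */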

static __init int amd_pmu_init(void)
{
        /* Performance-monitoring supported from K7 and later: */
        if (boot_cpu_data.x86 < 6)
                return -ENODEV;

        x86_pmu = amd_pmu;

        /* Events are common for all AMDs */
        memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
               sizeof(hw_cache_event_ids));

        return 0;
}

#else /* CONFIG_CPU_SUP_AMD */

static int amd_pmu_init(void)