/*
 * Netburst Performance Events (P4, old Xeon)
 *
 *  Copyright (C) 2010 Parallels, Inc., Cyrill Gorcunov <gorcunov@openvz.org>
 *  Copyright (C) 2010 Intel Corporation, Lin Ming <ming.m.lin@intel.com>
 *
 *  For licencing details see kernel-base/COPYING
 */
10 #include <linux/perf_event.h>
12 #include <asm/perf_event_p4.h>
13 #include <asm/hardirq.h>
16 #include "perf_event.h"
/* Number of counter slots recorded per HT thread in p4_event_bind::cntr. */
#define P4_CNTR_LIMIT 3

/*
 * Resources one event is bound to.
 *
 * array indices: 0,1 - HT threads, used with HT enabled cpu
 *
 * cntr[] is explicitly 'signed char': it stores -1 as the "no counter"
 * sentinel, and plain 'char' has implementation-defined signedness, so
 * the sentinel would read back as 255 wherever char is unsigned.
 */
struct p4_event_bind {
	unsigned int opcode;			/* Event code and ESCR selector */
	unsigned int escr_msr[2];		/* ESCR MSR for this event */
	unsigned int escr_emask;		/* valid ESCR EventMask bits */
	unsigned int shared;			/* event is shared across threads */
	signed char cntr[2][P4_CNTR_LIMIT];	/* counter index (offset), -1 on absence */
};
/*
 * Pair of values kept per PEBS metric in p4_pebs_bind_map[].
 */
struct p4_pebs_bind {
	unsigned int metric_pebs;
	unsigned int metric_vert;
};
/*
 * Emit one designated-initializer entry, indexed by P4_PEBS_METRIC__<name>.
 * It sets P4_PEBS_ENABLE_UOP_TAG as well.
 *
 * 'pebs' and 'vert' are parenthesized so that an expression argument
 * cannot re-associate with the surrounding '|'.
 */
#define P4_GEN_PEBS_BIND(name, pebs, vert)			\
	[P4_PEBS_METRIC__##name] = {				\
		.metric_pebs = (pebs) | P4_PEBS_ENABLE_UOP_TAG,	\
		.metric_vert = (vert),				\
	}
43 * note we have P4_PEBS_ENABLE_UOP_TAG always set here
45 * it's needed for mapping P4_PEBS_CONFIG_METRIC_MASK bits of
46 * event configuration to find out which values are to be
47 * written into MSR_IA32_PEBS_ENABLE and MSR_P4_PEBS_MATRIX_VERT
50 static struct p4_pebs_bind p4_pebs_bind_map
[] = {
51 P4_GEN_PEBS_BIND(1stl_cache_load_miss_retired
, 0x0000001, 0x0000001),
52 P4_GEN_PEBS_BIND(2ndl_cache_load_miss_retired
, 0x0000002, 0x0000001),
53 P4_GEN_PEBS_BIND(dtlb_load_miss_retired
, 0x0000004, 0x0000001),
54 P4_GEN_PEBS_BIND(dtlb_store_miss_retired
, 0x0000004, 0x0000002),
55 P4_GEN_PEBS_BIND(dtlb_all_miss_retired
, 0x0000004, 0x0000003),
56 P4_GEN_PEBS_BIND(tagged_mispred_branch
, 0x0018000, 0x0000010),
57 P4_GEN_PEBS_BIND(mob_load_replay_retired
, 0x0000200, 0x0000001),
58 P4_GEN_PEBS_BIND(split_load_retired
, 0x0000400, 0x0000001),
59 P4_GEN_PEBS_BIND(split_store_retired
, 0x0000400, 0x0000002),
63 * Note that we don't use CCCR1 here, there is an
64 * exception for P4_BSQ_ALLOCATION but we just have
67 * consider this binding as resources which particular
68 * event may borrow, it doesn't contain EventMask,
69 * Tags and friends -- they are left to a caller
71 static struct p4_event_bind p4_event_bind_map
[] = {
72 [P4_EVENT_TC_DELIVER_MODE
] = {
73 .opcode
= P4_OPCODE(P4_EVENT_TC_DELIVER_MODE
),
74 .escr_msr
= { MSR_P4_TC_ESCR0
, MSR_P4_TC_ESCR1
},
76 P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE
, DD
) |
77 P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE
, DB
) |
78 P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE
, DI
) |
79 P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE
, BD
) |
80 P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE
, BB
) |
81 P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE
, BI
) |
82 P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE
, ID
),
84 .cntr
= { {4, 5, -1}, {6, 7, -1} },
86 [P4_EVENT_BPU_FETCH_REQUEST
] = {
87 .opcode
= P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST
),
88 .escr_msr
= { MSR_P4_BPU_ESCR0
, MSR_P4_BPU_ESCR1
},
90 P4_ESCR_EMASK_BIT(P4_EVENT_BPU_FETCH_REQUEST
, TCMISS
),
91 .cntr
= { {0, -1, -1}, {2, -1, -1} },
93 [P4_EVENT_ITLB_REFERENCE
] = {
94 .opcode
= P4_OPCODE(P4_EVENT_ITLB_REFERENCE
),
95 .escr_msr
= { MSR_P4_ITLB_ESCR0
, MSR_P4_ITLB_ESCR1
},
97 P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE
, HIT
) |
98 P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE
, MISS
) |
99 P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE
, HIT_UK
),
100 .cntr
= { {0, -1, -1}, {2, -1, -1} },
102 [P4_EVENT_MEMORY_CANCEL
] = {
103 .opcode
= P4_OPCODE(P4_EVENT_MEMORY_CANCEL
),
104 .escr_msr
= { MSR_P4_DAC_ESCR0
, MSR_P4_DAC_ESCR1
},
106 P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_CANCEL
, ST_RB_FULL
) |
107 P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_CANCEL
, 64K_CONF
),
108 .cntr
= { {8, 9, -1}, {10, 11, -1} },
110 [P4_EVENT_MEMORY_COMPLETE
] = {
111 .opcode
= P4_OPCODE(P4_EVENT_MEMORY_COMPLETE
),
112 .escr_msr
= { MSR_P4_SAAT_ESCR0
, MSR_P4_SAAT_ESCR1
},
114 P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_COMPLETE
, LSC
) |
115 P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_COMPLETE
, SSC
),
116 .cntr
= { {8, 9, -1}, {10, 11, -1} },
118 [P4_EVENT_LOAD_PORT_REPLAY
] = {
119 .opcode
= P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY
),
120 .escr_msr
= { MSR_P4_SAAT_ESCR0
, MSR_P4_SAAT_ESCR1
},
122 P4_ESCR_EMASK_BIT(P4_EVENT_LOAD_PORT_REPLAY
, SPLIT_LD
),
123 .cntr
= { {8, 9, -1}, {10, 11, -1} },
125 [P4_EVENT_STORE_PORT_REPLAY
] = {
126 .opcode
= P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY
),
127 .escr_msr
= { MSR_P4_SAAT_ESCR0
, MSR_P4_SAAT_ESCR1
},
129 P4_ESCR_EMASK_BIT(P4_EVENT_STORE_PORT_REPLAY
, SPLIT_ST
),
130 .cntr
= { {8, 9, -1}, {10, 11, -1} },
132 [P4_EVENT_MOB_LOAD_REPLAY
] = {
133 .opcode
= P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY
),
134 .escr_msr
= { MSR_P4_MOB_ESCR0
, MSR_P4_MOB_ESCR1
},
136 P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY
, NO_STA
) |
137 P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY
, NO_STD
) |
138 P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY
, PARTIAL_DATA
) |
139 P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY
, UNALGN_ADDR
),
140 .cntr
= { {0, -1, -1}, {2, -1, -1} },
142 [P4_EVENT_PAGE_WALK_TYPE
] = {
143 .opcode
= P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE
),
144 .escr_msr
= { MSR_P4_PMH_ESCR0
, MSR_P4_PMH_ESCR1
},
146 P4_ESCR_EMASK_BIT(P4_EVENT_PAGE_WALK_TYPE
, DTMISS
) |
147 P4_ESCR_EMASK_BIT(P4_EVENT_PAGE_WALK_TYPE
, ITMISS
),
149 .cntr
= { {0, -1, -1}, {2, -1, -1} },
151 [P4_EVENT_BSQ_CACHE_REFERENCE
] = {
152 .opcode
= P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE
),
153 .escr_msr
= { MSR_P4_BSU_ESCR0
, MSR_P4_BSU_ESCR1
},
155 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE
, RD_2ndL_HITS
) |
156 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE
, RD_2ndL_HITE
) |
157 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE
, RD_2ndL_HITM
) |
158 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE
, RD_3rdL_HITS
) |
159 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE
, RD_3rdL_HITE
) |
160 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE
, RD_3rdL_HITM
) |
161 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE
, RD_2ndL_MISS
) |
162 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE
, RD_3rdL_MISS
) |
163 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE
, WR_2ndL_MISS
),
164 .cntr
= { {0, -1, -1}, {2, -1, -1} },
166 [P4_EVENT_IOQ_ALLOCATION
] = {
167 .opcode
= P4_OPCODE(P4_EVENT_IOQ_ALLOCATION
),
168 .escr_msr
= { MSR_P4_FSB_ESCR0
, MSR_P4_FSB_ESCR1
},
170 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION
, DEFAULT
) |
171 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION
, ALL_READ
) |
172 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION
, ALL_WRITE
) |
173 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION
, MEM_UC
) |
174 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION
, MEM_WC
) |
175 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION
, MEM_WT
) |
176 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION
, MEM_WP
) |
177 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION
, MEM_WB
) |
178 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION
, OWN
) |
179 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION
, OTHER
) |
180 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION
, PREFETCH
),
181 .cntr
= { {0, -1, -1}, {2, -1, -1} },
183 [P4_EVENT_IOQ_ACTIVE_ENTRIES
] = { /* shared ESCR */
184 .opcode
= P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES
),
185 .escr_msr
= { MSR_P4_FSB_ESCR1
, MSR_P4_FSB_ESCR1
},
187 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES
, DEFAULT
) |
188 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES
, ALL_READ
) |
189 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES
, ALL_WRITE
) |
190 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES
, MEM_UC
) |
191 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES
, MEM_WC
) |
192 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES
, MEM_WT
) |
193 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES
, MEM_WP
) |
194 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES
, MEM_WB
) |
195 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES
, OWN
) |
196 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES
, OTHER
) |
197 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES
, PREFETCH
),
198 .cntr
= { {2, -1, -1}, {3, -1, -1} },
200 [P4_EVENT_FSB_DATA_ACTIVITY
] = {
201 .opcode
= P4_OPCODE(P4_EVENT_FSB_DATA_ACTIVITY
),
202 .escr_msr
= { MSR_P4_FSB_ESCR0
, MSR_P4_FSB_ESCR1
},
204 P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY
, DRDY_DRV
) |
205 P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY
, DRDY_OWN
) |
206 P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY
, DRDY_OTHER
) |
207 P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY
, DBSY_DRV
) |
208 P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY
, DBSY_OWN
) |
209 P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY
, DBSY_OTHER
),
211 .cntr
= { {0, -1, -1}, {2, -1, -1} },
213 [P4_EVENT_BSQ_ALLOCATION
] = { /* shared ESCR, broken CCCR1 */
214 .opcode
= P4_OPCODE(P4_EVENT_BSQ_ALLOCATION
),
215 .escr_msr
= { MSR_P4_BSU_ESCR0
, MSR_P4_BSU_ESCR0
},
217 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION
, REQ_TYPE0
) |
218 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION
, REQ_TYPE1
) |
219 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION
, REQ_LEN0
) |
220 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION
, REQ_LEN1
) |
221 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION
, REQ_IO_TYPE
) |
222 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION
, REQ_LOCK_TYPE
) |
223 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION
, REQ_CACHE_TYPE
) |
224 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION
, REQ_SPLIT_TYPE
) |
225 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION
, REQ_DEM_TYPE
) |
226 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION
, REQ_ORD_TYPE
) |
227 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION
, MEM_TYPE0
) |
228 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION
, MEM_TYPE1
) |
229 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION
, MEM_TYPE2
),
230 .cntr
= { {0, -1, -1}, {1, -1, -1} },
232 [P4_EVENT_BSQ_ACTIVE_ENTRIES
] = { /* shared ESCR */
233 .opcode
= P4_OPCODE(P4_EVENT_BSQ_ACTIVE_ENTRIES
),
234 .escr_msr
= { MSR_P4_BSU_ESCR1
, MSR_P4_BSU_ESCR1
},
236 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES
, REQ_TYPE0
) |
237 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES
, REQ_TYPE1
) |
238 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES
, REQ_LEN0
) |
239 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES
, REQ_LEN1
) |
240 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES
, REQ_IO_TYPE
) |
241 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES
, REQ_LOCK_TYPE
) |
242 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES
, REQ_CACHE_TYPE
) |
243 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES
, REQ_SPLIT_TYPE
) |
244 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES
, REQ_DEM_TYPE
) |
245 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES
, REQ_ORD_TYPE
) |
246 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES
, MEM_TYPE0
) |
247 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES
, MEM_TYPE1
) |
248 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES
, MEM_TYPE2
),
249 .cntr
= { {2, -1, -1}, {3, -1, -1} },
251 [P4_EVENT_SSE_INPUT_ASSIST
] = {
252 .opcode
= P4_OPCODE(P4_EVENT_SSE_INPUT_ASSIST
),
253 .escr_msr
= { MSR_P4_FIRM_ESCR0
, MSR_P4_FIRM_ESCR1
},
255 P4_ESCR_EMASK_BIT(P4_EVENT_SSE_INPUT_ASSIST
, ALL
),
257 .cntr
= { {8, 9, -1}, {10, 11, -1} },
259 [P4_EVENT_PACKED_SP_UOP
] = {
260 .opcode
= P4_OPCODE(P4_EVENT_PACKED_SP_UOP
),
261 .escr_msr
= { MSR_P4_FIRM_ESCR0
, MSR_P4_FIRM_ESCR1
},
263 P4_ESCR_EMASK_BIT(P4_EVENT_PACKED_SP_UOP
, ALL
),
265 .cntr
= { {8, 9, -1}, {10, 11, -1} },
267 [P4_EVENT_PACKED_DP_UOP
] = {
268 .opcode
= P4_OPCODE(P4_EVENT_PACKED_DP_UOP
),
269 .escr_msr
= { MSR_P4_FIRM_ESCR0
, MSR_P4_FIRM_ESCR1
},
271 P4_ESCR_EMASK_BIT(P4_EVENT_PACKED_DP_UOP
, ALL
),
273 .cntr
= { {8, 9, -1}, {10, 11, -1} },
275 [P4_EVENT_SCALAR_SP_UOP
] = {
276 .opcode
= P4_OPCODE(P4_EVENT_SCALAR_SP_UOP
),
277 .escr_msr
= { MSR_P4_FIRM_ESCR0
, MSR_P4_FIRM_ESCR1
},
279 P4_ESCR_EMASK_BIT(P4_EVENT_SCALAR_SP_UOP
, ALL
),
281 .cntr
= { {8, 9, -1}, {10, 11, -1} },
283 [P4_EVENT_SCALAR_DP_UOP
] = {
284 .opcode
= P4_OPCODE(P4_EVENT_SCALAR_DP_UOP
),
285 .escr_msr
= { MSR_P4_FIRM_ESCR0
, MSR_P4_FIRM_ESCR1
},
287 P4_ESCR_EMASK_BIT(P4_EVENT_SCALAR_DP_UOP
, ALL
),
289 .cntr
= { {8, 9, -1}, {10, 11, -1} },
291 [P4_EVENT_64BIT_MMX_UOP
] = {
292 .opcode
= P4_OPCODE(P4_EVENT_64BIT_MMX_UOP
),
293 .escr_msr
= { MSR_P4_FIRM_ESCR0
, MSR_P4_FIRM_ESCR1
},
295 P4_ESCR_EMASK_BIT(P4_EVENT_64BIT_MMX_UOP
, ALL
),
297 .cntr
= { {8, 9, -1}, {10, 11, -1} },
299 [P4_EVENT_128BIT_MMX_UOP
] = {
300 .opcode
= P4_OPCODE(P4_EVENT_128BIT_MMX_UOP
),
301 .escr_msr
= { MSR_P4_FIRM_ESCR0
, MSR_P4_FIRM_ESCR1
},
303 P4_ESCR_EMASK_BIT(P4_EVENT_128BIT_MMX_UOP
, ALL
),
305 .cntr
= { {8, 9, -1}, {10, 11, -1} },
307 [P4_EVENT_X87_FP_UOP
] = {
308 .opcode
= P4_OPCODE(P4_EVENT_X87_FP_UOP
),
309 .escr_msr
= { MSR_P4_FIRM_ESCR0
, MSR_P4_FIRM_ESCR1
},
311 P4_ESCR_EMASK_BIT(P4_EVENT_X87_FP_UOP
, ALL
),
313 .cntr
= { {8, 9, -1}, {10, 11, -1} },
315 [P4_EVENT_TC_MISC
] = {
316 .opcode
= P4_OPCODE(P4_EVENT_TC_MISC
),
317 .escr_msr
= { MSR_P4_TC_ESCR0
, MSR_P4_TC_ESCR1
},
319 P4_ESCR_EMASK_BIT(P4_EVENT_TC_MISC
, FLUSH
),
320 .cntr
= { {4, 5, -1}, {6, 7, -1} },
322 [P4_EVENT_GLOBAL_POWER_EVENTS
] = {
323 .opcode
= P4_OPCODE(P4_EVENT_GLOBAL_POWER_EVENTS
),
324 .escr_msr
= { MSR_P4_FSB_ESCR0
, MSR_P4_FSB_ESCR1
},
326 P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS
, RUNNING
),
327 .cntr
= { {0, -1, -1}, {2, -1, -1} },
329 [P4_EVENT_TC_MS_XFER
] = {
330 .opcode
= P4_OPCODE(P4_EVENT_TC_MS_XFER
),
331 .escr_msr
= { MSR_P4_MS_ESCR0
, MSR_P4_MS_ESCR1
},
333 P4_ESCR_EMASK_BIT(P4_EVENT_TC_MS_XFER
, CISC
),
334 .cntr
= { {4, 5, -1}, {6, 7, -1} },
336 [P4_EVENT_UOP_QUEUE_WRITES
] = {
337 .opcode
= P4_OPCODE(P4_EVENT_UOP_QUEUE_WRITES
),
338 .escr_msr
= { MSR_P4_MS_ESCR0
, MSR_P4_MS_ESCR1
},
340 P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES
, FROM_TC_BUILD
) |
341 P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES
, FROM_TC_DELIVER
) |
342 P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES
, FROM_ROM
),
343 .cntr
= { {4, 5, -1}, {6, 7, -1} },
345 [P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE
] = {
346 .opcode
= P4_OPCODE(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE
),
347 .escr_msr
= { MSR_P4_TBPU_ESCR0
, MSR_P4_TBPU_ESCR0
},
349 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE
, CONDITIONAL
) |
350 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE
, CALL
) |
351 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE
, RETURN
) |
352 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE
, INDIRECT
),
353 .cntr
= { {4, 5, -1}, {6, 7, -1} },
355 [P4_EVENT_RETIRED_BRANCH_TYPE
] = {
356 .opcode
= P4_OPCODE(P4_EVENT_RETIRED_BRANCH_TYPE
),
357 .escr_msr
= { MSR_P4_TBPU_ESCR0
, MSR_P4_TBPU_ESCR1
},
359 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE
, CONDITIONAL
) |
360 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE
, CALL
) |
361 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE
, RETURN
) |
362 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE
, INDIRECT
),
363 .cntr
= { {4, 5, -1}, {6, 7, -1} },
365 [P4_EVENT_RESOURCE_STALL
] = {
366 .opcode
= P4_OPCODE(P4_EVENT_RESOURCE_STALL
),
367 .escr_msr
= { MSR_P4_ALF_ESCR0
, MSR_P4_ALF_ESCR1
},
369 P4_ESCR_EMASK_BIT(P4_EVENT_RESOURCE_STALL
, SBFULL
),
370 .cntr
= { {12, 13, 16}, {14, 15, 17} },
372 [P4_EVENT_WC_BUFFER
] = {
373 .opcode
= P4_OPCODE(P4_EVENT_WC_BUFFER
),
374 .escr_msr
= { MSR_P4_DAC_ESCR0
, MSR_P4_DAC_ESCR1
},
376 P4_ESCR_EMASK_BIT(P4_EVENT_WC_BUFFER
, WCB_EVICTS
) |
377 P4_ESCR_EMASK_BIT(P4_EVENT_WC_BUFFER
, WCB_FULL_EVICTS
),
379 .cntr
= { {8, 9, -1}, {10, 11, -1} },
381 [P4_EVENT_B2B_CYCLES
] = {
382 .opcode
= P4_OPCODE(P4_EVENT_B2B_CYCLES
),
383 .escr_msr
= { MSR_P4_FSB_ESCR0
, MSR_P4_FSB_ESCR1
},
385 .cntr
= { {0, -1, -1}, {2, -1, -1} },
388 .opcode
= P4_OPCODE(P4_EVENT_BNR
),
389 .escr_msr
= { MSR_P4_FSB_ESCR0
, MSR_P4_FSB_ESCR1
},
391 .cntr
= { {0, -1, -1}, {2, -1, -1} },
394 .opcode
= P4_OPCODE(P4_EVENT_SNOOP
),
395 .escr_msr
= { MSR_P4_FSB_ESCR0
, MSR_P4_FSB_ESCR1
},
397 .cntr
= { {0, -1, -1}, {2, -1, -1} },
399 [P4_EVENT_RESPONSE
] = {
400 .opcode
= P4_OPCODE(P4_EVENT_RESPONSE
),
401 .escr_msr
= { MSR_P4_FSB_ESCR0
, MSR_P4_FSB_ESCR1
},
403 .cntr
= { {0, -1, -1}, {2, -1, -1} },
405 [P4_EVENT_FRONT_END_EVENT
] = {
406 .opcode
= P4_OPCODE(P4_EVENT_FRONT_END_EVENT
),
407 .escr_msr
= { MSR_P4_CRU_ESCR2
, MSR_P4_CRU_ESCR3
},
409 P4_ESCR_EMASK_BIT(P4_EVENT_FRONT_END_EVENT
, NBOGUS
) |
410 P4_ESCR_EMASK_BIT(P4_EVENT_FRONT_END_EVENT
, BOGUS
),
411 .cntr
= { {12, 13, 16}, {14, 15, 17} },
413 [P4_EVENT_EXECUTION_EVENT
] = {
414 .opcode
= P4_OPCODE(P4_EVENT_EXECUTION_EVENT
),
415 .escr_msr
= { MSR_P4_CRU_ESCR2
, MSR_P4_CRU_ESCR3
},
417 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT
, NBOGUS0
) |
418 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT
, NBOGUS1
) |
419 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT
, NBOGUS2
) |
420 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT
, NBOGUS3
) |
421 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT
, BOGUS0
) |
422 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT
, BOGUS1
) |
423 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT
, BOGUS2
) |
424 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT
, BOGUS3
),
425 .cntr
= { {12, 13, 16}, {14, 15, 17} },
427 [P4_EVENT_REPLAY_EVENT
] = {
428 .opcode
= P4_OPCODE(P4_EVENT_REPLAY_EVENT
),
429 .escr_msr
= { MSR_P4_CRU_ESCR2
, MSR_P4_CRU_ESCR3
},
431 P4_ESCR_EMASK_BIT(P4_EVENT_REPLAY_EVENT
, NBOGUS
) |
432 P4_ESCR_EMASK_BIT(P4_EVENT_REPLAY_EVENT
, BOGUS
),
433 .cntr
= { {12, 13, 16}, {14, 15, 17} },
435 [P4_EVENT_INSTR_RETIRED
] = {
436 .opcode
= P4_OPCODE(P4_EVENT_INSTR_RETIRED
),
437 .escr_msr
= { MSR_P4_CRU_ESCR0
, MSR_P4_CRU_ESCR1
},
439 P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED
, NBOGUSNTAG
) |
440 P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED
, NBOGUSTAG
) |
441 P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED
, BOGUSNTAG
) |
442 P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED
, BOGUSTAG
),
443 .cntr
= { {12, 13, 16}, {14, 15, 17} },
445 [P4_EVENT_UOPS_RETIRED
] = {
446 .opcode
= P4_OPCODE(P4_EVENT_UOPS_RETIRED
),
447 .escr_msr
= { MSR_P4_CRU_ESCR0
, MSR_P4_CRU_ESCR1
},
449 P4_ESCR_EMASK_BIT(P4_EVENT_UOPS_RETIRED
, NBOGUS
) |
450 P4_ESCR_EMASK_BIT(P4_EVENT_UOPS_RETIRED
, BOGUS
),
451 .cntr
= { {12, 13, 16}, {14, 15, 17} },
453 [P4_EVENT_UOP_TYPE
] = {
454 .opcode
= P4_OPCODE(P4_EVENT_UOP_TYPE
),
455 .escr_msr
= { MSR_P4_RAT_ESCR0
, MSR_P4_RAT_ESCR1
},
457 P4_ESCR_EMASK_BIT(P4_EVENT_UOP_TYPE
, TAGLOADS
) |
458 P4_ESCR_EMASK_BIT(P4_EVENT_UOP_TYPE
, TAGSTORES
),
459 .cntr
= { {12, 13, 16}, {14, 15, 17} },
461 [P4_EVENT_BRANCH_RETIRED
] = {
462 .opcode
= P4_OPCODE(P4_EVENT_BRANCH_RETIRED
),
463 .escr_msr
= { MSR_P4_CRU_ESCR2
, MSR_P4_CRU_ESCR3
},
465 P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED
, MMNP
) |
466 P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED
, MMNM
) |
467 P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED
, MMTP
) |
468 P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED
, MMTM
),
469 .cntr
= { {12, 13, 16}, {14, 15, 17} },
471 [P4_EVENT_MISPRED_BRANCH_RETIRED
] = {
472 .opcode
= P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED
),
473 .escr_msr
= { MSR_P4_CRU_ESCR0
, MSR_P4_CRU_ESCR1
},
475 P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED
, NBOGUS
),
476 .cntr
= { {12, 13, 16}, {14, 15, 17} },
478 [P4_EVENT_X87_ASSIST
] = {
479 .opcode
= P4_OPCODE(P4_EVENT_X87_ASSIST
),
480 .escr_msr
= { MSR_P4_CRU_ESCR2
, MSR_P4_CRU_ESCR3
},
482 P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST
, FPSU
) |
483 P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST
, FPSO
) |
484 P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST
, POAO
) |
485 P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST
, POAU
) |
486 P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST
, PREA
),
487 .cntr
= { {12, 13, 16}, {14, 15, 17} },
489 [P4_EVENT_MACHINE_CLEAR
] = {
490 .opcode
= P4_OPCODE(P4_EVENT_MACHINE_CLEAR
),
491 .escr_msr
= { MSR_P4_CRU_ESCR2
, MSR_P4_CRU_ESCR3
},
493 P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR
, CLEAR
) |
494 P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR
, MOCLEAR
) |
495 P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR
, SMCLEAR
),
496 .cntr
= { {12, 13, 16}, {14, 15, 17} },
498 [P4_EVENT_INSTR_COMPLETED
] = {
499 .opcode
= P4_OPCODE(P4_EVENT_INSTR_COMPLETED
),
500 .escr_msr
= { MSR_P4_CRU_ESCR0
, MSR_P4_CRU_ESCR1
},
502 P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_COMPLETED
, NBOGUS
) |
503 P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_COMPLETED
, BOGUS
),
504 .cntr
= { {12, 13, 16}, {14, 15, 17} },
/*
 * Compose a cache-event config: the ESCR part carries the event and one
 * EventMask bit, the CCCR part carries the metric and the ESCR select
 * derived from the event opcode.
 *
 * 'metric' and the whole expansion are parenthesized so the macro behaves
 * as a single operand; 'event' and 'bit' are passed through untouched
 * because the helper macros receive them as bare tokens.
 */
#define P4_GEN_CACHE_EVENT(event, bit, metric)				  \
	(p4_config_pack_escr(P4_ESCR_EVENT(event)			| \
			     P4_ESCR_EMASK_BIT(event, bit))		| \
	 p4_config_pack_cccr((metric)					| \
			     P4_CCCR_ESEL(P4_OPCODE_ESEL(P4_OPCODE(event)))))
514 static __initconst
const u64 p4_hw_cache_event_ids
515 [PERF_COUNT_HW_CACHE_MAX
]
516 [PERF_COUNT_HW_CACHE_OP_MAX
]
517 [PERF_COUNT_HW_CACHE_RESULT_MAX
] =
521 [ C(RESULT_ACCESS
) ] = 0x0,
522 [ C(RESULT_MISS
) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT
, NBOGUS
,
523 P4_PEBS_METRIC__1stl_cache_load_miss_retired
),
528 [ C(RESULT_ACCESS
) ] = 0x0,
529 [ C(RESULT_MISS
) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT
, NBOGUS
,
530 P4_PEBS_METRIC__2ndl_cache_load_miss_retired
),
535 [ C(RESULT_ACCESS
) ] = 0x0,
536 [ C(RESULT_MISS
) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT
, NBOGUS
,
537 P4_PEBS_METRIC__dtlb_load_miss_retired
),
540 [ C(RESULT_ACCESS
) ] = 0x0,
541 [ C(RESULT_MISS
) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT
, NBOGUS
,
542 P4_PEBS_METRIC__dtlb_store_miss_retired
),
547 [ C(RESULT_ACCESS
) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE
, HIT
,
548 P4_PEBS_METRIC__none
),
549 [ C(RESULT_MISS
) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE
, MISS
,
550 P4_PEBS_METRIC__none
),
553 [ C(RESULT_ACCESS
) ] = -1,
554 [ C(RESULT_MISS
) ] = -1,
556 [ C(OP_PREFETCH
) ] = {
557 [ C(RESULT_ACCESS
) ] = -1,
558 [ C(RESULT_MISS
) ] = -1,
563 [ C(RESULT_ACCESS
) ] = -1,
564 [ C(RESULT_MISS
) ] = -1,
567 [ C(RESULT_ACCESS
) ] = -1,
568 [ C(RESULT_MISS
) ] = -1,
570 [ C(OP_PREFETCH
) ] = {
571 [ C(RESULT_ACCESS
) ] = -1,
572 [ C(RESULT_MISS
) ] = -1,
/*
 * Because of Netburst being quite restricted in how many
 * identical events may run simultaneously, we introduce event aliases,
 * ie the different events which have the same functionality but
 * utilize non-intersected resources (ESCR/CCCR/counter registers).
 *
 * This allows us to relax restrictions a bit and run two or more
 * identical events together.
 *
 * Never set any custom internal bits such as P4_CONFIG_HT,
 * P4_CONFIG_ALIASABLE or bits for P4_PEBS_METRIC, they are
 * either up to date automatically or not applicable at all.
 */
590 struct p4_event_alias
{
593 } p4_event_aliases
[] = {
596 * Non-halted cycles can be substituted with non-sleeping cycles (see
597 * Intel SDM Vol3b for details). We need this alias to be able
598 * to run nmi-watchdog and 'perf top' (or any other user space tool
599 * which is interested in running PERF_COUNT_HW_CPU_CYCLES)
603 p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS
) |
604 P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS
, RUNNING
)),
606 p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_EXECUTION_EVENT
) |
607 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT
, NBOGUS0
)|
608 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT
, NBOGUS1
)|
609 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT
, NBOGUS2
)|
610 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT
, NBOGUS3
)|
611 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT
, BOGUS0
) |
612 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT
, BOGUS1
) |
613 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT
, BOGUS2
) |
614 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT
, BOGUS3
))|
615 p4_config_pack_cccr(P4_CCCR_THRESHOLD(15) | P4_CCCR_COMPLEMENT
|
620 static u64
p4_get_alias_event(u64 config
)
626 * Only event with special mark is allowed,
627 * we're to be sure it didn't come as malformed
630 if (!(config
& P4_CONFIG_ALIASABLE
))
633 config_match
= config
& P4_CONFIG_EVENT_ALIAS_MASK
;
635 for (i
= 0; i
< ARRAY_SIZE(p4_event_aliases
); i
++) {
636 if (config_match
== p4_event_aliases
[i
].original
) {
637 config_match
= p4_event_aliases
[i
].alternative
;
639 } else if (config_match
== p4_event_aliases
[i
].alternative
) {
640 config_match
= p4_event_aliases
[i
].original
;
645 if (i
>= ARRAY_SIZE(p4_event_aliases
))
648 return config_match
| (config
& P4_CONFIG_EVENT_ALIAS_IMMUTABLE_BITS
);
651 static u64 p4_general_events
[PERF_COUNT_HW_MAX
] = {
652 /* non-halted CPU clocks */
653 [PERF_COUNT_HW_CPU_CYCLES
] =
654 p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS
) |
655 P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS
, RUNNING
)) |
659 * retired instructions
660 * in a sake of simplicity we don't use the FSB tagging
662 [PERF_COUNT_HW_INSTRUCTIONS
] =
663 p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_INSTR_RETIRED
) |
664 P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED
, NBOGUSNTAG
) |
665 P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED
, BOGUSNTAG
)),
668 [PERF_COUNT_HW_CACHE_REFERENCES
] =
669 p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_BSQ_CACHE_REFERENCE
) |
670 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE
, RD_2ndL_HITS
) |
671 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE
, RD_2ndL_HITE
) |
672 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE
, RD_2ndL_HITM
) |
673 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE
, RD_3rdL_HITS
) |
674 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE
, RD_3rdL_HITE
) |
675 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE
, RD_3rdL_HITM
)),
678 [PERF_COUNT_HW_CACHE_MISSES
] =
679 p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_BSQ_CACHE_REFERENCE
) |
680 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE
, RD_2ndL_MISS
) |
681 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE
, RD_3rdL_MISS
) |
682 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE
, WR_2ndL_MISS
)),
684 /* branch instructions retired */
685 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS
] =
686 p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_RETIRED_BRANCH_TYPE
) |
687 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE
, CONDITIONAL
) |
688 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE
, CALL
) |
689 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE
, RETURN
) |
690 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE
, INDIRECT
)),
692 /* mispredicted branches retired */
693 [PERF_COUNT_HW_BRANCH_MISSES
] =
694 p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_MISPRED_BRANCH_RETIRED
) |
695 P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED
, NBOGUS
)),
697 /* bus ready clocks (cpu is driving #DRDY_DRV\#DRDY_OWN): */
698 [PERF_COUNT_HW_BUS_CYCLES
] =
699 p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_FSB_DATA_ACTIVITY
) |
700 P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY
, DRDY_DRV
) |
701 P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY
, DRDY_OWN
)) |
702 p4_config_pack_cccr(P4_CCCR_EDGE
| P4_CCCR_COMPARE
),
705 static struct p4_event_bind
*p4_config_get_bind(u64 config
)
707 unsigned int evnt
= p4_config_unpack_event(config
);
708 struct p4_event_bind
*bind
= NULL
;
710 if (evnt
< ARRAY_SIZE(p4_event_bind_map
))
711 bind
= &p4_event_bind_map
[evnt
];
716 static u64
p4_pmu_event_map(int hw_event
)
718 struct p4_event_bind
*bind
;
722 config
= p4_general_events
[hw_event
];
723 bind
= p4_config_get_bind(config
);
724 esel
= P4_OPCODE_ESEL(bind
->opcode
);
725 config
|= p4_config_pack_cccr(P4_CCCR_ESEL(esel
));
730 /* check cpu model specifics */
731 static bool p4_event_match_cpu_model(unsigned int event_idx
)
733 /* INSTR_COMPLETED event only exist for model 3, 4, 6 (Prescott) */
734 if (event_idx
== P4_EVENT_INSTR_COMPLETED
) {
735 if (boot_cpu_data
.x86_model
!= 3 &&
736 boot_cpu_data
.x86_model
!= 4 &&
737 boot_cpu_data
.x86_model
!= 6)
743 * - IQ_ESCR0, IQ_ESCR1 only for models 1 and 2
749 static int p4_validate_raw_event(struct perf_event
*event
)
751 unsigned int v
, emask
;
753 /* User data may have out-of-bound event index */
754 v
= p4_config_unpack_event(event
->attr
.config
);
755 if (v
>= ARRAY_SIZE(p4_event_bind_map
))
758 /* It may be unsupported: */
759 if (!p4_event_match_cpu_model(v
))
763 * NOTE: P4_CCCR_THREAD_ANY has not the same meaning as
764 * in Architectural Performance Monitoring, it means not
765 * on _which_ logical cpu to count but rather _when_, ie it
766 * depends on logical cpu state -- count event if one cpu active,
767 * none, both or any, so we just allow user to pass any value
770 * In turn we always set Tx_OS/Tx_USR bits bound to logical
771 * cpu without their propagation to another cpu
775 * if an event is shared across the logical threads
776 * the user needs special permissions to be able to use it
778 if (p4_ht_active() && p4_event_bind_map
[v
].shared
) {
779 if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN
))
783 /* ESCR EventMask bits may be invalid */
784 emask
= p4_config_unpack_escr(event
->attr
.config
) & P4_ESCR_EVENTMASK_MASK
;
785 if (emask
& ~p4_event_bind_map
[v
].escr_emask
)
789 * it may have some invalid PEBS bits
791 if (p4_config_pebs_has(event
->attr
.config
, P4_PEBS_CONFIG_ENABLE
))
794 v
= p4_config_unpack_metric(event
->attr
.config
);
795 if (v
>= ARRAY_SIZE(p4_pebs_bind_map
))
801 static int p4_hw_config(struct perf_event
*event
)
808 * the reason we use cpu that early is that: if we get scheduled
809 * first time on the same cpu -- we will not need swap thread
810 * specific flags in config (and will save some cpu cycles)
813 cccr
= p4_default_cccr_conf(cpu
);
814 escr
= p4_default_escr_conf(cpu
, event
->attr
.exclude_kernel
,
815 event
->attr
.exclude_user
);
816 event
->hw
.config
= p4_config_pack_escr(escr
) |
817 p4_config_pack_cccr(cccr
);
819 if (p4_ht_active() && p4_ht_thread(cpu
))
820 event
->hw
.config
= p4_set_ht_bit(event
->hw
.config
);
822 if (event
->attr
.type
== PERF_TYPE_RAW
) {
823 struct p4_event_bind
*bind
;
826 * Clear bits we reserve to be managed by kernel itself
827 * and never allowed from a user space
829 event
->attr
.config
&= P4_CONFIG_MASK
;
831 rc
= p4_validate_raw_event(event
);
836 * Note that for RAW events we allow user to use P4_CCCR_RESERVED
837 * bits since we keep additional info here (for cache events and etc)
839 event
->hw
.config
|= event
->attr
.config
;
840 bind
= p4_config_get_bind(event
->attr
.config
);
845 esel
= P4_OPCODE_ESEL(bind
->opcode
);
846 event
->hw
.config
|= p4_config_pack_cccr(P4_CCCR_ESEL(esel
));
849 rc
= x86_setup_perfctr(event
);
855 static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event
*hwc
)
859 /* an official way for overflow indication */
860 rdmsrl(hwc
->config_base
, v
);
861 if (v
& P4_CCCR_OVF
) {
862 wrmsrl(hwc
->config_base
, v
& ~P4_CCCR_OVF
);
867 * In some circumstances the overflow might issue an NMI but did
868 * not set P4_CCCR_OVF bit. Because a counter holds a negative value
869 * we simply check for high bit being set, if it's cleared it means
870 * the counter has reached zero value and continued counting before
871 * real NMI signal was received:
873 rdmsrl(hwc
->event_base
, v
);
874 if (!(v
& ARCH_P4_UNFLAGGED_BIT
))
/*
 * p4_pmu_disable_pebs - placeholder; deliberately performs no MSR writes
 * (the rationale is given in the body comment).
 */
static void p4_pmu_disable_pebs(void)
{
	/*
	 * It's still allowed that two threads setup same cache
	 * events so we can't simply clear metrics until we knew
	 * no one is depending on us, so we need kind of counter
	 * for "ReplayEvent" users.
	 *
	 * What is more complex -- RAW events, if user (for some
	 * reason) will pass some cache event metric with improper
	 * event opcode -- it's fine from hardware point of view
	 * but completely nonsense from "meaning" of such action.
	 *
	 * So at moment let leave metrics turned on forever -- it's
	 * ok for now but need to be revisited!
	 *
	 * (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)0);
	 * (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)0);
	 */
}
903 static inline void p4_pmu_disable_event(struct perf_event
*event
)
905 struct hw_perf_event
*hwc
= &event
->hw
;
908 * If event gets disabled while counter is in overflowed
909 * state we need to clear P4_CCCR_OVF, otherwise interrupt get
910 * asserted again and again
912 (void)checking_wrmsrl(hwc
->config_base
,
913 (u64
)(p4_config_unpack_cccr(hwc
->config
)) &
914 ~P4_CCCR_ENABLE
& ~P4_CCCR_OVF
& ~P4_CCCR_RESERVED
);
917 static void p4_pmu_disable_all(void)
919 struct cpu_hw_events
*cpuc
= &__get_cpu_var(cpu_hw_events
);
922 for (idx
= 0; idx
< x86_pmu
.num_counters
; idx
++) {
923 struct perf_event
*event
= cpuc
->events
[idx
];
924 if (!test_bit(idx
, cpuc
->active_mask
))
926 p4_pmu_disable_event(event
);
929 p4_pmu_disable_pebs();
/*
 * p4_pmu_enable_pebs - write the PEBS metric MSRs selected by @config.
 * @config: packed event configuration; its metric field indexes
 *          p4_pebs_bind_map
 *
 * The metric index is unpacked with p4_config_unpack_metric() and compared
 * with P4_PEBS_METRIC__none. For the looked-up bind, metric_pebs is written
 * to MSR_IA32_PEBS_ENABLE and metric_vert to MSR_P4_PEBS_MATRIX_VERT; both
 * write results are discarded. The index is not range-checked here, hence
 * the requirement stated on the next line.
 *
 * NOTE(review): the declaration of idx and the statement taken for
 * P4_PEBS_METRIC__none are not visible in this extract -- presumably an
 * early return; confirm.
 */
932 /* configuration must be valid */
933 static void p4_pmu_enable_pebs(u64 config
)
935 struct p4_pebs_bind
*bind
;
/* compile-time check: the largest metric id must fit the config field mask */
938 BUILD_BUG_ON(P4_PEBS_METRIC__max
> P4_PEBS_CONFIG_METRIC_MASK
);
940 idx
= p4_config_unpack_metric(config
);
941 if (idx
== P4_PEBS_METRIC__none
)
944 bind
= &p4_pebs_bind_map
[idx
];
946 (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE
, (u64
)bind
->metric_pebs
);
947 (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT
, (u64
)bind
->metric_vert
);
/*
 * p4_pmu_enable_event - program the ESCR and CCCR for @event and enable it.
 * @event: event to enable; its packed configuration is read from event->hw
 *
 * Derived from hwc->config:
 *  - thread:    p4_ht_config_thread(), selects which of the two ESCR MSRs
 *               of the bind entry is used
 *  - escr_conf: ESCR value unpacked from p4_clear_ht_bit(hwc->config)
 *  - idx:       event index into p4_event_bind_map
 *
 * The event field of escr_conf is replaced by the event number taken from
 * the bind's opcode, p4_pmu_enable_pebs() is called with the same config,
 * then the ESCR is written followed by the CCCR (P4_CCCR_RESERVED masked
 * off, P4_CCCR_ENABLE set). The results of both WARN_ON_ONCE() checks and
 * of both MSR writes are not used.
 *
 * NOTE(review): the declarations of escr_addr and cccr are not visible in
 * this extract.
 */
950 static void p4_pmu_enable_event(struct perf_event
*event
)
952 struct hw_perf_event
*hwc
= &event
->hw
;
953 int thread
= p4_ht_config_thread(hwc
->config
);
954 u64 escr_conf
= p4_config_unpack_escr(p4_clear_ht_bit(hwc
->config
));
955 unsigned int idx
= p4_config_unpack_event(hwc
->config
);
956 struct p4_event_bind
*bind
;
959 bind
= &p4_event_bind_map
[idx
];
960 escr_addr
= (u64
)bind
->escr_msr
[thread
];
963 * - we dont support cascaded counters yet
964 * - and counter 1 is broken (erratum)
966 WARN_ON_ONCE(p4_is_event_cascaded(hwc
->config
));
967 WARN_ON_ONCE(hwc
->idx
== 1);
969 /* we need a real Event value */
970 escr_conf
&= ~P4_ESCR_EVENT_MASK
;
971 escr_conf
|= P4_ESCR_EVENT(P4_OPCODE_EVNT(bind
->opcode
));
973 cccr
= p4_config_unpack_cccr(hwc
->config
);
976 * it could be Cache event so we need to write metrics
977 * into additional MSRs
979 p4_pmu_enable_pebs(hwc
->config
);
/* ESCR is written first, the CCCR write carrying the enable bit comes last */
981 (void)checking_wrmsrl(escr_addr
, escr_conf
);
982 (void)checking_wrmsrl(hwc
->config_base
,
983 (cccr
& ~P4_CCCR_RESERVED
) | P4_CCCR_ENABLE
);
/*
 * p4_pmu_enable_all - enable the events of this CPU.
 * @added: not referenced by any line visible in this extract
 *
 * Walks counter indices 0 .. x86_pmu.num_counters - 1 of the current CPU's
 * cpu_hw_events, tests each index against cpuc->active_mask and calls
 * p4_pmu_enable_event() on cpuc->events[idx].
 *
 * NOTE(review): the declaration of idx and the statement controlled by the
 * active_mask test are not visible in this extract -- presumably the test
 * skips indices that are not active; confirm against the full file.
 */
986 static void p4_pmu_enable_all(int added
)
988 struct cpu_hw_events
*cpuc
= &__get_cpu_var(cpu_hw_events
);
991 for (idx
= 0; idx
< x86_pmu
.num_counters
; idx
++) {
992 struct perf_event
*event
= cpuc
->events
[idx
];
993 if (!test_bit(idx
, cpuc
->active_mask
))
995 p4_pmu_enable_event(event
);
/*
 * p4_pmu_handle_irq - PMU interrupt handler for the P4 counters.
 * @regs: register state at interrupt time, passed to perf_event_overflow()
 *
 * For every counter index below x86_pmu.num_counters:
 *  - an index that is not set in cpuc->active_mask only has its bit in
 *    cpuc->running tested and cleared;
 *  - otherwise the overflow state is fetched with p4_pmu_clear_cccr_ovf(),
 *    the event count is refreshed with x86_perf_event_update(), and the
 *    top counter bit (bit cntval_bits - 1) of the returned value is tested
 *    when no overflow was reported;
 *  - handled accumulates the overflow results, data.period is set from
 *    the event's last_period, the period is re-armed with
 *    x86_perf_event_set_period(), and the event is stopped with
 *    x86_pmu_stop() when perf_event_overflow() returns non-zero.
 * After the loop the APIC perf IRQ statistic is bumped and APIC_LVTPC is
 * rewritten with APIC_DM_NMI.
 *
 * NOTE(review): this extract does not show the declarations of val and
 * overflow, the assignment of hwc, the statements controlled by several of
 * the if tests (presumably handled++ / continue), any condition guarding
 * inc_irq_stat(), or the return statement (presumably handled); confirm
 * against the full file.
 */
999 static int p4_pmu_handle_irq(struct pt_regs
*regs
)
1001 struct perf_sample_data data
;
1002 struct cpu_hw_events
*cpuc
;
1003 struct perf_event
*event
;
1004 struct hw_perf_event
*hwc
;
1005 int idx
, handled
= 0;
1008 perf_sample_data_init(&data
, 0);
1010 cpuc
= &__get_cpu_var(cpu_hw_events
);
1012 for (idx
= 0; idx
< x86_pmu
.num_counters
; idx
++) {
1015 if (!test_bit(idx
, cpuc
->active_mask
)) {
1016 /* catch in-flight IRQs */
1017 if (__test_and_clear_bit(idx
, cpuc
->running
))
1022 event
= cpuc
->events
[idx
];
1025 WARN_ON_ONCE(hwc
->idx
!= idx
);
1027 /* it might be unflagged overflow */
1028 overflow
= p4_pmu_clear_cccr_ovf(hwc
);
1030 val
= x86_perf_event_update(event
);
/* no overflow reported and the top counter bit is still set */
1031 if (!overflow
&& (val
& (1ULL << (x86_pmu
.cntval_bits
- 1))))
1034 handled
+= overflow
;
1036 /* event overflow for sure */
1037 data
.period
= event
->hw
.last_period
;
1039 if (!x86_perf_event_set_period(event
))
1041 if (perf_event_overflow(event
, &data
, regs
))
1042 x86_pmu_stop(event
, 0);
1046 inc_irq_stat(apic_perf_irqs
);
1049 * When dealing with the unmasking of the LVTPC on P4 perf hw, it has
1050 * been observed that the OVF bit flag has to be cleared first _before_
1051 * the LVTPC can be unmasked.
1053 * The reason is the NMI line will continue to be asserted while the OVF
1054 * bit is set. This causes a second NMI to generate if the LVTPC is
1055 * unmasked before the OVF bit is cleared, leading to unknown NMI
1058 apic_write(APIC_LVTPC
, APIC_DM_NMI
);
/*
 * p4_pmu_swap_config_ts - rewrite the thread specific bits of hwc->config.
 * @hwc: hardware event state whose packed config is updated in place
 * @cpu: cpu the event is going to run on
 *
 * p4_should_swap_ts() is consulted first. The ESCR and CCCR parts are then
 * unpacked from hwc->config and two mirrored sequences follow:
 *  - under p4_ht_thread(cpu): P4_CCCR_OVF_PMI_T0 is replaced by
 *    P4_CCCR_OVF_PMI_T1, a set P4_ESCR_T0_OS / P4_ESCR_T0_USR bit is moved
 *    to its T1 counterpart, config is repacked and P4_CONFIG_HT is set;
 *  - the second sequence does the reverse (T1 -> T0) and clears
 *    P4_CONFIG_HT.
 * Repacking assigns the packed ESCR first and ORs in the packed CCCR, so
 * the new config holds only those two parts plus the HT flag handling.
 *
 * NOTE(review): the declarations of escr and cccr, the statement taken
 * when no swap is needed (presumably return) and the line separating the
 * two sequences (presumably an else branch) are not visible in this
 * extract; confirm against the full file.
 */
1064 * swap thread specific fields according to a thread
1065 * we are going to run on
1067 static void p4_pmu_swap_config_ts(struct hw_perf_event
*hwc
, int cpu
)
1072 * we either lucky and continue on same cpu or no HT support
1074 if (!p4_should_swap_ts(hwc
->config
, cpu
))
1078 * the event is migrated from an another logical
1079 * cpu, so we need to swap thread specific flags
1082 escr
= p4_config_unpack_escr(hwc
->config
);
1083 cccr
= p4_config_unpack_cccr(hwc
->config
);
1085 if (p4_ht_thread(cpu
)) {
1086 cccr
&= ~P4_CCCR_OVF_PMI_T0
;
1087 cccr
|= P4_CCCR_OVF_PMI_T1
;
1088 if (escr
& P4_ESCR_T0_OS
) {
1089 escr
&= ~P4_ESCR_T0_OS
;
1090 escr
|= P4_ESCR_T1_OS
;
1092 if (escr
& P4_ESCR_T0_USR
) {
1093 escr
&= ~P4_ESCR_T0_USR
;
1094 escr
|= P4_ESCR_T1_USR
;
1096 hwc
->config
= p4_config_pack_escr(escr
);
1097 hwc
->config
|= p4_config_pack_cccr(cccr
);
1098 hwc
->config
|= P4_CONFIG_HT
;
1100 cccr
&= ~P4_CCCR_OVF_PMI_T1
;
1101 cccr
|= P4_CCCR_OVF_PMI_T0
;
1102 if (escr
& P4_ESCR_T1_OS
) {
1103 escr
&= ~P4_ESCR_T1_OS
;
1104 escr
|= P4_ESCR_T0_OS
;
1106 if (escr
& P4_ESCR_T1_USR
) {
1107 escr
&= ~P4_ESCR_T1_USR
;
1108 escr
|= P4_ESCR_T0_USR
;
1110 hwc
->config
= p4_config_pack_escr(escr
);
1111 hwc
->config
|= p4_config_pack_cccr(cccr
);
1112 hwc
->config
&= ~P4_CONFIG_HT
;
1117 * ESCR address hashing is tricky: ESCRs are not sequential in the
1118 * MSR address space, but they all start from MSR_P4_BSU_ESCR0 (0x03a0)
1119 * and the low byte of every ESCR address lies in the range [0xa0,0xe1],
1121 * so we end up with a ~70% filled hashtable
/*
 * ESCR lookup table, indexed by (MSR address - P4_ESCR_MSR_BASE).
 *
 * P4_ESCR_MSR_TABLE_SIZE covers the inclusive range BASE..MAX (0x42 slots).
 * P4_ESCR_MSR_IDX() turns an MSR address into its slot, and
 * P4_ESCR_MSR_TABLE_ENTRY() stores the address itself in that slot, so a
 * populated slot always equals the address that maps to it. Slots without
 * an entry stay zero-initialized, which p4_get_escr_idx() relies on to
 * reject addresses that are not ESCRs.
 *
 * NOTE(review): P4_ESCR_MSR_IDX() does not parenthesize its argument; all
 * uses visible here pass a plain identifier.
 */
1124 #define P4_ESCR_MSR_BASE 0x000003a0
1125 #define P4_ESCR_MSR_MAX 0x000003e1
1126 #define P4_ESCR_MSR_TABLE_SIZE (P4_ESCR_MSR_MAX - P4_ESCR_MSR_BASE + 1)
1127 #define P4_ESCR_MSR_IDX(msr) (msr - P4_ESCR_MSR_BASE)
1128 #define P4_ESCR_MSR_TABLE_ENTRY(msr) [P4_ESCR_MSR_IDX(msr)] = msr
1130 static const unsigned int p4_escr_table
[P4_ESCR_MSR_TABLE_SIZE
] = {
1131 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ALF_ESCR0
),
1132 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ALF_ESCR1
),
1133 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BPU_ESCR0
),
1134 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BPU_ESCR1
),
1135 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BSU_ESCR0
),
1136 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BSU_ESCR1
),
1137 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR0
),
1138 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR1
),
1139 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR2
),
1140 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR3
),
1141 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR4
),
1142 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR5
),
1143 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_DAC_ESCR0
),
1144 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_DAC_ESCR1
),
1145 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FIRM_ESCR0
),
1146 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FIRM_ESCR1
),
1147 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FLAME_ESCR0
),
1148 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FLAME_ESCR1
),
1149 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FSB_ESCR0
),
1150 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FSB_ESCR1
),
1151 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IQ_ESCR0
),
1152 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IQ_ESCR1
),
1153 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IS_ESCR0
),
1154 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IS_ESCR1
),
1155 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ITLB_ESCR0
),
1156 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ITLB_ESCR1
),
1157 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IX_ESCR0
),
1158 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IX_ESCR1
),
1159 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MOB_ESCR0
),
1160 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MOB_ESCR1
),
1161 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MS_ESCR0
),
1162 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MS_ESCR1
),
1163 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_PMH_ESCR0
),
1164 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_PMH_ESCR1
),
1165 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_RAT_ESCR0
),
1166 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_RAT_ESCR1
),
1167 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SAAT_ESCR0
),
1168 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SAAT_ESCR1
),
1169 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SSU_ESCR0
),
1170 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SSU_ESCR1
),
1171 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TBPU_ESCR0
),
1172 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TBPU_ESCR1
),
1173 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TC_ESCR0
),
1174 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TC_ESCR1
),
1175 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_U2L_ESCR0
),
1176 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_U2L_ESCR1
),
/*
 * p4_get_escr_idx - map an ESCR MSR address to its p4_escr_table slot.
 * @addr: MSR address to look up
 *
 * idx is unsigned, so an @addr below P4_ESCR_MSR_BASE wraps to a large
 * value and fails the range test just like one above P4_ESCR_MSR_MAX.
 * An address is also rejected when its slot is empty (zero) or holds a
 * different address; each rejection goes through WARN_ONCE().
 *
 * NOTE(review): the return statements are not visible in this extract.
 * The caller in p4_pmu_schedule_events() compares the result with -1, so
 * presumably -1 is returned on rejection and idx otherwise; confirm.
 */
1179 static int p4_get_escr_idx(unsigned int addr
)
1181 unsigned int idx
= P4_ESCR_MSR_IDX(addr
);
/* the range test comes first, so the table is only indexed with a valid idx */
1183 if (unlikely(idx
>= P4_ESCR_MSR_TABLE_SIZE
||
1184 !p4_escr_table
[idx
] ||
1185 p4_escr_table
[idx
] != addr
)) {
1186 WARN_ONCE(1, "P4 PMU: Wrong address passed: %x\n", addr
);
/*
 * p4_next_cntr - look for a usable counter for an event on @thread.
 * @thread:    HT thread index, selects the row of bind->cntr
 * @used_mask: bitmap of counter indices that are already taken
 * @bind:      bind entry listing up to P4_CNTR_LIMIT candidate counters
 *
 * Scans bind->cntr[thread][0 .. P4_CNTR_LIMIT - 1] in order and tests each
 * candidate for being present (not -1) and not yet set in @used_mask.
 *
 * NOTE(review): the declarations of i and j and both return statements are
 * not visible in this extract. The caller treats -1 as "no counter", so
 * presumably the first matching j is returned and -1 otherwise; confirm.
 */
1193 static int p4_next_cntr(int thread
, unsigned long *used_mask
,
1194 struct p4_event_bind
*bind
)
1198 for (i
= 0; i
< P4_CNTR_LIMIT
; i
++) {
1199 j
= bind
->cntr
[thread
][i
];
1200 if (j
!= -1 && !test_bit(j
, used_mask
))
/*
 * p4_pmu_schedule_events - choose a counter and an ESCR for each event.
 * @cpuc:   per-cpu state; cpuc->event_list[0 .. n - 1] are the events
 * @n:      number of events to place
 * @assign: assign[i] receives the counter index chosen for event i
 *
 * used_mask records counters and escr_mask records p4_escr_table slots
 * taken during this call; both start out empty. For each event:
 *  - the bind entry is fetched with p4_config_get_bind() and the ESCR slot
 *    for the current cpu's thread is resolved with p4_get_escr_idx(); the
 *    result is compared with -1;
 *  - an event that already has a counter (hwc->idx != -1) and needs no
 *    thread swap keeps that counter;
 *  - otherwise p4_next_cntr() proposes a counter; when it yields -1 or the
 *    ESCR slot is already set in escr_mask, hwc->config is replaced by the
 *    value from p4_get_alias_event();
 *  - p4_pmu_swap_config_ts() adapts the config to this cpu, and the chosen
 *    counter and ESCR slot are marked as used.
 * num starts at @n and is decremented once per loop step; the function
 * returns 0 when num is zero at the end and -EINVAL otherwise.
 *
 * NOTE(review): a number of lines are not visible in this extract: the
 * declaration of config_alias, any retry bookkeeping around the alias
 * lookup, the jump statements and labels linking the steps above, and any
 * guard on writes through @assign. Confirm the exact control flow against
 * the full file before changing this function.
 */
1207 static int p4_pmu_schedule_events(struct cpu_hw_events
*cpuc
, int n
, int *assign
)
1209 unsigned long used_mask
[BITS_TO_LONGS(X86_PMC_IDX_MAX
)];
1210 unsigned long escr_mask
[BITS_TO_LONGS(P4_ESCR_MSR_TABLE_SIZE
)];
1211 int cpu
= smp_processor_id();
1212 struct hw_perf_event
*hwc
;
1213 struct p4_event_bind
*bind
;
1214 unsigned int i
, thread
, num
;
1215 int cntr_idx
, escr_idx
;
1219 bitmap_zero(used_mask
, X86_PMC_IDX_MAX
);
1220 bitmap_zero(escr_mask
, P4_ESCR_MSR_TABLE_SIZE
);
/* num mirrors the number of events not yet processed */
1222 for (i
= 0, num
= n
; i
< n
; i
++, num
--) {
1224 hwc
= &cpuc
->event_list
[i
]->hw
;
1225 thread
= p4_ht_thread(cpu
);
1230 * It's possible to hit a circular lock
1231 * between original and alternative events
1232 * if both are scheduled already.
1237 bind
= p4_config_get_bind(hwc
->config
);
1238 escr_idx
= p4_get_escr_idx(bind
->escr_msr
[thread
]);
1239 if (unlikely(escr_idx
== -1))
1242 if (hwc
->idx
!= -1 && !p4_should_swap_ts(hwc
->config
, cpu
)) {
1243 cntr_idx
= hwc
->idx
;
1245 assign
[i
] = hwc
->idx
;
1249 cntr_idx
= p4_next_cntr(thread
, used_mask
, bind
);
1250 if (cntr_idx
== -1 || test_bit(escr_idx
, escr_mask
)) {
1252 * Check whether an event alias is still available.
1254 config_alias
= p4_get_alias_event(hwc
->config
);
1257 hwc
->config
= config_alias
;
1262 p4_pmu_swap_config_ts(hwc
, cpu
);
1264 assign
[i
] = cntr_idx
;
1266 set_bit(cntr_idx
, used_mask
);
1267 set_bit(escr_idx
, escr_mask
);
1271 return num
? -EINVAL
: 0;
/*
 * p4_pmu - x86_pmu description of the Netburst (P4/Xeon) PMU.
 *
 * Wires the p4_* callbacks defined in this file into the generic x86 perf
 * layer and supplies the MSR bases (MSR_P4_BPU_CCCR0 for event selection,
 * MSR_P4_BPU_PERFCTR0 for the counters) and the counter geometry.
 * max_period is the largest value with the top counter bit clear, i.e.
 * 2^(ARCH_P4_CNTRVAL_BITS - 1) - 1.
 *
 * NOTE(review): the embedded line numbers show gaps inside this
 * initializer, so fields may exist that are not visible in this extract.
 */
1274 static __initconst
const struct x86_pmu p4_pmu
= {
1275 .name
= "Netburst P4/Xeon",
1276 .handle_irq
= p4_pmu_handle_irq
,
1277 .disable_all
= p4_pmu_disable_all
,
1278 .enable_all
= p4_pmu_enable_all
,
1279 .enable
= p4_pmu_enable_event
,
1280 .disable
= p4_pmu_disable_event
,
1281 .eventsel
= MSR_P4_BPU_CCCR0
,
1282 .perfctr
= MSR_P4_BPU_PERFCTR0
,
1283 .event_map
= p4_pmu_event_map
,
1284 .max_events
= ARRAY_SIZE(p4_general_events
),
1285 .get_event_constraints
= x86_get_event_constraints
,
1287 * IF HT disabled we may need to use all
1288 * ARCH_P4_MAX_CCCR counters simulaneously
1289 * though leave it restricted at moment assuming
1292 .num_counters
= ARCH_P4_MAX_CCCR
,
1294 .cntval_bits
= ARCH_P4_CNTRVAL_BITS
,
1295 .cntval_mask
= ARCH_P4_CNTRVAL_MASK
,
1296 .max_period
= (1ULL << (ARCH_P4_CNTRVAL_BITS
- 1)) - 1,
1297 .hw_config
= p4_hw_config
,
1298 .schedule_events
= p4_pmu_schedule_events
,
1300 * This handles erratum N15 in intel doc 249199-029,
1301 * the counter may not be updated correctly on write
1302 * so we need a second write operation to do the trick
1303 * (the official workaround didn't work)
1305 * the former idea is taken from OProfile code
1307 .perfctr_second_write
= 1,
1310 __init
int p4_pmu_init(void)
1312 unsigned int low
, high
;
1314 /* If we get stripped -- indexing fails */
1315 BUILD_BUG_ON(ARCH_P4_MAX_CCCR
> X86_PMC_MAX_GENERIC
);
1317 rdmsr(MSR_IA32_MISC_ENABLE
, low
, high
);
1318 if (!(low
& (1 << 7))) {
1319 pr_cont("unsupported Netburst CPU model %d ",
1320 boot_cpu_data
.x86_model
);
1324 memcpy(hw_cache_event_ids
, p4_hw_cache_event_ids
,
1325 sizeof(hw_cache_event_ids
));
1327 pr_cont("Netburst events, ");