4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
26 * This file contains preset event names from the Performance Application
27 * Programming Interface v3.5 which included the following notice:
29 * Copyright (c) 2005,6
30 * Innovative Computing Labs
31 * Computer Science Department,
32 * University of Tennessee,
34 * All Rights Reserved.
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions are met:
40 * * Redistributions of source code must retain the above copyright notice,
41 * this list of conditions and the following disclaimer.
42 * * Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * * Neither the name of the University of Tennessee nor the names of its
46 * contributors may be used to endorse or promote products derived from
47 * this software without specific prior written permission.
49 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
50 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
53 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
54 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
55 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
56 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
57 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
58 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
59 * POSSIBILITY OF SUCH DAMAGE.
62 * This open source software license conforms to the BSD License template.
66 * Performance Counter Back-End for Pentium 4.
69 #include <sys/cpuvar.h>
70 #include <sys/param.h>
71 #include <sys/cpc_impl.h>
72 #include <sys/cpc_pcbe.h>
73 #include <sys/inttypes.h>
74 #include <sys/errno.h>
75 #include <sys/systm.h>
76 #include <sys/archsystm.h>
77 #include <sys/x86_archext.h>
78 #include <sys/modctl.h>
81 #include <sys/policy.h>
82 #include <sys/privregs.h>
84 static int p4_pcbe_init(void);
85 static uint_t
p4_pcbe_ncounters(void);
86 static const char *p4_pcbe_impl_name(void);
87 static const char *p4_pcbe_cpuref(void);
88 static char *p4_pcbe_list_events(uint_t picnum
);
89 static char *p4_pcbe_list_attrs(void);
90 static uint64_t p4_pcbe_event_coverage(char *event
);
91 static uint64_t p4_pcbe_overflow_bitmap(void);
92 static int p4_pcbe_configure(uint_t picnum
, char *event
, uint64_t preset
,
93 uint32_t flags
, uint_t nattrs
, kcpc_attr_t
*attrs
, void **data
,
95 static void p4_pcbe_program(void *token
);
96 static void p4_pcbe_allstop(void);
97 static void p4_pcbe_sample(void *token
);
98 static void p4_pcbe_free(void *config
);
100 extern int cpuid_get_clogid(cpu_t
*);
102 static pcbe_ops_t p4_pcbe_ops
= {
104 CPC_CAP_OVERFLOW_INTERRUPT
| CPC_CAP_OVERFLOW_PRECISE
,
110 p4_pcbe_event_coverage
,
111 p4_pcbe_overflow_bitmap
,
120 * P4 Configuration Flags.
122 #define P4_THIS_USR 0x1 /* HTT: Measure usr events on this logical CPU */
123 #define P4_THIS_SYS 0x2 /* HTT: Measure os events on this logical CPU */
124 #define P4_SIBLING_USR 0x4 /* HTT: Measure usr events on other logical CPU */
125 #define P4_SIBLING_SYS 0x8 /* HTT: Measure os events on other logical CPU */
126 #define P4_PMI 0x10 /* HTT: Set PMI bit for local logical CPU */
128 typedef struct _p4_pcbe_config
{
130 uint8_t p4_picno
; /* Counter number, from 0 to 17 */
131 uint8_t p4_escr_ndx
; /* Index into p4_escrs[] of the ESCR to use */
132 uint32_t p4_escr
; /* Value to program in selected ESCR */
133 uint32_t p4_cccr
; /* Value to program in counter's CCCR */
137 typedef uint32_t cntr_map_t
;
139 typedef struct _p4_escr
{
142 uint32_t pe_map
; /* bitmap of counters; bit 0 (0x1) means ctr 0 */
145 #define MASK40 UINT64_C(0xffffffffff)
148 * CCCR field definitions.
150 * Note that the Intel Developer's Manual states that the reserved field at
151 * bit locations 16 and 17 (the active_thread field on HT parts) must be set to 11b.
153 #define CCCR_ENABLE_SHIFT 12
154 #define CCCR_ESCR_SEL_SHIFT 13
155 #define CCCR_ACTV_THR_SHIFT 16
156 #define CCCR_COMPARE_SHIFT 18
157 #define CCCR_COMPLEMENT_SHIFT 19
158 #define CCCR_THRESHOLD_SHIFT 20
159 #define CCCR_EDGE_SHIFT 24
160 #define CCCR_OVF_PMI_SHIFT 26
161 #define CCCR_OVF_PMI_T0_SHIFT 26
162 #define CCCR_OVF_PMI_T1_SHIFT 27
163 #define CCCR_OVF_SHIFT 31
164 #define CCCR_ACTV_THR_MASK 0x3
165 #define CCCR_THRESHOLD_MAX 0xF
166 #define CCCR_ENABLE (1U << CCCR_ENABLE_SHIFT)
167 #define CCCR_COMPARE (1U << CCCR_COMPARE_SHIFT)
168 #define CCCR_COMPLEMENT (1U << CCCR_COMPLEMENT_SHIFT)
169 #define CCCR_EDGE (1U << CCCR_EDGE_SHIFT)
170 #define CCCR_OVF_PMI (1U << CCCR_OVF_PMI_SHIFT)
171 #define CCCR_OVF_PMI_T0 (1U << CCCR_OVF_PMI_T0_SHIFT)
172 #define CCCR_OVF_PMI_T1 (1U << CCCR_OVF_PMI_T1_SHIFT)
173 #define CCCR_INIT CCCR_ENABLE
174 #define CCCR_OVF (1U << CCCR_OVF_SHIFT)
176 #define ESCR_EVSEL_SHIFT 25
177 #define ESCR_EVMASK_SHIFT 9
178 #define ESCR_TAG_VALUE_SHIFT 5
179 #define ESCR_TAG_VALUE_MAX 0xF
180 #define ESCR_TAG_ENABLE_SHIFT 4
181 #define ESCR_USR_SHIFT 2
182 #define ESCR_OS_SHIFT 3
183 #define ESCR_USR (1U << ESCR_USR_SHIFT)
184 #define ESCR_OS (1U << ESCR_OS_SHIFT)
185 #define ESCR_TAG_ENABLE (1U << ESCR_TAG_ENABLE_SHIFT)
188 * HyperThreaded ESCR fields.
190 #define ESCR_T0_OS_SHIFT 3
191 #define ESCR_T0_USR_SHIFT 2
192 #define ESCR_T1_OS_SHIFT 1
193 #define ESCR_T1_USR_SHIFT 0
194 #define ESCR_T0_OS (1U << ESCR_T0_OS_SHIFT)
195 #define ESCR_T0_USR (1U << ESCR_T0_USR_SHIFT)
196 #define ESCR_T1_OS (1U << ESCR_T1_OS_SHIFT)
197 #define ESCR_T1_USR (1U << ESCR_T1_USR_SHIFT)
200 * ESCRs are grouped by counter; each group of ESCRs is associated with a
201 * distinct group of counters. Use these macros to fill in the table below.
203 #define BPU0_map (0x1 | 0x2) /* Counters 0 and 1 */
204 #define BPU2_map (0x4 | 0x8) /* Counters 2 and 3 */
205 #define MS0_map (0x10 | 0x20) /* Counters 4 and 5 */
206 #define MS2_map (0x40 | 0x80) /* Counters 6 and 7 */
207 #define FLAME0_map (0x100 | 0x200) /* Counters 8 and 9 */
208 #define FLAME2_map (0x400 | 0x800) /* Counters 10 and 11 */
209 #define IQ0_map (0x1000 | 0x2000 | 0x10000) /* Counters 12, 13, 16 */
210 #define IQ2_map (0x4000 | 0x8000 | 0x20000) /* Counters 14, 15, 17 */
213 * Table describing the 45 Event Selection and Control Registers (ESCRs).
215 const p4_escr_t p4_escrs
[] = {
217 { 0, 0x3B2, BPU0_map
}, /* 0 */
218 #define IS0 (1ULL << 1)
219 { 1, 0x3B4, BPU0_map
}, /* 1 */
220 #define MOB0 (1ULL << 2)
221 { 2, 0x3AA, BPU0_map
}, /* 2 */
222 #define ITLB0 (1ULL << 3)
223 { 3, 0x3B6, BPU0_map
}, /* 3 */
224 #define PMH0 (1ULL << 4)
225 { 4, 0x3AC, BPU0_map
}, /* 4 */
226 #define IX0 (1ULL << 5)
227 { 5, 0x3C8, BPU0_map
}, /* 5 */
228 #define FSB0 (1ULL << 6)
229 { 6, 0x3A2, BPU0_map
}, /* 6 */
230 #define BSU0 (1ULL << 7)
231 { 7, 0x3A0, BPU0_map
}, /* 7 */
232 #define BPU1 (1ULL << 8)
233 { 0, 0x3B3, BPU2_map
}, /* 8 */
234 #define IS1 (1ULL << 9)
235 { 1, 0x3B5, BPU2_map
}, /* 9 */
236 #define MOB1 (1ULL << 10)
237 { 2, 0x3AB, BPU2_map
}, /* 10 */
238 #define ITLB1 (1ULL << 11)
239 { 3, 0x3B7, BPU2_map
}, /* 11 */
240 #define PMH1 (1ULL << 12)
241 { 4, 0x3AD, BPU2_map
}, /* 12 */
242 #define IX1 (1ULL << 13)
243 { 5, 0x3C9, BPU2_map
}, /* 13 */
244 #define FSB1 (1ULL << 14)
245 { 6, 0x3A3, BPU2_map
}, /* 14 */
246 #define BSU1 (1ULL << 15)
247 { 7, 0x3A1, BPU2_map
}, /* 15 */
248 #define MS0 (1ULL << 16)
249 { 0, 0x3C0, MS0_map
}, /* 16 */
250 #define TC0 (1ULL << 17)
251 { 1, 0x3C4, MS0_map
}, /* 17 */
252 #define TBPU0 (1ULL << 18)
253 { 2, 0x3C2, MS0_map
}, /* 18 */
254 #define MS1 (1ULL << 19)
255 { 0, 0x3C1, MS2_map
}, /* 19 */
256 #define TC1 (1ULL << 20)
257 { 1, 0x3C5, MS2_map
}, /* 20 */
258 #define TBPU1 (1ULL << 21)
259 { 2, 0x3C3, MS2_map
}, /* 21 */
260 #define FLAME0 (1ULL << 22)
261 { 0, 0x3A6, FLAME0_map
}, /* 22 */
262 #define FIRM0 (1ULL << 23)
263 { 1, 0x3A4, FLAME0_map
}, /* 23 */
264 #define SAAT0 (1ULL << 24)
265 { 2, 0x3AE, FLAME0_map
}, /* 24 */
266 #define U2L0 (1ULL << 25)
267 { 3, 0x3B0, FLAME0_map
}, /* 25 */
268 #define DAC0 (1ULL << 26)
269 { 5, 0x3A8, FLAME0_map
}, /* 26 */
270 #define FLAME1 (1ULL << 27)
271 { 0, 0x3A7, FLAME2_map
}, /* 27 */
272 #define FIRM1 (1ULL << 28)
273 { 1, 0x3A5, FLAME2_map
}, /* 28 */
274 #define SAAT1 (1ULL << 29)
275 { 2, 0x3AF, FLAME2_map
}, /* 29 */
276 #define U2L1 (1ULL << 30)
277 { 3, 0x3B1, FLAME2_map
}, /* 30 */
278 #define DAC1 (1ULL << 31)
279 { 5, 0x3A9, FLAME2_map
}, /* 31 */
280 #define IQ0 (1ULL << 32)
281 { 0, 0x3BA, IQ0_map
}, /* 32 */
282 #define ALF0 (1ULL << 33)
283 { 1, 0x3CA, IQ0_map
}, /* 33 */
284 #define RAT0 (1ULL << 34)
285 { 2, 0x3BC, IQ0_map
}, /* 34 */
286 #define SSU0 (1ULL << 35)
287 { 3, 0x3BE, IQ0_map
}, /* 35 */
288 #define CRU0 (1ULL << 36)
289 { 4, 0x3B8, IQ0_map
}, /* 36 */
290 #define CRU2 (1ULL << 37)
291 { 5, 0x3CC, IQ0_map
}, /* 37 */
292 #define CRU4 (1ULL << 38)
293 { 6, 0x3E0, IQ0_map
}, /* 38 */
294 #define IQ1 (1ULL << 39)
295 { 0, 0x3BB, IQ2_map
}, /* 39 */
296 #define ALF1 (1ULL << 40)
297 { 1, 0x3CB, IQ2_map
}, /* 40 */
298 #define RAT1 (1ULL << 41)
299 { 2, 0x3BD, IQ2_map
}, /* 41 */
300 #define CRU1 (1ULL << 42)
301 { 4, 0x3B9, IQ2_map
}, /* 42 */
302 #define CRU3 (1ULL << 43)
303 { 5, 0x3CD, IQ2_map
}, /* 43 */
304 #define CRU5 (1ULL << 44)
305 { 6, 0x3E1, IQ2_map
} /* 44 */
308 #define ESCR_MAX_INDEX 44
310 typedef struct _p4_ctr
{
311 uint32_t pc_caddr
; /* counter MSR address */
312 uint32_t pc_ctladdr
; /* counter's CCCR MSR address */
313 uint64_t pc_map
; /* bitmap of ESCRs controlling ctr */
316 const p4_ctr_t p4_ctrs
[18] = {
317 { /* BPU_COUNTER0 */ 0x300, 0x360, BSU0
|FSB0
|MOB0
|PMH0
|BPU0
|IS0
|ITLB0
|IX0
},
318 { /* BPU_COUNTER1 */ 0x301, 0x361, BSU0
|FSB0
|MOB0
|PMH0
|BPU0
|IS0
|ITLB0
|IX0
},
319 { /* BPU_COUNTER2 */ 0x302, 0x362, BSU1
|FSB1
|MOB1
|PMH1
|BPU1
|IS1
|ITLB1
|IX1
},
320 { /* BPU_COUNTER3 */ 0x303, 0x363, BSU1
|FSB1
|MOB1
|PMH1
|BPU1
|IS1
|ITLB1
|IX1
},
321 { /* MS_COUNTER0 */ 0x304, 0x364, MS0
|TBPU0
|TC0
},
322 { /* MS_COUNTER1 */ 0x305, 0x365, MS0
|TBPU0
|TC0
},
323 { /* MS_COUNTER2 */ 0x306, 0x366, MS1
|TBPU1
|TC1
},
324 { /* MS_COUNTER3 */ 0x307, 0x367, MS1
|TBPU1
|TC1
},
325 { /* FLAME_COUNTER0 */ 0x308, 0x368, FIRM0
|FLAME0
|DAC0
|SAAT0
|U2L0
},
326 { /* FLAME_COUNTER1 */ 0x309, 0x369, FIRM0
|FLAME0
|DAC0
|SAAT0
|U2L0
},
327 { /* FLAME_COUNTER2 */ 0x30A, 0x36A, FIRM1
|FLAME1
|DAC1
|SAAT1
|U2L1
},
328 { /* FLAME_COUNTER3 */ 0x30B, 0x36B, FIRM1
|FLAME1
|DAC1
|SAAT1
|U2L1
},
329 { /* IQ_COUNTER0 */ 0x30C, 0x36C, CRU0
|CRU2
|CRU4
|IQ0
|RAT0
|SSU0
|ALF0
},
330 { /* IQ_COUNTER1 */ 0x30D, 0x36D, CRU0
|CRU2
|CRU4
|IQ0
|RAT0
|SSU0
|ALF0
},
331 { /* IQ_COUNTER2 */ 0x30E, 0x36E, CRU1
|CRU3
|CRU5
|IQ1
|RAT1
|ALF1
},
332 { /* IQ_COUNTER3 */ 0x30F, 0x36F, CRU1
|CRU3
|CRU5
|IQ1
|RAT1
|ALF1
},
333 { /* IQ_COUNTER4 */ 0x310, 0x370, CRU0
|CRU2
|CRU4
|IQ0
|RAT0
|SSU0
|ALF0
},
334 { /* IQ_COUNTER5 */ 0x311, 0x371, CRU1
|CRU3
|CRU5
|IQ1
|RAT1
|ALF1
}
337 typedef struct _p4_event
{
338 char *pe_name
; /* Name of event according to docs */
339 uint64_t pe_escr_map
; /* Bitmap of ESCRs capable of event */
340 uint32_t pe_escr_mask
; /* permissible ESCR event mask */
341 uint8_t pe_ev
; /* ESCR event select value */
342 uint16_t pe_cccr
; /* CCCR select value */
343 uint32_t pe_ctr_mask
; /* Bitmap of capable counters */
346 typedef struct _p4_generic_event
{
351 } p4_generic_event_t
;
/*
 * Bit for counter n in a counter bitmap (bit 0 is counter 0).  The argument
 * is parenthesized so that any expression, not just a literal or a plain
 * identifier, selects the intended bit.
 */
#define	C(n)	(1 << (n))
354 #define GEN_EVT_END { NULL, NULL, 0x0, 0x0 }
356 p4_event_t p4_events
[] = {
357 { "branch_retired", CRU2
|CRU3
, 0xF, 0x6, 0x5, C(12)|C(13)|C(14)|C(15)|C(16) },
358 { "mispred_branch_retired", CRU0
|CRU1
, 0x1, 0x3, 0x4,
359 C(12)|C(13)|C(14)|C(15)|C(16) },
360 { "TC_deliver_mode", TC0
|TC1
, 0xFF, 0x1, 0x1, C(4)|C(5)|C(6)|C(7) },
361 { "BPU_fetch_request", BPU0
|BPU1
, 0x1, 0x3, 0x0, C(0)|C(1)|C(2)|C(3) },
362 { "ITLB_reference", ITLB0
|ITLB1
, 0x7, 0x18, 0x3, C(0)|C(1)|C(2)|C(3) },
363 { "memory_cancel", DAC0
|DAC1
, 0x6, 0x2, 0x5, C(8)|C(9)|C(10)|C(11) },
364 { "memory_complete", SAAT0
|SAAT1
, 0x3, 0x8, 0x2, C(8)|C(9)|C(10)|C(11) },
365 { "load_port_replay", SAAT0
|SAAT1
, 0x1, 0x4, 0x2, C(8)|C(9)|C(10)|C(11) },
366 { "store_port_replay", SAAT0
|SAAT1
, 0x1, 0x5, 0x2, C(8)|C(9)|C(10)|C(11) },
367 { "MOB_load_replay", MOB0
|MOB1
, 0x35, 0x3, 0x2, C(0)|C(1)|C(2)|C(3) },
368 { "page_walk_type", PMH0
|PMH1
, 0x3, 0x1, 0x4, C(0)|C(1)|C(2)|C(3) },
369 { "BSQ_cache_reference", BSU0
|BSU1
, 0x73F, 0xC, 0x7, C(0)|C(1)|C(2)|C(3) },
370 { "IOQ_allocation", FSB0
, 0xEFFF, 0x3, 0x6, C(0)|C(1) },
371 { "IOQ_active_entries", FSB1
, 0xEFFF, 0x1A, 0x6, C(2)|C(3) },
372 { "FSB_data_activity", FSB0
|FSB1
, 0x3F, 0x17, 0x6, C(0)|C(1)|C(2)|C(3) },
373 { "BSQ_allocation", BSU0
, 0x3FEF, 0x5, 0x7, C(0)|C(1) },
374 { "bsq_active_entries", BSU1
, 0x3FEF, 0x6, 0x7, C(2)|C(3) },
375 { "x87_assist", CRU2
|CRU3
, 0x1F, 0x3, 0x5, C(12)|C(13)|C(14)|C(15)|C(16)|C(17)},
376 { "SSE_input_assist", FIRM0
|FIRM1
, 0x8000, 0x34, 0x1, C(8)|C(9)|C(10)|C(11) },
377 { "packed_SP_uop", FIRM0
|FIRM1
, 0x8000, 0x8, 0x1, C(8)|C(9)|C(10)|C(11) },
378 { "packed_DP_uop", FIRM0
|FIRM1
, 0x8000, 0xC, 0x1, C(8)|C(9)|C(10)|C(11) },
379 { "scalar_SP_uop", FIRM0
|FIRM1
, 0x8000, 0xA, 0x1, C(8)|C(9)|C(10)|C(11) },
380 { "scalar_DP_uop", FIRM0
|FIRM1
, 0x8000, 0xE, 0x1, C(8)|C(9)|C(10)|C(11) },
381 { "64bit_MMX_uop", FIRM0
|FIRM1
, 0x8000, 0x2, 0x1, C(8)|C(9)|C(10)|C(11) },
382 { "128bit_MMX_uop", FIRM0
|FIRM1
, 0x8000, 0x1A, 0x1, C(8)|C(9)|C(10)|C(11) },
383 { "x87_FP_uop", FIRM0
|FIRM1
, 0x8000, 0x4, 0x1, C(8)|C(9)|C(10)|C(11) },
384 { "x87_SIMD_moves_uop", FIRM0
|FIRM1
, 0x18, 0x2E, 0x1, C(8)|C(9)|C(10)|C(11) },
385 { "machine_clear", CRU2
|CRU3
, 0xD, 0x2, 0x5,
386 C(12)|C(13)|C(14)|C(15)|C(16)|C(17)},
387 { "global_power_events", FSB0
|FSB1
, 0x1, 0x13, 0x6, C(0)|C(1)|C(2)|C(3) },
388 { "tc_ms_xfer", MS0
|MS1
, 0x1, 0x5, 0x0, C(4)|C(5)|C(6)|C(7) },
389 { "uop_queue_writes", MS0
|MS1
, 0x7, 0x9, 0x0, C(4)|C(5)|C(6)|C(7) },
390 { "front_end_event", CRU2
|CRU3
, 0x3, 0x8, 0x5,
391 C(12)|C(13)|C(14)|C(15)|C(16)|C(17)},
392 { "execution_event", CRU2
|CRU3
, 0xFF, 0xC, 0x5,
393 C(12)|C(13)|C(14)|C(15)|C(16)|C(17)},
394 { "replay_event", CRU2
|CRU3
, 0x3, 0x9, 0x5,
395 C(12)|C(13)|C(14)|C(15)|C(16)|C(17)},
396 { "instr_retired", CRU0
|CRU1
, 0xF, 0x2, 0x4,
397 C(12)|C(13)|C(14)|C(15)|C(16)|C(17)},
398 { "uops_retired", CRU0
|CRU1
, 0x3, 0x1, 0x4,
399 C(12)|C(13)|C(14)|C(15)|C(16)|C(17)},
400 { "uop_type", RAT0
|RAT1
, 0x3, 0x2, 0x2, C(12)|C(13)|C(14)|C(15)|C(16)|C(17)},
401 { "retired_mispred_branch_type", TBPU0
|TBPU1
, 0x1F, 0x5, 0x2,
402 C(4)|C(5)|C(6)|C(7)},
403 { "retired_branch_type", TBPU0
|TBPU1
, 0x1F, 0x4, 0x2, C(4)|C(5)|C(6)|C(7) },
407 static p4_generic_event_t p4_generic_events
[] = {
408 { "PAPI_br_msp", "branch_retired", 0xa, C(12)|C(13)|C(14)|C(15)|C(16) },
409 { "PAPI_br_ins", "branch_retired", 0xf, C(12)|C(13)|C(14)|C(15)|C(16) },
410 { "PAPI_br_tkn", "branch_retired", 0xc, C(12)|C(13)|C(14)|C(15)|C(16) },
411 { "PAPI_br_ntk", "branch_retired", 0x3, C(12)|C(13)|C(14)|C(15)|C(16) },
412 { "PAPI_br_prc", "branch_retired", 0x5, C(12)|C(13)|C(14)|C(15)|C(16) },
413 { "PAPI_tot_ins", "instr_retired", 0x3, C(12)|C(13)|C(14)|C(15)|C(16)|C(17) },
414 { "PAPI_tot_cyc", "global_power_events", 0x1, C(0)|C(1)|C(2)|C(3) },
415 { "PAPI_tlb_dm", "page_walk_type", 0x1, C(0)|C(1)|C(2)|C(3) },
416 { "PAPI_tlb_im", "page_walk_type", 0x2, C(0)|C(1)|C(2)|C(3) },
417 { "PAPI_tlb_tm", "page_walk_type", 0x3, C(0)|C(1)|C(2)|C(3) },
418 { "PAPI_l1_icm", "BPU_fetch_request", 0x1, C(0)|C(1)|C(2)|C(3) },
419 { "PAPI_l2_ldm", "BSQ_cache_reference", 0x100, C(0)|C(1)|C(2)|C(3) },
420 { "PAPI_l2_stm", "BSQ_cache_reference", 0x400, C(0)|C(1)|C(2)|C(3) },
421 { "PAPI_l2_tcm", "BSQ_cache_reference", 0x500, C(0)|C(1)|C(2)|C(3) },
426 * Indicates whether the "rdpmc" instruction is available on this processor.
428 static int p4_rdpmc_avail
= 0;
429 static char *p4_eventlist
[18];
432 * If set, this processor has HyperThreading.
434 static int p4_htt
= 0;
436 #define P4_FAMILY 0xF
444 p4_generic_event_t
*gevp
;
447 * If we're not running on a P4, refuse to load.
449 if (cpuid_getvendor(CPU
) != X86_VENDOR_Intel
||
450 cpuid_getfamily(CPU
) != P4_FAMILY
)
454 * Set up the event lists for each counter.
456 * First pass calculates the size of the event list, and the second
457 * pass copies each event name into the event list.
459 for (i
= 0; i
< 18; i
++) {
462 for (ev
= p4_events
; ev
->pe_name
!= NULL
; ev
++) {
463 if (ev
->pe_ctr_mask
& C(i
))
464 size
+= strlen(ev
->pe_name
) + 1;
467 for (gevp
= p4_generic_events
; gevp
->name
!= NULL
; gevp
++) {
468 if (gevp
->ctr_mask
& C(i
))
469 size
+= strlen(gevp
->name
) + 1;
473 * We use 'size + 1' here to ensure room for the final
474 * strcat when it terminates the string.
476 p4_eventlist
[i
] = kmem_alloc(size
+ 1, KM_SLEEP
);
477 *p4_eventlist
[i
] = '\0';
479 for (ev
= p4_events
; ev
->pe_name
!= NULL
; ev
++) {
480 if (ev
->pe_ctr_mask
& C(i
)) {
481 (void) strcat(p4_eventlist
[i
], ev
->pe_name
);
482 (void) strcat(p4_eventlist
[i
], ",");
486 for (gevp
= p4_generic_events
; gevp
->name
!= NULL
; gevp
++) {
487 if (gevp
->ctr_mask
& C(i
)) {
488 (void) strcat(p4_eventlist
[i
], gevp
->name
);
489 (void) strcat(p4_eventlist
[i
], ",");
494 * Remove trailing ','
496 p4_eventlist
[i
][size
- 1] = '\0';
499 if (is_x86_feature(x86_featureset
, X86FSET_MMX
))
502 * The X86_HTT flag may disappear soon, so we'll isolate the impact of
503 * its demise to the following if().
505 if (is_x86_feature(x86_featureset
, X86FSET_HTT
))
512 p4_pcbe_ncounters(void)
518 p4_pcbe_impl_name(void)
521 return (PCBE_IMPL_NAME_P4HT
);
522 return ("Pentium 4");
528 return ("See Appendix A.1 of the \"IA-32 Intel Architecture Software " \
529 "Developer's Manual Volume 3: System Programming Guide,\" " \
530 "Order # 245472-012, 2003");
534 p4_pcbe_list_events(uint_t picnum
)
536 ASSERT(picnum
>= 0 && picnum
< 18);
538 return (p4_eventlist
[picnum
]);
541 #define P4_ATTRS "emask,tag,compare,complement,threshold,edge"
544 p4_pcbe_list_attrs(void)
547 return (P4_ATTRS
",active_thread,count_sibling_usr,"
548 "count_sibling_sys");
552 static p4_generic_event_t
*
553 find_generic_event(char *name
)
555 p4_generic_event_t
*gevp
;
557 for (gevp
= p4_generic_events
; gevp
->name
!= NULL
; gevp
++)
558 if (strcmp(name
, gevp
->name
) == 0)
565 find_event(char *name
)
569 for (evp
= p4_events
; evp
->pe_name
!= NULL
; evp
++)
570 if (strcmp(name
, evp
->pe_name
) == 0)
577 p4_pcbe_event_coverage(char *event
)
580 p4_generic_event_t
*gevp
;
582 if ((ev
= find_event(event
)) == NULL
) {
583 if ((gevp
= find_generic_event(event
)) != NULL
)
584 return (gevp
->ctr_mask
);
589 return (ev
->pe_ctr_mask
);
593 p4_pcbe_overflow_bitmap(void)
595 extern int kcpc_hw_overflow_intr_installed
;
600 * The CCCR's OVF bit indicates that the corresponding counter has
601 * overflowed. It must be explicitly cleared by software, so it is
602 * safe to read the CCCR values here.
604 for (i
= 0; i
< 18; i
++) {
605 if (rdmsr(p4_ctrs
[i
].pc_ctladdr
) & CCCR_OVF
)
610 * Pentium 4 and Xeon turn off the CPC interrupt mask bit in the LVT at
611 * every overflow. Turn it back on here.
613 ASSERT(kcpc_hw_overflow_intr_installed
);
614 (*kcpc_hw_enable_cpc_intr
)();
620 p4_escr_inuse(p4_pcbe_config_t
**cfgs
, int escr_ndx
)
624 for (i
= 0; i
< 18; i
++) {
627 if (cfgs
[i
]->p4_escr_ndx
== escr_ndx
)
635 build_cfgs(p4_pcbe_config_t
*cfgs
[18], uint64_t *data
[18], void *token
)
637 p4_pcbe_config_t
*cfg
= NULL
;
640 bzero(cfgs
, 18 * sizeof (p4_pcbe_config_t
*));
643 cfg
= (p4_pcbe_config_t
*)kcpc_next_config(token
, cfg
, &daddr
);
646 ASSERT(cfg
->p4_picno
< 18);
647 cfgs
[cfg
->p4_picno
] = cfg
;
649 ASSERT(daddr
!= NULL
);
650 data
[cfg
->p4_picno
] = daddr
;
653 } while (cfg
!= NULL
);
657 * Programming a counter:
660 * Choose an ESCR capable of counting that event.
661 * Set up the ESCR with the desired parameters (usr, sys, tag).
662 * Set up the CCCR to point to the selected ESCR.
663 * Set the CCCR parameters (overflow, cascade, edge, etc).
666 p4_pcbe_configure(uint_t picnum
, char *eventname
, uint64_t preset
,
667 uint32_t flags
, uint_t nattrs
, kcpc_attr_t
*attrs
, void **data
,
670 p4_pcbe_config_t
*cfgs
[18];
671 p4_pcbe_config_t
*cfg
;
673 p4_generic_event_t
*gevp
;
679 int active_thread
= 0x3; /* default is "any" */
684 int sibling_usr
= 0; /* count usr on other cpu */
685 int sibling_sys
= 0; /* count sys on other cpu */
686 int invalid_attr
= 0;
689 * If we've been handed an existing configuration, we need only preset
694 cfg
->p4_rawpic
= preset
& MASK40
;
698 if (picnum
< 0 || picnum
>= 18)
699 return (CPC_INVALID_PICNUM
);
701 if ((ev
= find_event(eventname
)) == NULL
) {
702 if ((gevp
= find_generic_event(eventname
)) != NULL
) {
703 ev
= find_event(gevp
->event
);
707 * For generic events a HTT processor is only allowed
708 * to specify the 'active_thread', 'count_sibling_usr'
709 * and 'count_sibling_sys' attributes.
712 for (i
= 0; i
< nattrs
; i
++)
714 attrs
[i
].ka_name
) != NULL
)
717 if ((p4_htt
&& invalid_attr
) ||
718 (!p4_htt
&& nattrs
> 0))
719 return (CPC_ATTRIBUTE_OUT_OF_RANGE
);
723 return (CPC_INVALID_EVENT
);
727 build_cfgs(cfgs
, NULL
, token
);
730 * Find an ESCR capable of counting this event.
732 for (escr_ndx
= 0; escr_ndx
< ESCR_MAX_INDEX
; escr_ndx
++) {
733 if ((ev
->pe_escr_map
& (1ULL << escr_ndx
)) &&
734 p4_escr_inuse(cfgs
, escr_ndx
) == 0)
739 * All ESCRs capable of counting this event are already being
742 if (escr_ndx
== ESCR_MAX_INDEX
)
743 return (CPC_RESOURCE_UNAVAIL
);
746 * At this point, ev points to the desired event and escr_ndx is the index
747 * of a capable and available ESCR.
749 * Now process and verify the attributes.
751 for (i
= 0; i
< nattrs
; i
++) {
752 if (strcmp("emask", attrs
[i
].ka_name
) == 0) {
753 if ((attrs
[i
].ka_val
| ev
->pe_escr_mask
)
755 return (CPC_ATTRIBUTE_OUT_OF_RANGE
);
756 emask
= attrs
[i
].ka_val
;
758 } else if (strcmp("tag", attrs
[i
].ka_name
) == 0) {
759 if (attrs
[i
].ka_val
> ESCR_TAG_VALUE_MAX
)
760 return (CPC_ATTRIBUTE_OUT_OF_RANGE
);
761 tag
= attrs
[i
].ka_val
;
764 } else if (strcmp("compare", attrs
[i
].ka_name
) == 0) {
765 if (attrs
[i
].ka_val
!= 0)
768 } else if (strcmp("complement", attrs
[i
].ka_name
) == 0) {
769 if (attrs
[i
].ka_val
!= 0)
772 } else if (strcmp("threshold", attrs
[i
].ka_name
) == 0) {
773 if (attrs
[i
].ka_val
> CCCR_THRESHOLD_MAX
)
774 return (CPC_ATTRIBUTE_OUT_OF_RANGE
);
775 threshold
= attrs
[i
].ka_val
;
777 } else if (strcmp("edge", attrs
[i
].ka_name
) == 0) {
778 if (attrs
[i
].ka_val
!= 0)
784 * The remaining attributes are valid only on HyperThreaded P4s
785 * for processes with the "cpc_cpu" privilege.
788 return (CPC_INVALID_ATTRIBUTE
);
790 if (secpolicy_cpc_cpu(crgetcred()) != 0)
791 return (CPC_ATTR_REQUIRES_PRIVILEGE
);
793 if (strcmp("active_thread", attrs
[i
].ka_name
) == 0) {
794 if ((attrs
[i
].ka_val
| CCCR_ACTV_THR_MASK
) !=
796 return (CPC_ATTRIBUTE_OUT_OF_RANGE
);
797 active_thread
= (int)attrs
[i
].ka_val
;
798 } else if (strcmp("count_sibling_usr", attrs
[i
].ka_name
) == 0) {
799 if (attrs
[i
].ka_val
!= 0)
801 } else if (strcmp("count_sibling_sys", attrs
[i
].ka_name
) == 0) {
802 if (attrs
[i
].ka_val
!= 0)
805 return (CPC_INVALID_ATTRIBUTE
);
809 * Make sure the counter can count this event
811 if ((ev
->pe_ctr_mask
& C(picnum
)) == 0)
812 return (CPC_PIC_NOT_CAPABLE
);
815 * Find an ESCR that lines up with the event _and_ the counter.
817 for (escr_ndx
= 0; escr_ndx
< ESCR_MAX_INDEX
; escr_ndx
++) {
818 if ((ev
->pe_escr_map
& (1ULL << escr_ndx
)) &&
819 (p4_escrs
[escr_ndx
].pe_map
& (1 << picnum
)) &&
820 p4_escr_inuse(cfgs
, escr_ndx
) == 0)
823 if (escr_ndx
== ESCR_MAX_INDEX
)
824 return (CPC_RESOURCE_UNAVAIL
);
826 cfg
= (p4_pcbe_config_t
*)kmem_alloc(sizeof (p4_pcbe_config_t
),
830 cfg
->p4_picno
= picnum
;
831 cfg
->p4_escr_ndx
= escr_ndx
;
832 cfg
->p4_escr
= (ev
->pe_ev
<< ESCR_EVSEL_SHIFT
) |
833 (emask
<< ESCR_EVMASK_SHIFT
);
836 cfg
->p4_escr
|= tag
<< ESCR_TAG_VALUE_SHIFT
;
837 cfg
->p4_escr
|= ESCR_TAG_ENABLE
;
842 * This is a HyperThreaded P4. Since we don't know which
843 * logical CPU this configuration will eventually be programmed
844 * on, we can't yet decide which fields of the ESCR to select.
846 * Record the necessary information in the flags for later.
848 if (flags
& CPC_COUNT_USER
)
849 cfg
->p4_flags
|= P4_THIS_USR
;
850 if (flags
& CPC_COUNT_SYSTEM
)
851 cfg
->p4_flags
|= P4_THIS_SYS
;
852 if (p4_htt
&& sibling_usr
)
853 cfg
->p4_flags
|= P4_SIBLING_USR
;
854 if (p4_htt
&& sibling_sys
)
855 cfg
->p4_flags
|= P4_SIBLING_SYS
;
858 * This is not HyperThreaded, so we can determine the exact
859 * ESCR value necessary now.
861 if (flags
& CPC_COUNT_USER
)
862 cfg
->p4_escr
|= ESCR_USR
;
863 if (flags
& CPC_COUNT_SYSTEM
)
864 cfg
->p4_escr
|= ESCR_OS
;
867 cfg
->p4_rawpic
= preset
& MASK40
;
870 * Even on non-HT P4s, Intel states the active_thread field (marked as
871 * "reserved" for the non-HT chips) must be set to all 1s.
873 cfg
->p4_cccr
= CCCR_INIT
| (active_thread
<< CCCR_ACTV_THR_SHIFT
);
875 cfg
->p4_cccr
|= CCCR_COMPARE
;
877 cfg
->p4_cccr
|= CCCR_COMPLEMENT
;
878 cfg
->p4_cccr
|= threshold
<< CCCR_THRESHOLD_SHIFT
;
880 cfg
->p4_cccr
|= CCCR_EDGE
;
881 cfg
->p4_cccr
|= p4_escrs
[cfg
->p4_escr_ndx
].pe_num
882 << CCCR_ESCR_SEL_SHIFT
;
883 if (flags
& CPC_OVF_NOTIFY_EMT
) {
885 cfg
->p4_flags
|= P4_PMI
;
888 * If the user has asked for notification of overflows,
889 * we automatically program the hardware to generate an
890 * interrupt on overflow.
892 * This can only be programmed now if this P4 doesn't
893 * have HyperThreading. If it does, we must wait until
894 * we know which logical CPU we'll be programming.
896 cfg
->p4_cccr
|= CCCR_OVF_PMI
;
906 p4_pcbe_program(void *token
)
910 p4_pcbe_config_t
*cfgs
[18];
914 build_cfgs(cfgs
, NULL
, token
);
916 if (p4_rdpmc_avail
) {
917 ulong_t curcr4
= getcr4();
918 if (kcpc_allow_nonpriv(token
))
919 setcr4(curcr4
| CR4_PCE
);
921 setcr4(curcr4
& ~CR4_PCE
);
925 * Ideally we would start all counters with a single operation, but in
926 * P4 each counter is enabled individually via its CCCR. To minimize the
927 * probe effect of enabling the counters, we do it in two passes: the
928 * first programs the counter and ESCR, and the second programs the
929 * CCCR (and thus enables the counter).
932 int lid
= cpuid_get_clogid(CPU
); /* Logical ID of CPU */
934 for (i
= 0; i
< 18; i
++) {
939 escr
= (uint64_t)cfgs
[i
]->p4_escr
;
941 if (cfgs
[i
]->p4_flags
& P4_THIS_USR
)
942 escr
|= (lid
== 0) ? ESCR_T0_USR
: ESCR_T1_USR
;
943 if (cfgs
[i
]->p4_flags
& P4_THIS_SYS
)
944 escr
|= (lid
== 0) ? ESCR_T0_OS
: ESCR_T1_OS
;
945 if (cfgs
[i
]->p4_flags
& P4_SIBLING_USR
)
946 escr
|= (lid
== 0) ? ESCR_T1_USR
: ESCR_T0_USR
;
947 if (cfgs
[i
]->p4_flags
& P4_SIBLING_SYS
)
948 escr
|= (lid
== 0) ? ESCR_T1_OS
: ESCR_T0_OS
;
950 wrmsr(p4_ctrs
[i
].pc_caddr
, cfgs
[i
]->p4_rawpic
);
951 wrmsr(p4_escrs
[cfgs
[i
]->p4_escr_ndx
].pe_addr
, escr
);
954 for (i
= 0; i
< 18; i
++) {
957 cccr
= (uint64_t)cfgs
[i
]->p4_cccr
;
959 * We always target the overflow interrupt at the
960 * logical CPU which is doing the counting.
962 if (cfgs
[i
]->p4_flags
& P4_PMI
)
964 CCCR_OVF_PMI_T0
: CCCR_OVF_PMI_T1
;
965 wrmsr(p4_ctrs
[i
].pc_ctladdr
, cccr
);
968 for (i
= 0; i
< 18; i
++) {
971 wrmsr(p4_ctrs
[i
].pc_caddr
, cfgs
[i
]->p4_rawpic
);
972 wrmsr(p4_escrs
[cfgs
[i
]->p4_escr_ndx
].pe_addr
,
973 (uint64_t)cfgs
[i
]->p4_escr
);
976 for (i
= 0; i
< 18; i
++) {
979 wrmsr(p4_ctrs
[i
].pc_ctladdr
,
980 (uint64_t)cfgs
[i
]->p4_cccr
);
986 p4_pcbe_allstop(void)
990 for (i
= 0; i
< 18; i
++)
991 wrmsr(p4_ctrs
[i
].pc_ctladdr
, 0ULL);
993 setcr4(getcr4() & ~CR4_PCE
);
998 p4_pcbe_sample(void *token
)
1000 p4_pcbe_config_t
*cfgs
[18];
1001 uint64_t *addrs
[18];
1002 uint64_t curpic
[18];
1006 for (i
= 0; i
< 18; i
++)
1007 curpic
[i
] = rdmsr(p4_ctrs
[i
].pc_caddr
);
1009 build_cfgs(cfgs
, addrs
, token
);
1011 for (i
= 0; i
< 18; i
++) {
1012 if (cfgs
[i
] == NULL
)
1014 diff
= curpic
[i
] - cfgs
[i
]->p4_rawpic
;
1016 diff
+= (1ll << 40);
1018 DTRACE_PROBE4(p4__pcbe__sample
, int, i
, uint64_t, *addrs
[i
],
1019 uint64_t, curpic
[i
], uint64_t, cfgs
[i
]->p4_rawpic
);
1020 cfgs
[i
]->p4_rawpic
= *addrs
[i
] & MASK40
;
1025 p4_pcbe_free(void *config
)
1027 kmem_free(config
, sizeof (p4_pcbe_config_t
));
1030 static struct modlpcbe modlpcbe
= {
1032 "Pentium 4 Performance Counters",
1036 static struct modlinkage modl
= {
1044 if (p4_pcbe_init() != 0)
1046 return (mod_install(&modl
));
1052 return (mod_remove(&modl
));
1056 _info(struct modinfo
*mi
)
1058 return (mod_info(&modl
, mi
));