 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 */

/*
 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * This file contains preset event names from the Performance Application
 * Programming Interface v3.5 which included the following notice:
 *
 *	Copyright (c) 2005,6
 *	Innovative Computing Labs
 *	Computer Science Department,
 *	University of Tennessee,
 *	All Rights Reserved.
 *
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *    * Redistributions of source code must retain the above copyright notice,
 *      this list of conditions and the following disclaimer.
 *    * Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *    * Neither the name of the University of Tennessee nor the names of its
 *      contributors may be used to endorse or promote products derived from
 *      this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 *
 * This open source software license conforms to the BSD License template.
 */

/*
 * Performance Counter Back-End for Intel processors supporting Architectural
 * Performance Monitoring.
 */
#include <sys/cpuvar.h>
#include <sys/param.h>
#include <sys/cpc_impl.h>
#include <sys/cpc_pcbe.h>
#include <sys/modctl.h>
#include <sys/inttypes.h>
#include <sys/systm.h>
#include <sys/cmn_err.h>
#include <sys/x86_archext.h>
#include <sys/sdt.h>
#include <sys/archsystm.h>
#include <sys/privregs.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/cred.h>
#include <sys/policy.h>
static int core_pcbe_init(void);
static uint_t core_pcbe_ncounters(void);
static const char *core_pcbe_impl_name(void);
static const char *core_pcbe_cpuref(void);
static char *core_pcbe_list_events(uint_t picnum);
static char *core_pcbe_list_attrs(void);
static uint64_t core_pcbe_event_coverage(char *event);
static uint64_t core_pcbe_overflow_bitmap(void);
static int core_pcbe_configure(uint_t picnum, char *event, uint64_t preset,
    uint32_t flags, uint_t nattrs, kcpc_attr_t *attrs, void **data,
    void *token);
static void core_pcbe_program(void *token);
static void core_pcbe_allstop(void);
static void core_pcbe_sample(void *token);
static void core_pcbe_free(void *config);
#define	CORE_GPC	0	/* General-Purpose Counter (GPC) */
#define	CORE_FFC	1	/* Fixed-Function Counter (FFC) */

#define	GPC_BASE_PMC		0x00c1	/* First GPC */
#define	GPC_BASE_PES		0x0186	/* First GPC Event Select register */
#define	FFC_BASE_PMC		0x0309	/* First FFC */
#define	PERF_FIXED_CTR_CTRL	0x038d	/* Used to enable/disable FFCs */
#define	PERF_GLOBAL_STATUS	0x038e	/* Overflow status register */
#define	PERF_GLOBAL_CTRL	0x038f	/* Used to enable/disable counting */
#define	PERF_GLOBAL_OVF_CTRL	0x0390	/* Used to clear overflow status */

/*
 * Processor Event Select register fields
 */
#define	CORE_USR	(1ULL << 16)	/* Count while not in ring 0 */
#define	CORE_OS		(1ULL << 17)	/* Count while in ring 0 */
#define	CORE_EDGE	(1ULL << 18)	/* Enable edge detection */
#define	CORE_PC		(1ULL << 19)	/* Enable pin control */
#define	CORE_INT	(1ULL << 20)	/* Enable interrupt on overflow */
#define	CORE_EN		(1ULL << 22)	/* Enable counting */
#define	CORE_INV	(1ULL << 23)	/* Invert the CMASK */
#define	CORE_ANYTHR	(1ULL << 21)	/* Count event for any thread on core */

#define	CORE_UMASK_SHIFT	8
#define	CORE_UMASK_MASK		0xffu
#define	CORE_CMASK_SHIFT	24
#define	CORE_CMASK_MASK		0xffu
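
/*
 * Illustrative example only; the macro below is hypothetical and is not
 * referenced by the driver.  It shows how the fields above combine into an
 * event-select value: for the architectural event cpu_clk_unhalted.ref_p
 * (event code 0x3c, umask 0x01), counted in both user and kernel mode with
 * the counter enabled, configure_gpc() builds
 * 0x3c | (0x01 << CORE_UMASK_SHIFT) | CORE_USR | CORE_OS | CORE_EN,
 * i.e. 0x43013c.
 */
#define	CORE_EXAMPLE_EVTSEL	\
	(0x3c | (0x01 << CORE_UMASK_SHIFT) | CORE_USR | CORE_OS | CORE_EN)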
/*
 * Fixed-function counter attributes
 */
#define	CORE_FFC_OS_EN	(1ULL << 0)	/* Count while in ring 0 */
#define	CORE_FFC_USR_EN	(1ULL << 1)	/* Count while not in ring 0 */
#define	CORE_FFC_ANYTHR	(1ULL << 2)	/* Count event for any thread on core */
#define	CORE_FFC_PMI	(1ULL << 3)	/* Enable interrupt on overflow */
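
/*
 * Illustrative example only; the macro below is hypothetical and is not
 * referenced by the driver.  Each fixed-function counter gets a 4-bit field
 * in PERF_FIXED_CTR_CTRL (see CORE_FFC_ATTR_SIZE below), so enabling FFC1
 * in both rings with overflow interrupts corresponds to
 * (CORE_FFC_OS_EN | CORE_FFC_USR_EN | CORE_FFC_PMI) << (1 * 4), i.e. 0xb0,
 * which is the kind of value core_pcbe_program() accumulates into
 * perf_fixed_ctr_ctrl.
 */
#define	CORE_EXAMPLE_FFC1_CTRL	\
	((CORE_FFC_OS_EN | CORE_FFC_USR_EN | CORE_FFC_PMI) << (1 * 4))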
/*
 * Number of bits for specifying each FFC's attributes in the control register
 */
#define	CORE_FFC_ATTR_SIZE	4

/*
 * CondChgd and OvfBuffer fields of the global status and overflow control
 * registers
 */
#define	CONDCHGD			(1ULL << 63)
#define	OVFBUFFER			(1ULL << 62)
#define	MASK_CONDCHGD_OVFBUFFER		(CONDCHGD | OVFBUFFER)

#define	ALL_STOPPED	0ULL

#define	BITMASK_XBITS(x)	((1ull << (x)) - 1ull)

/*
 * Only the lower 32 bits can be written to in the general-purpose
 * counters.  The higher bits are extended from bit 31: all ones if
 * bit 31 is one and all zeros otherwise.
 *
 * The fixed-function counters do not have this restriction.
 */
#define	BITS_EXTENDED_FROM_31	(BITMASK_XBITS(width_gpc) & ~BITMASK_XBITS(31))
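
/*
 * Illustrative note (assumes width_gpc == 40, a common GPC width): with a
 * 40-bit counter, BITS_EXTENDED_FROM_31 evaluates to 0xff80000000.  A preset
 * such as 0x7fffffff (bits 31..39 all zero) or 0xff80000000 (bits 31..39 all
 * one) can be written directly, while a mixed value such as 0x80000000 is
 * rejected by configure_gpc() with CPC_ATTRIBUTE_OUT_OF_RANGE.
 */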
#define	WRMSR(msr, value)						\
	wrmsr((msr), (value));						\
	DTRACE_PROBE2(wrmsr, uint64_t, (msr), uint64_t, (value));

#define	RDMSR(msr, value)						\
	(value) = rdmsr((msr));						\
	DTRACE_PROBE2(rdmsr, uint64_t, (msr), uint64_t, (value));
typedef struct core_pcbe_config {
	uint64_t	core_rawpic;
	uint64_t	core_ctl;	/* Event Select bits */
	uint64_t	core_pmc;	/* Counter register address */
	uint64_t	core_pes;	/* Event Select register address */
	uint_t		core_picno;
	uint8_t		core_pictype;	/* CORE_GPC or CORE_FFC */
} core_pcbe_config_t;
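
/*
 * Illustrative example (hypothetical values, not part of the driver logic):
 * a configuration produced by configure_gpc() for GPC 0 counting
 * inst_retired.any_p (event code 0xc0, umask 0) in user and kernel mode
 * would look roughly like:
 *
 *	core_pictype = CORE_GPC
 *	core_pmc     = GPC_BASE_PMC + 0 = 0x0c1   (IA32_PMC0)
 *	core_pes     = GPC_BASE_PES + 0 = 0x186   (IA32_PERFEVTSEL0)
 *	core_ctl     = 0xc0 | CORE_USR | CORE_OS | CORE_EN = 0x4300c0
 *	core_rawpic  = preset & mask_gpc
 */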
pcbe_ops_t core_pcbe_ops = {
	PCBE_VER_1,			/* pcbe_ver */
	CPC_CAP_OVERFLOW_INTERRUPT | CPC_CAP_OVERFLOW_PRECISE,	/* pcbe_caps */
	core_pcbe_ncounters,		/* pcbe_ncounters */
	core_pcbe_impl_name,		/* pcbe_impl_name */
	core_pcbe_cpuref,		/* pcbe_cpuref */
	core_pcbe_list_events,		/* pcbe_list_events */
	core_pcbe_list_attrs,		/* pcbe_list_attrs */
	core_pcbe_event_coverage,	/* pcbe_event_coverage */
	core_pcbe_overflow_bitmap,	/* pcbe_overflow_bitmap */
	core_pcbe_configure,		/* pcbe_configure */
	core_pcbe_program,		/* pcbe_program */
	core_pcbe_allstop,		/* pcbe_allstop */
	core_pcbe_sample,		/* pcbe_sample */
	core_pcbe_free			/* pcbe_free */
};
struct nametable_core_uarch {
	char		*name;
	uint64_t	restricted_bits;
	uint8_t		event_num;
};

#define	NT_END	0xFF

/*
 * Counting an event for all cores or all bus agents requires the cpc_cpu
 * privilege.
 */
#define	ALL_CORES	(1ULL << 15)
#define	ALL_AGENTS	(1ULL << 13)

struct generic_events {
	char		*name;
	uint64_t	event_num;
	uint64_t	umask;
};
static const struct generic_events cmn_generic_events[] = {
	{ "PAPI_tot_cyc", 0x3c, 0x00 }, /* cpu_clk_unhalted.thread_p/core */
	{ "PAPI_tot_ins", 0xc0, 0x00 }, /* inst_retired.any_p */
	{ "PAPI_br_ins",  0xc4, 0x0c }, /* br_inst_retired.taken */
	{ "PAPI_br_msp",  0xc5, 0x00 }, /* br_inst_retired.mispred */
	{ "PAPI_br_ntk",  0xc4, 0x03 },
		/* br_inst_retired.pred_not_taken|mispred_not_taken */
	{ "PAPI_br_prc",  0xc4, 0x05 },
		/* br_inst_retired.pred_not_taken|pred_taken */
	{ "PAPI_hw_int",  0xc8, 0x00 }, /* hw_int_rvc */
	{ "PAPI_tot_iis", 0xaa, 0x01 }, /* macro_insts.decoded */
	{ "PAPI_l1_dca",  0x43, 0x01 }, /* l1d_all_ref */
	{ "PAPI_l1_icm",  0x81, 0x00 }, /* l1i_misses */
	{ "PAPI_l1_icr",  0x80, 0x00 }, /* l1i_reads */
	{ "PAPI_l1_tcw",  0x41, 0x0f }, /* l1d_cache_st.mesi */
	{ "PAPI_l2_stm",  0x2a, 0x41 }, /* l2_st.self.i_state */
	{ "PAPI_l2_tca",  0x2e, 0x4f }, /* l2_rqsts.self.demand.mesi */
	{ "PAPI_l2_tch",  0x2e, 0x4e }, /* l2_rqsts.mes */
	{ "PAPI_l2_tcm",  0x2e, 0x41 }, /* l2_rqsts.self.demand.i_state */
	{ "PAPI_l2_tcw",  0x2a, 0x4f }, /* l2_st.self.mesi */
	{ "PAPI_ld_ins",  0xc0, 0x01 }, /* inst_retired.loads */
	{ "PAPI_lst_ins", 0xc0, 0x03 }, /* inst_retired.loads|stores */
	{ "PAPI_sr_ins",  0xc0, 0x02 }, /* inst_retired.stores */
	{ "PAPI_tlb_dm",  0x08, 0x01 }, /* dtlb_misses.any */
	{ "PAPI_tlb_im",  0x82, 0x12 }, /* itlb.small_miss|large_miss */
	{ "PAPI_tlb_tl",  0x0c, 0x03 }, /* page_walks */
	{ "",		  NT_END, 0 }
};

static const struct generic_events generic_events_pic0[] = {
	{ "PAPI_l1_dcm",  0xcb, 0x01 }, /* mem_load_retired.l1d_miss */
	{ "",		  NT_END, 0 }
};
/*
 * The events listed in the following table can be counted on all
 * general-purpose counters on processors of the Penryn and Merom families.
 */
static const struct nametable_core_uarch cmn_gpc_events_core_uarch[] = {
	/* Alphabetical order of event name */

	{ "baclears", 0x0, 0xe6 },
	{ "bogus_br", 0x0, 0xe4 },
	{ "br_bac_missp_exec", 0x0, 0x8a },
	{ "br_call_exec", 0x0, 0x92 },
	{ "br_call_missp_exec", 0x0, 0x93 },
	{ "br_cnd_exec", 0x0, 0x8b },
	{ "br_cnd_missp_exec", 0x0, 0x8c },
	{ "br_ind_call_exec", 0x0, 0x94 },
	{ "br_ind_exec", 0x0, 0x8d },
	{ "br_ind_missp_exec", 0x0, 0x8e },
	{ "br_inst_decoded", 0x0, 0xe0 },
	{ "br_inst_exec", 0x0, 0x88 },
	{ "br_inst_retired", 0x0, 0xc4 },
	{ "br_inst_retired_mispred", 0x0, 0xc5 },
	{ "br_missp_exec", 0x0, 0x89 },
	{ "br_ret_bac_missp_exec", 0x0, 0x91 },
	{ "br_ret_exec", 0x0, 0x8f },
	{ "br_ret_missp_exec", 0x0, 0x90 },
	{ "br_tkn_bubble_1", 0x0, 0x97 },
	{ "br_tkn_bubble_2", 0x0, 0x98 },
	{ "bus_bnr_drv", ALL_AGENTS, 0x61 },
	{ "bus_data_rcv", ALL_CORES, 0x64 },
	{ "bus_drdy_clocks", ALL_AGENTS, 0x62 },
	{ "bus_hit_drv", ALL_AGENTS, 0x7a },
	{ "bus_hitm_drv", ALL_AGENTS, 0x7b },
	{ "bus_io_wait", ALL_CORES, 0x7f },
	{ "bus_lock_clocks", ALL_CORES | ALL_AGENTS, 0x63 },
	{ "bus_request_outstanding", ALL_CORES | ALL_AGENTS, 0x60 },
	{ "bus_trans_any", ALL_CORES | ALL_AGENTS, 0x70 },
	{ "bus_trans_brd", ALL_CORES | ALL_AGENTS, 0x65 },
	{ "bus_trans_burst", ALL_CORES | ALL_AGENTS, 0x6e },
	{ "bus_trans_def", ALL_CORES | ALL_AGENTS, 0x6d },
	{ "bus_trans_ifetch", ALL_CORES | ALL_AGENTS, 0x68 },
	{ "bus_trans_inval", ALL_CORES | ALL_AGENTS, 0x69 },
	{ "bus_trans_io", ALL_CORES | ALL_AGENTS, 0x6c },
	{ "bus_trans_mem", ALL_CORES | ALL_AGENTS, 0x6f },
	{ "bus_trans_p", ALL_CORES | ALL_AGENTS, 0x6b },
	{ "bus_trans_pwr", ALL_CORES | ALL_AGENTS, 0x6a },
	{ "bus_trans_rfo", ALL_CORES | ALL_AGENTS, 0x66 },
	{ "bus_trans_wb", ALL_CORES | ALL_AGENTS, 0x67 },
	{ "busq_empty", ALL_CORES, 0x7d },
	{ "cmp_snoop", ALL_CORES, 0x78 },
	{ "cpu_clk_unhalted", 0x0, 0x3c },
	{ "cycles_int", 0x0, 0xc6 },
	{ "cycles_l1i_mem_stalled", 0x0, 0x86 },
	{ "dtlb_misses", 0x0, 0x08 },
	{ "eist_trans", 0x0, 0x3a },
	{ "esp", 0x0, 0xab },
	{ "ext_snoop", ALL_AGENTS, 0x77 },
	{ "fp_mmx_trans", 0x0, 0xcc },
	{ "hw_int_rcv", 0x0, 0xc8 },
	{ "ild_stall", 0x0, 0x87 },
	{ "inst_queue", 0x0, 0x83 },
	{ "inst_retired", 0x0, 0xc0 },
	{ "itlb", 0x0, 0x82 },
	{ "itlb_miss_retired", 0x0, 0xc9 },
	{ "l1d_all_ref", 0x0, 0x43 },
	{ "l1d_cache_ld", 0x0, 0x40 },
	{ "l1d_cache_lock", 0x0, 0x42 },
	{ "l1d_cache_st", 0x0, 0x41 },
	{ "l1d_m_evict", 0x0, 0x47 },
	{ "l1d_m_repl", 0x0, 0x46 },
	{ "l1d_pend_miss", 0x0, 0x48 },
	{ "l1d_prefetch", 0x0, 0x4e },
	{ "l1d_repl", 0x0, 0x45 },
	{ "l1d_split", 0x0, 0x49 },
	{ "l1i_misses", 0x0, 0x81 },
	{ "l1i_reads", 0x0, 0x80 },
	{ "l2_ads", ALL_CORES, 0x21 },
	{ "l2_dbus_busy_rd", ALL_CORES, 0x23 },
	{ "l2_ifetch", ALL_CORES, 0x28 },
	{ "l2_ld", ALL_CORES, 0x29 },
	{ "l2_lines_in", ALL_CORES, 0x24 },
	{ "l2_lines_out", ALL_CORES, 0x26 },
	{ "l2_lock", ALL_CORES, 0x2b },
	{ "l2_m_lines_in", ALL_CORES, 0x25 },
	{ "l2_m_lines_out", ALL_CORES, 0x27 },
	{ "l2_no_req", ALL_CORES, 0x32 },
	{ "l2_reject_busq", ALL_CORES, 0x30 },
	{ "l2_rqsts", ALL_CORES, 0x2e },
	{ "l2_st", ALL_CORES, 0x2a },
	{ "load_block", 0x0, 0x03 },
	{ "load_hit_pre", 0x0, 0x4c },
	{ "machine_nukes", 0x0, 0xc3 },
	{ "macro_insts", 0x0, 0xaa },
	{ "memory_disambiguation", 0x0, 0x09 },
	{ "misalign_mem_ref", 0x0, 0x05 },
	{ "page_walks", 0x0, 0x0c },
	{ "pref_rqsts_dn", 0x0, 0xf8 },
	{ "pref_rqsts_up", 0x0, 0xf0 },
	{ "rat_stalls", 0x0, 0xd2 },
	{ "resource_stalls", 0x0, 0xdc },
	{ "rs_uops_dispatched", 0x0, 0xa0 },
	{ "seg_reg_renames", 0x0, 0xd5 },
	{ "seg_rename_stalls", 0x0, 0xd4 },
	{ "segment_reg_loads", 0x0, 0x06 },
	{ "simd_assist", 0x0, 0xcd },
	{ "simd_comp_inst_retired", 0x0, 0xca },
	{ "simd_inst_retired", 0x0, 0xc7 },
	{ "simd_instr_retired", 0x0, 0xce },
	{ "simd_sat_instr_retired", 0x0, 0xcf },
	{ "simd_sat_uop_exec", 0x0, 0xb1 },
	{ "simd_uop_type_exec", 0x0, 0xb3 },
	{ "simd_uops_exec", 0x0, 0xb0 },
	{ "snoop_stall_drv", ALL_CORES | ALL_AGENTS, 0x7e },
	{ "sse_pre_exec", 0x0, 0x07 },
	{ "sse_pre_miss", 0x0, 0x4b },
	{ "store_block", 0x0, 0x04 },
	{ "thermal_trip", 0x0, 0x3b },
	{ "uops_retired", 0x0, 0xc2 },
	{ "x87_ops_retired", 0x0, 0xc1 },
	{ "", 0x0, NT_END }
};
/*
 * If any of the pic-specific events require privileges, make sure to add a
 * check in configure_gpc() to find whether an event hard-coded as a number
 * by the user has any privilege requirements.
 */
static const struct nametable_core_uarch pic0_events[] = {
	/* Alphabetical order of event name */

	{ "cycles_div_busy", 0x0, 0x14 },
	{ "fp_comp_ops_exe", 0x0, 0x10 },
	{ "idle_during_div", 0x0, 0x18 },
	{ "mem_load_retired", 0x0, 0xcb },
	{ "rs_uops_dispatched_port", 0x0, 0xa1 },
	{ "", 0x0, NT_END }
};

static const struct nametable_core_uarch pic1_events[] = {
	/* Alphabetical order of event name */

	{ "delayed_bypass", 0x0, 0x19 },
	{ "div", 0x0, 0x13 },
	{ "fp_assist", 0x0, 0x11 },
	{ "mul", 0x0, 0x12 },
	{ "", 0x0, NT_END }
};
/* FFC entries must be in order */
static char *ffc_names_non_htt[] = {
	"inst_retired.any",
	"cpu_clk_unhalted.core",
	"cpu_clk_unhalted.ref",
	""
};

static char *ffc_names_htt[] = {
	"inst_retired.any",
	"cpu_clk_unhalted.thread",
	"cpu_clk_unhalted.ref",
	""
};

static char *ffc_genericnames[] = {
	"PAPI_tot_ins",
	"PAPI_tot_cyc",
	"",
	""
};
static char **ffc_names = NULL;
static char **ffc_allnames = NULL;
static char **gpc_names = NULL;
static uint32_t versionid;
static uint64_t num_gpc;
static uint64_t width_gpc;
static uint64_t mask_gpc;
static uint64_t num_ffc;
static uint64_t width_ffc;
static uint64_t mask_ffc;
static uint_t total_pmc;
static uint64_t control_ffc;
static uint64_t control_gpc;
static uint64_t control_mask;
static uint32_t arch_events_vector;

#define	IMPL_NAME_LEN 100
static char core_impl_name[IMPL_NAME_LEN];
static const char *core_cpuref =
	"See Appendix A of the \"Intel 64 and IA-32 Architectures Software" \
	" Developer's Manual Volume 3B: System Programming Guide, Part 2\"" \
	" Order Number: 253669-026US, February 2008";
struct events_table_t {
	uint8_t		eventselect;
	uint8_t		unitmask;
	uint64_t	supported_counters;
	char		*name;
};

/* Used to describe which counters support an event */
#define	C(x)	(1 << (x))
#define	C0	C(0)
#define	C1	C(1)
#define	C2	C(2)
#define	C3	C(3)
#define	C_ALL	0xFFFFFFFFFFFFFFFF

/* Architectural events */
#define	ARCH_EVENTS_COMMON					\
	{ 0xc0, 0x00, C_ALL, "inst_retired.any_p" },		\
	{ 0x3c, 0x01, C_ALL, "cpu_clk_unhalted.ref_p" },	\
	{ 0x2e, 0x4f, C_ALL, "longest_lat_cache.reference" },	\
	{ 0x2e, 0x41, C_ALL, "longest_lat_cache.miss" },	\
	{ 0xc4, 0x00, C_ALL, "br_inst_retired.all_branches" },	\
	{ 0xc5, 0x00, C_ALL, "br_misp_retired.all_branches" }

static const struct events_table_t arch_events_table_non_htt[] = {
	{ 0x3c, 0x00, C_ALL, "cpu_clk_unhalted.core" },
	ARCH_EVENTS_COMMON
};

static const struct events_table_t arch_events_table_htt[] = {
	{ 0x3c, 0x00, C_ALL, "cpu_clk_unhalted.thread_p" },
	ARCH_EVENTS_COMMON
};

static char *arch_genevents_table[] = {
	"PAPI_tot_cyc",	/* cpu_clk_unhalted.thread_p/core */
	"PAPI_tot_ins",	/* inst_retired.any_p */
	"",		/* cpu_clk_unhalted.ref_p */
	"",		/* longest_lat_cache.reference */
	"",		/* longest_lat_cache.miss */
	"",		/* br_inst_retired.all_branches */
	"",		/* br_misp_retired.all_branches */
};

static const struct events_table_t *arch_events_table = NULL;
static uint64_t known_arch_events;
static uint64_t known_ffc_num;
537 #define GENERICEVENTS_FAM6_NHM \
538 { 0xc4, 0x01, C0|C1|C2|C3, "PAPI_br_cn" }, /* br_inst_retired.conditional */ \
539 { 0x1d, 0x01, C0|C1|C2|C3, "PAPI_hw_int" }, /* hw_int.rcx */ \
540 { 0x17, 0x01, C0|C1|C2|C3, "PAPI_tot_iis" }, /* inst_queue_writes */ \
541 { 0x43, 0x01, C0|C1, "PAPI_l1_dca" }, /* l1d_all_ref.any */ \
542 { 0x24, 0x03, C0|C1|C2|C3, "PAPI_l1_dcm" }, /* l2_rqsts. loads and rfos */ \
543 { 0x40, 0x0f, C0|C1|C2|C3, "PAPI_l1_dcr" }, /* l1d_cache_ld.mesi */ \
544 { 0x41, 0x0f, C0|C1|C2|C3, "PAPI_l1_dcw" }, /* l1d_cache_st.mesi */ \
545 { 0x80, 0x03, C0|C1|C2|C3, "PAPI_l1_ica" }, /* l1i.reads */ \
546 { 0x80, 0x01, C0|C1|C2|C3, "PAPI_l1_ich" }, /* l1i.hits */ \
547 { 0x80, 0x02, C0|C1|C2|C3, "PAPI_l1_icm" }, /* l1i.misses */ \
548 { 0x80, 0x03, C0|C1|C2|C3, "PAPI_l1_icr" }, /* l1i.reads */ \
549 { 0x24, 0x33, C0|C1|C2|C3, "PAPI_l1_ldm" }, /* l2_rqsts. loads and ifetches */\
550 { 0x24, 0xff, C0|C1|C2|C3, "PAPI_l1_tcm" }, /* l2_rqsts.references */ \
551 { 0x24, 0x02, C0|C1|C2|C3, "PAPI_l2_ldm" }, /* l2_rqsts.ld_miss */ \
552 { 0x24, 0x08, C0|C1|C2|C3, "PAPI_l2_stm" }, /* l2_rqsts.rfo_miss */ \
553 { 0x24, 0x3f, C0|C1|C2|C3, "PAPI_l2_tca" }, \
554 /* l2_rqsts. loads, rfos and ifetches */ \
555 { 0x24, 0x15, C0|C1|C2|C3, "PAPI_l2_tch" }, \
556 /* l2_rqsts. ld_hit, rfo_hit and ifetch_hit */ \
557 { 0x24, 0x2a, C0|C1|C2|C3, "PAPI_l2_tcm" }, \
558 /* l2_rqsts. ld_miss, rfo_miss and ifetch_miss */ \
559 { 0x24, 0x33, C0|C1|C2|C3, "PAPI_l2_tcr" }, /* l2_rqsts. loads and ifetches */\
560 { 0x24, 0x0c, C0|C1|C2|C3, "PAPI_l2_tcw" }, /* l2_rqsts.rfos */ \
561 { 0x2e, 0x4f, C0|C1|C2|C3, "PAPI_l3_tca" }, /* l3_lat_cache.reference */ \
562 { 0x2e, 0x41, C0|C1|C2|C3, "PAPI_l3_tcm" }, /* l3_lat_cache.misses */ \
563 { 0x0b, 0x01, C0|C1|C2|C3, "PAPI_ld_ins" }, /* mem_inst_retired.loads */ \
564 { 0x0b, 0x03, C0|C1|C2|C3, "PAPI_lst_ins" }, \
565 /* mem_inst_retired.loads and stores */ \
566 { 0x26, 0xf0, C0|C1|C2|C3, "PAPI_prf_dm" }, /* l2_data_rqsts.prefetch.mesi */ \
567 { 0x0b, 0x02, C0|C1|C2|C3, "PAPI_sr_ins" }, /* mem_inst_retired.stores */ \
568 { 0x49, 0x01, C0|C1|C2|C3, "PAPI_tlb_dm" }, /* dtlb_misses.any */ \
569 { 0x85, 0x01, C0|C1|C2|C3, "PAPI_tlb_im" } /* itlb_misses.any */
572 #define EVENTS_FAM6_NHM \
574 { 0x80, 0x04, C0|C1|C2|C3, "l1i.cycles_stalled" }, \
575 { 0x80, 0x01, C0|C1|C2|C3, "l1i.hits" }, \
576 { 0x80, 0x02, C0|C1|C2|C3, "l1i.misses" }, \
578 { 0x80, 0x03, C0|C1|C2|C3, "l1i.reads" }, \
579 { 0x82, 0x01, C0|C1|C2|C3, "large_itlb.hit" }, \
580 { 0x87, 0x0F, C0|C1|C2|C3, "ild_stall.any" }, \
582 { 0x87, 0x04, C0|C1|C2|C3, "ild_stall.iq_full" }, \
583 { 0x87, 0x01, C0|C1|C2|C3, "ild_stall.lcp" }, \
584 { 0x87, 0x02, C0|C1|C2|C3, "ild_stall.mru" }, \
586 { 0x87, 0x08, C0|C1|C2|C3, "ild_stall.regen" }, \
587 { 0xE6, 0x02, C0|C1|C2|C3, "baclear.bad_target" }, \
588 { 0xE6, 0x01, C0|C1|C2|C3, "baclear.clear" }, \
590 { 0xE8, 0x01, C0|C1|C2|C3, "bpu_clears.early" }, \
591 { 0xE8, 0x02, C0|C1|C2|C3, "bpu_clears.late" }, \
592 { 0xE5, 0x01, C0|C1|C2|C3, "bpu_missed_call_ret" }, \
594 { 0xE0, 0x01, C0|C1|C2|C3, "br_inst_decoded" }, \
595 { 0x88, 0x7F, C0|C1|C2|C3, "br_inst_exec.any" }, \
596 { 0x88, 0x01, C0|C1|C2|C3, "br_inst_exec.cond" }, \
598 { 0x88, 0x02, C0|C1|C2|C3, "br_inst_exec.direct" }, \
599 { 0x88, 0x10, C0|C1|C2|C3, "br_inst_exec.direct_near_call" }, \
600 { 0x88, 0x20, C0|C1|C2|C3, "br_inst_exec.indirect_near_call" }, \
602 { 0x88, 0x04, C0|C1|C2|C3, "br_inst_exec.indirect_non_call" }, \
603 { 0x88, 0x30, C0|C1|C2|C3, "br_inst_exec.near_calls" }, \
604 { 0x88, 0x07, C0|C1|C2|C3, "br_inst_exec.non_calls" }, \
606 { 0x88, 0x08, C0|C1|C2|C3, "br_inst_exec.return_near" }, \
607 { 0x88, 0x40, C0|C1|C2|C3, "br_inst_exec.taken" }, \
608 { 0x89, 0x7F, C0|C1|C2|C3, "br_misp_exec.any" }, \
610 { 0x89, 0x01, C0|C1|C2|C3, "br_misp_exec.cond" }, \
611 { 0x89, 0x02, C0|C1|C2|C3, "br_misp_exec.direct" }, \
612 { 0x89, 0x10, C0|C1|C2|C3, "br_misp_exec.direct_near_call" }, \
614 { 0x89, 0x20, C0|C1|C2|C3, "br_misp_exec.indirect_near_call" }, \
615 { 0x89, 0x04, C0|C1|C2|C3, "br_misp_exec.indirect_non_call" }, \
616 { 0x89, 0x30, C0|C1|C2|C3, "br_misp_exec.near_calls" }, \
618 { 0x89, 0x07, C0|C1|C2|C3, "br_misp_exec.non_calls" }, \
619 { 0x89, 0x08, C0|C1|C2|C3, "br_misp_exec.return_near" }, \
620 { 0x89, 0x40, C0|C1|C2|C3, "br_misp_exec.taken" }, \
622 { 0x17, 0x01, C0|C1|C2|C3, "inst_queue_writes" }, \
623 { 0x1E, 0x01, C0|C1|C2|C3, "inst_queue_write_cycles" }, \
624 { 0xA7, 0x01, C0|C1|C2|C3, "baclear_force_iq" }, \
626 { 0xD0, 0x01, C0|C1|C2|C3, "macro_insts.decoded" }, \
627 { 0xA6, 0x01, C0|C1|C2|C3, "macro_insts.fusions_decoded" }, \
628 { 0x19, 0x01, C0|C1|C2|C3, "two_uop_insts_decoded" }, \
630 { 0x18, 0x01, C0|C1|C2|C3, "inst_decoded.dec0" }, \
631 { 0xD1, 0x04, C0|C1|C2|C3, "uops_decoded.esp_folding" }, \
632 { 0xD1, 0x08, C0|C1|C2|C3, "uops_decoded.esp_sync" }, \
634 { 0xD1, 0x02, C0|C1|C2|C3, "uops_decoded.ms" }, \
635 { 0x20, 0x01, C0|C1|C2|C3, "lsd_overflow" }, \
636 { 0x0E, 0x01, C0|C1|C2|C3, "uops_issued.any" }, \
638 { 0x0E, 0x02, C0|C1|C2|C3, "uops_issued.fused" }, \
639 { 0xA2, 0x20, C0|C1|C2|C3, "resource_stalls.fpcw" }, \
640 { 0xA2, 0x02, C0|C1|C2|C3, "resource_stalls.load" }, \
642 { 0xA2, 0x40, C0|C1|C2|C3, "resource_stalls.mxcsr" }, \
643 { 0xA2, 0x04, C0|C1|C2|C3, "resource_stalls.rs_full" }, \
644 { 0xA2, 0x08, C0|C1|C2|C3, "resource_stalls.store" }, \
646 { 0xA2, 0x01, C0|C1|C2|C3, "resource_stalls.any" }, \
647 { 0xD2, 0x01, C0|C1|C2|C3, "rat_stalls.flags" }, \
648 { 0xD2, 0x02, C0|C1|C2|C3, "rat_stalls.registers" }, \
650 { 0xD2, 0x04, C0|C1|C2|C3, "rat_stalls.rob_read_port" }, \
651 { 0xD2, 0x08, C0|C1|C2|C3, "rat_stalls.scoreboard" }, \
652 { 0xD2, 0x0F, C0|C1|C2|C3, "rat_stalls.any" }, \
654 { 0xD4, 0x01, C0|C1|C2|C3, "seg_rename_stalls" }, \
655 { 0xD5, 0x01, C0|C1|C2|C3, "es_reg_renames" }, \
656 { 0x10, 0x02, C0|C1|C2|C3, "fp_comp_ops_exe.mmx" }, \
658 { 0x10, 0x80, C0|C1|C2|C3, "fp_comp_ops_exe.sse_double_precision" }, \
659 { 0x10, 0x04, C0|C1|C2|C3, "fp_comp_ops_exe.sse_fp" }, \
660 { 0x10, 0x10, C0|C1|C2|C3, "fp_comp_ops_exe.sse_fp_packed" }, \
662 { 0x10, 0x20, C0|C1|C2|C3, "fp_comp_ops_exe.sse_fp_scalar" }, \
663 { 0x10, 0x40, C0|C1|C2|C3, "fp_comp_ops_exe.sse_single_precision" }, \
664 { 0x10, 0x08, C0|C1|C2|C3, "fp_comp_ops_exe.sse2_integer" }, \
666 { 0x10, 0x01, C0|C1|C2|C3, "fp_comp_ops_exe.x87" }, \
667 { 0x14, 0x01, C0|C1|C2|C3, "arith.cycles_div_busy" }, \
668 { 0x14, 0x02, C0|C1|C2|C3, "arith.mul" }, \
670 { 0x12, 0x04, C0|C1|C2|C3, "simd_int_128.pack" }, \
671 { 0x12, 0x20, C0|C1|C2|C3, "simd_int_128.packed_arith" }, \
672 { 0x12, 0x10, C0|C1|C2|C3, "simd_int_128.packed_logical" }, \
674 { 0x12, 0x01, C0|C1|C2|C3, "simd_int_128.packed_mpy" }, \
675 { 0x12, 0x02, C0|C1|C2|C3, "simd_int_128.packed_shift" }, \
676 { 0x12, 0x40, C0|C1|C2|C3, "simd_int_128.shuffle_move" }, \
678 { 0x12, 0x08, C0|C1|C2|C3, "simd_int_128.unpack" }, \
679 { 0xFD, 0x04, C0|C1|C2|C3, "simd_int_64.pack" }, \
680 { 0xFD, 0x20, C0|C1|C2|C3, "simd_int_64.packed_arith" }, \
682 { 0xFD, 0x10, C0|C1|C2|C3, "simd_int_64.packed_logical" }, \
683 { 0xFD, 0x01, C0|C1|C2|C3, "simd_int_64.packed_mpy" }, \
684 { 0xFD, 0x02, C0|C1|C2|C3, "simd_int_64.packed_shift" }, \
686 { 0xFD, 0x40, C0|C1|C2|C3, "simd_int_64.shuffle_move" }, \
687 { 0xFD, 0x08, C0|C1|C2|C3, "simd_int_64.unpack" }, \
688 { 0xB1, 0x01, C0|C1|C2|C3, "uops_executed.port0" }, \
690 { 0xB1, 0x02, C0|C1|C2|C3, "uops_executed.port1" }, \
691 { 0x40, 0x04, C0|C1, "l1d_cache_ld.e_state" }, \
692 { 0x40, 0x01, C0|C1, "l1d_cache_ld.i_state" }, \
694 { 0x40, 0x08, C0|C1, "l1d_cache_ld.m_state" }, \
695 { 0x40, 0x0F, C0|C1, "l1d_cache_ld.mesi" }, \
696 { 0x40, 0x02, C0|C1, "l1d_cache_ld.s_state" }, \
698 { 0x41, 0x04, C0|C1, "l1d_cache_st.e_state" }, \
699 { 0x41, 0x08, C0|C1, "l1d_cache_st.m_state" }, \
700 { 0x41, 0x0F, C0|C1, "l1d_cache_st.mesi" }, \
702 { 0x41, 0x02, C0|C1, "l1d_cache_st.s_state" }, \
703 { 0x42, 0x04, C0|C1, "l1d_cache_lock.e_state" }, \
704 { 0x42, 0x01, C0|C1, "l1d_cache_lock.hit" }, \
706 { 0x42, 0x08, C0|C1, "l1d_cache_lock.m_state" }, \
707 { 0x42, 0x02, C0|C1, "l1d_cache_lock.s_state" }, \
708 { 0x43, 0x01, C0|C1, "l1d_all_ref.any" }, \
710 { 0x43, 0x02, C0|C1, "l1d_all_ref.cacheable" }, \
711 { 0x4B, 0x01, C0|C1, "mmx2_mem_exec.nta" }, \
712 { 0x4C, 0x01, C0|C1, "load_hit_pre" }, \
714 { 0x4E, 0x02, C0|C1, "l1d_prefetch.miss" }, \
715 { 0x4E, 0x01, C0|C1, "l1d_prefetch.requests" }, \
716 { 0x4E, 0x04, C0|C1, "l1d_prefetch.triggers" }, \
718 { 0x51, 0x04, C0|C1, "l1d.m_evict" }, \
719 { 0x51, 0x02, C0|C1, "l1d.m_repl" }, \
720 { 0x51, 0x08, C0|C1, "l1d.m_snoop_evict" }, \
722 { 0x51, 0x01, C0|C1, "l1d.repl" }, \
723 { 0x52, 0x01, C0|C1, "l1d_cache_prefetch_lock_fb_hit" }, \
724 { 0x53, 0x01, C0|C1, "l1d_cache_lock_fb_hit" }, \
726 { 0x63, 0x02, C0|C1, "cache_lock_cycles.l1d" }, \
727 { 0x63, 0x01, C0|C1, "cache_lock_cycles.l1d_l2" }, \
728 { 0x06, 0x04, C0|C1|C2|C3, "store_blocks.at_ret" }, \
730 { 0x06, 0x08, C0|C1|C2|C3, "store_blocks.l1d_block" }, \
731 { 0x06, 0x01, C0|C1|C2|C3, "store_blocks.not_sta" }, \
732 { 0x06, 0x02, C0|C1|C2|C3, "store_blocks.sta" }, \
734 { 0x13, 0x07, C0|C1|C2|C3, "load_dispatch.any" }, \
735 { 0x13, 0x04, C0|C1|C2|C3, "load_dispatch.mob" }, \
736 { 0x13, 0x01, C0|C1|C2|C3, "load_dispatch.rs" }, \
738 { 0x13, 0x02, C0|C1|C2|C3, "load_dispatch.rs_delayed" }, \
739 { 0x08, 0x01, C0|C1|C2|C3, "dtlb_load_misses.any" }, \
740 { 0x08, 0x20, C0|C1|C2|C3, "dtlb_load_misses.pde_miss" }, \
742 { 0x08, 0x02, C0|C1|C2|C3, "dtlb_load_misses.walk_completed" }, \
743 { 0x49, 0x01, C0|C1|C2|C3, "dtlb_misses.any" }, \
744 { 0x49, 0x10, C0|C1|C2|C3, "dtlb_misses.stlb_hit" }, \
746 { 0x49, 0x02, C0|C1|C2|C3, "dtlb_misses.walk_completed" }, \
747 { 0x4F, 0x02, C0|C1|C2|C3, "ept.epde_miss" }, \
748 { 0x4F, 0x08, C0|C1|C2|C3, "ept.epdpe_miss" }, \
750 { 0x85, 0x01, C0|C1|C2|C3, "itlb_misses.any" }, \
751 { 0x85, 0x02, C0|C1|C2|C3, "itlb_misses.walk_completed" }, \
752 { 0x24, 0xAA, C0|C1|C2|C3, "l2_rqsts.miss" }, \
754 { 0x24, 0xFF, C0|C1|C2|C3, "l2_rqsts.references" }, \
755 { 0x24, 0x10, C0|C1|C2|C3, "l2_rqsts.ifetch_hit" }, \
756 { 0x24, 0x20, C0|C1|C2|C3, "l2_rqsts.ifetch_miss" }, \
758 { 0x24, 0x30, C0|C1|C2|C3, "l2_rqsts.ifetches" }, \
759 { 0x24, 0x01, C0|C1|C2|C3, "l2_rqsts.ld_hit" }, \
760 { 0x24, 0x02, C0|C1|C2|C3, "l2_rqsts.ld_miss" }, \
762 { 0x24, 0x03, C0|C1|C2|C3, "l2_rqsts.loads" }, \
763 { 0x24, 0x40, C0|C1|C2|C3, "l2_rqsts.prefetch_hit" }, \
764 { 0x24, 0x80, C0|C1|C2|C3, "l2_rqsts.prefetch_miss" }, \
766 { 0x24, 0xC0, C0|C1|C2|C3, "l2_rqsts.prefetches" }, \
767 { 0x24, 0x04, C0|C1|C2|C3, "l2_rqsts.rfo_hit" }, \
768 { 0x24, 0x08, C0|C1|C2|C3, "l2_rqsts.rfo_miss" }, \
770 { 0x24, 0x0C, C0|C1|C2|C3, "l2_rqsts.rfos" }, \
771 { 0x26, 0xFF, C0|C1|C2|C3, "l2_data_rqsts.any" }, \
772 { 0x26, 0x04, C0|C1|C2|C3, "l2_data_rqsts.demand.e_state" }, \
774 { 0x26, 0x01, C0|C1|C2|C3, "l2_data_rqsts.demand.i_state" }, \
775 { 0x26, 0x08, C0|C1|C2|C3, "l2_data_rqsts.demand.m_state" }, \
776 { 0x26, 0x0F, C0|C1|C2|C3, "l2_data_rqsts.demand.mesi" }, \
778 { 0x26, 0x02, C0|C1|C2|C3, "l2_data_rqsts.demand.s_state" }, \
779 { 0x26, 0x40, C0|C1|C2|C3, "l2_data_rqsts.prefetch.e_state" }, \
780 { 0x26, 0x10, C0|C1|C2|C3, "l2_data_rqsts.prefetch.i_state" }, \
782 { 0x26, 0x80, C0|C1|C2|C3, "l2_data_rqsts.prefetch.m_state" }, \
783 { 0x26, 0xF0, C0|C1|C2|C3, "l2_data_rqsts.prefetch.mesi" }, \
784 { 0x26, 0x20, C0|C1|C2|C3, "l2_data_rqsts.prefetch.s_state" }, \
786 { 0x27, 0x40, C0|C1|C2|C3, "l2_write.lock.e_state" }, \
787 { 0x27, 0x10, C0|C1|C2|C3, "l2_write.lock.i_state" }, \
788 { 0x27, 0x20, C0|C1|C2|C3, "l2_write.lock.s_state" }, \
790 { 0x27, 0x0E, C0|C1|C2|C3, "l2_write.rfo.hit" }, \
791 { 0x27, 0x01, C0|C1|C2|C3, "l2_write.rfo.i_state" }, \
792 { 0x27, 0x08, C0|C1|C2|C3, "l2_write.rfo.m_state" }, \
794 { 0x27, 0x0F, C0|C1|C2|C3, "l2_write.rfo.mesi" }, \
795 { 0x27, 0x02, C0|C1|C2|C3, "l2_write.rfo.s_state" }, \
796 { 0x28, 0x04, C0|C1|C2|C3, "l1d_wb_l2.e_state" }, \
798 { 0x28, 0x01, C0|C1|C2|C3, "l1d_wb_l2.i_state" }, \
799 { 0x28, 0x08, C0|C1|C2|C3, "l1d_wb_l2.m_state" }, \
800 { 0xF0, 0x80, C0|C1|C2|C3, "l2_transactions.any" }, \
802 { 0xF0, 0x20, C0|C1|C2|C3, "l2_transactions.fill" }, \
803 { 0xF0, 0x04, C0|C1|C2|C3, "l2_transactions.ifetch" }, \
804 { 0xF0, 0x10, C0|C1|C2|C3, "l2_transactions.l1d_wb" }, \
806 { 0xF0, 0x01, C0|C1|C2|C3, "l2_transactions.load" }, \
807 { 0xF0, 0x08, C0|C1|C2|C3, "l2_transactions.prefetch" }, \
808 { 0xF0, 0x02, C0|C1|C2|C3, "l2_transactions.rfo" }, \
810 { 0xF0, 0x40, C0|C1|C2|C3, "l2_transactions.wb" }, \
811 { 0xF1, 0x07, C0|C1|C2|C3, "l2_lines_in.any" }, \
812 { 0xF1, 0x04, C0|C1|C2|C3, "l2_lines_in.e_state" }, \
814 { 0xF1, 0x02, C0|C1|C2|C3, "l2_lines_in.s_state" }, \
815 { 0xF2, 0x0F, C0|C1|C2|C3, "l2_lines_out.any" }, \
816 { 0xF2, 0x01, C0|C1|C2|C3, "l2_lines_out.demand_clean" }, \
818 { 0xF2, 0x02, C0|C1|C2|C3, "l2_lines_out.demand_dirty" }, \
819 { 0xF2, 0x04, C0|C1|C2|C3, "l2_lines_out.prefetch_clean" }, \
820 { 0x6C, 0x01, C0|C1|C2|C3, "io_transactions" }, \
822 { 0xB0, 0x80, C0|C1|C2|C3, "offcore_requests.any" }, \
823 { 0xB0, 0x10, C0|C1|C2|C3, "offcore_requests.any.rfo" }, \
824 { 0xB0, 0x40, C0|C1|C2|C3, "offcore_requests.l1d_writeback" }, \
826 { 0xB8, 0x01, C0|C1|C2|C3, "snoop_response.hit" }, \
827 { 0xB8, 0x02, C0|C1|C2|C3, "snoop_response.hite" }, \
828 { 0xB8, 0x04, C0|C1|C2|C3, "snoop_response.hitm" }, \
830 { 0xF4, 0x10, C0|C1|C2|C3, "sq_misc.split_lock" }, \
831 { 0x0B, 0x01, C0|C1|C2|C3, "mem_inst_retired.loads" }, \
832 { 0x0B, 0x02, C0|C1|C2|C3, "mem_inst_retired.stores" }, \
834 { 0xC0, 0x04, C0|C1|C2|C3, "inst_retired.mmx" }, \
835 { 0xC0, 0x02, C0|C1|C2|C3, "inst_retired.x87" }, \
836 { 0xC7, 0x04, C0|C1|C2|C3, "ssex_uops_retired.packed_double" }, \
838 { 0xC7, 0x01, C0|C1|C2|C3, "ssex_uops_retired.packed_single" }, \
839 { 0xC7, 0x08, C0|C1|C2|C3, "ssex_uops_retired.scalar_double" }, \
840 { 0xC7, 0x02, C0|C1|C2|C3, "ssex_uops_retired.scalar_single" }, \
842 { 0xC7, 0x10, C0|C1|C2|C3, "ssex_uops_retired.vector_integer" }, \
843 { 0xC2, 0x01, C0|C1|C2|C3, "uops_retired.any" }, \
844 { 0xC2, 0x04, C0|C1|C2|C3, "uops_retired.macro_fused" }, \
846 { 0xC8, 0x20, C0|C1|C2|C3, "itlb_miss_retired" }, \
847 { 0xCB, 0x80, C0|C1|C2|C3, "mem_load_retired.dtlb_miss" }, \
848 { 0xCB, 0x40, C0|C1|C2|C3, "mem_load_retired.hit_lfb" }, \
850 { 0xCB, 0x01, C0|C1|C2|C3, "mem_load_retired.l1d_hit" }, \
851 { 0xCB, 0x02, C0|C1|C2|C3, "mem_load_retired.l2_hit" }, \
852 { 0xCB, 0x10, C0|C1|C2|C3, "mem_load_retired.llc_miss" }, \
854 { 0xCB, 0x04, C0|C1|C2|C3, "mem_load_retired.llc_unshared_hit" }, \
855 { 0xCB, 0x08, C0|C1|C2|C3, "mem_load_retired.other_core_l2_hit_hitm" }, \
856 { 0x0F, 0x02, C0|C1|C2|C3, "mem_uncore_retired.other_core_l2_hitm" }, \
858 { 0x0F, 0x08, C0|C1|C2|C3, "mem_uncore_retired.remote_cache_local_home_hit" },\
859 { 0x0F, 0x10, C0|C1|C2|C3, "mem_uncore_retired.remote_dram" }, \
860 { 0x0F, 0x20, C0|C1|C2|C3, "mem_uncore_retired.local_dram" }, \
862 { 0x0C, 0x01, C0|C1|C2|C3, "mem_store_retired.dtlb_miss" }, \
863 { 0xC4, 0x01, C0|C1|C2|C3, "br_inst_retired.conditional" }, \
864 { 0xC4, 0x02, C0|C1|C2|C3, "br_inst_retired.near_call" }, \
866 { 0xC5, 0x02, C0|C1|C2|C3, "br_misp_retired.near_call" }, \
867 { 0xDB, 0x01, C0|C1|C2|C3, "uop_unfusion" }, \
868 { 0xF7, 0x01, C0|C1|C2|C3, "fp_assist.all" }, \
870 { 0xF7, 0x04, C0|C1|C2|C3, "fp_assist.input" }, \
871 { 0xF7, 0x02, C0|C1|C2|C3, "fp_assist.output" }, \
872 { 0xCC, 0x03, C0|C1|C2|C3, "fp_mmx_trans.any" }, \
874 { 0xCC, 0x01, C0|C1|C2|C3, "fp_mmx_trans.to_fp" }, \
875 { 0xCC, 0x02, C0|C1|C2|C3, "fp_mmx_trans.to_mmx" }, \
876 { 0xC3, 0x04, C0|C1|C2|C3, "machine_clears.smc" }
878 #define GENERICEVENTS_FAM6_MOD28 \
879 { 0xc4, 0x00, C0|C1, "PAPI_br_ins" }, /* br_inst_retired.any */ \
880 { 0xc5, 0x00, C0|C1, "PAPI_br_msp" }, /* br_inst_retired.mispred */ \
881 { 0xc4, 0x03, C0|C1, "PAPI_br_ntk" }, \
882 /* br_inst_retired.pred_not_taken|mispred_not_taken */ \
883 { 0xc4, 0x05, C0|C1, "PAPI_br_prc" }, \
884 /* br_inst_retired.pred_not_taken|pred_taken */ \
885 { 0xc8, 0x00, C0|C1, "PAPI_hw_int" }, /* hw_int_rcv */ \
886 { 0xaa, 0x03, C0|C1, "PAPI_tot_iis" }, /* macro_insts.all_decoded */ \
887 { 0x40, 0x23, C0|C1, "PAPI_l1_dca" }, /* l1d_cache.l1|st */ \
888 { 0x2a, 0x41, C0|C1, "PAPI_l2_stm" }, /* l2_st.self.i_state */ \
889 { 0x2e, 0x4f, C0|C1, "PAPI_l2_tca" }, /* longest_lat_cache.reference */ \
890 { 0x2e, 0x4e, C0|C1, "PAPI_l2_tch" }, /* l2_rqsts.mes */ \
891 { 0x2e, 0x41, C0|C1, "PAPI_l2_tcm" }, /* longest_lat_cache.miss */ \
892 { 0x2a, 0x4f, C0|C1, "PAPI_l2_tcw" }, /* l2_st.self.mesi */ \
893 { 0x08, 0x07, C0|C1, "PAPI_tlb_dm" }, /* data_tlb_misses.dtlb.miss */ \
894 { 0x82, 0x02, C0|C1, "PAPI_tlb_im" } /* itlb.misses */
897 #define EVENTS_FAM6_MOD28 \
898 { 0x2, 0x81, C0|C1, "store_forwards.good" }, \
899 { 0x6, 0x0, C0|C1, "segment_reg_loads.any" }, \
900 { 0x7, 0x1, C0|C1, "prefetch.prefetcht0" }, \
901 { 0x7, 0x6, C0|C1, "prefetch.sw_l2" }, \
902 { 0x7, 0x8, C0|C1, "prefetch.prefetchnta" }, \
903 { 0x8, 0x7, C0|C1, "data_tlb_misses.dtlb_miss" }, \
904 { 0x8, 0x5, C0|C1, "data_tlb_misses.dtlb_miss_ld" }, \
905 { 0x8, 0x9, C0|C1, "data_tlb_misses.l0_dtlb_miss_ld" }, \
906 { 0x8, 0x6, C0|C1, "data_tlb_misses.dtlb_miss_st" }, \
907 { 0xC, 0x3, C0|C1, "page_walks.cycles" }, \
908 { 0x10, 0x1, C0|C1, "x87_comp_ops_exe.any.s" }, \
909 { 0x10, 0x81, C0|C1, "x87_comp_ops_exe.any.ar" }, \
910 { 0x11, 0x1, C0|C1, "fp_assist" }, \
911 { 0x11, 0x81, C0|C1, "fp_assist.ar" }, \
912 { 0x12, 0x1, C0|C1, "mul.s" }, \
913 { 0x12, 0x81, C0|C1, "mul.ar" }, \
914 { 0x13, 0x1, C0|C1, "div.s" }, \
915 { 0x13, 0x81, C0|C1, "div.ar" }, \
916 { 0x14, 0x1, C0|C1, "cycles_div_busy" }, \
917 { 0x21, 0x0, C0|C1, "l2_ads" }, \
918 { 0x22, 0x0, C0|C1, "l2_dbus_busy" }, \
919 { 0x24, 0x0, C0|C1, "l2_lines_in" }, \
920 { 0x25, 0x0, C0|C1, "l2_m_lines_in" }, \
921 { 0x26, 0x0, C0|C1, "l2_lines_out" }, \
922 { 0x27, 0x0, C0|C1, "l2_m_lines_out" }, \
923 { 0x28, 0x0, C0|C1, "l2_ifetch" }, \
924 { 0x29, 0x0, C0|C1, "l2_ld" }, \
925 { 0x2A, 0x0, C0|C1, "l2_st" }, \
926 { 0x2B, 0x0, C0|C1, "l2_lock" }, \
927 { 0x2E, 0x0, C0|C1, "l2_rqsts" }, \
928 { 0x2E, 0x41, C0|C1, "l2_rqsts.self.demand.i_state" }, \
929 { 0x2E, 0x4F, C0|C1, "l2_rqsts.self.demand.mesi" }, \
930 { 0x30, 0x0, C0|C1, "l2_reject_bus_q" }, \
931 { 0x32, 0x0, C0|C1, "l2_no_req" }, \
932 { 0x3A, 0x0, C0|C1, "eist_trans" }, \
933 { 0x3B, 0xC0, C0|C1, "thermal_trip" }, \
934 { 0x3C, 0x0, C0|C1, "cpu_clk_unhalted.core_p" }, \
935 { 0x3C, 0x1, C0|C1, "cpu_clk_unhalted.bus" }, \
936 { 0x3C, 0x2, C0|C1, "cpu_clk_unhalted.no_other" }, \
937 { 0x40, 0x21, C0|C1, "l1d_cache.ld" }, \
938 { 0x40, 0x22, C0|C1, "l1d_cache.st" }, \
939 { 0x60, 0x0, C0|C1, "bus_request_outstanding" }, \
940 { 0x61, 0x0, C0|C1, "bus_bnr_drv" }, \
941 { 0x62, 0x0, C0|C1, "bus_drdy_clocks" }, \
942 { 0x63, 0x0, C0|C1, "bus_lock_clocks" }, \
943 { 0x64, 0x0, C0|C1, "bus_data_rcv" }, \
944 { 0x65, 0x0, C0|C1, "bus_trans_brd" }, \
945 { 0x66, 0x0, C0|C1, "bus_trans_rfo" }, \
946 { 0x67, 0x0, C0|C1, "bus_trans_wb" }, \
947 { 0x68, 0x0, C0|C1, "bus_trans_ifetch" }, \
948 { 0x69, 0x0, C0|C1, "bus_trans_inval" }, \
949 { 0x6A, 0x0, C0|C1, "bus_trans_pwr" }, \
950 { 0x6B, 0x0, C0|C1, "bus_trans_p" }, \
951 { 0x6C, 0x0, C0|C1, "bus_trans_io" }, \
952 { 0x6D, 0x0, C0|C1, "bus_trans_def" }, \
953 { 0x6E, 0x0, C0|C1, "bus_trans_burst" }, \
954 { 0x6F, 0x0, C0|C1, "bus_trans_mem" }, \
955 { 0x70, 0x0, C0|C1, "bus_trans_any" }, \
956 { 0x77, 0x0, C0|C1, "ext_snoop" }, \
957 { 0x7A, 0x0, C0|C1, "bus_hit_drv" }, \
958 { 0x7B, 0x0, C0|C1, "bus_hitm_drv" }, \
959 { 0x7D, 0x0, C0|C1, "busq_empty" }, \
960 { 0x7E, 0x0, C0|C1, "snoop_stall_drv" }, \
961 { 0x7F, 0x0, C0|C1, "bus_io_wait" }, \
962 { 0x80, 0x3, C0|C1, "icache.accesses" }, \
963 { 0x80, 0x2, C0|C1, "icache.misses" }, \
964 { 0x82, 0x4, C0|C1, "itlb.flush" }, \
965 { 0x82, 0x2, C0|C1, "itlb.misses" }, \
966 { 0xAA, 0x2, C0|C1, "macro_insts.cisc_decoded" }, \
967 { 0xAA, 0x3, C0|C1, "macro_insts.all_decoded" }, \
968 { 0xB0, 0x0, C0|C1, "simd_uops_exec.s" }, \
969 { 0xB0, 0x80, C0|C1, "simd_uops_exec.ar" }, \
970 { 0xB1, 0x0, C0|C1, "simd_sat_uop_exec.s" }, \
971 { 0xB1, 0x80, C0|C1, "simd_sat_uop_exec.ar" }, \
972 { 0xB3, 0x1, C0|C1, "simd_uop_type_exec.mul.s" }, \
973 { 0xB3, 0x81, C0|C1, "simd_uop_type_exec.mul.ar" }, \
974 { 0xB3, 0x02, C0|C1, "simd_uop_type_exec.shift.s" }, \
975 { 0xB3, 0x82, C0|C1, "simd_uop_type_exec.shift.ar" }, \
976 { 0xB3, 0x04, C0|C1, "simd_uop_type_exec.pack.s" }, \
977 { 0xB3, 0x84, C0|C1, "simd_uop_type_exec.pack.ar" }, \
978 { 0xB3, 0x08, C0|C1, "simd_uop_type_exec.unpack.s" }, \
979 { 0xB3, 0x88, C0|C1, "simd_uop_type_exec.unpack.ar" }, \
980 { 0xB3, 0x10, C0|C1, "simd_uop_type_exec.logical.s" }, \
981 { 0xB3, 0x90, C0|C1, "simd_uop_type_exec.logical.ar" }, \
982 { 0xB3, 0x20, C0|C1, "simd_uop_type_exec.arithmetic.s" }, \
983 { 0xB3, 0xA0, C0|C1, "simd_uop_type_exec.arithmetic.ar" }, \
984 { 0xC2, 0x10, C0|C1, "uops_retired.any" }, \
985 { 0xC3, 0x1, C0|C1, "machine_clears.smc" }, \
986 { 0xC4, 0x0, C0|C1, "br_inst_retired.any" }, \
987 { 0xC4, 0x1, C0|C1, "br_inst_retired.pred_not_taken" }, \
988 { 0xC4, 0x2, C0|C1, "br_inst_retired.mispred_not_taken" }, \
989 { 0xC4, 0x4, C0|C1, "br_inst_retired.pred_taken" }, \
990 { 0xC4, 0x8, C0|C1, "br_inst_retired.mispred_taken" }, \
991 { 0xC4, 0xA, C0|C1, "br_inst_retired.mispred" }, \
992 { 0xC4, 0xC, C0|C1, "br_inst_retired.taken" }, \
993 { 0xC4, 0xF, C0|C1, "br_inst_retired.any1" }, \
994 { 0xC6, 0x1, C0|C1, "cycles_int_masked.cycles_int_masked" }, \
995 { 0xC6, 0x2, C0|C1, \
996 "cycles_int_masked.cycles_int_pending_and_masked" }, \
997 { 0xC7, 0x1, C0|C1, "simd_inst_retired.packed_single" }, \
998 { 0xC7, 0x2, C0|C1, "simd_inst_retired.scalar_single" }, \
999 { 0xC7, 0x4, C0|C1, "simd_inst_retired.packed_double" }, \
1000 { 0xC7, 0x8, C0|C1, "simd_inst_retired.scalar_double" }, \
1001 { 0xC7, 0x10, C0|C1, "simd_inst_retired.vector" }, \
1002 { 0xC7, 0x1F, C0|C1, "simd_inst_retired.any" }, \
1003 { 0xC8, 0x00, C0|C1, "hw_int_rcv" }, \
1004 { 0xCA, 0x1, C0|C1, "simd_comp_inst_retired.packed_single" }, \
1005 { 0xCA, 0x2, C0|C1, "simd_comp_inst_retired.scalar_single" }, \
1006 { 0xCA, 0x4, C0|C1, "simd_comp_inst_retired.packed_double" }, \
1007 { 0xCA, 0x8, C0|C1, "simd_comp_inst_retired.scalar_double" }, \
1008 { 0xCB, 0x1, C0|C1, "mem_load_retired.l2_hit" }, \
1009 { 0xCB, 0x2, C0|C1, "mem_load_retired.l2_miss" }, \
1010 { 0xCB, 0x4, C0|C1, "mem_load_retired.dtlb_miss" }, \
1011 { 0xCD, 0x0, C0|C1, "simd_assist" }, \
1012 { 0xCE, 0x0, C0|C1, "simd_instr_retired" }, \
1013 { 0xCF, 0x0, C0|C1, "simd_sat_instr_retired" }, \
1014 { 0xE0, 0x1, C0|C1, "br_inst_decoded" }, \
1015 { 0xE4, 0x1, C0|C1, "bogus_br" }, \
1016 { 0xE6, 0x1, C0|C1, "baclears.any" }
static const struct events_table_t *events_table = NULL;

const struct events_table_t events_fam6_nhm[] = {
	GENERICEVENTS_FAM6_NHM,
	EVENTS_FAM6_NHM,
	{ NT_END, 0, 0, "" }
};

const struct events_table_t events_fam6_mod28[] = {
	GENERICEVENTS_FAM6_MOD28,
	EVENTS_FAM6_MOD28,
	{ NT_END, 0, 0, "" }
};
1033 * Initialize string containing list of supported general-purpose counter
1034 * events for processors of Penryn and Merom Family
1037 pcbe_init_core_uarch()
1039 const struct nametable_core_uarch
*n
;
1040 const struct generic_events
*k
;
1041 const struct nametable_core_uarch
*picspecific_events
;
1042 const struct generic_events
*picspecific_genericevents
;
1047 gpc_names
= kmem_alloc(num_gpc
* sizeof (char *), KM_SLEEP
);
1049 /* Calculate space needed to save all the common event names */
1051 for (n
= cmn_gpc_events_core_uarch
; n
->event_num
!= NT_END
; n
++) {
1052 common_size
+= strlen(n
->name
) + 1;
1055 for (k
= cmn_generic_events
; k
->event_num
!= NT_END
; k
++) {
1056 common_size
+= strlen(k
->name
) + 1;
1059 for (i
= 0; i
< num_gpc
; i
++) {
1061 picspecific_genericevents
= NULL
;
1065 picspecific_events
= pic0_events
;
1066 picspecific_genericevents
= generic_events_pic0
;
1069 picspecific_events
= pic1_events
;
1072 picspecific_events
= NULL
;
1075 if (picspecific_events
!= NULL
) {
1076 for (n
= picspecific_events
;
1077 n
->event_num
!= NT_END
;
1079 size
+= strlen(n
->name
) + 1;
1082 if (picspecific_genericevents
!= NULL
) {
1083 for (k
= picspecific_genericevents
;
1084 k
->event_num
!= NT_END
; k
++) {
1085 size
+= strlen(k
->name
) + 1;
1090 kmem_alloc(size
+ common_size
+ 1, KM_SLEEP
);
1092 gpc_names
[i
][0] = '\0';
1093 if (picspecific_events
!= NULL
) {
1094 for (n
= picspecific_events
;
1095 n
->event_num
!= NT_END
; n
++) {
1096 (void) strcat(gpc_names
[i
], n
->name
);
1097 (void) strcat(gpc_names
[i
], ",");
1100 if (picspecific_genericevents
!= NULL
) {
1101 for (k
= picspecific_genericevents
;
1102 k
->event_num
!= NT_END
; k
++) {
1103 (void) strcat(gpc_names
[i
], k
->name
);
1104 (void) strcat(gpc_names
[i
], ",");
1107 for (n
= cmn_gpc_events_core_uarch
; n
->event_num
!= NT_END
;
1109 (void) strcat(gpc_names
[i
], n
->name
);
1110 (void) strcat(gpc_names
[i
], ",");
1112 for (k
= cmn_generic_events
; k
->event_num
!= NT_END
; k
++) {
1113 (void) strcat(gpc_names
[i
], k
->name
);
1114 (void) strcat(gpc_names
[i
], ",");
1118 * Remove trailing comma.
1120 gpc_names
[i
][common_size
+ size
- 1] = '\0';
1125 core_pcbe_init(void)
1127 struct cpuid_regs cp
;
1131 uint64_t arch_events_vector_length
;
1132 size_t arch_events_string_length
;
1135 if (cpuid_getvendor(CPU
) != X86_VENDOR_Intel
)
1138 /* Obtain Basic CPUID information */
1140 (void) __cpuid_insn(&cp
);
1142 /* No Architectural Performance Monitoring Leaf returned by CPUID */
1143 if (cp
.cp_eax
< 0xa) {
1147 /* Obtain the Architectural Performance Monitoring Leaf */
1149 (void) __cpuid_insn(&cp
);
1151 versionid
= cp
.cp_eax
& 0xFF;
1154 * Fixed-Function Counters (FFC)
1156 * All Family 6 Model 15 and Model 23 processors have fixed-function
1157 * counters. These counters were made Architectural with
1158 * Family 6 Model 15 Stepping 9.
1160 switch (versionid
) {
1166 num_ffc
= cp
.cp_edx
& 0x1F;
1167 width_ffc
= (cp
.cp_edx
>> 5) & 0xFF;
			 * Some processors have an erratum (AW34) where
			 * versionid is reported as 2 when it is actually 1.
			 * In this case, the fixed-function counters are
			 * model-specific, as in Version 1.
1186 num_ffc
= cp
.cp_edx
& 0x1F;
1187 width_ffc
= (cp
.cp_edx
>> 5) & 0xFF;
1195 /* Set HTT-specific names of architectural & FFC events */
1196 if (is_x86_feature(x86_featureset
, X86FSET_HTT
)) {
1197 ffc_names
= ffc_names_htt
;
1198 arch_events_table
= arch_events_table_htt
;
1200 sizeof (arch_events_table_htt
) /
1201 sizeof (struct events_table_t
);
1203 sizeof (ffc_names_htt
) / sizeof (char *);
1205 ffc_names
= ffc_names_non_htt
;
1206 arch_events_table
= arch_events_table_non_htt
;
1208 sizeof (arch_events_table_non_htt
) /
1209 sizeof (struct events_table_t
);
1211 sizeof (ffc_names_non_htt
) / sizeof (char *);
1214 if (num_ffc
>= known_ffc_num
) {
		/*
		 * The system seems to have more fixed-function counters than
		 * this PCBE is able to handle correctly.  Default to the
		 * maximum number of fixed-function counters that this driver
		 * is aware of.
		 */
1221 num_ffc
= known_ffc_num
- 1;
1224 mask_ffc
= BITMASK_XBITS(width_ffc
);
1225 control_ffc
= BITMASK_XBITS(num_ffc
);
1228 * General Purpose Counters (GPC)
1230 num_gpc
= (cp
.cp_eax
>> 8) & 0xFF;
1231 width_gpc
= (cp
.cp_eax
>> 16) & 0xFF;
1236 mask_gpc
= BITMASK_XBITS(width_gpc
);
1238 control_gpc
= BITMASK_XBITS(num_gpc
);
1240 control_mask
= (control_ffc
<< 32) | control_gpc
;
1242 total_pmc
= num_gpc
+ num_ffc
;
1243 if (total_pmc
> 64) {
1244 /* Too wide for the overflow bitmap */
1249 ffc_allnames
= kmem_alloc(num_ffc
* sizeof (char *), KM_SLEEP
);
1250 for (i
= 0; i
< num_ffc
; i
++) {
1251 ffc_allnames
[i
] = kmem_alloc(
1252 strlen(ffc_names
[i
]) + strlen(ffc_genericnames
[i
]) + 2,
1255 ffc_allnames
[i
][0] = '\0';
1256 (void) strcat(ffc_allnames
[i
], ffc_names
[i
]);
1258 /* Check if this ffc has a generic name */
1259 if (strcmp(ffc_genericnames
[i
], "") != 0) {
1260 (void) strcat(ffc_allnames
[i
], ",");
1261 (void) strcat(ffc_allnames
[i
], ffc_genericnames
[i
]);
1265 /* GPC events for Family 6 Models 15, 23 and 29 only */
1266 if ((cpuid_getfamily(CPU
) == 6) &&
1267 ((cpuid_getmodel(CPU
) == 15) || (cpuid_getmodel(CPU
) == 23) ||
1268 (cpuid_getmodel(CPU
) == 29))) {
1269 (void) snprintf(core_impl_name
, IMPL_NAME_LEN
,
1270 "Core Microarchitecture");
1271 pcbe_init_core_uarch();
1275 (void) snprintf(core_impl_name
, IMPL_NAME_LEN
,
1276 "Intel Arch PerfMon v%d on Family %d Model %d",
1277 versionid
, cpuid_getfamily(CPU
), cpuid_getmodel(CPU
));
1280 * Architectural events
1282 arch_events_vector_length
= (cp
.cp_eax
>> 24) & 0xFF;
1284 ASSERT(known_arch_events
== arch_events_vector_length
);
1287 * To handle the case where a new performance monitoring setup is run
1288 * on a non-debug kernel
1290 if (known_arch_events
> arch_events_vector_length
) {
1291 known_arch_events
= arch_events_vector_length
;
1293 arch_events_vector_length
= known_arch_events
;
1296 arch_events_vector
= cp
.cp_ebx
&
1297 BITMASK_XBITS(arch_events_vector_length
);
1300 * Process architectural and non-architectural events using GPC
1304 gpc_names
= kmem_alloc(num_gpc
* sizeof (char *), KM_SLEEP
);
1306 /* Calculate space required for the architectural gpc events */
1307 arch_events_string_length
= 0;
1308 for (i
= 0; i
< known_arch_events
; i
++) {
1309 if (((1U << i
) & arch_events_vector
) == 0) {
1310 arch_events_string_length
+=
1311 strlen(arch_events_table
[i
].name
) + 1;
1312 if (strcmp(arch_genevents_table
[i
], "") != 0) {
1313 arch_events_string_length
+=
1314 strlen(arch_genevents_table
[i
]) + 1;
1319 /* Non-architectural events list */
1320 model
= cpuid_getmodel(CPU
);
1332 events_table
= events_fam6_nhm
;
1335 events_table
= events_fam6_mod28
;
1339 for (i
= 0; i
< num_gpc
; i
++) {
1342 * Determine length of all supported event names
1343 * (architectural + non-architectural)
1345 size
= arch_events_string_length
;
1346 for (j
= 0; events_table
!= NULL
&&
1347 events_table
[j
].eventselect
!= NT_END
;
1349 if (C(i
) & events_table
[j
].supported_counters
) {
1350 size
+= strlen(events_table
[j
].name
) +
1355 /* Allocate memory for this pics list */
1356 gpc_names
[i
] = kmem_alloc(size
+ 1, KM_SLEEP
);
1357 gpc_names
[i
][0] = '\0';
1363 * Create the list of all supported events
1364 * (architectural + non-architectural)
1366 for (j
= 0; j
< known_arch_events
; j
++) {
1367 if (((1U << j
) & arch_events_vector
) == 0) {
1368 (void) strcat(gpc_names
[i
],
1369 arch_events_table
[j
].name
);
1370 (void) strcat(gpc_names
[i
], ",");
1372 arch_genevents_table
[j
], "")
1374 (void) strcat(gpc_names
[i
],
1375 arch_genevents_table
[j
]);
1376 (void) strcat(gpc_names
[i
],
1382 for (j
= 0; events_table
!= NULL
&&
1383 events_table
[j
].eventselect
!= NT_END
;
1385 if (C(i
) & events_table
[j
].supported_counters
) {
1386 (void) strcat(gpc_names
[i
],
1387 events_table
[j
].name
);
1388 (void) strcat(gpc_names
[i
], ",");
1392 /* Remove trailing comma */
1393 gpc_names
[i
][size
- 1] = '\0';
static uint_t
core_pcbe_ncounters()
{
	return (total_pmc);
}

static const char *core_pcbe_impl_name(void)
{
	return (core_impl_name);
}

static const char *core_pcbe_cpuref(void)
{
	return (core_cpuref);
}

static char *core_pcbe_list_events(uint_t picnum)
{
	ASSERT(picnum < cpc_ncounters);

	if (picnum < num_gpc) {
		return (gpc_names[picnum]);
	} else {
		return (ffc_allnames[picnum - num_gpc]);
	}
}

static char *core_pcbe_list_attrs(void)
{
	if (versionid >= 3) {
		return ("edge,inv,umask,cmask,anythr");
	} else {
		return ("edge,pc,inv,umask,cmask");
	}
}
static const struct nametable_core_uarch *
find_gpcevent_core_uarch(char *name,
    const struct nametable_core_uarch *nametable)
{
	const struct nametable_core_uarch *n;
	int compare_result = -1;

	for (n = nametable; n->event_num != NT_END; n++) {
		compare_result = strcmp(name, n->name);
		if (compare_result <= 0) {
			break;
		}
	}

	if (compare_result == 0) {
		return (n);
	}

	return (NULL);
}

static const struct generic_events *
find_generic_events(char *name, const struct generic_events *table)
{
	const struct generic_events *n;

	for (n = table; n->event_num != NT_END; n++) {
		if (strcmp(name, n->name) == 0) {
			return (n);
		}
	}

	return (NULL);
}
static const struct events_table_t *
find_gpcevent(char *name)
{
	int i;

	/* Search architectural events */
	for (i = 0; i < known_arch_events; i++) {
		if (strcmp(name, arch_events_table[i].name) == 0 ||
		    strcmp(name, arch_genevents_table[i]) == 0) {
			if (((1U << i) & arch_events_vector) == 0) {
				return (&arch_events_table[i]);
			}
		}
	}

	/* Search non-architectural events */
	if (events_table != NULL) {
		for (i = 0; events_table[i].eventselect != NT_END; i++) {
			if (strcmp(name, events_table[i].name) == 0) {
				return (&events_table[i]);
			}
		}
	}

	return (NULL);
}
1498 core_pcbe_event_coverage(char *event
)
1502 const struct events_table_t
*n
;
1507 /* Is it an event that a GPC can track? */
1508 if (versionid
>= 3) {
1509 n
= find_gpcevent(event
);
1511 bitmap
|= (n
->supported_counters
&
1512 BITMASK_XBITS(num_gpc
));
1515 if (find_generic_events(event
, cmn_generic_events
) != NULL
) {
1516 bitmap
|= BITMASK_XBITS(num_gpc
);
1517 } if (find_generic_events(event
, generic_events_pic0
) != NULL
) {
1519 } else if (find_gpcevent_core_uarch(event
,
1520 cmn_gpc_events_core_uarch
) != NULL
) {
1521 bitmap
|= BITMASK_XBITS(num_gpc
);
1522 } else if (find_gpcevent_core_uarch(event
, pic0_events
) !=
1525 } else if (find_gpcevent_core_uarch(event
, pic1_events
) !=
1527 bitmap
|= 1ULL << 1;
1531 /* Check if the event can be counted in the fixed-function counters */
1533 bitmask
= 1ULL << num_gpc
;
1534 for (i
= 0; i
< num_ffc
; i
++) {
1535 if (strcmp(event
, ffc_names
[i
]) == 0) {
1537 } else if (strcmp(event
, ffc_genericnames
[i
]) == 0) {
1540 bitmask
= bitmask
<< 1;
static uint64_t
core_pcbe_overflow_bitmap(void)
{
	uint64_t interrupt_status;
	uint64_t intrbits_ffc;
	uint64_t intrbits_gpc;
	extern int kcpc_hw_overflow_intr_installed;
	uint64_t overflow_bitmap;

	RDMSR(PERF_GLOBAL_STATUS, interrupt_status);
	WRMSR(PERF_GLOBAL_OVF_CTRL, interrupt_status);

	interrupt_status = interrupt_status & control_mask;
	intrbits_ffc = (interrupt_status >> 32) & control_ffc;
	intrbits_gpc = interrupt_status & control_gpc;
	overflow_bitmap = (intrbits_ffc << num_gpc) | intrbits_gpc;

	ASSERT(kcpc_hw_overflow_intr_installed);
	(*kcpc_hw_enable_cpc_intr)();

	return (overflow_bitmap);
}
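
/*
 * Illustrative example (assumes num_gpc == 4): if GPC2 and FFC0 have both
 * overflowed, PERF_GLOBAL_STATUS has bits 2 and 32 set, giving
 * intrbits_gpc == 0x4 and intrbits_ffc == 0x1, so the returned bitmap is
 * (0x1 << 4) | 0x4 == 0x14; the FFC overflow bits are reported immediately
 * above the GPC bits.
 */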
static int
check_cpc_securitypolicy(core_pcbe_config_t *conf,
    const struct nametable_core_uarch *n)
{
	if (conf->core_ctl & n->restricted_bits) {
		if (secpolicy_cpc_cpu(crgetcred()) != 0) {
			return (CPC_ATTR_REQUIRES_PRIVILEGE);
		}
	}
	return (0);
}
, char *event
, uint64_t preset
, uint32_t flags
,
1584 uint_t nattrs
, kcpc_attr_t
*attrs
, void **data
)
1586 core_pcbe_config_t conf
;
1587 const struct nametable_core_uarch
*n
;
1588 const struct generic_events
*k
= NULL
;
1589 const struct nametable_core_uarch
*m
;
1590 const struct nametable_core_uarch
*picspecific_events
;
1591 struct nametable_core_uarch nt_raw
= { "", 0x0, 0x0 };
1594 const struct events_table_t
*eventcode
;
1596 if (((preset
& BITS_EXTENDED_FROM_31
) != 0) &&
1597 ((preset
& BITS_EXTENDED_FROM_31
) !=
1598 BITS_EXTENDED_FROM_31
)) {
1601 * Bits beyond bit-31 in the general-purpose counters can only
1602 * be written to by extension of bit 31. We cannot preset
1603 * these bits to any value other than all 1s or all 0s.
1605 return (CPC_ATTRIBUTE_OUT_OF_RANGE
);
1608 if (versionid
>= 3) {
1609 eventcode
= find_gpcevent(event
);
1610 if (eventcode
!= NULL
) {
1611 if ((C(picnum
) & eventcode
->supported_counters
) == 0) {
1612 return (CPC_PIC_NOT_CAPABLE
);
1615 (strncmp("PAPI_", event
, 5) == 0)) {
1616 return (CPC_ATTRIBUTE_OUT_OF_RANGE
);
1618 conf
.core_ctl
= eventcode
->eventselect
;
1619 conf
.core_ctl
|= eventcode
->unitmask
<<
1622 /* Event specified as raw event code */
1623 if (ddi_strtol(event
, NULL
, 0, &event_num
) != 0) {
1624 return (CPC_INVALID_EVENT
);
1626 conf
.core_ctl
= event_num
& 0xFF;
1629 if ((k
= find_generic_events(event
, cmn_generic_events
)) !=
1632 (k
= find_generic_events(event
, generic_events_pic0
)) !=
1635 return (CPC_ATTRIBUTE_OUT_OF_RANGE
);
1637 conf
.core_ctl
= k
->event_num
;
1638 conf
.core_ctl
|= k
->umask
<< CORE_UMASK_SHIFT
;
1640 /* Not a generic event */
1642 n
= find_gpcevent_core_uarch(event
,
1643 cmn_gpc_events_core_uarch
);
1647 picspecific_events
=
1651 picspecific_events
=
1655 picspecific_events
= NULL
;
1658 if (picspecific_events
!= NULL
) {
1659 n
= find_gpcevent_core_uarch(event
,
1660 picspecific_events
);
1666 * Check if this is a case where the event was
1667 * specified directly by its event number
1668 * instead of its name string.
1670 if (ddi_strtol(event
, NULL
, 0, &event_num
) !=
1672 return (CPC_INVALID_EVENT
);
1675 event_num
= event_num
& 0xFF;
				/*
				 * Search the event table to find out if the
				 * event specified has any privilege
				 * requirements.  Currently none of the
				 * pic-specific counters have any privilege
				 * requirements.  Hence only the table
				 * cmn_gpc_events_core_uarch is searched.
				 */
1685 for (m
= cmn_gpc_events_core_uarch
;
1686 m
->event_num
!= NT_END
;
1688 if (event_num
== m
->event_num
) {
1692 if (m
->event_num
== NT_END
) {
1693 nt_raw
.event_num
= (uint8_t)event_num
;
1699 conf
.core_ctl
= n
->event_num
; /* Event Select */
1704 conf
.core_picno
= picnum
;
1705 conf
.core_pictype
= CORE_GPC
;
1706 conf
.core_rawpic
= preset
& mask_gpc
;
1708 conf
.core_pes
= GPC_BASE_PES
+ picnum
;
1709 conf
.core_pmc
= GPC_BASE_PMC
+ picnum
;
1711 for (i
= 0; i
< nattrs
; i
++) {
1712 if (strncmp(attrs
[i
].ka_name
, "umask", 6) == 0) {
1713 if ((attrs
[i
].ka_val
| CORE_UMASK_MASK
) !=
1715 return (CPC_ATTRIBUTE_OUT_OF_RANGE
);
1717 /* Clear out the default umask */
1718 conf
.core_ctl
&= ~ (CORE_UMASK_MASK
<<
1720 /* Use the user provided umask */
1721 conf
.core_ctl
|= attrs
[i
].ka_val
<<
1723 } else if (strncmp(attrs
[i
].ka_name
, "edge", 6) == 0) {
1724 if (attrs
[i
].ka_val
!= 0)
1725 conf
.core_ctl
|= CORE_EDGE
;
1726 } else if (strncmp(attrs
[i
].ka_name
, "inv", 4) == 0) {
1727 if (attrs
[i
].ka_val
!= 0)
1728 conf
.core_ctl
|= CORE_INV
;
1729 } else if (strncmp(attrs
[i
].ka_name
, "cmask", 6) == 0) {
1730 if ((attrs
[i
].ka_val
| CORE_CMASK_MASK
) !=
1732 return (CPC_ATTRIBUTE_OUT_OF_RANGE
);
1734 conf
.core_ctl
|= attrs
[i
].ka_val
<<
1736 } else if (strncmp(attrs
[i
].ka_name
, "anythr", 7) ==
1739 return (CPC_INVALID_ATTRIBUTE
);
1740 if (secpolicy_cpc_cpu(crgetcred()) != 0) {
1741 return (CPC_ATTR_REQUIRES_PRIVILEGE
);
1743 if (attrs
[i
].ka_val
!= 0)
1744 conf
.core_ctl
|= CORE_ANYTHR
;
1746 return (CPC_INVALID_ATTRIBUTE
);
1750 if (flags
& CPC_COUNT_USER
)
1751 conf
.core_ctl
|= CORE_USR
;
1752 if (flags
& CPC_COUNT_SYSTEM
)
1753 conf
.core_ctl
|= CORE_OS
;
1754 if (flags
& CPC_OVF_NOTIFY_EMT
)
1755 conf
.core_ctl
|= CORE_INT
;
1756 conf
.core_ctl
|= CORE_EN
;
1758 if (versionid
< 3 && k
== NULL
) {
1759 if (check_cpc_securitypolicy(&conf
, n
) != 0) {
1760 return (CPC_ATTR_REQUIRES_PRIVILEGE
);
1764 *data
= kmem_alloc(sizeof (core_pcbe_config_t
), KM_SLEEP
);
1765 *((core_pcbe_config_t
*)*data
) = conf
;
1771 configure_ffc(uint_t picnum
, char *event
, uint64_t preset
, uint32_t flags
,
1772 uint_t nattrs
, kcpc_attr_t
*attrs
, void **data
)
1774 core_pcbe_config_t
*conf
;
1777 if (picnum
- num_gpc
>= num_ffc
) {
1778 return (CPC_INVALID_PICNUM
);
1781 if ((strcmp(ffc_names
[picnum
-num_gpc
], event
) != 0) &&
1782 (strcmp(ffc_genericnames
[picnum
-num_gpc
], event
) != 0)) {
1783 return (CPC_INVALID_EVENT
);
1786 if ((versionid
< 3) && (nattrs
!= 0)) {
1787 return (CPC_INVALID_ATTRIBUTE
);
1790 conf
= kmem_alloc(sizeof (core_pcbe_config_t
), KM_SLEEP
);
1793 for (i
= 0; i
< nattrs
; i
++) {
1794 if (strncmp(attrs
[i
].ka_name
, "anythr", 7) == 0) {
1795 if (secpolicy_cpc_cpu(crgetcred()) != 0) {
1796 kmem_free(conf
, sizeof (core_pcbe_config_t
));
1797 return (CPC_ATTR_REQUIRES_PRIVILEGE
);
1799 if (attrs
[i
].ka_val
!= 0) {
1800 conf
->core_ctl
|= CORE_FFC_ANYTHR
;
1803 kmem_free(conf
, sizeof (core_pcbe_config_t
));
1804 return (CPC_INVALID_ATTRIBUTE
);
1808 conf
->core_picno
= picnum
;
1809 conf
->core_pictype
= CORE_FFC
;
1810 conf
->core_rawpic
= preset
& mask_ffc
;
1811 conf
->core_pmc
= FFC_BASE_PMC
+ (picnum
- num_gpc
);
1813 /* All fixed-function counters have the same control register */
1814 conf
->core_pes
= PERF_FIXED_CTR_CTRL
;
1816 if (flags
& CPC_COUNT_USER
)
1817 conf
->core_ctl
|= CORE_FFC_USR_EN
;
1818 if (flags
& CPC_COUNT_SYSTEM
)
1819 conf
->core_ctl
|= CORE_FFC_OS_EN
;
1820 if (flags
& CPC_OVF_NOTIFY_EMT
)
1821 conf
->core_ctl
|= CORE_FFC_PMI
;
1829 core_pcbe_configure(uint_t picnum
, char *event
, uint64_t preset
,
1830 uint32_t flags
, uint_t nattrs
, kcpc_attr_t
*attrs
, void **data
,
1834 core_pcbe_config_t
*conf
;
1837 * If we've been handed an existing configuration, we need only preset
1838 * the counter value.
1840 if (*data
!= NULL
) {
1842 ASSERT(conf
->core_pictype
== CORE_GPC
||
1843 conf
->core_pictype
== CORE_FFC
);
1844 if (conf
->core_pictype
== CORE_GPC
)
1845 conf
->core_rawpic
= preset
& mask_gpc
;
1847 conf
->core_rawpic
= preset
& mask_ffc
;
1851 if (picnum
>= total_pmc
) {
1852 return (CPC_INVALID_PICNUM
);
1855 if (picnum
< num_gpc
) {
1856 ret
= configure_gpc(picnum
, event
, preset
, flags
,
1857 nattrs
, attrs
, data
);
1859 ret
= configure_ffc(picnum
, event
, preset
, flags
,
1860 nattrs
, attrs
, data
);
1866 core_pcbe_program(void *token
)
1868 core_pcbe_config_t
*cfg
;
1869 uint64_t perf_global_ctrl
;
1870 uint64_t perf_fixed_ctr_ctrl
;
1873 core_pcbe_allstop();
1876 if (kcpc_allow_nonpriv(token
))
1877 /* Allow RDPMC at any ring level */
1878 setcr4(curcr4
| CR4_PCE
);
1880 /* Allow RDPMC only at ring 0 */
1881 setcr4(curcr4
& ~CR4_PCE
);
1883 /* Clear any overflow indicators before programming the counters */
1884 WRMSR(PERF_GLOBAL_OVF_CTRL
, MASK_CONDCHGD_OVFBUFFER
| control_mask
);
1887 perf_global_ctrl
= 0;
1888 perf_fixed_ctr_ctrl
= 0;
1889 cfg
= (core_pcbe_config_t
*)kcpc_next_config(token
, cfg
, NULL
);
1890 while (cfg
!= NULL
) {
1891 ASSERT(cfg
->core_pictype
== CORE_GPC
||
1892 cfg
->core_pictype
== CORE_FFC
);
1894 if (cfg
->core_pictype
== CORE_GPC
) {
			/*
			 * General-purpose counter registers have write
			 * restrictions where only the lower 32 bits can be
			 * written to.  The rest of the relevant bits are
			 * written to by extension from bit 31 (all ZEROs if
			 * bit 31 is ZERO and all ONEs if bit 31 is ONE).
			 * This makes it possible to write to the counter
			 * register only values that have all ONEs or all
			 * ZEROs in the higher bits.
			 */
1905 if (((cfg
->core_rawpic
& BITS_EXTENDED_FROM_31
) == 0) ||
1906 ((cfg
->core_rawpic
& BITS_EXTENDED_FROM_31
) ==
1907 BITS_EXTENDED_FROM_31
)) {
				/*
				 * Straightforward case where the higher bits
				 * are all ZEROs or all ONEs.
				 */
1912 WRMSR(cfg
->core_pmc
,
1913 (cfg
->core_rawpic
& mask_gpc
));
1916 * The high order bits are not all the same.
1917 * We save what is currently in the registers
1918 * and do not write to it. When we want to do
1919 * a read from this register later (in
1920 * core_pcbe_sample()), we subtract the value
1921 * we save here to get the actual event count.
1923 * NOTE: As a result, we will not get overflow
1924 * interrupts as expected.
1926 RDMSR(cfg
->core_pmc
, cfg
->core_rawpic
);
1927 cfg
->core_rawpic
= cfg
->core_rawpic
& mask_gpc
;
1929 WRMSR(cfg
->core_pes
, cfg
->core_ctl
);
1930 perf_global_ctrl
|= 1ull << cfg
->core_picno
;
1933 * Unlike the general-purpose counters, all relevant
1934 * bits of fixed-function counters can be written to.
1936 WRMSR(cfg
->core_pmc
, cfg
->core_rawpic
& mask_ffc
);
			/*
			 * Collect the control bits for all the
			 * fixed-function counters and write them in one
			 * shot later in this function.
			 */
1943 perf_fixed_ctr_ctrl
|= cfg
->core_ctl
<<
1944 ((cfg
->core_picno
- num_gpc
) * CORE_FFC_ATTR_SIZE
);
1946 1ull << (cfg
->core_picno
- num_gpc
+ 32);
1949 cfg
= (core_pcbe_config_t
*)
1950 kcpc_next_config(token
, cfg
, NULL
);
1953 /* Enable all the counters */
1954 WRMSR(PERF_FIXED_CTR_CTRL
, perf_fixed_ctr_ctrl
);
1955 WRMSR(PERF_GLOBAL_CTRL
, perf_global_ctrl
);
static void
core_pcbe_allstop(void)
{
	/* Disable all the counters together */
	WRMSR(PERF_GLOBAL_CTRL, ALL_STOPPED);

	setcr4(getcr4() & ~CR4_PCE);
}

static void
core_pcbe_sample(void *token)
{
	uint64_t		*daddr;
	uint64_t		curpic;
	core_pcbe_config_t	*cfg;
	uint64_t		counter_mask;

	cfg = (core_pcbe_config_t *)kcpc_next_config(token, NULL, &daddr);
	while (cfg != NULL) {
		ASSERT(cfg->core_pictype == CORE_GPC ||
		    cfg->core_pictype == CORE_FFC);

		curpic = rdmsr(cfg->core_pmc);

		DTRACE_PROBE4(core__pcbe__sample,
		    uint64_t, cfg->core_pmc,
		    uint64_t, curpic,
		    uint64_t, cfg->core_rawpic,
		    uint64_t, *daddr);

		if (cfg->core_pictype == CORE_GPC) {
			counter_mask = mask_gpc;
		} else {
			counter_mask = mask_ffc;
		}
		curpic = curpic & counter_mask;
		if (curpic >= cfg->core_rawpic) {
			*daddr += curpic - cfg->core_rawpic;
		} else {
			/* Counter overflowed since our last sample */
			*daddr += counter_mask - (cfg->core_rawpic - curpic) +
			    1;
		}
		cfg->core_rawpic = *daddr & counter_mask;

		cfg = (core_pcbe_config_t *)
		    kcpc_next_config(token, cfg, &daddr);
	}
}
static void
core_pcbe_free(void *config)
{
	kmem_free(config, sizeof (core_pcbe_config_t));
}

static struct modlpcbe core_modlpcbe = {
	&mod_pcbeops,
	"Core Performance Counters",
	&core_pcbe_ops
};

static struct modlinkage core_modl = {
	MODREV_1,
	&core_modlpcbe,
	NULL
};

int
_init(void)
{
	if (core_pcbe_init() != 0) {
		return (ENOTSUP);
	}
	return (mod_install(&core_modl));
}

int
_fini(void)
{
	return (mod_remove(&core_modl));
}

int
_info(struct modinfo *mi)
{
	return (mod_info(&core_modl, mi));
}