[gdb] Fix tsan warning: signal handler spoils errno
[binutils-gdb.git] / gprofng / common / hwctable.c
blobb2b4a99b5abd7921e5a30cb16311b7e2af853e1e
1 /* Copyright (C) 2021-2024 Free Software Foundation, Inc.
2 Contributed by Oracle.
4 This file is part of GNU Binutils.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, 51 Franklin Street - Fifth Floor, Boston,
19 MA 02110-1301, USA. */
21 #include <stdlib.h>
22 #include <stdio.h>
23 #include <errno.h>
24 #include <string.h>
25 #include <limits.h>
26 #include <linux/perf_event.h>
28 #include "hwcdrv.h"
30 /*---------------------------------------------------------------------------*/
31 /* compile options */
33 #define DISALLOW_PENTIUM_PRO_MMX_7007575
34 /* Solaris/libcpc2 defaults to "Pentium Pro with MMX, Pentium II"
35 when it doesn't recognize an Intel processor. As a result,
36 when collect attempts to start Pentium Pro counters on a
37 new machine (e.g. Westmere as of 1/2011), the OS may hang. */
39 /* Register 0 counter doesn't work on Niagara T1 version (?) */
40 #define WORKAROUND_6231196_NIAGARA1_NO_CTR_0
42 /*---------------------------------------------------------------------------*/
43 /* consts, macros */
45 /* 10^N rates: PRELOADS_<N> is approximately 10^N.  A two-digit suffix
   denotes a half step, e.g. PRELOADS_85 is about 10^8.5 (3.2 * 10^8) and
   PRELOADS_25 is about 10^2.5.  Every value ends in the digit 1 rather
   than being a round number.  */
46 #define PRELOADS_9 1001000001
47 #define PRELOADS_85 320100001
48 #define PRELOADS_8 100100001
49 #define PRELOADS_75 32010001
50 #define PRELOADS_7 10010001
51 #define PRELOADS_65 3201001
52 #define PRELOADS_6 1001001
53 #define PRELOADS_55 320101
54 #define PRELOADS_5 100101
55 #define PRELOADS_45 32001
56 #define PRELOADS_4 10001
57 #define PRELOADS_35 3201
58 #define PRELOADS_3 1001
59 #define PRELOADS_25 301
61 #define ABST_TBD ABST_NONE /* to be determined */
63 /*---------------------------------------------------------------------------*/
64 /* prototypes */
65 static void hwc_cb (uint_t cpc_regno, const char *name);
66 static void attrs_cb (const char *attr);
67 static int attr_is_valid (int forKernel, const char *attr);
69 /*---------------------------------------------------------------------------*/
70 /* HWC definition tables */
72 /*
73 comments on hwcentry tables
74 ---------------------------
75 name: this field should not contain '~'.
76 int_name: actual name of register, may contain ~ attribute specifications.
77 regnum: assigned register.
78 metric: if non-NULL, is a 'standard' counter that will show up in help.
79 timecvt: >0: can convert to time, 'timecvt' CPU cycles per event
80 =0: counts events
81 <0: can convert to time, count reference-clock cycles at '-timecvt' MHz
82 memop: see description for ABST_type enum
83 */
85 // PRELOAD(): generates an interval based on the cycles/event and CPU GHZ.
86 // Note: the macro rounds the interval down to a multiple of 100 and then
// adds 1, so the result always ends in decimal 01.
// Because CYC_PER_SAMPLE is an unsigned long long constant, the whole
// PRELOAD() expression is evaluated in unsigned long long arithmetic.
87 #define CYC_PER_SAMPLE (1000ULL*1000*1000/100) // cycles per signal at 1ghz, 100 samples/second
88 #define PRELOAD(min_cycles_per_event,ghz) (((ghz)*CYC_PER_SAMPLE/(min_cycles_per_event))/100*100+1)
90 // PRELOAD_DEF: initial value for uncalibrated events.
91 // This value should be based on a rate that will work for the slowest changing
92 // HWCs, i.e. HWCs where there are many CPU cycles between events.
94 // The interval needs to target the slowest HWCs so that
95 // automatic adjustment of HWC overflow intervals can adapt.
// PRELOAD(1000,3) evaluates to 30001.
96 #define PRELOAD_DEF PRELOAD(1000,3) // default interval targets 1000 cycles/event at 3ghz
97 // For er_kernel, whose HWC intervals cannot be adjusted automatically for ON/HI/LO,
98 // the interval should target some safe interval for fast events.
// PRELOAD(4,4) evaluates to 10000001.
99 #define PRELOAD_DEF_ERKERNEL PRELOAD(4,4) // default interval targets 4 cycles/event at 4ghz
/* Blank counter entry: NULL name, int_name and metric strings, REGNO_ANY as
   the register, PRELOAD_DEF as the interval, and ABST_NONE.  Presumably a
   template for entries created at run time; its users are not visible in
   this part of the file.  */
101 static const Hwcentry empty_ctr = {NULL, NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE, 0};
104 // --- use cycles counter to expose "system_time" on Linux ---
105 #define SYSTIME_REGNOS REGNO_ANY // Linux: make sys_time/usr_time available for data collection
106 // Note: For x86, Linux and Solaris use different ref-clock names
// USE_INTEL_REF_CYCLES(MHZ) expands to eight table rows, each followed by a
// comma so the macro can be placed directly inside a Hwcentry initializer:
//   usr_time, sys_time : visible metrics on SYSTIME_REGNOS
//   cycles0, cycles1   : hidden aliases (NULL metric) pinned to registers 0 and 1
// Each alias is listed twice, once per reference-clock event name
// ("unhalted-reference-cycles" and "cpu_clk_unhalted.ref_p").
// MHZ is used as PRELOAD()'s second argument and, negated, as timecvt.
// NOTE(review): PRELOAD() names its second parameter "ghz" but receives MHZ
// here -- confirm that the resulting interval scaling is intended.
// NOTE(review): the cycles1 rows use 910 where all other rows use 900;
// the reason is not visible in this file.
107 #define USE_INTEL_REF_CYCLES(MHZ) \
108 {"usr_time","unhalted-reference-cycles", SYSTIME_REGNOS, STXT("User CPU"), PRELOAD(900,MHZ), -(MHZ), ABST_NONE}, \
109 {"usr_time","cpu_clk_unhalted.ref_p", SYSTIME_REGNOS, STXT("User CPU"), PRELOAD(900,MHZ), -(MHZ), ABST_NONE}, \
110 {"sys_time","unhalted-reference-cycles~system=1~user=0", SYSTIME_REGNOS, STXT("System CPU"), PRELOAD(900,MHZ), -(MHZ), ABST_NONE}, \
111 {"sys_time","cpu_clk_unhalted.ref_p~system=1~user=0", SYSTIME_REGNOS, STXT("System CPU"), PRELOAD( 900,MHZ), -(MHZ), ABST_NONE}, \
112 {"cycles0", "unhalted-reference-cycles", 0, NULL, PRELOAD( 900,MHZ), -(MHZ), ABST_NONE}, /*hidden*/ \
113 {"cycles0", "cpu_clk_unhalted.ref_p", 0, NULL, PRELOAD( 900,MHZ), -(MHZ), ABST_NONE}, /*hidden*/ \
114 {"cycles1", "unhalted-reference-cycles", 1, NULL, PRELOAD( 910,MHZ), -(MHZ), ABST_NONE}, /*hidden*/ \
115 {"cycles1", "cpu_clk_unhalted.ref_p", 1, NULL, PRELOAD( 910,MHZ), -(MHZ), ABST_NONE}, /*hidden*/ \
116 /* end of list */
118 /* --- PERF_EVENTS "software" definitions --- */
/* PERF_EVENTS_SW_EVENT_ALIASES currently expands to nothing: the line
   continuation only pulls the "none supported" comment into the definition.
   The candidate rows below are compiled out by #if 0.  */
119 #define PERF_EVENTS_SW_EVENT_ALIASES \
120 // none supported for now
121 #if 0
122 {"usr", "PERF_COUNT_SW_TASK_CLOCK", REGNO_ANY, STXT("User CPU"), PRELOADS_7, -(1000), ABST_NONE}, \
123 {"sys", "PERF_COUNT_SW_TASK_CLOCK~system=1~user=0", REGNO_ANY, STXT("System CPU"), PRELOADS_7, -(1000), ABST_NONE}, \
124 /* end of list */
125 #endif
/* PERF_EVENTS_SW_EVENT_DEFS is likewise defined with an empty expansion.  */
127 #define PERF_EVENTS_SW_EVENT_DEFS
129 /*
130 * The PAPI descriptive strings used to be wrapped with STXT(),
131 * a macro defined in perfan/include/i18n.h. For the time being,
132 * we want to demote the PAPI counters by omitting the
133 * descriptions. So we use a new macro PAPITXT() for this purpose.
134 */
135 #define PAPITXT(x) NULL
137 /* Solaris "Generic" Counters.
   Table of PAPI-named counters.  Row layout as used here:
     { name, int_name, register, metric text, preload interval, timecvt, memop }
   Every row leaves int_name NULL and uses REGNO_ANY and ABST_NONE.
   PAPITXT() is defined to expand to NULL, so the description strings are
   dropped by the preprocessor and the metric field of every row is NULL.
   Rows that count cycles use timecvt 1; all other rows use timecvt 0.
   The list ends with a row whose name is NULL.
   NOTE(review): that terminator has nine initializers with ABST_NONE in the
   ninth position, whereas the data rows put the ABST value seventh --
   confirm against the Hwcentry layout.  */
138 static Hwcentry papi_generic_list[] = {
139 {"PAPI_l1_dcm", NULL, REGNO_ANY, PAPITXT ("L1 D-cache misses"), PRELOADS_65, 0, ABST_NONE},
140 {"PAPI_l1_icm", NULL, REGNO_ANY, PAPITXT ("L1 I-cache misses"), PRELOADS_6, 0, ABST_NONE},
141 {"PAPI_l2_dcm", NULL, REGNO_ANY, PAPITXT ("L2 D-cache misses"), PRELOADS_6, 0, ABST_NONE},
142 {"PAPI_l2_icm", NULL, REGNO_ANY, PAPITXT ("L2 I-cache misses"), PRELOADS_6, 0, ABST_NONE},
143 {"PAPI_l3_dcm", NULL, REGNO_ANY, PAPITXT ("L3 D-cache misses"), PRELOADS_5, 0, ABST_NONE},
144 {"PAPI_l3_icm", NULL, REGNO_ANY, PAPITXT ("L3 I-cache misses"), PRELOADS_5, 0, ABST_NONE},
145 {"PAPI_l1_tcm", NULL, REGNO_ANY, PAPITXT ("L1 misses"), PRELOADS_65, 0, ABST_NONE},
146 {"PAPI_l2_tcm", NULL, REGNO_ANY, PAPITXT ("L2 misses"), PRELOADS_6, 0, ABST_NONE},
147 {"PAPI_l3_tcm", NULL, REGNO_ANY, PAPITXT ("L3 misses"), PRELOADS_5, 0, ABST_NONE},
148 {"PAPI_ca_snp", NULL, REGNO_ANY, PAPITXT ("Requests for a snoop"), PRELOADS_6, 0, ABST_NONE},
149 {"PAPI_ca_shr", NULL, REGNO_ANY, PAPITXT ("Requests for exclusive access to shared cache line"), PRELOADS_6, 0, ABST_NONE},
150 {"PAPI_ca_cln", NULL, REGNO_ANY, PAPITXT ("Requests for exclusive access to clean cache line"), PRELOADS_6, 0, ABST_NONE},
151 {"PAPI_ca_inv", NULL, REGNO_ANY, PAPITXT ("Requests for cache line invalidation"), PRELOADS_6, 0, ABST_NONE},
152 {"PAPI_ca_itv", NULL, REGNO_ANY, PAPITXT ("Requests for cache line intervention"), PRELOADS_6, 0, ABST_NONE},
153 {"PAPI_l3_ldm", NULL, REGNO_ANY, PAPITXT ("L3 load misses"), PRELOADS_5, 0, ABST_NONE},
154 {"PAPI_l3_stm", NULL, REGNO_ANY, PAPITXT ("L3 store misses"), PRELOADS_5, 0, ABST_NONE},
155 {"PAPI_bru_idl", NULL, REGNO_ANY, PAPITXT ("Cycles branch units are idle"), PRELOADS_7, 1, ABST_NONE},
156 {"PAPI_fxu_idl", NULL, REGNO_ANY, PAPITXT ("Cycles integer units are idle"), PRELOADS_7, 1, ABST_NONE},
157 {"PAPI_fpu_idl", NULL, REGNO_ANY, PAPITXT ("Cycles FP units are idle"), PRELOADS_7, 1, ABST_NONE},
158 {"PAPI_lsu_idl", NULL, REGNO_ANY, PAPITXT ("Cycles load/store units are idle"), PRELOADS_7, 1, ABST_NONE},
159 {"PAPI_tlb_dm", NULL, REGNO_ANY, PAPITXT ("DTLB misses"), PRELOADS_6, 0, ABST_NONE},
160 {"PAPI_tlb_im", NULL, REGNO_ANY, PAPITXT ("ITLB misses"), PRELOADS_6, 0, ABST_NONE},
161 {"PAPI_tlb_tl", NULL, REGNO_ANY, PAPITXT ("Total TLB misses"), PRELOADS_6, 0, ABST_NONE},
162 {"PAPI_tlb_tm", NULL, REGNO_ANY, PAPITXT ("Total TLB misses"), PRELOADS_6, 0, ABST_NONE},
163 {"PAPI_l1_ldm", NULL, REGNO_ANY, PAPITXT ("L1 load misses"), PRELOADS_65, 0, ABST_NONE},
164 {"PAPI_l1_stm", NULL, REGNO_ANY, PAPITXT ("L1 store misses"), PRELOADS_65, 0, ABST_NONE},
165 {"PAPI_l2_ldm", NULL, REGNO_ANY, PAPITXT ("L2 load misses"), PRELOADS_6, 0, ABST_NONE},
166 {"PAPI_l2_stm", NULL, REGNO_ANY, PAPITXT ("L2 store misses"), PRELOADS_6, 0, ABST_NONE},
167 {"PAPI_btac_m", NULL, REGNO_ANY, PAPITXT ("Branch target address cache misses"), PRELOADS_5, 0, ABST_NONE},
168 {"PAPI_prf_dm", NULL, REGNO_ANY, PAPITXT ("Data prefetch cache misses"), PRELOADS_65, 0, ABST_NONE},
169 {"PAPI_l3_dch", NULL, REGNO_ANY, PAPITXT ("L3 D-cache hits"), PRELOADS_6, 0, ABST_NONE},
170 {"PAPI_tlb_sd", NULL, REGNO_ANY, PAPITXT ("TLB shootdowns"), PRELOADS_6, 0, ABST_NONE},
171 {"PAPI_csr_fal", NULL, REGNO_ANY, PAPITXT ("Failed store conditional instructions"), PRELOADS_6, 0, ABST_NONE},
172 {"PAPI_csr_suc", NULL, REGNO_ANY, PAPITXT ("Successful store conditional instructions"), PRELOADS_7, 0, ABST_NONE},
173 {"PAPI_csr_tot", NULL, REGNO_ANY, PAPITXT ("Total store conditional instructions"), PRELOADS_7, 0, ABST_NONE},
174 {"PAPI_mem_scy", NULL, REGNO_ANY, PAPITXT ("Cycles Stalled Waiting for memory accesses"), PRELOADS_7, 1, ABST_NONE},
175 {"PAPI_mem_rcy", NULL, REGNO_ANY, PAPITXT ("Cycles Stalled Waiting for memory reads"), PRELOADS_7, 1, ABST_NONE},
176 {"PAPI_mem_wcy", NULL, REGNO_ANY, PAPITXT ("Cycles Stalled Waiting for memory writes"), PRELOADS_7, 1, ABST_NONE},
177 {"PAPI_stl_icy", NULL, REGNO_ANY, PAPITXT ("Cycles with no instruction issue"), PRELOADS_7, 1, ABST_NONE},
178 {"PAPI_ful_icy", NULL, REGNO_ANY, PAPITXT ("Cycles with maximum instruction issue"), PRELOADS_7, 1, ABST_NONE},
179 {"PAPI_stl_ccy", NULL, REGNO_ANY, PAPITXT ("Cycles with no instructions completed"), PRELOADS_7, 1, ABST_NONE},
180 {"PAPI_ful_ccy", NULL, REGNO_ANY, PAPITXT ("Cycles with maximum instructions completed"), PRELOADS_7, 1, ABST_NONE},
181 {"PAPI_hw_int", NULL, REGNO_ANY, PAPITXT ("Hardware interrupts"), PRELOADS_5, 0, ABST_NONE},
182 {"PAPI_br_ucn", NULL, REGNO_ANY, PAPITXT ("Unconditional branch instructions"), PRELOADS_7, 0, ABST_NONE},
183 {"PAPI_br_cn", NULL, REGNO_ANY, PAPITXT ("Cond. branch instructions"), PRELOADS_7, 0, ABST_NONE},
184 {"PAPI_br_tkn", NULL, REGNO_ANY, PAPITXT ("Cond. branch instructions taken"), PRELOADS_7, 0, ABST_NONE},
185 {"PAPI_br_ntk", NULL, REGNO_ANY, PAPITXT ("Cond. branch instructions not taken"), PRELOADS_7, 0, ABST_NONE},
186 {"PAPI_br_msp", NULL, REGNO_ANY, PAPITXT ("Cond. branch instructions mispredicted"), PRELOADS_6, 0, ABST_NONE},
187 {"PAPI_br_prc", NULL, REGNO_ANY, PAPITXT ("Cond. branch instructions correctly predicted"), PRELOADS_7, 0, ABST_NONE},
188 {"PAPI_fma_ins", NULL, REGNO_ANY, PAPITXT ("FMA instructions completed"), PRELOADS_65, 0, ABST_NONE},
189 {"PAPI_tot_iis", NULL, REGNO_ANY, PAPITXT ("Instructions issued"), PRELOADS_7, 0, ABST_NONE},
190 {"PAPI_tot_ins", NULL, REGNO_ANY, PAPITXT ("Instructions completed"), PRELOADS_7, 0, ABST_NONE},
191 {"PAPI_int_ins", NULL, REGNO_ANY, PAPITXT ("Integer instructions"), PRELOADS_7, 0, ABST_NONE},
192 {"PAPI_fp_ins", NULL, REGNO_ANY, PAPITXT ("Floating-point instructions"), PRELOADS_7, 0, ABST_NONE},
193 {"PAPI_ld_ins", NULL, REGNO_ANY, PAPITXT ("Load instructions"), PRELOADS_7, 0, ABST_NONE},
194 {"PAPI_sr_ins", NULL, REGNO_ANY, PAPITXT ("Store instructions"), PRELOADS_7, 0, ABST_NONE},
195 {"PAPI_br_ins", NULL, REGNO_ANY, PAPITXT ("Branch instructions"), PRELOADS_7, 0, ABST_NONE},
196 {"PAPI_vec_ins", NULL, REGNO_ANY, PAPITXT ("Vector/SIMD instructions"), PRELOADS_7, 0, ABST_NONE},
197 {"PAPI_res_stl", NULL, REGNO_ANY, PAPITXT ("Cycles stalled on any resource"), PRELOADS_7, 1, ABST_NONE},
198 {"PAPI_fp_stal", NULL, REGNO_ANY, PAPITXT ("Cycles the FP unit(s) are stalled"), PRELOADS_7, 1, ABST_NONE},
199 {"PAPI_tot_cyc", NULL, REGNO_ANY, PAPITXT ("Total cycles"), PRELOADS_7, 1, ABST_NONE},
200 {"PAPI_lst_ins", NULL, REGNO_ANY, PAPITXT ("Load/store instructions completed"), PRELOADS_7, 0, ABST_NONE},
201 {"PAPI_syc_ins", NULL, REGNO_ANY, PAPITXT ("Sync instructions completed"), PRELOADS_65, 0, ABST_NONE},
202 {"PAPI_l1_dch", NULL, REGNO_ANY, PAPITXT ("L1 D-cache hits"), PRELOADS_7, 0, ABST_NONE},
203 {"PAPI_l2_dch", NULL, REGNO_ANY, PAPITXT ("L2 D-cache hits"), PRELOADS_65, 0, ABST_NONE},
204 {"PAPI_l1_dca", NULL, REGNO_ANY, PAPITXT ("L1 D-cache accesses"), PRELOADS_7, 0, ABST_NONE},
205 {"PAPI_l2_dca", NULL, REGNO_ANY, PAPITXT ("L2 D-cache accesses"), PRELOADS_65, 0, ABST_NONE},
206 {"PAPI_l3_dca", NULL, REGNO_ANY, PAPITXT ("L3 D-cache accesses"), PRELOADS_6, 0, ABST_NONE},
207 {"PAPI_l1_dcr", NULL, REGNO_ANY, PAPITXT ("L1 D-cache reads"), PRELOADS_7, 0, ABST_NONE},
208 {"PAPI_l2_dcr", NULL, REGNO_ANY, PAPITXT ("L2 D-cache reads"), PRELOADS_65, 0, ABST_NONE},
209 {"PAPI_l3_dcr", NULL, REGNO_ANY, PAPITXT ("L3 D-cache reads"), PRELOADS_6, 0, ABST_NONE},
210 {"PAPI_l1_dcw", NULL, REGNO_ANY, PAPITXT ("L1 D-cache writes"), PRELOADS_7, 0, ABST_NONE},
211 {"PAPI_l2_dcw", NULL, REGNO_ANY, PAPITXT ("L2 D-cache writes"), PRELOADS_65, 0, ABST_NONE},
212 {"PAPI_l3_dcw", NULL, REGNO_ANY, PAPITXT ("L3 D-cache writes"), PRELOADS_6, 0, ABST_NONE},
213 {"PAPI_l1_ich", NULL, REGNO_ANY, PAPITXT ("L1 I-cache hits"), PRELOADS_7, 0, ABST_NONE},
214 {"PAPI_l2_ich", NULL, REGNO_ANY, PAPITXT ("L2 I-cache hits"), PRELOADS_65, 0, ABST_NONE},
215 {"PAPI_l3_ich", NULL, REGNO_ANY, PAPITXT ("L3 I-cache hits"), PRELOADS_6, 0, ABST_NONE},
216 {"PAPI_l1_ica", NULL, REGNO_ANY, PAPITXT ("L1 I-cache accesses"), PRELOADS_7, 0, ABST_NONE},
217 {"PAPI_l2_ica", NULL, REGNO_ANY, PAPITXT ("L2 I-cache accesses"), PRELOADS_65, 0, ABST_NONE},
218 {"PAPI_l3_ica", NULL, REGNO_ANY, PAPITXT ("L3 I-cache accesses"), PRELOADS_6, 0, ABST_NONE},
219 {"PAPI_l1_icr", NULL, REGNO_ANY, PAPITXT ("L1 I-cache reads"), PRELOADS_7, 0, ABST_NONE},
220 {"PAPI_l2_icr", NULL, REGNO_ANY, PAPITXT ("L2 I-cache reads"), PRELOADS_65, 0, ABST_NONE},
221 {"PAPI_l3_icr", NULL, REGNO_ANY, PAPITXT ("L3 I-cache reads"), PRELOADS_6, 0, ABST_NONE},
222 {"PAPI_l1_icw", NULL, REGNO_ANY, PAPITXT ("L1 I-cache writes"), PRELOADS_7, 0, ABST_NONE},
223 {"PAPI_l2_icw", NULL, REGNO_ANY, PAPITXT ("L2 I-cache writes"), PRELOADS_65, 0, ABST_NONE},
224 {"PAPI_l3_icw", NULL, REGNO_ANY, PAPITXT ("L3 I-cache writes"), PRELOADS_6, 0, ABST_NONE},
225 {"PAPI_l1_tch", NULL, REGNO_ANY, PAPITXT ("L1 total hits"), PRELOADS_7, 0, ABST_NONE},
226 {"PAPI_l2_tch", NULL, REGNO_ANY, PAPITXT ("L2 total hits"), PRELOADS_65, 0, ABST_NONE},
227 {"PAPI_l3_tch", NULL, REGNO_ANY, PAPITXT ("L3 total hits"), PRELOADS_6, 0, ABST_NONE},
228 {"PAPI_l1_tca", NULL, REGNO_ANY, PAPITXT ("L1 total accesses"), PRELOADS_7, 0, ABST_NONE},
229 {"PAPI_l2_tca", NULL, REGNO_ANY, PAPITXT ("L2 total accesses"), PRELOADS_65, 0, ABST_NONE},
230 {"PAPI_l3_tca", NULL, REGNO_ANY, PAPITXT ("L3 total accesses"), PRELOADS_6, 0, ABST_NONE},
231 {"PAPI_l1_tcr", NULL, REGNO_ANY, PAPITXT ("L1 total reads"), PRELOADS_7, 0, ABST_NONE},
232 {"PAPI_l2_tcr", NULL, REGNO_ANY, PAPITXT ("L2 total reads"), PRELOADS_65, 0, ABST_NONE},
233 {"PAPI_l3_tcr", NULL, REGNO_ANY, PAPITXT ("L3 total reads"), PRELOADS_6, 0, ABST_NONE},
234 {"PAPI_l1_tcw", NULL, REGNO_ANY, PAPITXT ("L1 total writes"), PRELOADS_7, 0, ABST_NONE},
235 {"PAPI_l2_tcw", NULL, REGNO_ANY, PAPITXT ("L2 total writes"), PRELOADS_65, 0, ABST_NONE},
236 {"PAPI_l3_tcw", NULL, REGNO_ANY, PAPITXT ("L3 total writes"), PRELOADS_6, 0, ABST_NONE},
237 {"PAPI_fml_ins", NULL, REGNO_ANY, PAPITXT ("FP multiply instructions"), PRELOADS_7, 0, ABST_NONE},
238 {"PAPI_fad_ins", NULL, REGNO_ANY, PAPITXT ("FP add instructions"), PRELOADS_7, 0, ABST_NONE},
239 {"PAPI_fdv_ins", NULL, REGNO_ANY, PAPITXT ("FP divide instructions"), PRELOADS_7, 0, ABST_NONE},
240 {"PAPI_fsq_ins", NULL, REGNO_ANY, PAPITXT ("FP square root instructions"), PRELOADS_65, 0, ABST_NONE},
241 {"PAPI_fnv_ins", NULL, REGNO_ANY, PAPITXT ("FP inverse instructions"), PRELOADS_7, 0, ABST_NONE},
242 {"PAPI_fp_ops", NULL, REGNO_ANY, PAPITXT ("FP operations"), PRELOADS_7, 0, ABST_NONE},
243 {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE}
246 #if defined(__i386__) || defined(__x86_64)
247 /* Kernel profiling pseudo-chip, OBSOLETE (To support 12.3 and earlier, TBR).
   Three aliases (kcycles, kucycles, kthr) whose internal names equal their
   alias names.  All are assigned register 0 with PRELOADS_5 and timecvt 1.
   The list ends with a row whose name is NULL.  */
248 static Hwcentry kproflist[] = {
249 {"kcycles", "kcycles", 0, STXT ("KCPU Cycles"), PRELOADS_5, 1, ABST_NONE},
250 {"kucycles", "kucycles", 0, STXT ("KUCPU Cycles"), PRELOADS_5, 1, ABST_NONE},
251 {"kthr", "kthr", 0, STXT ("KTHR Cycles"), PRELOADS_5, 1, ABST_NONE},
252 {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE}
/* Counter table named for the Pentium II.  It has three groups of rows:
   visible aliases (non-NULL metric text) usable on any register, one hidden
   entry (NULL metric) for the raw cpu_clk_unhalted counter, and hidden
   convenience aliases cycles0/cycles1/insts0/insts1 pinned to registers
   0 and 1.  The list ends with a row whose name is NULL.  */
255 static Hwcentry pentiumIIlist[] = {
256 /* note -- missing entries for dtlbm, ecm */
257 {"cycles", "cpu_clk_unhalted", REGNO_ANY, STXT ("CPU Cycles"), PRELOADS_7, 1, ABST_NONE},
258 {"insts", "inst_retired", REGNO_ANY, STXT ("Instructions Executed"), PRELOADS_7, 0, ABST_NONE},
259 {"icm", "ifu_ifetch_miss", REGNO_ANY, STXT ("I$ Misses"), PRELOADS_5, 0, ABST_NONE},
260 {"dcrm", "dcu_m_lines_in", REGNO_ANY, STXT ("D$ Read Misses"), PRELOADS_5, 0, ABST_NONE},
261 {"dcwm", "dcu_m_lines_out", REGNO_ANY, STXT ("D$ Write Misses"), PRELOADS_5, 0, ABST_NONE},
262 {"flops", "flops", REGNO_ANY, STXT ("Floating-point Ops"), PRELOADS_7, 0, ABST_NONE},
263 {"itlbm", "itlb_miss", REGNO_ANY, STXT ("ITLB Misses"), PRELOADS_5, 0, ABST_NONE},
264 {"ecim", "l2_ifetch", REGNO_ANY, STXT ("E$ Instr. Misses"), PRELOADS_5, 0, ABST_NONE},
266 /* explicit definitions of (hidden) entries for proper counters */
267 /* Only counters that can be time converted, or are load-store need to be in this table */
268 {"cpu_clk_unhalted", NULL, REGNO_ANY, NULL, PRELOADS_7, 1, ABST_NONE},
270 /* additional (hidden) aliases for convenience */
271 {"cycles0", "cpu_clk_unhalted", 0, NULL, PRELOADS_75, 1, ABST_NONE},
272 {"cycles1", "cpu_clk_unhalted", 1, NULL, PRELOADS_75, 1, ABST_NONE},
273 {"insts0", "inst_retired", 0, NULL, PRELOADS_75, 0, ABST_NONE},
274 {"insts1", "inst_retired", 1, NULL, PRELOADS_75, 0, ABST_NONE},
275 {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE}
/* Counter table named for the Pentium III.  Only the cycles and insts
   aliases are visible; the remaining rows are the hidden raw
   cpu_clk_unhalted entry and hidden aliases pinned to registers 0 and 1.
   The list ends with a row whose name is NULL.  */
278 static Hwcentry pentiumIIIlist[] = {
279 /* note -- many missing entries; no reference machine to try */
280 {"cycles", "cpu_clk_unhalted", REGNO_ANY, STXT ("CPU Cycles"), PRELOADS_7, 1, ABST_NONE},
281 {"insts", "inst_retired", REGNO_ANY, STXT ("Instructions Executed"), PRELOADS_7, 0, ABST_NONE},
283 /* explicit definitions of (hidden) entries for proper counters */
284 /* Only counters that can be time converted, or are load-store need to be in this table */
285 {"cpu_clk_unhalted", NULL, REGNO_ANY, NULL, PRELOADS_7, 1, ABST_NONE},
287 /* additional (hidden) aliases for convenience */
288 {"cycles0", "cpu_clk_unhalted", 0, NULL, PRELOADS_75, 1, ABST_NONE},
289 {"cycles1", "cpu_clk_unhalted", 1, NULL, PRELOADS_75, 1, ABST_NONE},
290 {"insts0", "inst_retired", 0, NULL, PRELOADS_75, 0, ABST_NONE},
291 {"insts1", "inst_retired", 1, NULL, PRELOADS_75, 0, ABST_NONE},
292 {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE}
/* Counter table named for the Pentium 4.  The visible aliases carry
   '~'-separated attribute settings in their internal names (threshold,
   complement, compare, emask).  The hidden convenience aliases are pinned
   to registers 5 and 6 (cycles) and 15 and 16 (insts).
   The list ends with a row whose name is NULL.  */
295 static Hwcentry pentium4[] = {
296 {"cycles", "TC_deliver_mode~threshold=0xf~complement=1~compare=1", REGNO_ANY, STXT ("CPU Cycles"), PRELOADS_7, 1, ABST_NONE},
297 {"insts", "instr_retired~emask=0x3", REGNO_ANY, STXT ("Instructions Executed"), PRELOADS_7, 0, ABST_NONE},
298 {"l1m", "BSQ_cache_reference~emask=0x0507", REGNO_ANY, STXT ("L1 Cache Misses"), PRELOADS_7, 0, ABST_NONE},
299 {"l2h", "BSQ_cache_reference~emask=0x0007", REGNO_ANY, STXT ("L2 Cache Hits"), PRELOADS_7, 0, ABST_NONE},
300 {"l2m", "BSQ_cache_reference~emask=0x0500", REGNO_ANY, STXT ("L2 Cache Misses"), PRELOADS_6, 0, ABST_NONE},
302 /* explicit definitions of (hidden) entries for proper counters */
303 /* Only counters that can be time converted, or are load-store need to be in this table */
304 {"TC_deliver_mode", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
305 {"machine_clear", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
307 /* additional (hidden) aliases, for convenience */
308 {"cycles0", "TC_deliver_mode~threshold=0xf~complement=1~compare=1", 5, NULL, PRELOADS_75, 1, ABST_NONE},
309 {"cycles1", "TC_deliver_mode~threshold=0xf~complement=1~compare=1", 6, NULL, PRELOADS_75, 1, ABST_NONE},
310 {"insts0", "instr_retired~emask=0x3", 15, NULL, PRELOADS_75, 0, ABST_NONE},
311 {"insts1", "instr_retired~emask=0x3", 16, NULL, PRELOADS_75, 0, ABST_NONE},
312 {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE}
/* Counter table named for Intel Core2.
   The cycles and insts aliases are each listed more than once: rows tagged
   SOLARIS_ONLY use REGNO_INVALID and are hidden for Linux data collection,
   while rows tagged LINUX_ONLY use REGNO_ANY.  Both tags are #defined
   inside the initializer and are not #undef'd within this table.
   After the visible aliases come hidden entries (NULL int_name and metric)
   for raw counters with timecvt 1, then hidden aliases pinned to registers
   0 and 1.  The list ends with a row whose name is NULL.  */
315 static Hwcentry intelCore2list[] = {
316 // For post-processing, both Linux and Solaris definitions need to be "live".
317 // However, for data collection, OS-specific definitions may need to be hidden.
318 // Use REGNO_INVALID for definitions that should be hidden for data collection.
319 #define LINUX_ONLY REGNO_ANY
320 #define SOLARIS_ONLY REGNO_INVALID /* hidden for Linux data collection */
322 {"cycles", "cpu_clk_unhalted.core", /*6759307*/ SOLARIS_ONLY, STXT ("CPU Cycles"), PRELOADS_75, 1, ABST_NONE},
323 {"cycles", "cpu_clk_unhalted.thread", /*6759307*/ SOLARIS_ONLY, STXT ("CPU Cycles"), PRELOADS_75, 1, ABST_NONE},
324 /* Linux Note: 7046312 Many HWC tests fail on Core2 systems with perf_events if above alias used */
325 {"cycles", "cpu_clk_unhalted", LINUX_ONLY, STXT ("CPU Cycles"), PRELOADS_75, 1, ABST_NONE},
327 {"insts", "instr_retired.any", SOLARIS_ONLY, STXT ("Instructions Executed"), PRELOADS_75, 0, ABST_NONE},
328 /* Linux Note: 7046312 Many HWC tests fail on Core2 systems with perf_events if above alias used */
329 {"insts", "inst_retired", LINUX_ONLY, STXT ("Instructions Executed"), PRELOADS_75, 0, ABST_NONE},
331 // The following counters were identified in "Cycle Accounting Analysis on Intel Core2 Processors" by David Levinthal
332 {"uops_stalled", "rs_uops_dispatched~cmask=1~inv=1", REGNO_ANY, STXT ("uOps Stalled"), PRELOADS_7, 1, ABST_NONE},
333 {"l2m", "mem_load_retired~umask=0x08", REGNO_ANY, STXT ("L2 Line Misses"), PRELOADS_5, 0, ABST_NONE},
334 {"dtlbm", "mem_load_retired~umask=0x10", REGNO_ANY, STXT ("L1 DTLB Misses"), PRELOADS_5, 0, ABST_NONE},
335 {"l1m", "mem_load_retired~umask=0x02", REGNO_ANY, STXT ("L1 Line Misses"), PRELOADS_6, 0, ABST_NONE},
336 // {"stalls_resources","resource_stalls~umask=0x1f", REGNO_ANY, STXT("Resource Stalls"), PRELOADS_6, 1, ABST_NONE},
337 {"rs_full", "resource_stalls~umask=0x02", REGNO_ANY, STXT ("Reservation Station Full"), PRELOADS_6, 1, ABST_NONE},
338 {"br_miss_flush", "resource_stalls~umask=0x10", REGNO_ANY, STXT ("Mispredicted Branch Flushes"), PRELOADS_6, 1, ABST_NONE},
339 {"ld_st_full", "resource_stalls~umask=0x04", REGNO_ANY, STXT ("Load/Store Buffers Full"), PRELOADS_6, 1, ABST_NONE},
340 {"rob_full", "resource_stalls~umask=0x01", REGNO_ANY, STXT ("Reorder Buffer Full"), PRELOADS_6, 1, ABST_NONE},
341 {"slow_decode", "ild_stall", REGNO_ANY, STXT ("Slow Instruction Decode"), PRELOADS_6, 1, ABST_NONE},
342 {"br_miss", "br_cnd_missp_exec", REGNO_ANY, STXT ("Mispredicted Branches"), PRELOADS_5, 0, ABST_NONE},
343 {"ret_miss", "br_call_missp_exec", REGNO_ANY, STXT ("Mispredicted Return Calls"), PRELOADS_5, 0, ABST_NONE},
344 {"div_busy", "idle_during_div", REGNO_ANY, STXT ("Divider Unit Busy"), PRELOADS_5, 1, ABST_NONE},
345 {"fp_assists", "fp_assist", REGNO_ANY, STXT ("FP Microcode Assists"), PRELOADS_5, 0, ABST_NONE},
346 {"bus_busy", "bus_drdy_clocks~umask=0x60", REGNO_ANY, STXT ("Busy Data Bus"), PRELOADS_5, 1, ABST_NONE},
348 /* explicit definitions of (hidden) entries for proper counters */
349 /* Only counters that can be time converted, or are load-store need to be in this table */
350 {/*30a*/"cpu_clk_unhalted.core", /*6759307*/ NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE},
351 {/*30a*/"cpu_clk_unhalted.thread", /*6759307*/ NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE},
352 {/*03*/"store_block", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
353 {/*03*/"store_block.drain_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
354 {/*03*/"store_block.order", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
355 {/*03*/"store_block.snoop", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
356 {/*09*/"memory_disambiguation.reset", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
357 {/*0c*/"page_walks.cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
358 {/*14*/"cycles_div_busy", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
359 {/*18*/"idle_during_div", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
360 {/*19*/"delayed_bypass.load", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
361 {/*21*/"l2_ads", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
362 {/*23*/"l2_dbus_busy_rd", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
363 {/*32*/"l2_no_req", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
364 {/*3c*/"cpu_clk_unhalted", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
365 {/*3c*/"cpu_clk_unhalted.core_p", NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE},
366 {/*3c*/"cpu_clk_unhalted.bus", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
367 {/*3c*/"cpu_clk_unhalted.no_other", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
368 {/*42*/"l1d_cache_lock.duration", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
369 {/*62*/"bus_drdy_clocks", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
370 {/*63*/"bus_lock_clocks", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
371 {/*64*/"bus_data_rcv", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
372 {/*7a*/"bus_hit_drv", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
373 {/*7b*/"bus_hitm_drv", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
374 {/*7d*/"busq_empty", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
375 {/*7e*/"snoop_stall_drv", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
376 {/*7f*/"bus_io_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
377 {/*83*/"inst_queue", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
378 {/*83*/"inst_queue.full", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
379 {/*86*/"cycles_l1i_mem_stalled", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
380 {/*87*/"ild_stall", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
381 {/*a1*/"rs_uops_dispatched", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
382 {/*a1*/"rs_uops_dispatched_port", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
383 {/*a1*/"rs_uops_dispatched_port.0", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
384 {/*a1*/"rs_uops_dispatched_port.1", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
385 {/*a1*/"rs_uops_dispatched_port.2", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
386 {/*a1*/"rs_uops_dispatched_port.3", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
387 {/*a1*/"rs_uops_dispatched_port.4", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
388 {/*a1*/"rs_uops_dispatched_port.5", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
389 {/*6c*/"cycles_int", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
390 {/*6c*/"cycles_int.masked", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
391 {/*6c*/"cycles_int.pending_and_masked", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
392 {/*d2*/"rat_stalls", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
393 {/*d2*/"rat_stalls.rob_read_port", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
394 {/*d2*/"rat_stalls.partial_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
395 {/*d2*/"rat_stalls.flags", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
396 {/*d2*/"rat_stalls.fpsw", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
397 {/*d2*/"rat_stalls.any", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
398 {/*d2*/"rat_stalls.other_serialization_stalls", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
399 {/*d4*/"seg_rename_stalls", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
400 {/*d4*/"seg_rename_stalls.es", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
401 {/*d4*/"seg_rename_stalls.ds", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
402 {/*d4*/"seg_rename_stalls.fs", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
403 {/*d4*/"seg_rename_stalls.gs", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
404 {/*d4*/"seg_rename_stalls.any", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
405 {/*dc*/"resource_stalls", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
406 {/*dc*/"resource_stalls.rob_full", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
407 {/*dc*/"resource_stalls.rs_full", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
408 {/*dc*/"resource_stalls.ld_st", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
409 {/*dc*/"resource_stalls.fpcw", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
410 {/*dc*/"resource_stalls.br_miss_clear", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
411 {/*dc*/"resource_stalls.any", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
412 /* "Architectural" events: */
413 {/*3c*/"unhalted-core-cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
415 /* additional (hidden) aliases for convenience */
416 {"cycles0", "cpu_clk_unhalted", 0, NULL, PRELOADS_8, 1, ABST_NONE},
417 {"cycles1", "cpu_clk_unhalted", 1, NULL, PRELOADS_8, 1, ABST_NONE},
418 {"insts0", "inst_retired", 0, NULL, PRELOADS_8, 0, ABST_NONE},
419 {"insts1", "inst_retired", 1, NULL, PRELOADS_8, 0, ABST_NONE},
420 {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE}
424 static Hwcentry intelNehalemList[] = {
425 /* 6832635: on Linux, we're not seeing consistent overflows on FFCs */
426 /* 15634344==6940930: HWC overflow profiling can cause system hang on Solaris/core-i7 systems */
427 /* 17578620: counter overflow for fixed-function counters hangs systems */
428 /* same issues for intelSandyBridgeList and intelHaswellList */
429 PERF_EVENTS_SW_EVENT_ALIASES
430 USE_INTEL_REF_CYCLES (133)
431 {"cycles", "cpu_clk_unhalted.thread_p", REGNO_ANY, STXT ("CPU Cycles"), PRELOADS_75, 1, ABST_NONE},
432 {"insts", "inst_retired.any_p", REGNO_ANY, STXT ("Instructions Executed"), PRELOADS_75, 0, ABST_NONE},
433 // cpu_clk_unhalted.ref: at the ref frequency of the cpu. Should not be affected by Speedstep or Turbo.
434 // cpu_clk_unhalted.thread_p: with HT & 2 threads, 2x cycles. Affected by Speedstep and Turbo.
436 // PEBs (Sampling)
437 {"l2m_latency", "mem_inst_retired.latency_above_threshold", REGNO_ANY, STXT ("L2 Cache Miss Est. Latency"), PRELOADS_4, 33, ABST_EXACT_PEBS_PLUS1},
439 // See file hwctable.README.corei7
440 {"dch", "mem_load_retired.l1d_hit", REGNO_ANY, STXT ("L1 D-cache Hits"), PRELOADS_7, 0, ABST_NONE},
441 {"dcm", "0xCB~umask=0x1e", REGNO_ANY, STXT ("L1 D-Cache Misses"), PRELOADS_65, 0, ABST_NONE}, /*mem_load_retired*/
442 {"lfbdh", "mem_load_retired.hit_lfb", REGNO_ANY, STXT ("LFB D-cache Hits"), PRELOADS_65, 0, ABST_NONE},
443 {"l2h", "mem_load_retired.l2_hit", REGNO_ANY, STXT ("L2 Cache Hits"), PRELOADS_65, 0, ABST_NONE},
444 {"l2m", "0xCB~umask=0x1c", REGNO_ANY, STXT ("L2 Cache Misses"), PRELOADS_6, 0, ABST_NONE}, /*mem_load_retired*/
445 {"l3h", "mem_load_retired.llc_unshared_hit", REGNO_ANY, STXT ("L3 Cache Hit w/o Snoop"), PRELOADS_6, 0, ABST_NONE},
446 {"l3h_stall", "mem_load_retired.llc_unshared_hit", REGNO_ANY, STXT ("L3 Cache Hit w/o Snoop x 35: Est. Stalls"), PRELOADS_6, 35, ABST_NONE},
447 {"l3hsnoop", "mem_load_retired.other_core_l2_hit_hitm", REGNO_ANY, STXT ("L3 Cache Hit w/Snoop"), PRELOADS_6, 0, ABST_NONE},
448 {"l3hsnoop_stall", "mem_load_retired.other_core_l2_hit_hitm", REGNO_ANY, STXT ("L3 Cache Hit w/Snoop x 74: Est. Stalls"), PRELOADS_6, 74, ABST_NONE},
449 {"l3m", "mem_load_retired.llc_miss", REGNO_ANY, STXT ("L3 Cache Misses"), PRELOADS_5, 0, ABST_NONE},
450 {"l3m_stall", "mem_load_retired.llc_miss", REGNO_ANY, STXT ("L3 Cache Misses x 180: Estimated Stalls"), PRELOADS_5, 180, ABST_NONE},
451 {"dtlbm", "dtlb_load_misses.walk_completed", REGNO_ANY, STXT ("DTLB Misses"), PRELOADS_6, 0, ABST_NONE},
452 {"dtlbm_stall", "dtlb_load_misses.walk_completed", REGNO_ANY, STXT ("DTLB Misses x 30: Estimated Stalls"), PRELOADS_6, 30, ABST_NONE},
453 {"addr_alias_stall", "partial_address_alias", REGNO_ANY, STXT ("Partial Address Aliases x 3: Est. Stalls"), PRELOADS_6, 3, ABST_NONE},
454 {"uope_stall", "uops_executed.port234~cmask=1~inv=1", REGNO_ANY, STXT ("UOP Execute Stalls per Core"), PRELOADS_7, 1, ABST_NONE},
455 {"uopr_stall", "uops_retired.any~cmask=1~inv=1", REGNO_ANY, STXT ("UOP Retired Stalls"), PRELOADS_7, 1, ABST_NONE},
456 {"itlbm", "itlb_miss_retired", REGNO_ANY, STXT ("ITLB Misses"), PRELOADS_6, 0, ABST_NONE},
457 {"l1i_stall", "l1i.cycles_stalled", REGNO_ANY, STXT ("L1 I-cache Stalls"), PRELOADS_6, 1, ABST_NONE},
458 {"br_rets", "br_inst_retired.all_branches", REGNO_ANY, STXT ("Branch Instruction Retires"), PRELOADS_7, 0, ABST_NONE},
459 {"br_misp", "br_misp_exec.any", REGNO_ANY, STXT ("Branch Mispredicts"), PRELOADS_6, 0, ABST_NONE},
460 {"mach_clear", "machine_clears.cycles", REGNO_ANY, STXT ("Machine Clear Asserted"), PRELOADS_6, 1, ABST_NONE},
461 {"fp_mmx", "fp_mmx_trans.any", REGNO_ANY, STXT ("FP-MMX Transistions"), PRELOADS_6, 0, ABST_NONE},
462 {"div_busy", "arith.cycles_div_busy", REGNO_ANY, STXT ("Divider Busy Cycles"), PRELOADS_6, 1, ABST_NONE},
464 /* explicit definitions of (hidden) entries for proper counters */
465 /* Only counters that can be time converted, or are load-store need to be in this table */
466 {/*30a*/"cpu_clk_unhalted.core", /*6759307*/ NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE},
467 {/*30a*/"cpu_clk_unhalted.thread", /*6759307*/ NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE},
468 {/*04*/"sb_drain.cycles", /*future*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
469 {/*08.04*/"dtlb_load_misses.walk_cycles", /*westmere*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
470 //{/*0e*/"uops_issued.stalled_cycles",/*future, multibit*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
471 {/*09*/"memory_disambiguation.reset", /*future*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
472 {/*09*/"memory_disambiguation.watch_cycles", /*future*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
473 {/*0b*/"mem_inst_retired.latency_above_threshold", /*PEBS*/ NULL, REGNO_ANY, NULL, PRELOADS_4, 33, ABST_EXACT_PEBS_PLUS1}, //non-standard overflow
474 {/*14*/"arith.cycles_div_busy", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
475 {/*17*/"inst_queue_write_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
476 {/*1d*/"hw_int.cycles_masked", /*future*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
477 {/*1d*/"hw_int.cycles_pending_and_masked", /*future*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
478 {/*3c*/"cpu_clk_unhalted.thread_p", NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE},
479 {/*48*/"l1d_pend_miss.load_buffers_full", /*future*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
480 {/*49.04*/"dtlb_misses.walk_cycles", /*westmere*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
481 {/*4e*/"sfence_cycles", /*future*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
482 {/*4f.10*/"ept.walk_cycles", /*westmere*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
483 {/*60*/"offcore_requests_outstanding.demand.read_data", /*future*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
484 {/*60*/"offcore_requests_outstanding.demand.read_code", /*future*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
485 {/*60*/"offcore_requests_outstanding.demand.rfo", /*future*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
486 {/*60*/"offcore_requests_outstanding.any.read", /*future*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
487 {/*63*/"cache_lock_cycles.l1d", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
488 {/*63*/"cache_lock_cycles.l1d_l2", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
489 {/*80*/"l1i.cycles_stalled", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
490 {/*85*/"itlb_misses.walk_cycles", /*future*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
491 {/*85*/"itlb_misses.pmh_busy_cycles", /*future*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
492 {/*87*/"ild_stall.lcp", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
493 {/*87*/"ild_stall.mru", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
494 {/*87*/"ild_stall.iq_full", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
495 {/*87*/"ild_stall.regen", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
496 {/*87*/"ild_stall.any", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
497 {/*a2*/"resource_stalls.any", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
498 {/*a2*/"resource_stalls.load", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
499 {/*a2*/"resource_stalls.rs_full", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
500 {/*a2*/"resource_stalls.store", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
501 {/*a2*/"resource_stalls.rob_full", /*future*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
502 {/*a2*/"resource_stalls.fpcw", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
503 {/*a2*/"resource_stalls.mxcsr", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
504 {/*a2*/"resource_stalls.other", /*future*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
505 {/*b0*/"offcore_requests_sq_full", /*future*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
506 {/*b3*/"snoopq_requests_outstanding.data", /*future*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
507 {/*b3*/"snoopq_requests_outstanding.invalidate", /*future*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
508 {/*b3*/"snoopq_requests_outstanding.code", /*future*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
509 //{/*c2*/"uops_retired.stalled_cycles",/*future, multibit*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
510 {/*c3*/"machine_clears.cycles", /*future*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
511 {/*d2*/"rat_stalls.flags", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
512 {/*d2*/"rat_stalls.registers", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
513 {/*d2*/"rat_stalls.rob_read_port", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
514 {/*d2*/"rat_stalls.scoreboard", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
515 {/*d2*/"rat_stalls.any", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
516 {/*d4*/"seg_rename_stalls", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
517 {/*f6*/"sq_full_stall_cycles", /*future*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
518 /* "Architectural" events: */
519 {/*3c*/"unhalted-core-cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
520 PERF_EVENTS_SW_EVENT_DEFS
522 /* additional (hidden) aliases for convenience */
523 #if 0
524 USE_INTEL_REF_CYCLES (133),
525 #endif
526 {"insts0", "inst_retired.any_p", 0, NULL, PRELOADS_8, 0, ABST_NONE},
527 {"insts1", "inst_retired.any_p", 1, NULL, PRELOADS_8, 0, ABST_NONE},
528 {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE}
/* Hardware counter table for Intel Sandy Bridge processors (going by the
   array name).  Contents, in order:
     - entries supplied by the PERF_EVENTS_SW_EVENT_ALIASES and
       USE_INTEL_REF_CYCLES macros;
     - aliases that carry an STXT description string in the fourth field;
     - "hidden" entries whose second and fourth fields are NULL;
     - entries supplied by PERF_EVENTS_SW_EVENT_DEFS;
     - the "insts0"/"insts1" aliases;
     - a final entry whose name is NULL.
   NOTE(review): the meaning of each positional field comes from the
   Hwcentry declaration, which is not visible here -- confirm there before
   changing any value.  */
532 static Hwcentry intelSandyBridgeList[] = {
533 /* see comments for "cycles" and "insts" for intelNehalemList */
534 PERF_EVENTS_SW_EVENT_ALIASES
/* NOTE(review): the argument is 100 here but 133 in intelNehalemList;
   presumably a reference clock rate -- confirm against the macro.  */
535 USE_INTEL_REF_CYCLES (100)
536 {"cycles", "cpu_clk_unhalted.thread_p", REGNO_ANY, STXT ("CPU Cycles"), PRELOADS_75, 1, ABST_NONE},
537 {"insts", "inst_retired.any_p", REGNO_ANY, STXT ("Instructions Executed"), PRELOADS_75, 0, ABST_NONE},
539 // PEBS (sampling)
540 {"l2m_latency", "mem_trans_retired.load_latency", REGNO_ANY, STXT ("L2 Cache Miss Est. Latency"), PRELOADS_4, 65, ABST_EXACT_PEBS_PLUS1},
542 // See file hwctable.README.sandybridge
543 {"dch", "mem_load_uops_retired.l1_hit", REGNO_ANY, STXT ("L1 D-cache Hits"), PRELOADS_7, 0, ABST_NONE},
544 {"dcm", "mem_load_uops_retired.l1_miss", REGNO_ANY, STXT ("L1 D-cache Misses"), PRELOADS_65, 0, ABST_NONE}, /*mem_load_uops_retired*/
545 {"l2h", "mem_load_uops_retired.l2_hit", REGNO_ANY, STXT ("L2 Cache Hits"), PRELOADS_65, 0, ABST_NONE},
546 {"l2m", "mem_load_uops_retired.l2_miss", REGNO_ANY, STXT ("L2 Cache Misses"), PRELOADS_6, 0, ABST_NONE}, /*mem_load_uops_retired*/
547 // Intel errata: BT241 and BT243 say the mem_load_uops_retired.llc* counters may not be reliable on some CPU variants
548 {"l3h", "mem_load_uops_retired.llc_hit", REGNO_ANY, STXT ("L3 Cache Hit w/o Snoop"), PRELOADS_6, 0, ABST_NONE}, // may undercount
549 {"l3m", "longest_lat_cache.miss", REGNO_ANY, STXT ("L3 Cache Misses"), PRELOADS_5, 0, ABST_NONE},
551 /* dtlbm has not been confirmed via Intel white paper */
552 {"dtlbm", "dtlb_load_misses.walk_completed", REGNO_ANY, STXT ("DTLB Misses"), PRELOADS_6, 0, ABST_NONE},
553 {"dtlbm_stall", "dtlb_load_misses.walk_completed", REGNO_ANY, STXT ("DTLB Misses x 30: Estimated Stalls"), PRELOADS_6, 30, ABST_NONE},
/* NOTE(review): "dtlbm" and "dtlbm_stall" are listed a second time below,
   differing from the two entries above only in the second field
   (demand_ld_walk_completed instead of walk_completed).  Presumably an
   alternative event name for other CPU variants -- confirm how duplicate
   alias names are resolved by the lookup code.  */
554 {"dtlbm", "dtlb_load_misses.demand_ld_walk_completed", REGNO_ANY, STXT ("DTLB Misses"), PRELOADS_6, 0, ABST_NONE},
555 {"dtlbm_stall", "dtlb_load_misses.demand_ld_walk_completed", REGNO_ANY, STXT ("DTLB Misses x 30: Estimated Stalls"), PRELOADS_6, 30, ABST_NONE},
557 /* explicit definitions of (hidden) entries for proper counters */
558 /* Only counters that can be time converted, or are load-store need to be in this table */
/* The leading comment in each entry below looks like the hex event code
   and unit mask ("xx"/"??" where unknown) -- TODO confirm.  */
559 {/* 30a */"cpu_clk_unhalted.thread", /*15634344==6940930*/ NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE},
560 //{/* 30a */"cpu_clk_unhalted.core", /*6759307*/ NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE},
561 {/*08.04*/"dtlb_load_misses.walk_duration", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
562 {/*08.84*/"dtlb_load_misses.demand_ld_walk_duration", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
563 {/*0d.03*/"int_misc.recovery_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
564 {/*0d.40*/"int_misc.rat_stall_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
565 {/*0e.01*/"uops_issued.stall_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
566 {/*0e.01*/"uops_issued.core_stall_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
567 {/*14.01*/"arith.fpu_div_active", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
568 {/*3c.00*/"cpu_clk_unhalted.thread_p", NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE},
569 {/*48.01*/"l1d_pend_miss.pending_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
570 {/*49.04*/"dtlb_store_misses.walk_duration", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
571 {/*59.20*/"partial_rat_stalls.flags_merge_uop", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
572 {/*59.20*/"partial_rat_stalls.flags_merge_uop_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
573 {/*59.40*/"partial_rat_stalls.slow_lea_window", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
574 //{/*59.80*/"partial_rat_stalls.mul_single_uop", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
575 {/*5b.0c*/"resource_stalls2.all_fl_empty", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
576 {/*5b.0f*/"resource_stalls2.all_prf_control", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
577 {/*5b.40*/"resource_stalls2.bob_full", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
578 {/*5b.4f*/"resource_stalls2.ooo_rsrc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
579 {/*5c.01*/"cpl_cycles.ring0", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
580 {/*5c.02*/"cpl_cycles.ring123", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
581 {/*5c.xx*/"cpl_cycles.ring0_trans", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
582 {/*5c.xx*/"cpl_cycles.ring0_transition", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
583 {/*5e.01*/"rs_events.empty_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
584 {/*60.01*/"offcore_requests_outstanding.cycles_with_demand_data_rd", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
585 {/*60.01*/"offcore_requests_outstanding.demand_data_rd_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
586 {/*60.04*/"offcore_requests_outstanding.cycles_with_demand_rfo", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
587 {/*60.04*/"offcore_requests_outstanding.demand_rfo_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
588 {/*60.08*/"offcore_requests_outstanding.cycles_with_data_rd", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
589 {/*60.08*/"offcore_requests_outstanding.all_data_rd_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
590 {/*60.02*/"offcore_requests_outstanding.demand_code_rd_cycles", /*?*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
591 {/*63.01*/"lock_cycles.split_lock_uc_lock_duration", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
592 {/*63.02*/"lock_cycles.cache_lock_duration", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
593 {/*79.00*/"idq.empty", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
594 {/*79.04*/"idq.mite_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
595 {/*79.08*/"idq.dsb_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
596 {/*79.10*/"idq.ms_dsb_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
597 {/*79.20*/"idq.ms_mite_uops_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
598 {/*79.20*/"idq.ms_mite_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
599 {/*79.30*/"idq.ms_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
600 {/*79.18*/"idq.all_dsb_cycles_any_uops", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
601 {/*79.18*/"idq.all_dsb_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
602 {/*79.18*/"idq.all_dsb_cycles_4_uops", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
603 {/*79.24*/"idq.all_mite_cycles_any_uops", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
604 {/*79.24*/"idq.all_mite_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
605 {/*79.24*/"idq.all_mite_cycles_4_uops", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
606 {/*79.3c*/"idq.mite_all_cycles", /* Linux, but not in docs? */ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
607 {/*80.04*/"icache.ifetch_stall", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
608 {/*85.04*/"itlb_misses.walk_duration", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
609 {/*87.01*/"ild_stall.lcp", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
610 {/*87.04*/"ild_stall.iq_full", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
611 {/*9c.xx*/"idq_uops_not_delivered.cycles_0_uops_deliv.core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
612 {/*9c.xx*/"idq_uops_not_delivered.cycles_le_1_uop_deliv.core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
613 {/*9c.xx*/"idq_uops_not_delivered.cycles_le_2_uop_deliv.core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
614 {/*9c.xx*/"idq_uops_not_delivered.cycles_le_3_uop_deliv.core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
615 {/*9c.01*/"idq_uops_not_delivered.cycles_ge_1_uop_deliv.core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
616 {/*9c.01*/"idq_uops_not_delivered.cycles_fe_was_ok", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
617 {/*a1.01*/"uops_executed_port.port_0", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
618 {/*a1.02*/"uops_executed_port.port_1", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
619 {/*a1.04*/"uops_executed_port.port_2", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
620 {/*a1.08*/"uops_executed_port.port_3", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
621 {/*a1.10*/"uops_executed_port.port_4", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
622 {/*a1.20*/"uops_executed_port.port_5", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
623 {/*a2.01*/"resource_stalls.any", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
624 {/*a2.02*/"resource_stalls.lb", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
625 {/*a2.04*/"resource_stalls.rs", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
626 {/*a2.08*/"resource_stalls.sb", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
627 {/*a2.0a*/"resource_stalls.lb_sb", /*sb-ep*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
628 {/*a2.0e*/"resource_stalls.mem_rs", /*sb-ep*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
629 {/*a2.10*/"resource_stalls.rob", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
630 {/*a2.20*/"resource_stalls.fcsw", /*sb*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
631 {/*a2.40*/"resource_stalls.mxcsr", /*sb*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
632 {/*a2.80*/"resource_stalls.other", /*sb*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
633 {/*a2.F0*/"resource_stalls.ooo_rsrc", /*sb-ep*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
635 {/*a3.01*/"cycle_activity.cycles_l2_pending", /*F6M62*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
636 {/*??.??*/"cycle_activity.stalls_l2_pending", /*F6M62*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
637 {/*a3.02*/"cycle_activity.cycles_ldm_pending", /*F6M62*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
638 {/*??.??*/"cycle_activity.stalls_ldm_pending", /*F6M62*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
639 {/*a3.04*/"cycle_activity.cycles_no_execute", /*F6M62*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
640 {/*a3.04*/"cycle_activity.cycles_no_dispatch", /*sandybridge*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
641 {/*a3.08*/"cycle_activity.cycles_l1d_pending", /*F6M62*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
642 {/*??.??*/"cycle_activity.stalls_l1d_pending", /*F6M62*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
644 {/*ab.02*/"dsb2mite_switches.penalty_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
/* NOTE(review): "uops_executed.stall_cycles" is listed twice (here and two
   entries further down) with identical field values; only the annotating
   comments differ.  Looks redundant -- confirm before removing either.  */
645 {/*b1.??*/"uops_executed.stall_cycles", /*? not in PRM*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
646 {/*b1.01*/"uops_dispatched.stall_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
647 {/*b1.01*/"uops_executed.stall_cycles", /*F6M62*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
648 {/*b1.01*/"uops_executed.cycles_ge_1_uop_exec", /*F6M62,not doc'd*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
649 {/*b1.01*/"uops_executed.cycles_ge_2_uops_exec", /*F6M62,not doc'd*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
650 {/*b1.01*/"uops_executed.cycles_ge_3_uops_exec", /*F6M62,not doc'd*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
651 {/*b1.01*/"uops_executed.cycles_ge_4_uops_exec", /*F6M62,not doc'd*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
653 {/*bf.05*/"l1d_blocks.bank_conflict_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
654 {/*c2.01*/"uops_retired.stall_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
655 {/*c2.01*/"uops_retired.total_cycles", /*cmask==0x10*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
656 {/*c2.01*/"uops_retired.core_stall_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
657 {/*c2.01*/"uops_retired.active_cycles", /*cmask==0x1*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
/* The text inside the #if 0 group below is not C initializer syntax; it is
   kept only as a reminder and is excluded from compilation.  Note that the
   same four uops_executed.cycles_ge_* names are nevertheless defined as
   entries above.  */
658 #if 0 // need to see documentation on the following before marking them as cycles
659 uops_executed.cycles_ge_1_uop_exec[ / {0 | 1 | 2 | 3}], 1000003 (events)
660 uops_executed.cycles_ge_2_uops_exec[ /
661 {0 | 1 | 2 | 3}
662 ], 1000003 (events)
663 uops_executed.cycles_ge_3_uops_exec[ /
664 {0 | 1 | 2 | 3}
665 ], 1000003 (events)
666 uops_executed.cycles_ge_4_uops_exec[ /
667 {0 | 1 | 2 | 3}
668 ], 1000003 (events)
669 #endif
/* Hidden entry for the event behind the "l2m_latency" alias near the top of
   this table; the last three field values match that alias.  */
670 {/*cd.01*/"mem_trans_retired.load_latency", /*PEBS*/ NULL, REGNO_ANY, NULL, PRELOADS_4, 65, ABST_EXACT_PEBS_PLUS1}, //non-standard overflow
672 /* "Architectural" events: */
673 {/*3c*/"unhalted-core-cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
674 PERF_EVENTS_SW_EVENT_DEFS
676 /* additional (hidden) aliases for convenience */
677 #if 0
678 USE_INTEL_REF_CYCLES (100),
679 #endif
/* Same underlying event as "insts" above, but with the third field set to
   0 and 1 instead of REGNO_ANY, no description string, and PRELOADS_8
   instead of PRELOADS_75.  */
680 {"insts0", "inst_retired.any_p", 0, NULL, PRELOADS_8, 0, ABST_NONE},
681 {"insts1", "inst_retired.any_p", 1, NULL, PRELOADS_8, 0, ABST_NONE},
/* Entry with a NULL name; presumably the end-of-table sentinel.
   NOTE(review): it supplies nine initializers while every other visible
   entry supplies seven -- confirm against the Hwcentry declaration.  */
682 {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE}
/* Hardware counter table for Intel Haswell processors (going by the array
   name).  Contents, in order:
     - entries supplied by the PERF_EVENTS_SW_EVENT_ALIASES and
       USE_INTEL_REF_CYCLES macros;
     - aliases that carry an STXT description string in the fourth field;
     - "hidden" entries whose second and fourth fields are NULL;
     - entries supplied by PERF_EVENTS_SW_EVENT_DEFS;
     - the "insts0"/"insts1" aliases;
     - a final entry whose name is NULL.
   NOTE(review): the meaning of each positional field comes from the
   Hwcentry declaration, which is not visible here -- confirm there before
   changing any value.  */
686 static Hwcentry intelHaswellList[] = {
687 /* see comments for "cycles" and "insts" for intelNehalemList */
688 PERF_EVENTS_SW_EVENT_ALIASES
/* NOTE(review): the argument is 100 here but 133 in intelNehalemList;
   presumably a reference clock rate -- confirm against the macro.  */
689 USE_INTEL_REF_CYCLES (100)
690 {"cycles", "cpu_clk_unhalted.thread_p", REGNO_ANY, STXT ("CPU Cycles"), PRELOADS_75, 1, ABST_NONE},
691 {"insts", "inst_retired.any_p", REGNO_ANY, STXT ("Instructions Executed"), PRELOADS_75, 0, ABST_NONE},
693 // PEBS (sampling)
694 {"l2m_latency", "mem_trans_retired.load_latency", REGNO_ANY, STXT ("L2 Cache Miss Est. Latency"), PRELOADS_4, 65, ABST_EXACT_PEBS_PLUS1},
/* NOTE(review): "dcm", "l2m" and "l3m" are each listed twice below: first
   with a symbolic event name, then with a raw "0xd1~umask=..." string in
   the second field.  Presumably the raw form is a fallback for when the
   symbolic name is not recognized -- confirm how duplicate alias names are
   resolved by the lookup code.  */
696 {"dch", "mem_load_uops_retired.l1_hit", REGNO_ANY, STXT ("L1 D-cache Hits"), PRELOADS_7, 0, ABST_NONE},
697 {"dcm", "mem_load_uops_retired.l1_miss", REGNO_ANY, STXT ("L1 D-cache Misses"), PRELOADS_65, 0, ABST_NONE}, //mem_load_uops_retired
698 {"dcm", "0xd1~umask=0x08", REGNO_ANY, STXT ("L1 D-cache Misses"), PRELOADS_65, 0, ABST_NONE}, //mem_load_uops_retired
699 {"l2h", "mem_load_uops_retired.l2_hit", REGNO_ANY, STXT ("L2 Cache Hits"), PRELOADS_65, 0, ABST_NONE},
700 {"l2m", "mem_load_uops_retired.l2_miss", REGNO_ANY, STXT ("L2 Cache Misses"), PRELOADS_6, 0, ABST_NONE}, //mem_load_uops_retired
701 {"l2m", "0xd1~umask=0x10", REGNO_ANY, STXT ("L2 Cache Misses"), PRELOADS_6, 0, ABST_NONE}, //mem_load_uops_retired
702 {"l3h", "mem_load_uops_retired.l3_hit", REGNO_ANY, STXT ("L3 Cache Hit w/o Snoop"), PRELOADS_6, 0, ABST_NONE},
703 {"l3m", "mem_load_uops_retired.l3_miss", REGNO_ANY, STXT ("L3 Cache Misses"), PRELOADS_5, 0, ABST_NONE}, //mem_load_uops_retired
704 {"l3m", "0xd1~umask=0x20", REGNO_ANY, STXT ("L3 Cache Misses"), PRELOADS_5, 0, ABST_NONE}, //mem_load_uops_retired
706 /* dtlbm has not been confirmed via Intel white paper */
707 {"dtlbm", "dtlb_load_misses.walk_completed", REGNO_ANY, STXT ("DTLB Misses"), PRELOADS_6, 0, ABST_NONE},
708 {"dtlbm_stall", "dtlb_load_misses.walk_completed", REGNO_ANY, STXT ("DTLB Misses x 30: Estimated Stalls"), PRELOADS_6, 30, ABST_NONE},
710 /* explicit definitions of (hidden) entries for proper counters */
711 /* Only counters that can be time converted, or are load-store need to be in this table */
/* The leading comment in each entry below looks like the hex event code
   and unit mask ("xx"/"??" where unknown) -- TODO confirm.  */
712 {/* 30a */"cpu_clk_unhalted.thread", /*15634344==6940930*/ NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE},
713 //{/* 30a */"cpu_clk_unhalted.core", /*6759307*/ NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE},
714 {/*08.10*/"dtlb_load_misses.walk_duration", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
715 {/*0d.03*/"int_misc.recovery_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
716 {/*0e.01*/"uops_issued.stall_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
717 {/*0e.01*/"uops_issued.core_stall_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
718 {/*3c.00*/"cpu_clk_unhalted.thread_p", NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE},
719 {/*48.01*/"l1d_pend_miss.pending_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
720 {/*49.04*/"dtlb_store_misses.walk_duration", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
721 {/*5c.01*/"cpl_cycles.ring0", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
722 {/*5c.02*/"cpl_cycles.ring123", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
723 {/*5c.xx*/"cpl_cycles.ring0_trans", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
724 {/*5e.01*/"rs_events.empty_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
725 {/*60.01*/"offcore_requests_outstanding.cycles_with_demand_data_rd", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
726 {/*60.02*/"offcore_requests_outstanding.demand_code_rd_cycles", /*?*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
727 {/*60.04*/"offcore_requests_outstanding.demand_rfo_cycles", /*?*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
728 {/*60.08*/"offcore_requests_outstanding.cycles_with_data_rd", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
729 {/*63.01*/"lock_cycles.split_lock_uc_lock_duration", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
730 {/*63.02*/"lock_cycles.cache_lock_duration", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
731 {/*79.00*/"idq.empty", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
732 {/*79.04*/"idq.mite_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
733 {/*79.08*/"idq.dsb_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
734 {/*79.10*/"idq.ms_dsb_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
735 {/*79.20*/"idq.ms_mite_uops_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
736 {/*79.20*/"idq.ms_mite_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
737 {/*79.30*/"idq.ms_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
738 {/*79.18*/"idq.all_dsb_cycles_any_uops", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
739 {/*79.18*/"idq.all_dsb_cycles_4_uops", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
740 {/*79.24*/"idq.all_mite_cycles_any_uops", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
741 {/*79.24*/"idq.all_mite_cycles_4_uops", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
742 {/*80.04*/"icache.ifetch_stall", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
743 {/*85.04*/"itlb_misses.walk_duration", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
744 {/*87.01*/"ild_stall.lcp", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, // Intel SDM says these are stalls, not cycles
745 {/*87.04*/"ild_stall.iq_full", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
746 {/*9c.xx*/"idq_uops_not_delivered.cycles_0_uops_deliv.core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
747 {/*9c.xx*/"idq_uops_not_delivered.cycles_le_1_uop_deliv.core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
748 {/*9c.xx*/"idq_uops_not_delivered.cycles_le_2_uop_deliv.core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
749 {/*9c.xx*/"idq_uops_not_delivered.cycles_le_3_uop_deliv.core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
750 // {/*9c.01*/"idq_uops_not_delivered.cycles_ge_1_uop_deliv.core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
751 {/*9c.01*/"idq_uops_not_delivered.cycles_fe_was_ok", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
/* Ports 0-7 are listed here, each in a plain and a "_core" form;
   intelSandyBridgeList lists only ports 0-5 and has no "_core" forms.  */
753 {/*a1.01*/"uops_executed_port.port_0", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
754 {/*a1.02*/"uops_executed_port.port_1", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
755 {/*a1.04*/"uops_executed_port.port_2", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
756 {/*a1.08*/"uops_executed_port.port_3", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
757 {/*a1.10*/"uops_executed_port.port_4", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
758 {/*a1.20*/"uops_executed_port.port_5", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
759 {/*a1.40*/"uops_executed_port.port_6", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
760 {/*a1.80*/"uops_executed_port.port_7", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
761 {/*a1.01*/"uops_executed_port.port_0_core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
762 {/*a1.02*/"uops_executed_port.port_1_core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
763 {/*a1.04*/"uops_executed_port.port_2_core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
764 {/*a1.08*/"uops_executed_port.port_3_core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
765 {/*a1.10*/"uops_executed_port.port_4_core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
766 {/*a1.20*/"uops_executed_port.port_5_core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
767 {/*a1.40*/"uops_executed_port.port_6_core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
768 {/*a1.80*/"uops_executed_port.port_7_core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
770 {/*a2.01*/"resource_stalls.any", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
771 {/*a2.04*/"resource_stalls.rs", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
772 {/*a2.08*/"resource_stalls.sb", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
773 {/*a2.10*/"resource_stalls.rob", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
/* Only the "*_pending_cycles" spellings of the cycle_activity events are
   active in this table; the other cycle_activity variants are commented
   out.  */
775 {/*a3.01*/"cycle_activity.cycles_l2_pending_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
776 // {/*a3.01*/"cycle_activity.cycles_l2_pending", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
777 {/*a3.02*/"cycle_activity.cycles_ldm_pending_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
778 // {/*a3.05*/"cycle_activity.stalls_l2_pending", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
779 {/*a3.08*/"cycle_activity.cycles_l1d_pending_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
780 // {/*a3.??*/"cycle_activity.cycles_no_execute", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
781 // {/*a3.??*/"cycle_activity.stalls_ldm_pending",/*?*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
783 {/*ab.02*/"dsb2mite_switches.penalty_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
785 {/*b1.??*/"uops_executed.stall_cycles", /*? not in PRM*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
786 {/*b1.??*/"uops_executed.cycles_ge_1_uop_exec", /*?*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
787 {/*b1.??*/"uops_executed.cycles_ge_2_uops_exec", /*?*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
788 {/*b1.??*/"uops_executed.cycles_ge_3_uops_exec", /*?*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
789 {/*b1.??*/"uops_executed.cycles_ge_4_uops_exec", /*?*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
791 {/*c2.01*/"uops_retired.stall_cycles", /*cmask==1 + INV*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
792 {/*c2.01*/"uops_retired.total_cycles", /*cmask==0x1*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
793 {/*c2.01*/"uops_retired.core_stall_cycles", /*PEBS Any==1*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
795 {/*c3.01*/"machine_clears.cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
797 {/*ca.1e*/"fp_assist.any", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
/* Hidden entry for the event behind the "l2m_latency" alias near the top of
   this table; the last three field values match that alias.  */
799 {/*cd.01*/"mem_trans_retired.load_latency", /*PEBS*/ NULL, REGNO_ANY, NULL, PRELOADS_4, 65, ABST_EXACT_PEBS_PLUS1}, //non-standard overflow
801 /* "Architectural" events: */
802 {/*3c*/"unhalted-core-cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
803 PERF_EVENTS_SW_EVENT_DEFS
805 /* additional (hidden) aliases for convenience */
806 #if 0
807 USE_INTEL_REF_CYCLES (100),
808 #endif
/* Same underlying event as "insts" above, but with the third field set to
   0 and 1 instead of REGNO_ANY, no description string, and PRELOADS_8
   instead of PRELOADS_75.  */
809 {"insts0", "inst_retired.any_p", 0, NULL, PRELOADS_8, 0, ABST_NONE},
810 {"insts1", "inst_retired.any_p", 1, NULL, PRELOADS_8, 0, ABST_NONE},
/* Entry with a NULL name; presumably the end-of-table sentinel.
   NOTE(review): it supplies nine initializers while every other visible
   entry supplies seven -- confirm against the Hwcentry declaration.  */
811 {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE}
/* Hardware-counter table named for Intel Broadwell.
   Layout of the list, top to bottom:
     - entries expanded from PERF_EVENTS_SW_EVENT_ALIASES and
       USE_INTEL_REF_CYCLES (100) (macro bodies are defined elsewhere);
     - the visible "cycles" and "insts" aliases;
     - the load-latency counter, tagged ABST_EXACT_PEBS_PLUS1;
     - cache and DTLB aliases with user-visible labels;
     - raw events with no alias target (second field NULL), which per the
       comment below are the ones that can be time converted;
     - the architectural event, PERF_EVENTS_SW_EVENT_DEFS, and the hidden
       "insts0"/"insts1" aliases that name a specific register.
   NOTE(review): the positional fields appear to be {name, aliased event,
   register, label, preload value, time-convert flag, ABST_* type}; confirm
   against the Hwcentry declaration.  The leading comment in each raw row looks
   like an event.umask code -- confirm.  */
815 static Hwcentry intelBroadwellList[] = {
816 /* see comments for "cycles" and "insts" for intelNehalemList */
817 PERF_EVENTS_SW_EVENT_ALIASES
818 USE_INTEL_REF_CYCLES (100)
819 {"cycles", "cpu_clk_unhalted.thread_p", REGNO_ANY, STXT ("CPU Cycles"), PRELOADS_75, 1, ABST_NONE},
820 {"insts", "inst_retired.any_p", REGNO_ANY, STXT ("Instructions Executed"), PRELOADS_75, 0, ABST_NONE},
822 // PEBS (sampling)
823 {"l2m_latency", "mem_trans_retired.load_latency", REGNO_ANY, STXT ("L2 Cache Miss Est. Latency"), PRELOADS_4, 65, ABST_EXACT_PEBS_PLUS1},
824 {/*cd.01*/"mem_trans_retired.load_latency", NULL, REGNO_ANY, NULL, PRELOADS_4, 65, ABST_EXACT_PEBS_PLUS1},
826 // aliases (the first set are PEBS, but on Intel the only precise counter we support is l2m_latency)
827 {"dch", "mem_load_uops_retired.l1_hit", REGNO_ANY, STXT ("L1 D-cache Hits"), PRELOADS_7, 0, ABST_NONE},
828 {"dcm", "mem_load_uops_retired.l1_miss", REGNO_ANY, STXT ("L1 D-cache Misses"), PRELOADS_65, 0, ABST_NONE},
829 {"l2h", "mem_load_uops_retired.l2_hit", REGNO_ANY, STXT ("L2 Cache Hits"), PRELOADS_65, 0, ABST_NONE},
830 {"l2m", "mem_load_uops_retired.l2_miss", REGNO_ANY, STXT ("L2 Cache Misses"), PRELOADS_6, 0, ABST_NONE},
831 {"l3h", "mem_load_uops_retired.l3_hit", REGNO_ANY, STXT ("L3 Cache Hits"), PRELOADS_6, 0, ABST_NONE},
832 {"l3m", "mem_load_uops_retired.l3_miss", REGNO_ANY, STXT ("L3 Cache Misses"), PRELOADS_5, 0, ABST_NONE},
833 {"dtlbm", "dtlb_load_misses.walk_completed", REGNO_ANY, STXT ("DTLB Misses"), PRELOADS_6, 0, ABST_NONE},
835 // counters that can be time converted (add FFCs if we decide to support them)
836 // counters that are load-store (did not include any... do we want to?)
837 {/*08.10*/"dtlb_load_misses.walk_duration", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
838 {/*0d.03*/"int_misc.recovery_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
839 {/*0e.01*/"uops_issued.stall_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
840 {/*0e.01*/"uops_issued.core_stall_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
841 {/*14.01*/"arith.fpu_div_active", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
842 {/*3c.00*/"cpu_clk_unhalted.thread_p_any", NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE},
843 {/*3c.00*/"cpu_clk_unhalted.thread_p", NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE},
844 {/*3c.02*/"cpu_clk_thread_unhalted.one_thread_active", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
845 {/*48.01*/"l1d_pend_miss.pending_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
846 {/*48.01*/"l1d_pend_miss.pending_cycles_any", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
847 {/*49.10*/"dtlb_store_misses.walk_duration", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
848 {/*4f.10*/"ept.walk_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
849 {/*5c.01*/"cpl_cycles.ring0", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
850 {/*5c.01*/"cpl_cycles.ring0_trans", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
851 {/*5c.02*/"cpl_cycles.ring123", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
852 {/*5e.01*/"rs_events.empty_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
853 {/*60.01*/"offcore_requests_outstanding.cycles_with_demand_data_rd", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
854 {/*60.02*/"offcore_requests_outstanding.demand_code_rd_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
855 {/*60.04*/"offcore_requests_outstanding.demand_rfo_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
856 {/*60.08*/"offcore_requests_outstanding.cycles_with_data_rd", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
857 {/*63.01*/"lock_cycles.split_lock_uc_lock_duration", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
858 {/*63.02*/"lock_cycles.cache_lock_duration", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
859 {/*79.02*/"idq.empty", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
860 {/*79.04*/"idq.mite_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
861 {/*79.08*/"idq.dsb_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
862 {/*79.10*/"idq.ms_dsb_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
863 {/*79.18*/"idq.all_dsb_cycles_4_uops", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
864 {/*79.18*/"idq.all_dsb_cycles_any_uops", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
865 {/*79.24*/"idq.all_mite_cycles_4_uops", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
866 {/*79.24*/"idq.all_mite_cycles_any_uops", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
867 {/*79.30*/"idq.ms_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
868 {/*85.10*/"itlb_misses.walk_duration", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
869 {/*9c.xx*/"idq_uops_not_delivered.cycles_0_uops_deliv.core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
870 {/*9c.xx*/"idq_uops_not_delivered.cycles_le_1_uop_deliv.core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
871 {/*9c.xx*/"idq_uops_not_delivered.cycles_le_2_uop_deliv.core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
872 {/*9c.xx*/"idq_uops_not_delivered.cycles_le_3_uop_deliv.core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
873 {/*9c.01*/"idq_uops_not_delivered.cycles_fe_was_ok", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
874 {/*a1.01*/"uops_executed_port.port_0", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
875 {/*a1.02*/"uops_executed_port.port_1", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
876 {/*a1.04*/"uops_executed_port.port_2", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
877 {/*a1.08*/"uops_executed_port.port_3", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
878 {/*a1.10*/"uops_executed_port.port_4", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
879 {/*a1.20*/"uops_executed_port.port_5", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
880 {/*a1.40*/"uops_executed_port.port_6", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
881 {/*a1.80*/"uops_executed_port.port_7", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
882 {/*a1.01*/"uops_executed_port.port_0_core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
883 {/*a1.02*/"uops_executed_port.port_1_core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
884 {/*a1.04*/"uops_executed_port.port_2_core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
885 {/*a1.08*/"uops_executed_port.port_3_core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
886 {/*a1.10*/"uops_executed_port.port_4_core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
887 {/*a1.20*/"uops_executed_port.port_5_core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
888 {/*a1.40*/"uops_executed_port.port_6_core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
889 {/*a1.80*/"uops_executed_port.port_7_core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
890 {/*a2.01*/"resource_stalls.any", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
891 {/*a2.04*/"resource_stalls.rs", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
892 {/*a2.08*/"resource_stalls.sb", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
893 {/*a2.10*/"resource_stalls.rob", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
894 {/*a3.01*/"cycle_activity.cycles_l2_pending", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
895 {/*a3.02*/"cycle_activity.cycles_ldm_pending", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
896 {/*a3.04*/"cycle_activity.cycles_no_execute", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
897 {/*a3.08*/"cycle_activity.cycles_l1d_pending", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
898 {/*a8.01*/"lsd.cycles_active", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
899 {/*a8.01*/"lsd.cycles_4_uops", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
900 {/*ab.02*/"dsb2mite_switches.penalty_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
901 {/*b1.01*/"uops_executed.stall_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
902 {/*c2.01*/"uops_retired.stall_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
903 {/*c2.01*/"uops_retired.total_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
904 {/*c2.01*/"uops_retired.core_stall_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
905 {/*c3.01*/"machine_clears.cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
906 {/*ca.1e*/"fp_assist.any", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
908 /* "Architectural" events: */
909 {/*3c*/"unhalted-core-cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
910 PERF_EVENTS_SW_EVENT_DEFS
912 /* additional (hidden) aliases for convenience */
913 #if 0
914 USE_INTEL_REF_CYCLES (100),
915 #endif
916 {"insts0", "inst_retired.any_p", 0, NULL, PRELOADS_8, 0, ABST_NONE},
917 {"insts1", "inst_retired.any_p", 1, NULL, PRELOADS_8, 0, ABST_NONE},
/* NOTE(review): the closing all-NULL/zero row (presumably the end-of-table
   marker) has nine initializers with ABST_NONE last, whereas data rows put
   their ABST_* value seventh -- confirm against Hwcentry.  */
918 {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE}
/* Hardware-counter table named for Intel Skylake.
   Same overall shape as the other Intel lists: macro-expanded software-event
   aliases and USE_INTEL_REF_CYCLES (25), the visible "cycles"/"insts"
   aliases, the load-latency counter tagged ABST_EXACT_PEBS_PLUS1, cache and
   stall aliases, raw events with a NULL second field, the architectural
   events, and the hidden per-register "insts0"/"insts1" aliases.
   Some alias rows here carry an eighth initializer: a longer STXT()
   description that follows the ABST_* value.  Rows marked "needs validation"
   or "needs test" are flagged as such by the original author.
   NOTE(review): positional field meaning is not visible here; confirm against
   the Hwcentry declaration.  */
921 static Hwcentry intelSkylakeList[] = {
922 /* see comments for "cycles" and "insts" for intelNehalemList */
923 PERF_EVENTS_SW_EVENT_ALIASES
924 USE_INTEL_REF_CYCLES (25)
925 {"cycles", "cpu_clk_unhalted.thread_p", REGNO_ANY, STXT ("CPU Cycles"), PRELOADS_75, 1, ABST_NONE},
926 {"insts", "inst_retired.any_p", REGNO_ANY, STXT ("Instructions Executed"), PRELOADS_75, 0, ABST_NONE},
928 // PEBS (sampling)
929 {"l2m_latency", "mem_trans_retired.load_latency", REGNO_ANY, STXT ("L2 Cache Miss Est. Latency"), PRELOADS_4, 65, ABST_EXACT_PEBS_PLUS1},
930 {/*cd.01*/"mem_trans_retired.load_latency", NULL, REGNO_ANY, NULL, PRELOADS_4, 65, ABST_EXACT_PEBS_PLUS1},
932 // aliases (the first set are PEBS, but on Intel the only precise counter we support is l2m_latency)
933 {"dch", "mem_load_retired.l1_hit", REGNO_ANY, STXT ("L1 D-cache Hits"), PRELOADS_7, 0, ABST_NONE},
934 {"dcm", "mem_load_retired.l1_miss", REGNO_ANY, STXT ("L1 D-cache Misses"), PRELOADS_65, 0, ABST_NONE},
935 {"l2h", "mem_load_retired.l2_hit", REGNO_ANY, STXT ("L2 Cache Hits"), PRELOADS_65, 0, ABST_NONE},
936 {"l2m", "mem_load_retired.l2_miss", REGNO_ANY, STXT ("L2 Cache Misses"), PRELOADS_6, 0, ABST_NONE},
937 {"l2m_stall", "cycle_activity.stalls_l2_miss", REGNO_ANY, STXT ("L2 Cache Miss Stall"), PRELOADS_7, 1, ABST_NONE}, // needs validation
938 {"l3h", "mem_load_retired.l3_hit", REGNO_ANY, STXT ("L3 Cache Hits"), PRELOADS_6, 0, ABST_NONE},
939 {"l3m", "mem_load_retired.l3_miss", REGNO_ANY, STXT ("L3 Cache Misses"), PRELOADS_5, 0, ABST_NONE},
940 {"l3m_stall", "cycle_activity.stalls_l3_miss", REGNO_ANY, STXT ("L3 Cache Miss Stall"), PRELOADS_7, 1, ABST_NONE}, // needs validation
941 {"dtlbm_stall", "dtlb_load_misses.walk_active", REGNO_ANY, STXT ("DTLB Miss Est Stall"), PRELOADS_7, 1, ABST_NONE, STXT ("Estimated time stalled on DTLB misses requiring a tablewalk. Does not include time related to STLB hits.")}, // needs validation
942 // PEBS mem_inst_retired.stlb_miss_loads for finding location of DTLB issues
943 // what about: dtlb_load_misses.walk_completed, dtlb_load_misses.walk_pending, dtlb_load_misses.stlb_hit
945 {"fp_scalar", "fp_arith_inst_retired.scalar_double~umask=0x3", REGNO_ANY, STXT ("FP Scalar uOps"), PRELOADS_7, 0, ABST_NONE, STXT ("Floating-point scalar micro-ops that retired")},
946 {"fp_vector", "fp_arith_inst_retired.128b_packed_double~umask=0x3c", REGNO_ANY, STXT ("FP Vector uOps"), /*needs test*/ PRELOADS_7, 0, ABST_NONE, STXT ("Floating-point vector micro-ops that retired")},
948 // counters that can be time converted (add FFCs if we decide to support them)
949 // counters that are load-store (did not include any... do we want to?)
950 {/*08.10*/"dtlb_load_misses.walk_active", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
951 {/*08.10*/"dtlb_load_misses.walk_pending", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
952 {/*0d.01*/"int_misc.recovery_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
953 {/*0d.01*/"int_misc.recovery_cycles_any", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
954 {/*0d.80*/"int_misc.clear_resteer_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
955 {/*0e.01*/"uops_issued.stall_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
956 {/*14.01*/"arith.divider_active", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
957 {/*3c.00*/"cpu_clk_unhalted.ring0_trans", NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE},
958 {/*3c.00*/"cpu_clk_unhalted.thread_p_any", NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE},
959 {/*3c.00*/"cpu_clk_unhalted.thread_p", NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE},
960 {/*3c.00*/"cpu_clk_unhalted.core", NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE},
961 {/*48.01*/"l1d_pend_miss.pending_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
962 {/*48.01*/"l1d_pend_miss.pending_cycles_any", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
963 {/*49.10*/"dtlb_store_misses.walk_active", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
964 {/*49.10*/"dtlb_store_misses.walk_pending", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
965 {/*4f.10*/"ept.walk_pending", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
966 {/*5e.01*/"rs_events.empty_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
967 {/*60.01*/"offcore_requests_outstanding.cycles_with_demand_data_rd", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
968 {/*60.01*/"offcore_requests_outstanding.demand_data_rd_ge_6", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
969 {/*60.02*/"offcore_requests_outstanding.cycles_with_demand_code_rd", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
970 {/*60.04*/"offcore_requests_outstanding.cycles_with_demand_rfo", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
971 {/*60.08*/"offcore_requests_outstanding.cycles_with_data_rd", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
972 {/*60.10*/"offcore_requests_outstanding.cycles_with_l3_miss_demand_data_rd", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
973 {/*60.10*/"offcore_requests_outstanding.l3_miss_demand_data_rd_ge_6", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
974 {/*63.02*/"lock_cycles.cache_lock_duration", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
975 {/*79.04*/"idq.mite_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
976 {/*79.08*/"idq.dsb_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
977 {/*79.10*/"idq.ms_dsb_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
978 {/*79.18*/"idq.all_dsb_cycles_4_uops", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
979 {/*79.18*/"idq.all_dsb_cycles_any_uops", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
980 {/*79.24*/"idq.all_mite_cycles_4_uops", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
981 {/*79.24*/"idq.all_mite_cycles_any_uops", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
982 {/*79.30*/"idq.ms_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
983 {/*80.04*/"icache_16b.ifdata_stall", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
984 {/*83.04*/"icache_64b.iftag_stall", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
985 {/*85.10*/"itlb_misses.walk_active", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
986 {/*85.10*/"itlb_misses.walk_pending", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
987 {/*87.01*/"ild_stall.lcp", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
988 {/*9c.01*/"idq_uops_not_delivered.cycles_0_uops_deliv.core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
989 {/*9c.01*/"idq_uops_not_delivered.cycles_le_1_uop_deliv.core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
990 {/*9c.01*/"idq_uops_not_delivered.cycles_le_2_uop_deliv.core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
991 {/*9c.01*/"idq_uops_not_delivered.cycles_le_3_uop_deliv.core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
992 {/*9c.01*/"idq_uops_not_delivered.cycles_fe_was_ok", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
993 {/*a1.01*/"uops_dispatched_port.port_0", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
994 {/*a1.02*/"uops_dispatched_port.port_1", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
995 {/*a1.04*/"uops_dispatched_port.port_2", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
996 {/*a1.08*/"uops_dispatched_port.port_3", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
997 {/*a1.10*/"uops_dispatched_port.port_4", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
998 {/*a1.20*/"uops_dispatched_port.port_5", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
999 {/*a1.40*/"uops_dispatched_port.port_6", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1000 {/*a1.80*/"uops_dispatched_port.port_7", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1001 {/*a2.01*/"resource_stalls.any", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1002 {/*a2.08*/"resource_stalls.sb", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1003 {/*a3.01*/"cycle_activity.cycles_l2_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1004 {/*a3.02*/"cycle_activity.cycles_l3_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1005 {/*a3.04*/"cycle_activity.stalls_total", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1006 {/*a3.05*/"cycle_activity.stalls_l2_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1007 {/*a3.06*/"cycle_activity.stalls_l3_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1008 {/*a3.08*/"cycle_activity.cycles_l1d_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1009 {/*a3.0c*/"cycle_activity.stalls_l1d_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1010 {/*a3.10*/"cycle_activity.cycles_mem_any", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1011 {/*a3.14*/"cycle_activity.stalls_mem_any", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1012 {/*a6.01*/"exe_activity.exe_bound_0_ports", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1013 {/*a6.02*/"exe_activity.1_ports_util", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1014 {/*a6.04*/"exe_activity.2_ports_util", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1015 {/*a6.08*/"exe_activity.3_ports_util", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1016 {/*a6.10*/"exe_activity.4_ports_util", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1017 {/*a6.40*/"exe_activity.bound_on_stores", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1018 {/*a8.01*/"lsd.cycles_4_uops", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1019 {/*a8.01*/"lsd.cycles_active", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1020 {/*ab.02*/"dsb2mite_switches.penalty_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1021 {/*b1.01*/"uops_executed.cycles_ge_1_uop_exec", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1022 {/*b1.01*/"uops_executed.cycles_ge_2_uops_exec", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1023 {/*b1.01*/"uops_executed.cycles_ge_3_uops_exec", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1024 {/*b1.01*/"uops_executed.cycles_ge_4_uops_exec", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1025 {/*b1.01*/"uops_executed.stall_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1026 {/*b1.02*/"uops_executed.core_cycles_ge_1", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1027 {/*b1.02*/"uops_executed.core_cycles_ge_2", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1028 {/*b1.02*/"uops_executed.core_cycles_ge_3", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1029 {/*b1.02*/"uops_executed.core_cycles_ge_4", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1030 {/*b1.02*/"uops_executed.core_cycles_none", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1031 {/*c0.1*/"inst_retired.total_cycles_ps", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1032 {/*c2.01*/"uops_retired.stall_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1033 {/*c2.01*/"uops_retired.total_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1034 {/*ca.1e*/"fp_assist.any", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1036 /* "Architectural" events: */
1037 {/* FFC */"cpu_clk_unhalted.thread", NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE},
1038 {/* FFC */"unhalted-core-cycles", NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE},
1039 PERF_EVENTS_SW_EVENT_DEFS
1041 /* additional (hidden) aliases for convenience */
1042 #if 0
1043 USE_INTEL_REF_CYCLES (25),
1044 #endif
1045 {"insts0", "inst_retired.any_p", 0, NULL, PRELOADS_8, 0, ABST_NONE},
1046 {"insts1", "inst_retired.any_p", 1, NULL, PRELOADS_8, 0, ABST_NONE},
/* NOTE(review): the closing all-NULL/zero row (presumably the end-of-table
   marker) has nine initializers with ABST_NONE last, whereas data rows put
   their ABST_* value seventh -- confirm against Hwcentry.  */
1047 {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE}
/* Hardware-counter table named "intelLinuxUnknown"; judging by the name it is
   the fallback for Intel processors that are not otherwise recognized on
   Linux -- confirm at the lookup site.
   Most aliases are listed twice, once against an architectural event name
   (e.g. "unhalted-core-cycles") and once against a PERF_COUNT_HW_* name; "dcm"
   has only a PERF_COUNT_HW_* row.  The reference-cycles macro is commented out
   because, per the original note, the frequency is unknown; the hidden
   "cycles0"/"cycles1" rows instead use a time-convert value of -(25).
   NOTE(review): how duplicate alias names are resolved is decided by code
   outside this view.  */
1050 static Hwcentry intelLinuxUnknown[] = {
1051 PERF_EVENTS_SW_EVENT_ALIASES
1052 // USE_INTEL_REF_CYCLES(100) // freq is unknown
1053 {"cycles", "unhalted-core-cycles", REGNO_ANY, STXT ("CPU Cycles"), PRELOADS_75, 1, ABST_NONE},
1054 {"cycles", "PERF_COUNT_HW_CPU_CYCLES", REGNO_ANY, STXT ("CPU Cycles"), PRELOADS_75, 1, ABST_NONE},
1055 {"insts", "instruction-retired", REGNO_ANY, STXT ("Instructions Executed"), PRELOADS_75, 0, ABST_NONE},
1056 {"insts", "PERF_COUNT_HW_INSTRUCTIONS", REGNO_ANY, STXT ("Instructions Executed"), PRELOADS_75, 0, ABST_NONE},
1058 {"dcm", "PERF_COUNT_HW_CACHE_MISSES.L1D", REGNO_ANY, STXT ("L1 D-cache Misses"), PRELOADS_65, 0, ABST_NONE},
1059 {"llm", "llc-misses", REGNO_ANY, STXT ("Last-Level Cache Misses"), PRELOADS_5, 0, ABST_NONE},
1060 {"llm", "PERF_COUNT_HW_CACHE_MISSES.LL", REGNO_ANY, STXT ("Last-Level Cache Misses"), PRELOADS_5, 0, ABST_NONE},
1062 {"br_msp", "branch-misses-retired", REGNO_ANY, STXT ("Branch Mispredict"), PRELOADS_6, 0, ABST_NONE},
1063 {"br_msp", "PERF_COUNT_HW_BRANCH_MISSES", REGNO_ANY, STXT ("Branch Mispredict"), PRELOADS_6, 0, ABST_NONE},
1064 {"br_ins", "branch-instruction-retired", REGNO_ANY, STXT ("Branch Instructions"), PRELOADS_7, 0, ABST_NONE},
1065 {"br_ins", "PERF_COUNT_HW_BRANCH_INSTRUCTIONS", REGNO_ANY, STXT ("Branch Instructions"), PRELOADS_7, 0, ABST_NONE},
1067 // counters that can be time converted (add FFCs if we decide to support them)
1068 // counters that are load-store (did not include any... do we want to?)
1069 /* "Architectural" events: */
1070 {/* FFC */"cpu_clk_unhalted.thread", NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE},
1071 {/* FFC */"unhalted-core-cycles", NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE},
1072 PERF_EVENTS_SW_EVENT_DEFS
1074 /* additional (hidden) aliases for convenience */
1075 {"cycles0", "unhalted-reference-cycles", 0, NULL, PRELOADS_6, -(25), ABST_NONE}, //YXXX -can't do with ref cycles #
1076 {"cycles0", "PERF_COUNT_HW_BUS_CYCLES", 0, NULL, PRELOADS_6, -(25), ABST_NONE}, //YXXX -can't do with ref cycles #
1077 {"cycles1", "unhalted-reference-cycles", 1, NULL, PRELOADS_65, -(25), ABST_NONE}, //YXXX - can't do with ref cycles #
1078 {"cycles1", "PERF_COUNT_HW_BUS_CYCLES", 1, NULL, PRELOADS_65, -(25), ABST_NONE}, //YXXX - can't do with ref cycles #
1079 {"insts0", "instruction-retired", 0, NULL, PRELOADS_8, 0, ABST_NONE},
1080 {"insts0", "PERF_COUNT_HW_INSTRUCTIONS", 0, NULL, PRELOADS_8, 0, ABST_NONE},
1081 {"insts1", "instruction-retired", 1, NULL, PRELOADS_8, 0, ABST_NONE},
1082 {"insts1", "PERF_COUNT_HW_INSTRUCTIONS", 1, NULL, PRELOADS_8, 0, ABST_NONE},
/* NOTE(review): the closing all-NULL/zero row (presumably the end-of-table
   marker) has nine initializers with ABST_NONE last, whereas data rows put
   their ABST_* value seventh -- confirm against Hwcentry.  */
1083 {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE}
/* Hardware-counter table named for Intel Atom.
   "cycles" is listed against two event names (cpu_clk_unhalted.core and
   cpu_clk_unhalted.thread); the numbers in the inline comments (6759307) look
   like bug-tracker ids -- confirm.  The raw rows below follow the rule stated
   in the original comment: only time-convertible or load-store counters need
   an entry.  Unlike the other Intel lists in this file section, no
   PERF_EVENTS_SW_EVENT_* macros are used here.
   NOTE(review): "insts" aliases "instr_retired.any" while the hidden
   "insts0"/"insts1" rows alias "inst_retired.any_p"; confirm both spellings
   are valid event names for this processor.  */
1086 static Hwcentry intelAtomList[] = {
1087 {"cycles", "cpu_clk_unhalted.core", /*6759307*/ REGNO_ANY, STXT ("CPU Cycles"), PRELOADS_7, 1, ABST_NONE},
1088 {"cycles", "cpu_clk_unhalted.thread", /*6759307*/ REGNO_ANY, STXT ("CPU Cycles"), PRELOADS_7, 1, ABST_NONE},
1089 {"insts", "instr_retired.any", REGNO_ANY, STXT ("Instructions Executed"), PRELOADS_7, 0, ABST_NONE},
1091 /* explicit definitions of (hidden) entries for proper counters */
1092 /* Only counters that can be time converted, or are load-store need to be in this table */
1093 /* XXXX add core2-related entries if appropriate */
1094 {/*30A*/"cpu_clk_unhalted.core", /*6759307*/ NULL, REGNO_ANY, NULL, PRELOADS_7, 1, ABST_NONE},
1095 {/*30A*/"cpu_clk_unhalted.thread", /*6759307*/ NULL, REGNO_ANY, NULL, PRELOADS_7, 1, ABST_NONE},
1096 {/*0c*/"page_walks.cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1097 {/*14*/"cycles_div_busy", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1098 {/*21*/"l2_ads", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1099 {/*22*/"l2_dbus_busy", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1100 {/*32*/"l2_no_req", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1101 {/*3c*/"cpu_clk_unhalted.core_p", NULL, REGNO_ANY, NULL, PRELOADS_7, 1, ABST_NONE},
1102 {/*3c*/"cpu_clk_unhalted.bus", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1103 {/*3c*/"cpu_clk_unhalted.no_other", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1104 {/*62*/"bus_drdy_clocks", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1105 {/*63*/"bus_lock_clocks", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1106 {/*64*/"bus_data_rcv", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1107 {/*7a*/"bus_hit_drv", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1108 {/*7b*/"bus_hitm_drv", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1109 {/*7d*/"busq_empty", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1110 {/*7e*/"snoop_stall_drv", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1111 {/*7f*/"bus_io_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1112 {/*c6*/"cycles_int_masked.cycles_int_masked", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1113 {/*c6*/"cycles_int_masked.cycles_int_pending_and_masked", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1115 /* "Architectural" events: */
1116 {/*3c*/"unhalted-core-cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1118 /* additional (hidden) aliases for convenience */
1119 {"cycles0", "cpu_clk_unhalted.core_p", 0, NULL, PRELOADS_75, 1, ABST_NONE},
1120 {"cycles1", "cpu_clk_unhalted.core_p", 1, NULL, PRELOADS_75, 1, ABST_NONE},
1121 {"insts0", "inst_retired.any_p", 0, NULL, PRELOADS_75, 0, ABST_NONE},
1122 {"insts1", "inst_retired.any_p", 1, NULL, PRELOADS_75, 0, ABST_NONE},
/* NOTE(review): the closing all-NULL/zero row (presumably the end-of-table
   marker) has nine initializers with ABST_NONE last, whereas data rows put
   their ABST_* value seventh -- confirm against Hwcentry.  */
1123 {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE}
/* Hardware-counter table named for AMD Opteron families 10h/11h.
   The first group maps short aliases ("cycles", "icm", "l2dm", ...) to AMD
   event names, several of them qualified with a "~umask=" suffix.  The second
   group lists raw events with a NULL second field; most of these carry
   ABST_TBD rather than ABST_NONE in the seventh position.  The last group
   holds hidden aliases that name register 0 or 1 explicitly.
   NOTE(review): positional field meaning is not visible here; confirm against
   the Hwcentry declaration.  */
1126 static Hwcentry amd_opteron_10h_11h[] = {
1127 {"cycles", "BU_cpu_clk_unhalted", REGNO_ANY, STXT ("CPU Cycles"), PRELOADS_75, 1, ABST_NONE},
1128 {"insts", "FR_retired_x86_instr_w_excp_intr", REGNO_ANY, STXT ("Instructions Executed"), PRELOADS_75, 0, ABST_NONE},
1129 {"icr", "IC_fetch", REGNO_ANY, STXT ("L1 I-cache Refs"), PRELOADS_7, 0, ABST_NONE}, /* new */
1130 {"icm", "IC_miss", REGNO_ANY, STXT ("L1 I-cache Misses"), PRELOADS_6, 0, ABST_NONE},
1131 {"l2itlbh", "IC_itlb_L1_miss_L2_hit", REGNO_ANY, STXT ("L2 ITLB Hits"), PRELOADS_6, 0, ABST_NONE}, /* new */
1132 {"l2itlbm", "IC_itlb_L1_miss_L2_miss", REGNO_ANY, STXT ("L2 ITLB Misses"), PRELOADS_5, 0, ABST_NONE}, /* new */
1133 {"l2ir", "BU_internal_L2_req~umask=0x1", REGNO_ANY, STXT ("L2 I-cache Refs"), PRELOADS_6, 0, ABST_NONE},
1134 {"l2im", "BU_fill_req_missed_L2~umask=0x1", REGNO_ANY, STXT ("L2 I-cache Misses"), PRELOADS_4, 0, ABST_NONE},
1135 {"dcr", "DC_access", REGNO_ANY, STXT ("L1 D-cache Refs"), PRELOADS_7, 0, ABST_NONE}, /* new */
1136 {"dcm", "DC_miss", REGNO_ANY, STXT ("L1 D-cache Misses"), PRELOADS_65, 0, ABST_NONE}, /* new */
1137 {"l2dtlbh", "DC_dtlb_L1_miss_L2_hit", REGNO_ANY, STXT ("L2 DTLB Hits"), PRELOADS_6, 0, ABST_NONE}, /* new */
1138 {"l2dtlbm", "DC_dtlb_L1_miss_L2_miss", REGNO_ANY, STXT ("L2 DTLB Misses"), PRELOADS_5, 0, ABST_NONE}, /* new */
1139 {"l2dr", "BU_internal_L2_req~umask=0x2", REGNO_ANY, STXT ("L2 D-cache Refs"), PRELOADS_65, 0, ABST_NONE}, /* hwc_cache_load: 1.6x overcount on shanghai01 */
1140 {"l2dm", "BU_fill_req_missed_L2~umask=0x2", REGNO_ANY, STXT ("L2 D-cache Misses"), PRELOADS_6, 0, ABST_NONE}, /* new */
1141 {"fpadd", "FP_dispatched_fpu_ops~umask=0x1", REGNO_ANY, STXT ("FP Adds"), PRELOADS_7, 0, ABST_NONE},
1142 {"fpmul", "FP_dispatched_fpu_ops~umask=0x2", REGNO_ANY, STXT ("FP Muls"), PRELOADS_7, 0, ABST_NONE},
1143 {"fpustall", "FR_dispatch_stall_fpu_full", REGNO_ANY, STXT ("FPU Stall Cycles"), PRELOADS_7, 1, ABST_NONE},
1144 {"memstall", "FR_dispatch_stall_ls_full", REGNO_ANY, STXT ("Memory Unit Stall Cycles"), PRELOADS_7, 1, ABST_NONE},
1145 // For PAPI mappings, see hwctable.README.family10h
1146 // For PAPI mappings, see hwctable.README.opteron
1148 /* explicit definitions of (hidden) entries for proper counters */
1149 /* Only counters that can be time converted, or are load-store need to be in this table */
1150 {"BU_cpu_clk_unhalted", NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE},
1151 {"FP_cycles_no_fpu_ops_retired", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1152 {"FP_serialize_ops_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1153 {"FR_dispatch_stall_branch_abort_to_retire", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_TBD},
1154 {"FR_dispatch_stall_far_ctl_trsfr_resync_branch_pend", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_TBD},
1155 {"FR_dispatch_stall_fpu_full", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_TBD},
1156 {"FR_dispatch_stall_ls_full", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_TBD},
1157 {"FR_dispatch_stall_reorder_buffer_full", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_TBD},
1158 {"FR_dispatch_stall_resv_stations_full", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_TBD},
1159 {"FR_dispatch_stall_segment_load", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_TBD},
1160 {"FR_dispatch_stall_serialization", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_TBD},
1161 {"FR_dispatch_stall_waiting_all_quiet", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_TBD},
1162 {"FR_dispatch_stalls", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_TBD},
1163 {"FR_intr_masked_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_TBD},
1164 {"FR_intr_masked_while_pending_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_TBD},
1165 {"FR_nothing_to_dispatch", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_TBD},
1166 {"IC_instr_fetch_stall", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_TBD},
1167 {"LS_buffer_2_full", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_TBD},
1168 {"NB_mem_ctrlr_dram_cmd_slots_missed", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1169 {"NB_mem_ctrlr_turnaround", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_TBD},
1171 /* additional (hidden) aliases, for convenience */
1172 {"cycles0", "BU_cpu_clk_unhalted", 0, NULL, PRELOADS_8, 1, ABST_NONE},
1173 {"cycles1", "BU_cpu_clk_unhalted", 1, NULL, PRELOADS_8, 1, ABST_NONE},
1174 {"insts0", "FR_retired_x86_instr_w_excp_intr", 0, NULL, PRELOADS_8, 0, ABST_NONE},
1175 {"insts1", "FR_retired_x86_instr_w_excp_intr", 1, NULL, PRELOADS_8, 0, ABST_NONE},
/* NOTE(review): the closing all-NULL/zero row (presumably the end-of-table
   marker) has nine initializers with ABST_NONE last, whereas data rows put
   their ABST_* value seventh -- confirm against Hwcentry.  */
1176 {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE}
/* Hardware-counter table named for AMD family 15h.
   Structure: short aliases mapped to AMD event names (some with a "~umask="
   suffix), then raw events with a NULL second field (all ABST_NONE here),
   then hidden aliases that name register 0 or 1 explicitly.  The leading
   comment in each raw row looks like an event code with an unspecified
   unit mask (".xx") -- confirm.
   NOTE(review): positional field meaning is not visible here; confirm against
   the Hwcentry declaration.  */
1179 static Hwcentry amd_15h[] = {
1180 {"cycles", "CU_cpu_clk_unhalted", REGNO_ANY, STXT ("CPU Cycles"), PRELOADS_75, 1, ABST_NONE},
1181 {"insts", "EX_retired_instr_w_excp_intr", REGNO_ANY, STXT ("Instructions Executed"), PRELOADS_75, 0, ABST_NONE},
1182 {"icr", "IC_fetch", REGNO_ANY, STXT ("L1 I-cache Refs"), PRELOADS_7, 0, ABST_NONE}, /* new */
1183 {"icm", "IC_miss", REGNO_ANY, STXT ("L1 I-cache Misses"), PRELOADS_6, 0, ABST_NONE},
1184 {"l2im", "IC_refill_from_system", REGNO_ANY, STXT ("L2 I-cache Misses"), PRELOADS_6, 0, ABST_NONE},
1185 {"dcr", "DC_access", REGNO_ANY, STXT ("L1 D-cache Refs"), PRELOADS_7, 0, ABST_NONE}, /* new */
1186 {"dcm", "DC_miss~umask=0x3", REGNO_ANY, STXT ("L1 D-cache Misses"), PRELOADS_65, 0, ABST_NONE}, /* new */
1187 {"l2dm", "DC_refill_from_system", REGNO_ANY, STXT ("L2 D-cache Misses"), PRELOADS_6, 0, ABST_NONE}, /* new */
1188 {"dtlbm", "DC_unified_tlb_miss~umask=0x7", REGNO_ANY, STXT ("L2 DTLB Misses"), PRELOADS_5, 0, ABST_NONE}, /* new */
1189 // For PAPI mappings, see hwctable.README.family15h
1191 /* explicit definitions of (hidden) entries for proper counters */
1192 /* Only counters that can be time converted, or are load-store need to be in this table */
1193 {/*001.xx*/"FP_scheduler_empty", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1194 {/*006.xx*/"FP_bottom_execute_uops_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1195 {/*023.xx*/"LS_ldq_stq_full", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1196 {/*024.xx*/"LS_locked_operation", /*umask!=0*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1197 {/*069.xx*/"CU_mab_wait_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1198 {/*076.xx*/"CU_cpu_clk_unhalted", NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE},
1199 {/*087.xx*/"IC_instr_fetch_stall", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1200 {/*0cd.xx*/"EX_intr_masked_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1201 {/*0ce.xx*/"EX_intr_masked_while_pending_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1202 {/*0d0.xx*/"DE_nothing_to_dispatch", /*future*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1203 {/*0d1.xx*/"DE_dispatch_stalls", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1204 {/*0d3.xx*/"DE_dispatch_stall_serialization", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1205 {/*0d5.xx*/"DE_dispatch_stall_instr_retire_q_full", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1206 {/*0d6.xx*/"DE_dispatch_stall_int_scheduler_q_full", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1207 {/*0d7.xx*/"DE_dispatch_stall_fp_scheduler_q_full", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1208 {/*0d8.xx*/"DE_dispatch_stall_ldq_full", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1209 {/*0d9.xx*/"DE_dispatch_stall_waiting_all_quiet", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1210 {/*1d8.xx*/"EX_dispatch_stall_stq_full", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
1212 /* additional (hidden) aliases, for convenience */
1213 {"cycles0", "CU_cpu_clk_unhalted", 0, NULL, PRELOADS_8, 1, ABST_NONE},
1214 {"cycles1", "CU_cpu_clk_unhalted", 1, NULL, PRELOADS_8, 1, ABST_NONE},
1215 {"insts0", "EX_retired_instr_w_excp_intr", 0, NULL, PRELOADS_8, 0, ABST_NONE},
1216 {"insts1", "EX_retired_instr_w_excp_intr", 1, NULL, PRELOADS_8, 0, ABST_NONE},
/* NOTE(review): the closing all-NULL/zero row (presumably the end-of-table
   marker) has nine initializers with ABST_NONE last, whereas data rows put
   their ABST_* value seventh -- confirm against Hwcentry.  */
1217 {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE}
1219 #endif /* __i386__ or __x86_64 */
/* Designated-initializer fragments used to build Hwcentry table entries for
   generic perf events.  INIT_HWC sets the fields shared by every such entry;
   HWE, SWE and HWCE pick the perf event type.  */
#define INIT_HWC(nm, mtr, cfg, ty) \
  .name = (nm), \
  .metric = (mtr), \
  .config = (cfg), \
  .type = (ty), \
  .use_perf_event_type = 1, \
  .val = PRELOAD_DEF, \
  .reg_num = REGNO_ANY

/* Entry of type PERF_TYPE_HARDWARE.  */
#define HWE(nm, mtr, cfg) INIT_HWC (nm, mtr, cfg, PERF_TYPE_HARDWARE)

/* Entry of type PERF_TYPE_SOFTWARE.  */
#define SWE(nm, mtr, cfg) INIT_HWC (nm, mtr, cfg, PERF_TYPE_SOFTWARE)

/* Entry of type PERF_TYPE_HW_CACHE; the config packs the cache id in bits
   0-7, the operation in bits 8-15 and the result in bits 16-23.  */
#define HWCE(nm, mtr, id, op, res) \
  INIT_HWC (nm, mtr, (id) | ((op) << 8) | ((res) << 16), PERF_TYPE_HW_CACHE)
/* Initializers for the generic perf events (hardware, software and hardware
   cache events).  The expansion is a comma-terminated run of Hwcentry
   initializers; the table that uses it must append its own terminator
   entry.  Names follow the perf tool's event names, with "insts" as the
   name of the instructions counter.  */
#define HWC_GENERIC \
  /* Hardware event: */\
  { HWE("usr_time", STXT("User CPU"), PERF_COUNT_HW_CPU_CYCLES), .timecvt = 1,\
    .int_name = "cycles" },\
  { HWE("sys_time", STXT("System CPU"), PERF_COUNT_HW_CPU_CYCLES), .timecvt = 1,\
    .int_name = "cycles~system=1~user=0" },\
  { HWE("branch-instructions", STXT("Branch-instructions"),\
	PERF_COUNT_HW_BRANCH_INSTRUCTIONS) },\
  { HWE("branch-misses", STXT("Branch-misses"), PERF_COUNT_HW_BRANCH_MISSES) },\
  { HWE("bus-cycles", STXT("Bus Cycles"), PERF_COUNT_HW_BUS_CYCLES),\
    .timecvt = 1 },\
  { HWE("cache-misses", STXT("Cache-misses"), PERF_COUNT_HW_CACHE_MISSES) },\
  { HWE("cache-references", STXT("Cache-references"),\
	PERF_COUNT_HW_CACHE_REFERENCES) },\
  { HWE("cycles", STXT("CPU Cycles"), PERF_COUNT_HW_CPU_CYCLES), .timecvt = 1 },\
  { HWE("insts", STXT("Instructions Executed"), PERF_COUNT_HW_INSTRUCTIONS),\
    .int_name = "instructions" },\
  { HWE("ref-cycles", STXT("Total Cycles"), PERF_COUNT_HW_REF_CPU_CYCLES),\
    .timecvt = 1 },\
  /* perf defines the backend stall as "during retirement" and the\
     frontend stall as "during issue".  */\
  { HWE("stalled-cycles-backend", STXT("Stalled Cycles during retirement."),\
	PERF_COUNT_HW_STALLED_CYCLES_BACKEND), .timecvt = 1 },\
  { HWE("stalled-cycles-frontend", STXT("Stalled Cycles during issue."),\
	PERF_COUNT_HW_STALLED_CYCLES_FRONTEND), .timecvt = 1 },\
  /* Software event: */\
  { SWE("alignment-faults", STXT("Alignment Faults"),\
	PERF_COUNT_SW_ALIGNMENT_FAULTS) },\
  { SWE("context-switches", STXT("Context Switches"),\
	PERF_COUNT_SW_CONTEXT_SWITCHES) },\
  { SWE("cpu-clock", STXT("CPU Clock"), PERF_COUNT_SW_CPU_CLOCK),\
    .timecvt = 1 },\
  { SWE("cpu-migrations", STXT("CPU Migrations"),\
	PERF_COUNT_SW_CPU_MIGRATIONS) },\
  { SWE("emulation-faults", STXT("Emulation Faults"),\
	PERF_COUNT_SW_EMULATION_FAULTS) },\
  { SWE("major-faults", STXT("Major Page Faults"),\
	PERF_COUNT_SW_PAGE_FAULTS_MAJ) },\
  { SWE("minor-faults", STXT("Minor Page Faults"),\
	PERF_COUNT_SW_PAGE_FAULTS_MIN) },\
  { SWE("page-faults", STXT("Page Faults"), PERF_COUNT_SW_PAGE_FAULTS) },\
  { SWE("task-clock", STXT("Clock Count Specific"), PERF_COUNT_SW_TASK_CLOCK),\
    .timecvt = 1 },\
  /* Hardware cache event: */\
  { HWCE("L1-dcache-load-misses", STXT("L1 D-cache Load Misses"),\
	PERF_COUNT_HW_CACHE_L1D,\
	PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_MISS) },\
  { HWCE("L1-dcache-loads", STXT("L1 D-cache Loads"),\
	PERF_COUNT_HW_CACHE_L1D,\
	PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_ACCESS) },\
  /* Store misses are op=WRITE, result=MISS.  */\
  { HWCE("L1-dcache-store-misses", STXT("L1 D-cache Store Misses"),\
	PERF_COUNT_HW_CACHE_L1D,\
	PERF_COUNT_HW_CACHE_OP_WRITE, PERF_COUNT_HW_CACHE_RESULT_MISS) },\
  { HWCE("L1-dcache-stores", STXT("L1 D-cache Stores"),\
	PERF_COUNT_HW_CACHE_L1D,\
	PERF_COUNT_HW_CACHE_OP_WRITE, PERF_COUNT_HW_CACHE_RESULT_ACCESS) },\
  { HWCE("L1-icache-load-misses", STXT("L1 Instructions Load Misses"),\
	PERF_COUNT_HW_CACHE_L1I,\
	PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_MISS) },\
  { HWCE("L1-icache-loads", STXT("L1 Instructions Loads"),\
	PERF_COUNT_HW_CACHE_L1I,\
	PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_ACCESS) },\
  { HWCE("dTLB-load-misses", STXT("D-TLB Load Misses"),\
	PERF_COUNT_HW_CACHE_DTLB,\
	PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_MISS) },\
  { HWCE("dTLB-loads", STXT("D-TLB Loads"),\
	PERF_COUNT_HW_CACHE_DTLB,\
	PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_ACCESS) },\
  { HWCE("iTLB-load-misses", STXT("The Instruction TLB Load Misses"),\
	PERF_COUNT_HW_CACHE_ITLB,\
	PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_MISS) },\
  { HWCE("iTLB-loads", STXT("The Instruction TLB Loads"),\
	PERF_COUNT_HW_CACHE_ITLB,\
	PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_ACCESS) },
1301 static Hwcentry generic_list[] = {
1302 HWC_GENERIC
1303 {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE}
1306 #if defined(__i386__) || defined(__x86_64)
1307 #include "hwc_amd_zen3.h"
1308 #include "hwc_amd_zen4.h"
1309 #include "hwc_intel_icelake.h"
1310 #elif defined(__aarch64__)
1311 #include "hwc_arm64_amcc.h"
1312 #include "hwc_arm_neoverse_n1.h"
1313 #include "hwc_arm_ampere_1.h"
1314 #endif
1316 /* structure defining the counters for a CPU type */
1317 typedef struct
1319 int cputag;
1320 Hwcentry *stdlist_table;
1321 #define MAX_DEFAULT_HWC_DEFS 4 // allows multiple defs to handle OS variations; extend as needed
1322 char *default_exp_p[MAX_DEFAULT_HWC_DEFS + 1]; // end of list MUST be marked with NULL
1323 } cpu_list_t;
1325 /* IMPORTANT NOTE:
1327 * Any default HWC string must consist of counter names separated by -TWO- commas,
1328 * with a no trailing comma/value after the last counter name
1330 * Only aliased counters should be specified; non-aliased counters will
1331 * not get the right overflow values set.
1332 * If the string is not formatted that way, -h hi and -h lo will fail
1334 static cpu_list_t cputabs[] = {
1335 #if defined(__i386__) || defined(__x86_64)
1336 {CPC_PENTIUM_PRO_MMX, pentiumIIlist, {"insts", 0}},
1337 {CPC_PENTIUM_PRO, pentiumIIIlist, {"insts", 0}},
1338 {CPC_PENTIUM_4, pentium4, {"insts", 0}},
1339 {CPC_PENTIUM_4_HT, pentium4, {"insts", 0}},
1340 {CPC_INTEL_CORE2, intelCore2list, {"insts,,cycles", 0}},
1341 {CPC_INTEL_NEHALEM, intelNehalemList, {"insts,,cycles,,+l2m_latency,,dtlbm_stall",
1342 "insts,,cycles,,l3m_stall,,dtlbm_stall", 0}},
1343 {CPC_INTEL_WESTMERE, intelNehalemList, {"insts,,cycles,,+l2m_latency,,dtlbm_stall",
1344 "insts,,cycles,,l3m_stall,,dtlbm_stall", 0}},
1345 {CPC_INTEL_SANDYBRIDGE, intelSandyBridgeList, {"insts,,cycles,,+l2m_latency,,dtlbm_stall",
1346 "insts,,cycles,,l3m,,dtlbm", 0}},
1347 {CPC_INTEL_IVYBRIDGE, intelSandyBridgeList, {"insts,,cycles,,+l2m_latency,,dtlbm_stall",
1348 "insts,,cycles,,l3m,,dtlbm", 0}},
1349 {CPC_INTEL_HASWELL, intelHaswellList, {"insts,,cycles,,+l2m_latency,,dtlbm_stall",
1350 "insts,,cycles,,l3m,,dtlbm", 0}},
1351 {CPC_INTEL_BROADWELL, intelBroadwellList, {"insts,,cycles,,+l2m_latency,,dtlbm",
1352 "insts,,cycles,,l3m,,dtlbm", 0}},
1353 {CPC_INTEL_SKYLAKE, intelSkylakeList, {"insts,,cycles,,+l2m_latency,,dtlbm_stall",
1354 "insts,,cycles,,l2m_stall,,dtlbm_stall", 0}},
1355 {CPC_INTEL_ICELAKE, intelIcelakeList, {"insts,,cycles,,dTLB-load-misses", 0}},
1356 {CPC_INTEL_UNKNOWN, intelLinuxUnknown, {"cycles,,insts,,llm",
1357 "user_time,,system_time,,cycles,,insts,,llm", 0}},
1358 {CPC_INTEL_ATOM, intelAtomList, {"insts", 0}},
1359 {CPC_AMD_K8C, amd_opteron_10h_11h, {"insts,,cycles,,l2dm,,l2dtlbm", 0}},
1360 {CPC_AMD_FAM_10H, amd_opteron_10h_11h, {"insts,,cycles,,l2dm,,l2dtlbm", 0}},
1361 {CPC_AMD_FAM_11H, amd_opteron_10h_11h, {"insts,,cycles,,l2dm,,l2dtlbm", 0}},
1362 {CPC_AMD_FAM_15H, amd_15h, {"insts,,cycles", 0}},
1363 {CPC_KPROF, kproflist, {NULL}}, // OBSOLETE (To support 12.3 and earlier, TBR)
1364 {CPC_AMD_Authentic, generic_list, {"insts,,cycles", 0}},
1365 {CPC_AMD_FAM_19H_ZEN3, amd_zen3_list, {"insts,,cycles", 0}},
1366 {CPC_AMD_FAM_19H_ZEN4, amd_zen4_list, {"insts,,cycles", 0}},
1367 #elif defined(__aarch64__)
1368 {CPC_ARM64_AMCC, arm64_amcc_list, {"insts,,cycles", 0}},
1369 {CPC_ARM_NEOVERSE_N1, arm_neoverse_n1_list, {"insts,,cycles", 0}},
1370 {CPC_ARM_AMPERE_1, arm_ampere_1_list, {"insts,,cycles", 0}},
1371 {CPC_ARM_GENERIC, generic_list, {"insts,,cycles", 0}},
1372 #endif
1373 {0, generic_list, {"insts,,cycles", 0}},
1376 /*---------------------------------------------------------------------------*/
1377 /* state variables */
/* Set to 1 by setup_cpc_general() the first time it runs.  */
static int initialized;
/* Set to 1 once test_hwcs() has set HWCFUNCS_SIGNAL to SIG_IGN.  */
static int signals_disabled;
/* Simple growable list of heap pointers.  The slot right after the last
   live element always holds NULL.  */
typedef struct
{
  void **array;	/* element pointers, followed by a NULL terminator */
  int sz;	/* number of live elements in array */
  int max;	/* number of slots allocated for array */
} ptr_list;

/* Put LST into the empty state; no storage is allocated.  */
static void
ptr_list_init (ptr_list *lst)
{
  lst->array = NULL;
  lst->max = 0;
  lst->sz = 0;
}

/* Append PTR, which must be freeable, to LST.  The array starts at 16
   slots and doubles when full.  If it cannot be grown the add is
   discarded and LST is left unchanged.  */
static void
ptr_list_add (ptr_list *lst, char *ptr)
{
  /* One slot is always kept back for the NULL terminator.  */
  if (lst->sz + 1 >= lst->max)
    {
      int grown = (lst->max != 0) ? 2 * lst->max : 16;
      void **bigger = (void **) realloc (lst->array, grown * sizeof (void *));
      if (bigger == NULL)
	return;			/* failed, discard add */
      lst->array = bigger;
      lst->max = grown;
    }
  lst->array[lst->sz] = ptr;
  lst->sz++;
  lst->array[lst->sz] = NULL;	/* mark new end-of-list */
}

/* Free every element (shallow free), then the array itself, and return
   LST to the empty state.  */
static void
ptr_list_free (ptr_list *lst)
{
  void **items = lst->array;
  if (items != NULL)
    {
      for (void **cur = items; *cur != NULL; cur++)
	free (*cur);
      free (items);
    }
  lst->array = NULL;
  lst->max = 0;
  lst->sz = 0;
}
1427 // Capabilities of this machine (initialized by setup_cpc())
1428 static int cpcx_cpuver = CPUVER_UNDEFINED;
1429 static uint_t cpcx_npics;
1430 static const char *cpcx_cciname;
1431 static const char *cpcx_docref;
1432 static uint64_t cpcx_support_bitmask;
1434 // cpcx_*[0]: collect lists
1435 // cpcx_*[1]: er_kernel lists
1436 // Each cpcx_*[] list is an array of ptrs with null ptr marking end of list
1437 static char **cpcx_attrs[2];
1439 static Hwcentry **cpcx_std[2];
1440 static Hwcentry **cpcx_raw[2];
1441 static Hwcentry **cpcx_hidden[2];
1443 static uint_t cpcx_max_concurrent[2];
1444 static char *cpcx_default_hwcs[2];
1445 static char *cpcx_orig_default_hwcs[2];
1446 static int cpcx_has_precise[2];
1448 #define VALID_FOR_KERNEL(forKernel) ((forKernel)>=0 && (forKernel)<=1)
1449 #define IS_KERNEL(forKernel) ((forKernel)==1)
1451 // used to build lists:
1452 static ptr_list unfiltered_attrs;
1453 static ptr_list unfiltered_raw;
1455 /*---------------------------------------------------------------------------*/
1456 /* misc internal utilities */
/* True when CURRCHAR ends a string: it is either TERMCHAR or NUL.  */
#define IS_EOL(currchar, termchar) ((currchar) == (termchar) || (currchar) == 0)

/* Compare REGNAME and INT_NAME, treating both NUL and TERMCHAR as the end
   of a string.  Return 1 if the strings are the same up to their
   terminating characters, 0 otherwise.  */
static int
is_same (const char *regname, const char *int_name, char termchar)
{
  for (;; regname++, int_name++)
    {
      if (IS_EOL (*regname, termchar))
	return IS_EOL (*int_name, termchar) ? 1 : 0;
      if (*regname != *int_name)
	return 0;		/* strings differ */
    }
}
/* Decide whether NAME is entirely a number in strtoull() base-0 syntax
   (decimal, 0x... hexadecimal or 0... octal).

   Return 1 and, if PVAL is non-NULL, store the value in *PVAL.  Return 0,
   leaving *PVAL untouched, when NAME is NULL or empty, has trailing
   characters after the number, or does not fit in an unsigned long long.
   errno is left as the caller had it.  */
static int
is_numeric (const char *name, uint64_t *pval)
{
  if (name == NULL || *name == 0)
    return 0;

  char *endptr = NULL;
  int saved_errno = errno;
  errno = 0;
  uint64_t val = strtoull (name, &endptr, 0);
  /* On overflow strtoull() returns a saturated value and sets ERANGE.  */
  int out_of_range = (errno == ERANGE);
  errno = saved_errno;

  if (*endptr != 0 || out_of_range)
    return 0;			/* name does not specify a numeric value */
  if (pval != NULL)
    *pval = val;
  return 1;
}
1496 static int
1497 is_visible_alias (Hwcentry* pctr)
1499 if (!pctr)
1500 return 0;
1501 if (pctr->name && pctr->int_name && pctr->metric)
1502 return 1;
1503 return 0;
1506 static int
1507 is_hidden_alias (Hwcentry* pctr)
1509 if (!pctr)
1510 return 0;
1511 if (pctr->name && pctr->int_name && pctr->metric == NULL)
1512 return 1;
1513 return 0;
#if !HWC_DEBUG
#define hwcentry_print(lvl,x1,x2)
#else

/* Trace the fields of PENTRY at debug level LVL, prefixed by HEADER.
   String fields that are NULL are shown as "NULL".  */
static void
hwcentry_print (int lvl, const char *header, const Hwcentry *pentry)
{
  const char *name_txt = pentry->name ? pentry->name : "NULL";
  const char *int_name_txt = pentry->int_name ? pentry->int_name : "NULL";
  const char *metric_txt = pentry->metric ? pentry->metric : "NULL";

  Tprintf (lvl, "%s '%s', '%s', %d, '%s', %d, %d, %d, %d, %d, %d, /\n",
	   header,
	   name_txt,
	   int_name_txt,
	   pentry->reg_num,
	   metric_txt,
	   pentry->lval,	/* low-resolution/long run */
	   pentry->val,		/* normal */
	   pentry->hval,	/* high-resolution/short run */
	   pentry->timecvt,
	   pentry->memop,	/* type of instruction that can trigger */
	   pentry->sort_order);
}
#endif
1539 /*---------------------------------------------------------------------------*/
1540 /* utilities for rawlist (list of raw counters with reglist[] filled in) */
1542 /* search the 'raw' list of counters for <name> */
1543 static Hwcentry *
1544 ptrarray_find_by_name (Hwcentry** array, const char * name)
1546 if (name == NULL)
1547 return NULL;
1548 Tprintf (DBG_LT3, "hwctable: array_find_by_name(%s):\n", name);
1549 for (int ii = 0; array && array[ii]; ii++)
1550 if (strcmp (array[ii]->name, name) == 0)
1551 return array[ii];
1552 return NULL; /* not found */
1555 /* add Hwcentry to the 'raw' list of counters */
1556 static Hwcentry *
1557 alloc_shallow_copy (const Hwcentry *pctr)
1559 Hwcentry *node = (Hwcentry *) malloc (sizeof (Hwcentry));
1560 if (!node)
1561 return NULL; // fail
1562 *node = *pctr; /* shallow copy! */
1563 if (pctr->name)
1564 node->name = strdup (pctr->name);
1565 return node;
1568 /* add Hwcentry to the 'raw' list of counters */
1569 static Hwcentry *
1570 list_append_shallow_copy (ptr_list *list, const Hwcentry *pctr)
1572 Hwcentry *node = alloc_shallow_copy (pctr);
1573 if (!node)
1574 return NULL; // fail
1575 ptr_list_add (list, (void*) node);
1576 return node;
1579 static Hwcentry *
1580 list_add (ptr_list *list, uint_t regno, const char *name)
1582 Hwcentry *praw;
1583 praw = ptrarray_find_by_name ((Hwcentry**) list->array, name);
1584 if (!praw)
1586 Hwcentry tmpctr = empty_ctr;
1587 tmpctr.name = (char *) name;
1588 praw = list_append_shallow_copy (list, &tmpctr);
1590 return praw;
1593 /*---------------------------------------------------------------------------*/
1594 /* utilities for stdlist (table of aliased, hidden, & convenience, ctrs) */
1596 /* find top level definition for <cpuid> */
1597 static cpu_list_t*
1598 cputabs_find_entry (int cpuid)
1600 int i;
1601 /* now search for the appropriate table */
1602 for (i = 0;; i++)
1604 if (cputabs[i].cputag == 0)
1605 break;
1606 if (cpuid == cputabs[i].cputag)
1607 return &cputabs[i];
1609 Tprintf (0, "hwctable: cputabs_find_entry: WARNING: "
1610 "cpu_id = %d not defined. No 'standard' counters are available\n",
1611 cpuid);
1612 return &cputabs[i];
1615 /* find Hwcentry table for <cpuid> */
1616 static Hwcentry*
1617 stdlist_get_table (int cpuid)
1619 cpu_list_t* tmp = cputabs_find_entry (cpuid);
1620 if (tmp)
1621 return tmp->stdlist_table;
1622 return NULL;
1625 /* search the 'standard' list of counters for <name>,<regno> */
1626 /* note: <regno>=REGNO_ANY is a wildcard that matches any value. */
1628 /* note: int_name==NULL is a wildcard */
1629 static const Hwcentry *
1630 ptrarray_find (const Hwcentry **array, const char *name, const char *int_name,
1631 int check_regno, regno_t regno)
1633 const Hwcentry *pctr;
1634 if (!array)
1635 return NULL;
1636 for (int ii = 0; array[ii]; ii++)
1638 pctr = array[ii];
1639 if (strcmp (pctr->name, name))
1640 continue;
1641 if (int_name && int_name[0] != 0 && pctr->int_name)
1643 if (NULL == strstr (int_name, pctr->int_name))
1644 continue;
1646 return pctr;
1648 return NULL;
1651 /* search the 'standard' list of counters for <name>,<regno> */
1653 /* note: <regno>=REGNO_ANY is a wildcard that matches any value. */
1654 static const Hwcentry *
1655 static_table_find (const Hwcentry *table, const char *name, const char *int_name,
1656 int check_regno, regno_t regno)
1658 int sz;
1659 for (sz = 0; table && table[sz].name; sz++)
1661 if (!sz)
1662 return NULL;
1663 const Hwcentry ** list = calloc (sz + 1, sizeof (void*));
1664 if (!list)
1665 return NULL;
1666 for (int ii = 0; ii < sz; ii++)
1667 list[ii] = &table[ii];
1668 list[sz] = NULL;
1669 const Hwcentry *pctr = ptrarray_find (list, name, int_name, check_regno, regno);
1670 free (list);
1671 return pctr;
#if !HWC_DEBUG
#define stdlist_print(dbg_lvl,table)
#else

/* Trace every entry of TABLE, up to the entry whose name is NULL, at
   debug level DBG_LVL.  A NULL table is reported as an error.  */
static void
stdlist_print (int dbg_lvl, const Hwcentry *table)
{
  if (table == NULL)
    {
      Tprintf (0, "hwctable: stdlist_print: ERROR: "
	       "table is invalid.\n");
      return;
    }
  for (int idx = 0; table[idx].name != NULL; idx++)
    {
      hwcentry_print (dbg_lvl, "hwctable: stdlist: ", &table[idx]);
    }
}
#endif
1695 /*---------------------------------------------------------------------------*/
1696 /* utilities for init */
1698 /* try to bind counters to hw. Return 0 on success, nonzero otherwise */
1699 static int
1700 test_hwcs (const Hwcentry* entries[], unsigned numctrs)
1702 int rc = -1;
1703 hwc_event_t sample;
1704 int created = 0;
1705 hwcdrv_api_t *hwcdrv = get_hwcdrv ();
1706 Tprintf (DBG_LT2, "hwctable: test_hwcs()...\n");
1707 rc = hwcfuncs_bind_hwcentry (entries, numctrs);
1708 if (rc)
1710 Tprintf (0, "hwctable: WARNING: test "
1711 "counters could not be created\n");
1712 goto end_test_hwcs;
1714 created = 1;
1715 if (!signals_disabled)
1717 (void) signal (HWCFUNCS_SIGNAL, SIG_IGN);
1718 signals_disabled = 1;
1720 rc = hwcdrv->hwcdrv_start ();
1721 if (rc)
1723 Tprintf (0, "hwctable: WARNING: test "
1724 "counters could not be started\n");
1725 goto end_test_hwcs;
1727 rc = hwcdrv->hwcdrv_read_events (&sample, NULL);
1728 if (rc)
1729 Tprintf (0, "hwctable: WARNING: test sample failed\n");
1730 rc = 0;
1731 #if HWC_DEBUG
1733 unsigned ii;
1734 Tprintf (DBG_LT1, "hwctable: test_hwcs(");
1735 for (ii = 0; ii < numctrs; ii++)
1736 Tprintf (DBG_LT1, "%s%s", ii ? "," : "", entries[ii]->name);
1737 Tprintf (DBG_LT1, ") PASS\n");
1739 #endif
1741 end_test_hwcs:
1742 if (created && hwcdrv->hwcdrv_free_counters ())
1743 Tprintf (0, "hwctable: WARNING: test counters could not be freed\n");
1744 return rc;
#if !HWC_DEBUG
#define check_tables()
#else

/* Check the counter tables referenced from cputabs for typos.  Every
   table except the CPC_KPROF one is scanned up to its NULL-name entry and
   inconsistent combinations of int_name and metric are traced.  */
static void
check_tables ()
{
  /* now search the known table of counters */
  for (const cpu_list_t *cpu = cputabs; cpu->cputag != 0; cpu++)
    {
      int cputag = cpu->cputag;
      if (cputag == CPC_KPROF)
	continue;
      for (Hwcentry *pentry = cpu->stdlist_table;
	   pentry != NULL && pentry->name != NULL; pentry++)
	{
	  int internal = (pentry->int_name == NULL);
	  int has_metric = (pentry->metric != NULL);

	  if (internal)
	    {			/* internal, only to supply ABST and timecvt */
	      if (has_metric)
		Tprintf (DBG_LT0, "hwctable: check_tables: ERROR:"
			 " internal && metric @%d, %s\n", cputag, pentry->name);
	      if (pentry->val != PRELOAD_DEF
		  && pentry->memop != ABST_EXACT_PEBS_PLUS1)
		Tprintf (DBG_LT2, "hwctable: check_tables: INFO:"
			 " internal && custom val=%d @%d, %s\n",
			 pentry->val, cputag, pentry->name);
	    }
	  if (has_metric && internal)
	    {			/* aliased */
	      Tprintf (DBG_LT0, "hwctable: check_tables: ERROR:"
		       " aliased && !int_name @%d, %s\n", cputag, pentry->name);
	    }
	  if (!internal && !has_metric)
	    {			/* convenience */
	      if (strcmp (pentry->name, pentry->int_name) == 0)
		Tprintf (DBG_LT0, "hwctable: check_tables: ERROR:"
			 " convenience && name==int_name @%d, %s\n",
			 cputag, pentry->name);
	    }
	}
    }
}
#endif
1799 static int try_a_counter (int forKernel);
1800 static void hwc_process_raw_ctrs (int forKernel, Hwcentry ***pstd_out,
1801 Hwcentry ***praw_out, Hwcentry ***phidden_out,
1802 Hwcentry**static_tables,
1803 Hwcentry **raw_unfiltered_in);
1805 /* internal call to initialize libs, ctr tables */
1806 static void
1807 setup_cpc_general (int skip_hwc_test)
1809 const cpu_list_t* cputabs_entry;
1810 int rc = -1;
1811 Tprintf (DBG_LT2, "hwctable: setup_cpc()... \n");
1812 if (initialized)
1814 Tprintf (0, "hwctable: WARNING: setup_cpc() has already been called\n");
1815 return;
1817 initialized = 1;
1818 cpcx_cpuver = CPUVER_UNDEFINED;
1819 cpcx_cciname = NULL;
1820 cpcx_npics = 0;
1821 cpcx_docref = NULL;
1822 cpcx_support_bitmask = 0;
1823 for (int kk = 0; kk < 2; kk++)
1824 { // collect-0 and kernel-1
1825 cpcx_attrs[kk] = NULL;
1826 cpcx_std[kk] = NULL;
1827 cpcx_raw[kk] = NULL;
1828 cpcx_hidden[kk] = NULL;
1829 cpcx_max_concurrent[kk] = 0;
1830 cpcx_default_hwcs[kk] = NULL;
1831 cpcx_orig_default_hwcs[kk] = NULL;
1832 cpcx_has_precise[kk] = 0;
1834 check_tables ();
1835 hwcdrv_api_t *hwcdrv = get_hwcdrv ();
1836 if (hwcdrv->hwcdrv_init_status)
1838 Tprintf (0, "WARNING: setup_cpc_general() failed. init_status=%d \n",
1839 hwcdrv->hwcdrv_init_status);
1840 goto setup_cpc_wrapup;
1842 hwcdrv->hwcdrv_get_info (&cpcx_cpuver, &cpcx_cciname, &cpcx_npics,
1843 &cpcx_docref, &cpcx_support_bitmask);
1845 /* Fix cpcx_cpuver for new Zen and Intel machines */
1846 cpu_info_t *cpu_p = read_cpuinfo ();
1847 if (strcmp (cpu_p->cpu_vendorstr, "AuthenticAMD") == 0)
1849 if (cpu_p->cpu_family == AMD_ZEN3_FAMILY)
1850 switch (cpu_p->cpu_model)
1852 case AMD_ZEN3_RYZEN:
1853 case AMD_ZEN3_RYZEN2:
1854 case AMD_ZEN3_RYZEN3:
1855 case AMD_ZEN3_EPYC_TRENTO:
1856 cpcx_cpuver = CPC_AMD_FAM_19H_ZEN3;
1857 break;
1858 case AMD_ZEN4_RYZEN:
1859 case AMD_ZEN4_EPYC:
1860 cpcx_cpuver = CPC_AMD_FAM_19H_ZEN4;
1861 break;
1864 else if (strcmp (cpu_p->cpu_vendorstr, "GenuineIntel") == 0)
1866 if (cpu_p->cpu_family == 6)
1868 if (cpu_p->cpu_model == 106)
1869 cpcx_cpuver = CPC_INTEL_ICELAKE;
1872 else if (strcmp (cpu_p->cpu_vendorstr, AARCH64_VENDORSTR_ARM) == 0)
1874 if (cpu_p->cpu_family == 0x50)
1875 cpcx_cpuver = CPC_ARM64_AMCC;
1876 else if (cpu_p->cpu_family == 0x41)
1877 cpcx_cpuver = CPC_ARM_NEOVERSE_N1;
1878 else if (cpu_p->cpu_family == 0xc0)
1879 cpcx_cpuver = CPC_ARM_AMPERE_1;
1880 else
1881 cpcx_cpuver = CPC_ARM_GENERIC;
1884 #ifdef DISALLOW_PENTIUM_PRO_MMX_7007575
1885 if (cpcx_cpuver == CPC_PENTIUM_PRO_MMX)
1887 Tprintf (0, "hwctable: WARNING: setup_cpc(): cpu=%d"
1888 " `Pentium Pro with MMX, Pentium II' is not supported\n", cpcx_cpuver);
1889 hwcfuncs_int_logerr (GTXT ("libcpc cannot identify processor type\n"));
1890 goto setup_cpc_wrapup;
1892 #endif
1894 /* now search the known table of counters */
1895 cputabs_entry = cputabs_find_entry (cpcx_cpuver);
1896 if (cputabs_entry == NULL)
1898 Tprintf (0, "hwctable: WARNING: setup_cpc(): cpu=%d"
1899 " could not be found in the tables\n", cpcx_cpuver);
1900 /* strange, should have at least selected "unknownlist" */
1901 hwcfuncs_int_logerr (GTXT ("Analyzer CPU table could not be found\n"));
1902 goto setup_cpc_wrapup;
1905 Hwcentry * valid_cpu_tables[2]; // [0]:static table of counters, [1]:static table of generic counters
1906 valid_cpu_tables[0] = cputabs_entry->stdlist_table;
1907 if (valid_cpu_tables[0] == NULL)
1909 Tprintf (0, "hwctable: WARNING: setup_cpc(): "
1910 " valid_cpu_tables was NULL??\n");
1911 /* strange, someone put a NULL in the lookup table? */
1912 hwcfuncs_int_logerr (GTXT ("Analyzer CPU table is invalid\n"));
1913 goto setup_cpc_wrapup;
1915 valid_cpu_tables[1] = papi_generic_list;
1916 Tprintf (DBG_LT2, "hwctable: setup_cpc(): getting descriptions \n");
1917 // populate cpcx_raw and cpcx_attr
1918 hwcdrv->hwcdrv_get_descriptions (hwc_cb, attrs_cb, cputabs_entry->stdlist_table);
1919 for (int kk = 0; kk < 2; kk++)
1920 { // collect and er_kernel
1921 hwc_process_raw_ctrs (kk, &cpcx_std[kk], &cpcx_raw[kk], &cpcx_hidden[kk],
1922 valid_cpu_tables, (Hwcentry**) unfiltered_raw.array);
1923 cpcx_has_precise[kk] = 0;
1924 for (int rr = 0; cpcx_raw[kk] && cpcx_raw[kk][rr]; rr++)
1926 int memop = cpcx_raw[kk][rr]->memop;
1927 if (ABST_MEMSPACE_ENABLED (memop))
1929 cpcx_has_precise[kk] = 1;
1930 break;
1933 cpcx_attrs[kk] = (char**) unfiltered_attrs.array;
1934 cpcx_max_concurrent[kk] = cpcx_npics;
1936 #if 1 // 22897042 - DTrace cpc provider does not support profiling on multiple ctrs on some systems
1937 if ((cpcx_support_bitmask & HWCFUNCS_SUPPORT_OVERFLOW_CTR_ID) != HWCFUNCS_SUPPORT_OVERFLOW_CTR_ID)
1939 // kernel profiling only supports one counter if overflowing counter can't be identified
1940 cpcx_max_concurrent[1] = cpcx_npics ? 1 : 0;
1942 #endif
1944 /* --- quick test of the cpc interface --- */
1945 if (skip_hwc_test)
1946 rc = 0;
1947 else
1948 rc = try_a_counter (0);
1950 /* initialize the default counter string definition */
1951 for (int kk = 0; kk < 2; kk++)
1953 char * default_exp = 0;
1954 int jj;
1955 for (jj = 0; (default_exp = cputabs_entry->default_exp_p[jj]); jj++)
1957 int rc = hwc_lookup (kk, 0, default_exp, NULL, 0, NULL, NULL);
1958 if (rc > 0)
1959 break;
1961 if (!default_exp)
1963 char * fallback[3] = {NTXT ("insts,,cycles,,l3m"), NTXT ("insts,,cycles"), NTXT ("insts")};
1964 for (int ff = 0; ff < 3; ff++)
1966 int rc = hwc_lookup (kk, 0, fallback[ff], NULL, 0, NULL, NULL);
1967 if (rc > 0)
1969 default_exp = strdup (fallback[ff]);
1970 break;
1974 cpcx_default_hwcs[kk] = default_exp;
1975 cpcx_orig_default_hwcs[kk] = default_exp;
1978 setup_cpc_wrapup:
1979 if (rc)
1981 cpcx_npics = 0;
1983 ptr_list_free(&tmp_raw); // free stuff... YXXX
1984 ptr_list_free(&unfiltered_attrs);
1987 return;
1990 static void
1991 setup_cpcx ()
1993 if (initialized)
1994 return;
1995 setup_cpc_general (0); // set up and include a hwc test run
1998 static void
1999 setup_cpc_skip_hwctest ()
2001 if (initialized)
2002 return;
2003 setup_cpc_general (1); // set up but skip hwc test run
2006 static int
2007 try_a_counter (int forKernel)
2009 if (!VALID_FOR_KERNEL (forKernel))
2010 return -1;
2011 int rc = -1;
2012 const Hwcentry * testevent;
2013 if (cpcx_std[forKernel] == NULL)
2015 Tprintf (0, "hwctable: WARNING: cpcx_std not initialized");
2016 return 0; /* consider this an automatic PASS */
2018 /* look for a valid table entry, only try valid_cpu_tables[0] */
2019 testevent = cpcx_std[forKernel][0];
2020 if (!testevent || !testevent->name)
2022 Tprintf (0, "hwctable: WARNING: no test metric"
2023 " available to verify counters\n");
2024 return 0; /* consider this an automatic PASS */
2026 Hwcentry tmp_testevent;
2027 tmp_testevent = *testevent; /* shallow copy */
2028 if (tmp_testevent.int_name == NULL)
2030 /* counter is defined in 'hidden' section of table, supply int_name */
2031 tmp_testevent.int_name = strdup (tmp_testevent.name);
2033 Hwcentry * test_array[1] = {&tmp_testevent};
2034 rc = test_hwcs ((const Hwcentry**) test_array, 1);
2035 if (rc == HWCFUNCS_ERROR_UNAVAIL)
2037 // consider this a pass (allow HWC table to be printed)
2038 Tprintf (0, "hwctable: WARNING: "
2039 "cpc_bind_event() shows counters busy; allow to continue\n");
2040 return 0;
2042 else if (rc)
2044 // failed to start for some other reason
2045 Tprintf (0, "hwctable: WARNING: "
2046 "test of counter '%s' failed\n",
2047 testevent->name);
2048 return rc;
2050 return 0;
2053 void
2054 hwc_update_val (Hwcentry *hwc)
2056 if (hwc->ref_val == 0)
2057 hwc->ref_val = hwc->val; // save original reference
2058 int64_t newVal;
2059 hrtime_t min_time_nsec = hwc->min_time;
2060 if (min_time_nsec == HWCTIME_TBD)
2061 min_time_nsec = hwc->min_time_default;
2062 switch (min_time_nsec)
2064 case 0: // disable time-based intervals
2065 // do not modify val
2066 return;
2067 case HWCTIME_ON:
2068 case HWCTIME_TBD:
2069 newVal = HWC_VAL_ON (hwc->ref_val);
2070 break;
2071 case HWCTIME_LO:
2072 newVal = HWC_VAL_LO (hwc->ref_val);
2073 break;
2074 case HWCTIME_HI:
2075 newVal = HWC_VAL_HI (hwc->ref_val);
2076 break;
2077 default:
2078 newVal = HWC_VAL_CUSTOM (hwc->ref_val, min_time_nsec);
2079 break;
2081 #define MAX_INT_VAL (2*1000*1000*1000 + 1000100)// yuck, limited to signed int
2082 if (newVal >= MAX_INT_VAL)
2083 newVal = MAX_INT_VAL;
2084 hwc->val = newVal;
2087 /* convert value string to value and store result in hwc->val */
2088 /* This function moved here from collctrl.cc */
2090 * Keep the HWCTIME_* definitions in sync with those in
2091 * collctrl.cc Coll_Ctrl::add_hwcstring().
2093 static int
2094 set_hwcval (Hwcentry *hwc, hrtime_t global_min_time_nsec, const char *valptr)
2096 hwc->min_time_default = global_min_time_nsec;
2097 if (hwc->val == 1)
2099 // An interval of 1 is used for certain types of count data.
2100 // (er_bit, er_generic, er_rock ...)
2101 // Hi and Lo do not apply.
2102 /* use the default */
2104 else if (valptr == NULL || valptr[0] == 0 || strcmp (valptr, "auto") == 0)
2105 hwc->min_time = HWCTIME_TBD;
2106 else if (strcmp (valptr, "on") == 0)
2107 hwc->min_time = HWCTIME_ON;
2108 else if (strcmp (valptr, "lo") == 0 || strcmp (valptr, "low") == 0)
2109 hwc->min_time = HWCTIME_LO;
2110 else if (strcmp (valptr, "hi") == 0 || strcmp (valptr, "high") == 0
2111 || strcmp (valptr, "h") == 0)
2112 hwc->min_time = HWCTIME_HI;
2113 else
2115 /* the remaining string should be a number > 0 */
2116 char *endchar = NULL;
2117 long long tmp = strtoll (valptr, &endchar, 0);
2118 int value = (int) tmp;
2119 if (*endchar != 0 || tmp <= 0 || value != tmp)
2121 // also covers errno == ERANGE
2122 Tprintf (0, "hwctable: set_hwcval(): ERROR: "
2123 "Invalid counter value %s for counter `%s'\n",
2124 valptr, hwc->name);
2125 return -1;
2127 if (tmp > UINT32_MAX / 2)
2129 /* Roch B. says that we MUST do this check for er_kernel
2130 because some platforms deliver overflow interrupts without
2131 identifying which counter overflowed. The only way to
2132 determine which counter overflowed is to have enough
2133 margin on 32 bit counters to make sure they don't
2134 wrap.
2136 Tprintf (0, "hwctable: set_hwcval(): ERROR: "
2137 "Counter value %s exceeds %lu\n",
2138 valptr, (unsigned long) UINT32_MAX / 2);
2139 return -1;
2141 /* set the value */
2142 if (value != 0)
2144 if (hwc->ref_val == 0)
2145 hwc->ref_val = hwc->val; // save original reference
2146 hwc->val = value;
2147 hwc->min_time = 0; // turn off auto-adjust
2150 hwc_update_val (hwc);
2151 return 0;
2154 static char *
2155 canonical_name (const char *counter)
2157 char *nameOnly = NULL;
2158 char *attrs = NULL;
2159 char tmpbuf[1024];
2160 tmpbuf[0] = 0;
2161 hwcfuncs_parse_ctr (counter, NULL, &nameOnly, &attrs, NULL, NULL);
2162 snprintf (tmpbuf + strlen (tmpbuf), sizeof (tmpbuf) - strlen (tmpbuf),
2163 "%s", nameOnly);
2164 if (attrs)
2166 hwcfuncs_attr_t cpc2_attrs[HWCFUNCS_MAX_ATTRS];
2167 void * attr_mem;
2168 unsigned nattrs;
2169 int ii, jj;
2171 /* extract attributes from counter */
2172 attr_mem = hwcfuncs_parse_attrs (counter, cpc2_attrs, HWCFUNCS_MAX_ATTRS,
2173 &nattrs, NULL);
2174 if (!attr_mem)
2176 snprintf (tmpbuf + strlen (tmpbuf), sizeof (tmpbuf) - strlen (tmpbuf),
2177 "~UNKNOWN");
2178 goto canonical_attrs_wrapup;
2181 /* sort the attributes */
2182 for (ii = 0; ii < (int) nattrs - 1; ii++)
2184 for (jj = ii + 1; jj < nattrs; jj++)
2186 int cmp = strcmp (cpc2_attrs[ii].ca_name,
2187 cpc2_attrs[jj].ca_name);
2188 if (cmp > 0)
2190 hwcfuncs_attr_t tmp = cpc2_attrs[jj];
2191 cpc2_attrs[jj] = cpc2_attrs[ii];
2192 cpc2_attrs[ii] = tmp;
2197 /* print attributes in canonical format */
2198 for (ii = 0; ii < nattrs; ii++)
2199 snprintf (tmpbuf + strlen (tmpbuf), sizeof (tmpbuf) - strlen (tmpbuf),
2200 "~%s=0x%llx", cpc2_attrs[ii].ca_name, (long long) cpc2_attrs[ii].ca_val);
2201 free (attr_mem);
2203 canonical_attrs_wrapup:
2204 free (nameOnly);
2205 free (attrs);
2206 return strdup (tmpbuf);
/* Resolve one "counter[,value]" pair into *pret_ctr.
   Steps, in order:
     1. hwcfuncs_parse_ctr() splits <counter> into plus, nameOnly, attrs,
        regstr and regno.
     2. The name is searched in cpcx_std, cpcx_hidden, cpcx_raw (by name),
        then cpcx_std and cpcx_hidden again with REGNO_ANY; finally a
        numeric name gets an entry copied from empty_ctr.
     3. The match is shallow-copied into *pret_ctr; name/int_name are reset
        to NULL and later replaced by freshly allocated strings.
     4. A leading +/- and the entry's memop decide whether backtracking is
        kept; warnings go to UWbuf.
     5. Attributes are parsed again from uname and checked for validity and
        duplicates.
     6. set_hwcval() applies <value>.
   Returns 0 on success, -1 on failure (message text is appended to UEbuf).
   UEbuf and UWbuf are both emptied on entry.  */
2209 /* process counter and value strings - put results in <*pret_ctr> */
2211 /* Print errors to UEbuf for any failure that results in nonzero return */
2212 static int
2213 process_ctr_def (int forKernel, hrtime_t global_min_time_nsec,
2214 const char *counter, const char *value, Hwcentry *pret_ctr,
2215 char* UWbuf, size_t UWsz, char* UEbuf, size_t UEsz)
2217 int rc = -1;
2218 char *nameOnly = NULL;
2219 char *attrs = NULL;
2220 char *regstr = NULL;
2221 int plus;
2222 regno_t regno;
2223 const Hwcentry *pfound = NULL;
2224 const char *uname = NULL;
2225 int disable_backtrack;
2226 UEbuf[0] = 0;
2227 UWbuf[0] = 0;
2228 Tprintf (DBG_LT3, "hwctable: process_ctr_def(): counter=%s value=%s \n",
2229 counter, value ? value : "NULL");
2230 hwcfuncs_parse_ctr (counter, &plus, &nameOnly, &attrs, &regstr, &regno);
2232 /* search for the counter in the std and raw lists */
2234 pfound = ptrarray_find ((const Hwcentry**) cpcx_std[forKernel], nameOnly, NULL, 1, regno);
2235 if (pfound)
2236 hwcentry_print (DBG_LT1, "hwctable: process_ctr_def: found in stdlist:",
2237 pfound);
2239 if (!pfound)
2241 pfound = ptrarray_find ((const Hwcentry**) cpcx_hidden[forKernel], nameOnly, NULL, 1, regno);
2242 if (pfound)
2243 hwcentry_print (DBG_LT1, "hwctable: process_ctr_def: found in stdlist(hidden):", pfound);
2245 if (!pfound)
2247 pfound = ptrarray_find_by_name (cpcx_raw[forKernel], nameOnly); /* (regno match checked later) */
2248 if (pfound)
2249 hwcentry_print (DBG_LT1, "hwctable: process_ctr_def: found in rawlist:", pfound);
2251 if (!pfound)
2253 pfound = ptrarray_find ((const Hwcentry**) cpcx_std[forKernel], nameOnly, NULL, 1, REGNO_ANY);
2254 if (pfound)
2255 hwcentry_print (DBG_LT1, "hwctable: process_ctr_def: found in stdlist but regno didn't match:", pfound);
2257 if (!pfound)
2259 pfound = ptrarray_find ((const Hwcentry**) cpcx_hidden[forKernel], nameOnly, NULL, 1, REGNO_ANY);
2260 if (pfound)
2261 hwcentry_print (DBG_LT1, "hwctable: process_ctr_def: found in stdlist(hidden) but regno didn't match:", pfound);
2263 if (!pfound)
/* Last resort: a purely numeric name gets a private entry copied from
   empty_ctr.  NOTE(review): neither tmp nor tmp->name is released anywhere
   in this function (only fields are copied out of it below), which matches
   the "Leaks?" remark -- confirm how alloc_shallow_copy allocates before
   adding a free.  */
2265 uint64_t val = 0;
2266 if (is_numeric (nameOnly, &val))
2268 Hwcentry *tmp = alloc_shallow_copy (&empty_ctr); // Leaks?
2269 if (tmp)
2271 tmp->name = strdup (nameOnly);
2272 pfound = tmp;
2275 if (pfound)
2276 hwcentry_print (DBG_LT1, "hwctable: process_ctr_def: counter specified by numeric value:", pfound);
2278 if (!pfound)
2280 snprintf (UEbuf + strlen (UEbuf), UEsz - strlen (UEbuf),
2281 GTXT ("Invalid HW counter name: %s\n"), nameOnly);
2282 snprintf (UEbuf + strlen (UEbuf), UEsz - strlen (UEbuf),
2283 GTXT ("Run \"%s -h\" with no other arguments for more information on HW counters on this system.\n"),
2284 (IS_KERNEL (forKernel) ? "er_kernel" : "collect"));
2285 goto process_ctr_def_wrapup;
2288 /* counter found */
2289 *pret_ctr = *pfound; /* shallow copy */
2290 pret_ctr->int_name = NULL; /* so free doesn't try to free these pfound's ptrs */
2291 pret_ctr->name = NULL; /* so free doesn't try to free these pfound's ptrs */
2293 /* update uname,memop */
/* uname starts as the full user string; it is advanced by one character
   below whenever a leading '+' or '-' has to be dropped.  */
2294 uname = counter;
2295 disable_backtrack = 0;
2296 if (plus != 0 || ABST_PLUS_BY_DEFAULT (pret_ctr->memop))
2298 // attempt to process memoryspace profiling
2299 int message_printed = 0;
2300 if (cpcx_cpuver == CPUVER_GENERIC)
2302 // accept plus, since we don't know what this CPU is
/* NOTE(review): this text is written to the error buffer although the
   function carries on and may still return 0; it also has no trailing
   newline, unlike the other messages -- confirm that is intended.  */
2303 snprintf (UEbuf + strlen (UEbuf), UEsz - strlen (UEbuf),
2304 GTXT ("`+' may not be correctly supported on `%s' because processor is not recognized."),
2305 cpcx_cciname);
2306 pret_ctr->memop = ABST_LDST; // supply a backtracking data type - required for collector
2308 else if (cpcx_cpuver == CPC_ULTRA1 || cpcx_cpuver == CPC_ULTRA2
2309 || cpcx_cpuver == CPC_ULTRA3 || cpcx_cpuver == CPC_ULTRA3_PLUS
2310 || cpcx_cpuver == CPC_ULTRA3_I || cpcx_cpuver == CPC_ULTRA4_PLUS
2311 || cpcx_cpuver == CPC_ULTRA4 || cpcx_cpuver == CPC_ULTRA_T1
2312 || cpcx_cpuver == CPC_ULTRA_T2 || cpcx_cpuver == CPC_ULTRA_T2P
2313 || cpcx_cpuver == CPC_ULTRA_T3)
2315 if (!ABST_BACKTRACK_ENABLED (pret_ctr->memop))
2316 disable_backtrack = 1;
2318 else if (cpcx_cpuver == CPC_SPARC_T4 || cpcx_cpuver == CPC_SPARC_T5
2319 || cpcx_cpuver == CPC_SPARC_T6 || cpcx_cpuver == CPC_SPARC_M4
2320 || cpcx_cpuver == CPC_SPARC_M5 || cpcx_cpuver == CPC_SPARC_M6
2321 || cpcx_cpuver == CPC_SPARC_M7 || cpcx_cpuver == CPC_SPARC_M8)
2323 if (pret_ctr->memop != ABST_EXACT)
2324 disable_backtrack = 1;
2326 else if (cpcx_cpuver == CPC_INTEL_NEHALEM || cpcx_cpuver == CPC_INTEL_WESTMERE
2327 || cpcx_cpuver == CPC_INTEL_SANDYBRIDGE
2328 || cpcx_cpuver == CPC_INTEL_IVYBRIDGE
2329 || cpcx_cpuver == CPC_INTEL_HASWELL
2330 || cpcx_cpuver == CPC_INTEL_BROADWELL
2331 || cpcx_cpuver == CPC_INTEL_SKYLAKE)
2333 if (pret_ctr->memop != ABST_EXACT_PEBS_PLUS1)
2334 disable_backtrack = 1;
2335 else if (plus < 0)
2337 // disabling memoryspace not supported for
2338 // remove specified -
2339 uname++;
2340 plus = 0;
2341 snprintf (UWbuf + strlen (UWbuf), UWsz - strlen (UWbuf),
2342 GTXT ("Warning: `-' is not supported on `%s' -- memory reference backtracking will remain enabled for this counter\n"),
2343 nameOnly);
2346 else
2348 message_printed = 1;
2349 snprintf (UWbuf + strlen (UWbuf), UWsz - strlen (UWbuf),
2350 GTXT ("Warning: `+' is not supported on `%s' -- memory reference backtracking will not be enabled for `%s'\n"),
2351 cpcx_cciname, nameOnly);
2352 disable_backtrack = 1;
2354 if (disable_backtrack)
2356 if (plus != 0)
2357 uname++; // remove specified + or -
2358 if (!message_printed && plus > 0)
2359 snprintf (UWbuf + strlen (UWbuf), UWsz - strlen (UWbuf),
2360 GTXT ("Warning: `+' is not supported on `%s' -- memory reference backtracking will not be enabled for this counter\n"),
2361 nameOnly);
2364 else
2365 disable_backtrack = 1;
/* With backtracking off (or an explicit '-'), every memop except
   ABST_NOPC collapses to ABST_NONE; ABST_NOPC is kept and warned about.  */
2366 if (disable_backtrack || plus < 0)
2367 if (pret_ctr->memop != ABST_NOPC)
2368 pret_ctr->memop = ABST_NONE;
2369 if (pret_ctr->memop == ABST_NOPC)
/* NOTE(review): the message below reads "will be not be recorded";
   the wording looks like a typo in the user-visible text.  */
2370 snprintf (UWbuf + strlen (UWbuf), UWsz - strlen (UWbuf),
2371 GTXT ("Warning: HW counter `%s' is not program-related -- callstacks will be not be recorded for this counter\n"),
2372 uname);
2374 /* update name and int_name */
2376 // validate attributes
2377 if (attrs)
2379 hwcfuncs_attr_t cpc2_attrs[HWCFUNCS_MAX_ATTRS];
2380 void * attr_mem;
2381 unsigned nattrs;
2382 char *errbuf;
2383 /* extract attributes from uname */
2384 attr_mem = hwcfuncs_parse_attrs (uname, cpc2_attrs, HWCFUNCS_MAX_ATTRS,
2385 &nattrs, &errbuf);
2386 if (!attr_mem)
/* NOTE(review): errbuf is not initialised here; this relies on
   hwcfuncs_parse_attrs always storing a message on failure -- verify.  */
2388 snprintf (UEbuf + strlen (UEbuf), UEsz - strlen (UEbuf),
2389 "%s\n", errbuf);
2390 free (errbuf);
2391 goto process_ctr_def_wrapup;
2393 /* make sure all attributes are valid */
2394 for (unsigned ii = 0; ii < nattrs; ii++)
2396 if (!attr_is_valid (forKernel, cpc2_attrs[ii].ca_name))
2398 snprintf (UEbuf + strlen (UEbuf), UEsz - strlen (UEbuf),
2399 GTXT ("Invalid attribute specified for counter `%s': %s\n"),
2400 nameOnly, cpc2_attrs[ii].ca_name);
2401 snprintf (UEbuf + strlen (UEbuf), UEsz - strlen (UEbuf),
2402 GTXT ("Run \"%s -h\" with no other arguments for more information on HW counters on this system.\n"),
2403 (IS_KERNEL (forKernel) ? "er_kernel" : "collect"));
2404 free (attr_mem);
2405 goto process_ctr_def_wrapup;
2407 for (unsigned jj = ii + 1; jj < nattrs; jj++)
2409 if (strcmp (cpc2_attrs[ii].ca_name,
2410 cpc2_attrs[jj].ca_name) == 0)
2412 snprintf (UEbuf + strlen (UEbuf), UEsz - strlen (UEbuf),
2413 GTXT ("Duplicate attribute specified for counter `%s': %s\n"),
2414 nameOnly, cpc2_attrs[ii].ca_name);
2415 free (attr_mem);
2416 goto process_ctr_def_wrapup;
2420 free (attr_mem);
2422 pret_ctr->name = strdup (uname);
2424 // assign int_name
2425 if (pfound->int_name)
2427 // Counter is one of the following:
2428 // - aliased (e.g. cycles~system=1),
2429 // - convenience (e.g. cycles0~system=1),
2430 if (!attrs) // convert alias to internal name
2431 pret_ctr->int_name = strdup (pfound->int_name);
2432 else
2434 // convert alias to internal name and
2435 // append user-supplied attributes
2436 size_t sz = strlen (pfound->int_name) + strlen (attrs) + 1;
2437 char *tbuf = calloc (sz, 1);
2438 if (tbuf)
2439 snprintf (tbuf, sz, "%s%s", pfound->int_name, attrs);
2440 pret_ctr->int_name = tbuf;
2443 else
2444 pret_ctr->int_name = strdup (uname); // user-supplied name
2447 /* update val */
/* NOTE(review): on this failure path pret_ctr->name and
   pret_ctr->int_name have already been allocated above and are not freed
   in this function; presumably the caller owns them -- confirm.  */
2448 if (set_hwcval (pret_ctr, global_min_time_nsec, value))
2450 snprintf (UEbuf + strlen (UEbuf), UEsz - strlen (UEbuf),
2451 GTXT ("Invalid interval for HW counter `%s': %s\n"),
2452 nameOnly, value);
2453 goto process_ctr_def_wrapup;
2455 hwcentry_print (DBG_LT2, "hwctable: process_ctr_def:", pret_ctr);
2456 rc = 0;
/* Common exit: release the three strings produced by hwcfuncs_parse_ctr. */
2458 process_ctr_def_wrapup:
2459 free (regstr);
2460 free (attrs);
2461 free (nameOnly);
2462 return rc;
2465 /*---------------------------------------------------------------------------*/
2467 /* external interfaces, see hwcentry.h for descriptions. */
/* Parse <instring>, a list of "counter,value" tokens separated by commas,
   blanks or tabs, and fill entries[] with one Hwcentry per counter.
   - *emsg and *wmsg (when non-NULL) are first set to NULL and, on exit,
     receive strdup'ed copies of any error / warning text gathered.
   - When caller_entries is NULL, function-local storage is used for the
     results and maxctrs is forced to MAX_PICS.
   - Returns the number of counters resolved, or -1 on any failure.  */
2469 extern int
2470 hwc_lookup (int forKernel, hrtime_t global_min_time_nsec, const char *instring,
2471 Hwcentry *caller_entries[], unsigned maxctrs, char **emsg, char **wmsg)
2473 unsigned ii;
2474 char *instr_copy = NULL, *ss = NULL;
2475 unsigned numctrs = 0;
2476 int rc = 0;
2477 char *tokenptr[MAX_PICS * 2];
2478 unsigned numtokens = 0;
2479 char UEbuf[1024 * 5]; /* error message buffer; strdup of it is passed back to user */
2480 char UWbuf[1024 * 5]; /* warning message buffer; strdup of it is passed back to user */
2481 if (emsg)
2482 *emsg = NULL;
2483 if (wmsg)
2484 *wmsg = NULL;
2485 UEbuf[0] = 0;
2486 UWbuf[0] = 0;
2488 // supply temporary result buffers as needed
2489 Hwcentry tmp_entry_table[MAX_PICS];
2490 Hwcentry * tmp_entries[MAX_PICS];
2491 Hwcentry **entries;
2492 if (caller_entries)
2493 entries = caller_entries;
2494 else
2496 // user doesn't care about results; provide temporary storage for results
2497 for (ii = 0; ii < MAX_PICS; ii++)
2498 tmp_entries[ii] = &tmp_entry_table[ii];
2499 entries = tmp_entries;
2500 maxctrs = MAX_PICS;
2502 Tprintf (DBG_LT1, "hwctable: hwc_lookup(%s)\n",
2503 instring ? instring : "NULL");
2505 /* clear <entries> first - prevent seg faults in hwc_lookup_wrapup */
2506 for (ii = 0; ii < maxctrs; ii++)
2507 *entries[ii] = empty_ctr;
2508 if (!instring)
2510 snprintf (UEbuf + strlen (UEbuf), sizeof (UEbuf) - strlen (UEbuf),
2511 GTXT ("No HW counters were specified."));
2512 rc = -1;
2513 goto hwc_lookup_wrapup;
2516 /* make sure tables are initialized */
2517 setup_cpc_skip_hwctest ();
2518 if (cpcx_npics == 0)
2520 if (cpcx_cpuver < 0)
2522 char buf[1024];
2523 *buf = 0;
2524 char *pch = hwcfuncs_errmsg_get (buf, sizeof (buf), 0); /* get first err msg, disable capture */
2525 if (*pch)
2526 snprintf (UEbuf + strlen (UEbuf), sizeof (UEbuf) - strlen (UEbuf),
2527 GTXT ("HW counter profiling is not supported on this system: %s%s"),
2528 pch, pch[strlen (pch) - 1] == '\n' ? "" : "\n");
2529 else
2530 snprintf (UEbuf + strlen (UEbuf), sizeof (UEbuf) - strlen (UEbuf),
2531 GTXT ("HW counter profiling is not supported on this system\n"));
2533 else
2534 snprintf (UEbuf + strlen (UEbuf), sizeof (UEbuf) - strlen (UEbuf),
2535 GTXT ("HW counter profiling is not supported on '%s'\n"),
2536 cpcx_cciname);
2537 rc = -1;
2538 goto hwc_lookup_wrapup;
/* Tokenize a private copy of the input in place: every delimiter is
   overwritten with NUL and tokenptr[] records where each token starts.
   Only blanks/tabs after a delimiter are skipped, so two adjacent commas
   yield an empty token.
   NOTE(review): the strdup result is dereferenced without a NULL check.  */
2540 ss = instr_copy = strdup (instring);
2541 while (*ss != 0 && (*ss == ' ' || *ss == '\t'))
2542 ss++;
2543 tokenptr[numtokens++] = ss;
2546 /* find end of previous token, replace w/ NULL, skip whitespace, set <tokenptr>, repeat */
2547 for (; *ss; ss++)
2549 if (*ss == ',' || *ss == ' ' || *ss == '\t')
2551 /* end of previous token found */
2552 *ss = 0; /* terminate the previous token */
2553 ss++;
2554 while (*ss != 0 && (*ss == ' ' || *ss == '\t'))
2555 ss++;
2556 if (*ss)
2557 tokenptr[numtokens++] = ss;
2558 break; // from for loop
/* NOTE(review): the `while' below looks like the tail of a do/while that
   wraps the `for' scan; its opening line is not present in this listing.  */
2562 while (*ss && numtokens < (MAX_PICS * 2));
/* Input left over at this point means tokenptr[] filled up first.  */
2564 if (*ss)
2566 snprintf (UEbuf + strlen (UEbuf), sizeof (UEbuf) - strlen (UEbuf),
2567 GTXT ("The number of HW counters specified exceeds internal resources\n"));
2568 snprintf (UEbuf + strlen (UEbuf), sizeof (UEbuf) - strlen (UEbuf),
2569 GTXT ("Run \"%s -h\" with no other arguments for more information on HW counters on this system.\n"),
2570 (IS_KERNEL (forKernel) ? "er_kernel" : "collect"));
2571 rc = -1;
2572 goto hwc_lookup_wrapup;
/* NOTE(review): numtokens is unsigned but is printed with %d.  */
2574 Tprintf (DBG_LT3, "hwctable: hwc_lookup(): numtokens=%d\n", numtokens);
2576 /* look up individual counters */
/* Tokens are consumed in pairs: tokenptr[ii] is the counter and
   tokenptr[ii + 1], when present, its value.
   NOTE(review): `fail' is never set to a nonzero value in the code shown,
   so the `if (fail)' branch further down cannot be taken.  */
2578 int fail = 0;
2579 for (ii = 0; ii < numtokens && numctrs < maxctrs; ii += 2)
2581 const char *counter;
2582 const char *value;
2583 Hwcentry *pret_ctr = entries[numctrs];
2585 /* assign the tokens to ctrnames, timeoutValues. */
2586 counter = tokenptr[ii];
2587 if (ii + 1 < numtokens)
2588 value = tokenptr[ii + 1];
2589 else
2590 value = 0;
2591 if (process_ctr_def (forKernel, global_min_time_nsec, counter, value, pret_ctr,
2592 UWbuf + strlen (UWbuf),
2593 sizeof (UWbuf) - strlen (UWbuf),
2594 UEbuf + strlen (UEbuf),
2595 sizeof (UEbuf) - strlen (UEbuf)))
2597 /* could choose to set fail=1 and continue here,
2598 but errmsgs would be aggregated (messy) */
2599 rc = -1;
2600 goto hwc_lookup_wrapup;
2602 numctrs++;
2604 if (fail)
2606 rc = -1;
2607 goto hwc_lookup_wrapup;
2611 if (!numctrs)
2613 snprintf (UEbuf + strlen (UEbuf), sizeof (UEbuf) - strlen (UEbuf),
2614 GTXT ("No HW counters were specified.\n"));
2615 rc = -1;
2616 goto hwc_lookup_wrapup;
2618 if (numctrs > cpcx_max_concurrent[forKernel])
2620 snprintf (UEbuf + strlen (UEbuf), sizeof (UEbuf) - strlen (UEbuf),
2621 GTXT ("The HW counter configuration could not be loaded: More than %d counters were specified\n"), cpcx_max_concurrent[forKernel]);
2622 snprintf (UEbuf + strlen (UEbuf), sizeof (UEbuf) - strlen (UEbuf),
2623 GTXT ("Run \"%s -h\" with no other arguments for more information on HW counters on this system.\n"),
2624 (IS_KERNEL (forKernel) ? "er_kernel" : "collect"));
2625 rc = -1;
2626 goto hwc_lookup_wrapup;
/* Common exit: hand back copies of any message text, and turn a zero rc
   into the number of counters resolved.  */
2629 hwc_lookup_wrapup:
2630 free (instr_copy);
2631 if (wmsg && strlen (UWbuf))
2632 *wmsg = strdup (UWbuf);
2633 if (emsg && strlen (UEbuf))
2634 *emsg = strdup (UEbuf);
2635 if (rc == 0)
2636 rc = numctrs;
2637 return rc;
2640 extern char *
2641 hwc_validate_ctrs (int forKernel, Hwcentry *entries[], unsigned numctrs)
2643 char UEbuf[1024 * 5];
2644 UEbuf[0] = 0;
2646 /* test counters */
2647 hwcfuncs_errmsg_get (NULL, 0, 1); /* enable errmsg capture */
2648 int hwc_rc = test_hwcs ((const Hwcentry**) entries, numctrs);
2649 if (hwc_rc)
2651 if (cpcx_cpuver == CPC_PENTIUM_4_HT || cpcx_cpuver == CPC_PENTIUM_4)
2653 snprintf (UEbuf + strlen (UEbuf), sizeof (UEbuf) - strlen (UEbuf),
2654 GTXT ("HW counter profiling is disabled unless only one logical CPU per HyperThreaded processor is online (see psradm)\n"));
2655 return strdup (UEbuf);
2657 char buf[1024];
2658 *buf = 0;
2659 char * pch = hwcfuncs_errmsg_get (buf, sizeof (buf), 0); /* get first err msg, disable capture */
2660 if (*pch)
2661 snprintf (UEbuf + strlen (UEbuf), sizeof (UEbuf) - strlen (UEbuf),
2662 GTXT ("The HW counter configuration could not be loaded: %s%s"),
2663 pch, pch[strlen (pch) - 1] == '\n' ? "" : "\n");
2664 else
2665 snprintf (UEbuf + strlen (UEbuf), sizeof (UEbuf) - strlen (UEbuf),
2666 GTXT ("The HW counter configuration could not be loaded\n"));
2667 snprintf (UEbuf + strlen (UEbuf), sizeof (UEbuf) - strlen (UEbuf),
2668 GTXT ("Run \"%s -h\" with no other arguments for more information on HW counters on this system.\n"),
2669 (IS_KERNEL (forKernel) ? "er_kernel" : "collect"));
2670 return strdup (UEbuf);
2672 return NULL;
2675 extern Hwcentry *
2676 hwc_post_lookup (Hwcentry * pret_ctr, char *counter, char * int_name, int cpuver)
2678 const Hwcentry *pfound;
2679 regno_t regno;
2680 char *nameOnly = NULL;
2681 char *attrs = NULL;
2683 /* fields in pret_ctr (name and int_name) should already be free */
2684 hwcfuncs_parse_ctr (counter, NULL, &nameOnly, &attrs, NULL, &regno);
2686 /* look for it in the canonical list */
2687 pfound = static_table_find (stdlist_get_table (cpuver),
2688 nameOnly, int_name, 0, REGNO_ANY);
2689 if (!pfound) /* try the generic list */
2690 pfound = static_table_find (papi_generic_list,
2691 nameOnly, int_name, 0, REGNO_ANY);
2692 if (pfound)
2694 /* in standard list */
2695 *pret_ctr = *pfound; /* shallow copy */
2696 if (pret_ctr->int_name)
2698 // aliased counter
2699 pret_ctr->int_name = strdup (pret_ctr->int_name);
2700 if (pret_ctr->short_desc == NULL)
2702 // look for short_desc of corresponding raw counter
2703 const Hwcentry *praw = static_table_find (stdlist_get_table (cpuver),
2704 pret_ctr->int_name, NULL, 0, REGNO_ANY);
2705 if (praw && praw->short_desc)
2706 pret_ctr->short_desc = strdup (praw->short_desc);
2709 else
2710 pret_ctr->int_name = strdup (counter);
2712 else
2714 /* not a standard counter */
2715 *pret_ctr = empty_ctr;
2716 pret_ctr->int_name = strdup (counter);
2719 /* update the name */
2720 if (attrs)
2722 pret_ctr->name = canonical_name (counter);
2723 if (pret_ctr->metric)
2725 // metric text is supplied from a table. (User supplied HWC alias)
2726 // Append user-supplied attributes to metric name:
2727 size_t len = strlen (pret_ctr->metric) + strlen (attrs) + 4;
2728 char *pch = calloc (len, 1);
2729 if (pch)
2730 snprintf (pch, len, "%s (%s)", pret_ctr->metric, attrs);
2731 pret_ctr->metric = pch; // leaks
2734 else
2735 pret_ctr->name = strdup (nameOnly);
2737 if (pfound)
2738 hwcentry_print (DBG_LT2, "hwctable: hwc_post_lookup: found: ", pret_ctr);
2739 else
2740 hwcentry_print (DBG_LT2, "hwctable: hwc_post_lookup: default: ", pret_ctr);
2741 free (attrs);
2742 free (nameOnly);
2743 return pret_ctr;
2746 static const char *
2747 hwc_on_lo_hi (const Hwcentry *pctr)
2749 char* rate;
2751 switch (pctr->min_time)
2753 case (HWCTIME_LO):
2754 rate = NTXT ("lo");
2755 break;
2756 case (HWCTIME_ON):
2757 rate = NTXT ("on");
2758 break;
2759 case (HWCTIME_HI):
2760 rate = NTXT ("hi");
2761 break;
2762 case (0):
2763 rate = NULL; // null => use interval count
2764 break;
2765 default:
2766 case (HWCTIME_TBD):
2767 rate = NTXT ("on");
2768 break;
2771 return rate; //strdup( rate );
2774 extern char *
2775 hwc_rate_string (const Hwcentry *pctr, int force_numeric)
2777 const char * rateString = hwc_on_lo_hi (pctr);
2778 char buf[128];
2779 if (!rateString || force_numeric)
2781 snprintf (buf, sizeof (buf), NTXT ("%d"), pctr->val);
2782 rateString = buf;
2784 return strdup (rateString);
2787 static char metricbuf[2048];
2789 extern char *
2790 hwc_i18n_metric (const Hwcentry *pctr)
2792 if (pctr->metric != NULL)
2793 snprintf (metricbuf, sizeof (metricbuf), NTXT ("%s"), PTXT (pctr->metric));
2794 else if (pctr->name != NULL)
2795 snprintf (metricbuf, sizeof (metricbuf), GTXT ("%s Events"), pctr->name);
2796 else if (pctr->int_name != NULL)
2797 snprintf (metricbuf, sizeof (metricbuf), GTXT ("%s Events"), pctr->int_name);
2798 else
2799 snprintf (metricbuf, sizeof (metricbuf), GTXT ("Undefined Events"));
2800 return metricbuf;
2803 /* return cpu version, should only be called when about to generate an experiment,
2804 not when reading back an experiment */
2805 #if 0 /* called by ... */
2806 . / perfan / collect / src / collect.cc : start : 245 : cpuver = hwc_get_cpc_cpuver ();
2807 . / ccr_components / Collector_Interface / collctrl.cc : constructor : 202 : cpcx_cpuver = hwc_get_cpc_cpuver ();
2808 . / perfan / dbe / src / Dbe.cc : 3041 : JApplication::cpuver = hwc_get_cpc_cpuver ();
2809 . / perfan / dbe / src / Dbe.cc : 3164 : JApplication::cpuver = hwc_get_cpc_cpuver ();
2811 note:
2812 cpc_getcpuver () : only papi, ostest, this and hwprofile.c call it
2813 #endif
2815 hwc_get_cpc_cpuver ()
2817 setup_cpcx ();
2818 return cpcx_cpuver;
2821 extern char*
2822 hwc_get_cpuname (char *buf, size_t buflen)
2824 setup_cpcx ();
2825 if (!buf || !buflen)
2826 return buf;
2827 buf[0] = 0;
2828 if (cpcx_cciname)
2830 strncpy (buf, cpcx_cciname, buflen - 1);
2831 buf[buflen - 1] = 0;
2833 return buf;
2836 extern char*
2837 hwc_get_docref (char *buf, size_t buflen)
2839 setup_cpcx ();
2840 if (!buf || !buflen)
2841 return buf;
2842 buf[0] = 0;
2843 if (cpcx_docref)
2845 strncpy (buf, cpcx_docref, buflen - 1);
2846 buf[buflen - 1] = 0;
2848 return buf;
/* Return a malloc'ed copy of the default counter string for <forKernel>.
   style == 1: a plain strdup of cpcx_default_hwcs[forKernel].
   otherwise : the string is rewritten so that consecutive counters are
               separated by " -h " instead of a comma.
   Returns NULL when <forKernel> is not valid, no default is recorded,
   cpcx_npics is 0, or the allocation fails.  */
2851 extern char*
2852 hwc_get_default_cntrs2 (int forKernel, int style)
2854 setup_cpcx ();
2855 if (!VALID_FOR_KERNEL (forKernel))
2856 return NULL;
2857 char *cpcx_default = cpcx_default_hwcs[forKernel];
2858 if (cpcx_default == NULL || cpcx_npics == 0)
2859 return NULL;
2860 if (style == 1)
2861 return strdup (cpcx_default);
2863 // style == 2
2864 // we will replace "," delimiters with " -h " (an extra 3 chars per HWC)
2865 char *s = (char *) malloc (strlen (cpcx_default) + 3 * cpcx_npics);
2866 if (s == NULL) return s;
/* p is the write cursor in the result, q the read cursor in the source. */
2867 char *p = s;
2868 char *q = cpcx_default;
2869 int i;
2870 for (i = 0; i < cpcx_npics; i++)
2872 int qlen = strlen (q);
2873 if (qlen == 0)
2875 p[0] = '\0';
2876 break;
2878 // add " -h " if not the first HWC
2879 if (i != 0)
2881 p[0] = ' ';
2882 p[1] = '-';
2883 p[2] = 'h';
2884 p[3] = ' ';
2885 p += 4;
2888 // find second comma
/* One HWC spans "name,value"; the comma after the value (the second one
   counted from q) is what separates it from the next HWC.  */
2889 char *r = strchr (q, ',');
2890 if (r)
2891 r = strchr (r + 1, ',');
2893 // we didn't find one, so the rest of the string is the last HWC
2894 if (r == NULL)
2896 // EUGENE could check i==cpcx_npicx-1, but what if it isn't???
2897 strcpy (p, q);
2898 if (p[qlen - 1] == ',')
2899 qlen--;
2900 p[qlen] = '\0';
2901 break;
2904 // copy the HWC, trim trailing comma, add null char
/* The whole remainder is copied and then cut back to qlen characters.
   NOTE(review): qlen = r - q - 1 also drops the character just before the
   second comma.  That is the first comma when the value is empty
   ("name,,"), but would be the last character of a non-empty value --
   presumably the defaults always carry empty values; verify.  */
2905 qlen = r - q - 1;
2906 strcpy (p, q);
2907 if (p[qlen - 1] == ',')
2908 qlen--;
2909 p += qlen;
2910 p[0] = '\0';
2911 q = r + 1;
2913 return s;
2916 extern char*
2917 hwc_get_orig_default_cntrs (int forKernel)
2919 setup_cpcx ();
2920 if (!VALID_FOR_KERNEL (forKernel))
2921 return NULL;
2922 if (cpcx_orig_default_hwcs[forKernel] != NULL)
2923 return strdup (cpcx_orig_default_hwcs[forKernel]);
2924 return NULL;
2927 extern const char *
2928 hwc_memop_string (ABST_type memop)
2930 const char * s;
2931 switch (memop)
2933 case ABST_NONE:
2934 s = "";
2935 break;
2936 case ABST_LOAD:
2937 s = GTXT ("load ");
2938 break;
2939 case ABST_STORE:
2940 s = GTXT ("store ");
2941 break;
2942 case ABST_LDST:
2943 case ABST_US_DTLBM:
2944 case ABST_LDST_SPARC64:
2945 s = GTXT ("load-store ");
2946 break;
2947 case ABST_EXACT_PEBS_PLUS1:
2948 case ABST_EXACT:
2949 s = GTXT ("memoryspace ");
2950 break;
2951 case ABST_COUNT:
2952 s = GTXT ("count ");
2953 break;
2954 case ABST_NOPC:
2955 s = GTXT ("not-program-related ");
2956 break;
2957 default:
2958 s = ""; // was "ABST_UNK", but that's meaningless to users
2959 break;
2961 return s;
/* Name the unit selected by the sign of <timecvt>: positive gives
   "CPU-cycles", negative gives "ref-cycles", zero gives "events".  */
static const char *
timecvt_string (int timecvt)
{
  if (timecvt == 0)
    return GTXT ("events");
  else if (timecvt > 0)
    return GTXT ("CPU-cycles");
  else
    return GTXT ("ref-cycles");
}
/* The register setting is available on Solaris only.  */
int show_regs = 0;
/*
 * Print the specified strings in aligned columns.
 *
 *   buf, bufsiz  destination buffer and its size; nothing is written when
 *                buf is NULL or bufsiz is not positive
 *   s1..s4       the four aligned columns (nominal widths 10, 13, 20, 10)
 *   s6           trailing free-form text, as wide as it likes
 *
 * Any NULL string is treated as blank.  When s1 and s2 do not fit in their
 * combined width, the line is split after s2 and the remaining columns are
 * moved to a second line.  Trailing blanks and tabs are removed from the
 * result.
 */
static void
format_columns (char *buf, int bufsiz, char *s1, char *s2, const char *s3,
                const char *s4, const char *s6)
{
  static char blank[] = "";

  if (buf == NULL || bufsiz <= 0)
    return;

  // NULL strings are blanks
  if (s1 == NULL)
    s1 = blank;
  if (s2 == NULL)
    s2 = blank;
  if (s3 == NULL)
    s3 = blank;
  if (s4 == NULL)
    s4 = blank;
  if (s6 == NULL)
    s6 = blank;

  // get the lengths and target widths
  // (s6 can be as wide as it likes)
  int l1 = (int) strlen (s1), n1 = 10, l2 = (int) strlen (s2), n2 = 13;
  int l3 = (int) strlen (s3), n3 = 20, l4 = (int) strlen (s4), n4 = 10;
  char divide = ' ';

  // adjust widths, stealing from one column to help a neighbor
  // There's a ragged boundary between s2 and s3.
  // So push this boundary to the right.
  n2 += n3 - l3;
  n3 -= n3 - l3;

  // If s3 is empty, push the boundary over to s4.
  if (l3 == 0)
    {
      n2 += n4 - l4;
      n4 -= n4 - l4;
    }

  // If there's enough room to fit s1 and s2, do so.
  if (n1 + n2 >= l1 + l2)
    {
      if (n1 < l1)
        {
          n2 -= l1 - n1;
          n1 += l1 - n1;
        }
      if (n2 < l2)
        {
          n1 -= l2 - n2;
          n2 += l2 - n2;
        }
    }
  else
    {
      // not enough room, so we need to divide the line
      n3 += 4       // 4-blank margin
            + n1    // 1st column
            + 1     // space between 1st and 2nd columns
            + n2    // 2nd column
            + 1;    // space between 2nd and 3rd columns
      divide = '\n';

      // make 1st column large enough
      if (n1 < l1)
        n1 = l1;

      // width of 2nd column no longer matters since we divided the line
      n2 = 0;
    }

  snprintf (buf, bufsiz, "%-*s %-*s%c%*s%*s %s",
            n1, s1, n2, s2, divide, n3, s3, n4, s4, s6);

  // Strip trailing blanks/tabs.  The scan must start at the last real
  // character: buf[strlen (buf)] is the terminating NUL, and starting
  // there would end the scan before anything is removed.
  size_t len = strlen (buf);
  while (len > 0 && (buf[len - 1] == ' ' || buf[len - 1] == '\t'))
    buf[--len] = 0;
}
3052 /* routine to return HW counter string formatted and i18n'd */
3053 static char *
3054 hwc_hwcentry_string_internal (char *buf, size_t buflen, const Hwcentry *ctr,
3055 int show_short_desc)
3057 if (!buf || !buflen)
3058 return buf;
3059 if (ctr == NULL)
3061 snprintf (buf, buflen, GTXT ("HW counter not available"));
3062 return buf;
3064 char *desc = NULL;
3065 if (show_short_desc)
3066 desc = ctr->short_desc;
3067 if (desc == NULL)
3068 desc = ctr->metric ? hwc_i18n_metric (ctr) : NULL;
3069 format_columns (buf, buflen, ctr->name, ctr->int_name,
3070 hwc_memop_string (ctr->memop), timecvt_string (ctr->timecvt),
3071 desc);
3072 return buf;
3075 /* routine to return HW counter string formatted and i18n'd */
3076 extern char *
3077 hwc_hwcentry_string (char *buf, size_t buflen, const Hwcentry *ctr)
3079 return hwc_hwcentry_string_internal (buf, buflen, ctr, 0);
3082 /* routine to return HW counter string formatted and i18n'd */
3083 extern char *
3084 hwc_hwcentry_specd_string (char *buf, size_t buflen, const Hwcentry *ctr)
3086 const char *memop, *timecvt;
3087 char descstr[1024];
3088 if (!buf || !buflen)
3089 return buf;
3090 if (ctr == NULL)
3092 snprintf (buf, buflen, GTXT ("HW counter not available"));
3093 return buf;
3095 timecvt = timecvt_string (ctr->timecvt);
3096 if (ctr->memop)
3097 memop = hwc_memop_string (ctr->memop);
3098 else
3099 memop = "";
3100 if (ctr->metric != NULL) /* a standard counter for a specific register */
3101 snprintf (descstr, sizeof (descstr), " (`%s'; %s%s)",
3102 hwc_i18n_metric (ctr), memop, timecvt);
3103 else /* raw counter */
3104 snprintf (descstr, sizeof (descstr), " (%s%s)", memop, timecvt);
3106 char *rateString = hwc_rate_string (ctr, 1);
3107 snprintf (buf, buflen, "%s,%s%s", ctr->name,
3108 rateString ? rateString : "", descstr);
3109 free (rateString);
3110 return buf;
3113 unsigned
3114 hwc_get_max_regs ()
3116 setup_cpcx ();
3117 return cpcx_npics;
3120 unsigned
3121 hwc_get_max_concurrent (int forKernel)
3123 setup_cpcx ();
3124 if (!VALID_FOR_KERNEL (forKernel))
3125 return 0;
3126 return cpcx_max_concurrent[forKernel];
3129 char**
3130 hwc_get_attrs (int forKernel)
3132 setup_cpcx ();
3133 if (!VALID_FOR_KERNEL (forKernel))
3134 return NULL;
3135 return cpcx_attrs[forKernel];
3138 Hwcentry **
3139 hwc_get_std_ctrs (int forKernel)
3141 setup_cpcx ();
3142 if (!VALID_FOR_KERNEL (forKernel))
3143 return NULL;
3144 return cpcx_std[forKernel];
3147 Hwcentry **
3148 hwc_get_raw_ctrs (int forKernel)
3150 setup_cpcx ();
3151 if (!VALID_FOR_KERNEL (forKernel))
3152 return NULL;
3153 return cpcx_raw[forKernel];
3156 /* Call an action function for each attribute supported */
3157 unsigned
3158 hwc_scan_attrs (void (*action)(const char *attr, const char *desc))
3160 setup_cpcx ();
3161 int cnt = 0;
3162 for (int ii = 0; cpcx_attrs[0] && cpcx_attrs[0][ii]; ii++, cnt++)
3164 if (action)
3165 action (cpcx_attrs[0][ii], NULL);
3167 if (!cnt && action)
3168 action (NULL, NULL);
3169 return cnt;
3172 unsigned
3173 hwc_scan_std_ctrs (void (*action)(const Hwcentry *))
3175 setup_cpcx ();
3176 Tprintf (DBG_LT1, "hwctable: hwc_scan_standard_ctrs()...\n");
3177 int cnt = 0;
3178 for (int ii = 0; cpcx_std[0] && cpcx_std[0][ii]; ii++, cnt++)
3179 if (action)
3180 action (cpcx_std[0][ii]);
3181 if (!cnt && action)
3182 action (NULL);
3183 return cnt;
3186 /* Call an action function for each counter supported */
3187 /* action is called with NULL when all counters have been seen */
3188 unsigned
3189 hwc_scan_raw_ctrs (void (*action)(const Hwcentry *))
3191 setup_cpcx ();
3192 Tprintf (DBG_LT1, "hwctable: hwc_scan_raw_ctrs()...\n");
3193 int cnt = 0;
3194 for (int ii = 0; cpcx_raw[0] && cpcx_raw[0][ii]; ii++, cnt++)
3195 if (action)
3196 action (cpcx_raw[0][ii]);
3197 if (!cnt && action)
3198 action (NULL);
3199 return cnt;
3202 static void
3203 hwc_usage_raw_overview_sparc (FILE *f_usage, int cpuver)
3205 /* All these cpuver's use cputabs[]==sparc_t5_m6 anyhow. */
3206 if ((cpuver == CPC_SPARC_M5) || (cpuver == CPC_SPARC_M6)
3207 || (cpuver == CPC_SPARC_T5) || (cpuver == CPC_SPARC_T6))
3208 cpuver = CPC_SPARC_M4; // M4 was renamed to M5
3210 /* While there are small differences between
3211 * cputabs[]== sparc_t4
3212 * cputabs[]== sparc_t5_m6
3213 * they are in HWCs we don't discuss in the overview anyhow.
3214 * So just lump them in with T4.
3216 if (cpuver == CPC_SPARC_M4)
3217 cpuver = CPC_SPARC_T4;
3219 /* Check for the cases we support. */
3220 if (cpuver != CPC_SPARC_T4 && cpuver != CPC_SPARC_M7 && cpuver != CPC_SPARC_M8)
3221 return;
3222 fprintf (f_usage, GTXT (" While the above aliases represent the most useful hardware counters\n"
3223 " for this processor, a full list of raw (unaliased) counter names appears\n"
3224 " below. First is an overview of some of these names.\n\n"));
3225 fprintf (f_usage, GTXT (" == Cycles.\n"
3226 " Count active cycles with\n"
3227 " Cycles_user\n"
3228 " Set attributes to choose user, system, and/or hyperprivileged cycles.\n\n"));
3229 fprintf (f_usage, GTXT (" == Instructions.\n"
3230 " Count instructions when they are committed with:\n"));
3231 fprintf (f_usage, NTXT (" Instr_all\n"));
3232 if (cpuver != CPC_SPARC_M8)
3233 fprintf (f_usage, GTXT (" It is the total of these counters:\n"));
3234 else
3235 fprintf (f_usage, GTXT (" Some subsets of instructions can be counted separately:\n"));
3236 fprintf (f_usage, NTXT (" Branches %s\n"), GTXT ("branches"));
3237 fprintf (f_usage, NTXT (" Instr_FGU_crypto %s\n"), GTXT ("Floating Point and Graphics Unit"));
3238 fprintf (f_usage, NTXT (" Instr_ld %s\n"), GTXT ("loads"));
3239 fprintf (f_usage, NTXT (" Instr_st %s\n"), GTXT ("stores"));
3240 fprintf (f_usage, NTXT (" %-19s %s\n"),
3241 cpuver == CPC_SPARC_M7 ? NTXT ("Instr_SPR_ring_ops")
3242 : NTXT ("SPR_ring_ops"),
3243 GTXT ("internal use of SPR ring"));
3244 fprintf (f_usage, NTXT (" Instr_other %s\n"), GTXT ("basic arithmetic and logical instructions"));
3245 if (cpuver != CPC_SPARC_M8)
3246 fprintf (f_usage, GTXT (" Some subsets of these instructions can be counted separately:\n"));
3247 fprintf (f_usage, NTXT (" Br_taken %s\n"), GTXT ("Branches that are taken"));
3248 fprintf (f_usage, NTXT (" %-19s %s\n"),
3249 cpuver == CPC_SPARC_M7 ? NTXT ("Instr_block_ld_st")
3250 : NTXT ("Block_ld_st"),
3251 GTXT ("block load/store"));
3252 fprintf (f_usage, NTXT (" %-19s %s\n"),
3253 cpuver == CPC_SPARC_M7 ? NTXT ("Instr_atomic")
3254 : NTXT ("Atomics"),
3255 GTXT ("atomic instructions"));
3256 fprintf (f_usage, NTXT (" %-19s %s\n"),
3257 cpuver == CPC_SPARC_M7 ? NTXT ("Instr_SW_prefetch")
3258 : NTXT ("SW_prefetch"),
3259 GTXT ("prefetches"));
3260 fprintf (f_usage, NTXT (" %-19s %s\n"),
3261 cpuver == CPC_SPARC_M7 ? NTXT ("Instr_SW_count")
3262 : NTXT ("Sw_count_intr"),
3263 GTXT ("SW Count instructions (counts special no-op assembler instructions)"));
3264 fprintf (f_usage, NTXT ("\n"));
3266 #ifdef TMPLEN
3267 compilation error : we're trying to use a macro that's already defined
3268 #endif
3269 #define TMPLEN 32
3270 char s0[TMPLEN], s1[TMPLEN], s2[TMPLEN], s3[TMPLEN];
3271 if (cpuver == CPC_SPARC_M7)
3273 snprintf (s0, TMPLEN, "Commit_0_cyc");
3274 snprintf (s1, TMPLEN, "Commit_1_cyc");
3275 snprintf (s2, TMPLEN, "Commit_2_cyc");
3276 snprintf (s3, TMPLEN, "Commit_1_or_2_cyc");
3278 else
3280 snprintf (s0, TMPLEN, "Commit_0");
3281 snprintf (s1, TMPLEN, "Commit_1");
3282 snprintf (s2, TMPLEN, "Commit_2");
3283 snprintf (s3, TMPLEN, "Commit_1_or_2");
3285 #undef TMPLEN
3286 fprintf (f_usage, GTXT (" == Commit.\n"
3287 " Instructions may be launched speculatively, executed out of order, etc.\n"));
3288 if (cpuver != CPC_SPARC_M8)
3290 fprintf (f_usage, GTXT (" We can count the number of cycles during which 0, 1, or 2 instructions are\n"
3291 " actually completed and their results committed:\n"));
3292 fprintf (f_usage, GTXT (" %s\n"
3293 " %s\n"
3294 " %s\n"
3295 " %s\n"
3296 " %s is a useful way of identifying parts of your application with\n"
3297 " high-latency instructions.\n\n"),
3298 s0, s1, s2, s3, s0);
3300 else
3302 fprintf (f_usage, GTXT (" We can count the number of cycles during which no instructions were\n"
3303 " able to commit results using:\n"));
3304 fprintf (f_usage, GTXT (" %s\n"
3305 " %s is a useful way of identifying parts of your application with\n"
3306 " high-latency instructions.\n\n"),
3307 s0, s0);
3310 fprintf (f_usage, GTXT (" == Cache/memory hierarchy.\n"));
3311 if (cpuver == CPC_SPARC_M7)
3313 fprintf (f_usage, GTXT (" In the cache hierarchy:\n"
3314 " * Each socket has memory and multiple SPARC core clusters (scc).\n"
3315 " * Each scc has an L3 cache and multiple L2 and L1 caches.\n"));
3316 fprintf (f_usage, GTXT (" Loads can be counted by where they hit on socket:\n"));
3317 fprintf (f_usage, NTXT (" %-22s %s\n"),
3318 NTXT ("DC_hit"), GTXT ("hit own L1 data cache"));
3319 fprintf (f_usage, NTXT (" %-22s %s\n"),
3320 NTXT ("DC_miss_L2_hit"), GTXT ("hit own L2"));
3321 fprintf (f_usage, NTXT (" %-22s %s\n"),
3322 NTXT ("DC_miss_L3_hit"), GTXT ("hit own L3"));
3323 fprintf (f_usage, NTXT (" %-22s %s\n"),
3324 NTXT ("DC_miss_nbr_L2_hit"), GTXT ("hit neighbor L2 (same scc)"));
3325 fprintf (f_usage, NTXT (" %-22s %s\n"),
3326 NTXT ("DC_miss_nbr_scc_hit"), GTXT ("hit neighbor scc (same socket)"));
3327 fprintf (f_usage, NTXT (" %-22s %s\n"),
3328 NTXT ("DC_miss_nbr_scc_miss"), GTXT ("miss all caches (same socket)"));
3329 fprintf (f_usage, GTXT (" These loads can also be grouped:\n"));
3330 fprintf (f_usage, NTXT (" %-22s %s\n"),
3331 NTXT ("DC_miss"), GTXT ("all - DC_hit"));
3332 fprintf (f_usage, NTXT (" %-22s %s\n"),
3333 NTXT ("DC_miss_L2_miss"), GTXT ("all - DC_hit - DC_miss_L2_hit"));
3334 fprintf (f_usage, NTXT (" %-22s %s\n"),
3335 NTXT ("DC_miss_L3_miss"), GTXT ("DC_miss_nbr_scc_hit + DC_miss_nbr_scc_miss"));
3336 fprintf (f_usage, GTXT (" Loads that miss all caches on this socket can be counted:\n"));
3337 fprintf (f_usage, NTXT (" %-22s %s\n"),
3338 NTXT ("DC_miss_remote_scc_hit"), GTXT ("hit cache on different socket"));
3339 fprintf (f_usage, NTXT (" %-22s %s\n"),
3340 NTXT ("DC_miss_local_mem_hit"), GTXT ("hit local memory (same socket)"));
3341 fprintf (f_usage, NTXT (" %-22s %s\n"),
3342 NTXT ("DC_miss_remote_mem_hit"), GTXT ("hit remote memory (off socket)"));
3343 fprintf (f_usage, GTXT (" These events are for speculative loads, launched in anticipation\n"
3344 " of helping performance but whose results might not be committed.\n"));
3345 #if 0 // was: #if defined(linux). See 22236226 - sparc-Linux: Support basic Memoryspace and Dataspace profiling (capture VADDR)
3346 /* 21869427 should not look like memoryspace profiling is supported on Linux */
3347 /* 21869424 desire memoryspace profiling on Linux */
3348 fprintf (f_usage, GTXT (" To count only data-cache misses that commit, use:\n"));
3349 fprintf (f_usage, NTXT (" DC_miss_commit\n"));
3350 #else
3351 fprintf (f_usage, GTXT (" To count only data-cache misses that commit, or for memoryspace profiling,\n"
3352 " use the 'memoryspace' counter:\n"));
3353 fprintf (f_usage, NTXT (" DC_miss_commit\n"));
3354 #endif
3355 fprintf (f_usage, NTXT ("\n"));
3357 else if (cpuver == CPC_SPARC_M8)
3359 fprintf (f_usage, GTXT (" In the cache hierarchy:\n"
3360 " * Each processor has 4 memory controllers and 2 quad core clusters (QCC).\n"
3361 " * Each QCC contains 4 cache processor clusters (CPC).\n"
3362 " * Each CPC contains 4 cores.\n"
3363 " * Each core supports 8 hardware threads.\n"
3364 " * The L3 consists of 2 partitions with 1 QCC per partition.\n"
3366 fprintf (f_usage, GTXT (" Loads can be counted by where they hit on socket:\n"));
3367 fprintf (f_usage, NTXT (" %-22s %s\n"),
3368 NTXT ("DC_miss_L2_hit"), GTXT ("hit own L2"));
3369 fprintf (f_usage, NTXT (" %-22s %s\n"),
3370 NTXT ("DC_miss_L3_hit"), GTXT ("hit own L3"));
3371 fprintf (f_usage, NTXT (" %-22s %s\n"),
3372 NTXT ("DC_miss_L3_dirty_copyback"), GTXT ("hit own L3 but require copyback from L2D"));
3373 fprintf (f_usage, NTXT (" %-22s %s\n"),
3374 NTXT ("DC_miss_nbr_L3_hit"), GTXT ("hit neighbor L3 (same socket)"));
3375 fprintf (f_usage, GTXT (" Loads that miss all caches on this socket can be counted:\n"));
3376 fprintf (f_usage, NTXT (" %-22s %s\n"),
3377 NTXT ("DC_miss_remote_L3_hit"), GTXT ("hit cache on different socket"));
3378 fprintf (f_usage, NTXT (" %-22s %s\n"),
3379 NTXT ("DC_miss_local_mem_hit"), GTXT ("hit local memory (same socket)"));
3380 fprintf (f_usage, NTXT (" %-22s %s\n"),
3381 NTXT ("DC_miss_remote_mem_hit"), GTXT ("hit remote memory (off socket)"));
3382 fprintf (f_usage, GTXT (" These events are for speculative loads, launched in anticipation\n"
3383 " of helping performance but whose results might not be committed.\n"));
3384 #if 0 // was: #if defined(linux). See 22236226 - sparc-Linux: Support basic Memoryspace and Dataspace profiling (capture VADDR)
3385 /* 21869427 should not look like memoryspace profiling is supported on Linux */
3386 /* 21869424 desire memoryspace profiling on Linux */
3387 fprintf (f_usage, GTXT (" To count only data-cache misses that commit, use:\n"));
3388 fprintf (f_usage, NTXT (" DC_miss_commit\n"));
3389 #else
3390 fprintf (f_usage, GTXT (" To count only data-cache misses that commit, or for memoryspace profiling,\n"
3391 " use the 'memoryspace' counter:\n"));
3392 fprintf (f_usage, NTXT (" DC_miss_commit\n"));
3393 #endif
3394 fprintf (f_usage, NTXT ("\n"));
3396 else
3398 fprintf (f_usage, GTXT (" Total data-cache misses can be counted with:\n"));
3399 fprintf (f_usage, NTXT (" DC_miss DC_miss_nospec\n"));
3400 fprintf (f_usage, GTXT (" They are the totals of misses that hit in L2/L3 cache, local memory, or\n"
3401 " remote memory:\n"));
3402 fprintf (f_usage, NTXT (" DC_miss_L2_L3_hit DC_miss_L2_L3_hit_nospec\n"));
3403 fprintf (f_usage, NTXT (" DC_miss_local_hit DC_miss_local_hit_nospec\n"));
3404 fprintf (f_usage, NTXT (" DC_miss_remote_L3_hit DC_miss_remote_L3_hit_nospec\n"));
3405 fprintf (f_usage, GTXT (" The events in the left column include speculative operations. Use the\n"
3406 " right-hand _nospec events to count only data accesses that commit\n"
3407 " or for memoryspace profiling.\n\n"));
3410 fprintf (f_usage, GTXT (" == TLB misses.\n"
3411 " The Translation Lookaside Buffer (TLB) is a cache of virtual-to-physical\n"
3412 " page translations."));
3413 fprintf (f_usage, GTXT (" If a virtual address (VA) is not represented in the\n"
3414 " TLB, an expensive hardware table walk (HWTW) must be conducted."));
3415 fprintf (f_usage, GTXT (" If the\n"
3416 " page is still not found, a trap results. There is a data TLB (DTLB) and\n"
3417 " an instruction TLB (ITLB).\n\n"));
3418 fprintf (f_usage, GTXT (" TLB misses can be counted by:\n"));
3419 fprintf (f_usage, NTXT (" %s\n"),
3420 cpuver == CPC_SPARC_M7 ?
3421 NTXT ("DTLB_HWTW_search ITLB_HWTW_search") :
3422 cpuver == CPC_SPARC_M8 ?
3423 NTXT ("DTLB_HWTW ITLB_HWTW") :
3424 NTXT ("DTLB_miss_asynch ITLB_miss_asynch"));
3425 fprintf (f_usage, GTXT (" or broken down by page size:\n"));
3426 fprintf (f_usage, NTXT (" %s"),
3427 cpuver == CPC_SPARC_M7 ?
3428 NTXT ("DTLB_HWTW_hit_8K ITLB_HWTW_hit_8K\n"
3429 " DTLB_HWTW_hit_64K ITLB_HWTW_hit_64K\n"
3430 " DTLB_HWTW_hit_4M ITLB_HWTW_hit_4M\n") :
3431 NTXT ("DTLB_fill_8KB ITLB_fill_8KB\n"
3432 " DTLB_fill_64KB ITLB_fill_64KB\n"
3433 " DTLB_fill_4MB ITLB_fill_4MB\n"));
3434 fprintf (f_usage, NTXT (" %s\n\n"),
3435 cpuver == CPC_SPARC_M7 ?
3436 NTXT ("DTLB_HWTW_hit_256M ITLB_HWTW_hit_256M\n"
3437 " DTLB_HWTW_hit_2G_16G ITLB_HWTW_hit_2G_16G\n"
3438 " DTLB_HWTW_miss_trap ITLB_HWTW_miss_trap") :
3439 cpuver == CPC_SPARC_M8 ?
3440 NTXT ("DTLB_HWTW_hit_256M ITLB_HWTW_hit_256M\n"
3441 " DTLB_HWTW_hit_16G ITLB_HWTW_hit_16G\n"
3442 " DTLB_HWTW_hit_1T ITLB_HWTW_hit_1T") :
3443 NTXT ("DTLB_fill_256MB ITLB_fill_256MB\n"
3444 " DTLB_fill_2GB ITLB_fill_2GB\n"
3445 " DTLB_fill_trap ITLB_fill_trap"));
3446 if (cpuver == CPC_SPARC_M8)
3448 fprintf (f_usage, GTXT (" TLB traps, which can require hundreds of cycles, can be counted with:\n"));
3449 fprintf (f_usage, NTXT (" %s\n\n"),
3450 NTXT ("DTLB_fill_trap ITLB_fill_trap"));
3453 fprintf (f_usage, GTXT (" == Branch misprediction.\n"
3454 " Count branch mispredictions with:\n"
3455 " Br_mispred\n"
3456 " It is the total of:\n"
3457 " Br_dir_mispred direction was mispredicted\n"
3458 " %s target was mispredicted\n"
3459 "\n"), cpuver == CPC_SPARC_M7 ? NTXT ("Br_tgt_mispred") : NTXT ("Br_trg_mispred"));
3461 fprintf (f_usage, GTXT (" == RAW hazards.\n"
3462 " A read-after-write (RAW) delay occurs when we attempt to read a datum\n"
3463 " before an earlier write has had time to complete:\n"));
3464 if (cpuver == CPC_SPARC_M8)
3466 fprintf (f_usage, NTXT (" RAW_hit\n"));
3467 fprintf (f_usage, GTXT (" RAW_hit events can be broken down into:\n"));
3469 else
3471 fprintf (f_usage, NTXT (" RAW_hit_st_q~emask=0xf\n"));
3472 fprintf (f_usage, GTXT (" The mask 0xf counts the total of all types such as:\n"));
3474 fprintf (f_usage, NTXT (" RAW_hit_st_buf write is still in store buffer\n"
3475 " RAW_hit_st_q write is still in store queue\n"
3476 "\n"));
3477 if (cpuver == CPC_SPARC_M7)
3479 fprintf (f_usage, GTXT (" == Flush.\n"
3480 " One can count the number of times the pipeline must be flushed:\n"));
3481 fprintf (f_usage, NTXT (" %-22s %s\n"),
3482 NTXT ("Flush_L3_miss"), GTXT ("load missed L3 and >1 strand is active on the core"));
3483 fprintf (f_usage, NTXT (" %-22s %s\n"),
3484 NTXT ("Flush_br_mispred"), GTXT ("branch misprediction"));
3485 fprintf (f_usage, NTXT (" %-22s %s\n"),
3486 NTXT ("Flush_arch_exception"), GTXT ("SPARC exceptions and trap entry/return"));
3487 fprintf (f_usage, NTXT (" %-22s %s\n"),
3488 NTXT ("Flush_other"), GTXT ("state change to/from halted/paused"));
3489 fprintf (f_usage, NTXT ("\n"));
3493 static void
3494 hwc_usage_internal (int forKernel, FILE *f_usage, const char *cmd, const char *dataspace_msg, int show_syntax, int show_short_desc)
3496 if (!VALID_FOR_KERNEL (forKernel))
3497 return;
3498 char cpuname[128];
3499 hwc_get_cpuname (cpuname, 128);
3500 Hwcentry** raw_ctrs = hwc_get_raw_ctrs (forKernel);
3501 int has_raw_ctrs = (raw_ctrs && raw_ctrs[0]);
3502 Hwcentry** std_ctrs = hwc_get_std_ctrs (forKernel);
3503 int has_std_ctrs = (std_ctrs && std_ctrs[0]);
3504 unsigned hwc_maxregs = hwc_get_max_concurrent (forKernel);
3505 int cpuver = hwc_get_cpc_cpuver ();
3506 if (hwc_maxregs != 0)
3508 if (show_syntax)
3510 fprintf (f_usage, GTXT ("\nSpecifying HW counters on `%s' (cpuver=%d):\n\n"), cpuname, cpuver);
3511 fprintf (f_usage, GTXT (" -h {auto|lo|on|hi}\n"));
3512 fprintf (f_usage, GTXT ("\tturn on default set of HW counters at the specified rate\n"));
3513 if (hwc_maxregs == 1)
3515 fprintf (f_usage, GTXT (" -h <ctr_def>\n"));
3516 fprintf (f_usage, GTXT ("\tspecify HW counter profiling for one HW counter only\n"));
3518 else
3520 fprintf (f_usage, GTXT (" -h <ctr_def> [-h <ctr_def>]...\n"));
3521 fprintf (f_usage, GTXT (" -h <ctr_def>[,<ctr_def>]...\n"));
3522 fprintf (f_usage, GTXT ("\tspecify HW counter profiling for up to %u HW counters\n"), hwc_maxregs);
3524 fprintf (f_usage, NTXT ("\n"));
3526 else
3528 fprintf (f_usage, GTXT ("\nSpecifying HW counters on `%s' (cpuver=%d)\n\n"), cpuname, cpuver);
3529 if (hwc_maxregs == 1)
3530 fprintf (f_usage, GTXT (" Hardware counter profiling is supported for only one counter.\n"));
3531 else
3532 fprintf (f_usage, GTXT (" Hardware counter profiling is supported for up to %u HW counters.\n"), hwc_maxregs);
3535 else
3537 if (!IS_KERNEL (forKernel))
3538 { // EUGENE I don't see why we don't also use this for er_kernel
3539 char buf[1024];
3540 *buf = 0;
3541 char *pch = hwcfuncs_errmsg_get (buf, sizeof (buf), 0);
3542 if (*pch)
3543 fprintf (f_usage, GTXT ("HW counter profiling is not supported on this system: %s%s"),
3544 pch, pch[strlen (pch) - 1] == '\n' ? "" : "\n");
3545 else
3546 fprintf (f_usage, GTXT ("HW counter profiling is not supported on this system\n"));
3548 return;
3551 /* At this point, we know we have counters */
3552 char**hwc_attrs = hwc_get_attrs (forKernel);
3553 int has_attrs = (hwc_attrs && hwc_attrs[0]);
3554 if (show_syntax)
3556 const char *reg_s = show_regs ? "[/<reg#>]" : "";
3557 const char *attr_s = has_attrs ? "[[~<attr>=<val>]...]" : "";
3558 fprintf (f_usage, GTXT (" <ctr_def> == <ctr>%s%s,[<rate>]\n"), attr_s, reg_s);
3559 if (dataspace_msg)
3560 fprintf (f_usage, NTXT ("%s"), dataspace_msg);
3561 fprintf (f_usage, GTXT (" <ctr>\n"));
3562 fprintf (f_usage, GTXT (" counter name, "));
3564 else
3565 fprintf (f_usage, GTXT (" Counter name "));
3566 fprintf (f_usage, GTXT ("must be selected from the available counters\n"
3567 " listed below. On most systems, if a counter is not listed\n"
3568 " below, it may still be specified by its numeric value.\n"));
3569 if (cpcx_has_precise[forKernel])
3571 if (!forKernel)
3572 fprintf (f_usage, GTXT (" Counters labeled as 'memoryspace' in the list below will\n"
3573 " collect memoryspace data by default.\n"));
3575 fprintf (f_usage, GTXT ("\n"));
3576 if (has_attrs)
3578 if (show_syntax)
3580 fprintf (f_usage, GTXT (" ~<attr>=<val>\n"));
3581 fprintf (f_usage, GTXT (" optional attribute where <val> can be in decimal or hex\n"
3582 " format, and <attr> can be one of: \n"));
3584 else
3585 fprintf (f_usage, GTXT (" Optional attribute where <val> can be in decimal or hex\n"
3586 " format, and <attr> can be one of: \n"));
3587 for (char **pattr = hwc_attrs; *pattr; pattr++)
3588 fprintf (f_usage, NTXT (" `%s'\n"), *pattr);
3589 if (show_syntax)
3590 fprintf (f_usage, GTXT (" Multiple attributes may be specified, and each must be preceded by a ~.\n\n"));
3591 else
3592 fprintf (f_usage, GTXT (" Multiple attributes may be specified.\n\n"));
3593 if (IS_KERNEL (forKernel))
3594 fprintf (f_usage, GTXT (" Other attributes may be supported by the chip, but are not supported by DTrace and will be ignored by er_kernel.\n\n"));
3597 if (show_syntax)
3599 if (show_regs)
3600 fprintf (f_usage, GTXT (" /<reg#>\n"
3601 " forces use of a specific hardware register. (Solaris only)\n"
3602 " If not specified, %s will attempt to place the counter into the first\n"
3603 " available register and as a result may be unable to place\n"
3604 " subsequent counters due to register conflicts.\n"
3605 " The / in front of the register number is required if a register is specified.\n\n"),
3606 cmd);
3608 fprintf (f_usage, GTXT (" <rate> == {auto|lo|on|hi}\n"));
3609 fprintf (f_usage, GTXT (" `auto' (default) match the rate used by clock profiling.\n"));
3610 fprintf (f_usage, GTXT (" If clock profiling is disabled, use `on'.\n"));
3611 fprintf (f_usage, GTXT (" `lo' per-thread maximum rate of ~10 samples/second\n"));
3612 fprintf (f_usage, GTXT (" `on' per-thread maximum rate of ~100 samples/second\n"));
3613 fprintf (f_usage, GTXT (" `hi' per-thread maximum rate of ~1000 samples/second\n\n"));
3614 fprintf (f_usage, GTXT (" <rate> == <interval>\n"
3615 " Fixed event interval value to trigger a sample.\n"
3616 " Smaller intervals imply more frequent samples.\n"
3617 " Example: when counting cycles on a 2 GHz processor,\n"
3618 " an interval of 2,000,003 implies ~1000 samples/sec\n"
3619 "\n"
3620 " Use this feature with caution, because:\n"
3621 " (1) Frequent sampling increases overhead and may disturb \n"
3622 " other applications on your system.\n"
3623 " (2) Event counts vary dramatically depending on the event \n"
3624 " and depending on the application.\n"
3625 " (3) A fixed event interval disables any other gprofng\n"
3626 " internal mechanisms that may limit event rates.\n"
3627 "\n"
3628 " Guidelines: Aim at <1000 events per second. Start by \n"
3629 " collecting with the 'hi' option; in the experiment overview,\n"
3630 " notice how many events are recorded per second; divide by\n"
3631 " 1000, and use that as your starting point.\n\n"));
3633 fprintf (f_usage, GTXT (" A comma ',' followed immediately by white space may be omitted.\n\n"));
3636 /* default counters */
3637 fprintf (f_usage, GTXT ("Default set of HW counters:\n\n"));
3638 char * defctrs = hwc_get_default_cntrs2 (forKernel, 1);
3639 if (defctrs == NULL)
3640 fprintf (f_usage, GTXT (" No default HW counter set defined for this system.\n"));
3641 else if (strlen (defctrs) == 0)
3643 char *s = hwc_get_orig_default_cntrs (forKernel);
3644 fprintf (f_usage, GTXT (" The default HW counter set (%s) defined for %s cannot be loaded on this system.\n"),
3645 s, cpuname);
3646 free (s);
3647 free (defctrs);
3649 else
3651 char *defctrs2 = hwc_get_default_cntrs2 (forKernel, 2);
3652 fprintf (f_usage, GTXT (" -h %s\n"), defctrs);
3653 free (defctrs2);
3654 free (defctrs);
3657 /* long listings */
3658 char tmp[1024];
3659 if (has_std_ctrs)
3661 fprintf (f_usage, GTXT ("\nAliases for most useful HW counters:\n\n"));
3662 format_columns (tmp, 1024, "alias", "raw name", "type ", "units", "description");
3663 fprintf (f_usage, NTXT (" %s\n\n"), tmp);
3664 for (Hwcentry **pctr = std_ctrs; *pctr; pctr++)
3666 Hwcentry *ctr = *pctr;
3667 hwc_hwcentry_string_internal (tmp, sizeof (tmp), ctr, 0);
3668 fprintf (f_usage, NTXT (" %s\n"), tmp);
3671 if (has_raw_ctrs)
3673 fprintf (f_usage, GTXT ("\nRaw HW counters:\n\n"));
3674 hwc_usage_raw_overview_sparc (f_usage, cpuver);
3675 format_columns (tmp, 1024, "name", NULL, "type ", "units", "description");
3676 fprintf (f_usage, NTXT (" %s\n\n"), tmp);
3677 for (Hwcentry **pctr = raw_ctrs; *pctr; pctr++)
3679 Hwcentry *ctr = *pctr;
3680 hwc_hwcentry_string_internal (tmp, sizeof (tmp), ctr, show_short_desc);
3681 fprintf (f_usage, NTXT (" %s\n"), tmp);
3685 /* documentation notice */
3686 hwc_get_docref (tmp, 1024);
3687 if (strlen (tmp))
3688 fprintf (f_usage, NTXT ("\n%s\n"), tmp);
/* Print the "-h" usage description, largely common to collect and
   er_kernel, on stdout.  The option syntax is shown and show_short_desc
   is left off.  */
void
hwc_usage (int forKernel, const char *cmd, const char *dataspace_msg)
{
  const int with_syntax = 1;
  const int with_short_desc = 0;
  hwc_usage_internal (forKernel, stdout, cmd, dataspace_msg,
                      with_syntax, with_short_desc);
}
/* Same as hwc_usage(), but the caller chooses the output stream F and
   both display flags; every argument is forwarded unchanged.  */
void
hwc_usage_f (int forKernel, FILE *f, const char *cmd, const char *dataspace_msg, int show_syntax, int show_short_desc)
{
  hwc_usage_internal (forKernel, f, cmd, dataspace_msg,
                      show_syntax, show_short_desc);
}
3704 /*---------------------------------------------------------------------------*/
3705 /* init functions */
/* NULL-terminated list of counter names that supported_hwc() rejects
   when HWCFUNCS_SUPPORT_PEBS_SAMPLING is not set in cpcx_support_bitmask.  */
static char *supported_pebs_counters[] = {
  "mem_inst_retired.latency_above_threshold",
  "mem_trans_retired.load_latency",
  "mem_trans_retired.precise_store",
  NULL
};
3714 /* callback, (see setup_cpc()) called for each valid regno/name combo */
3715 static void
3716 hwc_cb (uint_t cpc_regno, const char *name)
3718 regno_t regno = cpc_regno; /* convert type */
3719 list_add (&unfiltered_raw, regno, name);
3722 static int
3723 supported_hwc (Hwcentry *pctr)
3725 if (ABST_PLUS_BY_DEFAULT (pctr->memop) &&
3726 (cpcx_support_bitmask & SUPPORT_MEMORYSPACE_PROFILING) == 0)
3727 return 0;
3728 // remove specific PEBs counters when back end doesn't support sampling
3729 if ((cpcx_support_bitmask & HWCFUNCS_SUPPORT_PEBS_SAMPLING) == 0)
3730 for (int ii = 0; supported_pebs_counters[ii]; ii++)
3731 if (strcmp (supported_pebs_counters[ii], pctr->name) == 0)
3732 return 0;
3733 return 1;
3736 /* input:
3737 * forKernel: 1 - generate lists for er_kernel, 0 - generate lists for collect
3739 * raw_orig: HWCs as generated by hwc_cb()
3740 * output:
3741 * pstd_out[], praw_out[]: malloc'd array of pointers to malloc'd hwcentry, or NULL
3743 static void
3744 hwc_process_raw_ctrs (int forKernel, Hwcentry ***pstd_out,
3745 Hwcentry ***praw_out, Hwcentry ***phidden_out,
3746 Hwcentry**static_tables, Hwcentry **raw_unfiltered_in)
3748 // set up output buffers
3749 ptr_list s_outbufs[3];
3750 ptr_list *std_out = &s_outbufs[0];
3751 ptr_list_init (std_out);
3752 ptr_list *raw_out = &s_outbufs[1];
3753 ptr_list_init (raw_out);
3754 ptr_list *hidden_out = &s_outbufs[2];
3755 ptr_list_init (hidden_out);
3757 #define NUM_TABLES 3
3758 ptr_list table_copy[NUM_TABLES]; // copy of data from static tables. [0]std, [1]generic, and [2]hidden
3759 for (int tt = 0; tt < NUM_TABLES; tt++)
3760 ptr_list_init (&table_copy[tt]);
3762 // copy records from std [0] and generic [1] static input tables into table_copy[0],[1],or[2]
3763 for (int tt = 0; tt < 2; tt++)
3764 for (Hwcentry *pctr = static_tables[tt]; pctr && pctr->name; pctr++)
3766 if (!supported_hwc (pctr))
3767 continue;
3768 if (is_hidden_alias (pctr))
3769 list_append_shallow_copy (&table_copy[2], pctr); // hidden list
3770 else
3771 list_append_shallow_copy (&table_copy[tt], pctr);
3774 // copy raw_unfiltered_in to raw_out
3775 for (int ii = 0; raw_unfiltered_in && raw_unfiltered_in[ii]; ii++)
3777 Hwcentry *pctr = raw_unfiltered_in[ii];
3778 if (supported_hwc (pctr))
3779 list_append_shallow_copy (raw_out, pctr);
3782 // Scan raw counters to populate Hwcentry fields from matching static_tables entries
3783 for (int uu = 0; uu < raw_out->sz; uu++)
3785 Hwcentry *praw = (Hwcentry*) raw_out->array[uu];
3786 Hwcentry *pstd = NULL; // set if non-alias entry from std table matches
3787 char *name = praw->name;
3788 for (int tt = 0; tt < NUM_TABLES; tt++)
3789 { // std, generic, and hidden
3790 if (table_copy[tt].sz == 0)
3791 continue;
3792 Hwcentry **array = (Hwcentry**) table_copy[tt].array;
3793 for (int jj = 0; array[jj]; jj++)
3794 { // all table counters
3795 Hwcentry *pctr = array[jj];
3796 char *pname;
3797 if (pctr->int_name)
3798 pname = pctr->int_name;
3799 else
3800 pname = pctr->name;
3801 if (!is_same (name, pname, '~'))
3802 continue;
3804 if (!is_visible_alias (pctr) && !is_hidden_alias (pctr))
3806 // Note: we could expand criteria to also allow aliases to set default rates for raw HWCs
3807 /* This is an 'internal' raw counter */
3808 if (!pstd)
3809 pstd = pctr; /* use info as a template when adding to raw list */
3810 else
3811 hwcentry_print (DBG_LT0, "hwctable: hwc_cb: Warning: "
3812 "counter %s appears in table more than once: ",
3813 pstd);
3815 }/* for table rows */
3816 }/* for std and generic tables */
3818 if (pstd)
3820 /* the main table had an entry that matched <name> exactly */
3821 /* Apply the main table entry as a template */
3822 *praw = *pstd;
3824 }/* for (raw_out) */
3826 // update std_out and hidden_out
3827 for (int tt = 0; tt < NUM_TABLES; tt++)
3829 if (tt == 1 /*skip std_raw*/ || table_copy[tt].sz == 0)
3830 continue;
3831 Hwcentry *pctr;
3832 for (int ii = 0; (pctr = table_copy[tt].array[ii]); ii++)
3834 // prune unsupported rows from std table
3835 if (!is_visible_alias (pctr) && !is_hidden_alias (pctr))
3836 continue; // only aliases
3837 ptr_list *dest = (tt == 0) ? std_out : hidden_out;
3838 Hwcentry *isInList;
3839 if (pctr->short_desc == NULL)
3841 isInList = ptrarray_find_by_name ((Hwcentry**) raw_out->array, pctr->int_name);
3842 if (isInList)
3843 pctr->short_desc = isInList->short_desc; // copy the raw counter's detailed description
3845 isInList = ptrarray_find_by_name ((Hwcentry**) dest->array, pctr->name);
3846 if (isInList)
3847 hwcentry_print (DBG_LT0, "hwctable: hwc_cb: Warning: "
3848 "counter %s appears in alias list more than once: ",
3849 pctr);
3850 else
3851 list_append_shallow_copy (dest, pctr);
3854 for (int tt = 0; tt < NUM_TABLES; tt++)
3855 ptr_list_free (&table_copy[tt]);
3857 if (forKernel)
3859 // for er_kernel, use baseline value of PRELOAD_DEF_ERKERNEL instead of PRELOAD_DEF
3860 for (int tt = 0; tt < 3; tt++)
3861 { // std_out-0, raw_out-1, hidden_out-2
3862 Hwcentry** hwcs = (Hwcentry**) (s_outbufs[tt].array);
3863 for (int ii = 0; hwcs && hwcs[ii]; ii++)
3865 Hwcentry *hwc = hwcs[ii];
3866 if (hwc->val == PRELOAD_DEF)
3867 hwc->val = PRELOAD_DEF_ERKERNEL;
3871 *pstd_out = (Hwcentry**) std_out->array;
3872 *praw_out = (Hwcentry**) raw_out->array;
3873 *phidden_out = (Hwcentry**) hidden_out->array;
3876 /* callback, (see setup_cpc()) called for each valid attribute */
3877 /* builds attrlist */
3878 static void
3879 attrs_cb (const char *attr)
3881 Tprintf (DBG_LT3, "hwctable: attrs_cb(): %s\n", attr);
3882 if (strcmp (attr, "picnum") == 0)
3883 return; /* don't make this attribute available to users */
3884 ptr_list_add (&unfiltered_attrs, (void*) strdup (attr));
3887 /* returns true if attribute is valid for this platform */
3888 static int
3889 attr_is_valid (int forKernel, const char *attr)
3891 setup_cpcx ();
3892 if (!VALID_FOR_KERNEL (forKernel) || !cpcx_attrs[forKernel])
3893 return 0;
3894 for (int ii = 0; cpcx_attrs[forKernel][ii]; ii++)
3895 if (strcmp (attr, cpcx_attrs[forKernel][ii]) == 0)
3896 return 1;
3897 return 0;