1 /* Copyright (C) 2021-2024 Free Software Foundation, Inc.
4 This file is part of GNU Binutils.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, 51 Franklin Street - Fifth Floor, Boston,
19 MA 02110-1301, USA. */
26 #include <linux/perf_event.h>
/*---------------------------------------------------------------------------*/
/* Compile-time workaround switches.  Both are defined with no value, so
   they can only be tested with #ifdef / defined().  */

#define DISALLOW_PENTIUM_PRO_MMX_7007575
/* Solaris/libcpc2 defaults to "Pentium Pro with MMX, Pentium II"
   when it doesn't recognize an Intel processor.  As a result,
   when collect attempts to start Pentium Pro counters on a
   new machine (e.g. Westmere as of 1/2011), the OS may hang.  */

/* Register 0 counter doesn't work on Niagara T1 version (?) */
#define WORKAROUND_6231196_NIAGARA1_NO_CTR_0

/*---------------------------------------------------------------------------*/
/* Fixed overflow-interval presets.
   PRELOADS_<n> is roughly 10^n and PRELOADS_<n>5 is roughly 3.2 * 10^n
   (i.e. about 10^(n + 0.5)).  Every value ends in decimal 01.  */
#define PRELOADS_9    1001000001
#define PRELOADS_85    320100001
#define PRELOADS_8     100100001
#define PRELOADS_75     32010001
#define PRELOADS_7      10010001
#define PRELOADS_65      3201001
#define PRELOADS_6       1001001
#define PRELOADS_55       320101
#define PRELOADS_5        100101
#define PRELOADS_45        32001
#define PRELOADS_4         10001
#define PRELOADS_35         3201
#define PRELOADS_3          1001
#define PRELOADS_25          301

#define ABST_TBD ABST_NONE /* to be determined */
63 /*---------------------------------------------------------------------------*/
65 static void hwc_cb (uint_t cpc_regno
, const char *name
);
66 static void attrs_cb (const char *attr
);
67 static int attr_is_valid (int forKernel
, const char *attr
);
69 /*---------------------------------------------------------------------------*/
/* HWC definition tables */

/*
   comments on hwcentry tables
   ---------------------------
   name:     this field should not contain '~'.
   int_name: actual name of register, may contain ~ attribute specifications.
   regnum:   assigned register.
   metric:   if non-NULL, is a 'standard' counter that will show up in help.
   timecvt:  >0: can convert to time, 'timecvt' CPU cycles per event
             <0: can convert to time, count reference-clock cycles at '-timecvt' MHz
   memop:    see description for ABST_type enum
 */
// PRELOAD(): generates an interval based on the cycles/event and CPU GHZ.
// Note: the macro truncates the interval to a multiple of 100 and adds 1,
// so the result always ends in decimal 01 (e.g. PRELOAD(1000,3) == 30001,
// PRELOAD(900,133) == 1477701).
// Each argument is evaluated exactly once; the result is unsigned long long.
#define CYC_PER_SAMPLE (1000ULL*1000*1000/100) // cycles per signal at 1ghz, 100 samples/second
#define PRELOAD(min_cycles_per_event,ghz) (((ghz)*CYC_PER_SAMPLE/(min_cycles_per_event))/100*100+1)

// PRELOAD_DEF: initial value for uncalibrated events.
// This value should be based on a rate that will work for the slowest changing
// HWCs, HWCs where there are many CPU cycles between events.
//
// The interval needs to target the slowest HWCs so that
// automatic adjustment of HWC overflow intervals can adapt.
#define PRELOAD_DEF PRELOAD(1000,3) // default interval targets 1000 cycles/event at 3ghz
// For er_kernel, whose HWC intervals cannot be adjusted automatically for ON/HI/LO,
// the interval should target some safe interval for fast events.
#define PRELOAD_DEF_ERKERNEL PRELOAD(4,4) // default interval targets 4 cycles/event at 4ghz
101 static const Hwcentry empty_ctr
= {NULL
, NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 0, ABST_NONE
, 0};
// --- use cycles counter to expose "system_time" on Linux ---
#define SYSTIME_REGNOS REGNO_ANY // Linux: make sys_time/usr_time available for data collection
// Note: For x86, Linux and Solaris use different ref-clock names
//
// USE_INTEL_REF_CYCLES(MHZ): expands to eight comma-terminated Hwcentry
// initializers, so it must be used inside a table's initializer list.
// Every alias is listed twice, once per reference-clock event name.
// MHZ is passed to PRELOAD() as the clock-rate factor and is stored negated
// in the timecvt field (timecvt < 0: reference-clock cycles at '-timecvt' MHz).
// The last line deliberately has no trailing backslash, so the macro cannot
// absorb whatever line follows it.
#define USE_INTEL_REF_CYCLES(MHZ) \
  {"usr_time", "unhalted-reference-cycles", SYSTIME_REGNOS, STXT ("User CPU"), PRELOAD (900, MHZ), -(MHZ), ABST_NONE}, \
  {"usr_time", "cpu_clk_unhalted.ref_p", SYSTIME_REGNOS, STXT ("User CPU"), PRELOAD (900, MHZ), -(MHZ), ABST_NONE}, \
  {"sys_time", "unhalted-reference-cycles~system=1~user=0", SYSTIME_REGNOS, STXT ("System CPU"), PRELOAD (900, MHZ), -(MHZ), ABST_NONE}, \
  {"sys_time", "cpu_clk_unhalted.ref_p~system=1~user=0", SYSTIME_REGNOS, STXT ("System CPU"), PRELOAD (900, MHZ), -(MHZ), ABST_NONE}, \
  {"cycles0", "unhalted-reference-cycles", 0, NULL, PRELOAD (900, MHZ), -(MHZ), ABST_NONE}, /*hidden*/ \
  {"cycles0", "cpu_clk_unhalted.ref_p", 0, NULL, PRELOAD (900, MHZ), -(MHZ), ABST_NONE}, /*hidden*/ \
  {"cycles1", "unhalted-reference-cycles", 1, NULL, PRELOAD (910, MHZ), -(MHZ), ABST_NONE}, /*hidden*/ \
  {"cycles1", "cpu_clk_unhalted.ref_p", 1, NULL, PRELOAD (910, MHZ), -(MHZ), ABST_NONE}, /*hidden*/
/* --- PERF_EVENTS "software" definitions --- */
/* Both macros are meant to be dropped into a Hwcentry initializer list.
   No software-event aliases are supported for now, so the alias macro
   expands to nothing.  The two candidate entries below are kept for
   reference only; they are compiled out because they would otherwise sit
   at file scope, outside any macro or table.  */
#define PERF_EVENTS_SW_EVENT_ALIASES /* none supported for now */
#if 0
  {"usr", "PERF_COUNT_SW_TASK_CLOCK", REGNO_ANY, STXT ("User CPU"), PRELOADS_7, -(1000), ABST_NONE},
  {"sys", "PERF_COUNT_SW_TASK_CLOCK~system=1~user=0", REGNO_ANY, STXT ("System CPU"), PRELOADS_7, -(1000), ABST_NONE},
#endif

/* Currently empty as well.  */
#define PERF_EVENTS_SW_EVENT_DEFS
/*
 * The PAPI descriptive strings used to be wrapped with STXT(),
 * a macro defined in perfan/include/i18n.h.  For the time being,
 * we want to demote the PAPI counters by omitting the
 * descriptions.  So we use a new macro PAPITXT() for this purpose.
 *
 * PAPITXT(x) discards its argument and yields NULL.
 */
#define PAPITXT(x) NULL
137 /* Solaris "Generic" Counters */
138 static Hwcentry papi_generic_list
[] = {
139 {"PAPI_l1_dcm", NULL
, REGNO_ANY
, PAPITXT ("L1 D-cache misses"), PRELOADS_65
, 0, ABST_NONE
},
140 {"PAPI_l1_icm", NULL
, REGNO_ANY
, PAPITXT ("L1 I-cache misses"), PRELOADS_6
, 0, ABST_NONE
},
141 {"PAPI_l2_dcm", NULL
, REGNO_ANY
, PAPITXT ("L2 D-cache misses"), PRELOADS_6
, 0, ABST_NONE
},
142 {"PAPI_l2_icm", NULL
, REGNO_ANY
, PAPITXT ("L2 I-cache misses"), PRELOADS_6
, 0, ABST_NONE
},
143 {"PAPI_l3_dcm", NULL
, REGNO_ANY
, PAPITXT ("L3 D-cache misses"), PRELOADS_5
, 0, ABST_NONE
},
144 {"PAPI_l3_icm", NULL
, REGNO_ANY
, PAPITXT ("L3 I-cache misses"), PRELOADS_5
, 0, ABST_NONE
},
145 {"PAPI_l1_tcm", NULL
, REGNO_ANY
, PAPITXT ("L1 misses"), PRELOADS_65
, 0, ABST_NONE
},
146 {"PAPI_l2_tcm", NULL
, REGNO_ANY
, PAPITXT ("L2 misses"), PRELOADS_6
, 0, ABST_NONE
},
147 {"PAPI_l3_tcm", NULL
, REGNO_ANY
, PAPITXT ("L3 misses"), PRELOADS_5
, 0, ABST_NONE
},
148 {"PAPI_ca_snp", NULL
, REGNO_ANY
, PAPITXT ("Requests for a snoop"), PRELOADS_6
, 0, ABST_NONE
},
149 {"PAPI_ca_shr", NULL
, REGNO_ANY
, PAPITXT ("Requests for exclusive access to shared cache line"), PRELOADS_6
, 0, ABST_NONE
},
150 {"PAPI_ca_cln", NULL
, REGNO_ANY
, PAPITXT ("Requests for exclusive access to clean cache line"), PRELOADS_6
, 0, ABST_NONE
},
151 {"PAPI_ca_inv", NULL
, REGNO_ANY
, PAPITXT ("Requests for cache line invalidation"), PRELOADS_6
, 0, ABST_NONE
},
152 {"PAPI_ca_itv", NULL
, REGNO_ANY
, PAPITXT ("Requests for cache line intervention"), PRELOADS_6
, 0, ABST_NONE
},
153 {"PAPI_l3_ldm", NULL
, REGNO_ANY
, PAPITXT ("L3 load misses"), PRELOADS_5
, 0, ABST_NONE
},
154 {"PAPI_l3_stm", NULL
, REGNO_ANY
, PAPITXT ("L3 store misses"), PRELOADS_5
, 0, ABST_NONE
},
155 {"PAPI_bru_idl", NULL
, REGNO_ANY
, PAPITXT ("Cycles branch units are idle"), PRELOADS_7
, 1, ABST_NONE
},
156 {"PAPI_fxu_idl", NULL
, REGNO_ANY
, PAPITXT ("Cycles integer units are idle"), PRELOADS_7
, 1, ABST_NONE
},
157 {"PAPI_fpu_idl", NULL
, REGNO_ANY
, PAPITXT ("Cycles FP units are idle"), PRELOADS_7
, 1, ABST_NONE
},
158 {"PAPI_lsu_idl", NULL
, REGNO_ANY
, PAPITXT ("Cycles load/store units are idle"), PRELOADS_7
, 1, ABST_NONE
},
159 {"PAPI_tlb_dm", NULL
, REGNO_ANY
, PAPITXT ("DTLB misses"), PRELOADS_6
, 0, ABST_NONE
},
160 {"PAPI_tlb_im", NULL
, REGNO_ANY
, PAPITXT ("ITLB misses"), PRELOADS_6
, 0, ABST_NONE
},
161 {"PAPI_tlb_tl", NULL
, REGNO_ANY
, PAPITXT ("Total TLB misses"), PRELOADS_6
, 0, ABST_NONE
},
162 {"PAPI_tlb_tm", NULL
, REGNO_ANY
, PAPITXT ("Total TLB misses"), PRELOADS_6
, 0, ABST_NONE
},
163 {"PAPI_l1_ldm", NULL
, REGNO_ANY
, PAPITXT ("L1 load misses"), PRELOADS_65
, 0, ABST_NONE
},
164 {"PAPI_l1_stm", NULL
, REGNO_ANY
, PAPITXT ("L1 store misses"), PRELOADS_65
, 0, ABST_NONE
},
165 {"PAPI_l2_ldm", NULL
, REGNO_ANY
, PAPITXT ("L2 load misses"), PRELOADS_6
, 0, ABST_NONE
},
166 {"PAPI_l2_stm", NULL
, REGNO_ANY
, PAPITXT ("L2 store misses"), PRELOADS_6
, 0, ABST_NONE
},
167 {"PAPI_btac_m", NULL
, REGNO_ANY
, PAPITXT ("Branch target address cache misses"), PRELOADS_5
, 0, ABST_NONE
},
168 {"PAPI_prf_dm", NULL
, REGNO_ANY
, PAPITXT ("Data prefetch cache misses"), PRELOADS_65
, 0, ABST_NONE
},
169 {"PAPI_l3_dch", NULL
, REGNO_ANY
, PAPITXT ("L3 D-cache hits"), PRELOADS_6
, 0, ABST_NONE
},
170 {"PAPI_tlb_sd", NULL
, REGNO_ANY
, PAPITXT ("TLB shootdowns"), PRELOADS_6
, 0, ABST_NONE
},
171 {"PAPI_csr_fal", NULL
, REGNO_ANY
, PAPITXT ("Failed store conditional instructions"), PRELOADS_6
, 0, ABST_NONE
},
172 {"PAPI_csr_suc", NULL
, REGNO_ANY
, PAPITXT ("Successful store conditional instructions"), PRELOADS_7
, 0, ABST_NONE
},
173 {"PAPI_csr_tot", NULL
, REGNO_ANY
, PAPITXT ("Total store conditional instructions"), PRELOADS_7
, 0, ABST_NONE
},
174 {"PAPI_mem_scy", NULL
, REGNO_ANY
, PAPITXT ("Cycles Stalled Waiting for memory accesses"), PRELOADS_7
, 1, ABST_NONE
},
175 {"PAPI_mem_rcy", NULL
, REGNO_ANY
, PAPITXT ("Cycles Stalled Waiting for memory reads"), PRELOADS_7
, 1, ABST_NONE
},
176 {"PAPI_mem_wcy", NULL
, REGNO_ANY
, PAPITXT ("Cycles Stalled Waiting for memory writes"), PRELOADS_7
, 1, ABST_NONE
},
177 {"PAPI_stl_icy", NULL
, REGNO_ANY
, PAPITXT ("Cycles with no instruction issue"), PRELOADS_7
, 1, ABST_NONE
},
178 {"PAPI_ful_icy", NULL
, REGNO_ANY
, PAPITXT ("Cycles with maximum instruction issue"), PRELOADS_7
, 1, ABST_NONE
},
179 {"PAPI_stl_ccy", NULL
, REGNO_ANY
, PAPITXT ("Cycles with no instructions completed"), PRELOADS_7
, 1, ABST_NONE
},
180 {"PAPI_ful_ccy", NULL
, REGNO_ANY
, PAPITXT ("Cycles with maximum instructions completed"), PRELOADS_7
, 1, ABST_NONE
},
181 {"PAPI_hw_int", NULL
, REGNO_ANY
, PAPITXT ("Hardware interrupts"), PRELOADS_5
, 0, ABST_NONE
},
182 {"PAPI_br_ucn", NULL
, REGNO_ANY
, PAPITXT ("Unconditional branch instructions"), PRELOADS_7
, 0, ABST_NONE
},
183 {"PAPI_br_cn", NULL
, REGNO_ANY
, PAPITXT ("Cond. branch instructions"), PRELOADS_7
, 0, ABST_NONE
},
184 {"PAPI_br_tkn", NULL
, REGNO_ANY
, PAPITXT ("Cond. branch instructions taken"), PRELOADS_7
, 0, ABST_NONE
},
185 {"PAPI_br_ntk", NULL
, REGNO_ANY
, PAPITXT ("Cond. branch instructions not taken"), PRELOADS_7
, 0, ABST_NONE
},
186 {"PAPI_br_msp", NULL
, REGNO_ANY
, PAPITXT ("Cond. branch instructions mispredicted"), PRELOADS_6
, 0, ABST_NONE
},
187 {"PAPI_br_prc", NULL
, REGNO_ANY
, PAPITXT ("Cond. branch instructions correctly predicted"), PRELOADS_7
, 0, ABST_NONE
},
188 {"PAPI_fma_ins", NULL
, REGNO_ANY
, PAPITXT ("FMA instructions completed"), PRELOADS_65
, 0, ABST_NONE
},
189 {"PAPI_tot_iis", NULL
, REGNO_ANY
, PAPITXT ("Instructions issued"), PRELOADS_7
, 0, ABST_NONE
},
190 {"PAPI_tot_ins", NULL
, REGNO_ANY
, PAPITXT ("Instructions completed"), PRELOADS_7
, 0, ABST_NONE
},
191 {"PAPI_int_ins", NULL
, REGNO_ANY
, PAPITXT ("Integer instructions"), PRELOADS_7
, 0, ABST_NONE
},
192 {"PAPI_fp_ins", NULL
, REGNO_ANY
, PAPITXT ("Floating-point instructions"), PRELOADS_7
, 0, ABST_NONE
},
193 {"PAPI_ld_ins", NULL
, REGNO_ANY
, PAPITXT ("Load instructions"), PRELOADS_7
, 0, ABST_NONE
},
194 {"PAPI_sr_ins", NULL
, REGNO_ANY
, PAPITXT ("Store instructions"), PRELOADS_7
, 0, ABST_NONE
},
195 {"PAPI_br_ins", NULL
, REGNO_ANY
, PAPITXT ("Branch instructions"), PRELOADS_7
, 0, ABST_NONE
},
196 {"PAPI_vec_ins", NULL
, REGNO_ANY
, PAPITXT ("Vector/SIMD instructions"), PRELOADS_7
, 0, ABST_NONE
},
197 {"PAPI_res_stl", NULL
, REGNO_ANY
, PAPITXT ("Cycles stalled on any resource"), PRELOADS_7
, 1, ABST_NONE
},
198 {"PAPI_fp_stal", NULL
, REGNO_ANY
, PAPITXT ("Cycles the FP unit(s) are stalled"), PRELOADS_7
, 1, ABST_NONE
},
199 {"PAPI_tot_cyc", NULL
, REGNO_ANY
, PAPITXT ("Total cycles"), PRELOADS_7
, 1, ABST_NONE
},
200 {"PAPI_lst_ins", NULL
, REGNO_ANY
, PAPITXT ("Load/store instructions completed"), PRELOADS_7
, 0, ABST_NONE
},
201 {"PAPI_syc_ins", NULL
, REGNO_ANY
, PAPITXT ("Sync instructions completed"), PRELOADS_65
, 0, ABST_NONE
},
202 {"PAPI_l1_dch", NULL
, REGNO_ANY
, PAPITXT ("L1 D-cache hits"), PRELOADS_7
, 0, ABST_NONE
},
203 {"PAPI_l2_dch", NULL
, REGNO_ANY
, PAPITXT ("L2 D-cache hits"), PRELOADS_65
, 0, ABST_NONE
},
204 {"PAPI_l1_dca", NULL
, REGNO_ANY
, PAPITXT ("L1 D-cache accesses"), PRELOADS_7
, 0, ABST_NONE
},
205 {"PAPI_l2_dca", NULL
, REGNO_ANY
, PAPITXT ("L2 D-cache accesses"), PRELOADS_65
, 0, ABST_NONE
},
206 {"PAPI_l3_dca", NULL
, REGNO_ANY
, PAPITXT ("L3 D-cache accesses"), PRELOADS_6
, 0, ABST_NONE
},
207 {"PAPI_l1_dcr", NULL
, REGNO_ANY
, PAPITXT ("L1 D-cache reads"), PRELOADS_7
, 0, ABST_NONE
},
208 {"PAPI_l2_dcr", NULL
, REGNO_ANY
, PAPITXT ("L2 D-cache reads"), PRELOADS_65
, 0, ABST_NONE
},
209 {"PAPI_l3_dcr", NULL
, REGNO_ANY
, PAPITXT ("L3 D-cache reads"), PRELOADS_6
, 0, ABST_NONE
},
210 {"PAPI_l1_dcw", NULL
, REGNO_ANY
, PAPITXT ("L1 D-cache writes"), PRELOADS_7
, 0, ABST_NONE
},
211 {"PAPI_l2_dcw", NULL
, REGNO_ANY
, PAPITXT ("L2 D-cache writes"), PRELOADS_65
, 0, ABST_NONE
},
212 {"PAPI_l3_dcw", NULL
, REGNO_ANY
, PAPITXT ("L3 D-cache writes"), PRELOADS_6
, 0, ABST_NONE
},
213 {"PAPI_l1_ich", NULL
, REGNO_ANY
, PAPITXT ("L1 I-cache hits"), PRELOADS_7
, 0, ABST_NONE
},
214 {"PAPI_l2_ich", NULL
, REGNO_ANY
, PAPITXT ("L2 I-cache hits"), PRELOADS_65
, 0, ABST_NONE
},
215 {"PAPI_l3_ich", NULL
, REGNO_ANY
, PAPITXT ("L3 I-cache hits"), PRELOADS_6
, 0, ABST_NONE
},
216 {"PAPI_l1_ica", NULL
, REGNO_ANY
, PAPITXT ("L1 I-cache accesses"), PRELOADS_7
, 0, ABST_NONE
},
217 {"PAPI_l2_ica", NULL
, REGNO_ANY
, PAPITXT ("L2 I-cache accesses"), PRELOADS_65
, 0, ABST_NONE
},
218 {"PAPI_l3_ica", NULL
, REGNO_ANY
, PAPITXT ("L3 I-cache accesses"), PRELOADS_6
, 0, ABST_NONE
},
219 {"PAPI_l1_icr", NULL
, REGNO_ANY
, PAPITXT ("L1 I-cache reads"), PRELOADS_7
, 0, ABST_NONE
},
220 {"PAPI_l2_icr", NULL
, REGNO_ANY
, PAPITXT ("L2 I-cache reads"), PRELOADS_65
, 0, ABST_NONE
},
221 {"PAPI_l3_icr", NULL
, REGNO_ANY
, PAPITXT ("L3 I-cache reads"), PRELOADS_6
, 0, ABST_NONE
},
222 {"PAPI_l1_icw", NULL
, REGNO_ANY
, PAPITXT ("L1 I-cache writes"), PRELOADS_7
, 0, ABST_NONE
},
223 {"PAPI_l2_icw", NULL
, REGNO_ANY
, PAPITXT ("L2 I-cache writes"), PRELOADS_65
, 0, ABST_NONE
},
224 {"PAPI_l3_icw", NULL
, REGNO_ANY
, PAPITXT ("L3 I-cache writes"), PRELOADS_6
, 0, ABST_NONE
},
225 {"PAPI_l1_tch", NULL
, REGNO_ANY
, PAPITXT ("L1 total hits"), PRELOADS_7
, 0, ABST_NONE
},
226 {"PAPI_l2_tch", NULL
, REGNO_ANY
, PAPITXT ("L2 total hits"), PRELOADS_65
, 0, ABST_NONE
},
227 {"PAPI_l3_tch", NULL
, REGNO_ANY
, PAPITXT ("L3 total hits"), PRELOADS_6
, 0, ABST_NONE
},
228 {"PAPI_l1_tca", NULL
, REGNO_ANY
, PAPITXT ("L1 total accesses"), PRELOADS_7
, 0, ABST_NONE
},
229 {"PAPI_l2_tca", NULL
, REGNO_ANY
, PAPITXT ("L2 total accesses"), PRELOADS_65
, 0, ABST_NONE
},
230 {"PAPI_l3_tca", NULL
, REGNO_ANY
, PAPITXT ("L3 total accesses"), PRELOADS_6
, 0, ABST_NONE
},
231 {"PAPI_l1_tcr", NULL
, REGNO_ANY
, PAPITXT ("L1 total reads"), PRELOADS_7
, 0, ABST_NONE
},
232 {"PAPI_l2_tcr", NULL
, REGNO_ANY
, PAPITXT ("L2 total reads"), PRELOADS_65
, 0, ABST_NONE
},
233 {"PAPI_l3_tcr", NULL
, REGNO_ANY
, PAPITXT ("L3 total reads"), PRELOADS_6
, 0, ABST_NONE
},
234 {"PAPI_l1_tcw", NULL
, REGNO_ANY
, PAPITXT ("L1 total writes"), PRELOADS_7
, 0, ABST_NONE
},
235 {"PAPI_l2_tcw", NULL
, REGNO_ANY
, PAPITXT ("L2 total writes"), PRELOADS_65
, 0, ABST_NONE
},
236 {"PAPI_l3_tcw", NULL
, REGNO_ANY
, PAPITXT ("L3 total writes"), PRELOADS_6
, 0, ABST_NONE
},
237 {"PAPI_fml_ins", NULL
, REGNO_ANY
, PAPITXT ("FP multiply instructions"), PRELOADS_7
, 0, ABST_NONE
},
238 {"PAPI_fad_ins", NULL
, REGNO_ANY
, PAPITXT ("FP add instructions"), PRELOADS_7
, 0, ABST_NONE
},
239 {"PAPI_fdv_ins", NULL
, REGNO_ANY
, PAPITXT ("FP divide instructions"), PRELOADS_7
, 0, ABST_NONE
},
240 {"PAPI_fsq_ins", NULL
, REGNO_ANY
, PAPITXT ("FP square root instructions"), PRELOADS_65
, 0, ABST_NONE
},
241 {"PAPI_fnv_ins", NULL
, REGNO_ANY
, PAPITXT ("FP inverse instructions"), PRELOADS_7
, 0, ABST_NONE
},
242 {"PAPI_fp_ops", NULL
, REGNO_ANY
, PAPITXT ("FP operations"), PRELOADS_7
, 0, ABST_NONE
},
243 {NULL
, NULL
, 0, NULL
, 0, 0, 0, 0, ABST_NONE
}
246 #if defined(__i386__) || defined(__x86_64)
247 /* Kernel profiling pseudo-chip, OBSOLETE (To support 12.3 and earlier, TBR) */
248 static Hwcentry kproflist
[] = {
249 {"kcycles", "kcycles", 0, STXT ("KCPU Cycles"), PRELOADS_5
, 1, ABST_NONE
},
250 {"kucycles", "kucycles", 0, STXT ("KUCPU Cycles"), PRELOADS_5
, 1, ABST_NONE
},
251 {"kthr", "kthr", 0, STXT ("KTHR Cycles"), PRELOADS_5
, 1, ABST_NONE
},
252 {NULL
, NULL
, 0, NULL
, 0, 0, 0, 0, ABST_NONE
}
255 static Hwcentry pentiumIIlist
[] = {
256 /* note -- missing entries for dtlbm, ecm */
257 {"cycles", "cpu_clk_unhalted", REGNO_ANY
, STXT ("CPU Cycles"), PRELOADS_7
, 1, ABST_NONE
},
258 {"insts", "inst_retired", REGNO_ANY
, STXT ("Instructions Executed"), PRELOADS_7
, 0, ABST_NONE
},
259 {"icm", "ifu_ifetch_miss", REGNO_ANY
, STXT ("I$ Misses"), PRELOADS_5
, 0, ABST_NONE
},
260 {"dcrm", "dcu_m_lines_in", REGNO_ANY
, STXT ("D$ Read Misses"), PRELOADS_5
, 0, ABST_NONE
},
261 {"dcwm", "dcu_m_lines_out", REGNO_ANY
, STXT ("D$ Write Misses"), PRELOADS_5
, 0, ABST_NONE
},
262 {"flops", "flops", REGNO_ANY
, STXT ("Floating-point Ops"), PRELOADS_7
, 0, ABST_NONE
},
263 {"itlbm", "itlb_miss", REGNO_ANY
, STXT ("ITLB Misses"), PRELOADS_5
, 0, ABST_NONE
},
264 {"ecim", "l2_ifetch", REGNO_ANY
, STXT ("E$ Instr. Misses"), PRELOADS_5
, 0, ABST_NONE
},
266 /* explicit definitions of (hidden) entries for proper counters */
267 /* Only counters that can be time converted, or are load-store need to be in this table */
268 {"cpu_clk_unhalted", NULL
, REGNO_ANY
, NULL
, PRELOADS_7
, 1, ABST_NONE
},
270 /* additional (hidden) aliases for convenience */
271 {"cycles0", "cpu_clk_unhalted", 0, NULL
, PRELOADS_75
, 1, ABST_NONE
},
272 {"cycles1", "cpu_clk_unhalted", 1, NULL
, PRELOADS_75
, 1, ABST_NONE
},
273 {"insts0", "inst_retired", 0, NULL
, PRELOADS_75
, 0, ABST_NONE
},
274 {"insts1", "inst_retired", 1, NULL
, PRELOADS_75
, 0, ABST_NONE
},
275 {NULL
, NULL
, 0, NULL
, 0, 0, 0, 0, ABST_NONE
}
278 static Hwcentry pentiumIIIlist
[] = {
279 /* note -- many missing entries; no reference machine to try */
280 {"cycles", "cpu_clk_unhalted", REGNO_ANY
, STXT ("CPU Cycles"), PRELOADS_7
, 1, ABST_NONE
},
281 {"insts", "inst_retired", REGNO_ANY
, STXT ("Instructions Executed"), PRELOADS_7
, 0, ABST_NONE
},
283 /* explicit definitions of (hidden) entries for proper counters */
284 /* Only counters that can be time converted, or are load-store need to be in this table */
285 {"cpu_clk_unhalted", NULL
, REGNO_ANY
, NULL
, PRELOADS_7
, 1, ABST_NONE
},
287 /* additional (hidden) aliases for convenience */
288 {"cycles0", "cpu_clk_unhalted", 0, NULL
, PRELOADS_75
, 1, ABST_NONE
},
289 {"cycles1", "cpu_clk_unhalted", 1, NULL
, PRELOADS_75
, 1, ABST_NONE
},
290 {"insts0", "inst_retired", 0, NULL
, PRELOADS_75
, 0, ABST_NONE
},
291 {"insts1", "inst_retired", 1, NULL
, PRELOADS_75
, 0, ABST_NONE
},
292 {NULL
, NULL
, 0, NULL
, 0, 0, 0, 0, ABST_NONE
}
295 static Hwcentry pentium4
[] = {
296 {"cycles", "TC_deliver_mode~threshold=0xf~complement=1~compare=1", REGNO_ANY
, STXT ("CPU Cycles"), PRELOADS_7
, 1, ABST_NONE
},
297 {"insts", "instr_retired~emask=0x3", REGNO_ANY
, STXT ("Instructions Executed"), PRELOADS_7
, 0, ABST_NONE
},
298 {"l1m", "BSQ_cache_reference~emask=0x0507", REGNO_ANY
, STXT ("L1 Cache Misses"), PRELOADS_7
, 0, ABST_NONE
},
299 {"l2h", "BSQ_cache_reference~emask=0x0007", REGNO_ANY
, STXT ("L2 Cache Hits"), PRELOADS_7
, 0, ABST_NONE
},
300 {"l2m", "BSQ_cache_reference~emask=0x0500", REGNO_ANY
, STXT ("L2 Cache Misses"), PRELOADS_6
, 0, ABST_NONE
},
302 /* explicit definitions of (hidden) entries for proper counters */
303 /* Only counters that can be time converted, or are load-store need to be in this table */
304 {"TC_deliver_mode", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
305 {"machine_clear", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
307 /* additional (hidden) aliases, for convenience */
308 {"cycles0", "TC_deliver_mode~threshold=0xf~complement=1~compare=1", 5, NULL
, PRELOADS_75
, 1, ABST_NONE
},
309 {"cycles1", "TC_deliver_mode~threshold=0xf~complement=1~compare=1", 6, NULL
, PRELOADS_75
, 1, ABST_NONE
},
310 {"insts0", "instr_retired~emask=0x3", 15, NULL
, PRELOADS_75
, 0, ABST_NONE
},
311 {"insts1", "instr_retired~emask=0x3", 16, NULL
, PRELOADS_75
, 0, ABST_NONE
},
312 {NULL
, NULL
, 0, NULL
, 0, 0, 0, 0, ABST_NONE
}
315 static Hwcentry intelCore2list
[] = {
316 // For post-processing, both Linux and Solaris definitions need to be "live".
317 // However, for data collection, OS-specific definitions may need to be hidden.
318 // Use REGNO_INVALID for definitions that should be hidden for data collection.
319 #define LINUX_ONLY REGNO_ANY
320 #define SOLARIS_ONLY REGNO_INVALID /* hidden for Linux data collection */
322 {"cycles", "cpu_clk_unhalted.core", /*6759307*/ SOLARIS_ONLY
, STXT ("CPU Cycles"), PRELOADS_75
, 1, ABST_NONE
},
323 {"cycles", "cpu_clk_unhalted.thread", /*6759307*/ SOLARIS_ONLY
, STXT ("CPU Cycles"), PRELOADS_75
, 1, ABST_NONE
},
324 /* Linux Note: 7046312 Many HWC tests fail on system Core2 system with perf_events if above alias used */
325 {"cycles", "cpu_clk_unhalted", LINUX_ONLY
, STXT ("CPU Cycles"), PRELOADS_75
, 1, ABST_NONE
},
327 {"insts", "instr_retired.any", SOLARIS_ONLY
, STXT ("Instructions Executed"), PRELOADS_75
, 0, ABST_NONE
},
328 /* Linux Note: 7046312 Many HWC tests fail on system Core2 system with perf_events if above alias used */
329 {"insts", "inst_retired", LINUX_ONLY
, STXT ("Instructions Executed"), PRELOADS_75
, 0, ABST_NONE
},
331 // The following counters were identified in "Cycle Accounting Analysis on Intel Core2 Processors" by David Levinthal
332 {"uops_stalled", "rs_uops_dispatched~cmask=1~inv=1", REGNO_ANY
, STXT ("uOps Stalled"), PRELOADS_7
, 1, ABST_NONE
},
333 {"l2m", "mem_load_retired~umask=0x08", REGNO_ANY
, STXT ("L2 Line Misses"), PRELOADS_5
, 0, ABST_NONE
},
334 {"dtlbm", "mem_load_retired~umask=0x10", REGNO_ANY
, STXT ("L1 DTLB Misses"), PRELOADS_5
, 0, ABST_NONE
},
335 {"l1m", "mem_load_retired~umask=0x02", REGNO_ANY
, STXT ("L1 Line Misses"), PRELOADS_6
, 0, ABST_NONE
},
336 // {"stalls_resources","resource_stalls~umask=0x1f", REGNO_ANY, STXT("Resource Stalls"), PRELOADS_6, 1, ABST_NONE},
337 {"rs_full", "resource_stalls~umask=0x02", REGNO_ANY
, STXT ("Reservation Station Full"), PRELOADS_6
, 1, ABST_NONE
},
338 {"br_miss_flush", "resource_stalls~umask=0x10", REGNO_ANY
, STXT ("Mispredicted Branch Flushes"), PRELOADS_6
, 1, ABST_NONE
},
339 {"ld_st_full", "resource_stalls~umask=0x04", REGNO_ANY
, STXT ("Load/Store Buffers Full"), PRELOADS_6
, 1, ABST_NONE
},
340 {"rob_full", "resource_stalls~umask=0x01", REGNO_ANY
, STXT ("Reorder Buffer Full"), PRELOADS_6
, 1, ABST_NONE
},
341 {"slow_decode", "ild_stall", REGNO_ANY
, STXT ("Slow Instruction Decode"), PRELOADS_6
, 1, ABST_NONE
},
342 {"br_miss", "br_cnd_missp_exec", REGNO_ANY
, STXT ("Mispredicted Branches"), PRELOADS_5
, 0, ABST_NONE
},
343 {"ret_miss", "br_call_missp_exec", REGNO_ANY
, STXT ("Mispredicted Return Calls"), PRELOADS_5
, 0, ABST_NONE
},
344 {"div_busy", "idle_during_div", REGNO_ANY
, STXT ("Divider Unit Busy"), PRELOADS_5
, 1, ABST_NONE
},
345 {"fp_assists", "fp_assist", REGNO_ANY
, STXT ("FP Microcode Assists"), PRELOADS_5
, 0, ABST_NONE
},
346 {"bus_busy", "bus_drdy_clocks~umask=0x60", REGNO_ANY
, STXT ("Busy Data Bus"), PRELOADS_5
, 1, ABST_NONE
},
348 /* explicit definitions of (hidden) entries for proper counters */
349 /* Only counters that can be time converted, or are load-store need to be in this table */
350 {/*30a*/"cpu_clk_unhalted.core", /*6759307*/ NULL
, REGNO_ANY
, NULL
, PRELOADS_75
, 1, ABST_NONE
},
351 {/*30a*/"cpu_clk_unhalted.thread", /*6759307*/ NULL
, REGNO_ANY
, NULL
, PRELOADS_75
, 1, ABST_NONE
},
352 {/*03*/"store_block", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
353 {/*03*/"store_block.drain_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
354 {/*03*/"store_block.order", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
355 {/*03*/"store_block.snoop", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
356 {/*09*/"memory_disambiguation.reset", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
357 {/*0c*/"page_walks.cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
358 {/*14*/"cycles_div_busy", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
359 {/*18*/"idle_during_div", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
360 {/*19*/"delayed_bypass.load", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
361 {/*21*/"l2_ads", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
362 {/*23*/"l2_dbus_busy_rd", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
363 {/*32*/"l2_no_req", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
364 {/*3c*/"cpu_clk_unhalted", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
365 {/*3c*/"cpu_clk_unhalted.core_p", NULL
, REGNO_ANY
, NULL
, PRELOADS_75
, 1, ABST_NONE
},
366 {/*3c*/"cpu_clk_unhalted.bus", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
367 {/*3c*/"cpu_clk_unhalted.no_other", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
368 {/*42*/"l1d_cache_lock.duration", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
369 {/*62*/"bus_drdy_clocks", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
370 {/*63*/"bus_lock_clocks", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
371 {/*64*/"bus_data_rcv", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
372 {/*7a*/"bus_hit_drv", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
373 {/*7b*/"bus_hitm_drv", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
374 {/*7d*/"busq_empty", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
375 {/*7e*/"snoop_stall_drv", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
376 {/*7f*/"bus_io_wait", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
377 {/*83*/"inst_queue", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
378 {/*83*/"inst_queue.full", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
379 {/*86*/"cycles_l1i_mem_stalled", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
380 {/*87*/"ild_stall", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
381 {/*a1*/"rs_uops_dispatched", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
382 {/*a1*/"rs_uops_dispatched_port", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
383 {/*a1*/"rs_uops_dispatched_port.0", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
384 {/*a1*/"rs_uops_dispatched_port.1", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
385 {/*a1*/"rs_uops_dispatched_port.2", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
386 {/*a1*/"rs_uops_dispatched_port.3", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
387 {/*a1*/"rs_uops_dispatched_port.4", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
388 {/*a1*/"rs_uops_dispatched_port.5", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
389 {/*6c*/"cycles_int", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
390 {/*6c*/"cycles_int.masked", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
391 {/*6c*/"cycles_int.pending_and_masked", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
392 {/*d2*/"rat_stalls", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
393 {/*d2*/"rat_stalls.rob_read_port", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
394 {/*d2*/"rat_stalls.partial_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
395 {/*d2*/"rat_stalls.flags", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
396 {/*d2*/"rat_stalls.fpsw", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
397 {/*d2*/"rat_stalls.any", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
398 {/*d2*/"rat_stalls.other_serialization_stalls", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
399 {/*d4*/"seg_rename_stalls", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
400 {/*d4*/"seg_rename_stalls.es", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
401 {/*d4*/"seg_rename_stalls.ds", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
402 {/*d4*/"seg_rename_stalls.fs", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
403 {/*d4*/"seg_rename_stalls.gs", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
404 {/*d4*/"seg_rename_stalls.any", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
405 {/*dc*/"resource_stalls", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
406 {/*dc*/"resource_stalls.rob_full", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
407 {/*dc*/"resource_stalls.rs_full", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
408 {/*dc*/"resource_stalls.ld_st", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
409 {/*dc*/"resource_stalls.fpcw", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
410 {/*dc*/"resource_stalls.br_miss_clear", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
411 {/*dc*/"resource_stalls.any", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
412 /* "Architectural" events: */
413 {/*3c*/"unhalted-core-cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
415 /* additional (hidden) aliases for convenience */
416 {"cycles0", "cpu_clk_unhalted", 0, NULL
, PRELOADS_8
, 1, ABST_NONE
},
417 {"cycles1", "cpu_clk_unhalted", 1, NULL
, PRELOADS_8
, 1, ABST_NONE
},
418 {"insts0", "inst_retired", 0, NULL
, PRELOADS_8
, 0, ABST_NONE
},
419 {"insts1", "inst_retired", 1, NULL
, PRELOADS_8
, 0, ABST_NONE
},
420 {NULL
, NULL
, 0, NULL
, 0, 0, 0, 0, ABST_NONE
}
424 static Hwcentry intelNehalemList
[] = {
425 /* 6832635: on Linux, we're not seeing consistent overflows on FFCs */
426 /* 15634344==6940930: HWC overflow profiling can cause system hang on Solaris/core-i7 systems */
427 /* 17578620: counter overflow for fixed-function counters hangs systems */
428 /* same issues for intelSandyBridgeList and intelHaswellList */
429 PERF_EVENTS_SW_EVENT_ALIASES
430 USE_INTEL_REF_CYCLES (133)
431 {"cycles", "cpu_clk_unhalted.thread_p", REGNO_ANY
, STXT ("CPU Cycles"), PRELOADS_75
, 1, ABST_NONE
},
432 {"insts", "inst_retired.any_p", REGNO_ANY
, STXT ("Instructions Executed"), PRELOADS_75
, 0, ABST_NONE
},
433 // cpu_clk_unhalted.ref: at the ref frequency of the cpu. Should not be affected by Speedstep or Turbo.
434 // cpu_clk_unhalted.thread_p: with HT & 2 threads, 2x cycles. Affected by Speedstep and Turbo.
437 {"l2m_latency", "mem_inst_retired.latency_above_threshold", REGNO_ANY
, STXT ("L2 Cache Miss Est. Latency"), PRELOADS_4
, 33, ABST_EXACT_PEBS_PLUS1
},
439 // See file hwctable.README.corei7
440 {"dch", "mem_load_retired.l1d_hit", REGNO_ANY
, STXT ("L1 D-cache Hits"), PRELOADS_7
, 0, ABST_NONE
},
441 {"dcm", "0xCB~umask=0x1e", REGNO_ANY
, STXT ("L1 D-Cache Misses"), PRELOADS_65
, 0, ABST_NONE
}, /*mem_load_retired*/
442 {"lfbdh", "mem_load_retired.hit_lfb", REGNO_ANY
, STXT ("LFB D-cache Hits"), PRELOADS_65
, 0, ABST_NONE
},
443 {"l2h", "mem_load_retired.l2_hit", REGNO_ANY
, STXT ("L2 Cache Hits"), PRELOADS_65
, 0, ABST_NONE
},
444 {"l2m", "0xCB~umask=0x1c", REGNO_ANY
, STXT ("L2 Cache Misses"), PRELOADS_6
, 0, ABST_NONE
}, /*mem_load_retired*/
445 {"l3h", "mem_load_retired.llc_unshared_hit", REGNO_ANY
, STXT ("L3 Cache Hit w/o Snoop"), PRELOADS_6
, 0, ABST_NONE
},
446 {"l3h_stall", "mem_load_retired.llc_unshared_hit", REGNO_ANY
, STXT ("L3 Cache Hit w/o Snoop x 35: Est. Stalls"), PRELOADS_6
, 35, ABST_NONE
},
447 {"l3hsnoop", "mem_load_retired.other_core_l2_hit_hitm", REGNO_ANY
, STXT ("L3 Cache Hit w/Snoop"), PRELOADS_6
, 0, ABST_NONE
},
448 {"l3hsnoop_stall", "mem_load_retired.other_core_l2_hit_hitm", REGNO_ANY
, STXT ("L3 Cache Hit w/Snoop x 74: Est. Stalls"), PRELOADS_6
, 74, ABST_NONE
},
449 {"l3m", "mem_load_retired.llc_miss", REGNO_ANY
, STXT ("L3 Cache Misses"), PRELOADS_5
, 0, ABST_NONE
},
450 {"l3m_stall", "mem_load_retired.llc_miss", REGNO_ANY
, STXT ("L3 Cache Misses x 180: Estimated Stalls"), PRELOADS_5
, 180, ABST_NONE
},
451 {"dtlbm", "dtlb_load_misses.walk_completed", REGNO_ANY
, STXT ("DTLB Misses"), PRELOADS_6
, 0, ABST_NONE
},
452 {"dtlbm_stall", "dtlb_load_misses.walk_completed", REGNO_ANY
, STXT ("DTLB Misses x 30: Estimated Stalls"), PRELOADS_6
, 30, ABST_NONE
},
453 {"addr_alias_stall", "partial_address_alias", REGNO_ANY
, STXT ("Partial Address Aliases x 3: Est. Stalls"), PRELOADS_6
, 3, ABST_NONE
},
454 {"uope_stall", "uops_executed.port234~cmask=1~inv=1", REGNO_ANY
, STXT ("UOP Execute Stalls per Core"), PRELOADS_7
, 1, ABST_NONE
},
455 {"uopr_stall", "uops_retired.any~cmask=1~inv=1", REGNO_ANY
, STXT ("UOP Retired Stalls"), PRELOADS_7
, 1, ABST_NONE
},
456 {"itlbm", "itlb_miss_retired", REGNO_ANY
, STXT ("ITLB Misses"), PRELOADS_6
, 0, ABST_NONE
},
457 {"l1i_stall", "l1i.cycles_stalled", REGNO_ANY
, STXT ("L1 I-cache Stalls"), PRELOADS_6
, 1, ABST_NONE
},
458 {"br_rets", "br_inst_retired.all_branches", REGNO_ANY
, STXT ("Branch Instruction Retires"), PRELOADS_7
, 0, ABST_NONE
},
459 {"br_misp", "br_misp_exec.any", REGNO_ANY
, STXT ("Branch Mispredicts"), PRELOADS_6
, 0, ABST_NONE
},
460 {"mach_clear", "machine_clears.cycles", REGNO_ANY
, STXT ("Machine Clear Asserted"), PRELOADS_6
, 1, ABST_NONE
},
461 {"fp_mmx", "fp_mmx_trans.any", REGNO_ANY
, STXT ("FP-MMX Transistions"), PRELOADS_6
, 0, ABST_NONE
},
462 {"div_busy", "arith.cycles_div_busy", REGNO_ANY
, STXT ("Divider Busy Cycles"), PRELOADS_6
, 1, ABST_NONE
},
464 /* explicit definitions of (hidden) entries for proper counters */
465 /* Only counters that can be time converted, or are load-store need to be in this table */
466 {/*30a*/"cpu_clk_unhalted.core", /*6759307*/ NULL
, REGNO_ANY
, NULL
, PRELOADS_75
, 1, ABST_NONE
},
467 {/*30a*/"cpu_clk_unhalted.thread", /*6759307*/ NULL
, REGNO_ANY
, NULL
, PRELOADS_75
, 1, ABST_NONE
},
468 {/*04*/"sb_drain.cycles", /*future*/ NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
469 {/*08.04*/"dtlb_load_misses.walk_cycles", /*westmere*/ NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
470 //{/*0e*/"uops_issued.stalled_cycles",/*future, multibit*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
471 {/*09*/"memory_disambiguation.reset", /*future*/ NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
472 {/*09*/"memory_disambiguation.watch_cycles", /*future*/ NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
473 {/*0b*/"mem_inst_retired.latency_above_threshold", /*PEBS*/ NULL
, REGNO_ANY
, NULL
, PRELOADS_4
, 33, ABST_EXACT_PEBS_PLUS1
}, //non-standard overflow
474 {/*14*/"arith.cycles_div_busy", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
475 {/*17*/"inst_queue_write_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
476 {/*1d*/"hw_int.cycles_masked", /*future*/ NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
477 {/*1d*/"hw_int.cycles_pending_and_masked", /*future*/ NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
478 {/*3c*/"cpu_clk_unhalted.thread_p", NULL
, REGNO_ANY
, NULL
, PRELOADS_75
, 1, ABST_NONE
},
479 {/*48*/"l1d_pend_miss.load_buffers_full", /*future*/ NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
480 {/*49.04*/"dtlb_misses.walk_cycles", /*westmere*/ NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
481 {/*4e*/"sfence_cycles", /*future*/ NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
482 {/*4f.10*/"ept.walk_cycles", /*westmere*/ NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
483 {/*60*/"offcore_requests_outstanding.demand.read_data", /*future*/ NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
484 {/*60*/"offcore_requests_outstanding.demand.read_code", /*future*/ NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
485 {/*60*/"offcore_requests_outstanding.demand.rfo", /*future*/ NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
486 {/*60*/"offcore_requests_outstanding.any.read", /*future*/ NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
487 {/*63*/"cache_lock_cycles.l1d", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
488 {/*63*/"cache_lock_cycles.l1d_l2", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
489 {/*80*/"l1i.cycles_stalled", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
490 {/*85*/"itlb_misses.walk_cycles", /*future*/ NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
491 {/*85*/"itlb_misses.pmh_busy_cycles", /*future*/ NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
492 {/*87*/"ild_stall.lcp", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
493 {/*87*/"ild_stall.mru", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
494 {/*87*/"ild_stall.iq_full", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
495 {/*87*/"ild_stall.regen", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
496 {/*87*/"ild_stall.any", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
497 {/*a2*/"resource_stalls.any", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
498 {/*a2*/"resource_stalls.load", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
499 {/*a2*/"resource_stalls.rs_full", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
500 {/*a2*/"resource_stalls.store", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
501 {/*a2*/"resource_stalls.rob_full", /*future*/ NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
502 {/*a2*/"resource_stalls.fpcw", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
503 {/*a2*/"resource_stalls.mxcsr", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
504 {/*a2*/"resource_stalls.other", /*future*/ NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
505 {/*b0*/"offcore_requests_sq_full", /*future*/ NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
506 {/*b3*/"snoopq_requests_outstanding.data", /*future*/ NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
507 {/*b3*/"snoopq_requests_outstanding.invalidate", /*future*/ NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
508 {/*b3*/"snoopq_requests_outstanding.code", /*future*/ NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
509 //{/*c2*/"uops_retired.stalled_cycles",/*future, multibit*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
510 {/*c3*/"machine_clears.cycles", /*future*/ NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
511 {/*d2*/"rat_stalls.flags", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
512 {/*d2*/"rat_stalls.registers", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
513 {/*d2*/"rat_stalls.rob_read_port", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
514 {/*d2*/"rat_stalls.scoreboard", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
515 {/*d2*/"rat_stalls.any", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
516 {/*d4*/"seg_rename_stalls", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
517 {/*f6*/"sq_full_stall_cycles", /*future*/ NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
518 /* "Architectural" events: */
519 {/*3c*/"unhalted-core-cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
520 PERF_EVENTS_SW_EVENT_DEFS
522 /* additional (hidden) aliases for convenience */
524 USE_INTEL_REF_CYCLES (133),
526 {"insts0", "inst_retired.any_p", 0, NULL
, PRELOADS_8
, 0, ABST_NONE
},
527 {"insts1", "inst_retired.any_p", 1, NULL
, PRELOADS_8
, 0, ABST_NONE
},
528 {NULL
, NULL
, 0, NULL
, 0, 0, 0, 0, ABST_NONE
}
/* Hardware-counter table for Intel Sandy Bridge processors.
   Layout, as visible below:
     - macro-provided entries (PERF_EVENTS_SW_EVENT_ALIASES,
       USE_INTEL_REF_CYCLES);
     - user-visible aliases, each carrying an STXT () description;
     - "hidden" entries listed under their raw event names, with NULL in
       place of the underlying-event and description fields;
     - a final all-NULL entry (presumably the end-of-table marker).
   NOTE(review): the meaning of each initializer field is fixed by the
   Hwcentry declaration, which is outside this view -- confirm there.  */
532 static Hwcentry intelSandyBridgeList
[] = {
533 /* see comments for "cycles" and "insts" for intelNehalemList */
534 PERF_EVENTS_SW_EVENT_ALIASES
535 USE_INTEL_REF_CYCLES (100)
536 {"cycles", "cpu_clk_unhalted.thread_p", REGNO_ANY
, STXT ("CPU Cycles"), PRELOADS_75
, 1, ABST_NONE
},
537 {"insts", "inst_retired.any_p", REGNO_ANY
, STXT ("Instructions Executed"), PRELOADS_75
, 0, ABST_NONE
},
540 {"l2m_latency", "mem_trans_retired.load_latency", REGNO_ANY
, STXT ("L2 Cache Miss Est. Latency"), PRELOADS_4
, 65, ABST_EXACT_PEBS_PLUS1
},
542 // See file hwctable.README.sandybridge
543 {"dch", "mem_load_uops_retired.l1_hit", REGNO_ANY
, STXT ("L1 D-cache Hits"), PRELOADS_7
, 0, ABST_NONE
},
544 {"dcm", "mem_load_uops_retired.l1_miss", REGNO_ANY
, STXT ("L1 D-cache Misses"), PRELOADS_65
, 0, ABST_NONE
}, /*mem_load_uops_retired*/
545 {"l2h", "mem_load_uops_retired.l2_hit", REGNO_ANY
, STXT ("L2 Cache Hits"), PRELOADS_65
, 0, ABST_NONE
},
546 {"l2m", "mem_load_uops_retired.l2_miss", REGNO_ANY
, STXT ("L2 Cache Misses"), PRELOADS_6
, 0, ABST_NONE
}, /*mem_load_uops_retired*/
547 // Intel errata BT241 and BT243 say the mem_load_uops_retired.llc* counters may not be reliable on some CPU variants
548 {"l3h", "mem_load_uops_retired.llc_hit", REGNO_ANY
, STXT ("L3 Cache Hit w/o Snoop"), PRELOADS_6
, 0, ABST_NONE
}, // may undercount
549 {"l3m", "longest_lat_cache.miss", REGNO_ANY
, STXT ("L3 Cache Misses"), PRELOADS_5
, 0, ABST_NONE
},
551 /* dtlbm has not been confirmed via Intel white paper */
552 {"dtlbm", "dtlb_load_misses.walk_completed", REGNO_ANY
, STXT ("DTLB Misses"), PRELOADS_6
, 0, ABST_NONE
},
553 {"dtlbm_stall", "dtlb_load_misses.walk_completed", REGNO_ANY
, STXT ("DTLB Misses x 30: Estimated Stalls"), PRELOADS_6
, 30, ABST_NONE
},
/* NOTE(review): "dtlbm" and "dtlbm_stall" are defined a second time below
   with a different underlying event name; presumably an alternate for CPU
   variants that expose the other name -- confirm how lookup resolves
   duplicate aliases.  */
554 {"dtlbm", "dtlb_load_misses.demand_ld_walk_completed", REGNO_ANY
, STXT ("DTLB Misses"), PRELOADS_6
, 0, ABST_NONE
},
555 {"dtlbm_stall", "dtlb_load_misses.demand_ld_walk_completed", REGNO_ANY
, STXT ("DTLB Misses x 30: Estimated Stalls"), PRELOADS_6
, 30, ABST_NONE
},
557 /* explicit definitions of (hidden) entries for proper counters */
558 /* Only counters that can be time converted, or are load-store need to be in this table */
559 {/* 30a */"cpu_clk_unhalted.thread", /*15634344==6940930*/ NULL
, REGNO_ANY
, NULL
, PRELOADS_75
, 1, ABST_NONE
},
560 //{/* 30a */"cpu_clk_unhalted.core", /*6759307*/ NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE},
561 {/*08.04*/"dtlb_load_misses.walk_duration", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
562 {/*08.84*/"dtlb_load_misses.demand_ld_walk_duration", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
563 {/*0d.03*/"int_misc.recovery_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
564 {/*0d.40*/"int_misc.rat_stall_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
565 {/*0e.01*/"uops_issued.stall_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
566 {/*0e.01*/"uops_issued.core_stall_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
567 {/*14.01*/"arith.fpu_div_active", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
568 {/*3c.00*/"cpu_clk_unhalted.thread_p", NULL
, REGNO_ANY
, NULL
, PRELOADS_75
, 1, ABST_NONE
},
569 {/*48.01*/"l1d_pend_miss.pending_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
570 {/*49.04*/"dtlb_store_misses.walk_duration", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
571 {/*59.20*/"partial_rat_stalls.flags_merge_uop", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
572 {/*59.20*/"partial_rat_stalls.flags_merge_uop_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
573 {/*59.40*/"partial_rat_stalls.slow_lea_window", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
574 //{/*59.80*/"partial_rat_stalls.mul_single_uop", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
575 {/*5b.0c*/"resource_stalls2.all_fl_empty", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
576 {/*5b.0f*/"resource_stalls2.all_prf_control", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
577 {/*5b.40*/"resource_stalls2.bob_full", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
578 {/*5b.4f*/"resource_stalls2.ooo_rsrc", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
579 {/*5c.01*/"cpl_cycles.ring0", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
580 {/*5c.02*/"cpl_cycles.ring123", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
581 {/*5c.xx*/"cpl_cycles.ring0_trans", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
582 {/*5c.xx*/"cpl_cycles.ring0_transition", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
583 {/*5e.01*/"rs_events.empty_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
584 {/*60.01*/"offcore_requests_outstanding.cycles_with_demand_data_rd", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
585 {/*60.01*/"offcore_requests_outstanding.demand_data_rd_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
586 {/*60.04*/"offcore_requests_outstanding.cycles_with_demand_rfo", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
587 {/*60.04*/"offcore_requests_outstanding.demand_rfo_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
588 {/*60.08*/"offcore_requests_outstanding.cycles_with_data_rd", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
589 {/*60.08*/"offcore_requests_outstanding.all_data_rd_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
590 {/*60.02*/"offcore_requests_outstanding.demand_code_rd_cycles", /*?*/ NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
591 {/*63.01*/"lock_cycles.split_lock_uc_lock_duration", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
592 {/*63.02*/"lock_cycles.cache_lock_duration", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
593 {/*79.00*/"idq.empty", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
594 {/*79.04*/"idq.mite_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
595 {/*79.08*/"idq.dsb_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
596 {/*79.10*/"idq.ms_dsb_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
597 {/*79.20*/"idq.ms_mite_uops_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
598 {/*79.20*/"idq.ms_mite_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
599 {/*79.30*/"idq.ms_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
600 {/*79.18*/"idq.all_dsb_cycles_any_uops", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
601 {/*79.18*/"idq.all_dsb_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
602 {/*79.18*/"idq.all_dsb_cycles_4_uops", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
603 {/*79.24*/"idq.all_mite_cycles_any_uops", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
604 {/*79.24*/"idq.all_mite_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
605 {/*79.24*/"idq.all_mite_cycles_4_uops", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
606 {/*79.3c*/"idq.mite_all_cycles", /* Linux, but not in docs? */ NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
607 {/*80.04*/"icache.ifetch_stall", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
608 {/*85.04*/"itlb_misses.walk_duration", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
609 {/*87.01*/"ild_stall.lcp", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
610 {/*87.04*/"ild_stall.iq_full", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
611 {/*9c.xx*/"idq_uops_not_delivered.cycles_0_uops_deliv.core", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
612 {/*9c.xx*/"idq_uops_not_delivered.cycles_le_1_uop_deliv.core", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
613 {/*9c.xx*/"idq_uops_not_delivered.cycles_le_2_uop_deliv.core", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
614 {/*9c.xx*/"idq_uops_not_delivered.cycles_le_3_uop_deliv.core", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
615 {/*9c.01*/"idq_uops_not_delivered.cycles_ge_1_uop_deliv.core", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
616 {/*9c.01*/"idq_uops_not_delivered.cycles_fe_was_ok", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
617 {/*a1.01*/"uops_executed_port.port_0", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
618 {/*a1.02*/"uops_executed_port.port_1", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
619 {/*a1.04*/"uops_executed_port.port_2", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
620 {/*a1.08*/"uops_executed_port.port_3", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
621 {/*a1.10*/"uops_executed_port.port_4", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
622 {/*a1.20*/"uops_executed_port.port_5", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
623 {/*a2.01*/"resource_stalls.any", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
624 {/*a2.02*/"resource_stalls.lb", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
625 {/*a2.04*/"resource_stalls.rs", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
626 {/*a2.08*/"resource_stalls.sb", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
627 {/*a2.0a*/"resource_stalls.lb_sb", /*sb-ep*/ NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
628 {/*a2.0e*/"resource_stalls.mem_rs", /*sb-ep*/ NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
629 {/*a2.10*/"resource_stalls.rob", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
630 {/*a2.20*/"resource_stalls.fcsw", /*sb*/ NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
631 {/*a2.40*/"resource_stalls.mxcsr", /*sb*/ NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
632 {/*a2.80*/"resource_stalls.other", /*sb*/ NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
633 {/*a2.F0*/"resource_stalls.ooo_rsrc", /*sb-ep*/ NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
635 {/*a3.01*/"cycle_activity.cycles_l2_pending", /*F6M62*/ NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
636 {/*??.??*/"cycle_activity.stalls_l2_pending", /*F6M62*/ NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
637 {/*a3.02*/"cycle_activity.cycles_ldm_pending", /*F6M62*/ NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
638 {/*??.??*/"cycle_activity.stalls_ldm_pending", /*F6M62*/ NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
639 {/*a3.04*/"cycle_activity.cycles_no_execute", /*F6M62*/ NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
640 {/*a3.04*/"cycle_activity.cycles_no_dispatch", /*sandybridge*/ NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
641 {/*a3.08*/"cycle_activity.cycles_l1d_pending", /*F6M62*/ NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
642 {/*??.??*/"cycle_activity.stalls_l1d_pending", /*F6M62*/ NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
644 {/*ab.02*/"dsb2mite_switches.penalty_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
/* NOTE(review): "uops_executed.stall_cycles" is listed twice in this table
   (here and two entries below) with identical field values -- confirm
   whether the duplicate is intentional.  */
645 {/*b1.??*/"uops_executed.stall_cycles", /*? not in PRM*/ NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
646 {/*b1.01*/"uops_dispatched.stall_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
647 {/*b1.01*/"uops_executed.stall_cycles", /*F6M62*/ NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
648 {/*b1.01*/"uops_executed.cycles_ge_1_uop_exec", /*F6M62,not doc'd*/ NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
649 {/*b1.01*/"uops_executed.cycles_ge_2_uops_exec", /*F6M62,not doc'd*/ NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
650 {/*b1.01*/"uops_executed.cycles_ge_3_uops_exec", /*F6M62,not doc'd*/ NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
651 {/*b1.01*/"uops_executed.cycles_ge_4_uops_exec", /*F6M62,not doc'd*/ NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
653 {/*bf.05*/"l1d_blocks.bank_conflict_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
654 {/*c2.01*/"uops_retired.stall_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
655 {/*c2.01*/"uops_retired.total_cycles", /*cmask==0x10*/ NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
656 {/*c2.01*/"uops_retired.core_stall_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
657 {/*c2.01*/"uops_retired.active_cycles", /*cmask==0x1*/ NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
/* NOTE(review): the disabled region that follows appears truncated in this
   view (its text is fragmentary and no matching end directive is visible)
   -- verify against the complete file before editing it.  */
658 #if 0 // need to see documentation on the following before marking them as cycles
659 uops_executed
.cycles_ge_1_uop_exec
[ / {0 | 1 | 2 | 3}], 1000003 (events
)
660 uops_executed
.cycles_ge_2_uops_exec
[ /
663 uops_executed
.cycles_ge_3_uops_exec
[ /
666 uops_executed
.cycles_ge_4_uops_exec
[ /
670 {/*cd.01*/"mem_trans_retired.load_latency", /*PEBS*/ NULL
, REGNO_ANY
, NULL
, PRELOADS_4
, 65, ABST_EXACT_PEBS_PLUS1
}, //non-standard overflow
672 /* "Architectural" events: */
673 {/*3c*/"unhalted-core-cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
674 PERF_EVENTS_SW_EVENT_DEFS
676 /* additional (hidden) aliases for convenience */
678 USE_INTEL_REF_CYCLES (100),
/* Register-pinned aliases: same event as "insts", but with an explicit
   register number (0 or 1) instead of REGNO_ANY and no description.  */
680 {"insts0", "inst_retired.any_p", 0, NULL
, PRELOADS_8
, 0, ABST_NONE
},
681 {"insts1", "inst_retired.any_p", 1, NULL
, PRELOADS_8
, 0, ABST_NONE
},
/* All-NULL final entry; presumably the end-of-table marker -- confirm
   against the code that walks this table.  */
682 {NULL
, NULL
, 0, NULL
, 0, 0, 0, 0, ABST_NONE
}
/* Hardware-counter table for Intel Haswell processors.
   Layout, as visible below:
     - macro-provided entries (PERF_EVENTS_SW_EVENT_ALIASES,
       USE_INTEL_REF_CYCLES);
     - user-visible aliases, each carrying an STXT () description;
     - "hidden" entries listed under their raw event names, with NULL in
       place of the underlying-event and description fields;
     - a final all-NULL entry (presumably the end-of-table marker).
   NOTE(review): the meaning of each initializer field is fixed by the
   Hwcentry declaration, which is outside this view -- confirm there.  */
686 static Hwcentry intelHaswellList
[] = {
687 /* see comments for "cycles" and "insts" for intelNehalemList */
688 PERF_EVENTS_SW_EVENT_ALIASES
689 USE_INTEL_REF_CYCLES (100)
690 {"cycles", "cpu_clk_unhalted.thread_p", REGNO_ANY
, STXT ("CPU Cycles"), PRELOADS_75
, 1, ABST_NONE
},
691 {"insts", "inst_retired.any_p", REGNO_ANY
, STXT ("Instructions Executed"), PRELOADS_75
, 0, ABST_NONE
},
694 {"l2m_latency", "mem_trans_retired.load_latency", REGNO_ANY
, STXT ("L2 Cache Miss Est. Latency"), PRELOADS_4
, 65, ABST_EXACT_PEBS_PLUS1
},
696 {"dch", "mem_load_uops_retired.l1_hit", REGNO_ANY
, STXT ("L1 D-cache Hits"), PRELOADS_7
, 0, ABST_NONE
},
/* NOTE(review): "dcm", "l2m" and "l3m" are each defined twice below: once
   by event name and once by a raw "0xd1~umask=..." encoding.  Presumably
   the raw form is an alternate for systems that do not recognize the
   named event -- confirm how lookup resolves duplicate aliases.  */
697 {"dcm", "mem_load_uops_retired.l1_miss", REGNO_ANY
, STXT ("L1 D-cache Misses"), PRELOADS_65
, 0, ABST_NONE
}, //mem_load_uops_retired
698 {"dcm", "0xd1~umask=0x08", REGNO_ANY
, STXT ("L1 D-cache Misses"), PRELOADS_65
, 0, ABST_NONE
}, //mem_load_uops_retired
699 {"l2h", "mem_load_uops_retired.l2_hit", REGNO_ANY
, STXT ("L2 Cache Hits"), PRELOADS_65
, 0, ABST_NONE
},
700 {"l2m", "mem_load_uops_retired.l2_miss", REGNO_ANY
, STXT ("L2 Cache Misses"), PRELOADS_6
, 0, ABST_NONE
}, //mem_load_uops_retired
701 {"l2m", "0xd1~umask=0x10", REGNO_ANY
, STXT ("L2 Cache Misses"), PRELOADS_6
, 0, ABST_NONE
}, //mem_load_uops_retired
702 {"l3h", "mem_load_uops_retired.l3_hit", REGNO_ANY
, STXT ("L3 Cache Hit w/o Snoop"), PRELOADS_6
, 0, ABST_NONE
},
703 {"l3m", "mem_load_uops_retired.l3_miss", REGNO_ANY
, STXT ("L3 Cache Misses"), PRELOADS_5
, 0, ABST_NONE
}, //mem_load_uops_retired
704 {"l3m", "0xd1~umask=0x20", REGNO_ANY
, STXT ("L3 Cache Misses"), PRELOADS_5
, 0, ABST_NONE
}, //mem_load_uops_retired
706 /* dtlbm has not been confirmed via Intel white paper */
707 {"dtlbm", "dtlb_load_misses.walk_completed", REGNO_ANY
, STXT ("DTLB Misses"), PRELOADS_6
, 0, ABST_NONE
},
708 {"dtlbm_stall", "dtlb_load_misses.walk_completed", REGNO_ANY
, STXT ("DTLB Misses x 30: Estimated Stalls"), PRELOADS_6
, 30, ABST_NONE
},
710 /* explicit definitions of (hidden) entries for proper counters */
711 /* Only counters that can be time converted, or are load-store need to be in this table */
712 {/* 30a */"cpu_clk_unhalted.thread", /*15634344==6940930*/ NULL
, REGNO_ANY
, NULL
, PRELOADS_75
, 1, ABST_NONE
},
713 //{/* 30a */"cpu_clk_unhalted.core", /*6759307*/ NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE},
714 {/*08.10*/"dtlb_load_misses.walk_duration", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
715 {/*0d.03*/"int_misc.recovery_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
716 {/*0e.01*/"uops_issued.stall_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
717 {/*0e.01*/"uops_issued.core_stall_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
718 {/*3c.00*/"cpu_clk_unhalted.thread_p", NULL
, REGNO_ANY
, NULL
, PRELOADS_75
, 1, ABST_NONE
},
719 {/*48.01*/"l1d_pend_miss.pending_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
720 {/*49.04*/"dtlb_store_misses.walk_duration", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
721 {/*5c.01*/"cpl_cycles.ring0", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
722 {/*5c.02*/"cpl_cycles.ring123", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
723 {/*5c.xx*/"cpl_cycles.ring0_trans", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
724 {/*5e.01*/"rs_events.empty_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
725 {/*60.01*/"offcore_requests_outstanding.cycles_with_demand_data_rd", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
726 {/*60.02*/"offcore_requests_outstanding.demand_code_rd_cycles", /*?*/ NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
727 {/*60.04*/"offcore_requests_outstanding.demand_rfo_cycles", /*?*/ NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
728 {/*60.08*/"offcore_requests_outstanding.cycles_with_data_rd", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
729 {/*63.01*/"lock_cycles.split_lock_uc_lock_duration", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
730 {/*63.02*/"lock_cycles.cache_lock_duration", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
731 {/*79.00*/"idq.empty", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
732 {/*79.04*/"idq.mite_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
733 {/*79.08*/"idq.dsb_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
734 {/*79.10*/"idq.ms_dsb_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
735 {/*79.20*/"idq.ms_mite_uops_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
736 {/*79.20*/"idq.ms_mite_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
737 {/*79.30*/"idq.ms_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
738 {/*79.18*/"idq.all_dsb_cycles_any_uops", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
739 {/*79.18*/"idq.all_dsb_cycles_4_uops", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
740 {/*79.24*/"idq.all_mite_cycles_any_uops", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
741 {/*79.24*/"idq.all_mite_cycles_4_uops", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
742 {/*80.04*/"icache.ifetch_stall", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
743 {/*85.04*/"itlb_misses.walk_duration", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
744 {/*87.01*/"ild_stall.lcp", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
}, // Intel SDM says these are stalls, not cycles
745 {/*87.04*/"ild_stall.iq_full", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
746 {/*9c.xx*/"idq_uops_not_delivered.cycles_0_uops_deliv.core", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
747 {/*9c.xx*/"idq_uops_not_delivered.cycles_le_1_uop_deliv.core", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
748 {/*9c.xx*/"idq_uops_not_delivered.cycles_le_2_uop_deliv.core", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
749 {/*9c.xx*/"idq_uops_not_delivered.cycles_le_3_uop_deliv.core", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
750 // {/*9c.01*/"idq_uops_not_delivered.cycles_ge_1_uop_deliv.core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
751 {/*9c.01*/"idq_uops_not_delivered.cycles_fe_was_ok", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
753 {/*a1.01*/"uops_executed_port.port_0", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
754 {/*a1.02*/"uops_executed_port.port_1", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
755 {/*a1.04*/"uops_executed_port.port_2", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
756 {/*a1.08*/"uops_executed_port.port_3", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
757 {/*a1.10*/"uops_executed_port.port_4", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
758 {/*a1.20*/"uops_executed_port.port_5", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
759 {/*a1.40*/"uops_executed_port.port_6", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
760 {/*a1.80*/"uops_executed_port.port_7", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
761 {/*a1.01*/"uops_executed_port.port_0_core", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
762 {/*a1.02*/"uops_executed_port.port_1_core", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
763 {/*a1.04*/"uops_executed_port.port_2_core", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
764 {/*a1.08*/"uops_executed_port.port_3_core", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
765 {/*a1.10*/"uops_executed_port.port_4_core", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
766 {/*a1.20*/"uops_executed_port.port_5_core", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
767 {/*a1.40*/"uops_executed_port.port_6_core", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
768 {/*a1.80*/"uops_executed_port.port_7_core", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
770 {/*a2.01*/"resource_stalls.any", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
771 {/*a2.04*/"resource_stalls.rs", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
772 {/*a2.08*/"resource_stalls.sb", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
773 {/*a2.10*/"resource_stalls.rob", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
775 {/*a3.01*/"cycle_activity.cycles_l2_pending_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
776 // {/*a3.01*/"cycle_activity.cycles_l2_pending", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
777 {/*a3.02*/"cycle_activity.cycles_ldm_pending_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
778 // {/*a3.05*/"cycle_activity.stalls_l2_pending", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
779 {/*a3.08*/"cycle_activity.cycles_l1d_pending_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
780 // {/*a3.??*/"cycle_activity.cycles_no_execute", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
781 // {/*a3.??*/"cycle_activity.stalls_ldm_pending",/*?*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
783 {/*ab.02*/"dsb2mite_switches.penalty_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
785 {/*b1.??*/"uops_executed.stall_cycles", /*? not in PRM*/ NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
786 {/*b1.??*/"uops_executed.cycles_ge_1_uop_exec", /*?*/ NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
787 {/*b1.??*/"uops_executed.cycles_ge_2_uops_exec", /*?*/ NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
788 {/*b1.??*/"uops_executed.cycles_ge_3_uops_exec", /*?*/ NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
789 {/*b1.??*/"uops_executed.cycles_ge_4_uops_exec", /*?*/ NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
791 {/*c2.01*/"uops_retired.stall_cycles", /*cmask==1 + INV*/ NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
792 {/*c2.01*/"uops_retired.total_cycles", /*cmask==0x1*/ NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
793 {/*c2.01*/"uops_retired.core_stall_cycles", /*PEBS Any==1*/ NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
795 {/*c3.01*/"machine_clears.cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
797 {/*ca.1e*/"fp_assist.any", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
799 {/*cd.01*/"mem_trans_retired.load_latency", /*PEBS*/ NULL
, REGNO_ANY
, NULL
, PRELOADS_4
, 65, ABST_EXACT_PEBS_PLUS1
}, //non-standard overflow
801 /* "Architectural" events: */
802 {/*3c*/"unhalted-core-cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
803 PERF_EVENTS_SW_EVENT_DEFS
805 /* additional (hidden) aliases for convenience */
807 USE_INTEL_REF_CYCLES (100),
/* Register-pinned aliases: same event as "insts", but with an explicit
   register number (0 or 1) instead of REGNO_ANY and no description.  */
809 {"insts0", "inst_retired.any_p", 0, NULL
, PRELOADS_8
, 0, ABST_NONE
},
810 {"insts1", "inst_retired.any_p", 1, NULL
, PRELOADS_8
, 0, ABST_NONE
},
/* All-NULL final entry; presumably the end-of-table marker -- confirm
   against the code that walks this table.  */
811 {NULL
, NULL
, 0, NULL
, 0, 0, 0, 0, ABST_NONE
}
815 static Hwcentry intelBroadwellList
[] = {
816 /* see comments for "cycles" and "insts" for intelNehalemList */
817 PERF_EVENTS_SW_EVENT_ALIASES
818 USE_INTEL_REF_CYCLES (100)
819 {"cycles", "cpu_clk_unhalted.thread_p", REGNO_ANY
, STXT ("CPU Cycles"), PRELOADS_75
, 1, ABST_NONE
},
820 {"insts", "inst_retired.any_p", REGNO_ANY
, STXT ("Instructions Executed"), PRELOADS_75
, 0, ABST_NONE
},
823 {"l2m_latency", "mem_trans_retired.load_latency", REGNO_ANY
, STXT ("L2 Cache Miss Est. Latency"), PRELOADS_4
, 65, ABST_EXACT_PEBS_PLUS1
},
824 {/*cd.01*/"mem_trans_retired.load_latency", NULL
, REGNO_ANY
, NULL
, PRELOADS_4
, 65, ABST_EXACT_PEBS_PLUS1
},
826 // aliases (the first set are PEBS, but on Intel the only precise counter we support is l2m_latency)
827 {"dch", "mem_load_uops_retired.l1_hit", REGNO_ANY
, STXT ("L1 D-cache Hits"), PRELOADS_7
, 0, ABST_NONE
},
828 {"dcm", "mem_load_uops_retired.l1_miss", REGNO_ANY
, STXT ("L1 D-cache Misses"), PRELOADS_65
, 0, ABST_NONE
},
829 {"l2h", "mem_load_uops_retired.l2_hit", REGNO_ANY
, STXT ("L2 Cache Hits"), PRELOADS_65
, 0, ABST_NONE
},
830 {"l2m", "mem_load_uops_retired.l2_miss", REGNO_ANY
, STXT ("L2 Cache Misses"), PRELOADS_6
, 0, ABST_NONE
},
831 {"l3h", "mem_load_uops_retired.l3_hit", REGNO_ANY
, STXT ("L3 Cache Hits"), PRELOADS_6
, 0, ABST_NONE
},
832 {"l3m", "mem_load_uops_retired.l3_miss", REGNO_ANY
, STXT ("L3 Cache Misses"), PRELOADS_5
, 0, ABST_NONE
},
833 {"dtlbm", "dtlb_load_misses.walk_completed", REGNO_ANY
, STXT ("DTLB Misses"), PRELOADS_6
, 0, ABST_NONE
},
835 // counters that can be time converted (add FFCs if we decide to support them)
836 // counters that are load-store (did not include any... do we want to?)
837 {/*08.10*/"dtlb_load_misses.walk_duration", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
838 {/*0d.03*/"int_misc.recovery_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
839 {/*0e.01*/"uops_issued.stall_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
840 {/*0e.01*/"uops_issued.core_stall_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
841 {/*14.01*/"arith.fpu_div_active", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
842 {/*3c.00*/"cpu_clk_unhalted.thread_p_any", NULL
, REGNO_ANY
, NULL
, PRELOADS_75
, 1, ABST_NONE
},
843 {/*3c.00*/"cpu_clk_unhalted.thread_p", NULL
, REGNO_ANY
, NULL
, PRELOADS_75
, 1, ABST_NONE
},
844 {/*3c.02*/"cpu_clk_thread_unhalted.one_thread_active", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
845 {/*48.01*/"l1d_pend_miss.pending_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
846 {/*48.01*/"l1d_pend_miss.pending_cycles_any", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
847 {/*49.10*/"dtlb_store_misses.walk_duration", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
848 {/*4f.10*/"ept.walk_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
849 {/*5c.01*/"cpl_cycles.ring0", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
850 {/*5c.01*/"cpl_cycles.ring0_trans", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
851 {/*5c.02*/"cpl_cycles.ring123", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
852 {/*5e.01*/"rs_events.empty_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
853 {/*60.01*/"offcore_requests_outstanding.cycles_with_demand_data_rd", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
854 {/*60.02*/"offcore_requests_outstanding.demand_code_rd_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
855 {/*60.04*/"offcore_requests_outstanding.demand_rfo_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
856 {/*60.08*/"offcore_requests_outstanding.cycles_with_data_rd", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
857 {/*63.01*/"lock_cycles.split_lock_uc_lock_duration", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
858 {/*63.02*/"lock_cycles.cache_lock_duration", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
859 {/*79.02*/"idq.empty", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
860 {/*79.04*/"idq.mite_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
861 {/*79.08*/"idq.dsb_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
862 {/*79.10*/"idq.ms_dsb_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
863 {/*79.18*/"idq.all_dsb_cycles_4_uops", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
864 {/*79.18*/"idq.all_dsb_cycles_any_uops", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
865 {/*79.24*/"idq.all_mite_cycles_4_uops", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
866 {/*79.24*/"idq.all_mite_cycles_any_uops", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
867 {/*79.30*/"idq.ms_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
868 {/*85.10*/"itlb_misses.walk_duration", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
869 {/*9c.xx*/"idq_uops_not_delivered.cycles_0_uops_deliv.core", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
870 {/*9c.xx*/"idq_uops_not_delivered.cycles_le_1_uop_deliv.core", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
871 {/*9c.xx*/"idq_uops_not_delivered.cycles_le_2_uop_deliv.core", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
872 {/*9c.xx*/"idq_uops_not_delivered.cycles_le_3_uop_deliv.core", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
873 {/*9c.01*/"idq_uops_not_delivered.cycles_fe_was_ok", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
874 {/*a1.01*/"uops_executed_port.port_0", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
875 {/*a1.02*/"uops_executed_port.port_1", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
876 {/*a1.04*/"uops_executed_port.port_2", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
877 {/*a1.08*/"uops_executed_port.port_3", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
878 {/*a1.10*/"uops_executed_port.port_4", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
879 {/*a1.20*/"uops_executed_port.port_5", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
880 {/*a1.40*/"uops_executed_port.port_6", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
881 {/*a1.80*/"uops_executed_port.port_7", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
882 {/*a1.01*/"uops_executed_port.port_0_core", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
883 {/*a1.02*/"uops_executed_port.port_1_core", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
884 {/*a1.04*/"uops_executed_port.port_2_core", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
885 {/*a1.08*/"uops_executed_port.port_3_core", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
886 {/*a1.10*/"uops_executed_port.port_4_core", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
887 {/*a1.20*/"uops_executed_port.port_5_core", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
888 {/*a1.40*/"uops_executed_port.port_6_core", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
889 {/*a1.80*/"uops_executed_port.port_7_core", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
890 {/*a2.01*/"resource_stalls.any", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
891 {/*a2.04*/"resource_stalls.rs", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
892 {/*a2.08*/"resource_stalls.sb", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
893 {/*a2.10*/"resource_stalls.rob", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
894 {/*a3.01*/"cycle_activity.cycles_l2_pending", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
895 {/*a3.02*/"cycle_activity.cycles_ldm_pending", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
896 {/*a3.04*/"cycle_activity.cycles_no_execute", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
897 {/*a3.08*/"cycle_activity.cycles_l1d_pending", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
898 {/*a8.01*/"lsd.cycles_active", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
899 {/*a8.01*/"lsd.cycles_4_uops", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
900 {/*ab.02*/"dsb2mite_switches.penalty_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
901 {/*b1.01*/"uops_executed.stall_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
902 {/*c2.01*/"uops_retired.stall_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
903 {/*c2.01*/"uops_retired.total_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
904 {/*c2.01*/"uops_retired.core_stall_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
905 {/*c3.01*/"machine_clears.cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
906 {/*ca.1e*/"fp_assist.any", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
908 /* "Architectural" events: */
909 {/*3c*/"unhalted-core-cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
910 PERF_EVENTS_SW_EVENT_DEFS
912 /* additional (hidden) aliases for convenience */
914 USE_INTEL_REF_CYCLES (100),
916 {"insts0", "inst_retired.any_p", 0, NULL
, PRELOADS_8
, 0, ABST_NONE
},
917 {"insts1", "inst_retired.any_p", 1, NULL
, PRELOADS_8
, 0, ABST_NONE
},
918 {NULL
, NULL
, 0, NULL
, 0, 0, 0, 0, ABST_NONE
}
921 static Hwcentry intelSkylakeList
[] = {
922 /* see comments for "cycles" and "insts" for intelNehalemList */
923 PERF_EVENTS_SW_EVENT_ALIASES
924 USE_INTEL_REF_CYCLES (25)
925 {"cycles", "cpu_clk_unhalted.thread_p", REGNO_ANY
, STXT ("CPU Cycles"), PRELOADS_75
, 1, ABST_NONE
},
926 {"insts", "inst_retired.any_p", REGNO_ANY
, STXT ("Instructions Executed"), PRELOADS_75
, 0, ABST_NONE
},
929 {"l2m_latency", "mem_trans_retired.load_latency", REGNO_ANY
, STXT ("L2 Cache Miss Est. Latency"), PRELOADS_4
, 65, ABST_EXACT_PEBS_PLUS1
},
930 {/*cd.01*/"mem_trans_retired.load_latency", NULL
, REGNO_ANY
, NULL
, PRELOADS_4
, 65, ABST_EXACT_PEBS_PLUS1
},
932 // aliases (the first set are PEBS, but on Intel the only precise counter we support is l2m_latency)
933 {"dch", "mem_load_retired.l1_hit", REGNO_ANY
, STXT ("L1 D-cache Hits"), PRELOADS_7
, 0, ABST_NONE
},
934 {"dcm", "mem_load_retired.l1_miss", REGNO_ANY
, STXT ("L1 D-cache Misses"), PRELOADS_65
, 0, ABST_NONE
},
935 {"l2h", "mem_load_retired.l2_hit", REGNO_ANY
, STXT ("L2 Cache Hits"), PRELOADS_65
, 0, ABST_NONE
},
936 {"l2m", "mem_load_retired.l2_miss", REGNO_ANY
, STXT ("L2 Cache Misses"), PRELOADS_6
, 0, ABST_NONE
},
937 {"l2m_stall", "cycle_activity.stalls_l2_miss", REGNO_ANY
, STXT ("L2 Cache Miss Stall"), PRELOADS_7
, 1, ABST_NONE
}, // needs validation
938 {"l3h", "mem_load_retired.l3_hit", REGNO_ANY
, STXT ("L3 Cache Hits"), PRELOADS_6
, 0, ABST_NONE
},
939 {"l3m", "mem_load_retired.l3_miss", REGNO_ANY
, STXT ("L3 Cache Misses"), PRELOADS_5
, 0, ABST_NONE
},
940 {"l3m_stall", "cycle_activity.stalls_l3_miss", REGNO_ANY
, STXT ("L3 Cache Miss Stall"), PRELOADS_7
, 1, ABST_NONE
}, // needs validation
941 {"dtlbm_stall", "dtlb_load_misses.walk_active", REGNO_ANY
, STXT ("DTLB Miss Est Stall"), PRELOADS_7
, 1, ABST_NONE
, STXT ("Estimated time stalled on DTLB misses requiring a tablewalk. Does not include time related to STLB hits.")}, // needs validation
942 // PEBS mem_inst_retired.stlb_miss_loads for finding location of DTLB issues
943 // what about: dtlb_load_misses.walk_completed, dtlb_load_misses.walk_pending, dtlb_load_misses.stlb_hit
945 {"fp_scalar", "fp_arith_inst_retired.scalar_double~umask=0x3", REGNO_ANY
, STXT ("FP Scalar uOps"), PRELOADS_7
, 0, ABST_NONE
, STXT ("Floating-point scalar micro-ops that retired")},
946 {"fp_vector", "fp_arith_inst_retired.128b_packed_double~umask=0x3c", REGNO_ANY
, STXT ("FP Vector uOps"), /*needs test*/ PRELOADS_7
, 0, ABST_NONE
, STXT ("Floating-point vector micro-ops that retired")},
948 // counters that can be time converted (add FFCs if we decide to support them)
949 // counters that are load-store (did not include any... do we want to?)
950 {/*08.10*/"dtlb_load_misses.walk_active", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
951 {/*08.10*/"dtlb_load_misses.walk_pending", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
952 {/*0d.01*/"int_misc.recovery_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
953 {/*0d.01*/"int_misc.recovery_cycles_any", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
954 {/*0d.80*/"int_misc.clear_resteer_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
955 {/*0e.01*/"uops_issued.stall_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
956 {/*14.01*/"arith.divider_active", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
957 {/*3c.00*/"cpu_clk_unhalted.ring0_trans", NULL
, REGNO_ANY
, NULL
, PRELOADS_75
, 1, ABST_NONE
},
958 {/*3c.00*/"cpu_clk_unhalted.thread_p_any", NULL
, REGNO_ANY
, NULL
, PRELOADS_75
, 1, ABST_NONE
},
959 {/*3c.00*/"cpu_clk_unhalted.thread_p", NULL
, REGNO_ANY
, NULL
, PRELOADS_75
, 1, ABST_NONE
},
960 {/*3c.00*/"cpu_clk_unhalted.core", NULL
, REGNO_ANY
, NULL
, PRELOADS_75
, 1, ABST_NONE
},
961 {/*48.01*/"l1d_pend_miss.pending_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
962 {/*48.01*/"l1d_pend_miss.pending_cycles_any", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
963 {/*49.10*/"dtlb_store_misses.walk_active", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
964 {/*49.10*/"dtlb_store_misses.walk_pending", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
965 {/*4f.10*/"ept.walk_pending", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
966 {/*5e.01*/"rs_events.empty_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
967 {/*60.01*/"offcore_requests_outstanding.cycles_with_demand_data_rd", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
968 {/*60.01*/"offcore_requests_outstanding.demand_data_rd_ge_6", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
969 {/*60.02*/"offcore_requests_outstanding.cycles_with_demand_code_rd", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
970 {/*60.04*/"offcore_requests_outstanding.cycles_with_demand_rfo", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
971 {/*60.08*/"offcore_requests_outstanding.cycles_with_data_rd", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
972 {/*60.10*/"offcore_requests_outstanding.cycles_with_l3_miss_demand_data_rd", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
973 {/*60.10*/"offcore_requests_outstanding.l3_miss_demand_data_rd_ge_6", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
974 {/*63.02*/"lock_cycles.cache_lock_duration", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
975 {/*79.04*/"idq.mite_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
976 {/*79.08*/"idq.dsb_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
977 {/*79.10*/"idq.ms_dsb_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
978 {/*79.18*/"idq.all_dsb_cycles_4_uops", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
979 {/*79.18*/"idq.all_dsb_cycles_any_uops", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
980 {/*79.24*/"idq.all_mite_cycles_4_uops", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
981 {/*79.24*/"idq.all_mite_cycles_any_uops", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
982 {/*79.30*/"idq.ms_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
983 {/*80.04*/"icache_16b.ifdata_stall", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
984 {/*83.04*/"icache_64b.iftag_stall", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
985 {/*85.10*/"itlb_misses.walk_active", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
986 {/*85.10*/"itlb_misses.walk_pending", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
987 {/*87.01*/"ild_stall.lcp", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
988 {/*9c.01*/"idq_uops_not_delivered.cycles_0_uops_deliv.core", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
989 {/*9c.01*/"idq_uops_not_delivered.cycles_le_1_uop_deliv.core", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
990 {/*9c.01*/"idq_uops_not_delivered.cycles_le_2_uop_deliv.core", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
991 {/*9c.01*/"idq_uops_not_delivered.cycles_le_3_uop_deliv.core", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
992 {/*9c.01*/"idq_uops_not_delivered.cycles_fe_was_ok", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
993 {/*a1.01*/"uops_dispatched_port.port_0", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
994 {/*a1.02*/"uops_dispatched_port.port_1", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
995 {/*a1.04*/"uops_dispatched_port.port_2", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
996 {/*a1.08*/"uops_dispatched_port.port_3", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
997 {/*a1.10*/"uops_dispatched_port.port_4", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
998 {/*a1.20*/"uops_dispatched_port.port_5", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
999 {/*a1.40*/"uops_dispatched_port.port_6", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1000 {/*a1.80*/"uops_dispatched_port.port_7", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1001 {/*a2.01*/"resource_stalls.any", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1002 {/*a2.08*/"resource_stalls.sb", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1003 {/*a3.01*/"cycle_activity.cycles_l2_miss", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1004 {/*a3.02*/"cycle_activity.cycles_l3_miss", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1005 {/*a3.04*/"cycle_activity.stalls_total", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1006 {/*a3.05*/"cycle_activity.stalls_l2_miss", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1007 {/*a3.06*/"cycle_activity.stalls_l3_miss", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1008 {/*a3.08*/"cycle_activity.cycles_l1d_miss", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1009 {/*a3.0c*/"cycle_activity.stalls_l1d_miss", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1010 {/*a3.10*/"cycle_activity.cycles_mem_any", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1011 {/*a3.14*/"cycle_activity.stalls_mem_any", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1012 {/*a6.01*/"exe_activity.exe_bound_0_ports", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1013 {/*a6.02*/"exe_activity.1_ports_util", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1014 {/*a6.04*/"exe_activity.2_ports_util", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1015 {/*a6.08*/"exe_activity.3_ports_util", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1016 {/*a6.10*/"exe_activity.4_ports_util", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1017 {/*a6.40*/"exe_activity.bound_on_stores", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1018 {/*a8.01*/"lsd.cycles_4_uops", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1019 {/*a8.01*/"lsd.cycles_active", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1020 {/*ab.02*/"dsb2mite_switches.penalty_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1021 {/*b1.01*/"uops_executed.cycles_ge_1_uop_exec", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1022 {/*b1.01*/"uops_executed.cycles_ge_2_uops_exec", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1023 {/*b1.01*/"uops_executed.cycles_ge_3_uops_exec", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1024 {/*b1.01*/"uops_executed.cycles_ge_4_uops_exec", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1025 {/*b1.01*/"uops_executed.stall_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1026 {/*b1.02*/"uops_executed.core_cycles_ge_1", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1027 {/*b1.02*/"uops_executed.core_cycles_ge_2", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1028 {/*b1.02*/"uops_executed.core_cycles_ge_3", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1029 {/*b1.02*/"uops_executed.core_cycles_ge_4", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1030 {/*b1.02*/"uops_executed.core_cycles_none", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1031 {/*c0.1*/"inst_retired.total_cycles_ps", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1032 {/*c2.01*/"uops_retired.stall_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1033 {/*c2.01*/"uops_retired.total_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1034 {/*ca.1e*/"fp_assist.any", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1036 /* "Architectural" events: */
1037 {/* FFC */"cpu_clk_unhalted.thread", NULL
, REGNO_ANY
, NULL
, PRELOADS_75
, 1, ABST_NONE
},
1038 {/* FFC */"unhalted-core-cycles", NULL
, REGNO_ANY
, NULL
, PRELOADS_75
, 1, ABST_NONE
},
1039 PERF_EVENTS_SW_EVENT_DEFS
1041 /* additional (hidden) aliases for convenience */
1043 USE_INTEL_REF_CYCLES (25),
1045 {"insts0", "inst_retired.any_p", 0, NULL
, PRELOADS_8
, 0, ABST_NONE
},
1046 {"insts1", "inst_retired.any_p", 1, NULL
, PRELOADS_8
, 0, ABST_NONE
},
1047 {NULL
, NULL
, 0, NULL
, 0, 0, 0, 0, ABST_NONE
}
1050 static Hwcentry intelLinuxUnknown
[] = {
1051 PERF_EVENTS_SW_EVENT_ALIASES
1052 // USE_INTEL_REF_CYCLES(100) // freq is unknown
1053 {"cycles", "unhalted-core-cycles", REGNO_ANY
, STXT ("CPU Cycles"), PRELOADS_75
, 1, ABST_NONE
},
1054 {"cycles", "PERF_COUNT_HW_CPU_CYCLES", REGNO_ANY
, STXT ("CPU Cycles"), PRELOADS_75
, 1, ABST_NONE
},
1055 {"insts", "instruction-retired", REGNO_ANY
, STXT ("Instructions Executed"), PRELOADS_75
, 0, ABST_NONE
},
1056 {"insts", "PERF_COUNT_HW_INSTRUCTIONS", REGNO_ANY
, STXT ("Instructions Executed"), PRELOADS_75
, 0, ABST_NONE
},
1058 {"dcm", "PERF_COUNT_HW_CACHE_MISSES.L1D", REGNO_ANY
, STXT ("L1 D-cache Misses"), PRELOADS_65
, 0, ABST_NONE
},
1059 {"llm", "llc-misses", REGNO_ANY
, STXT ("Last-Level Cache Misses"), PRELOADS_5
, 0, ABST_NONE
},
1060 {"llm", "PERF_COUNT_HW_CACHE_MISSES.LL", REGNO_ANY
, STXT ("Last-Level Cache Misses"), PRELOADS_5
, 0, ABST_NONE
},
1062 {"br_msp", "branch-misses-retired", REGNO_ANY
, STXT ("Branch Mispredict"), PRELOADS_6
, 0, ABST_NONE
},
1063 {"br_msp", "PERF_COUNT_HW_BRANCH_MISSES", REGNO_ANY
, STXT ("Branch Mispredict"), PRELOADS_6
, 0, ABST_NONE
},
1064 {"br_ins", "branch-instruction-retired", REGNO_ANY
, STXT ("Branch Instructions"), PRELOADS_7
, 0, ABST_NONE
},
1065 {"br_ins", "PERF_COUNT_HW_BRANCH_INSTRUCTIONS", REGNO_ANY
, STXT ("Branch Instructions"), PRELOADS_7
, 0, ABST_NONE
},
1067 // counters that can be time converted (add FFCs if we decide to support them)
1068 // counters that are load-store (did not include any... do we want to?)
1069 /* "Architectural" events: */
1070 {/* FFC */"cpu_clk_unhalted.thread", NULL
, REGNO_ANY
, NULL
, PRELOADS_75
, 1, ABST_NONE
},
1071 {/* FFC */"unhalted-core-cycles", NULL
, REGNO_ANY
, NULL
, PRELOADS_75
, 1, ABST_NONE
},
1072 PERF_EVENTS_SW_EVENT_DEFS
1074 /* additional (hidden) aliases for convenience */
1075 {"cycles0", "unhalted-reference-cycles", 0, NULL
, PRELOADS_6
, -(25), ABST_NONE
}, //YXXX -can't do with ref cycles #
1076 {"cycles0", "PERF_COUNT_HW_BUS_CYCLES", 0, NULL
, PRELOADS_6
, -(25), ABST_NONE
}, //YXXX -can't do with ref cycles #
1077 {"cycles1", "unhalted-reference-cycles", 1, NULL
, PRELOADS_65
, -(25), ABST_NONE
}, //YXXX - can't do with ref cycles #
1078 {"cycles1", "PERF_COUNT_HW_BUS_CYCLES", 1, NULL
, PRELOADS_65
, -(25), ABST_NONE
}, //YXXX - can't do with ref cycles #
1079 {"insts0", "instruction-retired", 0, NULL
, PRELOADS_8
, 0, ABST_NONE
},
1080 {"insts0", "PERF_COUNT_HW_INSTRUCTIONS", 0, NULL
, PRELOADS_8
, 0, ABST_NONE
},
1081 {"insts1", "instruction-retired", 1, NULL
, PRELOADS_8
, 0, ABST_NONE
},
1082 {"insts1", "PERF_COUNT_HW_INSTRUCTIONS", 1, NULL
, PRELOADS_8
, 0, ABST_NONE
},
1083 {NULL
, NULL
, 0, NULL
, 0, 0, 0, 0, ABST_NONE
}
1086 static Hwcentry intelAtomList
[] = {
1087 {"cycles", "cpu_clk_unhalted.core", /*6759307*/ REGNO_ANY
, STXT ("CPU Cycles"), PRELOADS_7
, 1, ABST_NONE
},
1088 {"cycles", "cpu_clk_unhalted.thread", /*6759307*/ REGNO_ANY
, STXT ("CPU Cycles"), PRELOADS_7
, 1, ABST_NONE
},
1089 {"insts", "instr_retired.any", REGNO_ANY
, STXT ("Instructions Executed"), PRELOADS_7
, 0, ABST_NONE
},
1091 /* explicit definitions of (hidden) entries for proper counters */
1092 /* Only counters that can be time converted, or are load-store need to be in this table */
1093 /* XXXX add core2-related entries if appropriate */
1094 {/*30A*/"cpu_clk_unhalted.core", /*6759307*/ NULL
, REGNO_ANY
, NULL
, PRELOADS_7
, 1, ABST_NONE
},
1095 {/*30A*/"cpu_clk_unhalted.thread", /*6759307*/ NULL
, REGNO_ANY
, NULL
, PRELOADS_7
, 1, ABST_NONE
},
1096 {/*0c*/"page_walks.cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1097 {/*14*/"cycles_div_busy", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1098 {/*21*/"l2_ads", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1099 {/*22*/"l2_dbus_busy", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1100 {/*32*/"l2_no_req", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1101 {/*3c*/"cpu_clk_unhalted.core_p", NULL
, REGNO_ANY
, NULL
, PRELOADS_7
, 1, ABST_NONE
},
1102 {/*3c*/"cpu_clk_unhalted.bus", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1103 {/*3c*/"cpu_clk_unhalted.no_other", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1104 {/*62*/"bus_drdy_clocks", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1105 {/*63*/"bus_lock_clocks", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1106 {/*64*/"bus_data_rcv", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1107 {/*7a*/"bus_hit_drv", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1108 {/*7b*/"bus_hitm_drv", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1109 {/*7d*/"busq_empty", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1110 {/*7e*/"snoop_stall_drv", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1111 {/*7f*/"bus_io_wait", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1112 {/*c6*/"cycles_int_masked.cycles_int_masked", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1113 {/*c6*/"cycles_int_masked.cycles_int_pending_and_masked", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1115 /* "Architectural" events: */
1116 {/*3c*/"unhalted-core-cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1118 /* additional (hidden) aliases for convenience */
1119 {"cycles0", "cpu_clk_unhalted.core_p", 0, NULL
, PRELOADS_75
, 1, ABST_NONE
},
1120 {"cycles1", "cpu_clk_unhalted.core_p", 1, NULL
, PRELOADS_75
, 1, ABST_NONE
},
1121 {"insts0", "inst_retired.any_p", 0, NULL
, PRELOADS_75
, 0, ABST_NONE
},
1122 {"insts1", "inst_retired.any_p", 1, NULL
, PRELOADS_75
, 0, ABST_NONE
},
1123 {NULL
, NULL
, 0, NULL
, 0, 0, 0, 0, ABST_NONE
}
1126 static Hwcentry amd_opteron_10h_11h
[] = {
1127 {"cycles", "BU_cpu_clk_unhalted", REGNO_ANY
, STXT ("CPU Cycles"), PRELOADS_75
, 1, ABST_NONE
},
1128 {"insts", "FR_retired_x86_instr_w_excp_intr", REGNO_ANY
, STXT ("Instructions Executed"), PRELOADS_75
, 0, ABST_NONE
},
1129 {"icr", "IC_fetch", REGNO_ANY
, STXT ("L1 I-cache Refs"), PRELOADS_7
, 0, ABST_NONE
}, /* new */
1130 {"icm", "IC_miss", REGNO_ANY
, STXT ("L1 I-cache Misses"), PRELOADS_6
, 0, ABST_NONE
},
1131 {"l2itlbh", "IC_itlb_L1_miss_L2_hit", REGNO_ANY
, STXT ("L2 ITLB Hits"), PRELOADS_6
, 0, ABST_NONE
}, /* new */
1132 {"l2itlbm", "IC_itlb_L1_miss_L2_miss", REGNO_ANY
, STXT ("L2 ITLB Misses"), PRELOADS_5
, 0, ABST_NONE
}, /* new */
1133 {"l2ir", "BU_internal_L2_req~umask=0x1", REGNO_ANY
, STXT ("L2 I-cache Refs"), PRELOADS_6
, 0, ABST_NONE
},
1134 {"l2im", "BU_fill_req_missed_L2~umask=0x1", REGNO_ANY
, STXT ("L2 I-cache Misses"), PRELOADS_4
, 0, ABST_NONE
},
1135 {"dcr", "DC_access", REGNO_ANY
, STXT ("L1 D-cache Refs"), PRELOADS_7
, 0, ABST_NONE
}, /* new */
1136 {"dcm", "DC_miss", REGNO_ANY
, STXT ("L1 D-cache Misses"), PRELOADS_65
, 0, ABST_NONE
}, /* new */
1137 {"l2dtlbh", "DC_dtlb_L1_miss_L2_hit", REGNO_ANY
, STXT ("L2 DTLB Hits"), PRELOADS_6
, 0, ABST_NONE
}, /* new */
1138 {"l2dtlbm", "DC_dtlb_L1_miss_L2_miss", REGNO_ANY
, STXT ("L2 DTLB Misses"), PRELOADS_5
, 0, ABST_NONE
}, /* new */
1139 {"l2dr", "BU_internal_L2_req~umask=0x2", REGNO_ANY
, STXT ("L2 D-cache Refs"), PRELOADS_65
, 0, ABST_NONE
}, /* hwc_cache_load: 1.6x overcount on shanghai01 */
1140 {"l2dm", "BU_fill_req_missed_L2~umask=0x2", REGNO_ANY
, STXT ("L2 D-cache Misses"), PRELOADS_6
, 0, ABST_NONE
}, /* new */
1141 {"fpadd", "FP_dispatched_fpu_ops~umask=0x1", REGNO_ANY
, STXT ("FP Adds"), PRELOADS_7
, 0, ABST_NONE
},
1142 {"fpmul", "FP_dispatched_fpu_ops~umask=0x2", REGNO_ANY
, STXT ("FP Muls"), PRELOADS_7
, 0, ABST_NONE
},
1143 {"fpustall", "FR_dispatch_stall_fpu_full", REGNO_ANY
, STXT ("FPU Stall Cycles"), PRELOADS_7
, 1, ABST_NONE
},
1144 {"memstall", "FR_dispatch_stall_ls_full", REGNO_ANY
, STXT ("Memory Unit Stall Cycles"), PRELOADS_7
, 1, ABST_NONE
},
1145 // For PAPI mappings, see hwctable.README.family10h
1146 // For PAPI mappings, see hwctable.README.opteron
1148 /* explicit definitions of (hidden) entries for proper counters */
1149 /* Only counters that can be time converted, or are load-store need to be in this table */
1150 {"BU_cpu_clk_unhalted", NULL
, REGNO_ANY
, NULL
, PRELOADS_75
, 1, ABST_NONE
},
1151 {"FP_cycles_no_fpu_ops_retired", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1152 {"FP_serialize_ops_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1153 {"FR_dispatch_stall_branch_abort_to_retire", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_TBD
},
1154 {"FR_dispatch_stall_far_ctl_trsfr_resync_branch_pend", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_TBD
},
1155 {"FR_dispatch_stall_fpu_full", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_TBD
},
1156 {"FR_dispatch_stall_ls_full", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_TBD
},
1157 {"FR_dispatch_stall_reorder_buffer_full", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_TBD
},
1158 {"FR_dispatch_stall_resv_stations_full", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_TBD
},
1159 {"FR_dispatch_stall_segment_load", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_TBD
},
1160 {"FR_dispatch_stall_serialization", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_TBD
},
1161 {"FR_dispatch_stall_waiting_all_quiet", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_TBD
},
1162 {"FR_dispatch_stalls", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_TBD
},
1163 {"FR_intr_masked_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_TBD
},
1164 {"FR_intr_masked_while_pending_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_TBD
},
1165 {"FR_nothing_to_dispatch", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_TBD
},
1166 {"IC_instr_fetch_stall", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_TBD
},
1167 {"LS_buffer_2_full", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_TBD
},
1168 {"NB_mem_ctrlr_dram_cmd_slots_missed", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1169 {"NB_mem_ctrlr_turnaround", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_TBD
},
1171 /* additional (hidden) aliases, for convenience */
1172 {"cycles0", "BU_cpu_clk_unhalted", 0, NULL
, PRELOADS_8
, 1, ABST_NONE
},
1173 {"cycles1", "BU_cpu_clk_unhalted", 1, NULL
, PRELOADS_8
, 1, ABST_NONE
},
1174 {"insts0", "FR_retired_x86_instr_w_excp_intr", 0, NULL
, PRELOADS_8
, 0, ABST_NONE
},
1175 {"insts1", "FR_retired_x86_instr_w_excp_intr", 1, NULL
, PRELOADS_8
, 0, ABST_NONE
},
1176 {NULL
, NULL
, 0, NULL
, 0, 0, 0, 0, ABST_NONE
}
/* Counter table for AMD family 15h processors; the cputabs[] entry tagged
   CPC_AMD_FAM_15H points at this table.
   Entries are positional.  Judging from the values used here the order
   appears to be: name, internal name, register number, metric text,
   overflow preload value, time-convertible flag, backtracking type
   -- TODO confirm against the Hwcentry declaration.
   The table is terminated by an entry whose name is NULL.  */
1179 static Hwcentry amd_15h
[] = {
/* Visible aliases: name, internal name and metric text are all set.  */
1180 {"cycles", "CU_cpu_clk_unhalted", REGNO_ANY
, STXT ("CPU Cycles"), PRELOADS_75
, 1, ABST_NONE
},
1181 {"insts", "EX_retired_instr_w_excp_intr", REGNO_ANY
, STXT ("Instructions Executed"), PRELOADS_75
, 0, ABST_NONE
},
1182 {"icr", "IC_fetch", REGNO_ANY
, STXT ("L1 I-cache Refs"), PRELOADS_7
, 0, ABST_NONE
}, /* new */
1183 {"icm", "IC_miss", REGNO_ANY
, STXT ("L1 I-cache Misses"), PRELOADS_6
, 0, ABST_NONE
},
1184 {"l2im", "IC_refill_from_system", REGNO_ANY
, STXT ("L2 I-cache Misses"), PRELOADS_6
, 0, ABST_NONE
},
1185 {"dcr", "DC_access", REGNO_ANY
, STXT ("L1 D-cache Refs"), PRELOADS_7
, 0, ABST_NONE
}, /* new */
1186 {"dcm", "DC_miss~umask=0x3", REGNO_ANY
, STXT ("L1 D-cache Misses"), PRELOADS_65
, 0, ABST_NONE
}, /* new */
1187 {"l2dm", "DC_refill_from_system", REGNO_ANY
, STXT ("L2 D-cache Misses"), PRELOADS_6
, 0, ABST_NONE
}, /* new */
1188 {"dtlbm", "DC_unified_tlb_miss~umask=0x7", REGNO_ANY
, STXT ("L2 DTLB Misses"), PRELOADS_5
, 0, ABST_NONE
}, /* new */
1189 // For PAPI mappings, see hwctable.README.family15h
1191 /* explicit definitions of (hidden) entries for proper counters */
1192 /* Only counters that can be time converted, or are load-store, need to be in this table */
/* These entries have no internal name and no metric text.  The leading
   comment in each entry (e.g. 001.xx) presumably records the hardware
   event code -- TODO confirm.  */
1193 {/*001.xx*/"FP_scheduler_empty", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1194 {/*006.xx*/"FP_bottom_execute_uops_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1195 {/*023.xx*/"LS_ldq_stq_full", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1196 {/*024.xx*/"LS_locked_operation", /*umask!=0*/ NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1197 {/*069.xx*/"CU_mab_wait_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1198 {/*076.xx*/"CU_cpu_clk_unhalted", NULL
, REGNO_ANY
, NULL
, PRELOADS_75
, 1, ABST_NONE
},
1199 {/*087.xx*/"IC_instr_fetch_stall", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1200 {/*0cd.xx*/"EX_intr_masked_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1201 {/*0ce.xx*/"EX_intr_masked_while_pending_cycles", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1202 {/*0d0.xx*/"DE_nothing_to_dispatch", /*future*/ NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1203 {/*0d1.xx*/"DE_dispatch_stalls", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1204 {/*0d3.xx*/"DE_dispatch_stall_serialization", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1205 {/*0d5.xx*/"DE_dispatch_stall_instr_retire_q_full", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1206 {/*0d6.xx*/"DE_dispatch_stall_int_scheduler_q_full", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1207 {/*0d7.xx*/"DE_dispatch_stall_fp_scheduler_q_full", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1208 {/*0d8.xx*/"DE_dispatch_stall_ldq_full", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1209 {/*0d9.xx*/"DE_dispatch_stall_waiting_all_quiet", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1210 {/*1d8.xx*/"EX_dispatch_stall_stq_full", NULL
, REGNO_ANY
, NULL
, PRELOAD_DEF
, 1, ABST_NONE
},
1212 /* additional (hidden) aliases, for convenience */
/* Same events as "cycles" and "insts" above, but with an explicit register
   number (0 or 1) instead of REGNO_ANY, no metric text, and PRELOADS_8.  */
1213 {"cycles0", "CU_cpu_clk_unhalted", 0, NULL
, PRELOADS_8
, 1, ABST_NONE
},
1214 {"cycles1", "CU_cpu_clk_unhalted", 1, NULL
, PRELOADS_8
, 1, ABST_NONE
},
1215 {"insts0", "EX_retired_instr_w_excp_intr", 0, NULL
, PRELOADS_8
, 0, ABST_NONE
},
1216 {"insts1", "EX_retired_instr_w_excp_intr", 1, NULL
, PRELOADS_8
, 0, ABST_NONE
},
/* End-of-table marker: table walkers stop at the first entry whose name
   is NULL.  */
1217 {NULL
, NULL
, 0, NULL
, 0, 0, 0, 0, ABST_NONE
}
1219 #endif /* __i386__ or __x86_64 */
/* Designated-initializer helpers for perf_event based counter tables.
   Each macro expands to a comma-separated list of field initializers and
   is meant to be used inside the braces of one table entry, optionally
   followed by further designated initializers.

   INIT_HWC (nm, mtr, cfg, ty)
     nm   counter name (.name)
     mtr  metric description (.metric)
     cfg  event configuration value (.config)
     ty   event type, one of the PERF_TYPE_* values (.type)
   It also sets .use_perf_event_type to 1, .val to PRELOAD_DEF and
   .reg_num to REGNO_ANY.

   Every macro parameter is parenthesized in the expansion so that an
   argument which is itself an expression cannot change how the
   initializer is parsed.  */
#define INIT_HWC(nm, mtr, cfg, ty) .name = (nm), .metric = (mtr), \
	.config = (cfg), .type = (ty), .use_perf_event_type = 1, \
	.val = PRELOAD_DEF, .reg_num = REGNO_ANY

/* Entry of type PERF_TYPE_HARDWARE.  */
#define HWE(nm, mtr, cfg)	INIT_HWC(nm, mtr, cfg, PERF_TYPE_HARDWARE)

/* Entry of type PERF_TYPE_SOFTWARE.  */
#define SWE(nm, mtr, cfg)	INIT_HWC(nm, mtr, cfg, PERF_TYPE_SOFTWARE)

/* Entry of type PERF_TYPE_HW_CACHE.  The config value packs the cache id
   into bits 0-7, the operation into bits 8-15 and the result into bits
   16-23.  */
#define HWCE(nm, mtr, id, op, res) \
	INIT_HWC(nm, mtr, (id) | ((op) << 8) | ((res) << 16), PERF_TYPE_HW_CACHE)
1229 #define HWC_GENERIC \
1230 /* Hardware event: */\
1231 { HWE("usr_time", STXT("User CPU"), PERF_COUNT_HW_CPU_CYCLES), .timecvt = 1,\
1232 .int_name = "cycles" },\
1233 { HWE("sys_time", STXT("System CPU"), PERF_COUNT_HW_CPU_CYCLES), .timecvt = 1,\
1234 .int_name = "cycles~system=1~user=0" },\
1235 { HWE("branch-instructions", STXT("Branch-instructions"),\
1236 PERF_COUNT_HW_BRANCH_INSTRUCTIONS) },\
1237 { HWE("branch-misses", STXT("Branch-misses"), PERF_COUNT_HW_BRANCH_MISSES) },\
1238 { HWE("bus-cycles", STXT("Bus Cycles"), PERF_COUNT_HW_BUS_CYCLES),\
1240 { HWE("cache-misses", STXT("Cache-misses"), PERF_COUNT_HW_CACHE_MISSES) },\
1241 { HWE("cache-references", STXT("Cache-references"),\
1242 PERF_COUNT_HW_CACHE_REFERENCES) },\
1243 { HWE("cycles", STXT("CPU Cycles"), PERF_COUNT_HW_CPU_CYCLES), .timecvt = 1 },\
1244 { HWE("insts", STXT("Instructions Executed"), PERF_COUNT_HW_INSTRUCTIONS),\
1245 .int_name = "instructions" },\
1246 { HWE("ref-cycles", STXT("Total Cycles"), PERF_COUNT_HW_REF_CPU_CYCLES),\
1248 { HWE("stalled-cycles-backend", STXT("Stalled Cycles during issue."),\
1249 PERF_COUNT_HW_STALLED_CYCLES_BACKEND), .timecvt = 1 },\
1250 { HWE("stalled-cycles-frontend", STXT("Stalled Cycles during retirement."),\
1251 PERF_COUNT_HW_STALLED_CYCLES_FRONTEND), .timecvt = 1 },\
1252 /* Software event: */\
1253 { SWE("alignment-faults", STXT("Alignment Faults"),\
1254 PERF_COUNT_SW_ALIGNMENT_FAULTS) },\
1255 { SWE("context-switches", STXT("Context Switches"),\
1256 PERF_COUNT_SW_CONTEXT_SWITCHES) },\
1257 { SWE("cpu-clock", STXT("CPU Clock"), PERF_COUNT_SW_CPU_CLOCK),\
1259 { SWE("cpu-migrations", STXT("CPU Migrations"),\
1260 PERF_COUNT_SW_CPU_MIGRATIONS) },\
1261 { SWE("emulation-faults", STXT("Emulation Faults"),\
1262 PERF_COUNT_SW_EMULATION_FAULTS) },\
1263 { SWE("major-faults", STXT("Major Page Faults"),\
1264 PERF_COUNT_SW_PAGE_FAULTS_MAJ) },\
1265 { SWE("minor-faults", STXT("Minor Page Faults"),\
1266 PERF_COUNT_SW_PAGE_FAULTS_MIN) },\
1267 { SWE("page-faults", STXT("Page Faults"), PERF_COUNT_SW_PAGE_FAULTS) },\
1268 { SWE("task-clock", STXT("Clock Count Specific"), PERF_COUNT_SW_TASK_CLOCK),\
1270 /* Hardware cache event: */\
1271 { HWCE("L1-dcache-load-misses", STXT("L1 D-cache Load Misses"),\
1272 PERF_COUNT_HW_CACHE_L1D,\
1273 PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_MISS) },\
1274 { HWCE("L1-dcache-loads", STXT("L1 D-cache Loads"),\
1275 PERF_COUNT_HW_CACHE_L1D,\
1276 PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_ACCESS) },\
1277 { HWCE("L1-dcache-store-misses", STXT("L1 D-cache Store Misses"),\
1278 PERF_COUNT_HW_CACHE_L1D,\
1279 PERF_COUNT_HW_CACHE_RESULT_MISS, PERF_COUNT_HW_CACHE_RESULT_ACCESS) },\
1280 { HWCE("L1-dcache-stores", STXT("L1 D-cache Store Stores"),\
1281 PERF_COUNT_HW_CACHE_L1D,\
1282 PERF_COUNT_HW_CACHE_OP_WRITE, PERF_COUNT_HW_CACHE_RESULT_ACCESS) },\
1283 { HWCE("L1-icache-load-misses", STXT("L1 Instructions Load Misses"),\
1284 PERF_COUNT_HW_CACHE_L1I,\
1285 PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_MISS) },\
1286 { HWCE("L1-icache-load-misses", STXT("L1 Instructions Loads"),\
1287 PERF_COUNT_HW_CACHE_L1I,\
1288 PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_ACCESS) },\
1289 { HWCE("dTLB-load-misses", STXT("D-TLB Load Misses"),\
1290 PERF_COUNT_HW_CACHE_DTLB,\
1291 PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_MISS) },\
1292 { HWCE("dTLB-loads", STXT("D-TLB Loads"),\
1293 PERF_COUNT_HW_CACHE_DTLB,\
1294 PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_ACCESS) },\
1295 { HWCE("iTLB-load-misses", STXT("The Instruction TLB Load Misses"),\
1296 PERF_COUNT_HW_CACHE_ITLB,\
1297 PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_MISS) },\
1298 { HWCE("iTLB-loads", STXT("The Instruction TLB Loads"),\
1299 PERF_COUNT_HW_CACHE_ITLB,\
1300 PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_ACCESS) },
1301 static Hwcentry generic_list
[] = {
1303 {NULL
, NULL
, 0, NULL
, 0, 0, 0, 0, ABST_NONE
}
1306 #if defined(__i386__) || defined(__x86_64)
1307 #include "hwc_amd_zen3.h"
1308 #include "hwc_amd_zen4.h"
1309 #include "hwc_intel_icelake.h"
1310 #elif defined(__aarch64__)
1311 #include "hwc_arm64_amcc.h"
1312 #include "hwc_arm_neoverse_n1.h"
1313 #include "hwc_arm_ampere_1.h"
1316 /* structure defining the counters for a CPU type */
1320 Hwcentry
*stdlist_table
;
1321 #define MAX_DEFAULT_HWC_DEFS 4 // allows multiple defs to handle OS variations; extend as needed
1322 char *default_exp_p
[MAX_DEFAULT_HWC_DEFS
+ 1]; // end of list MUST be marked with NULL
1327 * Any default HWC string must consist of counter names separated by -TWO- commas,
1328 * with no trailing comma/value after the last counter name
1330 * Only aliased counters should be specified; non-aliased counters will
1331 * not get the right overflow values set.
1332 * If the string is not formatted that way, -h hi and -h lo will fail
1334 static cpu_list_t cputabs
[] = {
1335 #if defined(__i386__) || defined(__x86_64)
1336 {CPC_PENTIUM_PRO_MMX
, pentiumIIlist
, {"insts", 0}},
1337 {CPC_PENTIUM_PRO
, pentiumIIIlist
, {"insts", 0}},
1338 {CPC_PENTIUM_4
, pentium4
, {"insts", 0}},
1339 {CPC_PENTIUM_4_HT
, pentium4
, {"insts", 0}},
1340 {CPC_INTEL_CORE2
, intelCore2list
, {"insts,,cycles", 0}},
1341 {CPC_INTEL_NEHALEM
, intelNehalemList
, {"insts,,cycles,,+l2m_latency,,dtlbm_stall",
1342 "insts,,cycles,,l3m_stall,,dtlbm_stall", 0}},
1343 {CPC_INTEL_WESTMERE
, intelNehalemList
, {"insts,,cycles,,+l2m_latency,,dtlbm_stall",
1344 "insts,,cycles,,l3m_stall,,dtlbm_stall", 0}},
1345 {CPC_INTEL_SANDYBRIDGE
, intelSandyBridgeList
, {"insts,,cycles,,+l2m_latency,,dtlbm_stall",
1346 "insts,,cycles,,l3m,,dtlbm", 0}},
1347 {CPC_INTEL_IVYBRIDGE
, intelSandyBridgeList
, {"insts,,cycles,,+l2m_latency,,dtlbm_stall",
1348 "insts,,cycles,,l3m,,dtlbm", 0}},
1349 {CPC_INTEL_HASWELL
, intelHaswellList
, {"insts,,cycles,,+l2m_latency,,dtlbm_stall",
1350 "insts,,cycles,,l3m,,dtlbm", 0}},
1351 {CPC_INTEL_BROADWELL
, intelBroadwellList
, {"insts,,cycles,,+l2m_latency,,dtlbm",
1352 "insts,,cycles,,l3m,,dtlbm", 0}},
1353 {CPC_INTEL_SKYLAKE
, intelSkylakeList
, {"insts,,cycles,,+l2m_latency,,dtlbm_stall",
1354 "insts,,cycles,,l2m_stall,,dtlbm_stall", 0}},
1355 {CPC_INTEL_ICELAKE
, intelIcelakeList
, {"insts,,cycles,,dTLB-load-misses", 0}},
1356 {CPC_INTEL_UNKNOWN
, intelLinuxUnknown
, {"cycles,,insts,,llm",
1357 "user_time,,system_time,,cycles,,insts,,llm", 0}},
1358 {CPC_INTEL_ATOM
, intelAtomList
, {"insts", 0}},
1359 {CPC_AMD_K8C
, amd_opteron_10h_11h
, {"insts,,cycles,,l2dm,,l2dtlbm", 0}},
1360 {CPC_AMD_FAM_10H
, amd_opteron_10h_11h
, {"insts,,cycles,,l2dm,,l2dtlbm", 0}},
1361 {CPC_AMD_FAM_11H
, amd_opteron_10h_11h
, {"insts,,cycles,,l2dm,,l2dtlbm", 0}},
1362 {CPC_AMD_FAM_15H
, amd_15h
, {"insts,,cycles", 0}},
1363 {CPC_KPROF
, kproflist
, {NULL
}}, // OBSOLETE (To support 12.3 and earlier, TBR)
1364 {CPC_AMD_Authentic
, generic_list
, {"insts,,cycles", 0}},
1365 {CPC_AMD_FAM_19H_ZEN3
, amd_zen3_list
, {"insts,,cycles", 0}},
1366 {CPC_AMD_FAM_19H_ZEN4
, amd_zen4_list
, {"insts,,cycles", 0}},
1367 #elif defined(__aarch64__)
1368 {CPC_ARM64_AMCC
, arm64_amcc_list
, {"insts,,cycles", 0}},
1369 {CPC_ARM_NEOVERSE_N1
, arm_neoverse_n1_list
, {"insts,,cycles", 0}},
1370 {CPC_ARM_AMPERE_1
, arm_ampere_1_list
, {"insts,,cycles", 0}},
1371 {CPC_ARM_GENERIC
, generic_list
, {"insts,,cycles", 0}},
1373 {0, generic_list
, {"insts,,cycles", 0}},
1376 /*---------------------------------------------------------------------------*/
1377 /* state variables */
1378 static int initialized
;
1379 static int signals_disabled
;
1381 // Simple array list
1384 void** array
; // array of ptrs, last item set to null
1385 int sz
; // num live elements in array
1386 int max
; // array allocation size
1390 ptr_list_init (ptr_list
*lst
)
1398 ptr_list_add (ptr_list
*lst
, char* ptr
)
1399 { // ptr must be freeable
1400 if (lst
->sz
>= lst
->max
- 1)
1403 int newmax
= lst
->max
? lst
->max
* 2 : 16;
1404 new = (void**) realloc (lst
->array
, newmax
* sizeof (void*));
1405 if (!new) return; // failed, discard add
1409 lst
->array
[lst
->sz
++] = ptr
;
1410 lst
->array
[lst
->sz
] = NULL
; // mark new end-of-list
1414 ptr_list_free (ptr_list
*lst
)
1415 { // includes shallow free of all elements
1418 for (int ii
= 0; lst
->array
[ii
]; ii
++)
1419 free (lst
->array
[ii
]);
1427 // Capabilities of this machine (initialized by setup_cpc())
1428 static int cpcx_cpuver
= CPUVER_UNDEFINED
;
1429 static uint_t cpcx_npics
;
1430 static const char *cpcx_cciname
;
1431 static const char *cpcx_docref
;
1432 static uint64_t cpcx_support_bitmask
;
1434 // cpcx_*[0]: collect lists
1435 // cpcx_*[1]: er_kernel lists
1436 // Each cpcx_*[] list is an array of ptrs with null ptr marking end of list
1437 static char **cpcx_attrs
[2];
1439 static Hwcentry
**cpcx_std
[2];
1440 static Hwcentry
**cpcx_raw
[2];
1441 static Hwcentry
**cpcx_hidden
[2];
1443 static uint_t cpcx_max_concurrent
[2];
1444 static char *cpcx_default_hwcs
[2];
1445 static char *cpcx_orig_default_hwcs
[2];
1446 static int cpcx_has_precise
[2];
// forKernel is the index into the cpcx_*[] arrays: 0 = collect, 1 = er_kernel.
// VALID_FOR_KERNEL accepts exactly those two values; IS_KERNEL is true for 1.
1448 #define VALID_FOR_KERNEL(forKernel) ((forKernel)>=0 && (forKernel)<=1)
1449 #define IS_KERNEL(forKernel) ((forKernel)==1)
1451 // used to build lists:
1452 static ptr_list unfiltered_attrs
;
1453 static ptr_list unfiltered_raw
;
1455 /*---------------------------------------------------------------------------*/
1456 /* misc internal utilities */
1458 /* compare 2 strings to either \0 or <termchar> */
1459 #define IS_EOL(currchar, termchar) ((currchar)==(termchar) || (currchar)==0)
1462 is_same (const char * regname
, const char * int_name
, char termchar
)
1468 if (IS_EOL (a
, termchar
))
1470 if (IS_EOL (b
, termchar
))
1471 return 1; /* strings are the same up to terminating char */
1473 break; /* strings differ */
1476 break; /* strings differ */
1485 is_numeric (const char *name
, uint64_t *pval
)
1488 uint64_t val
= strtoull (name
, &endptr
, 0);
1489 if (!*name
|| *endptr
)
1490 return 0; /* name does not specify a numeric value */
1497 is_visible_alias (Hwcentry
* pctr
)
1501 if (pctr
->name
&& pctr
->int_name
&& pctr
->metric
)
1507 is_hidden_alias (Hwcentry
* pctr
)
1511 if (pctr
->name
&& pctr
->int_name
&& pctr
->metric
== NULL
)
1517 #define hwcentry_print(lvl,x1,x2)
1520 /* print a Hwcentry */
1522 hwcentry_print (int lvl
, const char * header
, const Hwcentry
*pentry
)
1524 Tprintf (lvl
, "%s '%s', '%s', %d, '%s', %d, %d, %d, %d, %d, %d, /\n",
1526 pentry
->name
? pentry
->name
: "NULL",
1527 pentry
->int_name
? pentry
->int_name
: "NULL",
1529 pentry
->metric
? pentry
->metric
: "NULL",
1530 pentry
->lval
, /* low-resolution/long run */
1531 pentry
->val
, /* normal */
1532 pentry
->hval
, /* high-resolution/short run */
1534 pentry
->memop
, /* type of instruction that can trigger */
1535 pentry
->sort_order
);
1539 /*---------------------------------------------------------------------------*/
1540 /* utilities for rawlist (list of raw counters with reglist[] filled in) */
1542 /* search the 'raw' list of counters for <name> */
1544 ptrarray_find_by_name (Hwcentry
** array
, const char * name
)
1548 Tprintf (DBG_LT3
, "hwctable: array_find_by_name(%s):\n", name
);
1549 for (int ii
= 0; array
&& array
[ii
]; ii
++)
1550 if (strcmp (array
[ii
]->name
, name
) == 0)
1552 return NULL
; /* not found */
1555 /* allocate a shallow copy of a Hwcentry (the name string is duplicated) */
1557 alloc_shallow_copy (const Hwcentry
*pctr
)
1559 Hwcentry
*node
= (Hwcentry
*) malloc (sizeof (Hwcentry
));
1561 return NULL
; // fail
1562 *node
= *pctr
; /* shallow copy! */
1564 node
->name
= strdup (pctr
->name
);
1568 /* add Hwcentry to the 'raw' list of counters */
1570 list_append_shallow_copy (ptr_list
*list
, const Hwcentry
*pctr
)
1572 Hwcentry
*node
= alloc_shallow_copy (pctr
);
1574 return NULL
; // fail
1575 ptr_list_add (list
, (void*) node
);
1580 list_add (ptr_list
*list
, uint_t regno
, const char *name
)
1583 praw
= ptrarray_find_by_name ((Hwcentry
**) list
->array
, name
);
1586 Hwcentry tmpctr
= empty_ctr
;
1587 tmpctr
.name
= (char *) name
;
1588 praw
= list_append_shallow_copy (list
, &tmpctr
);
1593 /*---------------------------------------------------------------------------*/
1594 /* utilities for stdlist (table of aliased, hidden, & convenience, ctrs) */
1596 /* find top level definition for <cpuid> */
1598 cputabs_find_entry (int cpuid
)
1601 /* now search for the appropriate table */
1604 if (cputabs
[i
].cputag
== 0)
1606 if (cpuid
== cputabs
[i
].cputag
)
1609 Tprintf (0, "hwctable: cputabs_find_entry: WARNING: "
1610 "cpu_id = %d not defined. No 'standard' counters are available\n",
1615 /* find Hwcentry table for <cpuid> */
1617 stdlist_get_table (int cpuid
)
1619 cpu_list_t
* tmp
= cputabs_find_entry (cpuid
);
1621 return tmp
->stdlist_table
;
1625 /* search the 'standard' list of counters for <name>,<regno> */
1626 /* note: <regno>=REGNO_ANY is a wildcard that matches any value. */
1628 /* note: int_name==NULL is a wildcard */
1629 static const Hwcentry
*
1630 ptrarray_find (const Hwcentry
**array
, const char *name
, const char *int_name
,
1631 int check_regno
, regno_t regno
)
1633 const Hwcentry
*pctr
;
1636 for (int ii
= 0; array
[ii
]; ii
++)
1639 if (strcmp (pctr
->name
, name
))
1641 if (int_name
&& int_name
[0] != 0 && pctr
->int_name
)
1643 if (NULL
== strstr (int_name
, pctr
->int_name
))
1651 /* search the 'standard' list of counters for <name>,<regno> */
1653 /* note: <regno>=REGNO_ANY is a wildcard that matches any value. */
1654 static const Hwcentry
*
1655 static_table_find (const Hwcentry
*table
, const char *name
, const char *int_name
,
1656 int check_regno
, regno_t regno
)
1659 for (sz
= 0; table
&& table
[sz
].name
; sz
++)
1663 const Hwcentry
** list
= calloc (sz
+ 1, sizeof (void*));
1666 for (int ii
= 0; ii
< sz
; ii
++)
1667 list
[ii
] = &table
[ii
];
1669 const Hwcentry
*pctr
= ptrarray_find (list
, name
, int_name
, check_regno
, regno
);
1675 #define stdlist_print(dbg_lvl,table)
1678 /* print all Hwcentries in standard table. Check for weird stuff */
1680 stdlist_print (int dbg_lvl
, const Hwcentry
* table
)
1684 Tprintf (0, "hwctable: stdlist_print: ERROR: "
1685 "table is invalid.\n");
1688 for (const Hwcentry
*pctr
= table
; pctr
->name
; pctr
++)
1690 hwcentry_print (dbg_lvl
, "hwctable: stdlist: ", pctr
);
1695 /*---------------------------------------------------------------------------*/
1696 /* utilities for init */
1698 /* try to bind counters to hw. Return 0 on success, nonzero otherwise */
1700 test_hwcs (const Hwcentry
* entries
[], unsigned numctrs
)
1705 hwcdrv_api_t
*hwcdrv
= get_hwcdrv ();
1706 Tprintf (DBG_LT2
, "hwctable: test_hwcs()...\n");
1707 rc
= hwcfuncs_bind_hwcentry (entries
, numctrs
);
1710 Tprintf (0, "hwctable: WARNING: test "
1711 "counters could not be created\n");
1715 if (!signals_disabled
)
1717 (void) signal (HWCFUNCS_SIGNAL
, SIG_IGN
);
1718 signals_disabled
= 1;
1720 rc
= hwcdrv
->hwcdrv_start ();
1723 Tprintf (0, "hwctable: WARNING: test "
1724 "counters could not be started\n");
1727 rc
= hwcdrv
->hwcdrv_read_events (&sample
, NULL
);
1729 Tprintf (0, "hwctable: WARNING: test sample failed\n");
1734 Tprintf (DBG_LT1
, "hwctable: test_hwcs(");
1735 for (ii
= 0; ii
< numctrs
; ii
++)
1736 Tprintf (DBG_LT1
, "%s%s", ii
? "," : "", entries
[ii
]->name
);
1737 Tprintf (DBG_LT1
, ") PASS\n");
1742 if (created
&& hwcdrv
->hwcdrv_free_counters ())
1743 Tprintf (0, "hwctable: WARNING: test counters could not be freed\n");
1748 #define check_tables()
1751 /* check for typos in tables */
1756 /* now search the known table of counters */
1760 int cputag
= cputabs
[i
].cputag
;
1763 if (cputag
== CPC_KPROF
)
1765 pentry
= cputabs
[i
].stdlist_table
;
1766 for (; pentry
; pentry
++)
1770 if (!pentry
->int_name
)
1771 {/* internal, only to supply ABST and timecvt */
1773 Tprintf (DBG_LT0
, "hwctable: check_tables: ERROR:"
1774 " internal && metric @%d, %s\n", cputag
, pentry
->name
);
1775 if (pentry
->val
!= PRELOAD_DEF
1776 && pentry
->memop
!= ABST_EXACT_PEBS_PLUS1
)
1777 Tprintf (DBG_LT2
, "hwctable: check_tables: INFO:"
1778 " internal && custom val=%d @%d, %s\n",
1779 pentry
->val
, cputag
, pentry
->name
);
1783 if (!pentry
->int_name
)
1784 Tprintf (DBG_LT0
, "hwctable: check_tables: ERROR:"
1785 " aliased && !int_name @%d, %s\n", cputag
, pentry
->name
);
1787 if (pentry
->int_name
&& !pentry
->metric
)
1789 if (!strcmp (pentry
->name
, pentry
->int_name
))
1790 Tprintf (DBG_LT0
, "hwctable: check_tables: ERROR:"
1791 " convenience && name==int_name @%d, %s\n",
1792 cputag
, pentry
->name
);
1799 static int try_a_counter (int forKernel
);
1800 static void hwc_process_raw_ctrs (int forKernel
, Hwcentry
***pstd_out
,
1801 Hwcentry
***praw_out
, Hwcentry
***phidden_out
,
1802 Hwcentry
**static_tables
,
1803 Hwcentry
**raw_unfiltered_in
);
1805 /* internal call to initialize libs, ctr tables */
1807 setup_cpc_general (int skip_hwc_test
)
1809 const cpu_list_t
* cputabs_entry
;
1811 Tprintf (DBG_LT2
, "hwctable: setup_cpc()... \n");
1814 Tprintf (0, "hwctable: WARNING: setup_cpc() has already been called\n");
1818 cpcx_cpuver
= CPUVER_UNDEFINED
;
1819 cpcx_cciname
= NULL
;
1822 cpcx_support_bitmask
= 0;
1823 for (int kk
= 0; kk
< 2; kk
++)
1824 { // collect-0 and kernel-1
1825 cpcx_attrs
[kk
] = NULL
;
1826 cpcx_std
[kk
] = NULL
;
1827 cpcx_raw
[kk
] = NULL
;
1828 cpcx_hidden
[kk
] = NULL
;
1829 cpcx_max_concurrent
[kk
] = 0;
1830 cpcx_default_hwcs
[kk
] = NULL
;
1831 cpcx_orig_default_hwcs
[kk
] = NULL
;
1832 cpcx_has_precise
[kk
] = 0;
1835 hwcdrv_api_t
*hwcdrv
= get_hwcdrv ();
1836 if (hwcdrv
->hwcdrv_init_status
)
1838 Tprintf (0, "WARNING: setup_cpc_general() failed. init_status=%d \n",
1839 hwcdrv
->hwcdrv_init_status
);
1840 goto setup_cpc_wrapup
;
1842 hwcdrv
->hwcdrv_get_info (&cpcx_cpuver
, &cpcx_cciname
, &cpcx_npics
,
1843 &cpcx_docref
, &cpcx_support_bitmask
);
1845 /* Fix cpcx_cpuver for new Zen and Intel machines */
1846 cpu_info_t
*cpu_p
= read_cpuinfo ();
1847 if (strcmp (cpu_p
->cpu_vendorstr
, "AuthenticAMD") == 0)
1849 if (cpu_p
->cpu_family
== AMD_ZEN3_FAMILY
)
1850 switch (cpu_p
->cpu_model
)
1852 case AMD_ZEN3_RYZEN
:
1853 case AMD_ZEN3_RYZEN2
:
1854 case AMD_ZEN3_RYZEN3
:
1855 case AMD_ZEN3_EPYC_TRENTO
:
1856 cpcx_cpuver
= CPC_AMD_FAM_19H_ZEN3
;
1858 case AMD_ZEN4_RYZEN
:
1860 cpcx_cpuver
= CPC_AMD_FAM_19H_ZEN4
;
1864 else if (strcmp (cpu_p
->cpu_vendorstr
, "GenuineIntel") == 0)
1866 if (cpu_p
->cpu_family
== 6)
1868 if (cpu_p
->cpu_model
== 106)
1869 cpcx_cpuver
= CPC_INTEL_ICELAKE
;
1872 else if (strcmp (cpu_p
->cpu_vendorstr
, AARCH64_VENDORSTR_ARM
) == 0)
1874 if (cpu_p
->cpu_family
== 0x50)
1875 cpcx_cpuver
= CPC_ARM64_AMCC
;
1876 else if (cpu_p
->cpu_family
== 0x41)
1877 cpcx_cpuver
= CPC_ARM_NEOVERSE_N1
;
1878 else if (cpu_p
->cpu_family
== 0xc0)
1879 cpcx_cpuver
= CPC_ARM_AMPERE_1
;
1881 cpcx_cpuver
= CPC_ARM_GENERIC
;
1884 #ifdef DISALLOW_PENTIUM_PRO_MMX_7007575
1885 if (cpcx_cpuver
== CPC_PENTIUM_PRO_MMX
)
1887 Tprintf (0, "hwctable: WARNING: setup_cpc(): cpu=%d"
1888 " `Pentium Pro with MMX, Pentium II' is not supported\n", cpcx_cpuver
);
1889 hwcfuncs_int_logerr (GTXT ("libcpc cannot identify processor type\n"));
1890 goto setup_cpc_wrapup
;
1894 /* now search the known table of counters */
1895 cputabs_entry
= cputabs_find_entry (cpcx_cpuver
);
1896 if (cputabs_entry
== NULL
)
1898 Tprintf (0, "hwctable: WARNING: setup_cpc(): cpu=%d"
1899 " could not be found in the tables\n", cpcx_cpuver
);
1900 /* strange, should have at least selected "unknownlist" */
1901 hwcfuncs_int_logerr (GTXT ("Analyzer CPU table could not be found\n"));
1902 goto setup_cpc_wrapup
;
1905 Hwcentry
* valid_cpu_tables
[2]; // [0]:static table of counters, [1]:static table of generic counters
1906 valid_cpu_tables
[0] = cputabs_entry
->stdlist_table
;
1907 if (valid_cpu_tables
[0] == NULL
)
1909 Tprintf (0, "hwctable: WARNING: setup_cpc(): "
1910 " valid_cpu_tables was NULL??\n");
1911 /* strange, someone put a NULL in the lookup table? */
1912 hwcfuncs_int_logerr (GTXT ("Analyzer CPU table is invalid\n"));
1913 goto setup_cpc_wrapup
;
1915 valid_cpu_tables
[1] = papi_generic_list
;
1916 Tprintf (DBG_LT2
, "hwctable: setup_cpc(): getting descriptions \n");
1917 // populate cpcx_raw and cpcx_attr
1918 hwcdrv
->hwcdrv_get_descriptions (hwc_cb
, attrs_cb
, cputabs_entry
->stdlist_table
);
1919 for (int kk
= 0; kk
< 2; kk
++)
1920 { // collect and er_kernel
1921 hwc_process_raw_ctrs (kk
, &cpcx_std
[kk
], &cpcx_raw
[kk
], &cpcx_hidden
[kk
],
1922 valid_cpu_tables
, (Hwcentry
**) unfiltered_raw
.array
);
1923 cpcx_has_precise
[kk
] = 0;
1924 for (int rr
= 0; cpcx_raw
[kk
] && cpcx_raw
[kk
][rr
]; rr
++)
1926 int memop
= cpcx_raw
[kk
][rr
]->memop
;
1927 if (ABST_MEMSPACE_ENABLED (memop
))
1929 cpcx_has_precise
[kk
] = 1;
1933 cpcx_attrs
[kk
] = (char**) unfiltered_attrs
.array
;
1934 cpcx_max_concurrent
[kk
] = cpcx_npics
;
1936 #if 1 // 22897042 - DTrace cpc provider does not support profiling on multiple ctrs on some systems
1937 if ((cpcx_support_bitmask
& HWCFUNCS_SUPPORT_OVERFLOW_CTR_ID
) != HWCFUNCS_SUPPORT_OVERFLOW_CTR_ID
)
1939 // kernel profiling only supports one counter if overflowing counter can't be identified
1940 cpcx_max_concurrent
[1] = cpcx_npics
? 1 : 0;
1944 /* --- quick test of the cpc interface --- */
1948 rc
= try_a_counter (0);
1950 /* initialize the default counter string definition */
1951 for (int kk
= 0; kk
< 2; kk
++)
1953 char * default_exp
= 0;
1955 for (jj
= 0; (default_exp
= cputabs_entry
->default_exp_p
[jj
]); jj
++)
1957 int rc
= hwc_lookup (kk
, 0, default_exp
, NULL
, 0, NULL
, NULL
);
1963 char * fallback
[3] = {NTXT ("insts,,cycles,,l3m"), NTXT ("insts,,cycles"), NTXT ("insts")};
1964 for (int ff
= 0; ff
< 3; ff
++)
1966 int rc
= hwc_lookup (kk
, 0, fallback
[ff
], NULL
, 0, NULL
, NULL
);
1969 default_exp
= strdup (fallback
[ff
]);
1974 cpcx_default_hwcs
[kk
] = default_exp
;
1975 cpcx_orig_default_hwcs
[kk
] = default_exp
;
1983 ptr_list_free(&tmp_raw); // free stuff... YXXX
1984 ptr_list_free(&unfiltered_attrs);
1995 setup_cpc_general (0); // set up and include a hwc test run
1999 setup_cpc_skip_hwctest ()
2003 setup_cpc_general (1); // set up but skip hwc test run
2007 try_a_counter (int forKernel
)
2009 if (!VALID_FOR_KERNEL (forKernel
))
2012 const Hwcentry
* testevent
;
2013 if (cpcx_std
[forKernel
] == NULL
)
2015 Tprintf (0, "hwctable: WARNING: cpcx_std not initialized");
2016 return 0; /* consider this an automatic PASS */
2018 /* use the first entry of cpcx_std[forKernel] as the test event */
2019 testevent
= cpcx_std
[forKernel
][0];
2020 if (!testevent
|| !testevent
->name
)
2022 Tprintf (0, "hwctable: WARNING: no test metric"
2023 " available to verify counters\n");
2024 return 0; /* consider this an automatic PASS */
2026 Hwcentry tmp_testevent
;
2027 tmp_testevent
= *testevent
; /* shallow copy */
2028 if (tmp_testevent
.int_name
== NULL
)
2030 /* counter is defined in 'hidden' section of table, supply int_name */
2031 tmp_testevent
.int_name
= strdup (tmp_testevent
.name
);
2033 Hwcentry
* test_array
[1] = {&tmp_testevent
};
2034 rc
= test_hwcs ((const Hwcentry
**) test_array
, 1);
2035 if (rc
== HWCFUNCS_ERROR_UNAVAIL
)
2037 // consider this a pass (allow HWC table to be printed)
2038 Tprintf (0, "hwctable: WARNING: "
2039 "cpc_bind_event() shows counters busy; allow to continue\n");
2044 // failed to start for some other reason
2045 Tprintf (0, "hwctable: WARNING: "
2046 "test of counter '%s' failed\n",
2054 hwc_update_val (Hwcentry
*hwc
)
2056 if (hwc
->ref_val
== 0)
2057 hwc
->ref_val
= hwc
->val
; // save original reference
2059 hrtime_t min_time_nsec
= hwc
->min_time
;
2060 if (min_time_nsec
== HWCTIME_TBD
)
2061 min_time_nsec
= hwc
->min_time_default
;
2062 switch (min_time_nsec
)
2064 case 0: // disable time-based intervals
2065 // do not modify val
2069 newVal
= HWC_VAL_ON (hwc
->ref_val
);
2072 newVal
= HWC_VAL_LO (hwc
->ref_val
);
2075 newVal
= HWC_VAL_HI (hwc
->ref_val
);
2078 newVal
= HWC_VAL_CUSTOM (hwc
->ref_val
, min_time_nsec
);
2081 #define MAX_INT_VAL (2*1000*1000*1000 + 1000100)// yuck, limited to signed int
2082 if (newVal
>= MAX_INT_VAL
)
2083 newVal
= MAX_INT_VAL
;
2087 /* convert value string to value and store result in hwc->val */
2088 /* This function moved here from collctrl.cc */
2090 * Keep the HWCTIME_* definitions in sync with those in
2091 * collctrl.cc Coll_Ctrl::add_hwcstring().
2094 set_hwcval (Hwcentry
*hwc
, hrtime_t global_min_time_nsec
, const char *valptr
)
2096 hwc
->min_time_default
= global_min_time_nsec
;
2099 // An interval of 1 is used for certain types of count data.
2100 // (er_bit, er_generic, er_rock ...)
2101 // Hi and Lo do not apply.
2102 /* use the default */
2104 else if (valptr
== NULL
|| valptr
[0] == 0 || strcmp (valptr
, "auto") == 0)
2105 hwc
->min_time
= HWCTIME_TBD
;
2106 else if (strcmp (valptr
, "on") == 0)
2107 hwc
->min_time
= HWCTIME_ON
;
2108 else if (strcmp (valptr
, "lo") == 0 || strcmp (valptr
, "low") == 0)
2109 hwc
->min_time
= HWCTIME_LO
;
2110 else if (strcmp (valptr
, "hi") == 0 || strcmp (valptr
, "high") == 0
2111 || strcmp (valptr
, "h") == 0)
2112 hwc
->min_time
= HWCTIME_HI
;
2115 /* the remaining string should be a number > 0 */
2116 char *endchar
= NULL
;
2117 long long tmp
= strtoll (valptr
, &endchar
, 0);
2118 int value
= (int) tmp
;
2119 if (*endchar
!= 0 || tmp
<= 0 || value
!= tmp
)
2121 // also covers errno == ERANGE
2122 Tprintf (0, "hwctable: set_hwcval(): ERROR: "
2123 "Invalid counter value %s for counter `%s'\n",
2127 if (tmp
> UINT32_MAX
/ 2)
2129 /* Roch B. says that we MUST do this check for er_kernel
2130 because some platforms deliver overflow interrupts without
2131 identifying which counter overflowed. The only way to
2132 determine which counter overflowed is to have enough
2133 margin on 32 bit counters to make sure they don't
2136 Tprintf (0, "hwctable: set_hwcval(): ERROR: "
2137 "Counter value %s exceeds %lu\n",
2138 valptr
, (unsigned long) UINT32_MAX
/ 2);
2144 if (hwc
->ref_val
== 0)
2145 hwc
->ref_val
= hwc
->val
; // save original reference
2147 hwc
->min_time
= 0; // turn off auto-adjust
2150 hwc_update_val (hwc
);
2155 canonical_name (const char *counter
)
2157 char *nameOnly
= NULL
;
2161 hwcfuncs_parse_ctr (counter
, NULL
, &nameOnly
, &attrs
, NULL
, NULL
);
2162 snprintf (tmpbuf
+ strlen (tmpbuf
), sizeof (tmpbuf
) - strlen (tmpbuf
),
2166 hwcfuncs_attr_t cpc2_attrs
[HWCFUNCS_MAX_ATTRS
];
2171 /* extract attributes from counter */
2172 attr_mem
= hwcfuncs_parse_attrs (counter
, cpc2_attrs
, HWCFUNCS_MAX_ATTRS
,
2176 snprintf (tmpbuf
+ strlen (tmpbuf
), sizeof (tmpbuf
) - strlen (tmpbuf
),
2178 goto canonical_attrs_wrapup
;
2181 /* sort the attributes */
2182 for (ii
= 0; ii
< (int) nattrs
- 1; ii
++)
2184 for (jj
= ii
+ 1; jj
< nattrs
; jj
++)
2186 int cmp
= strcmp (cpc2_attrs
[ii
].ca_name
,
2187 cpc2_attrs
[jj
].ca_name
);
2190 hwcfuncs_attr_t tmp
= cpc2_attrs
[jj
];
2191 cpc2_attrs
[jj
] = cpc2_attrs
[ii
];
2192 cpc2_attrs
[ii
] = tmp
;
2197 /* print attributes in canonical format */
2198 for (ii
= 0; ii
< nattrs
; ii
++)
2199 snprintf (tmpbuf
+ strlen (tmpbuf
), sizeof (tmpbuf
) - strlen (tmpbuf
),
2200 "~%s=0x%llx", cpc2_attrs
[ii
].ca_name
, (long long) cpc2_attrs
[ii
].ca_val
);
2203 canonical_attrs_wrapup
:
2206 return strdup (tmpbuf
);
2209 /* process counter and value strings - put results in <*pret_ctr> */
2211 /* Print errors to UEbuf for any failure that results in nonzero return */
/* Parameters, as used by the visible code:
     forKernel             index into the cpcx_std / cpcx_hidden / cpcx_raw tables
     global_min_time_nsec  forwarded unchanged to set_hwcval
     counter, value        counter specification and optional interval text
     pret_ctr              receives a shallow copy of the matching entry; its
                           name and int_name are then replaced by fresh heap
                           strings (strdup or calloc)
     UWbuf, UWsz           warning text is appended here
     UEbuf, UEsz           error text is appended here
   Every message is appended at strlen of the buffer, limited to the space
   that remains.  */
2213 process_ctr_def (int forKernel
, hrtime_t global_min_time_nsec
,
2214 const char *counter
, const char *value
, Hwcentry
*pret_ctr
,
2215 char* UWbuf
, size_t UWsz
, char* UEbuf
, size_t UEsz
)
2218 char *nameOnly
= NULL
;
2220 char *regstr
= NULL
;
2223 const Hwcentry
*pfound
= NULL
;
2224 const char *uname
= NULL
;
2225 int disable_backtrack
;
2228 Tprintf (DBG_LT3
, "hwctable: process_ctr_def(): counter=%s value=%s \n",
2229 counter
, value
? value
: "NULL");
2230 hwcfuncs_parse_ctr (counter
, &plus
, &nameOnly
, &attrs
, ®str
, ®no
);
2232 /* search for the counter in the std and raw lists */
/* Lookups appear in this order: cpcx_std, cpcx_hidden and cpcx_raw for the
   parsed regno, then cpcx_std and cpcx_hidden again with REGNO_ANY, then a
   purely numeric name.  Presumably each later lookup runs only when the
   earlier ones found nothing -- the guarding conditions are not visible here.  */
2234 pfound
= ptrarray_find ((const Hwcentry
**) cpcx_std
[forKernel
], nameOnly
, NULL
, 1, regno
);
2236 hwcentry_print (DBG_LT1
, "hwctable: process_ctr_def: found in stdlist:",
2241 pfound
= ptrarray_find ((const Hwcentry
**) cpcx_hidden
[forKernel
], nameOnly
, NULL
, 1, regno
);
2243 hwcentry_print (DBG_LT1
, "hwctable: process_ctr_def: found in stdlist(hidden):", pfound
);
2247 pfound
= ptrarray_find_by_name (cpcx_raw
[forKernel
], nameOnly
); /* (regno match checked later) */
2249 hwcentry_print (DBG_LT1
, "hwctable: process_ctr_def: found in rawlist:", pfound
);
2253 pfound
= ptrarray_find ((const Hwcentry
**) cpcx_std
[forKernel
], nameOnly
, NULL
, 1, REGNO_ANY
);
2255 hwcentry_print (DBG_LT1
, "hwctable: process_ctr_def: found in stdlist but regno didn't match:", pfound
);
2259 pfound
= ptrarray_find ((const Hwcentry
**) cpcx_hidden
[forKernel
], nameOnly
, NULL
, 1, REGNO_ANY
);
2261 hwcentry_print (DBG_LT1
, "hwctable: process_ctr_def: found in stdlist(hidden) but regno didn't match:", pfound
);
/* a name that is_numeric accepts gets a freshly allocated entry copied from empty_ctr */
2266 if (is_numeric (nameOnly
, &val
))
2268 Hwcentry
*tmp
= alloc_shallow_copy (&empty_ctr
); // Leaks?
2271 tmp
->name
= strdup (nameOnly
);
2276 hwcentry_print (DBG_LT1
, "hwctable: process_ctr_def: counter specified by numeric value:", pfound
);
/* unknown counter name: report it, point the user at the "-h" listing, and bail out */
2280 snprintf (UEbuf
+ strlen (UEbuf
), UEsz
- strlen (UEbuf
),
2281 GTXT ("Invalid HW counter name: %s\n"), nameOnly
);
2282 snprintf (UEbuf
+ strlen (UEbuf
), UEsz
- strlen (UEbuf
),
2283 GTXT ("Run \"%s -h\" with no other arguments for more information on HW counters on this system.\n"),
2284 (IS_KERNEL (forKernel
) ? "er_kernel" : "collect"));
2285 goto process_ctr_def_wrapup
;
2289 *pret_ctr
= *pfound
; /* shallow copy */
2290 pret_ctr
->int_name
= NULL
; /* so free doesn't try to free these pfound's ptrs */
2291 pret_ctr
->name
= NULL
; /* so free doesn't try to free these pfound's ptrs */
2293 /* update uname,memop */
2295 disable_backtrack
= 0;
2296 if (plus
!= 0 || ABST_PLUS_BY_DEFAULT (pret_ctr
->memop
))
2298 // attempt to process memoryspace profiling
2299 int message_printed
= 0;
2300 if (cpcx_cpuver
== CPUVER_GENERIC
)
2302 // accept plus, since we don't know what this CPU is
2303 snprintf (UEbuf
+ strlen (UEbuf
), UEsz
- strlen (UEbuf
),
2304 GTXT ("`+' may not be correctly supported on `%s' because processor is not recognized."),
2306 pret_ctr
->memop
= ABST_LDST
; // supply a backtracking data type - required for collector
/* UltraSPARC family: backtracking is dropped unless the counter's memop enables it */
2308 else if (cpcx_cpuver
== CPC_ULTRA1
|| cpcx_cpuver
== CPC_ULTRA2
2309 || cpcx_cpuver
== CPC_ULTRA3
|| cpcx_cpuver
== CPC_ULTRA3_PLUS
2310 || cpcx_cpuver
== CPC_ULTRA3_I
|| cpcx_cpuver
== CPC_ULTRA4_PLUS
2311 || cpcx_cpuver
== CPC_ULTRA4
|| cpcx_cpuver
== CPC_ULTRA_T1
2312 || cpcx_cpuver
== CPC_ULTRA_T2
|| cpcx_cpuver
== CPC_ULTRA_T2P
2313 || cpcx_cpuver
== CPC_ULTRA_T3
)
2315 if (!ABST_BACKTRACK_ENABLED (pret_ctr
->memop
))
2316 disable_backtrack
= 1;
/* SPARC T4..M8: only counters whose memop is ABST_EXACT keep `+' */
2318 else if (cpcx_cpuver
== CPC_SPARC_T4
|| cpcx_cpuver
== CPC_SPARC_T5
2319 || cpcx_cpuver
== CPC_SPARC_T6
|| cpcx_cpuver
== CPC_SPARC_M4
2320 || cpcx_cpuver
== CPC_SPARC_M5
|| cpcx_cpuver
== CPC_SPARC_M6
2321 || cpcx_cpuver
== CPC_SPARC_M7
|| cpcx_cpuver
== CPC_SPARC_M8
)
2323 if (pret_ctr
->memop
!= ABST_EXACT
)
2324 disable_backtrack
= 1;
/* Intel Nehalem..Skylake: only counters whose memop is ABST_EXACT_PEBS_PLUS1 keep `+' */
2326 else if (cpcx_cpuver
== CPC_INTEL_NEHALEM
|| cpcx_cpuver
== CPC_INTEL_WESTMERE
2327 || cpcx_cpuver
== CPC_INTEL_SANDYBRIDGE
2328 || cpcx_cpuver
== CPC_INTEL_IVYBRIDGE
2329 || cpcx_cpuver
== CPC_INTEL_HASWELL
2330 || cpcx_cpuver
== CPC_INTEL_BROADWELL
2331 || cpcx_cpuver
== CPC_INTEL_SKYLAKE
)
2333 if (pret_ctr
->memop
!= ABST_EXACT_PEBS_PLUS1
)
2334 disable_backtrack
= 1;
2337 // disabling memoryspace not supported for
2338 // remove specified -
2341 snprintf (UWbuf
+ strlen (UWbuf
), UWsz
- strlen (UWbuf
),
2342 GTXT ("Warning: `-' is not supported on `%s' -- memory reference backtracking will remain enabled for this counter\n"),
2348 message_printed
= 1;
2349 snprintf (UWbuf
+ strlen (UWbuf
), UWsz
- strlen (UWbuf
),
2350 GTXT ("Warning: `+' is not supported on `%s' -- memory reference backtracking will not be enabled for `%s'\n"),
2351 cpcx_cciname
, nameOnly
);
2352 disable_backtrack
= 1;
2354 if (disable_backtrack
)
2357 uname
++; // remove specified + or -
/* message_printed prevents a second `+' warning for the same counter */
2358 if (!message_printed
&& plus
> 0)
2359 snprintf (UWbuf
+ strlen (UWbuf
), UWsz
- strlen (UWbuf
),
2360 GTXT ("Warning: `+' is not supported on `%s' -- memory reference backtracking will not be enabled for this counter\n"),
2365 disable_backtrack
= 1;
/* with backtracking off (or an explicit `-'), memop becomes ABST_NONE unless it is ABST_NOPC */
2366 if (disable_backtrack
|| plus
< 0)
2367 if (pret_ctr
->memop
!= ABST_NOPC
)
2368 pret_ctr
->memop
= ABST_NONE
;
2369 if (pret_ctr
->memop
== ABST_NOPC
)
2370 snprintf (UWbuf
+ strlen (UWbuf
), UWsz
- strlen (UWbuf
),
/* NOTE(review): the message below reads "will be not be recorded"; the wording looks like a typo */
2371 GTXT ("Warning: HW counter `%s' is not program-related -- callstacks will be not be recorded for this counter\n"),
2374 /* update name and int_name */
2376 // validate attributes
2379 hwcfuncs_attr_t cpc2_attrs
[HWCFUNCS_MAX_ATTRS
];
2383 /* extract attributes from uname */
2384 attr_mem
= hwcfuncs_parse_attrs (uname
, cpc2_attrs
, HWCFUNCS_MAX_ATTRS
,
2388 snprintf (UEbuf
+ strlen (UEbuf
), UEsz
- strlen (UEbuf
),
2391 goto process_ctr_def_wrapup
;
2393 /* make sure all attributes are valid */
2394 for (unsigned ii
= 0; ii
< nattrs
; ii
++)
2396 if (!attr_is_valid (forKernel
, cpc2_attrs
[ii
].ca_name
))
2398 snprintf (UEbuf
+ strlen (UEbuf
), UEsz
- strlen (UEbuf
),
2399 GTXT ("Invalid attribute specified for counter `%s': %s\n"),
2400 nameOnly
, cpc2_attrs
[ii
].ca_name
);
2401 snprintf (UEbuf
+ strlen (UEbuf
), UEsz
- strlen (UEbuf
),
2402 GTXT ("Run \"%s -h\" with no other arguments for more information on HW counters on this system.\n"),
2403 (IS_KERNEL (forKernel
) ? "er_kernel" : "collect"));
2405 goto process_ctr_def_wrapup
;
/* reject an attribute name that occurs again later in the list */
2407 for (unsigned jj
= ii
+ 1; jj
< nattrs
; jj
++)
2409 if (strcmp (cpc2_attrs
[ii
].ca_name
,
2410 cpc2_attrs
[jj
].ca_name
) == 0)
2412 snprintf (UEbuf
+ strlen (UEbuf
), UEsz
- strlen (UEbuf
),
2413 GTXT ("Duplicate attribute specified for counter `%s': %s\n"),
2414 nameOnly
, cpc2_attrs
[ii
].ca_name
);
2416 goto process_ctr_def_wrapup
;
2422 pret_ctr
->name
= strdup (uname
);
2425 if (pfound
->int_name
)
2427 // Counter is one of the following:
2428 // - aliased (e.g. cycles~system=1),
2429 // - convenience (e.g. cycles0~system=1),
2430 if (!attrs
) // convert alias to internal name
2431 pret_ctr
->int_name
= strdup (pfound
->int_name
);
2434 // convert alias to internal name and
2435 // append user-supplied attributes
/* sz covers both strings plus the terminating NUL */
2436 size_t sz
= strlen (pfound
->int_name
) + strlen (attrs
) + 1;
2437 char *tbuf
= calloc (sz
, 1);
2439 snprintf (tbuf
, sz
, "%s%s", pfound
->int_name
, attrs
);
2440 pret_ctr
->int_name
= tbuf
;
2444 pret_ctr
->int_name
= strdup (uname
); // user-supplied name
/* a nonzero result from set_hwcval is reported as an invalid interval */
2448 if (set_hwcval (pret_ctr
, global_min_time_nsec
, value
))
2450 snprintf (UEbuf
+ strlen (UEbuf
), UEsz
- strlen (UEbuf
),
2451 GTXT ("Invalid interval for HW counter `%s': %s\n"),
2453 goto process_ctr_def_wrapup
;
2455 hwcentry_print (DBG_LT2
, "hwctable: process_ctr_def:", pret_ctr
);
/* common exit label; the statements that follow it are not visible here */
2458 process_ctr_def_wrapup
:
2465 /*---------------------------------------------------------------------------*/
2467 /* external interfaces, see hwcentry.h for descriptions. */
/* hwc_lookup: parse <instring> into tokens and resolve them into counters.
   Tokens are delimited by ',', ' ' or '\t' and are consumed in pairs
   (counter, value); each pair is handed to process_ctr_def together with the
   next free slot of the result table.  Error text is collected in UEbuf and
   warning text in UWbuf; when non-empty they are returned to the caller as
   strdup'd strings through *emsg and *wmsg.  */
2470 hwc_lookup (int forKernel
, hrtime_t global_min_time_nsec
, const char *instring
,
2471 Hwcentry
*caller_entries
[], unsigned maxctrs
, char **emsg
, char **wmsg
)
2474 char *instr_copy
= NULL
, *ss
= NULL
;
2475 unsigned numctrs
= 0;
/* two tokens (name and value) per counter */
2477 char *tokenptr
[MAX_PICS
* 2];
2478 unsigned numtokens
= 0;
2479 char UEbuf
[1024 * 5]; /* error message buffer; strdup of it is passed back to user */
2480 char UWbuf
[1024 * 5]; /* warning message buffer; strdup of it is passed back to user */
2488 // supply temporary result buffers as needed
2489 Hwcentry tmp_entry_table
[MAX_PICS
];
2490 Hwcentry
* tmp_entries
[MAX_PICS
];
2493 entries
= caller_entries
;
2496 // user doesn't care about results; provide temporary storage for results
2497 for (ii
= 0; ii
< MAX_PICS
; ii
++)
2498 tmp_entries
[ii
] = &tmp_entry_table
[ii
];
2499 entries
= tmp_entries
;
2502 Tprintf (DBG_LT1
, "hwctable: hwc_lookup(%s)\n",
2503 instring
? instring
: "NULL");
2505 /* clear <entries> first - prevent seg faults in hwc_lookup_wrapup */
/* NOTE(review): this loop is bounded by maxctrs, while the temporary tables
   above hold MAX_PICS entries; confirm maxctrs cannot exceed MAX_PICS when
   the temporary storage is in use */
2506 for (ii
= 0; ii
< maxctrs
; ii
++)
2507 *entries
[ii
] = empty_ctr
;
2510 snprintf (UEbuf
+ strlen (UEbuf
), sizeof (UEbuf
) - strlen (UEbuf
),
2511 GTXT ("No HW counters were specified."));
2513 goto hwc_lookup_wrapup
;
2516 /* make sure tables are initialized */
2517 setup_cpc_skip_hwctest ();
/* no counters are available: explain why and give up */
2518 if (cpcx_npics
== 0)
2520 if (cpcx_cpuver
< 0)
2524 char *pch
= hwcfuncs_errmsg_get (buf
, sizeof (buf
), 0); /* get first err msg, disable capture */
2526 snprintf (UEbuf
+ strlen (UEbuf
), sizeof (UEbuf
) - strlen (UEbuf
),
2527 GTXT ("HW counter profiling is not supported on this system: %s%s"),
/* add a newline only when the captured message does not already end with one;
   pch[strlen (pch) - 1] needs a non-empty pch (any guard for that is not visible here) */
2528 pch
, pch
[strlen (pch
) - 1] == '\n' ? "" : "\n");
2530 snprintf (UEbuf
+ strlen (UEbuf
), sizeof (UEbuf
) - strlen (UEbuf
),
2531 GTXT ("HW counter profiling is not supported on this system\n"));
2534 snprintf (UEbuf
+ strlen (UEbuf
), sizeof (UEbuf
) - strlen (UEbuf
),
2535 GTXT ("HW counter profiling is not supported on '%s'\n"),
2538 goto hwc_lookup_wrapup
;
/* tokenize a private copy, because delimiters are overwritten with NUL below */
2540 ss
= instr_copy
= strdup (instring
);
2541 while (*ss
!= 0 && (*ss
== ' ' || *ss
== '\t'))
2543 tokenptr
[numtokens
++] = ss
;
2546 /* find end of previous token, replace w/ NULL, skip whitespace, set <tokenptr>, repeat */
2549 if (*ss
== ',' || *ss
== ' ' || *ss
== '\t')
2551 /* end of previous token found */
2552 *ss
= 0; /* terminate the previous token */
2554 while (*ss
!= 0 && (*ss
== ' ' || *ss
== '\t'))
2557 tokenptr
[numtokens
++] = ss
;
2558 break; // from for loop
/* stop at end of input or when tokenptr[] is full */
2562 while (*ss
&& numtokens
< (MAX_PICS
* 2));
2566 snprintf (UEbuf
+ strlen (UEbuf
), sizeof (UEbuf
) - strlen (UEbuf
),
2567 GTXT ("The number of HW counters specified exceeds internal resources\n"));
2568 snprintf (UEbuf
+ strlen (UEbuf
), sizeof (UEbuf
) - strlen (UEbuf
),
2569 GTXT ("Run \"%s -h\" with no other arguments for more information on HW counters on this system.\n"),
2570 (IS_KERNEL (forKernel
) ? "er_kernel" : "collect"));
2572 goto hwc_lookup_wrapup
;
2574 Tprintf (DBG_LT3
, "hwctable: hwc_lookup(): numtokens=%d\n", numtokens
);
2576 /* look up individual counters */
/* tokens come in (counter, value) pairs, hence the step of 2 */
2579 for (ii
= 0; ii
< numtokens
&& numctrs
< maxctrs
; ii
+= 2)
2581 const char *counter
;
2583 Hwcentry
*pret_ctr
= entries
[numctrs
];
2585 /* assign the tokens to ctrnames, timeoutValues. */
2586 counter
= tokenptr
[ii
];
/* the last counter may come without a value token */
2587 if (ii
+ 1 < numtokens
)
2588 value
= tokenptr
[ii
+ 1];
/* process_ctr_def is handed the current end of each message buffer and the space left in it */
2591 if (process_ctr_def (forKernel
, global_min_time_nsec
, counter
, value
, pret_ctr
,
2592 UWbuf
+ strlen (UWbuf
),
2593 sizeof (UWbuf
) - strlen (UWbuf
),
2594 UEbuf
+ strlen (UEbuf
),
2595 sizeof (UEbuf
) - strlen (UEbuf
)))
2597 /* could choose to set fail=1 and continue here,
2598 but errmsgs would be aggregated (messy) */
2600 goto hwc_lookup_wrapup
;
2607 goto hwc_lookup_wrapup
;
2613 snprintf (UEbuf
+ strlen (UEbuf
), sizeof (UEbuf
) - strlen (UEbuf
),
2614 GTXT ("No HW counters were specified.\n"));
2616 goto hwc_lookup_wrapup
;
/* enforce the limit on simultaneously active counters */
2618 if (numctrs
> cpcx_max_concurrent
[forKernel
])
2620 snprintf (UEbuf
+ strlen (UEbuf
), sizeof (UEbuf
) - strlen (UEbuf
),
2621 GTXT ("The HW counter configuration could not be loaded: More than %d counters were specified\n"), cpcx_max_concurrent
[forKernel
]);
2622 snprintf (UEbuf
+ strlen (UEbuf
), sizeof (UEbuf
) - strlen (UEbuf
),
2623 GTXT ("Run \"%s -h\" with no other arguments for more information on HW counters on this system.\n"),
2624 (IS_KERNEL (forKernel
) ? "er_kernel" : "collect"));
2626 goto hwc_lookup_wrapup
;
/* hand non-empty message buffers back as heap copies, if the caller asked for them */
2631 if (wmsg
&& strlen (UWbuf
))
2632 *wmsg
= strdup (UWbuf
);
2633 if (emsg
&& strlen (UEbuf
))
2634 *emsg
= strdup (UEbuf
);
/* hwc_validate_ctrs: run test_hwcs on <entries> with error-message capture
   enabled.  On the failure paths visible here an explanatory message is
   built in UEbuf and returned as a strdup'd string; forKernel only selects
   the tool name ("er_kernel" or "collect") used in the hint text.  */
2641 hwc_validate_ctrs (int forKernel
, Hwcentry
*entries
[], unsigned numctrs
)
2643 char UEbuf
[1024 * 5];
2647 hwcfuncs_errmsg_get (NULL
, 0, 1); /* enable errmsg capture */
2648 int hwc_rc
= test_hwcs ((const Hwcentry
**) entries
, numctrs
);
/* Pentium 4 variants get a dedicated HyperThreading explanation */
2651 if (cpcx_cpuver
== CPC_PENTIUM_4_HT
|| cpcx_cpuver
== CPC_PENTIUM_4
)
2653 snprintf (UEbuf
+ strlen (UEbuf
), sizeof (UEbuf
) - strlen (UEbuf
),
2654 GTXT ("HW counter profiling is disabled unless only one logical CPU per HyperThreaded processor is online (see psradm)\n"));
2655 return strdup (UEbuf
);
2659 char * pch
= hwcfuncs_errmsg_get (buf
, sizeof (buf
), 0); /* get first err msg, disable capture */
2661 snprintf (UEbuf
+ strlen (UEbuf
), sizeof (UEbuf
) - strlen (UEbuf
),
2662 GTXT ("The HW counter configuration could not be loaded: %s%s"),
/* add a newline only when the captured message does not already end with one */
2663 pch
, pch
[strlen (pch
) - 1] == '\n' ? "" : "\n");
2665 snprintf (UEbuf
+ strlen (UEbuf
), sizeof (UEbuf
) - strlen (UEbuf
),
2666 GTXT ("The HW counter configuration could not be loaded\n"));
2667 snprintf (UEbuf
+ strlen (UEbuf
), sizeof (UEbuf
) - strlen (UEbuf
),
2668 GTXT ("Run \"%s -h\" with no other arguments for more information on HW counters on this system.\n"),
2669 (IS_KERNEL (forKernel
) ? "er_kernel" : "collect"));
2670 return strdup (UEbuf
);
/* hwc_post_lookup: fill <*pret_ctr> for <counter> / <int_name> using the
   static tables that belong to <cpuver>.
   The table returned by stdlist_get_table (cpuver) is searched first, then
   papi_generic_list.  A match is shallow-copied and its string fields are
   replaced by heap copies; without a match the entry is reset to empty_ctr
   and int_name is set to a copy of <counter>.  */
2676 hwc_post_lookup (Hwcentry
* pret_ctr
, char *counter
, char * int_name
, int cpuver
)
2678 const Hwcentry
*pfound
;
2680 char *nameOnly
= NULL
;
2683 /* fields in pret_ctr (name and int_name) should already be free */
2684 hwcfuncs_parse_ctr (counter
, NULL
, &nameOnly
, &attrs
, NULL
, ®no
);
2686 /* look for it in the canonical list */
2687 pfound
= static_table_find (stdlist_get_table (cpuver
),
2688 nameOnly
, int_name
, 0, REGNO_ANY
);
2689 if (!pfound
) /* try the generic list */
2690 pfound
= static_table_find (papi_generic_list
,
2691 nameOnly
, int_name
, 0, REGNO_ANY
);
2694 /* in standard list */
2695 *pret_ctr
= *pfound
; /* shallow copy */
2696 if (pret_ctr
->int_name
)
/* replace the pointer copied from the table with a private heap copy */
2699 pret_ctr
->int_name
= strdup (pret_ctr
->int_name
);
2700 if (pret_ctr
->short_desc
== NULL
)
2702 // look for short_desc of corresponding raw counter
2703 const Hwcentry
*praw
= static_table_find (stdlist_get_table (cpuver
),
2704 pret_ctr
->int_name
, NULL
, 0, REGNO_ANY
);
2705 if (praw
&& praw
->short_desc
)
2706 pret_ctr
->short_desc
= strdup (praw
->short_desc
);
2710 pret_ctr
->int_name
= strdup (counter
);
2714 /* not a standard counter */
2715 *pret_ctr
= empty_ctr
;
2716 pret_ctr
->int_name
= strdup (counter
);
2719 /* update the name */
2722 pret_ctr
->name
= canonical_name (counter
);
2723 if (pret_ctr
->metric
)
2725 // metric text is supplied from a table. (User supplied HWC alias)
2726 // Append user-supplied attributes to metric name:
/* "+ 4" accounts for " (", ")" and the terminating NUL of "%s (%s)" */
2727 size_t len
= strlen (pret_ctr
->metric
) + strlen (attrs
) + 4;
2728 char *pch
= calloc (len
, 1);
2730 snprintf (pch
, len
, "%s (%s)", pret_ctr
->metric
, attrs
);
2731 pret_ctr
->metric
= pch
; // leaks
2735 pret_ctr
->name
= strdup (nameOnly
);
2738 hwcentry_print (DBG_LT2
, "hwctable: hwc_post_lookup: found: ", pret_ctr
);
2740 hwcentry_print (DBG_LT2
, "hwctable: hwc_post_lookup: default: ", pret_ctr
);
/* hwc_on_lo_hi: select a rate string by switching on pctr->min_time.
   A NULL result tells the caller to use the numeric interval count.  The
   returned pointer is handed back as-is; the strdup variant is commented
   out on the return line.  */
2747 hwc_on_lo_hi (const Hwcentry
*pctr
)
2751 switch (pctr
->min_time
)
2763 rate
= NULL
; // null => use interval count
2771 return rate
; //strdup( rate );
/* hwc_rate_string: describe the overflow rate of <pctr> as text.
   When hwc_on_lo_hi yields no string, or when <force_numeric> is nonzero,
   pctr->val is formatted as a decimal number into buf; otherwise a strdup'd
   copy of the string from hwc_on_lo_hi is returned.  */
2775 hwc_rate_string (const Hwcentry
*pctr
, int force_numeric
)
2777 const char * rateString
= hwc_on_lo_hi (pctr
);
2779 if (!rateString
|| force_numeric
)
2781 snprintf (buf
, sizeof (buf
), NTXT ("%d"), pctr
->val
);
2784 return strdup (rateString
);
/* File-scope scratch buffer written by hwc_i18n_metric; every call overwrites it.  */
2787 static char metricbuf
[2048];
/* hwc_i18n_metric: write a display name for <pctr> into metricbuf.
   Preference order: pctr->metric passed through PTXT, then "<name> Events",
   then "<int_name> Events", and finally "Undefined Events".
   Presumably returns metricbuf -- the return statement is not visible here.  */
2790 hwc_i18n_metric (const Hwcentry
*pctr
)
2792 if (pctr
->metric
!= NULL
)
2793 snprintf (metricbuf
, sizeof (metricbuf
), NTXT ("%s"), PTXT (pctr
->metric
));
2794 else if (pctr
->name
!= NULL
)
2795 snprintf (metricbuf
, sizeof (metricbuf
), GTXT ("%s Events"), pctr
->name
);
2796 else if (pctr
->int_name
!= NULL
)
2797 snprintf (metricbuf
, sizeof (metricbuf
), GTXT ("%s Events"), pctr
->int_name
);
2799 snprintf (metricbuf
, sizeof (metricbuf
), GTXT ("Undefined Events"));
2803 /* return cpu version, should only be called when about to generate an experiment,
2804 not when reading back an experiment */
/* The "#if 0" region below is not compiled; it records the known call sites
   of hwc_get_cpc_cpuver as file / location / statement notes.  */
2805 #if 0 /* called by ... */
2806 . / perfan
/ collect
/ src
/ collect
.cc
: start
: 245 : cpuver
= hwc_get_cpc_cpuver ();
2807 . / ccr_components
/ Collector_Interface
/ collctrl
.cc
: constructor
: 202 : cpcx_cpuver
= hwc_get_cpc_cpuver ();
2808 . / perfan
/ dbe
/ src
/ Dbe
.cc
: 3041 : JApplication::cpuver
= hwc_get_cpc_cpuver ();
2809 . / perfan
/ dbe
/ src
/ Dbe
.cc
: 3164 : JApplication::cpuver
= hwc_get_cpc_cpuver ();
2812 cpc_getcpuver () : only papi
, ostest
, this and hwprofile
.c call it
/* hwc_get_cpc_cpuver takes no arguments; its body is not visible here.  */
2815 hwc_get_cpc_cpuver ()
/* hwc_get_cpuname: copy cpcx_cciname into the caller's buffer.
   A NULL <buf> or a zero <buflen> is checked before anything is written.
   At most buflen - 1 bytes are copied and the last byte of the buffer is
   then set to NUL, so the result is terminated even if strncpy truncates.  */
2822 hwc_get_cpuname (char *buf
, size_t buflen
)
2825 if (!buf
|| !buflen
)
2830 strncpy (buf
, cpcx_cciname
, buflen
- 1);
/* strncpy does not terminate on truncation, so terminate explicitly */
2831 buf
[buflen
- 1] = 0;
/* hwc_get_docref: copy cpcx_docref into the caller's buffer.
   A NULL <buf> or a zero <buflen> is checked before anything is written.
   At most buflen - 1 bytes are copied and the last byte of the buffer is
   then set to NUL, so the result is terminated even if strncpy truncates.  */
2837 hwc_get_docref (char *buf
, size_t buflen
)
2840 if (!buf
|| !buflen
)
2845 strncpy (buf
, cpcx_docref
, buflen
- 1);
/* strncpy does not terminate on truncation, so terminate explicitly */
2846 buf
[buflen
- 1] = 0;
/* hwc_get_default_cntrs2: return a heap-allocated rendering of the default
   counter list cpcx_default_hwcs[forKernel].
   One path returns a plain strdup of the list; the other builds a new
   string in which the "," delimiters between counters become " -h ".
   Presumably <style> chooses between the two -- the test is not visible
   here.  */
2852 hwc_get_default_cntrs2 (int forKernel
, int style
)
2855 if (!VALID_FOR_KERNEL (forKernel
))
2857 char *cpcx_default
= cpcx_default_hwcs
[forKernel
];
2858 if (cpcx_default
== NULL
|| cpcx_npics
== 0)
2861 return strdup (cpcx_default
);
2864 // we will replace "," delimiters with " -h " (an extra 3 chars per HWC)
/* NOTE(review): the size has no explicit "+ 1" for the terminating NUL;
   presumably the slack comes from trimmed commas -- confirm */
2865 char *s
= (char *) malloc (strlen (cpcx_default
) + 3 * cpcx_npics
);
2866 if (s
== NULL
) return s
;
2868 char *q
= cpcx_default
;
2870 for (i
= 0; i
< cpcx_npics
; i
++)
2872 int qlen
= strlen (q
);
2878 // add " -h " if not the first HWC
2888 // find second comma
/* each counter is written as "name,value", so the second comma ends it */
2889 char *r
= strchr (q
, ',');
2891 r
= strchr (r
+ 1, ',');
2893 // we didn't find one, so the rest of the string is the last HWC
2896 // EUGENE could check i==cpcx_npicx-1, but what if it isn't???
2898 if (p
[qlen
- 1] == ',')
2904 // copy the HWC, trim trailing comma, add null char
2907 if (p
[qlen
- 1] == ',')
/* hwc_get_orig_default_cntrs: return a strdup'd copy of
   cpcx_orig_default_hwcs[forKernel] when that entry is non-NULL.
   <forKernel> is checked with VALID_FOR_KERNEL first; what is returned in
   the other cases is not visible here.  */
2917 hwc_get_orig_default_cntrs (int forKernel
)
2920 if (!VALID_FOR_KERNEL (forKernel
))
2922 if (cpcx_orig_default_hwcs
[forKernel
] != NULL
)
2923 return strdup (cpcx_orig_default_hwcs
[forKernel
]);
/* hwc_memop_string: map an ABST_type value to a short, translated label.
   Every non-empty label ends with a space.  The fallback is the empty
   string.  Only some of the case labels are visible here.  */
2928 hwc_memop_string (ABST_type memop
)
2940 s
= GTXT ("store ");
2944 case ABST_LDST_SPARC64
:
2945 s
= GTXT ("load-store ");
2947 case ABST_EXACT_PEBS_PLUS1
:
2949 s
= GTXT ("memoryspace ");
2952 s
= GTXT ("count ");
2955 s
= GTXT ("not-program-related ");
2958 s
= ""; // was "ABST_UNK", but that's meaningless to users
/* timecvt_string: return a translated unit label for <timecvt>: one of
   "CPU-cycles", "ref-cycles" or "events".  The tests that choose between
   them are not visible here.  */
2965 timecvt_string (int timecvt
)
2968 return GTXT ("CPU-cycles");
2970 return GTXT ("ref-cycles");
2971 return GTXT ("events");
2974 int show_regs
= 0; // The register setting is available on Solaris only
/*
 * format_columns: print the specified strings in aligned columns.
 *
 *   buf, bufsiz  destination buffer and its size in bytes
 *   s1 .. s4     the four aligned columns (target widths 10, 13, 20, 10)
 *   s6           free-form trailing text; it may be as wide as it likes
 *
 * A NULL string is treated as an empty one.  Column widths are adjusted by
 * stealing room from one column to help a neighbor.  When s1 and s2 do not
 * fit side by side, the line is divided after s2 and the remaining columns
 * are placed on a second line, indented so that they stay aligned.
 * Trailing blanks and tabs are removed from the result.
 */
static void
format_columns (char *buf, int bufsiz, char *s1, char *s2, const char *s3,
		const char *s4, const char *s6)
{
  if (buf == NULL || bufsiz <= 0)
    return;

  // NULL strings are blanks
  char *blank = "";
  if (s1 == NULL)
    s1 = blank;
  if (s2 == NULL)
    s2 = blank;
  if (s3 == NULL)
    s3 = blank;
  if (s4 == NULL)
    s4 = blank;
  if (s6 == NULL)
    s6 = blank;

  // get the lengths and target widths
  // (s6 can be as wide as it likes)
  int l1 = strlen (s1), n1 = 10, l2 = strlen (s2), n2 = 13;
  int l3 = strlen (s3), n3 = 20, l4 = strlen (s4), n4 = 10;
  char divide = ' ';

  // adjust widths, stealing from one column to help a neighbor
  // There's a ragged boundary between s2 and s3.
  // So push this boundary to the right.
  n2 += n3 - l3;
  n3 = l3;

  // If s3 is empty, push the boundary over to s4.
  if (l3 == 0)
    {
      n2 += n4 - l4;
      n4 = l4;
    }

  // If there's enough room to fit s1 and s2, do so.
  if (n1 + n2 >= l1 + l2)
    {
      if (n1 < l1)
	{
	  n2 -= l1 - n1;
	  n1 += l1 - n1;
	}
      if (n2 < l2)
	{
	  n1 -= l2 - n2;
	  n2 += l2 - n2;
	}
    }
  else
    {
      // not enough room, so we need to divide the line
      n3 += 4 // 4-blank margin
	      + n1 // 1st column
	      + 1 // space between 1st and 2nd columns
	      + n2 // 2nd column
	      + 1; // space between 2nd and 3rd columns
      divide = '\n';

      // make 1st column large enough
      if (n1 < l1)
	n1 = l1;

      // width of 2nd column no longer matters since we divided the line
      n2 = 0;
    }

  snprintf (buf, bufsiz, "%-*s %-*s%c%*s%*s %s",
	    n1, s1, n2, s2, divide, n3, s3, n4, s4, s6);

  // Trim trailing blanks.  The scan must begin at the last character, not
  // at the terminating NUL: starting on the NUL ends the loop at once and
  // leaves the padding in place.
  size_t len = strlen (buf);
  while (len > 0 && (buf[len - 1] == ' ' || buf[len - 1] == '\t'))
    buf[--len] = '\0';
}
3052 /* routine to return HW counter string formatted and i18n'd */
/* <show_short_desc> selects the description passed to format_columns:
   ctr->short_desc when nonzero; otherwise the i18n'd metric name when
   ctr->metric is set, else NULL.  A NULL <buf> or zero <buflen> is checked
   first, and the text "HW counter not available" is produced on another
   path (its condition is not visible here).  */
3054 hwc_hwcentry_string_internal (char *buf
, size_t buflen
, const Hwcentry
*ctr
,
3055 int show_short_desc
)
3057 if (!buf
|| !buflen
)
3061 snprintf (buf
, buflen
, GTXT ("HW counter not available"));
3065 if (show_short_desc
)
3066 desc
= ctr
->short_desc
;
3068 desc
= ctr
->metric
? hwc_i18n_metric (ctr
) : NULL
;
/* columns: name, internal name, memop label, time-conversion label, ... */
3069 format_columns (buf
, buflen
, ctr
->name
, ctr
->int_name
,
3070 hwc_memop_string (ctr
->memop
), timecvt_string (ctr
->timecvt
),
3075 /* routine to return HW counter string formatted and i18n'd */
/* Thin wrapper: calls hwc_hwcentry_string_internal with show_short_desc == 0.  */
3077 hwc_hwcentry_string (char *buf
, size_t buflen
, const Hwcentry
*ctr
)
3079 return hwc_hwcentry_string_internal (buf
, buflen
, ctr
, 0);
3082 /* routine to return HW counter string formatted and i18n'd */
/* Produces "<name>,<rate><description>" in <buf>.  The description is
   " (`<metric>'; <memop><timecvt>)" when ctr->metric is set and
   " (<memop><timecvt>)" otherwise.  The rate is always requested in
   numeric form (force_numeric == 1).  */
3084 hwc_hwcentry_specd_string (char *buf
, size_t buflen
, const Hwcentry
*ctr
)
3086 const char *memop
, *timecvt
;
3088 if (!buf
|| !buflen
)
3092 snprintf (buf
, buflen
, GTXT ("HW counter not available"));
3095 timecvt
= timecvt_string (ctr
->timecvt
);
3097 memop
= hwc_memop_string (ctr
->memop
);
3100 if (ctr
->metric
!= NULL
) /* a standard counter for a specific register */
3101 snprintf (descstr
, sizeof (descstr
), " (`%s'; %s%s)",
3102 hwc_i18n_metric (ctr
), memop
, timecvt
);
3103 else /* raw counter */
3104 snprintf (descstr
, sizeof (descstr
), " (%s%s)", memop
, timecvt
);
/* hwc_rate_string hands back a strdup'd string; NOTE(review): confirm it is
   freed after use -- no free is visible here */
3106 char *rateString
= hwc_rate_string (ctr
, 1);
3107 snprintf (buf
, buflen
, "%s,%s%s", ctr
->name
,
3108 rateString
? rateString
: "", descstr
);
/* hwc_get_max_concurrent: return cpcx_max_concurrent[forKernel].
   <forKernel> is checked with VALID_FOR_KERNEL first; the value returned
   for an invalid index is not visible here.  */
3121 hwc_get_max_concurrent (int forKernel
)
3124 if (!VALID_FOR_KERNEL (forKernel
))
3126 return cpcx_max_concurrent
[forKernel
];
/* hwc_get_attrs: return cpcx_attrs[forKernel].
   <forKernel> is checked with VALID_FOR_KERNEL first; the value returned
   for an invalid index is not visible here.  */
3130 hwc_get_attrs (int forKernel
)
3133 if (!VALID_FOR_KERNEL (forKernel
))
3135 return cpcx_attrs
[forKernel
];
/* hwc_get_std_ctrs: return cpcx_std[forKernel].
   <forKernel> is checked with VALID_FOR_KERNEL first; the value returned
   for an invalid index is not visible here.  */
3139 hwc_get_std_ctrs (int forKernel
)
3142 if (!VALID_FOR_KERNEL (forKernel
))
3144 return cpcx_std
[forKernel
];
/* hwc_get_raw_ctrs: return cpcx_raw[forKernel].
   <forKernel> is checked with VALID_FOR_KERNEL first; the value returned
   for an invalid index is not visible here.  */
3148 hwc_get_raw_ctrs (int forKernel
)
3151 if (!VALID_FOR_KERNEL (forKernel
))
3153 return cpcx_raw
[forKernel
];
3156 /* Call an action function for each attribute supported */
/* Walks cpcx_attrs[0] until its NULL terminator, counting entries in cnt,
   and calls <action> with each attribute name and a NULL description.
   A further action (NULL, NULL) call follows the loop; any condition
   guarding it is not visible here.  */
3158 hwc_scan_attrs (void (*action
)(const char *attr
, const char *desc
))
/* the table pointer itself is tested too, so a missing table gives zero iterations */
3162 for (int ii
= 0; cpcx_attrs
[0] && cpcx_attrs
[0][ii
]; ii
++, cnt
++)
3165 action (cpcx_attrs
[0][ii
], NULL
);
3168 action (NULL
, NULL
);
/* hwc_scan_std_ctrs: walk cpcx_std[0] until its NULL terminator, counting
   entries in cnt, and call <action> once for each entry.  */
3173 hwc_scan_std_ctrs (void (*action
)(const Hwcentry
*))
3176 Tprintf (DBG_LT1
, "hwctable: hwc_scan_standard_ctrs()...\n");
/* the table pointer itself is tested too, so a missing table gives zero iterations */
3178 for (int ii
= 0; cpcx_std
[0] && cpcx_std
[0][ii
]; ii
++, cnt
++)
3180 action (cpcx_std
[0][ii
]);
3186 /* Call an action function for each counter supported */
3187 /* action is called with NULL when all counters have been seen */
/* Walks cpcx_raw[0] until its NULL terminator, counting entries in cnt.
   The final call with NULL described above is not visible here.  */
3189 hwc_scan_raw_ctrs (void (*action
)(const Hwcentry
*))
3192 Tprintf (DBG_LT1
, "hwctable: hwc_scan_raw_ctrs()...\n");
/* the table pointer itself is tested too, so a missing table gives zero iterations */
3194 for (int ii
= 0; cpcx_raw
[0] && cpcx_raw
[0][ii
]; ii
++, cnt
++)
3196 action (cpcx_raw
[0][ii
]);
3203 hwc_usage_raw_overview_sparc (FILE *f_usage
, int cpuver
)
3205 /* All these cpuver's use cputabs[]==sparc_t5_m6 anyhow. */
3206 if ((cpuver
== CPC_SPARC_M5
) || (cpuver
== CPC_SPARC_M6
)
3207 || (cpuver
== CPC_SPARC_T5
) || (cpuver
== CPC_SPARC_T6
))
3208 cpuver
= CPC_SPARC_M4
; // M4 was renamed to M5
3210 /* While there are small differences between
3211 * cputabs[]== sparc_t4
3212 * cputabs[]== sparc_t5_m6
3213 * they are in HWCs we don't discuss in the overview anyhow.
3214 * So just lump them in with T4.
3216 if (cpuver
== CPC_SPARC_M4
)
3217 cpuver
= CPC_SPARC_T4
;
3219 /* Check for the cases we support. */
3220 if (cpuver
!= CPC_SPARC_T4
&& cpuver
!= CPC_SPARC_M7
&& cpuver
!= CPC_SPARC_M8
)
3222 fprintf (f_usage
, GTXT (" While the above aliases represent the most useful hardware counters\n"
3223 " for this processor, a full list of raw (unaliased) counter names appears\n"
3224 " below. First is an overview of some of these names.\n\n"));
3225 fprintf (f_usage
, GTXT (" == Cycles.\n"
3226 " Count active cycles with\n"
3228 " Set attributes to choose user, system, and/or hyperprivileged cycles.\n\n"));
3229 fprintf (f_usage
, GTXT (" == Instructions.\n"
3230 " Count instructions when they are committed with:\n"));
3231 fprintf (f_usage
, NTXT (" Instr_all\n"));
3232 if (cpuver
!= CPC_SPARC_M8
)
3233 fprintf (f_usage
, GTXT (" It is the total of these counters:\n"));
3235 fprintf (f_usage
, GTXT (" Some subsets of instructions can be counted separately:\n"));
3236 fprintf (f_usage
, NTXT (" Branches %s\n"), GTXT ("branches"));
3237 fprintf (f_usage
, NTXT (" Instr_FGU_crypto %s\n"), GTXT ("Floating Point and Graphics Unit"));
3238 fprintf (f_usage
, NTXT (" Instr_ld %s\n"), GTXT ("loads"));
3239 fprintf (f_usage
, NTXT (" Instr_st %s\n"), GTXT ("stores"));
3240 fprintf (f_usage
, NTXT (" %-19s %s\n"),
3241 cpuver
== CPC_SPARC_M7
? NTXT ("Instr_SPR_ring_ops")
3242 : NTXT ("SPR_ring_ops"),
3243 GTXT ("internal use of SPR ring"));
3244 fprintf (f_usage
, NTXT (" Instr_other %s\n"), GTXT ("basic arithmetic and logical instructions"));
3245 if (cpuver
!= CPC_SPARC_M8
)
3246 fprintf (f_usage
, GTXT (" Some subsets of these instructions can be counted separately:\n"));
3247 fprintf (f_usage
, NTXT (" Br_taken %s\n"), GTXT ("Branches that are taken"));
3248 fprintf (f_usage
, NTXT (" %-19s %s\n"),
3249 cpuver
== CPC_SPARC_M7
? NTXT ("Instr_block_ld_st")
3250 : NTXT ("Block_ld_st"),
3251 GTXT ("block load/store"));
3252 fprintf (f_usage
, NTXT (" %-19s %s\n"),
3253 cpuver
== CPC_SPARC_M7
? NTXT ("Instr_atomic")
3255 GTXT ("atomic instructions"));
3256 fprintf (f_usage
, NTXT (" %-19s %s\n"),
3257 cpuver
== CPC_SPARC_M7
? NTXT ("Instr_SW_prefetch")
3258 : NTXT ("SW_prefetch"),
3259 GTXT ("prefetches"));
3260 fprintf (f_usage
, NTXT (" %-19s %s\n"),
3261 cpuver
== CPC_SPARC_M7
? NTXT ("Instr_SW_count")
3262 : NTXT ("Sw_count_intr"),
3263 GTXT ("SW Count instructions (counts special no-op assembler instructions)"));
3264 fprintf (f_usage
, NTXT ("\n"));
3267 compilation error
: we
're trying to use a macro that's already defined
3270 char s0
[TMPLEN
], s1
[TMPLEN
], s2
[TMPLEN
], s3
[TMPLEN
];
3271 if (cpuver
== CPC_SPARC_M7
)
3273 snprintf (s0
, TMPLEN
, "Commit_0_cyc");
3274 snprintf (s1
, TMPLEN
, "Commit_1_cyc");
3275 snprintf (s2
, TMPLEN
, "Commit_2_cyc");
3276 snprintf (s3
, TMPLEN
, "Commit_1_or_2_cyc");
3280 snprintf (s0
, TMPLEN
, "Commit_0");
3281 snprintf (s1
, TMPLEN
, "Commit_1");
3282 snprintf (s2
, TMPLEN
, "Commit_2");
3283 snprintf (s3
, TMPLEN
, "Commit_1_or_2");
3286 fprintf (f_usage
, GTXT (" == Commit.\n"
3287 " Instructions may be launched speculatively, executed out of order, etc.\n"));
3288 if (cpuver
!= CPC_SPARC_M8
)
3290 fprintf (f_usage
, GTXT (" We can count the number of cycles during which 0, 1, or 2 instructions are\n"
3291 " actually completed and their results committed:\n"));
3292 fprintf (f_usage
, GTXT (" %s\n"
3296 " %s is a useful way of identifying parts of your application with\n"
3297 " high-latency instructions.\n\n"),
3298 s0
, s1
, s2
, s3
, s0
);
3302 fprintf (f_usage
, GTXT (" We can count the number of cycles during which no instructions were\n"
3303 " able to commit results using:\n"));
3304 fprintf (f_usage
, GTXT (" %s\n"
3305 " %s is a useful way of identifying parts of your application with\n"
3306 " high-latency instructions.\n\n"),
3310 fprintf (f_usage
, GTXT (" == Cache/memory hierarchy.\n"));
3311 if (cpuver
== CPC_SPARC_M7
)
3313 fprintf (f_usage
, GTXT (" In the cache hierarchy:\n"
3314 " * Each socket has memory and multiple SPARC core clusters (scc).\n"
3315 " * Each scc has an L3 cache and multiple L2 and L1 caches.\n"));
3316 fprintf (f_usage
, GTXT (" Loads can be counted by where they hit on socket:\n"));
3317 fprintf (f_usage
, NTXT (" %-22s %s\n"),
3318 NTXT ("DC_hit"), GTXT ("hit own L1 data cache"));
3319 fprintf (f_usage
, NTXT (" %-22s %s\n"),
3320 NTXT ("DC_miss_L2_hit"), GTXT ("hit own L2"));
3321 fprintf (f_usage
, NTXT (" %-22s %s\n"),
3322 NTXT ("DC_miss_L3_hit"), GTXT ("hit own L3"));
3323 fprintf (f_usage
, NTXT (" %-22s %s\n"),
3324 NTXT ("DC_miss_nbr_L2_hit"), GTXT ("hit neighbor L2 (same scc)"));
3325 fprintf (f_usage
, NTXT (" %-22s %s\n"),
3326 NTXT ("DC_miss_nbr_scc_hit"), GTXT ("hit neighbor scc (same socket)"));
3327 fprintf (f_usage
, NTXT (" %-22s %s\n"),
3328 NTXT ("DC_miss_nbr_scc_miss"), GTXT ("miss all caches (same socket)"));
3329 fprintf (f_usage
, GTXT (" These loads can also be grouped:\n"));
3330 fprintf (f_usage
, NTXT (" %-22s %s\n"),
3331 NTXT ("DC_miss"), GTXT ("all - DC_hit"));
3332 fprintf (f_usage
, NTXT (" %-22s %s\n"),
3333 NTXT ("DC_miss_L2_miss"), GTXT ("all - DC_hit - DC_miss_L2_hit"));
3334 fprintf (f_usage
, NTXT (" %-22s %s\n"),
3335 NTXT ("DC_miss_L3_miss"), GTXT ("DC_miss_nbr_scc_hit + DC_miss_nbr_scc_miss"));
3336 fprintf (f_usage
, GTXT (" Loads that miss all caches on this socket can be counted:\n"));
3337 fprintf (f_usage
, NTXT (" %-22s %s\n"),
3338 NTXT ("DC_miss_remote_scc_hit"), GTXT ("hit cache on different socket"));
3339 fprintf (f_usage
, NTXT (" %-22s %s\n"),
3340 NTXT ("DC_miss_local_mem_hit"), GTXT ("hit local memory (same socket)"));
3341 fprintf (f_usage
, NTXT (" %-22s %s\n"),
3342 NTXT ("DC_miss_remote_mem_hit"), GTXT ("hit remote memory (off socket)"));
3343 fprintf (f_usage
, GTXT (" These events are for speculative loads, launched in anticipation\n"
3344 " of helping performance but whose results might not be committed.\n"));
3345 #if 0 // was: #if defined(linux). See 22236226 - sparc-Linux: Support basic Memoryspace and Dataspace profiling (capture VADDR)
3346 /* 21869427 should not look like memoryspace profiling is supported on Linux */
3347 /* 21869424 desire memoryspace profiling on Linux */
3348 fprintf (f_usage
, GTXT (" To count only data-cache misses that commit, use:\n"));
3349 fprintf (f_usage
, NTXT (" DC_miss_commit\n"));
3351 fprintf (f_usage
, GTXT (" To count only data-cache misses that commit, or for memoryspace profiling,\n"
3352 " use the 'memoryspace' counter:\n"));
3353 fprintf (f_usage
, NTXT (" DC_miss_commit\n"));
3355 fprintf (f_usage
, NTXT ("\n"));
3357 else if (cpuver
== CPC_SPARC_M8
)
3359 fprintf (f_usage
, GTXT (" In the cache hierarchy:\n"
3360 " * Each processor has 4 memory controllers and 2 quad core clusters (QCC).\n"
3361 " * Each QCC contains 4 cache processor clusters (CPC).\n"
3362 " * Each CPC contains 4 cores.\n"
3363 " * Each core supports 8 hardware threads.\n"
3364 " * The L3 consists of 2 partitions with 1 QCC per partition.\n"
3366 fprintf (f_usage
, GTXT (" Loads can be counted by where they hit on socket:\n"));
3367 fprintf (f_usage
, NTXT (" %-22s %s\n"),
3368 NTXT ("DC_miss_L2_hit"), GTXT ("hit own L2"));
3369 fprintf (f_usage
, NTXT (" %-22s %s\n"),
3370 NTXT ("DC_miss_L3_hit"), GTXT ("hit own L3"));
3371 fprintf (f_usage
, NTXT (" %-22s %s\n"),
3372 NTXT ("DC_miss_L3_dirty_copyback"), GTXT ("hit own L3 but require copyback from L2D"));
3373 fprintf (f_usage
, NTXT (" %-22s %s\n"),
3374 NTXT ("DC_miss_nbr_L3_hit"), GTXT ("hit neighbor L3 (same socket)"));
3375 fprintf (f_usage
, GTXT (" Loads that miss all caches on this socket can be counted:\n"));
3376 fprintf (f_usage
, NTXT (" %-22s %s\n"),
3377 NTXT ("DC_miss_remote_L3_hit"), GTXT ("hit cache on different socket"));
3378 fprintf (f_usage
, NTXT (" %-22s %s\n"),
3379 NTXT ("DC_miss_local_mem_hit"), GTXT ("hit local memory (same socket)"));
3380 fprintf (f_usage
, NTXT (" %-22s %s\n"),
3381 NTXT ("DC_miss_remote_mem_hit"), GTXT ("hit remote memory (off socket)"));
3382 fprintf (f_usage
, GTXT (" These events are for speculative loads, launched in anticipation\n"
3383 " of helping performance but whose results might not be committed.\n"));
3384 #if 0 // was: #if defined(linux). See 22236226 - sparc-Linux: Support basic Memoryspace and Dataspace profiling (capture VADDR)
3385 /* 21869427 should not look like memoryspace profiling is supported on Linux */
3386 /* 21869424 desire memoryspace profiling on Linux */
3387 fprintf (f_usage
, GTXT (" To count only data-cache misses that commit, use:\n"));
3388 fprintf (f_usage
, NTXT (" DC_miss_commit\n"));
3390 fprintf (f_usage
, GTXT (" To count only data-cache misses that commit, or for memoryspace profiling,\n"
3391 " use the 'memoryspace' counter:\n"));
3392 fprintf (f_usage
, NTXT (" DC_miss_commit\n"));
3394 fprintf (f_usage
, NTXT ("\n"));
3398 fprintf (f_usage
, GTXT (" Total data-cache misses can be counted with:\n"));
3399 fprintf (f_usage
, NTXT (" DC_miss DC_miss_nospec\n"));
3400 fprintf (f_usage
, GTXT (" They are the totals of misses that hit in L2/L3 cache, local memory, or\n"
3401 " remote memory:\n"));
3402 fprintf (f_usage
, NTXT (" DC_miss_L2_L3_hit DC_miss_L2_L3_hit_nospec\n"));
3403 fprintf (f_usage
, NTXT (" DC_miss_local_hit DC_miss_local_hit_nospec\n"));
3404 fprintf (f_usage
, NTXT (" DC_miss_remote_L3_hit DC_miss_remote_L3_hit_nospec\n"));
3405 fprintf (f_usage
, GTXT (" The events in the left column include speculative operations. Use the\n"
3406 " right-hand _nospec events to count only data accesses that commit\n"
3407 " or for memoryspace profiling.\n\n"));
3410 fprintf (f_usage
, GTXT (" == TLB misses.\n"
3411 " The Translation Lookaside Buffer (TLB) is a cache of virtual-to-physical\n"
3412 " page translations."));
3413 fprintf (f_usage
, GTXT (" If a virtual address (VA) is not represented in the\n"
3414 " TLB, an expensive hardware table walk (HWTW) must be conducted."));
3415 fprintf (f_usage
, GTXT (" If the\n"
3416 " page is still not found, a trap results. There is a data TLB (DTLB) and\n"
3417 " an instruction TLB (ITLB).\n\n"));
3418 fprintf (f_usage
, GTXT (" TLB misses can be counted by:\n"));
3419 fprintf (f_usage
, NTXT (" %s\n"),
3420 cpuver
== CPC_SPARC_M7
?
3421 NTXT ("DTLB_HWTW_search ITLB_HWTW_search") :
3422 cpuver
== CPC_SPARC_M8
?
3423 NTXT ("DTLB_HWTW ITLB_HWTW") :
3424 NTXT ("DTLB_miss_asynch ITLB_miss_asynch"));
3425 fprintf (f_usage
, GTXT (" or broken down by page size:\n"));
3426 fprintf (f_usage
, NTXT (" %s"),
3427 cpuver
== CPC_SPARC_M7
?
3428 NTXT ("DTLB_HWTW_hit_8K ITLB_HWTW_hit_8K\n"
3429 " DTLB_HWTW_hit_64K ITLB_HWTW_hit_64K\n"
3430 " DTLB_HWTW_hit_4M ITLB_HWTW_hit_4M\n") :
3431 NTXT ("DTLB_fill_8KB ITLB_fill_8KB\n"
3432 " DTLB_fill_64KB ITLB_fill_64KB\n"
3433 " DTLB_fill_4MB ITLB_fill_4MB\n"));
3434 fprintf (f_usage
, NTXT (" %s\n\n"),
3435 cpuver
== CPC_SPARC_M7
?
3436 NTXT ("DTLB_HWTW_hit_256M ITLB_HWTW_hit_256M\n"
3437 " DTLB_HWTW_hit_2G_16G ITLB_HWTW_hit_2G_16G\n"
3438 " DTLB_HWTW_miss_trap ITLB_HWTW_miss_trap") :
3439 cpuver
== CPC_SPARC_M8
?
3440 NTXT ("DTLB_HWTW_hit_256M ITLB_HWTW_hit_256M\n"
3441 " DTLB_HWTW_hit_16G ITLB_HWTW_hit_16G\n"
3442 " DTLB_HWTW_hit_1T ITLB_HWTW_hit_1T") :
3443 NTXT ("DTLB_fill_256MB ITLB_fill_256MB\n"
3444 " DTLB_fill_2GB ITLB_fill_2GB\n"
3445 " DTLB_fill_trap ITLB_fill_trap"));
3446 if (cpuver
== CPC_SPARC_M8
)
3448 fprintf (f_usage
, GTXT (" TLB traps, which can require hundreds of cycles, can be counted with:\n"));
3449 fprintf (f_usage
, NTXT (" %s\n\n"),
3450 NTXT ("DTLB_fill_trap ITLB_fill_trap"));
3453 fprintf (f_usage
, GTXT (" == Branch misprediction.\n"
3454 " Count branch mispredictions with:\n"
3456 " It is the total of:\n"
3457 " Br_dir_mispred direction was mispredicted\n"
3458 " %s target was mispredicted\n"
3459 "\n"), cpuver
== CPC_SPARC_M7
? NTXT ("Br_tgt_mispred") : NTXT ("Br_trg_mispred"));
3461 fprintf (f_usage
, GTXT (" == RAW hazards.\n"
3462 " A read-after-write (RAW) delay occurs when we attempt to read a datum\n"
3463 " before an earlier write has had time to complete:\n"));
3464 if (cpuver
== CPC_SPARC_M8
)
3466 fprintf (f_usage
, NTXT (" RAW_hit\n"));
3467 fprintf (f_usage
, GTXT (" RAW_hit events can be broken down into:\n"));
3471 fprintf (f_usage
, NTXT (" RAW_hit_st_q~emask=0xf\n"));
3472 fprintf (f_usage
, GTXT (" The mask 0xf counts the total of all types such as:\n"));
3474 fprintf (f_usage
, NTXT (" RAW_hit_st_buf write is still in store buffer\n"
3475 " RAW_hit_st_q write is still in store queue\n"
3477 if (cpuver
== CPC_SPARC_M7
)
3479 fprintf (f_usage
, GTXT (" == Flush.\n"
3480 " One can count the number of times the pipeline must be flushed:\n"));
3481 fprintf (f_usage
, NTXT (" %-22s %s\n"),
3482 NTXT ("Flush_L3_miss"), GTXT ("load missed L3 and >1 strand is active on the core"));
3483 fprintf (f_usage
, NTXT (" %-22s %s\n"),
3484 NTXT ("Flush_br_mispred"), GTXT ("branch misprediction"));
3485 fprintf (f_usage
, NTXT (" %-22s %s\n"),
3486 NTXT ("Flush_arch_exception"), GTXT ("SPARC exceptions and trap entry/return"));
3487 fprintf (f_usage
, NTXT (" %-22s %s\n"),
3488 NTXT ("Flush_other"), GTXT ("state change to/from halted/paused"));
3489 fprintf (f_usage
, NTXT ("\n"));
/* Write the "-h" hardware-counter usage text to F_USAGE.
   The function gathers the CPU name, the raw and standard counter lists,
   the maximum number of concurrent counters and the cpuver value, then
   prints, in order: the option syntax, the default counter set, the alias
   table, the raw counter table and a documentation reference.
   NOTE(review): this listing omits some lines of the function (braces,
   else branches, returns and a few declarations), so the conditions that
   select between the alternative messages below are not all visible here;
   confirm against the complete file before relying on these notes.  */
3494 hwc_usage_internal (int forKernel
, FILE *f_usage
, const char *cmd
, const char *dataspace_msg
, int show_syntax
, int show_short_desc
)
/* Guard on the forKernel argument; the action taken when the check fails
   is on a line that is not shown in this listing.  */
3496 if (!VALID_FOR_KERNEL (forKernel
))
/* Collect everything the text below depends on.  has_raw_ctrs and
   has_std_ctrs are true only when the list pointer is non-null and its
   first entry is non-null.  */
3499 hwc_get_cpuname (cpuname
, 128);
3500 Hwcentry
** raw_ctrs
= hwc_get_raw_ctrs (forKernel
);
3501 int has_raw_ctrs
= (raw_ctrs
&& raw_ctrs
[0]);
3502 Hwcentry
** std_ctrs
= hwc_get_std_ctrs (forKernel
);
3503 int has_std_ctrs
= (std_ctrs
&& std_ctrs
[0]);
3504 unsigned hwc_maxregs
= hwc_get_max_concurrent (forKernel
);
3505 int cpuver
= hwc_get_cpc_cpuver ();
/* A non-zero hwc_maxregs leads to the "Specifying HW counters" heading.
   Two variants of the heading follow: this one lists the -h option forms.  */
3506 if (hwc_maxregs
!= 0)
3510 fprintf (f_usage
, GTXT ("\nSpecifying HW counters on `%s' (cpuver=%d):\n\n"), cpuname
, cpuver
);
3511 fprintf (f_usage
, GTXT (" -h {auto|lo|on|hi}\n"));
3512 fprintf (f_usage
, GTXT ("\tturn on default set of HW counters at the specified rate\n"));
/* With exactly one register only the single <ctr_def> form is shown; the
   repeated and comma-separated forms are the multi-counter alternative.  */
3513 if (hwc_maxregs
== 1)
3515 fprintf (f_usage
, GTXT (" -h <ctr_def>\n"));
3516 fprintf (f_usage
, GTXT ("\tspecify HW counter profiling for one HW counter only\n"));
3520 fprintf (f_usage
, GTXT (" -h <ctr_def> [-h <ctr_def>]...\n"));
3521 fprintf (f_usage
, GTXT (" -h <ctr_def>[,<ctr_def>]...\n"));
3522 fprintf (f_usage
, GTXT ("\tspecify HW counter profiling for up to %u HW counters\n"), hwc_maxregs
);
3524 fprintf (f_usage
, NTXT ("\n"));
/* Second variant of the heading: it only states how many counters are
   supported.  NOTE(review): presumably chosen when show_syntax is false;
   the selecting condition is not visible here.  */
3528 fprintf (f_usage
, GTXT ("\nSpecifying HW counters on `%s' (cpuver=%d)\n\n"), cpuname
, cpuver
);
3529 if (hwc_maxregs
== 1)
3530 fprintf (f_usage
, GTXT (" Hardware counter profiling is supported for only one counter.\n"));
3532 fprintf (f_usage
, GTXT (" Hardware counter profiling is supported for up to %u HW counters.\n"), hwc_maxregs
);
/* Messages reporting that HW counter profiling is unavailable.
   NOTE(review): presumably the hwc_maxregs == 0 path; confirm.  */
3537 if (!IS_KERNEL (forKernel
))
3538 { // EUGENE I don't see why we don't also use this for er_kernel
/* pch is the text returned by hwcfuncs_errmsg_get; a newline is appended
   only when pch does not already end with one.  The declaration of buf and
   any check on pch are on lines not shown here.  */
3541 char *pch
= hwcfuncs_errmsg_get (buf
, sizeof (buf
), 0);
3543 fprintf (f_usage
, GTXT ("HW counter profiling is not supported on this system: %s%s"),
3544 pch
, pch
[strlen (pch
) - 1] == '\n' ? "" : "\n");
3546 fprintf (f_usage
, GTXT ("HW counter profiling is not supported on this system\n"));
3551 /* At this point, we know we have counters */
/* has_attrs is true when the attribute list is non-null and non-empty.  */
3552 char**hwc_attrs
= hwc_get_attrs (forKernel
);
3553 int has_attrs
= (hwc_attrs
&& hwc_attrs
[0]);
/* Optional pieces of the <ctr_def> grammar line: the register suffix is
   included when show_regs is set and the attribute part when has_attrs is
   set.  show_regs is assigned on a line not shown in this listing.  */
3556 const char *reg_s
= show_regs
? "[/<reg#>]" : "";
3557 const char *attr_s
= has_attrs
? "[[~<attr>=<val>]...]" : "";
3558 fprintf (f_usage
, GTXT (" <ctr_def> == <ctr>%s%s,[<rate>]\n"), attr_s
, reg_s
);
/* The caller-supplied dataspace_msg is printed verbatim through "%s".  */
3560 fprintf (f_usage
, NTXT ("%s"), dataspace_msg
);
3561 fprintf (f_usage
, GTXT (" <ctr>\n"));
3562 fprintf (f_usage
, GTXT (" counter name, "));
3565 fprintf (f_usage
, GTXT (" Counter name "));
3566 fprintf (f_usage
, GTXT ("must be selected from the available counters\n"
3567 " listed below. On most systems, if a counter is not listed\n"
3568 " below, it may still be specified by its numeric value.\n"));
/* Extra note printed when cpcx_has_precise[forKernel] is non-zero.  */
3569 if (cpcx_has_precise
[forKernel
])
3572 fprintf (f_usage
, GTXT (" Counters labeled as 'memoryspace' in the list below will\n"
3573 " collect memoryspace data by default.\n"));
3575 fprintf (f_usage
, GTXT ("\n"));
/* Attribute description.  Two wordings of the introduction appear; the
   condition choosing between them is not visible here.  */
3580 fprintf (f_usage
, GTXT (" ~<attr>=<val>\n"));
3581 fprintf (f_usage
, GTXT (" optional attribute where <val> can be in decimal or hex\n"
3582 " format, and <attr> can be one of: \n"));
3585 fprintf (f_usage
, GTXT (" Optional attribute where <val> can be in decimal or hex\n"
3586 " format, and <attr> can be one of: \n"));
/* Print every attribute name; the list ends at the first null pointer.  */
3587 for (char **pattr
= hwc_attrs
; *pattr
; pattr
++)
3588 fprintf (f_usage
, NTXT (" `%s'\n"), *pattr
);
3590 fprintf (f_usage
, GTXT (" Multiple attributes may be specified, and each must be preceded by a ~.\n\n"));
3592 fprintf (f_usage
, GTXT (" Multiple attributes may be specified.\n\n"));
3593 if (IS_KERNEL (forKernel
))
3594 fprintf (f_usage
, GTXT (" Other attributes may be supported by the chip, but are not supported by DTrace and will be ignored by er_kernel.\n\n"));
/* Register-selection help.  The argument that fills the %s in this format
   is on a line not shown here.  */
3600 fprintf (f_usage
, GTXT (" /<reg#>\n"
3601 " forces use of a specific hardware register. (Solaris only)\n"
3602 " If not specified, %s will attempt to place the counter into the first\n"
3603 " available register and as a result may be unable to place\n"
3604 " subsequent counters due to register conflicts.\n"
3605 " The / in front of the register number is required if a register is specified.\n\n"),
/* Description of the <rate> field: named rates first, then a fixed event
   interval.  */
3608 fprintf (f_usage
, GTXT (" <rate> == {auto|lo|on|hi}\n"));
3609 fprintf (f_usage
, GTXT (" `auto' (default) match the rate used by clock profiling.\n"));
3610 fprintf (f_usage
, GTXT (" If clock profiling is disabled, use `on'.\n"));
3611 fprintf (f_usage
, GTXT (" `lo' per-thread maximum rate of ~10 samples/second\n"));
3612 fprintf (f_usage
, GTXT (" `on' per-thread maximum rate of ~100 samples/second\n"));
3613 fprintf (f_usage
, GTXT (" `hi' per-thread maximum rate of ~1000 samples/second\n\n"));
3614 fprintf (f_usage
, GTXT (" <rate> == <interval>\n"
3615 " Fixed event interval value to trigger a sample.\n"
3616 " Smaller intervals imply more frequent samples.\n"
3617 " Example: when counting cycles on a 2 GHz processor,\n"
3618 " an interval of 2,000,003 implies ~1000 samples/sec\n"
3620 " Use this feature with caution, because:\n"
3621 " (1) Frequent sampling increases overhead and may disturb \n"
3622 " other applications on your system.\n"
3623 " (2) Event counts vary dramatically depending on the event \n"
3624 " and depending on the application.\n"
3625 " (3) A fixed event interval disables any other gprofng\n"
3626 " internal mechanisms that may limit event rates.\n"
3628 " Guidelines: Aim at <1000 events per second. Start by \n"
3629 " collecting with the 'hi' option; in the experiment overview,\n"
3630 " notice how many events are recorded per second; divide by\n"
3631 " 1000, and use that as your starting point.\n\n"));
3633 fprintf (f_usage
, GTXT (" A comma ',' followed immediately by white space may be omitted.\n\n"));
3636 /* default counters */
3637 fprintf (f_usage
, GTXT ("Default set of HW counters:\n\n"));
/* Three outcomes for the default set: a NULL result prints "no default
   set", an empty string prints "cannot be loaded", and the remaining
   lines print the set as a -h option.  The arguments of the "cannot be
   loaded" message and the use of defctrs2 are on lines not shown here.  */
3638 char * defctrs
= hwc_get_default_cntrs2 (forKernel
, 1);
3639 if (defctrs
== NULL
)
3640 fprintf (f_usage
, GTXT (" No default HW counter set defined for this system.\n"));
3641 else if (strlen (defctrs
) == 0)
3643 char *s
= hwc_get_orig_default_cntrs (forKernel
);
3644 fprintf (f_usage
, GTXT (" The default HW counter set (%s) defined for %s cannot be loaded on this system.\n"),
3651 char *defctrs2
= hwc_get_default_cntrs2 (forKernel
, 2);
3652 fprintf (f_usage
, GTXT (" -h %s\n"), defctrs
);
/* Alias table: a column header, then one formatted line per std_ctrs
   entry up to the first null pointer.  The last argument passed to
   hwc_hwcentry_string_internal is 0 here.  NOTE(review): presumably this
   section is guarded by has_std_ctrs; the guard is not visible.  */
3661 fprintf (f_usage
, GTXT ("\nAliases for most useful HW counters:\n\n"));
3662 format_columns (tmp
, 1024, "alias", "raw name", "type ", "units", "description");
3663 fprintf (f_usage
, NTXT (" %s\n\n"), tmp
);
3664 for (Hwcentry
**pctr
= std_ctrs
; *pctr
; pctr
++)
3666 Hwcentry
*ctr
= *pctr
;
3667 hwc_hwcentry_string_internal (tmp
, sizeof (tmp
), ctr
, 0);
3668 fprintf (f_usage
, NTXT (" %s\n"), tmp
);
/* Raw counter table: hwc_usage_raw_overview_sparc is called with cpuver
   first, then a column header (no "raw name" column) and one line per
   raw_ctrs entry.  Here show_short_desc is passed through as the last
   argument.  NOTE(review): presumably guarded by has_raw_ctrs.  */
3673 fprintf (f_usage
, GTXT ("\nRaw HW counters:\n\n"));
3674 hwc_usage_raw_overview_sparc (f_usage
, cpuver
);
3675 format_columns (tmp
, 1024, "name", NULL
, "type ", "units", "description");
3676 fprintf (f_usage
, NTXT (" %s\n\n"), tmp
);
3677 for (Hwcentry
**pctr
= raw_ctrs
; *pctr
; pctr
++)
3679 Hwcentry
*ctr
= *pctr
;
3680 hwc_hwcentry_string_internal (tmp
, sizeof (tmp
), ctr
, show_short_desc
);
3681 fprintf (f_usage
, NTXT (" %s\n"), tmp
);
3685 /* documentation notice */
3686 hwc_get_docref (tmp
, 1024);
3688 fprintf (f_usage
, NTXT ("\n%s\n"), tmp
);
3691 /* Print a description of "-h" usage, largely common to collect and er_kernel. */
/* Forwards forKernel, cmd and dataspace_msg to hwc_usage_internal with
   stdout as the output stream, show_syntax set to 1 and show_short_desc
   set to 0.  */
3693 hwc_usage (int forKernel
, const char *cmd
, const char *dataspace_msg
)
3695 hwc_usage_internal (forKernel
, stdout
, cmd
, dataspace_msg
, 1, 0);
/* Variant of the "-h" usage printer in which the caller supplies the
   output stream F and both display flags.  Every argument is handed to
   hwc_usage_internal unchanged and in the same order.  */
3699 hwc_usage_f (int forKernel
, FILE *f
, const char *cmd
, const char *dataspace_msg
, int show_syntax
, int show_short_desc
)
3701 hwc_usage_internal (forKernel
, f
, cmd
, dataspace_msg
, show_syntax
, show_short_desc
);
3704 /*---------------------------------------------------------------------------*/
3705 /* init functions */
/* Counter names that supported_hwc () compares against pctr->name when
   cpcx_support_bitmask lacks HWCFUNCS_SUPPORT_PEBS_SAMPLING.  That loop
   stops at the first null entry.  NOTE(review): the terminating entry and
   closing brace of this table are not visible in this listing; confirm
   that the table ends with a null pointer.  */
3707 static char* supported_pebs_counters
[] = {
3708 "mem_inst_retired.latency_above_threshold",
3709 "mem_trans_retired.load_latency",
3710 "mem_trans_retired.precise_store",
3714 /* callback, (see setup_cpc()) called for each valid regno/name combo */
/* Converts CPC_REGNO to regno_t and records the (regno, NAME) pair in the
   unfiltered_raw list through list_add.  */
3716 hwc_cb (uint_t cpc_regno
, const char *name
)
3718 regno_t regno
= cpc_regno
; /* convert type */
3719 list_add (&unfiltered_raw
, regno
, name
);
/* Tests PCTR against the capabilities recorded in cpcx_support_bitmask.
   Two tests are visible:
     1. ABST_PLUS_BY_DEFAULT (pctr->memop) holds while the
	SUPPORT_MEMORYSPACE_PROFILING bit is clear;
     2. the HWCFUNCS_SUPPORT_PEBS_SAMPLING bit is clear and pctr->name
	equals one of the supported_pebs_counters entries.
   NOTE(review): the return statements are not visible in this listing.
   Callers in this file treat a non-zero result as "supported", so both
   tests presumably lead to a zero result; confirm against the full file.  */
3723 supported_hwc (Hwcentry
*pctr
)
3725 if (ABST_PLUS_BY_DEFAULT (pctr
->memop
) &&
3726 (cpcx_support_bitmask
& SUPPORT_MEMORYSPACE_PROFILING
) == 0)
3728 // remove specific PEBs counters when back end doesn't support sampling
3729 if ((cpcx_support_bitmask
& HWCFUNCS_SUPPORT_PEBS_SAMPLING
) == 0)
/* Linear scan by name; the loop ends at the first null table entry.  */
3730 for (int ii
= 0; supported_pebs_counters
[ii
]; ii
++)
3731 if (strcmp (supported_pebs_counters
[ii
], pctr
->name
) == 0)
3737 * forKernel: 1 - generate lists for er_kernel, 0 - generate lists for collect
3739 * raw_orig: HWCs as generated by hwc_cb()
3741 * pstd_out[], praw_out[]: malloc'd array of pointers to malloc'd hwcentry, or NULL
/* Builds the standard (alias), raw and hidden counter lists and returns
   their arrays through PSTD_OUT, PRAW_OUT and PHIDDEN_OUT.
   Visible steps:
     1. initialise three output ptr_lists and three scratch lists;
     2. copy supported entries of static_tables[0] and static_tables[1]
	into the scratch lists, hidden aliases going to scratch list 2;
     3. copy supported entries of RAW_UNFILTERED_IN into the raw list;
     4. match each raw counter against the scratch lists by name;
     5. append aliases to the standard or hidden output list;
     6. free the scratch lists, adjust PRELOAD_DEF values and store the
	output arrays.
   NOTE(review): this listing omits a number of lines (declarations,
   braces, continue/else statements and some null checks); the notes below
   describe only what is visible and should be confirmed against the full
   file.  */
3744 hwc_process_raw_ctrs (int forKernel
, Hwcentry
***pstd_out
,
3745 Hwcentry
***praw_out
, Hwcentry
***phidden_out
,
3746 Hwcentry
**static_tables
, Hwcentry
**raw_unfiltered_in
)
3748 // set up output buffers
3749 ptr_list s_outbufs
[3];
3750 ptr_list
*std_out
= &s_outbufs
[0];
3751 ptr_list_init (std_out
);
3752 ptr_list
*raw_out
= &s_outbufs
[1];
3753 ptr_list_init (raw_out
);
3754 ptr_list
*hidden_out
= &s_outbufs
[2];
3755 ptr_list_init (hidden_out
);
3757 #define NUM_TABLES 3
3758 ptr_list table_copy
[NUM_TABLES
]; // copy of data from static tables. [0]std, [1]generic, and [2]hidden
3759 for (int tt
= 0; tt
< NUM_TABLES
; tt
++)
3760 ptr_list_init (&table_copy
[tt
]);
3762 // copy records from std [0] and generic [1] static input tables into table_copy[0],[1],or[2]
/* Each input table is walked until a null table pointer or an entry whose
   name is null.  The statement executed for unsupported entries is not
   visible here.  */
3763 for (int tt
= 0; tt
< 2; tt
++)
3764 for (Hwcentry
*pctr
= static_tables
[tt
]; pctr
&& pctr
->name
; pctr
++)
3766 if (!supported_hwc (pctr
))
3768 if (is_hidden_alias (pctr
))
3769 list_append_shallow_copy (&table_copy
[2], pctr
); // hidden list
3771 list_append_shallow_copy (&table_copy
[tt
], pctr
);
3774 // copy raw_unfiltered_in to raw_out
/* A null raw_unfiltered_in is tolerated; only entries accepted by
   supported_hwc are copied.  */
3775 for (int ii
= 0; raw_unfiltered_in
&& raw_unfiltered_in
[ii
]; ii
++)
3777 Hwcentry
*pctr
= raw_unfiltered_in
[ii
];
3778 if (supported_hwc (pctr
))
3779 list_append_shallow_copy (raw_out
, pctr
);
3782 // Scan raw counters to populate Hwcentry fields from matching static_tables entries
3783 for (int uu
= 0; uu
< raw_out
->sz
; uu
++)
3785 Hwcentry
*praw
= (Hwcentry
*) raw_out
->array
[uu
];
3786 Hwcentry
*pstd
= NULL
; // set if non-alias entry from std table matches
3787 char *name
= praw
->name
;
3788 for (int tt
= 0; tt
< NUM_TABLES
; tt
++)
3789 { // std, generic, and hidden
3790 if (table_copy
[tt
].sz
== 0)
3792 Hwcentry
**array
= (Hwcentry
**) table_copy
[tt
].array
;
3793 for (int jj
= 0; array
[jj
]; jj
++)
3794 { // all table counters
3795 Hwcentry
*pctr
= array
[jj
];
/* pname is compared with the raw counter name through is_same with '~'
   as the third argument.  The declaration of pname and any alternative
   assignment to it are on lines not shown here.  */
3798 pname
= pctr
->int_name
;
3801 if (!is_same (name
, pname
, '~'))
3804 if (!is_visible_alias (pctr
) && !is_hidden_alias (pctr
))
3806 // Note: we could expand criteria to also allow aliases to set default rates for raw HWCs
3807 /* This is an 'internal' raw counter */
3809 pstd
= pctr
; /* use info as a template when adding to raw list */
3811 hwcentry_print (DBG_LT0
, "hwctable: hwc_cb: Warning: "
3812 "counter %s appears in table more than once: ",
3815 }/* for table rows */
3816 }/* for std and generic tables */
3820 /* the main table had an entry that matched <name> exactly */
3821 /* Apply the main table entry as a template */
3824 }/* for (raw_out) */
3826 // update std_out and hidden_out
3827 for (int tt
= 0; tt
< NUM_TABLES
; tt
++)
3829 if (tt
== 1 /*skip std_raw*/ || table_copy
[tt
].sz
== 0)
3832 for (int ii
= 0; (pctr
= table_copy
[tt
].array
[ii
]); ii
++)
3834 // prune unsupported rows from std table
3835 if (!is_visible_alias (pctr
) && !is_hidden_alias (pctr
))
3836 continue; // only aliases
/* Scratch list 0 feeds std_out; the other scratch list processed here
   feeds hidden_out.  */
3837 ptr_list
*dest
= (tt
== 0) ? std_out
: hidden_out
;
/* An alias without its own short_desc takes the one of the raw counter
   found by int_name.  NOTE(review): any null check on isInList before the
   dereference below is on a line not shown here; confirm it exists.  */
3839 if (pctr
->short_desc
== NULL
)
3841 isInList
= ptrarray_find_by_name ((Hwcentry
**) raw_out
->array
, pctr
->int_name
);
3843 pctr
->short_desc
= isInList
->short_desc
; // copy the raw counter's detailed description
/* Look for an entry with the same name already in the destination list;
   the warning below reports such duplicates.  */
3845 isInList
= ptrarray_find_by_name ((Hwcentry
**) dest
->array
, pctr
->name
);
3847 hwcentry_print (DBG_LT0
, "hwctable: hwc_cb: Warning: "
3848 "counter %s appears in alias list more than once: ",
3851 list_append_shallow_copy (dest
, pctr
);
3854 for (int tt
= 0; tt
< NUM_TABLES
; tt
++)
3855 ptr_list_free (&table_copy
[tt
]);
3859 // for er_kernel, use baseline value of PRELOAD_DEF_ERKERNEL instead of PRELOAD_DEF
/* NOTE(review): the condition that restricts this loop to er_kernel
   (presumably a test of forKernel) is not visible in this listing.  */
3860 for (int tt
= 0; tt
< 3; tt
++)
3861 { // std_out-0, raw_out-1, hidden_out-2
3862 Hwcentry
** hwcs
= (Hwcentry
**) (s_outbufs
[tt
].array
);
3863 for (int ii
= 0; hwcs
&& hwcs
[ii
]; ii
++)
3865 Hwcentry
*hwc
= hwcs
[ii
];
3866 if (hwc
->val
== PRELOAD_DEF
)
3867 hwc
->val
= PRELOAD_DEF_ERKERNEL
;
/* Hand the underlying arrays of the three output lists to the caller.  */
3871 *pstd_out
= (Hwcentry
**) std_out
->array
;
3872 *praw_out
= (Hwcentry
**) raw_out
->array
;
3873 *phidden_out
= (Hwcentry
**) hidden_out
->array
;
3876 /* callback, (see setup_cpc()) called for each valid attribute */
3877 /* builds attrlist */
/* Logs ATTR through Tprintf, drops the attribute named "picnum", and adds
   a strdup copy of every other name to unfiltered_attrs.
   NOTE(review): the strdup result is passed to ptr_list_add without a
   NULL check in the lines visible here.  */
3879 attrs_cb (const char *attr
)
3881 Tprintf (DBG_LT3
, "hwctable: attrs_cb(): %s\n", attr
);
3882 if (strcmp (attr
, "picnum") == 0)
3883 return; /* don't make this attribute available to users */
3884 ptr_list_add (&unfiltered_attrs
, (void*) strdup (attr
));
3887 /* returns true if attribute is valid for this platform */
3889 attr_is_valid (int forKernel
, const char *attr
)
3892 if (!VALID_FOR_KERNEL (forKernel
) || !cpcx_attrs
[forKernel
])
3894 for (int ii
= 0; cpcx_attrs
[forKernel
][ii
]; ii
++)
3895 if (strcmp (attr
, cpcx_attrs
[forKernel
][ii
]) == 0)