Remove Debian from SECURITY.txt
[binutils-gdb.git] / gprofng / common / hwcdrv.c
blob51492a8d35cdb752157eb42c98c37c92824b8e57
1 /* Copyright (C) 2021-2024 Free Software Foundation, Inc.
2 Contributed by Oracle.
4 This file is part of GNU Binutils.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, 51 Franklin Street - Fifth Floor, Boston,
19 MA 02110-1301, USA. */
21 #include <errno.h>
22 #include <unistd.h>
23 #include <fcntl.h>
24 #include <sys/mman.h>
25 #include <sys/ioctl.h>
26 #include <sys/syscall.h>
27 #include <linux/perf_event.h>
29 #include "hwcdrv.h"
31 /*---------------------------------------------------------------------------*/
32 /* macros */
33 #define IS_GLOBAL /* Mark global symbols */
35 #include "cpuid.c" /* ftns for identifying a chip */
37 static hdrv_pcbe_api_t *pcbe_driver = NULL;
38 static hdrv_pcbe_api_t hdrv_pcbe_core_api;
39 static hdrv_pcbe_api_t hdrv_pcbe_opteron_api;
40 static hdrv_pcbe_api_t *hdrv_pcbe_drivers[] = {
41 &hdrv_pcbe_core_api,
42 &hdrv_pcbe_opteron_api,
43 NULL
45 #include "opteron_pcbe.c" /* CPU-specific code */
46 #include "core_pcbe.c" /* CPU-specific code */
48 /*---------------------------------------------------------------------------*/
49 static int
50 hwcdrv_lookup_cpuver (const char * cpcN_cciname)
52 /* returns hwc_cpus.h ID for a given string. */
53 libcpc2_cpu_lookup_t *plookup;
54 static libcpc2_cpu_lookup_t cpu_table[] = {
55 LIBCPC2_CPU_LOOKUP_LIST
57 if (cpcN_cciname == NULL)
58 return CPUVER_UNDEFINED;
60 /* search table for name */
61 for (plookup = cpu_table; plookup->cpc2_cciname; plookup++)
63 int n = strlen (plookup->cpc2_cciname);
64 if (!strncmp (plookup->cpc2_cciname, cpcN_cciname, n))
65 return plookup->cpc2_cpuver;
67 /* unknown, but does have a descriptive string */
68 TprintfT (DBG_LT0, "hwcfuncs: CPC2: WARNING: Id of processor '%s' "
69 "could not be determined\n",
70 cpcN_cciname);
71 return CPUVER_GENERIC;
74 /*---------------------------------------------------------------------------*/
75 /* utils to generate x86 register definitions on Linux */
/*
 * This code is structured as though we're going to initialize the
 * HWC by writing the Intel MSR register directly.  That is, we
 * assume the lowest 16 bits of the event number will have the event
 * and that higher bits will set attributes.
 *
 * While SPARC is different, we can nonetheless use basically the
 * same "x86"-named functions:
 *
 *   - The event code will still be 16 bits.  It will still
 *     be in the lowest 16 bits of the event number.  Though
 *     perf_event_code() on SPARC will expect those bits to
 *     be shifted, hwcdrv_pcl.c can easily perform that shift.
 *
 *   - On SPARC we support only two attributes, "user" and "system",
 *     which hwcdrv_pcl.c already converts to the "exclude_user"
 *     and "exclude_kernel" fields expected by perf_event_open().
 *     "user" and "system" are stored in event bits 16 and 17.
 *     For M8, a 4-bit mask of supported PICs is stored in bits [23:20].
 */
98 static const attr_info_t perfctr_sparc_attrs[] = {
99 {NTXT ("user"), 0, 0x01, 16}, //usr
100 {NTXT ("system"), 0, 0x01, 17}, //os
101 {NULL, 0, 0x00, 0},
103 static const attr_info_t perfctr_x64_attrs[] = {/* ok for Core2 & later */
104 {NTXT ("umask"), 0, 0xff, 8},
105 {NTXT ("user"), 0, 0x01, 16}, //usr
106 //{NTXT("nouser"), 1, 0x01, 16}, //usr (inverted)
107 {NTXT ("system"), 0, 0x01, 17}, //os
108 {NTXT ("edge"), 0, 0x01, 18},
109 {NTXT ("pc"), 0, 0x01, 19},
110 {NTXT ("inv"), 0, 0x01, 23},
111 {NTXT ("cmask"), 0, 0xff, 24},
112 {NULL, 0, 0x00, 0},
114 const attr_info_t *perfctr_attrs_table = perfctr_x64_attrs;
116 static const eventsel_t perfctr_evntsel_enable_bits = (0x01 << 16) | /* usr */
117 // (0xff << 0) | /* event*/
118 // (0xff << 8) | /* umask */
119 // (0x01 << 17) | /* os */
120 // (0x01 << 18) | /* edge */
121 // (0x01 << 19) | /* pc */
122 (0x01 << 20) | /* int */
123 // (0x01 << 21) | /* reserved */
124 (0x01 << 22) | /* enable */
125 // (0x01 << 23) | /* inv */
126 // (0xff << 24) | /* cmask */
129 static int
130 myperfctr_get_x86_eventnum (const char *eventname, uint_t pmc,
131 eventsel_t *eventsel, eventsel_t *valid_umask,
132 uint_t *pmc_sel)
134 if (pcbe_driver && pcbe_driver->hdrv_pcbe_get_eventnum &&
135 !pcbe_driver->hdrv_pcbe_get_eventnum (eventname, pmc, eventsel,
136 valid_umask, pmc_sel))
137 return 0;
139 /* check for numerically-specified counters */
140 char * endptr;
141 uint64_t num = strtoull (eventname, &endptr, 0);
142 if (*eventname && !*endptr)
144 *eventsel = EXTENDED_EVNUM_2_EVSEL (num);
145 *valid_umask = 0xff; /* allow any umask (unused for SPARC?) */
146 *pmc_sel = pmc;
147 return 0;
150 /* name does not specify a numeric value */
151 *eventsel = (eventsel_t) - 1;
152 *valid_umask = 0x0;
153 *pmc_sel = pmc;
154 return -1;
157 static int
158 mask_shift_set (eventsel_t *presult, eventsel_t invalue,
159 eventsel_t mask, eventsel_t shift)
161 if (invalue & ~mask)
162 return -1; /* invalue attempts to set bits outside of mask */
163 *presult &= ~(mask << shift); /* clear all the mask bits */
164 *presult |= (invalue << shift); /* set bits according to invalue */
165 return 0;
168 static int
169 set_x86_attr_bits (eventsel_t *result_mask, eventsel_t evnt_valid_umask,
170 hwcfuncs_attr_t attrs[], int nattrs, const char*nameOnly)
172 eventsel_t evntsel = *result_mask;
173 for (int ii = 0; ii < (int) nattrs; ii++)
175 const char *attrname = attrs[ii].ca_name;
176 eventsel_t attrval = (eventsel_t) attrs[ii].ca_val;
177 const char *tmpname;
178 int attr_found = 0;
179 for (int jj = 0; (tmpname = perfctr_attrs_table[jj].attrname); jj++)
181 if (strcmp (attrname, tmpname) == 0)
183 if (strcmp (attrname, "umask") == 0)
185 if (attrval & ~evnt_valid_umask)
187 logerr (GTXT ("for `%s', allowable umask bits are: 0x%llx\n"),
188 nameOnly, (long long) evnt_valid_umask);
189 return -1;
192 if (mask_shift_set (&evntsel,
193 perfctr_attrs_table[jj].is_inverted ? (attrval^1) : attrval,
194 perfctr_attrs_table[jj].mask,
195 perfctr_attrs_table[jj].shift))
197 logerr (GTXT ("`%s' attribute `%s' could not be set to 0x%llx\n"),
198 nameOnly, attrname, (long long) attrval);
199 return -1;
201 TprintfT (DBG_LT2, "hwcfuncs: Counter %s, attribute %s set to 0x%llx\n",
202 nameOnly, attrname, (long long) attrval);
203 attr_found = 1;
204 break;
207 if (!attr_found)
209 logerr (GTXT ("attribute `%s' is invalid\n"), attrname);
210 return -1;
213 *result_mask = evntsel;
214 return 0;
217 static int
218 hwcfuncs_get_x86_eventsel (Hwcentry *h,
219 eventsel_t *return_event, uint_t *return_pmc_sel)
221 hwcfuncs_attr_t attrs[HWCFUNCS_MAX_ATTRS + 1];
222 unsigned nattrs = 0;
223 char *nameOnly = NULL;
224 eventsel_t evntsel = h->config;
225 eventsel_t evnt_valid_umask = 0;
226 uint_t pmc_sel = 0;
227 int rc = -1;
228 *return_event = 0;
229 *return_pmc_sel = 0;
230 void *attr_mem = hwcfuncs_parse_attrs (h->int_name, attrs, HWCFUNCS_MAX_ATTRS,
231 &nattrs, NULL);
232 if (!attr_mem)
234 logerr (GTXT ("out of memory, could not parse attributes\n"));
235 return -1;
237 hwcfuncs_parse_ctr (h->int_name, NULL, &nameOnly, NULL, NULL, NULL);
239 /* look up evntsel */
240 if (myperfctr_get_x86_eventnum (nameOnly, h->reg_num,
241 &evntsel, &evnt_valid_umask, &pmc_sel))
243 logerr (GTXT ("counter `%s' is not valid\n"), nameOnly);
244 goto attr_wrapup;
246 TprintfT (DBG_LT1, "hwcfuncs: event=0x%llx pmc=0x%x '%s' nattrs = %u\n",
247 (long long) evntsel, pmc_sel, nameOnly, nattrs);
249 /* determine event attributes */
250 eventsel_t evnt_attrs = perfctr_evntsel_enable_bits;
251 if (set_x86_attr_bits (&evnt_attrs, evnt_valid_umask, attrs, nattrs, nameOnly))
252 goto attr_wrapup;
253 if (evntsel & evnt_attrs)
254 TprintfT (DBG_LT0, "hwcfuncs: ERROR - evntsel & enable bits overlap: 0x%llx 0x%llx 0x%llx\n",
255 (long long) evntsel, (long long) evnt_attrs,
256 (long long) (evntsel & evnt_attrs));
257 *return_event = evntsel | evnt_attrs;
258 *return_pmc_sel = pmc_sel;
259 rc = 0;
261 attr_wrapup:
262 free (attr_mem);
263 free (nameOnly);
264 return rc;
/* Instruction and clobber list used by the inline-assembly system call
   in hwcdrv_gettid().  Defined only for x86-64 and i386.  */
#if defined(__x86_64__)
#define syscall_instr   "syscall"
#define syscall_clobber "rcx", "r11", "memory"
#elif defined(__i386__)
#define syscall_instr   "int $0x80"
#define syscall_clobber "memory"
#endif
/* Wrapper around the perf_event_open system call.

   It seems that perf_event_open() sometimes fails spuriously, even
   while an immediate retry succeeds, so the call is attempted up to
   five times.  Returns the new file descriptor, or -1 when every
   attempt failed (errno is whatever the last attempt and the debug
   trace left behind).  */
static inline int
perf_event_open (struct perf_event_attr *hw_event_uptr, pid_t pid,
		 int cpu, int group_fd, unsigned long flags)
{
  enum { max_attempts = 5 };
  int fd = -1;
  int attempt = 0;

  while (attempt < max_attempts)
    {
      fd = syscall (__NR_perf_event_open, hw_event_uptr, pid, cpu, group_fd,
		    flags);
      if (fd != -1)
	break;
      TprintfT (0, "perf_event_open %d: errno=%d %s\n", attempt, errno, strerror(errno));
      attempt++;
    }
  return fd;
}
295 /*---------------------------------------------------------------------------*/
296 /* macros & fwd prototypes */
/* Marks functions that make up the hwcdrv API; they have internal
   linkage in this translation unit.  */
#define HWCDRV_API static

/* Forward declarations for API functions used before their definition.  */
HWCDRV_API int hwcdrv_start (void);
HWCDRV_API int hwcdrv_free_counters ();
/* Return the kernel thread id of the calling thread.

   When built as part of libcollector on Intel the system call is issued
   through inline assembly instead of the libc syscall() wrapper; every
   other configuration goes through syscall().  */
static pid_t
hwcdrv_gettid (void)
{
#if defined(LIBCOLLECTOR_SRC) && defined(intel)
  pid_t tid;
  __asm__ __volatile__(syscall_instr
		       : "=a" (tid) : "0" (__NR_gettid)
		       : syscall_clobber);
  return tid;
#else
  /* FIXUP_XXX_SPARC_LINUX: for libcollector on non-Intel targets gettid
     should eventually be written in asm as well.  */
  return syscall (__NR_gettid);
#endif
}
319 /*---------------------------------------------------------------------------*/
320 /* types */
322 #define NPAGES_PER_BUF 1 // number of pages to be used for perf_event samples
323 // must be a power of 2
325 /*---------------------------------------------------------------------------*/
327 /* typedefs */
329 typedef struct
330 { // event (hwc) definition
331 unsigned int reg_num; // PMC assignment, potentially for detecting conflicts
332 eventsel_t eventsel; // raw event bits (Intel/AMD)
333 uint64_t counter_preload; // number of HWC events before signal
334 struct perf_event_attr hw; // perf_event definition
335 hrtime_t min_time; // minimum time we're targeting between events
336 char *name;
337 } perf_event_def_t;
338 static perf_event_def_t event_def_0;
/* Runtime state of one perf_event sample buffer.  */
typedef struct
{
  void *buf;		/* pointer to mmapped buffer (NULL when unmapped) */
  size_t pagesz;	/* size of pages */
} buffer_state_t;
/* Values seen in the previous sample of a counter; read_sample()
   subtracts them from the current sample to obtain deltas.  */
typedef struct
{
  uint64_t prev_ena_ts;	/* previous perf_event "enabled" time */
  uint64_t prev_run_ts;	/* previous perf_event "running" time */
  uint64_t prev_value;	/* previous HWC value */
} counter_value_state_t;
353 typedef struct
354 { // per-counter information
355 perf_event_def_t *ev_def; // global HWC definition for one counter
356 int fd; // perf_event fd
357 buffer_state_t buf_state; // perf_event buffer's state
358 counter_value_state_t value_state; // counter state
359 int needs_restart; // workaround for dbx failure to preserve si_fd
360 uint64_t last_overflow_period;
361 hrtime_t last_overflow_time;
362 } counter_state_t;
364 typedef struct
365 { // per-thread context
366 counter_state_t *ctr_list;
367 int signal_fd; // fd that caused the most recent signal
368 pid_t tid; // for debugging signal delivery problems
369 } hdrv_pcl_ctx_t;
371 /*---------------------------------------------------------------------------*/
373 /* static variables */
374 static struct
376 int library_ok;
377 int internal_open_called;
378 hwcfuncs_tsd_get_fn_t find_vpc_ctx;
379 unsigned hwcdef_cnt; /* number of *active* hardware counters */
380 } hdrv_pcl_state;
382 static hwcdrv_about_t hdrv_pcl_about = {.cpcN_cpuver = CPUVER_UNDEFINED};
383 static perf_event_def_t global_perf_event_def[MAX_PICS];
385 #define COUNTERS_ENABLED() (hdrv_pcl_state.hwcdef_cnt)
388 /* perf_event buffer formatting and handling */
389 static void
390 reset_buf (buffer_state_t *bufstate)
392 TprintfT (0, "hwcdrv: ERROR: perf_event reset_buf() called!\n");
393 struct perf_event_mmap_page *metadata = bufstate->buf;
394 if (metadata)
395 metadata->data_tail = metadata->data_head;
398 static int
399 skip_buf (buffer_state_t *bufstate, size_t sz)
401 TprintfT (DBG_LT1, "hwcdrv: WARNING: perf_event skip_buf called!\n");
402 struct perf_event_mmap_page *metadata = bufstate->buf;
403 if (metadata == NULL)
404 return -1;
405 size_t pgsz = bufstate->pagesz;
406 size_t bufsz = NPAGES_PER_BUF*pgsz;
407 uint64_t d_tail = metadata->data_tail;
408 uint64_t d_head = metadata->data_head;
410 // validate request size
411 if (sz > d_head - d_tail || sz >= bufsz)
413 reset_buf (bufstate);
414 return -1;
416 metadata->data_tail = d_tail + sz; // advance tail
417 return 0;
420 static int
421 read_buf (buffer_state_t *bufstate, void *buf, size_t sz)
423 struct perf_event_mmap_page *metadata = bufstate->buf;
424 if (metadata == NULL)
425 return -1;
426 size_t pgsz = bufstate->pagesz;
427 size_t bufsz = NPAGES_PER_BUF*pgsz;
428 uint64_t d_tail = metadata->data_tail;
429 uint64_t d_head = metadata->data_head;
431 // validate request size
432 if (sz > d_head - d_tail || sz >= bufsz)
434 reset_buf (bufstate);
435 return -1;
437 char *buf_base = ((char *) metadata) + pgsz; // start of data buffer
438 uint64_t start_pos = d_tail & (bufsz - 1); // char offset into data buffer
439 size_t nbytes = sz;
440 if (start_pos + sz > bufsz)
442 // will wrap past end of buffer
443 nbytes = bufsz - start_pos;
444 memcpy (buf, buf_base + start_pos, nbytes);
445 start_pos = 0; // wrap to start
446 buf = (void *) (((char *) buf) + nbytes);
447 nbytes = sz - nbytes;
449 memcpy (buf, buf_base + start_pos, nbytes);
450 metadata->data_tail += sz;
451 return 0;
454 static int
455 read_u64 (buffer_state_t *bufstate, uint64_t *value)
457 return read_buf (bufstate, value, sizeof (uint64_t));
460 static int
461 read_sample (counter_state_t *ctr_state, int msgsz, uint64_t *rvalue,
462 uint64_t *rlost)
464 // returns count of bytes read
465 buffer_state_t *bufstate = &ctr_state->buf_state;
466 counter_value_state_t *cntstate = &ctr_state->value_state;
467 int readsz = 0;
469 // PERF_SAMPLE_IP
470 uint64_t ipc = 0;
471 int rc = read_u64 (bufstate, &ipc);
472 if (rc)
473 return -1;
474 readsz += sizeof (uint64_t);
476 // PERF_SAMPLE_READ: value
477 uint64_t value = 0;
478 rc = read_u64 (bufstate, &value);
479 if (rc)
480 return -2;
481 readsz += sizeof (uint64_t);
483 /* Bug 20806896
484 * Old Linux kernels (e.g. 2.6.32) on certain systems return enabled and
485 * running times in the sample data that correspond to the metadata times
486 * metadata->time_enabled
487 * metadata->time_running
488 * from the PREVIOUS (not current) sample. Probably just ignore this bug
489 * since it's on old kernels and we only use the enabled and running times
490 * to construct loss_estimate.
492 // PERF_SAMPLE_READ: PERF_FORMAT_ENABLED
493 uint64_t enabled_time = 0;
494 rc = read_u64 (bufstate, &enabled_time);
495 if (rc)
496 return -3;
497 readsz += sizeof (uint64_t);
499 // PERF_SAMPLE_READ: PERF_FORMAT_RUNNING
500 uint64_t running_time = 0;
501 rc = read_u64 (bufstate, &running_time);
502 if (rc)
503 return -4;
504 readsz += sizeof (uint64_t);
506 uint64_t value_delta = value - cntstate->prev_value;
507 uint64_t enabled_delta = enabled_time - cntstate->prev_ena_ts;
508 uint64_t running_delta = running_time - cntstate->prev_run_ts;
509 cntstate->prev_value = value;
510 cntstate->prev_ena_ts = enabled_time;
511 cntstate->prev_run_ts = running_time;
513 // 24830461 need workaround for Linux anomalous HWC skid overrun
514 int set_error_flag = 0;
515 if (value_delta > 2 * ctr_state->last_overflow_period + 2000 /* HWC_SKID_TOLERANCE */)
516 set_error_flag = 1;
518 uint64_t loss_estimate = 0; // estimate loss of events caused by multiplexing
519 if (running_delta == enabled_delta)
521 // counter was running 100% of time, no multiplexing
523 else if (running_delta == 0)
524 loss_estimate = 1; // token amount to aid in debugging perfctr oddities
525 else if ((running_delta > enabled_delta) || (enabled_delta & 0x1000000000000000ll))
527 // running should be smaller than enabled, can't estimate
529 * 21418391 HWC can have a negative count
531 * We've also seen enabled not only be smaller than running
532 * but in fact go negative. Guard against this.
534 loss_estimate = 2; // token amount to aid in debugging perfctr oddities
536 else
538 // counter was running less than 100% of time
539 // Example: ena=7772268 run=6775669 raw_value=316004 scaled_value=362483 loss_est=46479
540 uint64_t scaled_delta = (double) value_delta * enabled_delta / running_delta;
541 value_delta = scaled_delta;
542 #if 0
543 // We should perhaps warn the user that multiplexing is going on,
544 // but hwcdrv_pcl.c doesn't know about the collector_interface, SP_JCMD_COMMENT, or COL_COMMENT_* values.
545 // For now we simply don't report.
546 // Perhaps we should address the issue not here but in the caller collector_sigemt_handler(),
547 // but at that level "lost" has a meaning that's considerably broader than just multiplexing.
548 collector_interface->writeLog ("<event kind=\"%s\" id=\"%d\">%s %d -> %d</event>\n",
549 SP_JCMD_COMMENT, COL_COMMENT_HWCADJ, global_perf_event_def[idx].name,
550 ctr_list[idx].last_overflow_period, new_period);
551 #endif
553 TprintfT ((loss_estimate || set_error_flag) ? DBG_LT1 : DBG_LT3,
554 "hwcdrv: '%s' ipc=0x%llx ena=%llu run=%llu "
555 "value_delta=%lld(0x%llx) loss_est=%llu %s error_flag='%s'\n",
556 ctr_state->ev_def->name, (long long) ipc,
557 (long long) enabled_delta, (long long) running_delta,
558 (long long) value_delta, (long long) value_delta,
559 (unsigned long long) loss_estimate,
560 loss_estimate ? ", WARNING - SCALED" : "",
561 set_error_flag ? ", ERRORFLAG" : "");
562 if (set_error_flag == 1)
563 value_delta |= (1ULL << 63) /* HWCVAL_ERR_FLAG */;
564 *rvalue = value_delta;
565 *rlost = loss_estimate;
566 if (readsz != msgsz)
568 TprintfT (0, "hwcdrv: ERROR: perf_event sample not fully parsed\n");
569 return -5;
571 return 0;
/* Trace the contents of AT at debug level DBG_LT2.  Compiles to an
   empty function unless DEBUG is defined.  Bit-field flags are printed
   only when they are non-zero.  */
static void
dump_perf_event_attr (struct perf_event_attr *at)
{
#if defined(DEBUG)
  TprintfT (DBG_LT2, "dump_perf_event_attr: size=%d type=%d sample_period=%lld\n"
	    " config=0x%llx config1=0x%llx config2=0x%llx wakeup_events=%lld __reserved_1=%lld\n",
	    (int) at->size, (int) at->type,
	    (unsigned long long) at->sample_period,
	    (unsigned long long) at->config,
	    (unsigned long long) at->config1,
	    (unsigned long long) at->config2,
	    (unsigned long long) at->wakeup_events,
	    (unsigned long long) at->__reserved_1);

  /* Print one flag, by field name, when it is set.  */
#define DUMP_F(fld) \
  do \
    { \
      if (at->fld) \
	TprintfT(DBG_LT2, " %-10s : %lld\n", #fld, (long long) at->fld); \
    } \
  while (0)

  DUMP_F (disabled);
  DUMP_F (inherit);
  DUMP_F (pinned);
  DUMP_F (exclusive);
  DUMP_F (exclude_user);
  DUMP_F (exclude_kernel);
  DUMP_F (exclude_hv);
  DUMP_F (exclude_idle);
  DUMP_F (comm);
  DUMP_F (freq);
  DUMP_F (inherit_stat);
  DUMP_F (enable_on_exec);
  DUMP_F (task);
  DUMP_F (watermark);
#endif
}
602 static void
603 init_perf_event (struct perf_event_attr *hw, uint64_t event, uint64_t period,
604 Hwcentry *hwce)
606 static struct perf_event_attr perf_event_attr_0 = {
607 .size = sizeof (struct perf_event_attr),
608 .disabled = 1, /* off by default */
609 .exclude_hv = 1,
610 .wakeup_events = 1 /* wakeup every n events */
612 *hw = perf_event_attr_0;
613 if (hwce && hwce->use_perf_event_type)
615 hw->config = hwce->config;
616 hw->config1 = hwce->config1;
617 hw->type = hwce->type;
619 else
620 { // backward compatibility. The old interface had no 'hwce' argument.
621 hw->config = event;
622 hw->type = PERF_TYPE_RAW; // hw/sw/trace/raw...
624 hw->sample_period = period;
625 hw->sample_type = PERF_SAMPLE_IP |
626 // PERF_SAMPLE_TID |
627 // PERF_SAMPLE_TIME | // possibly interesting
628 // PERF_SAMPLE_ADDR |
629 PERF_SAMPLE_READ | // HWC value
630 // PERF_SAMPLE_CALLCHAIN | // interesting
631 // PERF_SAMPLE_ID |
632 // PERF_SAMPLE_CPU | // possibly interesting
633 // PERF_SAMPLE_PERIOD |
634 // PERF_SAMPLE_STREAM_ID |
635 // PERF_SAMPLE_RAW |
637 hw->read_format =
638 PERF_FORMAT_TOTAL_TIME_ENABLED | // detect when hwc not scheduled
639 PERF_FORMAT_TOTAL_TIME_RUNNING | // detect when hwc not scheduled
640 // PERF_FORMAT_ID |
641 // PERF_FORMAT_GROUP |
644 // Note: the following override config.priv bits!
645 hw->exclude_user = (event & (1 << 16)) == 0; /* don't count user */
646 hw->exclude_kernel = (event & (1 << 17)) == 0; /* ditto kernel */
647 dump_perf_event_attr (hw);
650 static int
651 start_one_ctr (int ii, size_t pgsz, hdrv_pcl_ctx_t * pctx, char *error_string)
653 // pe_attr should have been initialized in hwcdrv_create_counters()
654 struct perf_event_attr pe_attr;
655 memcpy (&pe_attr, &global_perf_event_def[ii].hw, sizeof (pe_attr));
657 // but we adjust the period, so make sure that pctx->ctr_list[ii].last_overflow_period has been set
658 pe_attr.sample_period = pctx->ctr_list[ii].last_overflow_period;
660 int hwc_fd = perf_event_open (&pe_attr, pctx->tid, -1, -1, 0);
661 if (hwc_fd == -1)
663 TprintfT (DBG_LT1, "%s idx=%d perf_event_open failed, errno=%d\n",
664 error_string, ii, errno);
665 return 1;
668 size_t buffer_area_sz = (NPAGES_PER_BUF + 1) * pgsz; // add a page for metadata
669 void * buf = mmap (NULL, buffer_area_sz, //YXXX is this a safe call?
670 PROT_READ | PROT_WRITE, MAP_SHARED, hwc_fd, 0);
671 if (buf == MAP_FAILED)
673 TprintfT (0, "sz = %ld, pgsz = %ld\n err=%s idx=%d mmap failed: %s\n",
674 (long) buffer_area_sz, (long) pgsz, error_string, ii, strerror (errno));
675 return 1;
677 pctx->ctr_list[ii].ev_def = &global_perf_event_def[ii]; // why do we set ev_def? we never seem to use it
678 pctx->ctr_list[ii].fd = hwc_fd;
679 pctx->ctr_list[ii].buf_state.buf = buf;
680 pctx->ctr_list[ii].buf_state.pagesz = pgsz;
681 pctx->ctr_list[ii].value_state.prev_ena_ts = 0;
682 pctx->ctr_list[ii].value_state.prev_run_ts = 0;
683 pctx->ctr_list[ii].value_state.prev_value = 0;
684 pctx->ctr_list[ii].last_overflow_time = gethrtime ();
686 /* set async mode */
687 long flags = fcntl (hwc_fd, F_GETFL, 0) | O_ASYNC;
688 int rc = fcntl (hwc_fd, F_SETFL, flags);
689 if (rc == -1)
691 TprintfT (0, "%s idx=%d O_ASYNC failed\n", error_string, ii);
692 return 1;
696 * set lwp ownership of the fd
697 * See BUGS section of "man perf_event_open":
698 * The F_SETOWN_EX option to fcntl(2) is needed to properly get
699 * overflow signals in threads. This was introduced in Linux 2.6.32.
700 * Legacy references:
701 * see http://lkml.org/lkml/2009/8/4/128
702 * google man fcntl F_SETOWN_EX -conflict
703 * "From Linux 2.6.32 onward, use F_SETOWN_EX to target
704 * SIGIO and SIGURG signals at a particular thread."
705 * http://icl.cs.utk.edu/papi/docs/da/d2a/examples__v2_8x_2self__smpl__multi_8c.html
706 * See 2010 CSCADS presentation by Eranian
708 struct f_owner_ex fowner_ex;
709 fowner_ex.type = F_OWNER_TID;
710 fowner_ex.pid = pctx->tid;
711 rc = fcntl (hwc_fd, F_SETOWN_EX, (unsigned long) &fowner_ex);
712 if (rc == -1)
714 TprintfT (0, "%s idx=%d F_SETOWN failed\n", error_string, ii);
715 return 1;
718 /* Use sigio so handler can determine FD via siginfo->si_fd. */
719 rc = fcntl (hwc_fd, F_SETSIG, SIGIO);
720 if (rc == -1)
722 TprintfT (0, "%s idx=%d F_SETSIG failed\n", error_string, ii);
723 return 1;
725 return 0;
728 static int
729 stop_one_ctr (int ii, counter_state_t *ctr_list)
731 int hwc_rc = 0;
732 if (-1 == ioctl (ctr_list[ii].fd, PERF_EVENT_IOC_DISABLE, 1))
734 TprintfT (0, "hwcdrv: ERROR: PERF_EVENT_IOC_DISABLE #%d failed: errno=%d\n", ii, errno);
735 hwc_rc = HWCFUNCS_ERROR_GENERIC;
737 void *buf = ctr_list[ii].buf_state.buf;
738 if (buf)
740 size_t bufsz = (NPAGES_PER_BUF + 1) * ctr_list[ii].buf_state.pagesz;
741 ctr_list[ii].buf_state.buf = NULL;
742 int tmprc = munmap (buf, bufsz);
743 if (tmprc)
745 TprintfT (0, "hwcdrv: ERROR: munmap() #%d failed: errno=%d\n", ii, errno);
746 hwc_rc = HWCFUNCS_ERROR_GENERIC;
749 if (-1 == close (ctr_list[ii].fd))
751 TprintfT (0, "hwcdrv: ERROR: close(fd) #%d failed: errno=%d\n", ii, errno);
752 hwc_rc = HWCFUNCS_ERROR_GENERIC;
754 return hwc_rc;
757 /* HWCDRV_API for thread-specific actions */
758 HWCDRV_API int
759 hwcdrv_lwp_init (void)
761 return hwcdrv_start ();
764 HWCDRV_API void
765 hwcdrv_lwp_fini (void)
767 hwcdrv_free_counters (); /* also sets pctx->ctr_list=NULL; */
770 /* open */
771 static int
772 hdrv_pcl_internal_open ()
774 if (hdrv_pcl_state.internal_open_called)
776 TprintfT (0, "hwcdrv: WARNING: hdrv_pcl_internal_open: already called\n");
777 return HWCFUNCS_ERROR_ALREADY_CALLED;
780 // determine if PCL is available
781 perf_event_def_t tmp_event_def = event_def_0;
782 struct perf_event_attr *pe_attr = &tmp_event_def.hw;
783 init_perf_event (pe_attr, 0, 0, NULL);
784 pe_attr->type = PERF_TYPE_HARDWARE; // specify abstracted HW event
785 pe_attr->config = PERF_COUNT_HW_INSTRUCTIONS; // specify abstracted insts
786 int hwc_fd = perf_event_open (pe_attr,
787 0, // pid/tid, 0 is self
788 -1, // cpu, -1 is per-thread mode
789 -1, // group_fd, -1 is root
790 0); // flags
791 if (hwc_fd == -1)
793 TprintfT (DBG_LT1, "hwcdrv: WARNING: hdrv_pcl_internal_open:"
794 " perf_event_open() failed, errno=%d\n", errno);
795 goto internal_open_error;
798 /* see if the PCL is new enough to know about F_SETOWN_EX */
799 struct f_owner_ex fowner_ex;
800 fowner_ex.type = F_OWNER_TID;
801 fowner_ex.pid = hwcdrv_gettid (); // "pid=tid" is correct w/F_OWNER_TID
802 if (fcntl (hwc_fd, F_SETOWN_EX, (unsigned long) &fowner_ex) == -1)
804 TprintfT (DBG_LT1, "hwcdrv: WARNING: hdrv_pcl_internal_open: "
805 "F_SETOWN failed, errno=%d\n", errno);
806 close (hwc_fd);
807 goto internal_open_error;
809 close (hwc_fd);
811 hdrv_pcl_state.internal_open_called = 1;
812 hdrv_pcl_state.library_ok = 1; // set to non-zero to show it's initted
813 hdrv_pcl_about.cpcN_cpuver = CPUVER_UNDEFINED;
814 TprintfT (DBG_LT2, "hwcdrv: hdrv_pcl_internal_open()\n");
815 for (int ii = 0; hdrv_pcbe_drivers[ii]; ii++)
817 hdrv_pcbe_api_t *ppcbe = hdrv_pcbe_drivers[ii];
818 if (!ppcbe->hdrv_pcbe_init ())
820 pcbe_driver = ppcbe;
821 hdrv_pcl_about.cpcN_cciname = ppcbe->hdrv_pcbe_impl_name ();
822 hdrv_pcl_about.cpcN_cpuver = hwcdrv_lookup_cpuver (hdrv_pcl_about.cpcN_cciname);
823 if (hdrv_pcl_about.cpcN_cpuver == CPUVER_UNDEFINED)
824 goto internal_open_error;
825 hdrv_pcl_about.cpcN_npics = ppcbe->hdrv_pcbe_ncounters ();
826 hdrv_pcl_about.cpcN_docref = ppcbe->hdrv_pcbe_cpuref ();
827 break;
830 if (hdrv_pcl_about.cpcN_npics > MAX_PICS)
832 TprintfT (0, "hwcdrv: WARNING: hdrv_pcl_internal_open:"
833 " reducing number of HWCs from %u to %u on processor '%s'\n",
834 hdrv_pcl_about.cpcN_npics, MAX_PICS, hdrv_pcl_about.cpcN_cciname);
835 hdrv_pcl_about.cpcN_npics = MAX_PICS;
837 TprintfT (DBG_LT1, "hwcdrv: hdrv_pcl_internal_open:"
838 " perf_event cpuver=%d, name='%s'\n",
839 hdrv_pcl_about.cpcN_cpuver, hdrv_pcl_about.cpcN_cciname);
840 return 0;
842 internal_open_error:
843 hdrv_pcl_about.cpcN_cpuver = CPUVER_UNDEFINED;
844 hdrv_pcl_about.cpcN_npics = 0;
845 hdrv_pcl_about.cpcN_docref = NULL;
846 hdrv_pcl_about.cpcN_cciname = NULL;
847 return HWCFUNCS_ERROR_NOT_SUPPORTED;
850 static void *
851 single_thread_tsd_ftn ()
853 static hdrv_pcl_ctx_t tsd_context;
854 return &tsd_context;
857 /* HWCDRV_API */
858 HWCDRV_API int
859 hwcdrv_init (hwcfuncs_abort_fn_t abort_ftn, int *tsd_sz)
861 hdrv_pcl_state.find_vpc_ctx = single_thread_tsd_ftn;
862 if (tsd_sz)
863 *tsd_sz = sizeof (hdrv_pcl_ctx_t);
865 if (hdrv_pcl_state.internal_open_called)
866 return HWCFUNCS_ERROR_ALREADY_CALLED;
867 return hdrv_pcl_internal_open ();
870 HWCDRV_API void
871 hwcdrv_get_info (int *cpuver, const char **cciname, uint_t *npics,
872 const char **docref, uint64_t *support)
874 if (cpuver)
875 *cpuver = hdrv_pcl_about.cpcN_cpuver;
876 if (cciname)
877 *cciname = hdrv_pcl_about.cpcN_cciname;
878 if (npics)
879 *npics = hdrv_pcl_about.cpcN_npics;
880 if (docref)
881 *docref = hdrv_pcl_about.cpcN_docref;
882 if (support)
883 *support = HWCFUNCS_SUPPORT_OVERFLOW_PROFILING | HWCFUNCS_SUPPORT_OVERFLOW_CTR_ID;
886 HWCDRV_API int
887 hwcdrv_enable_mt (hwcfuncs_tsd_get_fn_t tsd_ftn)
889 if (tsd_ftn)
890 hdrv_pcl_state.find_vpc_ctx = tsd_ftn;
891 else
893 TprintfT (0, "hwcdrv: ERROR: enable_mt(): tsd_ftn==NULL\n");
894 return HWCFUNCS_ERROR_UNAVAIL;
896 return 0;
899 HWCDRV_API int
900 hwcdrv_get_descriptions (hwcf_hwc_cb_t *hwc_cb, hwcf_attr_cb_t *attr_cb,
901 Hwcentry *raw_hwc_tbl)
903 int count = 0;
904 if (hwc_cb && pcbe_driver && pcbe_driver->hdrv_pcbe_get_events)
905 count = pcbe_driver->hdrv_pcbe_get_events (hwc_cb, raw_hwc_tbl);
906 if (attr_cb)
907 for (int ii = 0; perfctr_attrs_table && perfctr_attrs_table[ii].attrname; ii++)
908 attr_cb (perfctr_attrs_table[ii].attrname);
909 if (!count)
910 return -1;
911 return 0;
914 HWCDRV_API int
915 hwcdrv_assign_regnos (Hwcentry* entries[], unsigned numctrs)
917 return 0;
920 static int
921 internal_hwc_start (int fd)
923 int rc = ioctl (fd, PERF_EVENT_IOC_REFRESH, 1);
924 if (rc == -1)
926 TprintfT (DBG_LT0, "hwcdrv: ERROR: internal_hwc_start:"
927 " PERF_EVENT_IOC_REFRESH(fd=%d) failed: errno=%d\n", fd, errno);
928 return HWCFUNCS_ERROR_UNAVAIL;
930 TprintfT (DBG_LT3, "hwcdrv: internal_hwc_start(fd=%d)\n", fd);
931 return 0;
934 HWCDRV_API int
935 hwcdrv_overflow (siginfo_t *si, hwc_event_t *eventp, hwc_event_t *lost_events)
937 /* set expired counters to overflow value and all others to 0 */
938 /* return 0: OK, counters should be restarted */
939 /* return non-zero: eventp not set, counters should not be restarted */
940 /* clear return values */
941 int ii;
942 for (ii = 0; ii < hdrv_pcl_state.hwcdef_cnt; ii++)
944 eventp->ce_pic[ii] = 0;
945 lost_events->ce_pic[ii] = 0;
947 hrtime_t sig_ts = gethrtime (); //YXXX get this from HWC event?
948 eventp->ce_hrt = sig_ts;
949 lost_events->ce_hrt = sig_ts;
951 /* determine source signal */
952 int signal_fd = -1;
953 switch (si->si_code)
955 case POLL_HUP: /* expected value from pcl */
956 /* According to Stephane Eranian:
957 * "expect POLL_HUP instead of POLL_IN because we are
958 * in one-shot mode (IOC_REFRESH)"
960 signal_fd = si->si_fd;
961 break;
962 case SI_TKILL: /* event forwarded by tkill */
963 /* DBX can only forward SI_TKILL when it detects POLL_HUP
964 * unfortunately, this means that si->si_fd has been lost...
965 * We need to process the buffers, but we don't know the fd!
967 TprintfT (DBG_LT0, "hwcdrv: sig_ts=%llu: WARNING: hwcdrv_overflow:"
968 " SI_TKILL detected\n", sig_ts);
969 break;
970 default:
971 // "sometimes we see a POLL_IN (1) with very high event rates,"
972 // according to eranian(?)
973 TprintfT (DBG_LT0, "hwcdrv: sig_ts=%llu: ERROR: hwcdrv_overflow:"
974 " unexpected si_code 0x%x\n", sig_ts, si->si_code);
975 return HWCFUNCS_ERROR_GENERIC;
978 hdrv_pcl_ctx_t * pctx = hdrv_pcl_state.find_vpc_ctx ();
979 if (!pctx)
981 TprintfT (DBG_LT0, "hwcdrv: sig_ts=%llu: ERROR: hwcdrv_overflow:"
982 " tsd context is NULL\n", sig_ts);
983 return HWCFUNCS_ERROR_UNEXPECTED;
985 counter_state_t * ctr_list = (counter_state_t *) pctx->ctr_list;
986 if (!ctr_list)
988 TprintfT (DBG_LT0, "hwcdrv: sig_ts=%llu: WARNING: hwcdrv_overflow:"
989 " ctr_list is NULL\n", sig_ts);
990 return HWCFUNCS_ERROR_UNEXPECTED;
993 /* clear needs_restart flag */
994 for (ii = 0; ii < hdrv_pcl_state.hwcdef_cnt; ii++)
995 ctr_list[ii].needs_restart = 0;
997 /* attempt to identify the counter to read */
998 int signal_idx = -1;
999 pctx->signal_fd = signal_fd; // save the signal provided by siginfo_t
1000 if (signal_fd != -1)
1002 for (ii = 0; ii < hdrv_pcl_state.hwcdef_cnt; ii++)
1004 if (ctr_list[ii].fd == signal_fd)
1006 signal_idx = ii;
1007 break;
1012 if (signal_idx < 0)
1014 TprintfT (DBG_LT0, "hwcdrv: sig_ts=%llu: ERROR: hwcdrv_overflow:"
1015 " pmc not determined!\n", sig_ts);
1016 lost_events->ce_pic[0] = 1; /* record a bogus value into experiment */
1017 // note: bogus value may get overwritten in loop below
1020 /* capture sample(s). In addition to signal_idx, check other counters. */
1021 struct perf_event_header sheader;
1022 int idx;
1023 for (idx = 0; idx < hdrv_pcl_state.hwcdef_cnt; idx++)
1025 int num_recs = 0;
1026 while (1)
1028 /* check for samples */
1029 struct perf_event_mmap_page *metadata = ctr_list[idx].buf_state.buf;
1030 if (metadata == NULL)
1031 break; // empty
1032 if (metadata->data_tail == metadata->data_head)
1033 break; // empty
1035 /* read header */
1036 if (read_buf (&ctr_list[idx].buf_state, &sheader, sizeof (sheader)))
1037 break;
1038 num_recs++;
1040 /* check for PERF_RECORD_SAMPLE */
1041 size_t datasz = sheader.size - sizeof (struct perf_event_header);
1042 if (sheader.type != PERF_RECORD_SAMPLE)
1044 TprintfT (DBG_LT2, "hwcdrv: sig_ts=%llu: WARNING: hwcdrv_overflow:"
1045 " unexpected recd type=%d\n",
1046 sig_ts, sheader.type);
1047 if (skip_buf (&ctr_list[idx].buf_state, datasz))
1049 TprintfT (DBG_LT0, "hwcdrv: sig_ts=%llu: ERROR: hwcdrv_overflow:"
1050 " skip recd type=%d failed\n", sig_ts, sheader.type);
1051 lost_events->ce_pic[idx] = 4; /* record a bogus value */
1052 break; // failed to skip buffer??
1054 lost_events->ce_pic[idx] = 2; /* record a bogus value */
1055 continue; // advance to next record
1058 /* type is PERF_RECORD_SAMPLE */
1059 uint64_t value, lostv;
1060 if (read_sample (&ctr_list[idx], datasz, &value, &lostv))
1062 TprintfT (DBG_LT0, "hwcdrv: sig_ts=%llu: ERROR: hwcdrv_overflow:"
1063 " read_sample() failed\n", sig_ts);
1064 lost_events->ce_pic[idx] = 3; // record a bogus value
1065 break; // failed to read sample data??
1067 TprintfT (DBG_LT3, "hwcdrv: sig_ts=%llu: hwcdrv_overflow:"
1068 " idx=%d value=%llu lost=%llu\n", (unsigned long long) sig_ts,
1069 idx, (unsigned long long) value, (unsigned long long) lostv);
1070 if (eventp->ce_pic[idx])
1072 TprintfT (DBG_LT2, "hwcdrv: sig_ts=%llu: WARNING: hwcdrv_overflow:"
1073 " idx=%d previous sample recorded as lost_event\n", sig_ts, idx);
1074 lost_events->ce_pic[idx] += eventp->ce_pic[idx];
1076 eventp->ce_pic[idx] = value;
1077 lost_events->ce_pic[idx] += lostv;
1080 /* debug output for unexpected (but common) cases */
1081 if (idx == signal_idx)
1083 if (num_recs != 1)
1084 TprintfT (DBG_LT2, "hwcdrv: sig_ts=%llu: WARNING: hwcdrv_overflow:"
1085 " %d records for signal_idx=%d\n", sig_ts, num_recs, signal_idx);
1087 else if (num_recs)
1088 TprintfT (DBG_LT2, "hwcdrv: sig_ts=%llu: WARNING: hwcdrv_overflow:"
1089 " %d unexpected record(s) for idx=%d (signal_idx=%d)\n",
1090 sig_ts, num_recs, idx, signal_idx);
1092 /* trigger counter restart whenever records were found */
1093 if (num_recs)
1095 /* check whether to adapt the overflow interval */
1096 /* This is the Linux version.
1097 * The Solaris version is in hwprofile.c collector_update_overflow_counters().
1099 hrtime_t min_time = global_perf_event_def[idx].min_time;
1100 if (min_time > 0 // overflow interval is adaptive
1101 && sig_ts - ctr_list[idx].last_overflow_time < min_time) // last interval below min
1103 /* pick a new overflow interval */
1104 /* roughly doubled, but add funny numbers */
1105 /* hopefully the result is prime or not a multiple of some # of ops/loop */
1106 uint64_t new_period = 2 * ctr_list[idx].last_overflow_period + 37;
1107 #if 0
1108 // On Solaris, we report the adjustment to the log file.
1109 // On Linux it's hard for us to do so since hwcdrv_pcl.c doesn't know about collector_interface, SP_JCMD_COMMENT, or COL_COMMENT_HWCADJ.
1110 // For now we simply don't report.
1111 collector_interface->writeLog ("<event kind=\"%s\" id=\"%d\">%s %d -> %d</event>\n",
1112 SP_JCMD_COMMENT, COL_COMMENT_HWCADJ, global_perf_event_def[idx].name,
1113 ctr_list[idx].last_overflow_period, new_period);
1114 #endif
1115 /* There are a variety of ways of resetting the period on Linux.
1116 * The most elegant is
1117 * ioctl(fd,PERF_EVENT_IOC_PERIOD,&period)
1118 * but check the perf_event_open man page for PERF_EVENT_IOC_PERIOD:
1119 * > Prior to Linux 2.6.36 this ioctl always failed due to a bug in the kernel.
1120 * > Prior to Linux 3.14 (or 3.7 on ARM), the new period did not take effect
1121 * until after the next overflow.
1122 * So we're kind of stuck shutting the fd down and restarting it with the new period.
1124 if (stop_one_ctr (idx, ctr_list))
1126 // EUGENE figure out what to do on error
1128 ctr_list[idx].last_overflow_period = new_period;
1129 if (start_one_ctr (idx, ctr_list[idx].buf_state.pagesz, pctx, "hwcdrv: ERROR: hwcdrv_overflow (readjust overflow):"))
1131 // EUGENE figure out what to do on error
1134 ctr_list[idx].last_overflow_time = sig_ts;
1135 #if 0
1136 ctr_list[idx].needs_restart = 1;
1137 #else // seems to be more reliable to restart here instead of hwcdrv_sighlr_restart()
1138 internal_hwc_start (ctr_list[idx].fd);
1139 #endif
1142 return 0; // OK to restart counters
1145 HWCDRV_API int
1146 hwcdrv_sighlr_restart (const hwc_event_t *pp)
1148 #if 0 // restarting here doesn't seem to work as well as restarting in hwcdrv_overflow()
1149 hdrv_pcl_ctx_t * pctx = hdrv_pcl_state.find_vpc_ctx ();
1150 if (!pctx)
1152 TprintfT (DBG_LT0, "hwcdrv: ERROR: hwcdrv_sighlr_restart: find_vpc_ctx()==NULL\n");
1153 return -1;
1155 counter_state_t * ctr_list = (counter_state_t *) pctx->ctr_list;
1156 if (!ctr_list)
1158 TprintfT (DBG_LT0, "hwcdrv: WARNING: hwcdrv_sighlr_restart: ctr_list is NULL\n");
1159 return -1;
1161 int errors = 0;
1162 for (int ii = 0; ii < hdrv_pcl_state.hwcdef_cnt; ii++)
1164 if (ctr_list[ii].needs_restart)
1165 errors |= internal_hwc_start (ctr_list[ii].fd);
1166 ctr_list[ii].needs_restart = 0;
1168 return errors;
1169 #else
1170 return 0;
1171 #endif
1174 /* create counters based on hwcdef[] */
1175 HWCDRV_API int
1176 hwcdrv_create_counters (unsigned hwcdef_cnt, Hwcentry *hwcdef)
1178 if (hwcdef_cnt > hdrv_pcl_about.cpcN_npics)
1180 logerr (GTXT ("More than %d counters were specified\n"), hdrv_pcl_about.cpcN_npics); /*!*/
1181 return HWCFUNCS_ERROR_HWCARGS;
1183 if (hdrv_pcl_about.cpcN_cpuver == CPUVER_UNDEFINED)
1185 logerr (GTXT ("Processor not supported\n"));
1186 return HWCFUNCS_ERROR_HWCARGS;
1189 /* add counters */
1190 for (unsigned idx = 0; idx < hwcdef_cnt; idx++)
1192 perf_event_def_t *glb_event_def = &global_perf_event_def[idx];
1193 *glb_event_def = event_def_0;
1194 unsigned int pmc_sel;
1195 eventsel_t evntsel;
1196 if (hwcfuncs_get_x86_eventsel (hwcdef + idx, &evntsel, &pmc_sel))
1198 TprintfT (0, "hwcdrv: ERROR: hwcfuncs_get_x86_eventsel() failed\n");
1199 return HWCFUNCS_ERROR_HWCARGS;
1201 glb_event_def->reg_num = pmc_sel;
1202 glb_event_def->eventsel = evntsel;
1203 glb_event_def->counter_preload = hwcdef[idx].val;
1204 glb_event_def->min_time = hwcdef[idx].min_time;
1205 glb_event_def->name = strdup (hwcdef[idx].name); // memory leak??? very minor
1206 init_perf_event (&glb_event_def->hw, glb_event_def->eventsel,
1207 glb_event_def->counter_preload, hwcdef + idx);
1208 TprintfT (DBG_LT1, "hwcdrv: create_counters: pic=%u name='%s' interval=%lld"
1209 "(min_time=%lld): reg_num=0x%x eventsel=0x%llx ireset=%lld usr=%lld sys=%lld\n",
1210 idx, hwcdef[idx].int_name, (long long) glb_event_def->counter_preload,
1211 (long long) glb_event_def->min_time, (int) glb_event_def->reg_num,
1212 (long long) glb_event_def->eventsel,
1213 (long long) HW_INTERVAL_PRESET (hwcdef[idx].val),
1214 (long long) glb_event_def->hw.exclude_user,
1215 (long long) glb_event_def->hw.exclude_kernel);
1218 hdrv_pcl_state.hwcdef_cnt = hwcdef_cnt;
1219 return 0;
1222 HWCDRV_API int
1223 hwcdrv_free_counters () // note: only performs shutdown for this thread
1225 hdrv_pcl_ctx_t * pctx;
1226 if (!COUNTERS_ENABLED ())
1227 return 0;
1228 pctx = hdrv_pcl_state.find_vpc_ctx ();
1229 if (!pctx)
1231 TprintfT (0, "hwcdrv: WARNING: hwcdrv_free_counters: tsd context is NULL\n");
1232 return HWCFUNCS_ERROR_GENERIC;
1234 counter_state_t *ctr_list = pctx->ctr_list;
1235 if (!ctr_list)
1237 // fork child: prolog suspends hwcs, then epilog frees them
1238 TprintfT (DBG_LT1, "hwcdrv: WARNING: hwcdrv_free_counters: ctr_list is already NULL\n");
1239 return 0;
1241 int hwc_rc = 0;
1242 for (int ii = 0; ii < hdrv_pcl_state.hwcdef_cnt; ii++)
1243 if (stop_one_ctr (ii, ctr_list))
1244 hwc_rc = HWCFUNCS_ERROR_GENERIC;
1245 TprintfT (DBG_LT1, "hwcdrv: hwcdrv_free_counters(tid=0x%lx).\n", (long) pctx->tid);
1246 pctx->ctr_list = NULL;
1247 return hwc_rc;
1250 HWCDRV_API int
1251 hwcdrv_start (void) /* must be called from each thread ? */
1253 hdrv_pcl_ctx_t *pctx = NULL;
1254 if (!COUNTERS_ENABLED ())
1256 TprintfT (DBG_LT1, "hwcdrv: WARNING: hwcdrv_start: no counters to start \n");
1257 return 0;
1259 if (!hdrv_pcl_state.library_ok)
1261 TprintfT (0, "hwcdrv: ERROR: hwcdrv_start: library is not open\n");
1262 return HWCFUNCS_ERROR_NOT_SUPPORTED;
1266 * set up per-thread context
1268 pctx = hdrv_pcl_state.find_vpc_ctx ();
1269 if (!pctx)
1271 TprintfT (0, "hwcdrv: ERROR: hwcdrv_start: tsd context is NULL\n");
1272 return HWCFUNCS_ERROR_UNEXPECTED;
1274 pctx->tid = hwcdrv_gettid ();
1275 TprintfT (DBG_LT1, "hwcdrv: hwcdrv_start(tid=0x%lx)\n", (long) pctx->tid);
1278 * create per-thread counter list
1280 counter_state_t *ctr_list = (counter_state_t *) calloc (hdrv_pcl_state.hwcdef_cnt,
1281 sizeof (counter_state_t));
1282 if (!ctr_list)
1284 TprintfT (0, "hwcdrv: ERROR: hwcdrv_start: calloc(ctr_list) failed\n");
1285 return HWCFUNCS_ERROR_MEMORY;
1287 int ii;
1288 for (ii = 0; ii < hdrv_pcl_state.hwcdef_cnt; ii++)
1289 ctr_list[ii].fd = -1; // invalidate fds in case we have to close prematurely
1290 pctx->ctr_list = ctr_list;
1293 * bind the counters
1295 size_t pgsz = sysconf (_SC_PAGESIZE);
1296 for (ii = 0; ii < hdrv_pcl_state.hwcdef_cnt; ii++)
1298 ctr_list[ii].last_overflow_period = global_perf_event_def[ii].hw.sample_period;
1299 if (start_one_ctr (ii, pgsz, pctx, "hwcdrv: ERROR: hwcdrv_start:")) goto hwcdrv_start_cleanup;
1303 * start the counters
1305 for (ii = 0; ii < hdrv_pcl_state.hwcdef_cnt; ii++)
1307 int rc = internal_hwc_start (ctr_list[ii].fd);
1308 if (rc < 0)
1309 goto hwcdrv_start_cleanup;
1311 return 0;
1313 hwcdrv_start_cleanup:
1314 hwcdrv_free_counters (); // PERF_EVENT_IOC_DISABLE and close() for all fds
1315 return HWCFUNCS_ERROR_UNAVAIL;
1318 HWCDRV_API int
1319 hwcdrv_lwp_suspend (void) /* must be called from each thread */
1321 if (!COUNTERS_ENABLED ())
1323 TprintfT (DBG_LT1, "hwcdrv: WARNING: hwcdrv_lwp_suspend: no counters\n");
1324 return 0;
1326 TprintfT (DBG_LT1, "hwcdrv: hwcdrv_lwp_suspend()\n");
1327 return hwcdrv_free_counters ();
1330 HWCDRV_API int
1331 hwcdrv_lwp_resume (void) /* must be called from each thread */
1333 if (!COUNTERS_ENABLED ())
1335 TprintfT (DBG_LT1, "hwcdrv: WARNING: hwcdrv_lwp_resume: no counters\n");
1336 return 0;
1338 TprintfT (DBG_LT1, "hwcdrv: hwcdrv_lwp_resume()\n");
1339 return hwcdrv_start ();
1342 HWCDRV_API int
1343 hwcdrv_read_events (hwc_event_t *overflow_data, hwc_event_samples_t *sampled_data)
1345 overflow_data->ce_hrt = 0;
1346 for (int i = 0; i < MAX_PICS; i++)
1348 overflow_data->ce_pic[i] = 0;
1349 if (sampled_data)
1350 HWCFUNCS_SAMPLE_RESET (&sampled_data->sample[i]);
1352 return 0;
1355 /*---------------------------------------------------------------------------*/
1356 /* HWCDRV_API */
1358 hwcdrv_api_t hwcdrv_pcl_api = {
1359 hwcdrv_init,
1360 hwcdrv_get_info,
1361 hwcdrv_enable_mt,
1362 hwcdrv_get_descriptions,
1363 hwcdrv_assign_regnos,
1364 hwcdrv_create_counters,
1365 hwcdrv_start,
1366 hwcdrv_overflow,
1367 hwcdrv_read_events,
1368 hwcdrv_sighlr_restart,
1369 hwcdrv_lwp_suspend,
1370 hwcdrv_lwp_resume,
1371 hwcdrv_free_counters,
1372 hwcdrv_lwp_init,
1373 hwcdrv_lwp_fini,
1374 -1 // hwcdrv_init_status