1 /* Copyright (C) 2021 Free Software Foundation, Inc.
4 This file is part of GNU Binutils.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, 51 Franklin Street - Fifth Floor, Boston,
19 MA 02110-1301, USA. */
25 #include <sys/ioctl.h>
26 #include <sys/syscall.h>
27 #include <linux/perf_event.h>
31 /*---------------------------------------------------------------------------*/
33 #define IS_GLOBAL /* Mark global symbols */
35 #include "cpuid.c" /* ftns for identifying a chip */
/* Per-CPU-family backend descriptors and the table of backends.
   hdrv_pcl_internal_open() walks hdrv_pcbe_drivers until it reaches a
   null entry and calls hdrv_pcbe_init() on each backend it visits.  */
37 static hdrv_pcbe_api_t hdrv_pcbe_core_api
;
38 static hdrv_pcbe_api_t hdrv_pcbe_opteron_api
;
39 static hdrv_pcbe_api_t
*hdrv_pcbe_drivers
[] = {
41 &hdrv_pcbe_opteron_api
,
44 #include "opteron_pcbe.c" /* CPU-specific code */
45 #include "core_pcbe.c" /* CPU-specific code */
/* Table of driver API descriptors published by this file.  IS_GLOBAL
   expands to nothing, so hwcdrv_drivers is not static.  */
47 extern hwcdrv_api_t hwcdrv_pcl_api
;
48 IS_GLOBAL hwcdrv_api_t
*hwcdrv_drivers
[] = {
53 /*---------------------------------------------------------------------------*/
55 /* utils for drivers */
/* Bind each of the numctrs counters in entries[] to a register number.
   entries[idx]->reg_num is read and, when a register is chosen, written
   back.  pmc_assigned[] records which of the MAX_PICS registers are taken.
   Pass 1 handles counters whose reg_num is already set, or whose reg_list
   reduces to one entry through REG_LIST_SINGLE_VALID_ENTRY; such a register
   must lie in 0..MAX_PICS-1 and pass regno_is_valid().
   Pass 2 handles counters still at REGNO_ANY by scanning their reg_list
   (terminated by REGNO_ANY) for a register not yet marked in pmc_assigned[].
   Every error path visible here reports through logerr() and returns
   HWCFUNCS_ERROR_HWCARGS.  */
57 hwcdrv_assign_all_regnos (Hwcentry
* entries
[], unsigned numctrs
)
59 unsigned int pmc_assigned
[MAX_PICS
];
61 for (int ii
= 0; ii
< MAX_PICS
; ii
++)
64 /* assign the HWCs that we already know about */
65 for (idx
= 0; idx
< numctrs
; idx
++)
67 regno_t regno
= entries
[idx
]->reg_num
;
68 if (regno
== REGNO_ANY
)
70 /* check to see if list of possible registers only contains one entry */
71 regno
= REG_LIST_SINGLE_VALID_ENTRY (entries
[idx
]->reg_list
);
73 if (regno
!= REGNO_ANY
)
75 if (regno
< 0 || regno
>= MAX_PICS
|| !regno_is_valid (entries
[idx
], regno
))
77 logerr (GTXT ("For counter #%d, register %d is out of range\n"), idx
+ 1, regno
); /*!*/
78 return HWCFUNCS_ERROR_HWCARGS
;
80 TprintfT (DBG_LT2
, "hwcfuncs_assign_regnos(): preselected: idx=%d, regno=%d\n", idx
, regno
);
81 entries
[idx
]->reg_num
= regno
; /* assigning back to entries */
82 pmc_assigned
[regno
] = 1;
86 /* assign HWCs that are currently REGNO_ANY */
87 for (idx
= 0; idx
< numctrs
; idx
++)
89 if (entries
[idx
]->reg_num
== REGNO_ANY
)
92 regno_t
*reg_list
= entries
[idx
]->reg_list
;
93 for (; reg_list
&& *reg_list
!= REGNO_ANY
; reg_list
++)
95 regno_t regno
= *reg_list
;
96 if (regno
< 0 || regno
>= MAX_PICS
)
98 logerr (GTXT ("For counter #%d, register %d is out of range\n"), idx
+ 1, regno
); /*!*/
99 return HWCFUNCS_ERROR_HWCARGS
;
101 if (pmc_assigned
[regno
] == 0)
103 TprintfT (DBG_LT2
, "hwcfuncs_assign_regnos(): assigned: idx=%d, regno=%d\n", idx
, regno
);
104 entries
[idx
]->reg_num
= regno
; /* assigning back to entries */
105 pmc_assigned
[regno
] = 1;
112 logerr (GTXT ("Counter '%s' could not be bound to a register\n"),
113 entries
[idx
]->name
? entries
[idx
]->name
: "<NULL>");
114 return HWCFUNCS_ERROR_HWCARGS
;
/* Translate a CPU name string into a cpuver code.
   Returns CPUVER_UNDEFINED when cpcN_cciname is NULL.  Otherwise cpu_table
   (filled from LIBCPC2_CPU_LOOKUP_LIST) is scanned up to the entry whose
   cpc2_cciname is null; an entry matches when its whole name equals the
   leading characters of cpcN_cciname (strncmp limited to the length of the
   table name), and its cpc2_cpuver is returned.  With no match a debug
   warning is emitted and CPUVER_GENERIC is returned.  */
122 hwcdrv_lookup_cpuver (const char * cpcN_cciname
)
124 libcpc2_cpu_lookup_t
*plookup
;
125 static libcpc2_cpu_lookup_t cpu_table
[] = {
126 LIBCPC2_CPU_LOOKUP_LIST
128 if (cpcN_cciname
== NULL
)
129 return CPUVER_UNDEFINED
;
131 /* search table for name */
132 for (plookup
= cpu_table
; plookup
->cpc2_cciname
; plookup
++)
134 int n
= strlen (plookup
->cpc2_cciname
);
135 if (!strncmp (plookup
->cpc2_cciname
, cpcN_cciname
, n
))
136 return plookup
->cpc2_cpuver
;
138 /* unknown, but does have a descriptive string */
139 TprintfT (DBG_LT0
, "hwcfuncs: CPC2: WARNING: Id of processor '%s' "
140 "could not be determined\n",
142 return CPUVER_GENERIC
;
145 /*---------------------------------------------------------------------------*/
146 /* utils to generate x86 register definitions on Linux */
149 * This code is structured as though we're going to initialize the
150 * HWC by writing the Intel MSR register directly. That is, we
151 * assume the lowest 16 bits of the event number will have the event
152 * and that higher bits will set attributes.
154 * While SPARC is different, we can nonetheless use basically the
155 * same "x86"-named functions:
157 * - The event code will still be 16 bits. It will still
158 * be in the lowest 16 bits of the event number. Though
159 * perf_event_code() on SPARC will expect those bits to be
160 * shifted, hwcdrv_pcl.c can easily perform that shift.
162 * - On SPARC we support only two attributes, "user" and "system",
163 * which hwcdrv_pcl.c already converts to the "exclude_user"
164 * and "exclude_kernel" fields expected by perf_event_open().
165 * "user" and "system" are stored in event bits 16 and 17.
166 * For M8, a 4-bit mask of supported PICs is stored in bits [23:20].
/* Optional backend hook that maps an event name to an event number.
   Starts out null; hdrv_pcl_internal_open() stores the hdrv_pcbe_get_eventnum
   member of a backend here, and myperfctr_get_x86_eventnum() calls it only
   when it is non-null.  */
169 IS_GLOBAL hwcdrv_get_eventnum_fn_t
*hwcdrv_get_x86_eventnum
= 0;
/* Attribute names accepted on SPARC: user and system.
   NOTE(review): the four initializer values presumably map to the attrname,
   is_inverted, mask and shift members read by set_x86_attr_bits() - confirm
   against the attr_info_t declaration.  */
171 static const attr_info_t perfctr_sparc_attrs
[] = {
172 {NTXT ("user"), 0, 0x01, 16}, //usr
173 {NTXT ("system"), 0, 0x01, 17}, //os
/* Attribute names accepted for x86-64 events.
   NOTE(review): the last two values of each entry look like a field mask and
   a bit position (for example umask: 0xff at bit 8) - confirm the member
   order against the attr_info_t declaration.  */
176 static const attr_info_t perfctr_x64_attrs
[] = {/* ok for Core2 & later */
177 {NTXT ("umask"), 0, 0xff, 8},
178 {NTXT ("user"), 0, 0x01, 16}, //usr
179 //{NTXT("nouser"), 1, 0x01, 16}, //usr (inverted)
180 {NTXT ("system"), 0, 0x01, 17}, //os
181 {NTXT ("edge"), 0, 0x01, 18},
182 {NTXT ("pc"), 0, 0x01, 19},
183 {NTXT ("inv"), 0, 0x01, 23},
184 {NTXT ("cmask"), 0, 0xff, 24},
/* Attribute table currently in effect.  set_x86_attr_bits() and
   hwcdrv_get_descriptions() walk it until an entry with a null attrname.  */
187 const attr_info_t
*perfctr_attrs_table
= perfctr_x64_attrs
;
/* Starting value for the attribute bits of every event selector (see
   hwcfuncs_get_x86_eventsel()).  Only the usr (16), int (20) and enable (22)
   terms are active; the remaining terms are commented out.  */
189 static const eventsel_t perfctr_evntsel_enable_bits
= (0x01 << 16) | /* usr */
190 // (0xff << 0) | /* event*/
191 // (0xff << 8) | /* umask */
192 // (0x01 << 17) | /* os */
193 // (0x01 << 18) | /* edge */
194 // (0x01 << 19) | /* pc */
195 (0x01 << 20) | /* int */
196 // (0x01 << 21) | /* reserved */
197 (0x01 << 22) | /* enable */
198 // (0x01 << 23) | /* inv */
199 // (0xff << 24) | /* cmask */
/* Resolve eventname into an event selector.
   When the backend hook hwcdrv_get_x86_eventnum is installed it is consulted
   first and its result is tested for zero.  The code that follows parses the
   name with strtoull() using base 0: a non-empty string that is consumed
   entirely yields *eventsel = EXTENDED_EVNUM_2_EVSEL (num) and
   *valid_umask = 0xff.  A name that is not numeric sets *eventsel to
   (eventsel_t) -1.
   NOTE(review): the return statements are not visible in this excerpt; the
   caller hwcfuncs_get_x86_eventsel() treats a non-zero result as an invalid
   counter.  */
203 myperfctr_get_x86_eventnum (const char *eventname
, uint_t pmc
,
204 eventsel_t
*eventsel
, eventsel_t
*valid_umask
,
207 if (hwcdrv_get_x86_eventnum
&&
208 !hwcdrv_get_x86_eventnum (eventname
, pmc
, eventsel
, valid_umask
, pmc_sel
))
211 /* check for numerically-specified counters */
213 uint64_t num
= strtoull (eventname
, &endptr
, 0);
214 if (*eventname
&& !*endptr
)
216 *eventsel
= EXTENDED_EVNUM_2_EVSEL (num
);
217 *valid_umask
= 0xff; /* allow any umask (unused for SPARC?) */
222 /* name does not specify a numeric value */
223 *eventsel
= (eventsel_t
) - 1;
/* Write invalue into the bit field of *presult selected by mask and shift:
   the field is first cleared with ~(mask << shift), then invalue << shift is
   OR-ed in.  The -1 return is for an invalue with bits outside mask.  */
230 mask_shift_set (eventsel_t
*presult
, eventsel_t invalue
,
231 eventsel_t mask
, eventsel_t shift
)
234 return -1; /* invalue attempts to set bits outside of mask */
235 *presult
&= ~(mask
<< shift
); /* clear all the mask bits */
236 *presult
|= (invalue
<< shift
); /* set bits according to invalue */
/* Apply the nattrs parsed attributes in attrs[] to the selector bits in
   *result_mask, working on a local copy that is stored back at the end.
   Each attrs[ii].ca_name is matched with strcmp() against the names in
   perfctr_attrs_table.  For umask the value is checked for bits outside
   evnt_valid_umask.  The value (XOR-ed with 1 when the table entry is marked
   is_inverted) is stored with mask_shift_set() using the mask and shift of
   the entry.  Problems are reported through logerr(), with nameOnly naming
   the counter.  */
241 set_x86_attr_bits (eventsel_t
*result_mask
, eventsel_t evnt_valid_umask
,
242 hwcfuncs_attr_t attrs
[], int nattrs
, const char*nameOnly
)
244 eventsel_t evntsel
= *result_mask
;
245 for (int ii
= 0; ii
< (int) nattrs
; ii
++)
247 const char *attrname
= attrs
[ii
].ca_name
;
248 eventsel_t attrval
= (eventsel_t
) attrs
[ii
].ca_val
;
251 for (int jj
= 0; (tmpname
= perfctr_attrs_table
[jj
].attrname
); jj
++)
253 if (strcmp (attrname
, tmpname
) == 0)
255 if (strcmp (attrname
, "umask") == 0)
257 if (attrval
& ~evnt_valid_umask
)
259 logerr (GTXT ("for `%s', allowable umask bits are: 0x%llx\n"),
260 nameOnly
, (long long) evnt_valid_umask
);
264 if (mask_shift_set (&evntsel
,
265 perfctr_attrs_table
[jj
].is_inverted
? (attrval
^1) : attrval
,
266 perfctr_attrs_table
[jj
].mask
,
267 perfctr_attrs_table
[jj
].shift
))
269 logerr (GTXT ("`%s' attribute `%s' could not be set to 0x%llx\n"),
270 nameOnly
, attrname
, (long long) attrval
);
273 TprintfT (DBG_LT2
, "hwcfuncs: Counter %s, attribute %s set to 0x%llx\n",
274 nameOnly
, attrname
, (long long) attrval
);
281 logerr (GTXT ("attribute `%s' is invalid\n"), attrname
);
285 *result_mask
= evntsel
;
/* Compute the event selector for counter int_name on register regno.
   Steps visible here: parse attributes with hwcfuncs_parse_attrs() and the
   bare name with hwcfuncs_parse_ctr(); report regno == REGNO_ANY as an
   error; look the name up with myperfctr_get_x86_eventnum(); start from
   perfctr_evntsel_enable_bits and apply the attributes with
   set_x86_attr_bits().  On the final path *return_event receives
   evntsel | evnt_attrs and *return_pmc_sel receives pmc_sel.  An overlap
   between the event bits and the attribute bits is only traced.  */
290 hwcfuncs_get_x86_eventsel (unsigned int regno
, const char *int_name
,
291 eventsel_t
*return_event
, uint_t
*return_pmc_sel
)
293 hwcfuncs_attr_t attrs
[HWCFUNCS_MAX_ATTRS
+ 1];
295 char *nameOnly
= NULL
;
296 eventsel_t evntsel
= 0; // event number
297 eventsel_t evnt_valid_umask
= 0;
302 void *attr_mem
= hwcfuncs_parse_attrs (int_name
, attrs
, HWCFUNCS_MAX_ATTRS
,
306 logerr (GTXT ("out of memory, could not parse attributes\n"));
309 hwcfuncs_parse_ctr (int_name
, NULL
, &nameOnly
, NULL
, NULL
, NULL
);
310 if (regno
== REGNO_ANY
)
312 logerr (GTXT ("reg# could not be determined for `%s'\n"), nameOnly
);
316 /* look up evntsel */
317 if (myperfctr_get_x86_eventnum (nameOnly
, regno
,
318 &evntsel
, &evnt_valid_umask
, &pmc_sel
))
320 logerr (GTXT ("counter `%s' is not valid\n"), nameOnly
);
323 TprintfT (DBG_LT1
, "hwcfuncs: event=0x%llx pmc=0x%x '%s' nattrs = %u\n",
324 (long long) evntsel
, pmc_sel
, nameOnly
, nattrs
);
326 /* determine event attributes */
327 eventsel_t evnt_attrs
= perfctr_evntsel_enable_bits
;
328 if (set_x86_attr_bits (&evnt_attrs
, evnt_valid_umask
, attrs
, nattrs
, nameOnly
))
330 if (evntsel
& evnt_attrs
)
331 TprintfT (DBG_LT0
, "hwcfuncs: ERROR - evntsel & enable bits overlap: 0x%llx 0x%llx 0x%llx\n",
332 (long long) evntsel
, (long long) evnt_attrs
,
333 (long long) (evntsel
& evnt_attrs
));
334 *return_event
= evntsel
| evnt_attrs
;
335 *return_pmc_sel
= pmc_sel
;
/* Instruction string and clobber list for the inline assembly that issues
   the gettid system call in this file.  Two alternative pairs are defined;
   the preprocessor conditionals choosing between them are not visible in
   this excerpt.  */
345 #define syscall_instr "syscall"
346 #define syscall_clobber "rcx", "r11", "memory"
349 #define syscall_instr "int $0x80"
350 #define syscall_clobber "memory"
/* Wrapper for the perf_event_open system call, issued through
   syscall (__NR_perf_event_open, ...) with the arguments passed straight
   through.  The call sits inside a loop of at most 5 iterations so that a
   spurious failure can be retried.  */
354 perf_event_open (struct perf_event_attr
*hw_event_uptr
, pid_t pid
,
355 int cpu
, int group_fd
, unsigned long flags
)
357 /* It seems that perf_event_open() sometimes fails spuriously,
358 * even while an immediate retry succeeds.
359 * So, let's try a few retries if the call fails just to be sure.
362 for (int retry
= 0; retry
< 5; retry
++)
364 rc
= syscall (__NR_perf_event_open
, hw_event_uptr
, pid
, cpu
, group_fd
, flags
);
371 /*---------------------------------------------------------------------------*/
372 /* macros & fwd prototypes */
/* HWCDRV_API tags the functions that implement the hwcdrv API and expands to
   static.  The two prototypes are needed by hwcdrv_lwp_init() and
   hwcdrv_lwp_fini(), which call these functions.  */
374 #define HWCDRV_API static /* Mark functions used by hwcdrv API */
376 HWCDRV_API
int hwcdrv_start (void);
377 HWCDRV_API
int hwcdrv_free_counters ();
/* Thread id lookup; presumably the body of hwcdrv_gettid(), whose header is
   not visible in this excerpt.  Without LIBCOLLECTOR_SRC the id comes from
   syscall (__NR_gettid).  Another branch issues the system call through
   inline assembly built from syscall_instr, and a further branch again uses
   syscall (__NR_gettid).  */
382 #ifndef LIBCOLLECTOR_SRC
383 return syscall (__NR_gettid
);
386 __asm__
__volatile__(syscall_instr
387 : "=a" (r
) : "0" (__NR_gettid
)
391 return syscall (__NR_gettid
); // FIXUP_XXX_SPARC_LINUX // write gettid in asm
395 /*---------------------------------------------------------------------------*/
/* Size of the sample area of each perf_event ring buffer, in pages.
   read_buf() wraps its offset with d_tail & (bufsz - 1), which is why the
   value has to be a power of 2.  start_one_ctr() and stop_one_ctr() use a
   mapping length that is one page larger, for the metadata page.  */
398 #define NPAGES_PER_BUF 1 // number of pages to be used for perf_event samples
399 // must be a power of 2
401 /*---------------------------------------------------------------------------*/
/* Definition of one hardware counter event.  The array
   global_perf_event_def[] holds these, and start_one_ctr() copies the hw
   member before opening the event.  */
406 { // event (hwc) definition
407 unsigned int reg_num
; // PMC assignment, potentially for detecting conflicts
408 eventsel_t eventsel
; // raw event bits (Intel/AMD)
409 uint64_t counter_preload
; // number of HWC events before signal
410 struct perf_event_attr hw
; // perf_event definition
411 hrtime_t min_time
; // minimum time we're targeting between events
/* State of one mmapped perf_event buffer.  buf is set by start_one_ctr() and
   reset to NULL by stop_one_ctr(); skip_buf() and read_buf() test it for
   NULL before use.  */
416 { // runtime state of perf_event buffer
417 void *buf
; // pointer to mmapped buffer
418 size_t pagesz
; // size of pages
/* Readings from the previous sample of one counter.  read_sample() subtracts
   them from the new readings to obtain deltas and then overwrites them.  */
422 { // runtime state of counter values
423 uint64_t prev_ena_ts
; // previous perf_event "enabled" time
424 uint64_t prev_run_ts
; // previous perf_event "running" time
425 uint64_t prev_value
; // previous HWC value
426 } counter_value_state_t
;
/* Everything kept for one counter of one thread.  start_one_ctr() fills in
   ev_def, fd, buf_state, value_state and last_overflow_time, and reads
   last_overflow_period as the sampling period.  */
429 { // per-counter information
430 perf_event_def_t
*ev_def
; // global HWC definition for one counter
431 int fd
; // perf_event fd
432 buffer_state_t buf_state
; // perf_event buffer's state
433 counter_value_state_t value_state
; // counter state
434 int needs_restart
; // workaround for dbx failure to preserve si_fd
435 uint64_t last_overflow_period
;
436 hrtime_t last_overflow_time
;
/* Per-thread context, obtained through the find_vpc_ctx callback.  ctr_list
   is indexed by counter number; tid is the thread passed to
   perf_event_open() and F_SETOWN_EX in start_one_ctr().  */
440 { // per-thread context
441 counter_state_t
*ctr_list
;
442 int signal_fd
; // fd that caused the most recent signal
443 pthread_t tid
; // for debugging signal delivery problems
446 /*---------------------------------------------------------------------------*/
448 /* static variables */
/* Members of the driver-wide state object hdrv_pcl_state:
   internal_open_called guards hdrv_pcl_internal_open() against a second
   run, find_vpc_ctx is the per-thread context lookup, and get_events is the
   event enumeration hook taken from the selected backend.  */
452 int internal_open_called
;
453 hwcfuncs_tsd_get_fn_t find_vpc_ctx
;
454 unsigned hwcdef_cnt
; /* number of *active* hardware counters */
455 hwcdrv_get_events_fn_t
*get_events
;
/* hdrv_pcl_about: CPU description filled in by hdrv_pcl_internal_open() and
   reported by hwcdrv_get_info(); cpcN_cpuver starts as CPUVER_UNDEFINED.
   global_perf_event_def: one event definition per possible counter.
   COUNTERS_ENABLED(): the number of active counters, usable as a truth
   value.  */
458 static hwcdrv_about_t hdrv_pcl_about
= {.cpcN_cpuver
= CPUVER_UNDEFINED
};
459 static perf_event_def_t global_perf_event_def
[MAX_PICS
];
461 #define COUNTERS_ENABLED() (hdrv_pcl_state.hwcdef_cnt)
464 /* perf_event buffer formatting and handling */
/* Throw away all unread data in the ring buffer by moving data_tail up to
   data_head.  Every call is traced at level 0 as an error.  */
466 reset_buf (buffer_state_t
*bufstate
)
468 TprintfT (0, "hwcdrv: ERROR: perf_event reset_buf() called!\n");
469 struct perf_event_mmap_page
*metadata
= bufstate
->buf
;
471 metadata
->data_tail
= metadata
->data_head
;
/* Advance data_tail by sz bytes without copying anything.
   A NULL buffer pointer is checked for first.  A request larger than the
   unread span (d_head - d_tail), or not smaller than the sample area
   (NPAGES_PER_BUF pages), leads to reset_buf().  The caller in
   hwcdrv_overflow() treats a non-zero result as failure.  */
475 skip_buf (buffer_state_t
*bufstate
, size_t sz
)
477 TprintfT (DBG_LT1
, "hwcdrv: WARNING: perf_event skip_buf called!\n");
478 struct perf_event_mmap_page
*metadata
= bufstate
->buf
;
479 if (metadata
== NULL
)
481 size_t pgsz
= bufstate
->pagesz
;
482 size_t bufsz
= NPAGES_PER_BUF
*pgsz
;
483 uint64_t d_tail
= metadata
->data_tail
;
484 uint64_t d_head
= metadata
->data_head
;
486 // validate request size
487 if (sz
> d_head
- d_tail
|| sz
>= bufsz
)
489 reset_buf (bufstate
);
492 metadata
->data_tail
= d_tail
+ sz
; // advance tail
/* Copy sz bytes from the ring buffer into buf and advance data_tail by sz.
   The sample area starts one page (pgsz) after the metadata page.  The read
   offset is d_tail & (bufsz - 1); when the request runs past the end of the
   area it is done as two memcpy() calls, the second from offset 0.
   A NULL buffer pointer is checked for first, and a request larger than the
   unread span or not smaller than the area leads to reset_buf().  */
497 read_buf (buffer_state_t
*bufstate
, void *buf
, size_t sz
)
499 struct perf_event_mmap_page
*metadata
= bufstate
->buf
;
500 if (metadata
== NULL
)
502 size_t pgsz
= bufstate
->pagesz
;
503 size_t bufsz
= NPAGES_PER_BUF
*pgsz
;
504 uint64_t d_tail
= metadata
->data_tail
;
505 uint64_t d_head
= metadata
->data_head
;
507 // validate request size
508 if (sz
> d_head
- d_tail
|| sz
>= bufsz
)
510 reset_buf (bufstate
);
513 char *buf_base
= ((char *) metadata
) + pgsz
; // start of data buffer
514 uint64_t start_pos
= d_tail
& (bufsz
- 1); // char offset into data buffer
516 if (start_pos
+ sz
> bufsz
)
518 // will wrap past end of buffer
519 nbytes
= bufsz
- start_pos
;
520 memcpy (buf
, buf_base
+ start_pos
, nbytes
);
521 start_pos
= 0; // wrap to start
522 buf
= (void *) (((char *) buf
) + nbytes
);
523 nbytes
= sz
- nbytes
;
525 memcpy (buf
, buf_base
+ start_pos
, nbytes
);
526 metadata
->data_tail
+= sz
;
/* Read one 64-bit value from the ring buffer into *value; the result is
   whatever read_buf() returns.  */
531 read_u64 (buffer_state_t
*bufstate
, uint64_t *value
)
533 return read_buf (bufstate
, value
, sizeof (uint64_t));
/* Parse one sample record from the ring buffer of ctr_state.
   Four 64-bit fields are read in order with read_u64(): ipc, the counter
   value, the enabled time and the running time.  The last three are turned
   into deltas against the readings saved in ctr_state->value_state, which is
   then updated with the new readings.
   A value delta above 2 * last_overflow_period + 2000 is tested as a skid
   overrun; when set_error_flag is 1, bit 63 is set in the reported value.
   In the last branch of the running/enabled comparison the value delta is
   scaled by enabled_delta / running_delta.
   *rvalue receives the value delta and *rlost receives loss_estimate.  */
537 read_sample (counter_state_t
*ctr_state
, int msgsz
, uint64_t *rvalue
,
540 // returns count of bytes read
541 buffer_state_t
*bufstate
= &ctr_state
->buf_state
;
542 counter_value_state_t
*cntstate
= &ctr_state
->value_state
;
547 int rc
= read_u64 (bufstate
, &ipc
);
550 readsz
+= sizeof (uint64_t);
552 // PERF_SAMPLE_READ: value
554 rc
= read_u64 (bufstate
, &value
);
557 readsz
+= sizeof (uint64_t);
560 * Old Linux kernels (e.g. 2.6.32) on certain systems return enabled and
561 * running times in the sample data that correspond to the metadata times
562 * metadata->time_enabled
563 * metadata->time_running
564 * from the PREVIOUS (not current) sample. Probably just ignore this bug
565 * since it's on old kernels and we only use the enabled and running times
566 * to construct loss_estimate.
568 // PERF_SAMPLE_READ: PERF_FORMAT_ENABLED
569 uint64_t enabled_time
= 0;
570 rc
= read_u64 (bufstate
, &enabled_time
);
573 readsz
+= sizeof (uint64_t);
575 // PERF_SAMPLE_READ: PERF_FORMAT_RUNNING
576 uint64_t running_time
= 0;
577 rc
= read_u64 (bufstate
, &running_time
);
580 readsz
+= sizeof (uint64_t);
582 uint64_t value_delta
= value
- cntstate
->prev_value
;
583 uint64_t enabled_delta
= enabled_time
- cntstate
->prev_ena_ts
;
584 uint64_t running_delta
= running_time
- cntstate
->prev_run_ts
;
585 cntstate
->prev_value
= value
;
586 cntstate
->prev_ena_ts
= enabled_time
;
587 cntstate
->prev_run_ts
= running_time
;
589 // 24830461 need workaround for Linux anomalous HWC skid overrun
590 int set_error_flag
= 0;
591 if (value_delta
> 2 * ctr_state
->last_overflow_period
+ 2000 /* HWC_SKID_TOLERANCE */)
594 uint64_t loss_estimate
= 0; // estimate loss of events caused by multiplexing
595 if (running_delta
== enabled_delta
)
597 // counter was running 100% of time, no multiplexing
599 else if (running_delta
== 0)
600 loss_estimate
= 1; // token amount to aid in debugging perfctr oddities
601 else if ((running_delta
> enabled_delta
) || (enabled_delta
& 0x1000000000000000ll
))
603 // running should be smaller than enabled, can't estimate
605 * 21418391 HWC can have a negative count
607 * We've also seen enabled not only be smaller than running
608 * but in fact go negative. Guard against this.
610 loss_estimate
= 2; // token amount to aid in debugging perfctr oddities
614 // counter was running less than 100% of time
615 // Example: ena=7772268 run=6775669 raw_value=316004 scaled_value=362483 loss_est=46479
616 uint64_t scaled_delta
= (double) value_delta
* enabled_delta
/ running_delta
;
617 value_delta
= scaled_delta
;
619 // We should perhaps warn the user that multiplexing is going on,
620 // but hwcdrv_pcl.c doesn't know about the collector_interface, SP_JCMD_COMMENT, or COL_COMMENT_* values.
621 // For now we simply don't report.
622 // Perhaps we should address the issue not here but in the caller collector_sigemt_handler(),
623 // but at that level "lost" has a meaning that's considerably broader than just multiplexing.
624 collector_interface
->writeLog ("<event kind=\"%s\" id=\"%d\">%s %d -> %d</event>\n",
625 SP_JCMD_COMMENT
, COL_COMMENT_HWCADJ
, global_perf_event_def
[idx
].name
,
626 ctr_list
[idx
].last_overflow_period
, new_period
);
629 TprintfT ((loss_estimate
|| set_error_flag
) ? DBG_LT1
: DBG_LT3
,
630 "hwcdrv: '%s' ipc=0x%llx ena=%llu run=%llu "
631 "value_delta=%lld(0x%llx) loss_est=%llu %s error_flag='%s'\n",
632 ctr_state
->ev_def
->name
, (long long) ipc
,
633 (long long) enabled_delta
, (long long) running_delta
,
634 (long long) value_delta
, (long long) value_delta
,
635 (unsigned long long) loss_estimate
,
636 loss_estimate
? ", WARNING - SCALED" : "",
637 set_error_flag
? ", ERRORFLAG" : "");
638 if (set_error_flag
== 1)
639 value_delta
|= (1ULL << 63) /* HWCVAL_ERR_FLAG */;
640 *rvalue
= value_delta
;
641 *rlost
= loss_estimate
;
644 TprintfT (0, "hwcdrv: ERROR: perf_event sample not fully parsed\n");
/* Trace the contents of *at at level DBG_LT2.  The first call prints size,
   type, sample_period, the config members, wakeup_events and __reserved_1;
   DUMP_F prints one further member, and only when that member is
   non-zero.  */
651 dump_perf_event_attr (struct perf_event_attr
*at
)
653 TprintfT (DBG_LT2
, "dump_perf_event_attr: size=%d type=%d sample_period=%lld\n"
654 " config=0x%llx config1=0x%llx config2=0x%llx wakeup_events=%lld __reserved_1=%lld\n",
655 (int) at
->size
, (int) at
->type
, (unsigned long long) at
->sample_period
,
656 (unsigned long long) at
->config
, (unsigned long long) at
->config1
,
657 (unsigned long long) at
->config2
, (unsigned long long) at
->wakeup_events
,
658 (unsigned long long) at
->__reserved_1
);
659 #define DUMP_F(fld) if (at->fld) TprintfT(DBG_LT2, " %-10s : %lld\n", #fld, (long long) at->fld)
664 DUMP_F (exclude_user
);
665 DUMP_F (exclude_kernel
);
667 DUMP_F (exclude_idle
);
671 DUMP_F (inherit_stat
);
672 DUMP_F (enable_on_exec
);
/* Fill *hw for event number event with sampling period period.
   The structure is zeroed and its size member set.  The type and config
   members are set per architecture (x86, aarch64, and a final branch that
   the comments describe as SPARC).  exclude_user and exclude_kernel are set
   when bits 16 and 17 of event are clear; disabled, exclude_hv and
   wakeup_events are set to 1.  The result is traced with
   dump_perf_event_attr().  */
678 init_perf_event (struct perf_event_attr
*hw
, uint64_t event
, uint64_t period
)
680 memset (hw
, 0, sizeof (struct perf_event_attr
));
681 hw
->size
= sizeof (struct perf_event_attr
); // fwd/bwd compat
683 #if defined(__i386__) || defined(__x86_64)
684 //note: Nehalem/Westmere OFFCORE_RESPONSE in upper 32 bits
686 hw
->type
= PERF_TYPE_RAW
; // hw/sw/trace/raw...
687 #elif defined(__aarch64__)
688 hw
->type
= (event
>> 24) & 7;
689 hw
->config
= event
& 0xff;
691 //SPARC needs to be shifted up 16 bits
692 hw
->config
= (event
& 0xFFFF) << 16; // uint64_t event
693 uint64_t regs
= (event
>> 20) & 0xf; // see sparc_pcbe.c
694 hw
->config
|= regs
<< 4; // for M8, supported PICs need to be placed at bits [7:4]
695 hw
->type
= PERF_TYPE_RAW
; // hw/sw/trace/raw...
698 hw
->sample_period
= period
;
699 hw
->sample_type
= PERF_SAMPLE_IP
| PERF_SAMPLE_READ
|
701 // PERF_SAMPLE_TIME | // possibly interesting
702 // PERF_SAMPLE_ADDR |
703 PERF_SAMPLE_READ
| // HWC value
704 // PERF_SAMPLE_CALLCHAIN | // interesting
706 // PERF_SAMPLE_CPU | // possibly interesting
707 // PERF_SAMPLE_PERIOD |
708 // PERF_SAMPLE_STREAM_ID |
712 PERF_FORMAT_TOTAL_TIME_ENABLED
| // detect when hwc not scheduled
713 PERF_FORMAT_TOTAL_TIME_RUNNING
| // detect when hwc not scheduled
715 // PERF_FORMAT_GROUP |
717 hw
->disabled
= 1; /* off by default */
719 // Note: the following override config.priv bits!
720 hw
->exclude_user
= (event
& (1 << 16)) == 0; /* don't count user */
721 hw
->exclude_kernel
= (event
& (1 << 17)) == 0; /* ditto kernel */
722 hw
->exclude_hv
= 1; /* ditto hypervisor */
723 hw
->wakeup_events
= 1; /* wakeup every n events */
724 dump_perf_event_attr (hw
);
/* Open and set up counter ii for the thread described by pctx.
   A copy of global_perf_event_def[ii].hw gets its sample_period replaced by
   pctx->ctr_list[ii].last_overflow_period and is passed to perf_event_open()
   for thread pctx->tid.  The fd is then mmapped with NPAGES_PER_BUF + 1
   pages of size pgsz, the per-counter state in pctx->ctr_list[ii] is
   initialised, and the fd is configured with fcntl(): O_ASYNC, F_SETOWN_EX
   with F_OWNER_TID, and F_SETSIG with SIGIO.  error_string is printed at the
   start of the trace messages for failures.  */
728 start_one_ctr (int ii
, size_t pgsz
, hdrv_pcl_ctx_t
* pctx
, char *error_string
)
730 // pe_attr should have been initialized in hwcdrv_create_counters()
731 struct perf_event_attr pe_attr
;
732 memcpy (&pe_attr
, &global_perf_event_def
[ii
].hw
, sizeof (pe_attr
));
734 // but we adjust the period, so make sure that pctx->ctr_list[ii].last_overflow_period has been set
735 pe_attr
.sample_period
= pctx
->ctr_list
[ii
].last_overflow_period
;
737 int hwc_fd
= perf_event_open (&pe_attr
, pctx
->tid
, -1, -1, 0);
740 TprintfT (DBG_LT1
, "%s idx=%d perf_event_open failed, errno=%d\n",
741 error_string
, ii
, errno
);
745 size_t buffer_area_sz
= (NPAGES_PER_BUF
+ 1) * pgsz
; // add a page for metadata
746 void * buf
= mmap (NULL
, buffer_area_sz
, //YXXX is this a safe call?
747 PROT_READ
| PROT_WRITE
, MAP_SHARED
, hwc_fd
, 0);
748 if (buf
== MAP_FAILED
)
750 TprintfT (0, "sz = %ld, pgsz = %ld\n err=%s idx=%d mmap failed: %s\n",
751 (long) buffer_area_sz
, (long) pgsz
, error_string
, ii
, strerror (errno
));
754 pctx
->ctr_list
[ii
].ev_def
= &global_perf_event_def
[ii
]; // why do we set ev_def? we never seem to use it
755 pctx
->ctr_list
[ii
].fd
= hwc_fd
;
756 pctx
->ctr_list
[ii
].buf_state
.buf
= buf
;
757 pctx
->ctr_list
[ii
].buf_state
.pagesz
= pgsz
;
758 pctx
->ctr_list
[ii
].value_state
.prev_ena_ts
= 0;
759 pctx
->ctr_list
[ii
].value_state
.prev_run_ts
= 0;
760 pctx
->ctr_list
[ii
].value_state
.prev_value
= 0;
761 pctx
->ctr_list
[ii
].last_overflow_time
= gethrtime ();
764 long flags
= fcntl (hwc_fd
, F_GETFL
, 0) | O_ASYNC
;
765 int rc
= fcntl (hwc_fd
, F_SETFL
, flags
);
768 TprintfT (0, "%s idx=%d O_ASYNC failed\n", error_string
, ii
);
773 * set lwp ownership of the fd
774 * See BUGS section of "man perf_event_open":
775 * The F_SETOWN_EX option to fcntl(2) is needed to properly get
776 * overflow signals in threads. This was introduced in Linux 2.6.32.
778 * see http://lkml.org/lkml/2009/8/4/128
779 * google man fcntl F_SETOWN_EX -conflict
780 * "From Linux 2.6.32 onward, use F_SETOWN_EX to target
781 * SIGIO and SIGURG signals at a particular thread."
782 * http://icl.cs.utk.edu/papi/docs/da/d2a/examples__v2_8x_2self__smpl__multi_8c.html
783 * See 2010 CSCADS presentation by Eranian
785 struct f_owner_ex fowner_ex
;
786 fowner_ex
.type
= F_OWNER_TID
;
787 fowner_ex
.pid
= pctx
->tid
;
788 rc
= fcntl (hwc_fd
, F_SETOWN_EX
, (unsigned long) &fowner_ex
);
791 TprintfT (0, "%s idx=%d F_SETOWN failed\n", error_string
, ii
);
795 /* Use sigio so handler can determine FD via siginfo->si_fd. */
796 rc
= fcntl (hwc_fd
, F_SETSIG
, SIGIO
);
799 TprintfT (0, "%s idx=%d F_SETSIG failed\n", error_string
, ii
);
/* Shut down counter ii of ctr_list: disable it with PERF_EVENT_IOC_DISABLE,
   unmap its buffer (buf is set to NULL before munmap()) and close its fd.
   A failing step is traced and recorded in hwc_rc as
   HWCFUNCS_ERROR_GENERIC.  */
806 stop_one_ctr (int ii
, counter_state_t
*ctr_list
)
809 if (-1 == ioctl (ctr_list
[ii
].fd
, PERF_EVENT_IOC_DISABLE
, 1))
811 TprintfT (0, "hwcdrv: ERROR: PERF_EVENT_IOC_DISABLE #%d failed: errno=%d\n", ii
, errno
);
812 hwc_rc
= HWCFUNCS_ERROR_GENERIC
;
814 void *buf
= ctr_list
[ii
].buf_state
.buf
;
817 size_t bufsz
= (NPAGES_PER_BUF
+ 1) * ctr_list
[ii
].buf_state
.pagesz
;
818 ctr_list
[ii
].buf_state
.buf
= NULL
;
819 int tmprc
= munmap (buf
, bufsz
);
822 TprintfT (0, "hwcdrv: ERROR: munmap() #%d failed: errno=%d\n", ii
, errno
);
823 hwc_rc
= HWCFUNCS_ERROR_GENERIC
;
826 if (-1 == close (ctr_list
[ii
].fd
))
828 TprintfT (0, "hwcdrv: ERROR: close(fd) #%d failed: errno=%d\n", ii
, errno
);
829 hwc_rc
= HWCFUNCS_ERROR_GENERIC
;
834 /* HWCDRV_API for thread-specific actions */
/* Per-thread start hook: returns the result of hwcdrv_start().  */
836 hwcdrv_lwp_init (void)
838 return hwcdrv_start ();
/* Per-thread stop hook: releases the counters of the thread through
   hwcdrv_free_counters().  */
842 hwcdrv_lwp_fini (void)
844 hwcdrv_free_counters (); /* also sets pctx->ctr_list=NULL; */
/* One-time probe and setup of the perf_event based driver.
   Returns HWCFUNCS_ERROR_ALREADY_CALLED when internal_open_called is already
   set.  Availability is probed by opening a PERF_COUNT_HW_INSTRUCTIONS event
   of type PERF_TYPE_HARDWARE on the calling thread and by trying
   F_SETOWN_EX on the resulting fd.  Then the backends in hdrv_pcbe_drivers
   are tried; one whose hdrv_pcbe_init() returns zero supplies the CPU name,
   cpuver, counter count, documentation reference and the get_events and
   get_eventnum hooks.  The counter count is capped at MAX_PICS.
   The final statements clear the fields of hdrv_pcl_about and return
   HWCFUNCS_ERROR_NOT_SUPPORTED; they are the target of the
   internal_open_error jumps.  */
849 hdrv_pcl_internal_open ()
851 if (hdrv_pcl_state
.internal_open_called
)
853 TprintfT (0, "hwcdrv: WARNING: hdrv_pcl_internal_open: already called\n");
854 return HWCFUNCS_ERROR_ALREADY_CALLED
;
857 // determine if PCL is available
858 perf_event_def_t tmp_event_def
;
859 memset (&tmp_event_def
, 0, sizeof (tmp_event_def
));
860 struct perf_event_attr
*pe_attr
= &tmp_event_def
.hw
;
861 init_perf_event (pe_attr
, 0, 0);
862 pe_attr
->type
= PERF_TYPE_HARDWARE
; // specify abstracted HW event
863 pe_attr
->config
= PERF_COUNT_HW_INSTRUCTIONS
; // specify abstracted insts
864 int hwc_fd
= perf_event_open (pe_attr
,
865 0, // pid/tid, 0 is self
866 -1, // cpu, -1 is per-thread mode
867 -1, // group_fd, -1 is root
871 TprintfT (DBG_LT1
, "hwcdrv: WARNING: hdrv_pcl_internal_open:"
872 " perf_event_open() failed, errno=%d\n", errno
);
873 goto internal_open_error
;
876 /* see if the PCL is new enough to know about F_SETOWN_EX */
877 struct f_owner_ex fowner_ex
;
878 fowner_ex
.type
= F_OWNER_TID
;
879 fowner_ex
.pid
= hwcdrv_gettid (); // "pid=tid" is correct w/F_OWNER_TID
880 if (fcntl (hwc_fd
, F_SETOWN_EX
, (unsigned long) &fowner_ex
) == -1)
882 TprintfT (DBG_LT1
, "hwcdrv: WARNING: hdrv_pcl_internal_open: "
883 "F_SETOWN failed, errno=%d\n", errno
);
885 goto internal_open_error
;
889 hdrv_pcl_state
.internal_open_called
= 1;
890 hdrv_pcl_state
.library_ok
= 1; // set to non-zero to show it's initted
891 hdrv_pcl_about
.cpcN_cpuver
= CPUVER_UNDEFINED
;
892 TprintfT (DBG_LT2
, "hwcdrv: hdrv_pcl_internal_open()\n");
893 for (int ii
= 0; hdrv_pcbe_drivers
[ii
]; ii
++)
895 hdrv_pcbe_api_t
*ppcbe
= hdrv_pcbe_drivers
[ii
];
896 if (!ppcbe
->hdrv_pcbe_init ())
898 hdrv_pcl_about
.cpcN_cciname
= ppcbe
->hdrv_pcbe_impl_name ();
899 hdrv_pcl_about
.cpcN_cpuver
= hwcdrv_lookup_cpuver (hdrv_pcl_about
.cpcN_cciname
);
900 if (hdrv_pcl_about
.cpcN_cpuver
== CPUVER_UNDEFINED
)
901 goto internal_open_error
;
902 hdrv_pcl_about
.cpcN_npics
= ppcbe
->hdrv_pcbe_ncounters ();
903 hdrv_pcl_about
.cpcN_docref
= ppcbe
->hdrv_pcbe_cpuref ();
904 hdrv_pcl_state
.get_events
= ppcbe
->hdrv_pcbe_get_events
;
905 hwcdrv_get_x86_eventnum
= ppcbe
->hdrv_pcbe_get_eventnum
;
909 if (hdrv_pcl_about
.cpcN_npics
> MAX_PICS
)
911 TprintfT (0, "hwcdrv: WARNING: hdrv_pcl_internal_open:"
912 " reducing number of HWCs from %u to %u on processor '%s'\n",
913 hdrv_pcl_about
.cpcN_npics
, MAX_PICS
, hdrv_pcl_about
.cpcN_cciname
);
914 hdrv_pcl_about
.cpcN_npics
= MAX_PICS
;
916 TprintfT (DBG_LT1
, "hwcdrv: hdrv_pcl_internal_open:"
917 " perf_event cpuver=%d, name='%s'\n",
918 hdrv_pcl_about
.cpcN_cpuver
, hdrv_pcl_about
.cpcN_cciname
);
922 hdrv_pcl_about
.cpcN_cpuver
= CPUVER_UNDEFINED
;
923 hdrv_pcl_about
.cpcN_npics
= 0;
924 hdrv_pcl_about
.cpcN_docref
= NULL
;
925 hdrv_pcl_about
.cpcN_cciname
= NULL
;
926 return HWCFUNCS_ERROR_NOT_SUPPORTED
;
/* Context lookup installed by hwcdrv_init() as the initial find_vpc_ctx
   callback.  It owns a single static hdrv_pcl_ctx_t.
   NOTE(review): the return statement is not visible in this excerpt.  */
930 single_thread_tsd_ftn ()
932 static hdrv_pcl_ctx_t tsd_context
;
/* Initialise the driver: install single_thread_tsd_ftn as the context
   lookup, report sizeof (hdrv_pcl_ctx_t) through tsd_sz, and run
   hdrv_pcl_internal_open().  Returns HWCFUNCS_ERROR_ALREADY_CALLED when the
   open step has been done before, otherwise the result of
   hdrv_pcl_internal_open().  abort_ftn is not used in the lines visible
   here.  */
938 hwcdrv_init (hwcfuncs_abort_fn_t abort_ftn
, int *tsd_sz
)
940 hdrv_pcl_state
.find_vpc_ctx
= single_thread_tsd_ftn
;
942 *tsd_sz
= sizeof (hdrv_pcl_ctx_t
);
944 if (hdrv_pcl_state
.internal_open_called
)
945 return HWCFUNCS_ERROR_ALREADY_CALLED
;
946 return hdrv_pcl_internal_open ();
/* Copy the CPU description held in hdrv_pcl_about into the output
   arguments: cpuver, cciname (CPU name), npics (counter count) and docref.
   *support is set to HWCFUNCS_SUPPORT_OVERFLOW_PROFILING combined with
   HWCFUNCS_SUPPORT_OVERFLOW_CTR_ID.  */
950 hwcdrv_get_info (int *cpuver
, const char **cciname
, uint_t
*npics
,
951 const char **docref
, uint64_t *support
)
954 *cpuver
= hdrv_pcl_about
.cpcN_cpuver
;
956 *cciname
= hdrv_pcl_about
.cpcN_cciname
;
958 *npics
= hdrv_pcl_about
.cpcN_npics
;
960 *docref
= hdrv_pcl_about
.cpcN_docref
;
962 *support
= HWCFUNCS_SUPPORT_OVERFLOW_PROFILING
| HWCFUNCS_SUPPORT_OVERFLOW_CTR_ID
;
/* Install tsd_ftn as the per-thread context lookup (find_vpc_ctx).
   Going by the trace message, a NULL tsd_ftn is reported as an error and
   yields HWCFUNCS_ERROR_UNAVAIL.  */
966 hwcdrv_enable_mt (hwcfuncs_tsd_get_fn_t tsd_ftn
)
969 hdrv_pcl_state
.find_vpc_ctx
= tsd_ftn
;
972 TprintfT (0, "hwcdrv: ERROR: enable_mt(): tsd_ftn==NULL\n");
973 return HWCFUNCS_ERROR_UNAVAIL
;
/* Report the available counters and attributes through callbacks.
   When hwc_cb and a get_events hook are both set, the hook is called with
   hwc_cb and its result stored in count.  attr_cb is called with each
   attrname of perfctr_attrs_table, up to the entry with a null attrname.  */
979 hwcdrv_get_descriptions (hwcf_hwc_cb_t
*hwc_cb
, hwcf_attr_cb_t
*attr_cb
)
982 if (hwc_cb
&& hdrv_pcl_state
.get_events
)
983 count
= hdrv_pcl_state
.get_events (hwc_cb
);
985 for (int ii
= 0; perfctr_attrs_table
&& perfctr_attrs_table
[ii
].attrname
; ii
++)
986 attr_cb (perfctr_attrs_table
[ii
].attrname
);
/* HWCDRV API entry that forwards to hwcdrv_assign_all_regnos() and returns
   its result.  */
993 hwcdrv_assign_regnos (Hwcentry
* entries
[], unsigned numctrs
)
995 return hwcdrv_assign_all_regnos (entries
, numctrs
);
/* Arm the counter behind fd with ioctl PERF_EVENT_IOC_REFRESH and an
   argument of 1.  The failure path traces errno and returns
   HWCFUNCS_ERROR_UNAVAIL; the success path only traces at DBG_LT3.  */
999 internal_hwc_start (int fd
)
1001 int rc
= ioctl (fd
, PERF_EVENT_IOC_REFRESH
, 1);
1004 TprintfT (DBG_LT0
, "hwcdrv: ERROR: internal_hwc_start:"
1005 " PERF_EVENT_IOC_REFRESH(fd=%d) failed: errno=%d\n", fd
, errno
);
1006 return HWCFUNCS_ERROR_UNAVAIL
;
1008 TprintfT (DBG_LT3
, "hwcdrv: internal_hwc_start(fd=%d)\n", fd
);
1013 hwcdrv_overflow (siginfo_t
*si
, hwc_event_t
*eventp
, hwc_event_t
*lost_events
)
1015 /* set expired counters to overflow value and all others to 0 */
1016 /* return 0: OK, counters should be restarted */
1017 /* return non-zero: eventp not set, counters should not be restarted */
1018 /* clear return values */
1020 for (ii
= 0; ii
< hdrv_pcl_state
.hwcdef_cnt
; ii
++)
1022 eventp
->ce_pic
[ii
] = 0;
1023 lost_events
->ce_pic
[ii
] = 0;
1025 hrtime_t sig_ts
= gethrtime (); //YXXX get this from HWC event?
1026 eventp
->ce_hrt
= sig_ts
;
1027 lost_events
->ce_hrt
= sig_ts
;
1029 /* determine source signal */
1031 switch (si
->si_code
)
1033 case POLL_HUP
: /* expected value from pcl */
1034 /* According to Stephane Eranian:
1035 * "expect POLL_HUP instead of POLL_IN because we are
1036 * in one-shot mode (IOC_REFRESH)"
1038 signal_fd
= si
->si_fd
;
1040 case SI_TKILL
: /* event forwarded by tkill */
1041 /* DBX can only forward SI_TKILL when it detects POLL_HUP
1042 * unfortunately, this means that si->si_fd has been lost...
1043 * We need to process the buffers, but we don't know the fd!
1045 TprintfT (DBG_LT0
, "hwcdrv: sig_ts=%llu: WARNING: hwcdrv_overflow:"
1046 " SI_TKILL detected\n", sig_ts
);
1049 // "sometimes we see a POLL_IN (1) with very high event rates,"
1050 // according to eranian(?)
1051 TprintfT (DBG_LT0
, "hwcdrv: sig_ts=%llu: ERROR: hwcdrv_overflow:"
1052 " unexpected si_code 0x%x\n", sig_ts
, si
->si_code
);
1053 return HWCFUNCS_ERROR_GENERIC
;
1056 hdrv_pcl_ctx_t
* pctx
= hdrv_pcl_state
.find_vpc_ctx ();
1059 TprintfT (DBG_LT0
, "hwcdrv: sig_ts=%llu: ERROR: hwcdrv_overflow:"
1060 " tsd context is NULL\n", sig_ts
);
1061 return HWCFUNCS_ERROR_UNEXPECTED
;
1063 counter_state_t
* ctr_list
= (counter_state_t
*) pctx
->ctr_list
;
1066 TprintfT (DBG_LT0
, "hwcdrv: sig_ts=%llu: WARNING: hwcdrv_overflow:"
1067 " ctr_list is NULL\n", sig_ts
);
1068 return HWCFUNCS_ERROR_UNEXPECTED
;
1071 /* clear needs_restart flag */
1072 for (ii
= 0; ii
< hdrv_pcl_state
.hwcdef_cnt
; ii
++)
1073 ctr_list
[ii
].needs_restart
= 0;
1075 /* attempt to identify the counter to read */
1076 int signal_idx
= -1;
1077 pctx
->signal_fd
= signal_fd
; // save the signal provided by siginfo_t
1078 if (signal_fd
!= -1)
1080 for (ii
= 0; ii
< hdrv_pcl_state
.hwcdef_cnt
; ii
++)
1082 if (ctr_list
[ii
].fd
== signal_fd
)
1092 TprintfT (DBG_LT0
, "hwcdrv: sig_ts=%llu: ERROR: hwcdrv_overflow:"
1093 " pmc not determined!\n", sig_ts
);
1094 lost_events
->ce_pic
[0] = 1; /* record a bogus value into experiment */
1095 // note: bogus value may get overwritten in loop below
1098 /* capture sample(s). In addition to signal_idx, check other counters. */
1099 struct perf_event_header sheader
;
1101 for (idx
= 0; idx
< hdrv_pcl_state
.hwcdef_cnt
; idx
++)
1106 /* check for samples */
1107 struct perf_event_mmap_page
*metadata
= ctr_list
[idx
].buf_state
.buf
;
1108 if (metadata
== NULL
)
1110 if (metadata
->data_tail
== metadata
->data_head
)
1114 if (read_buf (&ctr_list
[idx
].buf_state
, &sheader
, sizeof (sheader
)))
1118 /* check for PERF_RECORD_SAMPLE */
1119 size_t datasz
= sheader
.size
- sizeof (struct perf_event_header
);
1120 if (sheader
.type
!= PERF_RECORD_SAMPLE
)
1122 TprintfT (DBG_LT2
, "hwcdrv: sig_ts=%llu: WARNING: hwcdrv_overflow:"
1123 " unexpected recd type=%d\n",
1124 sig_ts
, sheader
.type
);
1125 if (skip_buf (&ctr_list
[idx
].buf_state
, datasz
))
1127 TprintfT (DBG_LT0
, "hwcdrv: sig_ts=%llu: ERROR: hwcdrv_overflow:"
1128 " skip recd type=%d failed\n", sig_ts
, sheader
.type
);
1129 lost_events
->ce_pic
[idx
] = 4; /* record a bogus value */
1130 break; // failed to skip buffer??
1132 lost_events
->ce_pic
[idx
] = 2; /* record a bogus value */
1133 continue; // advance to next record
1136 /* type is PERF_RECORD_SAMPLE */
1137 uint64_t value
, lostv
;
1138 if (read_sample (&ctr_list
[idx
], datasz
, &value
, &lostv
))
1140 TprintfT (DBG_LT0
, "hwcdrv: sig_ts=%llu: ERROR: hwcdrv_overflow:"
1141 " read_sample() failed\n", sig_ts
);
1142 lost_events
->ce_pic
[idx
] = 3; // record a bogus value
1143 break; // failed to read sample data??
1145 TprintfT (DBG_LT3
, "hwcdrv: sig_ts=%llu: hwcdrv_overflow:"
1146 " idx=%d value=%llu lost=%llu\n", (unsigned long long) sig_ts
,
1147 idx
, (unsigned long long) value
, (unsigned long long) lostv
);
1148 if (eventp
->ce_pic
[idx
])
1150 TprintfT (DBG_LT2
, "hwcdrv: sig_ts=%llu: WARNING: hwcdrv_overflow:"
1151 " idx=%d previous sample recorded as lost_event\n", sig_ts
, idx
);
1152 lost_events
->ce_pic
[idx
] += eventp
->ce_pic
[idx
];
1154 eventp
->ce_pic
[idx
] = value
;
1155 lost_events
->ce_pic
[idx
] += lostv
;
1158 /* debug output for unexpected (but common) cases */
1159 if (idx
== signal_idx
)
1162 TprintfT (DBG_LT2
, "hwcdrv: sig_ts=%llu: WARNING: hwcdrv_overflow:"
1163 " %d records for signal_idx=%d\n", sig_ts
, num_recs
, signal_idx
);
1166 TprintfT (DBG_LT2
, "hwcdrv: sig_ts=%llu: WARNING: hwcdrv_overflow:"
1167 " %d unexpected record(s) for idx=%d (signal_idx=%d)\n",
1168 sig_ts
, num_recs
, idx
, signal_idx
);
1170 /* trigger counter restart whenever records were found */
1173 /* check whether to adapt the overflow interval */
1174 /* This is the Linux version.
1175 * The Solaris version is in hwprofile.c collector_update_overflow_counters().
1177 hrtime_t min_time
= global_perf_event_def
[idx
].min_time
;
1178 if (min_time
> 0 // overflow interval is adaptive
1179 && sig_ts
- ctr_list
[idx
].last_overflow_time
< min_time
) // last interval below min
1181 /* pick a new overflow interval */
1182 /* roughly doubled, but add funny numbers */
1183 /* hopefully the result is prime or not a multiple of some # of ops/loop */
1184 uint64_t new_period
= 2 * ctr_list
[idx
].last_overflow_period
+ 37;
1186 // On Solaris, we report the adjustment to the log file.
1187 // On Linux it's hard for us to do so since hwcdrv_pcl.c doesn't know about collector_interface, SP_JCMD_COMMENT, or COL_COMMENT_HWCADJ.
1188 // For now we simply don't report.
1189 collector_interface
->writeLog ("<event kind=\"%s\" id=\"%d\">%s %d -> %d</event>\n",
1190 SP_JCMD_COMMENT
, COL_COMMENT_HWCADJ
, global_perf_event_def
[idx
].name
,
1191 ctr_list
[idx
].last_overflow_period
, new_period
);
1193 /* There are a variety of ways of resetting the period on Linux.
1194 * The most elegant is
1195 * ioctl(fd,PERF_EVENT_IOC_PERIOD,&period)
1196 * but check the perf_event_open man page for PERF_EVENT_IOC_PERIOD:
1197 * > Prior to Linux 2.6.36 this ioctl always failed due to a bug in the kernel.
1198 * > Prior to Linux 3.14 (or 3.7 on ARM), the new period did not take effect
1199 * until after the next overflow.
1200 * So we're kind of stuck shutting the fd down and restarting it with the new period.
1202 if (stop_one_ctr (idx
, ctr_list
))
1204 // EUGENE figure out what to do on error
1206 ctr_list
[idx
].last_overflow_period
= new_period
;
1207 if (start_one_ctr (idx
, ctr_list
[idx
].buf_state
.pagesz
, pctx
, "hwcdrv: ERROR: hwcdrv_overflow (readjust overflow):"))
1209 // EUGENE figure out what to do on error
1212 ctr_list
[idx
].last_overflow_time
= sig_ts
;
1214 ctr_list
[idx
].needs_restart
= 1;
1215 #else // seems to be more reliable to restart here instead of hwcdrv_sighlr_restart()
1216 internal_hwc_start (ctr_list
[idx
].fd
);
1220 return 0; // OK to restart counters
/* hwcdrv_sighlr_restart: restart hook that receives a const hwc_event_t
   pointer PP; PP is not referenced in any of the lines visible here.
   Everything visible after the signature sits below an "#if 0", and the
   comment on that directive says restarting is done in hwcdrv_overflow()
   instead because it works better there.
   NOTE(review): this excerpt omits several original lines (the embedded
   line numbers skip): the return type, the braces, the NULL guards, the
   declaration of `errors`, and the matching #else/#endif with the value
   actually returned.  Confirm against the complete file.  */
1224 hwcdrv_sighlr_restart (const hwc_event_t
*pp
)
1226 #if 0 // restarting here doesn't seem to work as well as restarting in hwcdrv_overflow()
/* Fetch the driver context through the find_vpc_ctx callback stored in
   hdrv_pcl_state.  */
1227 hdrv_pcl_ctx_t
* pctx
= hdrv_pcl_state
.find_vpc_ctx ();
/* Diagnostic for a NULL context (per the message text); the guarding
   condition and the return that follows are not visible here.  */
1230 TprintfT (DBG_LT0
, "hwcdrv: ERROR: hwcdrv_sighlr_restart: find_vpc_ctx()==NULL\n");
/* The counter array hangs off the context.  */
1233 counter_state_t
* ctr_list
= (counter_state_t
*) pctx
->ctr_list
;
/* Diagnostic for a NULL counter list (per the message text); the guard
   itself is not visible here.  */
1236 TprintfT (DBG_LT0
, "hwcdrv: WARNING: hwcdrv_sighlr_restart: ctr_list is NULL\n");
/* Walk all hdrv_pcl_state.hwcdef_cnt counters.  */
1240 for (int ii
= 0; ii
< hdrv_pcl_state
.hwcdef_cnt
; ii
++)
/* A counter flagged needs_restart is restarted on its fd, and the result
   of internal_hwc_start() is OR-ed into `errors`.  */
1242 if (ctr_list
[ii
].needs_restart
)
1243 errors
|= internal_hwc_start (ctr_list
[ii
].fd
);
/* The flag is then cleared.  */
1244 ctr_list
[ii
].needs_restart
= 0;
1252 /* create counters based on hwcdef[] */
/* hwcdrv_create_counters: fill global_perf_event_def[0 .. HWCDEF_CNT-1]
   from the corresponding HWCDEF entries and record HWCDEF_CNT in
   hdrv_pcl_state.hwcdef_cnt.
   Returns HWCFUNCS_ERROR_HWCARGS when
     - HWCDEF_CNT exceeds hdrv_pcl_about.cpcN_npics,
     - hdrv_pcl_about.cpcN_cpuver is CPUVER_UNDEFINED, or
     - hwcfuncs_get_x86_eventsel () returns nonzero for an entry.
   NOTE(review): the return type, the braces, the declaration of `evntsel`
   and the success return are not visible in this excerpt.  */
1254 hwcdrv_create_counters (unsigned hwcdef_cnt
, Hwcentry
*hwcdef
)
/* Refuse a request for more counters than cpcN_npics allows.  */
1256 if (hwcdef_cnt
> hdrv_pcl_about
.cpcN_npics
)
1258 logerr (GTXT ("More than %d counters were specified\n"), hdrv_pcl_about
.cpcN_npics
); /*!*/
1259 return HWCFUNCS_ERROR_HWCARGS
;
/* Refuse to continue when the CPU version is undefined.  */
1261 if (hdrv_pcl_about
.cpcN_cpuver
== CPUVER_UNDEFINED
)
1263 logerr (GTXT ("Processor not supported\n"));
1264 return HWCFUNCS_ERROR_HWCARGS
;
/* One global event definition per requested counter, at the same index.  */
1268 for (unsigned idx
= 0; idx
< hwcdef_cnt
; idx
++)
1270 perf_event_def_t
*glb_event_def
= &global_perf_event_def
[idx
];
/* Start from an all-zero definition for this slot.  */
1271 memset (glb_event_def
, 0, sizeof (perf_event_def_t
));
1272 unsigned int pmc_sel
;
/* hwcfuncs_get_x86_eventsel () is given the entry's reg_num and int_name
   and writes evntsel and pmc_sel; a nonzero result is treated as
   failure.  */
1274 if (hwcfuncs_get_x86_eventsel (hwcdef
[idx
].reg_num
,
1275 hwcdef
[idx
].int_name
, &evntsel
, &pmc_sel
))
1277 TprintfT (0, "hwcdrv: ERROR: hwcfuncs_get_x86_eventsel() failed\n");
1278 return HWCFUNCS_ERROR_HWCARGS
;
/* Copy the lookup results and the per-counter settings into the global
   definition.  */
1280 glb_event_def
->reg_num
= pmc_sel
;
1281 glb_event_def
->eventsel
= evntsel
;
1282 glb_event_def
->counter_preload
= hwcdef
[idx
].val
;
1283 glb_event_def
->min_time
= hwcdef
[idx
].min_time
;
/* The name is duplicated with strdup (); the result is not checked for
   NULL here, and no matching free is visible in this block.  */
1284 glb_event_def
->name
= strdup (hwcdef
[idx
].name
); // memory leak??? very minor
/* Set up the `hw` member from the event selector and the preload value.  */
1285 init_perf_event (&glb_event_def
->hw
, glb_event_def
->eventsel
,
1286 glb_event_def
->counter_preload
);
/* Trace the resulting definition.  The "usr=" and "sys=" fields print
   hw.exclude_user and hw.exclude_kernel respectively.  */
1287 TprintfT (DBG_LT1
, "hwcdrv: create_counters: pic=%u name='%s' interval=%lld"
1288 "(min_time=%lld): reg_num=0x%x eventsel=0x%llx ireset=%lld usr=%lld sys=%lld\n",
1289 idx
, hwcdef
[idx
].int_name
, (long long) glb_event_def
->counter_preload
,
1290 (long long) glb_event_def
->min_time
, (int) glb_event_def
->reg_num
,
1291 (long long) glb_event_def
->eventsel
,
1292 (long long) HW_INTERVAL_PRESET (hwcdef
[idx
].val
),
1293 (long long) glb_event_def
->hw
.exclude_user
,
1294 (long long) glb_event_def
->hw
.exclude_kernel
);
/* Record how many counters are now defined.  */
1297 hdrv_pcl_state
.hwcdef_cnt
= hwcdef_cnt
;
/* hwcdrv_free_counters: stop every counter in the ctr_list attached to the
   context returned by find_vpc_ctx () and then detach that list by setting
   pctx->ctr_list to NULL.  As the original note says, this only shuts down
   the calling thread.
   HWCFUNCS_ERROR_GENERIC is returned after the "tsd context is NULL"
   diagnostic, and hwc_rc is set to HWCFUNCS_ERROR_GENERIC when
   stop_one_ctr () returns nonzero for any counter.
   NOTE(review): the return type, braces, NULL guards, the declaration of
   hwc_rc, the early returns and the final return are not visible in this
   excerpt.  No free () of ctr_list is visible either; confirm who owns
   that memory.  */
1302 hwcdrv_free_counters () // note: only performs shutdown for this thread
1304 hdrv_pcl_ctx_t
* pctx
;
/* What is done when counters are not enabled is on a line not visible
   here.  */
1305 if (!COUNTERS_ENABLED ())
1307 pctx
= hdrv_pcl_state
.find_vpc_ctx ();
/* Diagnostic and error return for a NULL context (per the message text;
   the guard is not visible).  */
1310 TprintfT (0, "hwcdrv: WARNING: hwcdrv_free_counters: tsd context is NULL\n");
1311 return HWCFUNCS_ERROR_GENERIC
;
1313 counter_state_t
*ctr_list
= pctx
->ctr_list
;
/* An already-NULL list is only reported at DBG_LT1; the original comment
   below gives the fork-child case as the reason.  */
1316 // fork child: prolog suspends hwcs, then epilog frees them
1317 TprintfT (DBG_LT1
, "hwcdrv: WARNING: hwcdrv_free_counters: ctr_list is already NULL\n");
/* Stop each of the hwcdef_cnt counters.  A failure is recorded in hwc_rc
   and the loop carries on with the next counter.  */
1321 for (int ii
= 0; ii
< hdrv_pcl_state
.hwcdef_cnt
; ii
++)
1322 if (stop_one_ctr (ii
, ctr_list
))
1323 hwc_rc
= HWCFUNCS_ERROR_GENERIC
;
1324 TprintfT (DBG_LT1
, "hwcdrv: hwcdrv_free_counters(tid=0x%lx).\n", pctx
->tid
);
/* Detach the list from the context.  */
1325 pctx
->ctr_list
= NULL
;
/* hwcdrv_start: create and start the counters for the calling thread.
   Steps visible in this excerpt, in order:
     1. check COUNTERS_ENABLED () and hdrv_pcl_state.library_ok;
     2. obtain the context from find_vpc_ctx () and store the thread id
        from hwcdrv_gettid () in pctx->tid;
     3. calloc a counter_state_t array of hdrv_pcl_state.hwcdef_cnt entries,
        set every fd to -1 and attach the array to pctx->ctr_list;
     4. for each counter, seed last_overflow_period from the global
        definition's hw.sample_period and call start_one_ctr ();
     5. call internal_hwc_start () on each counter's fd.
   Error returns visible here:
     HWCFUNCS_ERROR_NOT_SUPPORTED  library_ok is false
     HWCFUNCS_ERROR_UNEXPECTED     context is NULL (per the message text)
     HWCFUNCS_ERROR_MEMORY         calloc failed (per the message text)
     HWCFUNCS_ERROR_UNAVAIL        via the hwcdrv_start_cleanup label, which
                                   first calls hwcdrv_free_counters ()
   NOTE(review): the return type, braces, several guards, the declaration of
   `ii`, the success return and the delimiters of three block comments are
   not visible in this excerpt (the embedded line numbers skip).  */
1330 hwcdrv_start (void) /* must be called from each thread ? */
1332 hdrv_pcl_ctx_t
*pctx
= NULL
;
/* With counters disabled only a warning is logged; the return on this path
   is not visible.  */
1333 if (!COUNTERS_ENABLED ())
1335 TprintfT (DBG_LT1
, "hwcdrv: WARNING: hwcdrv_start: no counters to start \n");
/* library_ok must be set before any counter is started.  */
1338 if (!hdrv_pcl_state
.library_ok
)
1340 TprintfT (0, "hwcdrv: ERROR: hwcdrv_start: library is not open\n");
1341 return HWCFUNCS_ERROR_NOT_SUPPORTED
;
/* The next line is the interior of an original block comment whose opening
   and closing delimiters are not visible in this excerpt.  */
1345 * set up per-thread context
1347 pctx
= hdrv_pcl_state
.find_vpc_ctx ();
/* Diagnostic and error return for a NULL context (the guard is not
   visible).  */
1350 TprintfT (0, "hwcdrv: ERROR: hwcdrv_start: tsd context is NULL\n");
1351 return HWCFUNCS_ERROR_UNEXPECTED
;
1353 pctx
->tid
= hwcdrv_gettid ();
1354 TprintfT (DBG_LT1
, "hwcdrv: hwcdrv_start(tid=0x%lx)\n", pctx
->tid
);
/* Again the interior of a block comment with its delimiters missing.  */
1357 * create per-thread counter list
/* One zero-initialized counter_state_t per defined counter.  */
1359 counter_state_t
*ctr_list
= (counter_state_t
*) calloc (hdrv_pcl_state
.hwcdef_cnt
,
1360 sizeof (counter_state_t
));
/* Diagnostic and error return for a failed allocation (the guard is not
   visible).  */
1363 TprintfT (0, "hwcdrv: ERROR: hwcdrv_start: calloc(ctr_list) failed\n");
1364 return HWCFUNCS_ERROR_MEMORY
;
/* Set every fd to -1 before the list is attached to the context; the
   original comment gives early cleanup as the reason.  */
1367 for (ii
= 0; ii
< hdrv_pcl_state
.hwcdef_cnt
; ii
++)
1368 ctr_list
[ii
].fd
= -1; // invalidate fds in case we have to close prematurely
1369 pctx
->ctr_list
= ctr_list
;
/* The system page size is passed to start_one_ctr () for every counter.  */
1374 size_t pgsz
= sysconf (_SC_PAGESIZE
);
1375 for (ii
= 0; ii
< hdrv_pcl_state
.hwcdef_cnt
; ii
++)
/* The initial overflow period comes from the global event definition.  */
1377 ctr_list
[ii
].last_overflow_period
= global_perf_event_def
[ii
].hw
.sample_period
;
/* A nonzero result from start_one_ctr () jumps to the cleanup label.  */
1378 if (start_one_ctr (ii
, pgsz
, pctx
, "hwcdrv: ERROR: hwcdrv_start:")) goto hwcdrv_start_cleanup
;
/* Interior of a block comment with its delimiters missing.  */
1382 * start the counters
1384 for (ii
= 0; ii
< hdrv_pcl_state
.hwcdef_cnt
; ii
++)
1386 int rc
= internal_hwc_start (ctr_list
[ii
].fd
);
/* The test on rc that guards this jump is not visible in this excerpt.  */
1388 goto hwcdrv_start_cleanup
;
/* Failure path: hwcdrv_free_counters () is called (see the original note on
   that call) and HWCFUNCS_ERROR_UNAVAIL is returned.  */
1392 hwcdrv_start_cleanup
:
1393 hwcdrv_free_counters (); // PERF_EVENT_IOC_DISABLE and close() for all fds
1394 return HWCFUNCS_ERROR_UNAVAIL
;
/* hwcdrv_lwp_suspend: suspend hardware counters for the calling thread by
   delegating to hwcdrv_free_counters () and returning its result.
   With counters disabled only a warning is logged.
   NOTE(review): the return type, braces and the return on the
   "no counters" path are not visible in this excerpt.  */
1398 hwcdrv_lwp_suspend (void) /* must be called from each thread */
1400 if (!COUNTERS_ENABLED ())
1402 TprintfT (DBG_LT1
, "hwcdrv: WARNING: hwcdrv_lwp_suspend: no counters\n");
/* Normal path: trace the call, then tear the counters down.  */
1405 TprintfT (DBG_LT1
, "hwcdrv: hwcdrv_lwp_suspend()\n");
1406 return hwcdrv_free_counters ();
/* hwcdrv_lwp_resume: resume hardware counters for the calling thread by
   delegating to hwcdrv_start () and returning its result.
   With counters disabled only a warning is logged.
   NOTE(review): the return type, braces and the return on the
   "no counters" path are not visible in this excerpt.  */
1410 hwcdrv_lwp_resume (void) /* must be called from each thread */
1412 if (!COUNTERS_ENABLED ())
1414 TprintfT (DBG_LT1
, "hwcdrv: WARNING: hwcdrv_lwp_resume: no counters\n");
/* Normal path: trace the call, then set the counters up again.  */
1417 TprintfT (DBG_LT1
, "hwcdrv: hwcdrv_lwp_resume()\n");
1418 return hwcdrv_start ();
/* hwcdrv_read_events: clear the caller's output records.  The visible code
   sets OVERFLOW_DATA->ce_hrt to 0, zeroes all MAX_PICS entries of
   OVERFLOW_DATA->ce_pic, and applies HWCFUNCS_SAMPLE_RESET to each
   SAMPLED_DATA->sample[i].  No counter is read in the lines visible here.
   NOTE(review): the return type, braces, the return value and any guard
   around the SAMPLED_DATA reset (lines are missing next to it) are not
   visible in this excerpt; confirm whether SAMPLED_DATA may be NULL.  */
1422 hwcdrv_read_events (hwc_event_t
*overflow_data
, hwc_event_samples_t
*sampled_data
)
1424 overflow_data
->ce_hrt
= 0;
/* One pass over all MAX_PICS slots resets both output structures.  */
1425 for (int i
= 0; i
< MAX_PICS
; i
++)
1427 overflow_data
->ce_pic
[i
] = 0;
1429 HWCFUNCS_SAMPLE_RESET (&sampled_data
->sample
[i
]);
1434 /*---------------------------------------------------------------------------*/
1437 hwcdrv_api_t hwcdrv_pcl_api
= {
1441 hwcdrv_get_descriptions
,
1442 hwcdrv_assign_regnos
,
1443 hwcdrv_create_counters
,
1447 hwcdrv_sighlr_restart
,
1450 hwcdrv_free_counters
,
1453 -1 // hwcdrv_init_status