2 * SPDX-License-Identifier: BSD-2-Clause
4 * Copyright (c) 2005-2007, Joseph Koshy
5 * Copyright (c) 2007 The FreeBSD Foundation
8 * Portions of this software were developed by A. Joseph Koshy under
9 * sponsorship from the FreeBSD Foundation and Google, Inc.
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * Transform a hwpmc(4) log into human readable form, and into
35 * gprof(1) compatible profiles.
38 #include <sys/param.h>
39 #include <sys/endian.h>
41 #include <sys/imgact_aout.h>
42 #include <sys/imgact_elf.h>
45 #include <sys/queue.h>
46 #include <sys/socket.h>
50 #include <netinet/in.h>
72 #include "pmcstat_log.h"
73 #include "pmcstat_top.h"
74 #include "pmcpl_callgraph.h"
76 #define min(A,B) ((A) < (B) ? (A) : (B))
77 #define max(A,B) ((A) > (B) ? (A) : (B))
79 /* Get the sample value in percent related to nsamples. */
80 #define PMCPL_CG_COUNTP(a) \
81 ((a)->pcg_count * 100.0 / nsamples)
84 * The toplevel CG nodes (i.e., with rank == 0) are placed in a hash table.
87 struct pmcstat_cgnode_hash_list pmcstat_cgnode_hash
[PMCSTAT_NHASH
];
88 int pmcstat_cgnode_hash_count
;
90 static pmcstat_interned_string pmcstat_previous_filename_printed
;
92 static struct pmcstat_cgnode
*
93 pmcstat_cgnode_allocate(struct pmcstat_image
*image
, uintfptr_t pc
)
95 struct pmcstat_cgnode
*cg
;
97 if ((cg
= malloc(sizeof(*cg
))) == NULL
)
98 err(EX_OSERR
, "ERROR: Cannot allocate callgraph node");
100 cg
->pcg_image
= image
;
104 cg
->pcg_nchildren
= 0;
105 LIST_INIT(&cg
->pcg_children
);
111 * Free a node and its children.
114 pmcstat_cgnode_free(struct pmcstat_cgnode
*cg
)
116 struct pmcstat_cgnode
*cgc
, *cgtmp
;
118 LIST_FOREACH_SAFE(cgc
, &cg
->pcg_children
, pcg_sibling
, cgtmp
)
119 pmcstat_cgnode_free(cgc
);
124 * Look for a callgraph node associated with pmc `pmcid' in the global
125 * hash table that corresponds to the given `pc' value in the process
128 static struct pmcstat_cgnode
*
129 pmcstat_cgnode_hash_lookup_pc(struct pmcstat_process
*pp
, pmc_id_t pmcid
,
130 uintfptr_t pc
, int usermode
)
132 struct pmcstat_pcmap
*ppm
;
133 struct pmcstat_symbol
*sym
;
134 struct pmcstat_image
*image
;
135 struct pmcstat_cgnode
*cg
;
136 struct pmcstat_cgnode_hash
*h
;
137 uintfptr_t loadaddress
;
138 unsigned int i
, hash
;
140 ppm
= pmcstat_process_find_map(usermode
? pp
: pmcstat_kernproc
, pc
);
144 image
= ppm
->ppm_image
;
146 loadaddress
= ppm
->ppm_lowpc
+ image
->pi_vaddr
- image
->pi_start
;
147 pc
-= loadaddress
; /* Convert to an offset in the image. */
150 * Try determine the function at this offset. If we can't
151 * find a function round leave the `pc' value alone.
153 if (!(args
.pa_flags
& (FLAG_SKIP_TOP_FN_RES
| FLAG_SHOW_OFFSET
))) {
154 if ((sym
= pmcstat_symbol_search(image
, pc
)) != NULL
)
157 pmcstat_stats
.ps_samples_unknown_function
++;
160 for (hash
= i
= 0; i
< sizeof(uintfptr_t
); i
++)
161 hash
+= (pc
>> i
) & 0xFF;
163 hash
&= PMCSTAT_HASH_MASK
;
166 LIST_FOREACH(h
, &pmcstat_cgnode_hash
[hash
], pch_next
)
168 if (h
->pch_pmcid
!= pmcid
)
175 if (cg
->pcg_image
== image
&& cg
->pcg_func
== pc
)
180 * We haven't seen this (pmcid, pc) tuple yet, so allocate a
181 * new callgraph node and a new hash table entry for it.
183 cg
= pmcstat_cgnode_allocate(image
, pc
);
184 if ((h
= malloc(sizeof(*h
))) == NULL
)
185 err(EX_OSERR
, "ERROR: Could not allocate callgraph node");
187 h
->pch_pmcid
= pmcid
;
189 LIST_INSERT_HEAD(&pmcstat_cgnode_hash
[hash
], h
, pch_next
);
191 pmcstat_cgnode_hash_count
++;
197 * Compare two callgraph nodes for sorting.
200 pmcstat_cgnode_compare(const void *a
, const void *b
)
202 const struct pmcstat_cgnode
*const *pcg1
, *const *pcg2
, *cg1
, *cg2
;
204 pcg1
= (const struct pmcstat_cgnode
*const *) a
;
206 pcg2
= (const struct pmcstat_cgnode
*const *) b
;
209 /* Sort in reverse order */
210 if (cg1
->pcg_count
< cg2
->pcg_count
)
212 if (cg1
->pcg_count
> cg2
->pcg_count
)
218 * Find (allocating if a needed) a callgraph node in the given
219 * parent with the same (image, pcoffset) pair.
222 static struct pmcstat_cgnode
*
223 pmcstat_cgnode_find(struct pmcstat_cgnode
*parent
, struct pmcstat_image
*image
,
226 struct pmcstat_cgnode
*child
;
228 LIST_FOREACH(child
, &parent
->pcg_children
, pcg_sibling
) {
229 if (child
->pcg_image
== image
&&
230 child
->pcg_func
== pcoffset
)
235 * Allocate a new structure.
238 child
= pmcstat_cgnode_allocate(image
, pcoffset
);
241 * Link it into the parent.
243 LIST_INSERT_HEAD(&parent
->pcg_children
, child
, pcg_sibling
);
244 parent
->pcg_nchildren
++;
250 * Print one callgraph node. The output format is:
252 * indentation %(parent's samples) #nsamples function@object
255 pmcstat_cgnode_print(struct pmcstat_cgnode
*cg
, int depth
, uint32_t total
)
259 struct pmcstat_symbol
*sym
;
260 struct pmcstat_cgnode
**sortbuffer
, **cgn
, *pcg
;
265 (void) fprintf(args
.pa_graphfile
, "%*s", depth
, space
);
267 if (cg
->pcg_count
== total
)
268 (void) fprintf(args
.pa_graphfile
, "100.0%% ");
270 (void) fprintf(args
.pa_graphfile
, "%05.2f%% ",
271 100.0 * cg
->pcg_count
/ total
);
273 n
= fprintf(args
.pa_graphfile
, " [%u] ", cg
->pcg_count
);
275 /* #samples is a 12 character wide field. */
277 (void) fprintf(args
.pa_graphfile
, "%*s", 12 - n
, space
);
280 (void) fprintf(args
.pa_graphfile
, "%*s", depth
, space
);
282 sym
= pmcstat_symbol_search(cg
->pcg_image
, cg
->pcg_func
);
284 (void) fprintf(args
.pa_graphfile
, "%s",
285 pmcstat_string_unintern(sym
->ps_name
));
287 (void) fprintf(args
.pa_graphfile
, "%p",
288 (void *) (cg
->pcg_image
->pi_vaddr
+ cg
->pcg_func
));
290 if (pmcstat_previous_filename_printed
!=
291 cg
->pcg_image
->pi_fullpath
) {
292 pmcstat_previous_filename_printed
= cg
->pcg_image
->pi_fullpath
;
293 (void) fprintf(args
.pa_graphfile
, " @ %s\n",
294 pmcstat_string_unintern(
295 pmcstat_previous_filename_printed
));
297 (void) fprintf(args
.pa_graphfile
, "\n");
299 if (cg
->pcg_nchildren
== 0)
302 if ((sortbuffer
= (struct pmcstat_cgnode
**)
303 malloc(sizeof(struct pmcstat_cgnode
*) *
304 cg
->pcg_nchildren
)) == NULL
)
305 err(EX_OSERR
, "ERROR: Cannot print callgraph");
308 LIST_FOREACH(pcg
, &cg
->pcg_children
, pcg_sibling
)
311 assert(cgn
- sortbuffer
== (int) cg
->pcg_nchildren
);
313 qsort(sortbuffer
, cg
->pcg_nchildren
, sizeof(struct pmcstat_cgnode
*),
314 pmcstat_cgnode_compare
);
316 for (cgn
= sortbuffer
, n
= 0; n
< cg
->pcg_nchildren
; n
++, cgn
++)
317 pmcstat_cgnode_print(*cgn
, depth
+1, cg
->pcg_count
);
323 * Record a callchain.
327 pmcpl_cg_process(struct pmcstat_process
*pp
, struct pmcstat_pmcrecord
*pmcr
,
328 uint32_t nsamples
, uintfptr_t
*cc
, int usermode
, uint32_t cpu
)
330 uintfptr_t pc
, loadaddress
;
332 struct pmcstat_image
*image
;
333 struct pmcstat_pcmap
*ppm
;
334 struct pmcstat_symbol
*sym
;
335 struct pmcstat_cgnode
*parent
, *child
;
336 struct pmcstat_process
*km
;
342 * Find the callgraph node recorded in the global hash table
343 * for this (pmcid, pc).
347 pmcid
= pmcr
->pr_pmcid
;
348 child
= parent
= pmcstat_cgnode_hash_lookup_pc(pp
, pmcid
, pc
, usermode
);
349 if (parent
== NULL
) {
350 pmcstat_stats
.ps_callchain_dubious_frames
++;
351 pmcr
->pr_dubious_frames
++;
358 * For each return address in the call chain record, subject
359 * to the maximum depth desired.
360 * - Find the image associated with the sample. Stop if there
361 * is no valid image at that address.
362 * - Find the function that overlaps the return address.
363 * - If found: use the start address of the function.
364 * If not found (say an object's symbol table is not present or
365 * is incomplete), round down to th gprof bucket granularity.
366 * - Convert return virtual address to an offset in the image.
367 * - Look for a child with the same {offset,image} tuple,
368 * inserting one if needed.
369 * - Increment the count of occurrences of the child.
371 km
= pmcstat_kernproc
;
373 for (n
= 1; n
< (uint32_t) args
.pa_graphdepth
&& n
< nsamples
; n
++,
377 ppm
= pmcstat_process_find_map(usermode
? pp
: km
, pc
);
379 /* Detect full frame capture (kernel + user). */
381 ppm
= pmcstat_process_find_map(pp
, pc
);
389 image
= ppm
->ppm_image
;
390 loadaddress
= ppm
->ppm_lowpc
+ image
->pi_vaddr
-
394 if ((sym
= pmcstat_symbol_search(image
, pc
)) != NULL
)
397 child
= pmcstat_cgnode_find(parent
, image
, pc
);
403 * Printing a callgraph for a PMC.
406 pmcstat_callgraph_print_for_pmcid(struct pmcstat_pmcrecord
*pmcr
)
411 struct pmcstat_cgnode
**sortbuffer
, **cgn
;
412 struct pmcstat_cgnode_hash
*pch
;
415 * We pull out all callgraph nodes in the top-level hash table
416 * with a matching PMC id. We then sort these based on the
417 * frequency of occurrence. Each callgraph node is then
422 pmcid
= pmcr
->pr_pmcid
;
423 if ((sortbuffer
= (struct pmcstat_cgnode
**)
424 malloc(sizeof(struct pmcstat_cgnode
*) *
425 pmcstat_cgnode_hash_count
)) == NULL
)
426 err(EX_OSERR
, "ERROR: Cannot sort callgraph");
429 for (n
= 0; n
< PMCSTAT_NHASH
; n
++)
430 LIST_FOREACH(pch
, &pmcstat_cgnode_hash
[n
], pch_next
)
431 if (pch
->pch_pmcid
== pmcid
) {
432 nsamples
+= pch
->pch_cgnode
->pcg_count
;
433 *cgn
++ = pch
->pch_cgnode
;
436 nentries
= cgn
- sortbuffer
;
437 assert(nentries
<= pmcstat_cgnode_hash_count
);
444 qsort(sortbuffer
, nentries
, sizeof(struct pmcstat_cgnode
*),
445 pmcstat_cgnode_compare
);
447 (void) fprintf(args
.pa_graphfile
,
448 "@ %s [%u samples]\n\n",
449 pmcstat_string_unintern(pmcr
->pr_pmcname
),
452 for (cgn
= sortbuffer
, n
= 0; n
< nentries
; n
++, cgn
++) {
453 pmcstat_previous_filename_printed
= NULL
;
454 pmcstat_cgnode_print(*cgn
, 0, nsamples
);
455 (void) fprintf(args
.pa_graphfile
, "\n");
462 * Print out callgraphs.
466 pmcstat_callgraph_print(void)
468 struct pmcstat_pmcrecord
*pmcr
;
470 LIST_FOREACH(pmcr
, &pmcstat_pmcs
, pr_next
)
471 pmcstat_callgraph_print_for_pmcid(pmcr
);
475 pmcstat_cgnode_topprint(struct pmcstat_cgnode
*cg
,
476 int depth __unused
, uint32_t nsamples
)
478 int v_attrs
, vs_len
, ns_len
, width
, len
, n
, nchildren
;
481 struct pmcstat_symbol
*sym
;
482 struct pmcstat_cgnode
**sortbuffer
, **cgn
, *pcg
;
485 v
= PMCPL_CG_COUNTP(cg
);
486 snprintf(vs
, sizeof(vs
), "%.1f", v
);
487 v_attrs
= PMCSTAT_ATTRPERCENT(v
);
490 sym
= pmcstat_symbol_search(cg
->pcg_image
, cg
->pcg_func
);
492 snprintf(ns
, sizeof(ns
), "%p",
493 (void *)(cg
->pcg_image
->pi_vaddr
+ cg
->pcg_func
));
495 switch (args
.pa_flags
& (FLAG_SKIP_TOP_FN_RES
| FLAG_SHOW_OFFSET
)) {
496 case FLAG_SKIP_TOP_FN_RES
| FLAG_SHOW_OFFSET
:
497 case FLAG_SKIP_TOP_FN_RES
:
498 snprintf(ns
, sizeof(ns
), "%p",
499 (void *)(cg
->pcg_image
->pi_vaddr
+ cg
->pcg_func
));
501 case FLAG_SHOW_OFFSET
:
502 snprintf(ns
, sizeof(ns
), "%s+%#0" PRIx64
,
503 pmcstat_string_unintern(sym
->ps_name
),
504 cg
->pcg_func
- sym
->ps_start
);
507 snprintf(ns
, sizeof(ns
), "%s",
508 pmcstat_string_unintern(sym
->ps_name
));
513 PMCSTAT_ATTRON(v_attrs
);
514 PMCSTAT_PRINTW("%5.5s", vs
);
515 PMCSTAT_ATTROFF(v_attrs
);
516 PMCSTAT_PRINTW(" %-10.10s %-30.30s",
517 pmcstat_string_unintern(cg
->pcg_image
->pi_name
),
520 nchildren
= cg
->pcg_nchildren
;
521 if (nchildren
== 0) {
522 PMCSTAT_PRINTW("\n");
526 width
= pmcstat_displaywidth
- 40;
528 if ((sortbuffer
= (struct pmcstat_cgnode
**)
529 malloc(sizeof(struct pmcstat_cgnode
*) *
531 err(EX_OSERR
, "ERROR: Cannot print callgraph");
534 LIST_FOREACH(pcg
, &cg
->pcg_children
, pcg_sibling
)
537 assert(cgn
- sortbuffer
== (int)nchildren
);
539 qsort(sortbuffer
, nchildren
, sizeof(struct pmcstat_cgnode
*),
540 pmcstat_cgnode_compare
);
542 /* Count how many callers. */
543 for (cgn
= sortbuffer
, n
= 0; n
< nchildren
; n
++, cgn
++) {
546 v
= PMCPL_CG_COUNTP(pcg
);
547 if (v
< pmcstat_threshold
)
552 for (cgn
= sortbuffer
, n
= 0; n
< nchildren
; n
++, cgn
++) {
557 v
= PMCPL_CG_COUNTP(pcg
);
558 vs_len
= snprintf(vs
, sizeof(vs
), ":%.1f", v
);
559 v_attrs
= PMCSTAT_ATTRPERCENT(v
);
564 sym
= pmcstat_symbol_search(pcg
->pcg_image
, pcg
->pcg_func
);
566 ns_len
= snprintf(ns
, sizeof(ns
), "%s",
567 pmcstat_string_unintern(sym
->ps_name
));
569 ns_len
= snprintf(ns
, sizeof(ns
), "%p",
570 (void *)pcg
->pcg_func
);
572 len
= ns_len
+ vs_len
+ 1;
573 if (width
- len
< 0) {
574 PMCSTAT_PRINTW(" ...");
579 PMCSTAT_PRINTW(" %s", ns
);
581 PMCSTAT_ATTRON(v_attrs
);
582 PMCSTAT_PRINTW("%s", vs
);
583 PMCSTAT_ATTROFF(v_attrs
);
586 PMCSTAT_PRINTW("\n");
595 pmcpl_cg_topdisplay(void)
599 struct pmcstat_cgnode
**sortbuffer
, **cgn
;
600 struct pmcstat_cgnode_hash
*pch
;
601 struct pmcstat_pmcrecord
*pmcr
;
603 pmcr
= pmcstat_pmcindex_to_pmcr(pmcstat_pmcinfilter
);
605 err(EX_SOFTWARE
, "ERROR: invalid pmcindex");
608 * We pull out all callgraph nodes in the top-level hash table
609 * with a matching PMC index. We then sort these based on the
610 * frequency of occurrence. Each callgraph node is then
616 if ((sortbuffer
= (struct pmcstat_cgnode
**)
617 malloc(sizeof(struct pmcstat_cgnode
*) *
618 pmcstat_cgnode_hash_count
)) == NULL
)
619 err(EX_OSERR
, "ERROR: Cannot sort callgraph");
622 for (n
= 0; n
< PMCSTAT_NHASH
; n
++)
623 LIST_FOREACH(pch
, &pmcstat_cgnode_hash
[n
], pch_next
)
624 if (pmcr
== NULL
|| pch
->pch_pmcid
== pmcr
->pr_pmcid
) {
625 nsamples
+= pch
->pch_cgnode
->pcg_count
;
626 *cgn
++ = pch
->pch_cgnode
;
629 nentries
= cgn
- sortbuffer
;
630 assert(nentries
<= pmcstat_cgnode_hash_count
);
637 qsort(sortbuffer
, nentries
, sizeof(struct pmcstat_cgnode
*),
638 pmcstat_cgnode_compare
);
640 PMCSTAT_PRINTW("%5.5s %-10.10s %-30.30s %s\n",
641 "%SAMP", "IMAGE", "FUNCTION", "CALLERS");
643 nentries
= min(pmcstat_displayheight
- 2, nentries
);
645 for (cgn
= sortbuffer
, n
= 0; n
< nentries
; n
++, cgn
++) {
646 if (PMCPL_CG_COUNTP(*cgn
) < pmcstat_threshold
)
648 pmcstat_cgnode_topprint(*cgn
, 0, nsamples
);
655 * Handle top mode keypress.
659 pmcpl_cg_topkeypress(int c
, void *arg
)
675 pmcstat_cgnode_hash_count
= 0;
676 pmcstat_previous_filename_printed
= NULL
;
678 for (i
= 0; i
< PMCSTAT_NHASH
; i
++) {
679 LIST_INIT(&pmcstat_cgnode_hash
[i
]);
686 pmcpl_cg_shutdown(FILE *mf
)
689 struct pmcstat_cgnode_hash
*pch
, *pchtmp
;
693 if (args
.pa_flags
& FLAG_DO_CALLGRAPHS
)
694 pmcstat_callgraph_print();
699 for (i
= 0; i
< PMCSTAT_NHASH
; i
++) {
700 LIST_FOREACH_SAFE(pch
, &pmcstat_cgnode_hash
[i
], pch_next
,
702 pmcstat_cgnode_free(pch
->pch_cgnode
);
703 LIST_REMOVE(pch
, pch_next
);