1 /* hist.c - Histogram related operations.
3 Copyright (C) 1999-2024 Free Software Foundation, Inc.
5 This file is part of GNU Binutils.
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
23 #include "libiberty.h"
24 #include "search_list.h"
37 #define UNITS_TO_CODE (offset_to_code / sizeof(UNIT))
39 static void scale_and_align_entries (void);
40 static void print_header (int);
41 static void print_line (Sym
*, double);
42 static int cmp_time (const void *, const void *);
44 /* Declarations of automatically generated functions to output blurbs. */
45 extern void flat_blurb (FILE * fp
);
47 static histogram
*find_histogram (bfd_vma lowpc
, bfd_vma highpc
);
48 static histogram
*find_histogram_for_pc (bfd_vma pc
);
50 histogram
* histograms
;
51 unsigned num_histograms
;
53 static char hist_dimension
[16] = "seconds";
54 static char hist_dimension_abbrev
= 's';
56 static double accum_time
; /* Accumulated time so far for print_line(). */
57 static double total_time
; /* Total time for all routines. */
59 /* Table of SI prefixes for powers of 10 (used to automatically
60 scale some of the values in the flat profile). */
68 { 'T', 1e-12 }, /* tera */
69 { 'G', 1e-09 }, /* giga */
70 { 'M', 1e-06 }, /* mega */
71 { 'K', 1e-03 }, /* kilo */
73 { 'm', 1e+03 }, /* milli */
74 { 'u', 1e+06 }, /* micro */
75 { 'n', 1e+09 }, /* nano */
76 { 'p', 1e+12 }, /* pico */
77 { 'f', 1e+15 }, /* femto */
78 { 'a', 1e+18 } /* ato */
81 /* Reads just the header part of histogram record into
82 *RECORD from IFP. FILENAME is the name of IFP and
83 is provided for formatting error messages only.
85 If FIRST is non-zero, sets global variables HZ, HIST_DIMENSION,
86 HIST_DIMENSION_ABBREV, HIST_SCALE. If FIRST is zero, checks
87 that the new histogram is compatible with already-set values
88 of those variables and emits an error if that's not so. */
90 read_histogram_header (histogram
*record
,
91 FILE *ifp
, const char *filename
,
94 unsigned int profrate
;
95 char n_hist_dimension
[15];
96 char n_hist_dimension_abbrev
;
99 if (gmon_io_read_vma (ifp
, &record
->lowpc
)
100 || gmon_io_read_vma (ifp
, &record
->highpc
)
101 || gmon_io_read_32 (ifp
, &record
->num_bins
)
102 || gmon_io_read_32 (ifp
, &profrate
)
103 || gmon_io_read (ifp
, n_hist_dimension
, 15)
104 || gmon_io_read (ifp
, &n_hist_dimension_abbrev
, 1))
106 fprintf (stderr
, _("%s: %s: unexpected end of file\n"),
112 n_hist_scale
= (double)((record
->highpc
- record
->lowpc
) / sizeof (UNIT
))
117 /* We don't try to veryfy profrate is the same for all histogram
118 records. If we have two histogram records for the same
119 address range and profiling samples is done as often
120 as possible as opposed on timer, then the actual profrate will
121 be slightly different. Most of the time the difference does not
122 matter and insisting that profiling rate is exactly the same
123 will only create inconvenient. */
125 memcpy (hist_dimension
, n_hist_dimension
, 15);
126 hist_dimension_abbrev
= n_hist_dimension_abbrev
;
127 hist_scale
= n_hist_scale
;
131 if (strncmp (n_hist_dimension
, hist_dimension
, 15) != 0)
134 _("%s: dimension unit changed between histogram records\n"
137 whoami
, whoami
, hist_dimension
, whoami
, n_hist_dimension
);
141 if (n_hist_dimension_abbrev
!= hist_dimension_abbrev
)
144 _("%s: dimension abbreviation changed between histogram records\n"
147 whoami
, whoami
, hist_dimension_abbrev
, whoami
, n_hist_dimension_abbrev
);
151 /* The only reason we require the same scale for histograms is that
152 there's code (notably printing code), that prints units,
153 and it would be very confusing to have one unit mean different
154 things for different functions. */
155 if (fabs (hist_scale
- n_hist_scale
) > 0.000001)
158 _("%s: different scales in histogram records"),
165 /* Read the histogram from file IFP. FILENAME is the name of IFP and
166 is provided for formatting error messages only. */
169 hist_read_rec (FILE * ifp
, const char *filename
)
171 bfd_vma lowpc
, highpc
;
173 histogram
*record
, *existing_record
;
176 /* 1. Read the header and see if there's existing record for the
177 same address range and that there are no overlapping records. */
178 read_histogram_header (&n_record
, ifp
, filename
, num_histograms
== 0);
180 existing_record
= find_histogram (n_record
.lowpc
, n_record
.highpc
);
183 record
= existing_record
;
187 /* If this record overlaps, but does not completely match an existing
188 record, it's an error. */
189 lowpc
= n_record
.lowpc
;
190 highpc
= n_record
.highpc
;
191 hist_clip_symbol_address (&lowpc
, &highpc
);
195 _("%s: overlapping histogram records\n"),
200 /* This is new record. Add it to global array and allocate space for
202 histograms
= (struct histogram
*)
203 xrealloc (histograms
, sizeof (histogram
) * (num_histograms
+ 1));
204 memcpy (histograms
+ num_histograms
,
205 &n_record
, sizeof (histogram
));
206 record
= &histograms
[num_histograms
];
209 record
->sample
= (int *) xmalloc (record
->num_bins
210 * sizeof (record
->sample
[0]));
211 memset (record
->sample
, 0, record
->num_bins
* sizeof (record
->sample
[0]));
214 /* 2. We have either a new record (with zeroed histogram data), or an existing
215 record with some data in the histogram already. Read new data into the
216 record, adding hit counts. */
219 printf ("[hist_read_rec] n_lowpc 0x%lx n_highpc 0x%lx ncnt %u\n",
220 (unsigned long) record
->lowpc
, (unsigned long) record
->highpc
,
223 for (i
= 0; i
< record
->num_bins
; ++i
)
226 if (fread (&count
[0], sizeof (count
), 1, ifp
) != 1)
229 _("%s: %s: unexpected EOF after reading %u of %u samples\n"),
230 whoami
, filename
, i
, record
->num_bins
);
233 record
->sample
[i
] += bfd_get_16 (core_bfd
, (bfd_byte
*) & count
[0]);
235 printf ("[hist_read_rec] 0x%lx: %u\n",
236 (unsigned long) (record
->lowpc
237 + i
* (record
->highpc
- record
->lowpc
)
244 /* Write all execution histograms file OFP. FILENAME is the name
245 of OFP and is provided for formatting error-messages only. */
248 hist_write_hist (FILE * ofp
, const char *filename
)
253 for (r
= 0; r
< num_histograms
; ++r
)
255 histogram
*record
= &histograms
[r
];
259 if (gmon_io_write_8 (ofp
, GMON_TAG_TIME_HIST
)
260 || gmon_io_write_vma (ofp
, record
->lowpc
)
261 || gmon_io_write_vma (ofp
, record
->highpc
)
262 || gmon_io_write_32 (ofp
, record
->num_bins
)
263 || gmon_io_write_32 (ofp
, hz
)
264 || gmon_io_write (ofp
, hist_dimension
, 15)
265 || gmon_io_write (ofp
, &hist_dimension_abbrev
, 1))
271 for (i
= 0; i
< record
->num_bins
; ++i
)
273 bfd_put_16 (core_bfd
, (bfd_vma
) record
->sample
[i
], (bfd_byte
*) &count
[0]);
275 if (fwrite (&count
[0], sizeof (count
), 1, ofp
) != 1)
284 /* Calculate scaled entry point addresses (to save time in
285 hist_assign_samples), and, on architectures that have procedure
286 entry masks at the start of a function, possibly push the scaled
287 entry points over the procedure entry mask, if it turns out that
288 the entry point is in one bin and the code for a routine is in the
292 scale_and_align_entries (void)
295 bfd_vma bin_of_entry
;
298 for (sym
= symtab
.base
; sym
< symtab
.limit
; sym
++)
300 histogram
*r
= find_histogram_for_pc (sym
->addr
);
302 sym
->hist
.scaled_addr
= sym
->addr
/ sizeof (UNIT
);
306 bin_of_entry
= (sym
->hist
.scaled_addr
- r
->lowpc
) / hist_scale
;
307 bin_of_code
= ((sym
->hist
.scaled_addr
+ UNITS_TO_CODE
- r
->lowpc
)
309 if (bin_of_entry
< bin_of_code
)
312 printf ("[scale_and_align_entries] pushing 0x%lx to 0x%lx\n",
313 (unsigned long) sym
->hist
.scaled_addr
,
314 (unsigned long) (sym
->hist
.scaled_addr
316 sym
->hist
.scaled_addr
+= UNITS_TO_CODE
;
323 /* Assign samples to the symbol to which they belong.
325 Histogram bin I covers some address range [BIN_LOWPC,BIN_HIGH_PC)
326 which may overlap one more symbol address ranges. If a symbol
327 overlaps with the bin's address range by O percent, then O percent
328 of the bin's count is credited to that symbol.
330 There are three cases as to where BIN_LOW_PC and BIN_HIGH_PC can be
331 with respect to the symbol's address range [SYM_LOW_PC,
332 SYM_HIGH_PC) as shown in the following diagram. OVERLAP computes
333 the distance (in UNITs) between the arrows, the fraction of the
334 sample that is to be credited to the symbol which starts at
337 sym_low_pc sym_high_pc
341 +-----------------------------------------------+
343 | ->| |<- ->| |<- ->| |<- |
345 +---------+ +---------+ +---------+
349 bin_low_pc bin_high_pc bin_low_pc bin_high_pc bin_low_pc bin_high_pc
351 For the VAX we assert that samples will never fall in the first two
352 bytes of any routine, since that is the entry mask, thus we call
353 scale_and_align_entries() to adjust the entry points if the entry
354 mask falls in one bin but the code for the routine doesn't start
355 until the next bin. In conjunction with the alignment of routine
356 addresses, this should allow us to have only one sample for every
357 four bytes of text space and never have any overlap (the two end
361 hist_assign_samples_1 (histogram
*r
)
363 bfd_vma bin_low_pc
, bin_high_pc
;
364 bfd_vma sym_low_pc
, sym_high_pc
;
365 bfd_vma overlap
, addr
;
366 unsigned int bin_count
;
367 unsigned int i
, j
, k
;
368 double count_time
, credit
;
370 bfd_vma lowpc
= r
->lowpc
/ sizeof (UNIT
);
372 /* Iterate over all sample bins. */
373 for (i
= 0, k
= 1; i
< r
->num_bins
; ++i
)
375 bin_count
= r
->sample
[i
];
379 bin_low_pc
= lowpc
+ (bfd_vma
) (hist_scale
* i
);
380 bin_high_pc
= lowpc
+ (bfd_vma
) (hist_scale
* (i
+ 1));
381 count_time
= bin_count
;
385 "[assign_samples] bin_low_pc=0x%lx, bin_high_pc=0x%lx, bin_count=%u\n",
386 (unsigned long) (sizeof (UNIT
) * bin_low_pc
),
387 (unsigned long) (sizeof (UNIT
) * bin_high_pc
),
389 total_time
+= count_time
;
391 /* Credit all symbols that are covered by bin I.
393 PR gprof/13325: Make sure that K does not get decremented
394 and J will never be less than 0. */
395 for (j
= k
- 1; j
< symtab
.len
; k
= ++j
)
397 sym_low_pc
= symtab
.base
[j
].hist
.scaled_addr
;
398 sym_high_pc
= symtab
.base
[j
+ 1].hist
.scaled_addr
;
400 /* If high end of bin is below entry address,
402 if (bin_high_pc
< sym_low_pc
)
405 /* If low end of bin is above high end of symbol,
406 go for next symbol. */
407 if (bin_low_pc
>= sym_high_pc
)
411 MIN (bin_high_pc
, sym_high_pc
) - MAX (bin_low_pc
, sym_low_pc
);
416 "[assign_samples] [0x%lx,0x%lx) %s gets %f ticks %ld overlap\n",
417 (unsigned long) symtab
.base
[j
].addr
,
418 (unsigned long) (sizeof (UNIT
) * sym_high_pc
),
419 symtab
.base
[j
].name
, overlap
* count_time
/ hist_scale
,
422 addr
= symtab
.base
[j
].addr
;
423 credit
= overlap
* count_time
/ hist_scale
;
425 /* Credit symbol if it appears in INCL_FLAT or that
426 table is empty and it does not appear it in
428 if (sym_lookup (&syms
[INCL_FLAT
], addr
)
429 || (syms
[INCL_FLAT
].len
== 0
430 && !sym_lookup (&syms
[EXCL_FLAT
], addr
)))
432 symtab
.base
[j
].hist
.time
+= credit
;
436 total_time
-= credit
;
442 DBG (SAMPLEDEBUG
, printf ("[assign_samples] total_time %f\n",
446 /* Calls 'hist_assign_sampes_1' for all histogram records read so far. */
448 hist_assign_samples (void)
452 scale_and_align_entries ();
454 for (i
= 0; i
< num_histograms
; ++i
)
455 hist_assign_samples_1 (&histograms
[i
]);
459 /* Print header for flag histogram profile. */
462 print_header (int prefix
)
466 sprintf (unit
, _("%c%c/call"), prefix
, hist_dimension_abbrev
);
468 if (bsd_style_output
)
470 printf (_("\ngranularity: each sample hit covers %ld byte(s)"),
471 (long) hist_scale
* (long) sizeof (UNIT
));
472 if (total_time
> 0.0)
474 printf (_(" for %.2f%% of %.2f %s\n\n"),
475 100.0 / total_time
, total_time
/ hz
, hist_dimension
);
480 printf (_("\nEach sample counts as %g %s.\n"), 1.0 / hz
, hist_dimension
);
483 if (total_time
<= 0.0)
485 printf (_(" no time accumulated\n\n"));
487 /* This doesn't hurt since all the numerators will be zero. */
491 printf ("%5.5s %10.10s %8.8s %8.8s %8.8s %8.8s %-8.8s\n",
492 "% ", _("cumulative"), _("self "), "", _("self "), _("total "),
494 printf ("%5.5s %9.9s %8.8s %8.8s %8.8s %8.8s %-8.8s\n",
495 _("time"), hist_dimension
, hist_dimension
, _("calls"), unit
, unit
,
501 print_line (Sym
*sym
, double scale
)
503 if (ignore_zeros
&& sym
->ncalls
== 0 && sym
->hist
.time
== 0)
506 accum_time
+= sym
->hist
.time
;
508 if (bsd_style_output
)
509 printf ("%5.1f %10.2f %8.2f",
510 total_time
> 0.0 ? 100 * sym
->hist
.time
/ total_time
: 0.0,
511 accum_time
/ hz
, sym
->hist
.time
/ hz
);
513 printf ("%6.2f %9.2f %8.2f",
514 total_time
> 0.0 ? 100 * sym
->hist
.time
/ total_time
: 0.0,
515 accum_time
/ hz
, sym
->hist
.time
/ hz
);
517 if (sym
->ncalls
!= 0)
518 printf (" %8lu %8.2f %8.2f ",
519 sym
->ncalls
, scale
* sym
->hist
.time
/ hz
/ sym
->ncalls
,
520 scale
* (sym
->hist
.time
+ sym
->cg
.child_time
) / hz
/ sym
->ncalls
);
522 printf (" %8.8s %8.8s %8.8s ", "", "", "");
524 if (bsd_style_output
)
527 print_name_only (sym
);
533 /* Compare LP and RP. The primary comparison key is execution time,
534 the secondary is number of invocation, and the tertiary is the
535 lexicographic order of the function names. */
538 cmp_time (const void *lp
, const void *rp
)
540 const Sym
*left
= *(const Sym
**) lp
;
541 const Sym
*right
= *(const Sym
**) rp
;
544 time_diff
= right
->hist
.time
- left
->hist
.time
;
552 if (right
->ncalls
> left
->ncalls
)
555 if (right
->ncalls
< left
->ncalls
)
558 return strcmp (left
->name
, right
->name
);
562 /* Print the flat histogram profile. */
567 Sym
**time_sorted_syms
, *top_dog
, *sym
;
568 unsigned int sym_index
;
574 first_output
= false;
580 if (bsd_style_output
)
582 if (print_descriptions
)
584 printf (_("\n\n\nflat profile:\n"));
590 printf (_("Flat profile:\n"));
593 /* Sort the symbol table by time (call-count and name as secondary
594 and tertiary keys). */
595 time_sorted_syms
= (Sym
**) xmalloc (symtab
.len
* sizeof (Sym
*));
597 for (sym_index
= 0; sym_index
< symtab
.len
; ++sym_index
)
598 time_sorted_syms
[sym_index
] = &symtab
.base
[sym_index
];
600 qsort (time_sorted_syms
, symtab
.len
, sizeof (Sym
*), cmp_time
);
602 if (bsd_style_output
)
604 log_scale
= 5; /* Milli-seconds is BSD-default. */
608 /* Search for symbol with highest per-call
609 execution time and scale accordingly. */
614 for (sym_index
= 0; sym_index
< symtab
.len
; ++sym_index
)
616 sym
= time_sorted_syms
[sym_index
];
618 if (sym
->ncalls
!= 0)
622 call_time
= (sym
->hist
.time
+ sym
->cg
.child_time
) / sym
->ncalls
;
624 if (call_time
> top_time
)
627 top_time
= call_time
;
632 if (top_dog
&& top_dog
->ncalls
!= 0 && top_time
> 0.0)
636 for (log_scale
= 0; log_scale
< ARRAY_SIZE (SItab
); log_scale
++)
638 double scaled_value
= SItab
[log_scale
].scale
* top_time
;
640 if (scaled_value
>= 1.0 && scaled_value
< 1000.0)
646 /* For now, the dimension is always seconds. In the future, we
647 may also want to support other (pseudo-)dimensions (such as
648 I-cache misses etc.). */
649 print_header (SItab
[log_scale
].prefix
);
651 for (sym_index
= 0; sym_index
< symtab
.len
; ++sym_index
)
653 addr
= time_sorted_syms
[sym_index
]->addr
;
655 /* Print symbol if its in INCL_FLAT table or that table
656 is empty and the symbol is not in EXCL_FLAT. */
657 if (sym_lookup (&syms
[INCL_FLAT
], addr
)
658 || (syms
[INCL_FLAT
].len
== 0
659 && !sym_lookup (&syms
[EXCL_FLAT
], addr
)))
660 print_line (time_sorted_syms
[sym_index
], SItab
[log_scale
].scale
);
663 free (time_sorted_syms
);
665 if (print_descriptions
&& !bsd_style_output
)
670 hist_check_address (unsigned address
)
674 for (i
= 0; i
< num_histograms
; ++i
)
675 if (histograms
[i
].lowpc
<= address
&& address
< histograms
[i
].highpc
)
682 #define min(a,b) (((a)<(b)) ? (a) : (b))
685 #define max(a,b) (((a)>(b)) ? (a) : (b))
689 hist_clip_symbol_address (bfd_vma
*p_lowpc
, bfd_vma
*p_highpc
)
694 if (num_histograms
== 0)
696 *p_highpc
= *p_lowpc
;
700 for (i
= 0; i
< num_histograms
; ++i
)
702 bfd_vma common_low
, common_high
;
703 common_low
= max (histograms
[i
].lowpc
, *p_lowpc
);
704 common_high
= min (histograms
[i
].highpc
, *p_highpc
);
706 if (common_low
< common_high
)
711 _("%s: found a symbol that covers "
712 "several histogram records"),
718 *p_lowpc
= common_low
;
719 *p_highpc
= common_high
;
724 *p_highpc
= *p_lowpc
;
727 /* Find and return exising histogram record having the same lowpc and
728 highpc as passed via the parameters. Return NULL if nothing is found.
729 The return value is valid until any new histogram is read. */
731 find_histogram (bfd_vma lowpc
, bfd_vma highpc
)
734 for (i
= 0; i
< num_histograms
; ++i
)
736 if (histograms
[i
].lowpc
== lowpc
&& histograms
[i
].highpc
== highpc
)
737 return &histograms
[i
];
742 /* Given a PC, return histogram record which address range include this PC.
743 Return NULL if there's no such record. */
745 find_histogram_for_pc (bfd_vma pc
)
748 for (i
= 0; i
< num_histograms
; ++i
)
750 if (histograms
[i
].lowpc
<= pc
&& pc
< histograms
[i
].highpc
)
751 return &histograms
[i
];