1 /* hist.c - Histogram related operations.
3 Copyright 1999, 2000, 2001, 2002, 2004, 2005, 2007, 2009
4 Free Software Foundation, Inc.
6 This file is part of GNU Binutils.
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
24 #include "libiberty.h"
25 #include "search_list.h"
38 #define UNITS_TO_CODE (offset_to_code / sizeof(UNIT))
40 static void scale_and_align_entries (void);
41 static void print_header (int);
42 static void print_line (Sym
*, double);
43 static int cmp_time (const PTR
, const PTR
);
45 /* Declarations of automatically generated functions to output blurbs. */
46 extern void flat_blurb (FILE * fp
);
48 static histogram
*find_histogram (bfd_vma lowpc
, bfd_vma highpc
);
49 static histogram
*find_histogram_for_pc (bfd_vma pc
);
51 histogram
* histograms
;
52 unsigned num_histograms
;
54 static char hist_dimension
[16] = "seconds";
55 static char hist_dimension_abbrev
= 's';
57 static double accum_time
; /* Accumulated time so far for print_line(). */
58 static double total_time
; /* Total time for all routines. */
60 /* Table of SI prefixes for powers of 10 (used to automatically
61 scale some of the values in the flat profile). */
69 { 'T', 1e-12 }, /* tera */
70 { 'G', 1e-09 }, /* giga */
71 { 'M', 1e-06 }, /* mega */
72 { 'K', 1e-03 }, /* kilo */
74 { 'm', 1e+03 }, /* milli */
75 { 'u', 1e+06 }, /* micro */
76 { 'n', 1e+09 }, /* nano */
77 { 'p', 1e+12 }, /* pico */
78 { 'f', 1e+15 }, /* femto */
79 { 'a', 1e+18 } /* ato */
82 /* Reads just the header part of histogram record into
83 *RECORD from IFP. FILENAME is the name of IFP and
84 is provided for formatting error messages only.
86 If FIRST is non-zero, sets global variables HZ, HIST_DIMENSION,
87 HIST_DIMENSION_ABBREV, HIST_SCALE. If FIRST is zero, checks
88 that the new histogram is compatible with already-set values
89 of those variables and emits an error if that's not so. */
91 read_histogram_header (histogram
*record
,
92 FILE *ifp
, const char *filename
,
95 unsigned int profrate
;
96 char n_hist_dimension
[15];
97 char n_hist_dimension_abbrev
;
100 if (gmon_io_read_vma (ifp
, &record
->lowpc
)
101 || gmon_io_read_vma (ifp
, &record
->highpc
)
102 || gmon_io_read_32 (ifp
, &record
->num_bins
)
103 || gmon_io_read_32 (ifp
, &profrate
)
104 || gmon_io_read (ifp
, n_hist_dimension
, 15)
105 || gmon_io_read (ifp
, &n_hist_dimension_abbrev
, 1))
107 fprintf (stderr
, _("%s: %s: unexpected end of file\n"),
113 n_hist_scale
= (double)((record
->highpc
- record
->lowpc
) / sizeof (UNIT
))
118 /* We don't try to veryfy profrate is the same for all histogram
119 records. If we have two histogram records for the same
120 address range and profiling samples is done as often
121 as possible as opposed on timer, then the actual profrate will
122 be slightly different. Most of the time the difference does not
123 matter and insisting that profiling rate is exactly the same
124 will only create inconvenient. */
126 memcpy (hist_dimension
, n_hist_dimension
, 15);
127 hist_dimension_abbrev
= n_hist_dimension_abbrev
;
128 hist_scale
= n_hist_scale
;
132 if (strncmp (n_hist_dimension
, hist_dimension
, 15) != 0)
135 _("%s: dimension unit changed between histogram records\n"
138 whoami
, whoami
, hist_dimension
, whoami
, n_hist_dimension
);
142 if (n_hist_dimension_abbrev
!= hist_dimension_abbrev
)
145 _("%s: dimension abbreviation changed between histogram records\n"
148 whoami
, whoami
, hist_dimension_abbrev
, whoami
, n_hist_dimension_abbrev
);
152 /* The only reason we require the same scale for histograms is that
153 there's code (notably printing code), that prints units,
154 and it would be very confusing to have one unit mean different
155 things for different functions. */
156 if (fabs (hist_scale
- n_hist_scale
) > 0.000001)
159 _("%s: different scales in histogram records"),
166 /* Read the histogram from file IFP. FILENAME is the name of IFP and
167 is provided for formatting error messages only. */
170 hist_read_rec (FILE * ifp
, const char *filename
)
172 bfd_vma lowpc
, highpc
;
174 histogram
*record
, *existing_record
;
177 /* 1. Read the header and see if there's existing record for the
178 same address range and that there are no overlapping records. */
179 read_histogram_header (&n_record
, ifp
, filename
, num_histograms
== 0);
181 existing_record
= find_histogram (n_record
.lowpc
, n_record
.highpc
);
184 record
= existing_record
;
188 /* If this record overlaps, but does not completely match an existing
189 record, it's an error. */
190 lowpc
= n_record
.lowpc
;
191 highpc
= n_record
.highpc
;
192 hist_clip_symbol_address (&lowpc
, &highpc
);
196 _("%s: overlapping histogram records\n"),
201 /* This is new record. Add it to global array and allocate space for
203 histograms
= (struct histogram
*)
204 xrealloc (histograms
, sizeof (histogram
) * (num_histograms
+ 1));
205 memcpy (histograms
+ num_histograms
,
206 &n_record
, sizeof (histogram
));
207 record
= &histograms
[num_histograms
];
210 record
->sample
= (int *) xmalloc (record
->num_bins
211 * sizeof (record
->sample
[0]));
212 memset (record
->sample
, 0, record
->num_bins
* sizeof (record
->sample
[0]));
215 /* 2. We have either a new record (with zeroed histogram data), or an existing
216 record with some data in the histogram already. Read new data into the
217 record, adding hit counts. */
220 printf ("[hist_read_rec] n_lowpc 0x%lx n_highpc 0x%lx ncnt %u\n",
221 (unsigned long) record
->lowpc
, (unsigned long) record
->highpc
,
224 for (i
= 0; i
< record
->num_bins
; ++i
)
227 if (fread (&count
[0], sizeof (count
), 1, ifp
) != 1)
230 _("%s: %s: unexpected EOF after reading %u of %u samples\n"),
231 whoami
, filename
, i
, record
->num_bins
);
234 record
->sample
[i
] += bfd_get_16 (core_bfd
, (bfd_byte
*) & count
[0]);
236 printf ("[hist_read_rec] 0x%lx: %u\n",
237 (unsigned long) (record
->lowpc
238 + i
* (record
->highpc
- record
->lowpc
)
245 /* Write all execution histograms file OFP. FILENAME is the name
246 of OFP and is provided for formatting error-messages only. */
249 hist_write_hist (FILE * ofp
, const char *filename
)
254 for (r
= 0; r
< num_histograms
; ++r
)
256 histogram
*record
= &histograms
[r
];
260 if (gmon_io_write_8 (ofp
, GMON_TAG_TIME_HIST
)
261 || gmon_io_write_vma (ofp
, record
->lowpc
)
262 || gmon_io_write_vma (ofp
, record
->highpc
)
263 || gmon_io_write_32 (ofp
, record
->num_bins
)
264 || gmon_io_write_32 (ofp
, hz
)
265 || gmon_io_write (ofp
, hist_dimension
, 15)
266 || gmon_io_write (ofp
, &hist_dimension_abbrev
, 1))
272 for (i
= 0; i
< record
->num_bins
; ++i
)
274 bfd_put_16 (core_bfd
, (bfd_vma
) record
->sample
[i
], (bfd_byte
*) &count
[0]);
276 if (fwrite (&count
[0], sizeof (count
), 1, ofp
) != 1)
285 /* Calculate scaled entry point addresses (to save time in
286 hist_assign_samples), and, on architectures that have procedure
287 entry masks at the start of a function, possibly push the scaled
288 entry points over the procedure entry mask, if it turns out that
289 the entry point is in one bin and the code for a routine is in the
293 scale_and_align_entries ()
296 bfd_vma bin_of_entry
;
299 for (sym
= symtab
.base
; sym
< symtab
.limit
; sym
++)
301 histogram
*r
= find_histogram_for_pc (sym
->addr
);
303 sym
->hist
.scaled_addr
= sym
->addr
/ sizeof (UNIT
);
307 bin_of_entry
= (sym
->hist
.scaled_addr
- r
->lowpc
) / hist_scale
;
308 bin_of_code
= ((sym
->hist
.scaled_addr
+ UNITS_TO_CODE
- r
->lowpc
)
310 if (bin_of_entry
< bin_of_code
)
313 printf ("[scale_and_align_entries] pushing 0x%lx to 0x%lx\n",
314 (unsigned long) sym
->hist
.scaled_addr
,
315 (unsigned long) (sym
->hist
.scaled_addr
317 sym
->hist
.scaled_addr
+= UNITS_TO_CODE
;
324 /* Assign samples to the symbol to which they belong.
326 Histogram bin I covers some address range [BIN_LOWPC,BIN_HIGH_PC)
327 which may overlap one more symbol address ranges. If a symbol
328 overlaps with the bin's address range by O percent, then O percent
329 of the bin's count is credited to that symbol.
331 There are three cases as to where BIN_LOW_PC and BIN_HIGH_PC can be
332 with respect to the symbol's address range [SYM_LOW_PC,
333 SYM_HIGH_PC) as shown in the following diagram. OVERLAP computes
334 the distance (in UNITs) between the arrows, the fraction of the
335 sample that is to be credited to the symbol which starts at
338 sym_low_pc sym_high_pc
342 +-----------------------------------------------+
344 | ->| |<- ->| |<- ->| |<- |
346 +---------+ +---------+ +---------+
350 bin_low_pc bin_high_pc bin_low_pc bin_high_pc bin_low_pc bin_high_pc
352 For the VAX we assert that samples will never fall in the first two
353 bytes of any routine, since that is the entry mask, thus we call
354 scale_and_align_entries() to adjust the entry points if the entry
355 mask falls in one bin but the code for the routine doesn't start
356 until the next bin. In conjunction with the alignment of routine
357 addresses, this should allow us to have only one sample for every
358 four bytes of text space and never have any overlap (the two end
362 hist_assign_samples_1 (histogram
*r
)
364 bfd_vma bin_low_pc
, bin_high_pc
;
365 bfd_vma sym_low_pc
, sym_high_pc
;
366 bfd_vma overlap
, addr
;
367 unsigned int bin_count
;
368 unsigned int i
, j
, k
;
369 double count_time
, credit
;
371 bfd_vma lowpc
= r
->lowpc
/ sizeof (UNIT
);
373 /* Iterate over all sample bins. */
374 for (i
= 0, k
= 1; i
< r
->num_bins
; ++i
)
376 bin_count
= r
->sample
[i
];
380 bin_low_pc
= lowpc
+ (bfd_vma
) (hist_scale
* i
);
381 bin_high_pc
= lowpc
+ (bfd_vma
) (hist_scale
* (i
+ 1));
382 count_time
= bin_count
;
386 "[assign_samples] bin_low_pc=0x%lx, bin_high_pc=0x%lx, bin_count=%u\n",
387 (unsigned long) (sizeof (UNIT
) * bin_low_pc
),
388 (unsigned long) (sizeof (UNIT
) * bin_high_pc
),
390 total_time
+= count_time
;
392 /* Credit all symbols that are covered by bin I.
394 PR gprof/13325: Make sure that K does not get decremented
395 and J will never be less than 0. */
396 for (j
= k
- 1; j
< symtab
.len
; k
= ++j
)
398 sym_low_pc
= symtab
.base
[j
].hist
.scaled_addr
;
399 sym_high_pc
= symtab
.base
[j
+ 1].hist
.scaled_addr
;
401 /* If high end of bin is below entry address,
403 if (bin_high_pc
< sym_low_pc
)
406 /* If low end of bin is above high end of symbol,
407 go for next symbol. */
408 if (bin_low_pc
>= sym_high_pc
)
412 MIN (bin_high_pc
, sym_high_pc
) - MAX (bin_low_pc
, sym_low_pc
);
417 "[assign_samples] [0x%lx,0x%lx) %s gets %f ticks %ld overlap\n",
418 (unsigned long) symtab
.base
[j
].addr
,
419 (unsigned long) (sizeof (UNIT
) * sym_high_pc
),
420 symtab
.base
[j
].name
, overlap
* count_time
/ hist_scale
,
423 addr
= symtab
.base
[j
].addr
;
424 credit
= overlap
* count_time
/ hist_scale
;
426 /* Credit symbol if it appears in INCL_FLAT or that
427 table is empty and it does not appear it in
429 if (sym_lookup (&syms
[INCL_FLAT
], addr
)
430 || (syms
[INCL_FLAT
].len
== 0
431 && !sym_lookup (&syms
[EXCL_FLAT
], addr
)))
433 symtab
.base
[j
].hist
.time
+= credit
;
437 total_time
-= credit
;
443 DBG (SAMPLEDEBUG
, printf ("[assign_samples] total_time %f\n",
447 /* Calls 'hist_assign_sampes_1' for all histogram records read so far. */
449 hist_assign_samples ()
453 scale_and_align_entries ();
455 for (i
= 0; i
< num_histograms
; ++i
)
456 hist_assign_samples_1 (&histograms
[i
]);
460 /* Print header for flag histogram profile. */
463 print_header (int prefix
)
467 sprintf (unit
, _("%c%c/call"), prefix
, hist_dimension_abbrev
);
469 if (bsd_style_output
)
471 printf (_("\ngranularity: each sample hit covers %ld byte(s)"),
472 (long) hist_scale
* (long) sizeof (UNIT
));
473 if (total_time
> 0.0)
475 printf (_(" for %.2f%% of %.2f %s\n\n"),
476 100.0 / total_time
, total_time
/ hz
, hist_dimension
);
481 printf (_("\nEach sample counts as %g %s.\n"), 1.0 / hz
, hist_dimension
);
484 if (total_time
<= 0.0)
486 printf (_(" no time accumulated\n\n"));
488 /* This doesn't hurt since all the numerators will be zero. */
492 printf ("%5.5s %10.10s %8.8s %8.8s %8.8s %8.8s %-8.8s\n",
493 "% ", _("cumulative"), _("self "), "", _("self "), _("total "),
495 printf ("%5.5s %9.9s %8.8s %8.8s %8.8s %8.8s %-8.8s\n",
496 _("time"), hist_dimension
, hist_dimension
, _("calls"), unit
, unit
,
502 print_line (Sym
*sym
, double scale
)
504 if (ignore_zeros
&& sym
->ncalls
== 0 && sym
->hist
.time
== 0)
507 accum_time
+= sym
->hist
.time
;
509 if (bsd_style_output
)
510 printf ("%5.1f %10.2f %8.2f",
511 total_time
> 0.0 ? 100 * sym
->hist
.time
/ total_time
: 0.0,
512 accum_time
/ hz
, sym
->hist
.time
/ hz
);
514 printf ("%6.2f %9.2f %8.2f",
515 total_time
> 0.0 ? 100 * sym
->hist
.time
/ total_time
: 0.0,
516 accum_time
/ hz
, sym
->hist
.time
/ hz
);
518 if (sym
->ncalls
!= 0)
519 printf (" %8lu %8.2f %8.2f ",
520 sym
->ncalls
, scale
* sym
->hist
.time
/ hz
/ sym
->ncalls
,
521 scale
* (sym
->hist
.time
+ sym
->cg
.child_time
) / hz
/ sym
->ncalls
);
523 printf (" %8.8s %8.8s %8.8s ", "", "", "");
525 if (bsd_style_output
)
528 print_name_only (sym
);
534 /* Compare LP and RP. The primary comparison key is execution time,
535 the secondary is number of invocation, and the tertiary is the
536 lexicographic order of the function names. */
539 cmp_time (const PTR lp
, const PTR rp
)
541 const Sym
*left
= *(const Sym
**) lp
;
542 const Sym
*right
= *(const Sym
**) rp
;
545 time_diff
= right
->hist
.time
- left
->hist
.time
;
553 if (right
->ncalls
> left
->ncalls
)
556 if (right
->ncalls
< left
->ncalls
)
559 return strcmp (left
->name
, right
->name
);
563 /* Print the flat histogram profile. */
568 Sym
**time_sorted_syms
, *top_dog
, *sym
;
569 unsigned int sym_index
;
575 first_output
= FALSE
;
581 if (bsd_style_output
)
583 if (print_descriptions
)
585 printf (_("\n\n\nflat profile:\n"));
591 printf (_("Flat profile:\n"));
594 /* Sort the symbol table by time (call-count and name as secondary
595 and tertiary keys). */
596 time_sorted_syms
= (Sym
**) xmalloc (symtab
.len
* sizeof (Sym
*));
598 for (sym_index
= 0; sym_index
< symtab
.len
; ++sym_index
)
599 time_sorted_syms
[sym_index
] = &symtab
.base
[sym_index
];
601 qsort (time_sorted_syms
, symtab
.len
, sizeof (Sym
*), cmp_time
);
603 if (bsd_style_output
)
605 log_scale
= 5; /* Milli-seconds is BSD-default. */
609 /* Search for symbol with highest per-call
610 execution time and scale accordingly. */
615 for (sym_index
= 0; sym_index
< symtab
.len
; ++sym_index
)
617 sym
= time_sorted_syms
[sym_index
];
619 if (sym
->ncalls
!= 0)
623 call_time
= (sym
->hist
.time
+ sym
->cg
.child_time
) / sym
->ncalls
;
625 if (call_time
> top_time
)
628 top_time
= call_time
;
633 if (top_dog
&& top_dog
->ncalls
!= 0 && top_time
> 0.0)
637 for (log_scale
= 0; log_scale
< ARRAY_SIZE (SItab
); log_scale
++)
639 double scaled_value
= SItab
[log_scale
].scale
* top_time
;
641 if (scaled_value
>= 1.0 && scaled_value
< 1000.0)
647 /* For now, the dimension is always seconds. In the future, we
648 may also want to support other (pseudo-)dimensions (such as
649 I-cache misses etc.). */
650 print_header (SItab
[log_scale
].prefix
);
652 for (sym_index
= 0; sym_index
< symtab
.len
; ++sym_index
)
654 addr
= time_sorted_syms
[sym_index
]->addr
;
656 /* Print symbol if its in INCL_FLAT table or that table
657 is empty and the symbol is not in EXCL_FLAT. */
658 if (sym_lookup (&syms
[INCL_FLAT
], addr
)
659 || (syms
[INCL_FLAT
].len
== 0
660 && !sym_lookup (&syms
[EXCL_FLAT
], addr
)))
661 print_line (time_sorted_syms
[sym_index
], SItab
[log_scale
].scale
);
664 free (time_sorted_syms
);
666 if (print_descriptions
&& !bsd_style_output
)
671 hist_check_address (unsigned address
)
675 for (i
= 0; i
< num_histograms
; ++i
)
676 if (histograms
[i
].lowpc
<= address
&& address
< histograms
[i
].highpc
)
683 #define min(a,b) (((a)<(b)) ? (a) : (b))
686 #define max(a,b) (((a)>(b)) ? (a) : (b))
690 hist_clip_symbol_address (bfd_vma
*p_lowpc
, bfd_vma
*p_highpc
)
695 if (num_histograms
== 0)
697 *p_highpc
= *p_lowpc
;
701 for (i
= 0; i
< num_histograms
; ++i
)
703 bfd_vma common_low
, common_high
;
704 common_low
= max (histograms
[i
].lowpc
, *p_lowpc
);
705 common_high
= min (histograms
[i
].highpc
, *p_highpc
);
707 if (common_low
< common_high
)
712 _("%s: found a symbol that covers "
713 "several histogram records"),
719 *p_lowpc
= common_low
;
720 *p_highpc
= common_high
;
725 *p_highpc
= *p_lowpc
;
728 /* Find and return exising histogram record having the same lowpc and
729 highpc as passed via the parameters. Return NULL if nothing is found.
730 The return value is valid until any new histogram is read. */
732 find_histogram (bfd_vma lowpc
, bfd_vma highpc
)
735 for (i
= 0; i
< num_histograms
; ++i
)
737 if (histograms
[i
].lowpc
== lowpc
&& histograms
[i
].highpc
== highpc
)
738 return &histograms
[i
];
743 /* Given a PC, return histogram record which address range include this PC.
744 Return NULL if there's no such record. */
746 find_histogram_for_pc (bfd_vma pc
)
749 for (i
= 0; i
< num_histograms
; ++i
)
751 if (histograms
[i
].lowpc
<= pc
&& pc
< histograms
[i
].highpc
)
752 return &histograms
[i
];