1 /* hist.c - Histogram related operations.
3 Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
5 This file is part of GNU Binutils.
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
22 #include "libiberty.h"
24 #include "search_list.h"
34 #define UNITS_TO_CODE (offset_to_code / sizeof(UNIT))
36 static void scale_and_align_entries
PARAMS ((void));
37 static void print_header
PARAMS ((int));
38 static void print_line
PARAMS ((Sym
*, double));
39 static int cmp_time
PARAMS ((const PTR
, const PTR
));
41 /* Declarations of automatically generated functions to output blurbs. */
42 extern void flat_blurb
PARAMS ((FILE * fp
));
44 bfd_vma s_lowpc
; /* Lowest address in .text. */
45 bfd_vma s_highpc
= 0; /* Highest address in .text. */
46 bfd_vma lowpc
, highpc
; /* Same, but expressed in UNITs. */
47 int hist_num_bins
= 0; /* Number of histogram samples. */
48 int *hist_sample
= 0; /* Histogram samples (shorts in the file!). */
50 char hist_dimension
[16] = "seconds";
51 char hist_dimension_abbrev
= 's';
53 static double accum_time
; /* Accumulated time so far for print_line(). */
54 static double total_time
; /* Total time for all routines. */
56 /* Table of SI prefixes for powers of 10 (used to automatically
57 scale some of the values in the flat profile). */
65 { 'T', 1e-12 }, /* tera */
66 { 'G', 1e-09 }, /* giga */
67 { 'M', 1e-06 }, /* mega */
68 { 'K', 1e-03 }, /* kilo */
70 { 'm', 1e+03 }, /* milli */
71 { 'u', 1e+06 }, /* micro */
72 { 'n', 1e+09 }, /* nano */
73 { 'p', 1e+12 }, /* pico */
74 { 'f', 1e+15 }, /* femto */
75 { 'a', 1e+18 } /* ato */
79 /* Read the histogram from file IFP. FILENAME is the name of IFP and
80 is provided for formatting error messages only. */
83 hist_read_rec (ifp
, filename
)
87 bfd_vma n_lowpc
, n_highpc
;
88 int i
, ncnt
, profrate
;
91 if (gmon_io_read_vma (ifp
, &n_lowpc
)
92 || gmon_io_read_vma (ifp
, &n_highpc
)
93 || gmon_io_read_32 (ifp
, &ncnt
)
94 || gmon_io_read_32 (ifp
, &profrate
)
95 || gmon_io_read (ifp
, hist_dimension
, 15)
96 || gmon_io_read (ifp
, &hist_dimension_abbrev
, 1))
98 fprintf (stderr
, _("%s: %s: unexpected end of file\n"),
106 /* This is the first histogram record. */
109 lowpc
= (bfd_vma
) n_lowpc
/ sizeof (UNIT
);
110 highpc
= (bfd_vma
) n_highpc
/ sizeof (UNIT
);
111 hist_num_bins
= ncnt
;
116 printf ("[hist_read_rec] n_lowpc 0x%lx n_highpc 0x%lx ncnt %d\n",
117 (unsigned long) n_lowpc
, (unsigned long) n_highpc
, ncnt
);
118 printf ("[hist_read_rec] s_lowpc 0x%lx s_highpc 0x%lx nsamples %d\n",
119 (unsigned long) s_lowpc
, (unsigned long) s_highpc
,
121 printf ("[hist_read_rec] lowpc 0x%lx highpc 0x%lx\n",
122 (unsigned long) lowpc
, (unsigned long) highpc
));
124 if (n_lowpc
!= s_lowpc
|| n_highpc
!= s_highpc
125 || ncnt
!= hist_num_bins
|| hz
!= profrate
)
127 fprintf (stderr
, _("%s: `%s' is incompatible with first gmon file\n"),
134 hist_sample
= (int *) xmalloc (hist_num_bins
* sizeof (hist_sample
[0]));
135 memset (hist_sample
, 0, hist_num_bins
* sizeof (hist_sample
[0]));
138 for (i
= 0; i
< hist_num_bins
; ++i
)
140 if (fread (&count
[0], sizeof (count
), 1, ifp
) != 1)
143 _("%s: %s: unexpected EOF after reading %d of %d samples\n"),
144 whoami
, filename
, i
, hist_num_bins
);
147 hist_sample
[i
] += bfd_get_16 (core_bfd
, (bfd_byte
*) & count
[0]);
149 printf ("[hist_read_rec] 0x%lx: %u\n",
150 (unsigned long) (n_lowpc
+ i
* (n_highpc
- n_lowpc
) / ncnt
),
156 /* Write execution histogram to file OFP. FILENAME is the name
157 of OFP and is provided for formatting error-messages only. */
160 hist_write_hist (ofp
, filename
)
162 const char *filename
;
169 if (gmon_io_write_8 (ofp
, GMON_TAG_TIME_HIST
)
170 || gmon_io_write_vma (ofp
, s_lowpc
)
171 || gmon_io_write_vma (ofp
, s_highpc
)
172 || gmon_io_write_32 (ofp
, hist_num_bins
)
173 || gmon_io_write_32 (ofp
, hz
)
174 || gmon_io_write (ofp
, hist_dimension
, 15)
175 || gmon_io_write (ofp
, &hist_dimension_abbrev
, 1))
181 for (i
= 0; i
< hist_num_bins
; ++i
)
183 bfd_put_16 (core_bfd
, (bfd_vma
) hist_sample
[i
], (bfd_byte
*) &count
[0]);
185 if (fwrite (&count
[0], sizeof (count
), 1, ofp
) != 1)
194 /* Calculate scaled entry point addresses (to save time in
195 hist_assign_samples), and, on architectures that have procedure
196 entry masks at the start of a function, possibly push the scaled
197 entry points over the procedure entry mask, if it turns out that
198 the entry point is in one bin and the code for a routine is in the
202 scale_and_align_entries ()
205 bfd_vma bin_of_entry
;
208 for (sym
= symtab
.base
; sym
< symtab
.limit
; sym
++)
210 sym
->hist
.scaled_addr
= sym
->addr
/ sizeof (UNIT
);
211 bin_of_entry
= (sym
->hist
.scaled_addr
- lowpc
) / hist_scale
;
212 bin_of_code
= ((sym
->hist
.scaled_addr
+ UNITS_TO_CODE
- lowpc
)
214 if (bin_of_entry
< bin_of_code
)
217 printf ("[scale_and_align_entries] pushing 0x%lx to 0x%lx\n",
218 (unsigned long) sym
->hist
.scaled_addr
,
219 (unsigned long) (sym
->hist
.scaled_addr
221 sym
->hist
.scaled_addr
+= UNITS_TO_CODE
;
227 /* Assign samples to the symbol to which they belong.
229 Histogram bin I covers some address range [BIN_LOWPC,BIN_HIGH_PC)
230 which may overlap one more symbol address ranges. If a symbol
231 overlaps with the bin's address range by O percent, then O percent
232 of the bin's count is credited to that symbol.
234 There are three cases as to where BIN_LOW_PC and BIN_HIGH_PC can be
235 with respect to the symbol's address range [SYM_LOW_PC,
236 SYM_HIGH_PC) as shown in the following diagram. OVERLAP computes
237 the distance (in UNITs) between the arrows, the fraction of the
238 sample that is to be credited to the symbol which starts at
241 sym_low_pc sym_high_pc
245 +-----------------------------------------------+
247 | ->| |<- ->| |<- ->| |<- |
249 +---------+ +---------+ +---------+
253 bin_low_pc bin_high_pc bin_low_pc bin_high_pc bin_low_pc bin_high_pc
255 For the VAX we assert that samples will never fall in the first two
256 bytes of any routine, since that is the entry mask, thus we call
257 scale_and_align_entries() to adjust the entry points if the entry
258 mask falls in one bin but the code for the routine doesn't start
259 until the next bin. In conjunction with the alignment of routine
260 addresses, this should allow us to have only one sample for every
261 four bytes of text space and never have any overlap (the two end
265 hist_assign_samples ()
267 bfd_vma bin_low_pc
, bin_high_pc
;
268 bfd_vma sym_low_pc
, sym_high_pc
;
269 bfd_vma overlap
, addr
;
274 /* Read samples and assign to symbols. */
275 hist_scale
= highpc
- lowpc
;
276 hist_scale
/= hist_num_bins
;
277 scale_and_align_entries ();
279 /* Iterate over all sample bins. */
280 for (i
= 0, j
= 1; i
< hist_num_bins
; ++i
)
282 bin_count
= hist_sample
[i
];
286 bin_low_pc
= lowpc
+ (bfd_vma
) (hist_scale
* i
);
287 bin_high_pc
= lowpc
+ (bfd_vma
) (hist_scale
* (i
+ 1));
292 "[assign_samples] bin_low_pc=0x%lx, bin_high_pc=0x%lx, bin_count=%d\n",
293 (unsigned long) (sizeof (UNIT
) * bin_low_pc
),
294 (unsigned long) (sizeof (UNIT
) * bin_high_pc
),
298 /* Credit all symbols that are covered by bin I. */
299 for (j
= j
- 1; j
< symtab
.len
; ++j
)
301 sym_low_pc
= symtab
.base
[j
].hist
.scaled_addr
;
302 sym_high_pc
= symtab
.base
[j
+ 1].hist
.scaled_addr
;
304 /* If high end of bin is below entry address,
306 if (bin_high_pc
< sym_low_pc
)
309 /* If low end of bin is above high end of symbol,
310 go for next symbol. */
311 if (bin_low_pc
>= sym_high_pc
)
315 MIN (bin_high_pc
, sym_high_pc
) - MAX (bin_low_pc
, sym_low_pc
);
320 "[assign_samples] [0x%lx,0x%lx) %s gets %f ticks %ld overlap\n",
321 (unsigned long) symtab
.base
[j
].addr
,
322 (unsigned long) (sizeof (UNIT
) * sym_high_pc
),
323 symtab
.base
[j
].name
, overlap
* time
/ hist_scale
,
326 addr
= symtab
.base
[j
].addr
;
327 credit
= overlap
* time
/ hist_scale
;
329 /* Credit symbol if it appears in INCL_FLAT or that
330 table is empty and it does not appear it in
332 if (sym_lookup (&syms
[INCL_FLAT
], addr
)
333 || (syms
[INCL_FLAT
].len
== 0
334 && !sym_lookup (&syms
[EXCL_FLAT
], addr
)))
336 symtab
.base
[j
].hist
.time
+= credit
;
340 total_time
-= credit
;
346 DBG (SAMPLEDEBUG
, printf ("[assign_samples] total_time %f\n",
351 /* Print header for flag histogram profile. */
354 print_header (prefix
)
359 sprintf (unit
, _("%c%c/call"), prefix
, hist_dimension_abbrev
);
361 if (bsd_style_output
)
363 printf (_("\ngranularity: each sample hit covers %ld byte(s)"),
364 (long) hist_scale
* sizeof (UNIT
));
365 if (total_time
> 0.0)
367 printf (_(" for %.2f%% of %.2f %s\n\n"),
368 100.0 / total_time
, total_time
/ hz
, hist_dimension
);
373 printf (_("\nEach sample counts as %g %s.\n"), 1.0 / hz
, hist_dimension
);
376 if (total_time
<= 0.0)
378 printf (_(" no time accumulated\n\n"));
380 /* This doesn't hurt since all the numerators will be zero. */
384 printf ("%5.5s %10.10s %8.8s %8.8s %8.8s %8.8s %-8.8s\n",
385 "% ", _("cumulative"), _("self "), "", _("self "), _("total "),
387 printf ("%5.5s %9.9s %8.8s %8.8s %8.8s %8.8s %-8.8s\n",
388 _("time"), hist_dimension
, hist_dimension
, _("calls"), unit
, unit
,
394 print_line (sym
, scale
)
398 if (ignore_zeros
&& sym
->ncalls
== 0 && sym
->hist
.time
== 0)
401 accum_time
+= sym
->hist
.time
;
403 if (bsd_style_output
)
404 printf ("%5.1f %10.2f %8.2f",
405 total_time
> 0.0 ? 100 * sym
->hist
.time
/ total_time
: 0.0,
406 accum_time
/ hz
, sym
->hist
.time
/ hz
);
408 printf ("%6.2f %9.2f %8.2f",
409 total_time
> 0.0 ? 100 * sym
->hist
.time
/ total_time
: 0.0,
410 accum_time
/ hz
, sym
->hist
.time
/ hz
);
412 if (sym
->ncalls
!= 0)
413 printf (" %8lu %8.2f %8.2f ",
414 sym
->ncalls
, scale
* sym
->hist
.time
/ hz
/ sym
->ncalls
,
415 scale
* (sym
->hist
.time
+ sym
->cg
.child_time
) / hz
/ sym
->ncalls
);
417 printf (" %8.8s %8.8s %8.8s ", "", "", "");
419 if (bsd_style_output
)
422 print_name_only (sym
);
428 /* Compare LP and RP. The primary comparison key is execution time,
429 the secondary is number of invocation, and the tertiary is the
430 lexicographic order of the function names. */
437 const Sym
*left
= *(const Sym
**) lp
;
438 const Sym
*right
= *(const Sym
**) rp
;
441 time_diff
= right
->hist
.time
- left
->hist
.time
;
449 if (right
->ncalls
> left
->ncalls
)
452 if (right
->ncalls
< left
->ncalls
)
455 return strcmp (left
->name
, right
->name
);
459 /* Print the flat histogram profile. */
464 Sym
**time_sorted_syms
, *top_dog
, *sym
;
467 double top_time
, time
;
471 first_output
= false;
477 if (bsd_style_output
)
479 if (print_descriptions
)
481 printf (_("\n\n\nflat profile:\n"));
487 printf (_("Flat profile:\n"));
490 /* Sort the symbol table by time (call-count and name as secondary
491 and tertiary keys). */
492 time_sorted_syms
= (Sym
**) xmalloc (symtab
.len
* sizeof (Sym
*));
494 for (index
= 0; index
< symtab
.len
; ++index
)
495 time_sorted_syms
[index
] = &symtab
.base
[index
];
497 qsort (time_sorted_syms
, symtab
.len
, sizeof (Sym
*), cmp_time
);
499 if (bsd_style_output
)
501 log_scale
= 5; /* Milli-seconds is BSD-default. */
505 /* Search for symbol with highest per-call
506 execution time and scale accordingly. */
511 for (index
= 0; index
< symtab
.len
; ++index
)
513 sym
= time_sorted_syms
[index
];
515 if (sym
->ncalls
!= 0)
517 time
= (sym
->hist
.time
+ sym
->cg
.child_time
) / sym
->ncalls
;
527 if (top_dog
&& top_dog
->ncalls
!= 0 && top_time
> 0.0)
531 for (log_scale
= 0; log_scale
< ARRAY_SIZE (SItab
); log_scale
++)
533 double scaled_value
= SItab
[log_scale
].scale
* top_time
;
535 if (scaled_value
>= 1.0 && scaled_value
< 1000.0)
541 /* For now, the dimension is always seconds. In the future, we
542 may also want to support other (pseudo-)dimensions (such as
543 I-cache misses etc.). */
544 print_header (SItab
[log_scale
].prefix
);
546 for (index
= 0; index
< symtab
.len
; ++index
)
548 addr
= time_sorted_syms
[index
]->addr
;
550 /* Print symbol if its in INCL_FLAT table or that table
551 is empty and the symbol is not in EXCL_FLAT. */
552 if (sym_lookup (&syms
[INCL_FLAT
], addr
)
553 || (syms
[INCL_FLAT
].len
== 0
554 && !sym_lookup (&syms
[EXCL_FLAT
], addr
)))
555 print_line (time_sorted_syms
[index
], SItab
[log_scale
].scale
);
558 free (time_sorted_syms
);
560 if (print_descriptions
&& !bsd_style_output
)