1 /* hist.c - Histogram related operations.
3 Copyright 2000, 2001 Free Software Foundation, Inc.
5 This file is part of GNU Binutils.
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
23 #include "libiberty.h"
33 #define UNITS_TO_CODE (offset_to_code / sizeof(UNIT))
35 static void scale_and_align_entries
PARAMS ((void));
37 /* Declarations of automatically generated functions to output blurbs. */
38 extern void flat_blurb
PARAMS ((FILE * fp
));
40 bfd_vma s_lowpc
; /* Lowest address in .text. */
41 bfd_vma s_highpc
= 0; /* Highest address in .text. */
42 bfd_vma lowpc
, highpc
; /* Same, but expressed in UNITs. */
43 int hist_num_bins
= 0; /* Number of histogram samples. */
44 int *hist_sample
= 0; /* Histogram samples (shorts in the file!). */
46 char hist_dimension
[16] = "seconds";
47 char hist_dimension_abbrev
= 's';
49 static double accum_time
; /* Accumulated time so far for print_line(). */
50 static double total_time
; /* Total time for all routines. */
52 /* Table of SI prefixes for powers of 10 (used to automatically
53 scale some of the values in the flat profile). */
61 { 'T', 1e-12 }, /* tera */
62 { 'G', 1e-09 }, /* giga */
63 { 'M', 1e-06 }, /* mega */
64 { 'K', 1e-03 }, /* kilo */
66 { 'm', 1e+03 }, /* milli */
67 { 'u', 1e+06 }, /* micro */
68 { 'n', 1e+09 }, /* nano */
69 { 'p', 1e+12 }, /* pico */
70 { 'f', 1e+15 }, /* femto */
71 { 'a', 1e+18 } /* ato */
75 /* Read the histogram from file IFP. FILENAME is the name of IFP and
76 is provided for formatting error messages only. */
79 DEFUN (hist_read_rec
, (ifp
, filename
), FILE * ifp AND
const char *filename
)
81 bfd_vma n_lowpc
, n_highpc
;
82 int i
, ncnt
, profrate
;
85 if (gmon_io_read_vma (ifp
, &n_lowpc
)
86 || gmon_io_read_vma (ifp
, &n_highpc
)
87 || gmon_io_read_32 (ifp
, &ncnt
)
88 || gmon_io_read_32 (ifp
, &profrate
)
89 || gmon_io_read (ifp
, hist_dimension
, 15)
90 || gmon_io_read (ifp
, &hist_dimension_abbrev
, 1))
92 fprintf (stderr
, _("%s: %s: unexpected end of file\n"),
100 /* This is the first histogram record. */
103 lowpc
= (bfd_vma
) n_lowpc
/ sizeof (UNIT
);
104 highpc
= (bfd_vma
) n_highpc
/ sizeof (UNIT
);
105 hist_num_bins
= ncnt
;
110 printf ("[hist_read_rec] n_lowpc 0x%lx n_highpc 0x%lx ncnt %d\n",
111 (unsigned long) n_lowpc
, (unsigned long) n_highpc
, ncnt
);
112 printf ("[hist_read_rec] s_lowpc 0x%lx s_highpc 0x%lx nsamples %d\n",
113 (unsigned long) s_lowpc
, (unsigned long) s_highpc
,
115 printf ("[hist_read_rec] lowpc 0x%lx highpc 0x%lx\n",
116 (unsigned long) lowpc
, (unsigned long) highpc
));
118 if (n_lowpc
!= s_lowpc
|| n_highpc
!= s_highpc
119 || ncnt
!= hist_num_bins
|| hz
!= profrate
)
121 fprintf (stderr
, _("%s: `%s' is incompatible with first gmon file\n"),
128 hist_sample
= (int *) xmalloc (hist_num_bins
* sizeof (hist_sample
[0]));
129 memset (hist_sample
, 0, hist_num_bins
* sizeof (hist_sample
[0]));
132 for (i
= 0; i
< hist_num_bins
; ++i
)
134 if (fread (&count
[0], sizeof (count
), 1, ifp
) != 1)
137 _("%s: %s: unexpected EOF after reading %d of %d samples\n"),
138 whoami
, filename
, i
, hist_num_bins
);
141 hist_sample
[i
] += bfd_get_16 (core_bfd
, (bfd_byte
*) & count
[0]);
143 printf ("[hist_read_rec] 0x%lx: %u\n",
144 (unsigned long) (n_lowpc
+ i
* (n_highpc
- n_lowpc
) / ncnt
),
150 /* Write execution histogram to file OFP. FILENAME is the name
151 of OFP and is provided for formatting error-messages only. */
154 DEFUN (hist_write_hist
, (ofp
, filename
), FILE * ofp AND
const char *filename
)
161 if (gmon_io_write_8 (ofp
, GMON_TAG_TIME_HIST
)
162 || gmon_io_write_vma (ofp
, s_lowpc
)
163 || gmon_io_write_vma (ofp
, s_highpc
)
164 || gmon_io_write_32 (ofp
, hist_num_bins
)
165 || gmon_io_write_32 (ofp
, hz
)
166 || gmon_io_write (ofp
, hist_dimension
, 15)
167 || gmon_io_write (ofp
, &hist_dimension_abbrev
, 1))
173 for (i
= 0; i
< hist_num_bins
; ++i
)
175 bfd_put_16 (core_bfd
, hist_sample
[i
], (bfd_byte
*) & count
[0]);
177 if (fwrite (&count
[0], sizeof (count
), 1, ofp
) != 1)
186 /* Calculate scaled entry point addresses (to save time in
187 hist_assign_samples), and, on architectures that have procedure
188 entry masks at the start of a function, possibly push the scaled
189 entry points over the procedure entry mask, if it turns out that
190 the entry point is in one bin and the code for a routine is in the
194 scale_and_align_entries ()
197 bfd_vma bin_of_entry
;
200 for (sym
= symtab
.base
; sym
< symtab
.limit
; sym
++)
202 sym
->hist
.scaled_addr
= sym
->addr
/ sizeof (UNIT
);
203 bin_of_entry
= (sym
->hist
.scaled_addr
- lowpc
) / hist_scale
;
204 bin_of_code
= ((sym
->hist
.scaled_addr
+ UNITS_TO_CODE
- lowpc
)
206 if (bin_of_entry
< bin_of_code
)
209 printf ("[scale_and_align_entries] pushing 0x%lx to 0x%lx\n",
210 (unsigned long) sym
->hist
.scaled_addr
,
211 (unsigned long) (sym
->hist
.scaled_addr
213 sym
->hist
.scaled_addr
+= UNITS_TO_CODE
;
219 /* Assign samples to the symbol to which they belong.
221 Histogram bin I covers some address range [BIN_LOWPC,BIN_HIGH_PC)
222 which may overlap one more symbol address ranges. If a symbol
223 overlaps with the bin's address range by O percent, then O percent
224 of the bin's count is credited to that symbol.
226 There are three cases as to where BIN_LOW_PC and BIN_HIGH_PC can be
227 with respect to the symbol's address range [SYM_LOW_PC,
228 SYM_HIGH_PC) as shown in the following diagram. OVERLAP computes
229 the distance (in UNITs) between the arrows, the fraction of the
230 sample that is to be credited to the symbol which starts at
233 sym_low_pc sym_high_pc
237 +-----------------------------------------------+
239 | ->| |<- ->| |<- ->| |<- |
241 +---------+ +---------+ +---------+
245 bin_low_pc bin_high_pc bin_low_pc bin_high_pc bin_low_pc bin_high_pc
247 For the VAX we assert that samples will never fall in the first two
248 bytes of any routine, since that is the entry mask, thus we call
249 scale_and_align_entries() to adjust the entry points if the entry
250 mask falls in one bin but the code for the routine doesn't start
251 until the next bin. In conjunction with the alignment of routine
252 addresses, this should allow us to have only one sample for every
253 four bytes of text space and never have any overlap (the two end
257 DEFUN_VOID (hist_assign_samples
)
259 bfd_vma bin_low_pc
, bin_high_pc
;
260 bfd_vma sym_low_pc
, sym_high_pc
;
261 bfd_vma overlap
, addr
;
266 /* Read samples and assign to symbols. */
267 hist_scale
= highpc
- lowpc
;
268 hist_scale
/= hist_num_bins
;
269 scale_and_align_entries ();
271 /* Iterate over all sample bins. */
272 for (i
= 0, j
= 1; i
< hist_num_bins
; ++i
)
274 bin_count
= hist_sample
[i
];
278 bin_low_pc
= lowpc
+ (bfd_vma
) (hist_scale
* i
);
279 bin_high_pc
= lowpc
+ (bfd_vma
) (hist_scale
* (i
+ 1));
284 "[assign_samples] bin_low_pc=0x%lx, bin_high_pc=0x%lx, bin_count=%d\n",
285 (unsigned long) (sizeof (UNIT
) * bin_low_pc
),
286 (unsigned long) (sizeof (UNIT
) * bin_high_pc
),
290 /* Credit all symbols that are covered by bin I. */
291 for (j
= j
- 1; j
< symtab
.len
; ++j
)
293 sym_low_pc
= symtab
.base
[j
].hist
.scaled_addr
;
294 sym_high_pc
= symtab
.base
[j
+ 1].hist
.scaled_addr
;
296 /* If high end of bin is below entry address,
298 if (bin_high_pc
< sym_low_pc
)
301 /* If low end of bin is above high end of symbol,
302 go for next symbol. */
303 if (bin_low_pc
>= sym_high_pc
)
307 MIN (bin_high_pc
, sym_high_pc
) - MAX (bin_low_pc
, sym_low_pc
);
312 "[assign_samples] [0x%lx,0x%lx) %s gets %f ticks %ld overlap\n",
313 (unsigned long) symtab
.base
[j
].addr
,
314 (unsigned long) (sizeof (UNIT
) * sym_high_pc
),
315 symtab
.base
[j
].name
, overlap
* time
/ hist_scale
,
318 addr
= symtab
.base
[j
].addr
;
319 credit
= overlap
* time
/ hist_scale
;
321 /* Credit symbol if it appears in INCL_FLAT or that
322 table is empty and it does not appear it in
324 if (sym_lookup (&syms
[INCL_FLAT
], addr
)
325 || (syms
[INCL_FLAT
].len
== 0
326 && !sym_lookup (&syms
[EXCL_FLAT
], addr
)))
328 symtab
.base
[j
].hist
.time
+= credit
;
332 total_time
-= credit
;
338 DBG (SAMPLEDEBUG
, printf ("[assign_samples] total_time %f\n",
343 /* Print header for flag histogram profile. */
346 DEFUN (print_header
, (prefix
), const char prefix
)
350 sprintf (unit
, _("%c%c/call"), prefix
, hist_dimension_abbrev
);
352 if (bsd_style_output
)
354 printf (_("\ngranularity: each sample hit covers %ld byte(s)"),
355 (long) hist_scale
* sizeof (UNIT
));
356 if (total_time
> 0.0)
358 printf (_(" for %.2f%% of %.2f %s\n\n"),
359 100.0 / total_time
, total_time
/ hz
, hist_dimension
);
364 printf (_("\nEach sample counts as %g %s.\n"), 1.0 / hz
, hist_dimension
);
367 if (total_time
<= 0.0)
369 printf (_(" no time accumulated\n\n"));
371 /* This doesn't hurt since all the numerators will be zero. */
375 printf ("%5.5s %10.10s %8.8s %8.8s %8.8s %8.8s %-8.8s\n",
376 "% ", _("cumulative"), _("self "), "", _("self "), _("total "),
378 printf ("%5.5s %9.9s %8.8s %8.8s %8.8s %8.8s %-8.8s\n",
379 _("time"), hist_dimension
, hist_dimension
, _("calls"), unit
, unit
,
385 DEFUN (print_line
, (sym
, scale
), Sym
* sym AND
double scale
)
387 if (ignore_zeros
&& sym
->ncalls
== 0 && sym
->hist
.time
== 0)
390 accum_time
+= sym
->hist
.time
;
392 if (bsd_style_output
)
393 printf ("%5.1f %10.2f %8.2f",
394 total_time
> 0.0 ? 100 * sym
->hist
.time
/ total_time
: 0.0,
395 accum_time
/ hz
, sym
->hist
.time
/ hz
);
397 printf ("%6.2f %9.2f %8.2f",
398 total_time
> 0.0 ? 100 * sym
->hist
.time
/ total_time
: 0.0,
399 accum_time
/ hz
, sym
->hist
.time
/ hz
);
401 if (sym
->ncalls
!= 0)
402 printf (" %8lu %8.2f %8.2f ",
403 sym
->ncalls
, scale
* sym
->hist
.time
/ hz
/ sym
->ncalls
,
404 scale
* (sym
->hist
.time
+ sym
->cg
.child_time
) / hz
/ sym
->ncalls
);
406 printf (" %8.8s %8.8s %8.8s ", "", "", "");
408 if (bsd_style_output
)
411 print_name_only (sym
);
417 /* Compare LP and RP. The primary comparison key is execution time,
418 the secondary is number of invocation, and the tertiary is the
419 lexicographic order of the function names. */
422 DEFUN (cmp_time
, (lp
, rp
), const PTR lp AND
const PTR rp
)
424 const Sym
*left
= *(const Sym
**) lp
;
425 const Sym
*right
= *(const Sym
**) rp
;
428 time_diff
= right
->hist
.time
- left
->hist
.time
;
436 if (right
->ncalls
> left
->ncalls
)
439 if (right
->ncalls
< left
->ncalls
)
442 return strcmp (left
->name
, right
->name
);
446 /* Print the flat histogram profile. */
449 DEFUN_VOID (hist_print
)
451 Sym
**time_sorted_syms
, *top_dog
, *sym
;
454 double top_time
, time
;
458 first_output
= FALSE
;
464 if (bsd_style_output
)
466 if (print_descriptions
)
468 printf (_("\n\n\nflat profile:\n"));
474 printf (_("Flat profile:\n"));
477 /* Sort the symbol table by time (call-count and name as secondary
478 and tertiary keys). */
479 time_sorted_syms
= (Sym
**) xmalloc (symtab
.len
* sizeof (Sym
*));
481 for (index
= 0; index
< symtab
.len
; ++index
)
482 time_sorted_syms
[index
] = &symtab
.base
[index
];
484 qsort (time_sorted_syms
, symtab
.len
, sizeof (Sym
*), cmp_time
);
486 if (bsd_style_output
)
488 log_scale
= 5; /* Milli-seconds is BSD-default. */
492 /* Search for symbol with highest per-call
493 execution time and scale accordingly. */
498 for (index
= 0; index
< symtab
.len
; ++index
)
500 sym
= time_sorted_syms
[index
];
502 if (sym
->ncalls
!= 0)
504 time
= (sym
->hist
.time
+ sym
->cg
.child_time
) / sym
->ncalls
;
514 if (top_dog
&& top_dog
->ncalls
!= 0 && top_time
> 0.0)
518 while (SItab
[log_scale
].scale
* top_time
< 1000.0
519 && ((size_t) log_scale
520 < sizeof (SItab
) / sizeof (SItab
[0]) - 1))
527 /* For now, the dimension is always seconds. In the future, we
528 may also want to support other (pseudo-)dimensions (such as
529 I-cache misses etc.). */
530 print_header (SItab
[log_scale
].prefix
);
532 for (index
= 0; index
< symtab
.len
; ++index
)
534 addr
= time_sorted_syms
[index
]->addr
;
536 /* Print symbol if its in INCL_FLAT table or that table
537 is empty and the symbol is not in EXCL_FLAT. */
538 if (sym_lookup (&syms
[INCL_FLAT
], addr
)
539 || (syms
[INCL_FLAT
].len
== 0
540 && !sym_lookup (&syms
[EXCL_FLAT
], addr
)))
541 print_line (time_sorted_syms
[index
], SItab
[log_scale
].scale
);
544 free (time_sorted_syms
);
546 if (print_descriptions
&& !bsd_style_output
)