1 /* hist.c - Histogram related operations.
3 Copyright 1999, 2000, 2001, 2002, 2004, 2005, 2007
4 Free Software Foundation, Inc.
6 This file is part of GNU Binutils.
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
24 #include "libiberty.h"
25 #include "search_list.h"
38 #define UNITS_TO_CODE (offset_to_code / sizeof(UNIT))
40 static void scale_and_align_entries (void);
41 static void print_header (int);
42 static void print_line (Sym
*, double);
43 static int cmp_time (const PTR
, const PTR
);
45 /* Declarations of automatically generated functions to output blurbs. */
46 extern void flat_blurb (FILE * fp
);
48 static histogram
*find_histogram (bfd_vma lowpc
, bfd_vma highpc
);
49 static histogram
*find_histogram_for_pc (bfd_vma pc
);
52 static char hist_dimension
[16] = "seconds";
53 static char hist_dimension_abbrev
= 's';
55 static double accum_time
; /* Accumulated time so far for print_line(). */
56 static double total_time
; /* Total time for all routines. */
58 /* Table of SI prefixes for powers of 10 (used to automatically
59 scale some of the values in the flat profile). */
67 { 'T', 1e-12 }, /* tera */
68 { 'G', 1e-09 }, /* giga */
69 { 'M', 1e-06 }, /* mega */
70 { 'K', 1e-03 }, /* kilo */
72 { 'm', 1e+03 }, /* milli */
73 { 'u', 1e+06 }, /* micro */
74 { 'n', 1e+09 }, /* nano */
75 { 'p', 1e+12 }, /* pico */
76 { 'f', 1e+15 }, /* femto */
77 { 'a', 1e+18 } /* ato */
80 /* Reads just the header part of histogram record into
81 *RECORD from IFP. FILENAME is the name of IFP and
82 is provided for formatting error messages only.
84 If FIRST is non-zero, sets global variables HZ, HIST_DIMENSION,
85 HIST_DIMENSION_ABBREV, HIST_SCALE. If FIRST is zero, checks
86 that the new histogram is compatible with already-set values
87 of those variables and emits an error if that's not so. */
89 read_histogram_header (histogram
*record
,
90 FILE *ifp
, const char *filename
,
93 unsigned int profrate
;
94 char n_hist_dimension
[15];
95 char n_hist_dimension_abbrev
;
98 if (gmon_io_read_vma (ifp
, &record
->lowpc
)
99 || gmon_io_read_vma (ifp
, &record
->highpc
)
100 || gmon_io_read_32 (ifp
, &record
->num_bins
)
101 || gmon_io_read_32 (ifp
, &profrate
)
102 || gmon_io_read (ifp
, n_hist_dimension
, 15)
103 || gmon_io_read (ifp
, &n_hist_dimension_abbrev
, 1))
105 fprintf (stderr
, _("%s: %s: unexpected end of file\n"),
111 n_hist_scale
= (double)((record
->highpc
- record
->lowpc
) / sizeof (UNIT
))
116 /* We don't try to veryfy profrate is the same for all histogram
117 records. If we have two histogram records for the same
118 address range and profiling samples is done as often
119 as possible as opposed on timer, then the actual profrate will
120 be slightly different. Most of the time the difference does not
121 matter and insisting that profiling rate is exactly the same
122 will only create inconvenient. */
124 memcpy (hist_dimension
, n_hist_dimension
, 15);
125 hist_dimension_abbrev
= n_hist_dimension_abbrev
;
126 hist_scale
= n_hist_scale
;
130 if (strncmp (n_hist_dimension
, hist_dimension
, 15) != 0)
133 _("%s: dimension unit changed between histogram records\n"
136 whoami
, whoami
, hist_dimension
, whoami
, n_hist_dimension
);
140 if (n_hist_dimension_abbrev
!= hist_dimension_abbrev
)
143 _("%s: dimension abbreviation changed between histogram records\n"
146 whoami
, whoami
, hist_dimension_abbrev
, whoami
, n_hist_dimension_abbrev
);
150 /* The only reason we require the same scale for histograms is that
151 there's code (notably printing code), that prints units,
152 and it would be very confusing to have one unit mean different
153 things for different functions. */
154 if (fabs (hist_scale
- n_hist_scale
) > 0.000001)
157 _("%s: different scales in histogram records"),
164 /* Read the histogram from file IFP. FILENAME is the name of IFP and
165 is provided for formatting error messages only. */
168 hist_read_rec (FILE * ifp
, const char *filename
)
170 bfd_vma lowpc
, highpc
;
172 histogram
*record
, *existing_record
;
175 /* 1. Read the header and see if there's existing record for the
176 same address range and that there are no overlapping records. */
177 read_histogram_header (&n_record
, ifp
, filename
, num_histograms
== 0);
179 existing_record
= find_histogram (n_record
.lowpc
, n_record
.highpc
);
182 record
= existing_record
;
186 /* If this record overlaps, but does not completely match an existing
187 record, it's an error. */
188 lowpc
= n_record
.lowpc
;
189 highpc
= n_record
.highpc
;
190 hist_clip_symbol_address (&lowpc
, &highpc
);
194 _("%s: overlapping histogram records\n"),
199 /* This is new record. Add it to global array and allocate space for
201 histograms
= (struct histogram
*)
202 xrealloc (histograms
, sizeof (histogram
) * (num_histograms
+ 1));
203 memcpy (histograms
+ num_histograms
,
204 &n_record
, sizeof (histogram
));
205 record
= &histograms
[num_histograms
];
208 record
->sample
= (int *) xmalloc (record
->num_bins
209 * sizeof (record
->sample
[0]));
210 memset (record
->sample
, 0, record
->num_bins
* sizeof (record
->sample
[0]));
213 /* 2. We have either a new record (with zeroed histogram data), or an existing
214 record with some data in the histogram already. Read new data into the
215 record, adding hit counts. */
218 printf ("[hist_read_rec] n_lowpc 0x%lx n_highpc 0x%lx ncnt %u\n",
219 (unsigned long) record
->lowpc
, (unsigned long) record
->highpc
,
222 for (i
= 0; i
< record
->num_bins
; ++i
)
225 if (fread (&count
[0], sizeof (count
), 1, ifp
) != 1)
228 _("%s: %s: unexpected EOF after reading %u of %u samples\n"),
229 whoami
, filename
, i
, record
->num_bins
);
232 record
->sample
[i
] += bfd_get_16 (core_bfd
, (bfd_byte
*) & count
[0]);
234 printf ("[hist_read_rec] 0x%lx: %u\n",
235 (unsigned long) (record
->lowpc
236 + i
* (record
->highpc
- record
->lowpc
)
243 /* Write all execution histograms file OFP. FILENAME is the name
244 of OFP and is provided for formatting error-messages only. */
247 hist_write_hist (FILE * ofp
, const char *filename
)
252 for (r
= 0; r
< num_histograms
; ++r
)
254 histogram
*record
= &histograms
[r
];
258 if (gmon_io_write_8 (ofp
, GMON_TAG_TIME_HIST
)
259 || gmon_io_write_vma (ofp
, record
->lowpc
)
260 || gmon_io_write_vma (ofp
, record
->highpc
)
261 || gmon_io_write_32 (ofp
, record
->num_bins
)
262 || gmon_io_write_32 (ofp
, hz
)
263 || gmon_io_write (ofp
, hist_dimension
, 15)
264 || gmon_io_write (ofp
, &hist_dimension_abbrev
, 1))
270 for (i
= 0; i
< record
->num_bins
; ++i
)
272 bfd_put_16 (core_bfd
, (bfd_vma
) record
->sample
[i
], (bfd_byte
*) &count
[0]);
274 if (fwrite (&count
[0], sizeof (count
), 1, ofp
) != 1)
283 /* Calculate scaled entry point addresses (to save time in
284 hist_assign_samples), and, on architectures that have procedure
285 entry masks at the start of a function, possibly push the scaled
286 entry points over the procedure entry mask, if it turns out that
287 the entry point is in one bin and the code for a routine is in the
291 scale_and_align_entries ()
294 bfd_vma bin_of_entry
;
297 for (sym
= symtab
.base
; sym
< symtab
.limit
; sym
++)
299 histogram
*r
= find_histogram_for_pc (sym
->addr
);
301 sym
->hist
.scaled_addr
= sym
->addr
/ sizeof (UNIT
);
305 bin_of_entry
= (sym
->hist
.scaled_addr
- r
->lowpc
) / hist_scale
;
306 bin_of_code
= ((sym
->hist
.scaled_addr
+ UNITS_TO_CODE
- r
->lowpc
)
308 if (bin_of_entry
< bin_of_code
)
311 printf ("[scale_and_align_entries] pushing 0x%lx to 0x%lx\n",
312 (unsigned long) sym
->hist
.scaled_addr
,
313 (unsigned long) (sym
->hist
.scaled_addr
315 sym
->hist
.scaled_addr
+= UNITS_TO_CODE
;
322 /* Assign samples to the symbol to which they belong.
324 Histogram bin I covers some address range [BIN_LOWPC,BIN_HIGH_PC)
325 which may overlap one more symbol address ranges. If a symbol
326 overlaps with the bin's address range by O percent, then O percent
327 of the bin's count is credited to that symbol.
329 There are three cases as to where BIN_LOW_PC and BIN_HIGH_PC can be
330 with respect to the symbol's address range [SYM_LOW_PC,
331 SYM_HIGH_PC) as shown in the following diagram. OVERLAP computes
332 the distance (in UNITs) between the arrows, the fraction of the
333 sample that is to be credited to the symbol which starts at
336 sym_low_pc sym_high_pc
340 +-----------------------------------------------+
342 | ->| |<- ->| |<- ->| |<- |
344 +---------+ +---------+ +---------+
348 bin_low_pc bin_high_pc bin_low_pc bin_high_pc bin_low_pc bin_high_pc
350 For the VAX we assert that samples will never fall in the first two
351 bytes of any routine, since that is the entry mask, thus we call
352 scale_and_align_entries() to adjust the entry points if the entry
353 mask falls in one bin but the code for the routine doesn't start
354 until the next bin. In conjunction with the alignment of routine
355 addresses, this should allow us to have only one sample for every
356 four bytes of text space and never have any overlap (the two end
360 hist_assign_samples_1 (histogram
*r
)
362 bfd_vma bin_low_pc
, bin_high_pc
;
363 bfd_vma sym_low_pc
, sym_high_pc
;
364 bfd_vma overlap
, addr
;
365 unsigned int bin_count
;
369 bfd_vma lowpc
= r
->lowpc
/ sizeof (UNIT
);
371 /* Iterate over all sample bins. */
372 for (i
= 0, j
= 1; i
< r
->num_bins
; ++i
)
374 bin_count
= r
->sample
[i
];
378 bin_low_pc
= lowpc
+ (bfd_vma
) (hist_scale
* i
);
379 bin_high_pc
= lowpc
+ (bfd_vma
) (hist_scale
* (i
+ 1));
384 "[assign_samples] bin_low_pc=0x%lx, bin_high_pc=0x%lx, bin_count=%u\n",
385 (unsigned long) (sizeof (UNIT
) * bin_low_pc
),
386 (unsigned long) (sizeof (UNIT
) * bin_high_pc
),
390 /* Credit all symbols that are covered by bin I. */
391 for (j
= j
- 1; j
< symtab
.len
; ++j
)
393 sym_low_pc
= symtab
.base
[j
].hist
.scaled_addr
;
394 sym_high_pc
= symtab
.base
[j
+ 1].hist
.scaled_addr
;
396 /* If high end of bin is below entry address,
398 if (bin_high_pc
< sym_low_pc
)
401 /* If low end of bin is above high end of symbol,
402 go for next symbol. */
403 if (bin_low_pc
>= sym_high_pc
)
407 MIN (bin_high_pc
, sym_high_pc
) - MAX (bin_low_pc
, sym_low_pc
);
412 "[assign_samples] [0x%lx,0x%lx) %s gets %f ticks %ld overlap\n",
413 (unsigned long) symtab
.base
[j
].addr
,
414 (unsigned long) (sizeof (UNIT
) * sym_high_pc
),
415 symtab
.base
[j
].name
, overlap
* time
/ hist_scale
,
418 addr
= symtab
.base
[j
].addr
;
419 credit
= overlap
* time
/ hist_scale
;
421 /* Credit symbol if it appears in INCL_FLAT or that
422 table is empty and it does not appear it in
424 if (sym_lookup (&syms
[INCL_FLAT
], addr
)
425 || (syms
[INCL_FLAT
].len
== 0
426 && !sym_lookup (&syms
[EXCL_FLAT
], addr
)))
428 symtab
.base
[j
].hist
.time
+= credit
;
432 total_time
-= credit
;
438 DBG (SAMPLEDEBUG
, printf ("[assign_samples] total_time %f\n",
442 /* Calls 'hist_assign_sampes_1' for all histogram records read so far. */
444 hist_assign_samples ()
448 scale_and_align_entries ();
450 for (i
= 0; i
< num_histograms
; ++i
)
451 hist_assign_samples_1 (&histograms
[i
]);
455 /* Print header for flag histogram profile. */
458 print_header (int prefix
)
462 sprintf (unit
, _("%c%c/call"), prefix
, hist_dimension_abbrev
);
464 if (bsd_style_output
)
466 printf (_("\ngranularity: each sample hit covers %ld byte(s)"),
467 (long) hist_scale
* (long) sizeof (UNIT
));
468 if (total_time
> 0.0)
470 printf (_(" for %.2f%% of %.2f %s\n\n"),
471 100.0 / total_time
, total_time
/ hz
, hist_dimension
);
476 printf (_("\nEach sample counts as %g %s.\n"), 1.0 / hz
, hist_dimension
);
479 if (total_time
<= 0.0)
481 printf (_(" no time accumulated\n\n"));
483 /* This doesn't hurt since all the numerators will be zero. */
487 printf ("%5.5s %10.10s %8.8s %8.8s %8.8s %8.8s %-8.8s\n",
488 "% ", _("cumulative"), _("self "), "", _("self "), _("total "),
490 printf ("%5.5s %9.9s %8.8s %8.8s %8.8s %8.8s %-8.8s\n",
491 _("time"), hist_dimension
, hist_dimension
, _("calls"), unit
, unit
,
497 print_line (Sym
*sym
, double scale
)
499 if (ignore_zeros
&& sym
->ncalls
== 0 && sym
->hist
.time
== 0)
502 accum_time
+= sym
->hist
.time
;
504 if (bsd_style_output
)
505 printf ("%5.1f %10.2f %8.2f",
506 total_time
> 0.0 ? 100 * sym
->hist
.time
/ total_time
: 0.0,
507 accum_time
/ hz
, sym
->hist
.time
/ hz
);
509 printf ("%6.2f %9.2f %8.2f",
510 total_time
> 0.0 ? 100 * sym
->hist
.time
/ total_time
: 0.0,
511 accum_time
/ hz
, sym
->hist
.time
/ hz
);
513 if (sym
->ncalls
!= 0)
514 printf (" %8lu %8.2f %8.2f ",
515 sym
->ncalls
, scale
* sym
->hist
.time
/ hz
/ sym
->ncalls
,
516 scale
* (sym
->hist
.time
+ sym
->cg
.child_time
) / hz
/ sym
->ncalls
);
518 printf (" %8.8s %8.8s %8.8s ", "", "", "");
520 if (bsd_style_output
)
523 print_name_only (sym
);
529 /* Compare LP and RP. The primary comparison key is execution time,
530 the secondary is number of invocation, and the tertiary is the
531 lexicographic order of the function names. */
534 cmp_time (const PTR lp
, const PTR rp
)
536 const Sym
*left
= *(const Sym
**) lp
;
537 const Sym
*right
= *(const Sym
**) rp
;
540 time_diff
= right
->hist
.time
- left
->hist
.time
;
548 if (right
->ncalls
> left
->ncalls
)
551 if (right
->ncalls
< left
->ncalls
)
554 return strcmp (left
->name
, right
->name
);
558 /* Print the flat histogram profile. */
563 Sym
**time_sorted_syms
, *top_dog
, *sym
;
566 double top_time
, time
;
570 first_output
= FALSE
;
576 if (bsd_style_output
)
578 if (print_descriptions
)
580 printf (_("\n\n\nflat profile:\n"));
586 printf (_("Flat profile:\n"));
589 /* Sort the symbol table by time (call-count and name as secondary
590 and tertiary keys). */
591 time_sorted_syms
= (Sym
**) xmalloc (symtab
.len
* sizeof (Sym
*));
593 for (index
= 0; index
< symtab
.len
; ++index
)
594 time_sorted_syms
[index
] = &symtab
.base
[index
];
596 qsort (time_sorted_syms
, symtab
.len
, sizeof (Sym
*), cmp_time
);
598 if (bsd_style_output
)
600 log_scale
= 5; /* Milli-seconds is BSD-default. */
604 /* Search for symbol with highest per-call
605 execution time and scale accordingly. */
610 for (index
= 0; index
< symtab
.len
; ++index
)
612 sym
= time_sorted_syms
[index
];
614 if (sym
->ncalls
!= 0)
616 time
= (sym
->hist
.time
+ sym
->cg
.child_time
) / sym
->ncalls
;
626 if (top_dog
&& top_dog
->ncalls
!= 0 && top_time
> 0.0)
630 for (log_scale
= 0; log_scale
< ARRAY_SIZE (SItab
); log_scale
++)
632 double scaled_value
= SItab
[log_scale
].scale
* top_time
;
634 if (scaled_value
>= 1.0 && scaled_value
< 1000.0)
640 /* For now, the dimension is always seconds. In the future, we
641 may also want to support other (pseudo-)dimensions (such as
642 I-cache misses etc.). */
643 print_header (SItab
[log_scale
].prefix
);
645 for (index
= 0; index
< symtab
.len
; ++index
)
647 addr
= time_sorted_syms
[index
]->addr
;
649 /* Print symbol if its in INCL_FLAT table or that table
650 is empty and the symbol is not in EXCL_FLAT. */
651 if (sym_lookup (&syms
[INCL_FLAT
], addr
)
652 || (syms
[INCL_FLAT
].len
== 0
653 && !sym_lookup (&syms
[EXCL_FLAT
], addr
)))
654 print_line (time_sorted_syms
[index
], SItab
[log_scale
].scale
);
657 free (time_sorted_syms
);
659 if (print_descriptions
&& !bsd_style_output
)
664 hist_check_address (unsigned address
)
668 for (i
= 0; i
< num_histograms
; ++i
)
669 if (histograms
[i
].lowpc
<= address
&& address
< histograms
[i
].highpc
)
676 #define min(a,b) (((a)<(b)) ? (a) : (b))
679 #define max(a,b) (((a)>(b)) ? (a) : (b))
683 hist_clip_symbol_address (bfd_vma
*p_lowpc
, bfd_vma
*p_highpc
)
688 if (num_histograms
== 0)
690 *p_highpc
= *p_lowpc
;
694 for (i
= 0; i
< num_histograms
; ++i
)
696 bfd_vma common_low
, common_high
;
697 common_low
= max (histograms
[i
].lowpc
, *p_lowpc
);
698 common_high
= min (histograms
[i
].highpc
, *p_highpc
);
700 if (common_low
< common_high
)
705 _("%s: found a symbol that covers "
706 "several histogram records"),
712 *p_lowpc
= common_low
;
713 *p_highpc
= common_high
;
718 *p_highpc
= *p_lowpc
;
721 /* Find and return exising histogram record having the same lowpc and
722 highpc as passed via the parameters. Return NULL if nothing is found.
723 The return value is valid until any new histogram is read. */
725 find_histogram (bfd_vma lowpc
, bfd_vma highpc
)
728 for (i
= 0; i
< num_histograms
; ++i
)
730 if (histograms
[i
].lowpc
== lowpc
&& histograms
[i
].highpc
== highpc
)
731 return &histograms
[i
];
736 /* Given a PC, return histogram record which address range include this PC.
737 Return NULL if there's no such record. */
739 find_histogram_for_pc (bfd_vma pc
)
742 for (i
= 0; i
< num_histograms
; ++i
)
744 if (histograms
[i
].lowpc
<= pc
&& pc
< histograms
[i
].highpc
)
745 return &histograms
[i
];