Remove extra blank line.
[binutils.git] / gprof / hist.c
blobd0c02d3f0d4bae646bb4e53e4c0fb07fc2c58167
1 /* hist.c - Histogram related operations.
3 Copyright 1999, 2000, 2001, 2002, 2004, 2005, 2007
4 Free Software Foundation, Inc.
6 This file is part of GNU Binutils.
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
21 02110-1301, USA. */
23 #include "gprof.h"
24 #include "libiberty.h"
25 #include "search_list.h"
26 #include "source.h"
27 #include "symtab.h"
28 #include "corefile.h"
29 #include "gmon_io.h"
30 #include "gmon_out.h"
31 #include "hist.h"
32 #include "sym_ids.h"
33 #include "utils.h"
34 #include "math.h"
35 #include "stdio.h"
36 #include "stdlib.h"
38 #define UNITS_TO_CODE (offset_to_code / sizeof(UNIT))
40 static void scale_and_align_entries (void);
41 static void print_header (int);
42 static void print_line (Sym *, double);
43 static int cmp_time (const PTR, const PTR);
45 /* Declarations of automatically generated functions to output blurbs. */
46 extern void flat_blurb (FILE * fp);
48 static histogram *find_histogram (bfd_vma lowpc, bfd_vma highpc);
49 static histogram *find_histogram_for_pc (bfd_vma pc);
51 double hist_scale;
52 static char hist_dimension[16] = "seconds";
53 static char hist_dimension_abbrev = 's';
55 static double accum_time; /* Accumulated time so far for print_line(). */
56 static double total_time; /* Total time for all routines. */
58 /* Table of SI prefixes for powers of 10 (used to automatically
59 scale some of the values in the flat profile). */
60 const struct
62 char prefix;
63 double scale;
65 SItab[] =
67 { 'T', 1e-12 }, /* tera */
68 { 'G', 1e-09 }, /* giga */
69 { 'M', 1e-06 }, /* mega */
70 { 'K', 1e-03 }, /* kilo */
71 { ' ', 1e-00 },
72 { 'm', 1e+03 }, /* milli */
73 { 'u', 1e+06 }, /* micro */
74 { 'n', 1e+09 }, /* nano */
75 { 'p', 1e+12 }, /* pico */
76 { 'f', 1e+15 }, /* femto */
77 { 'a', 1e+18 } /* ato */
80 /* Reads just the header part of histogram record into
81 *RECORD from IFP. FILENAME is the name of IFP and
82 is provided for formatting error messages only.
84 If FIRST is non-zero, sets global variables HZ, HIST_DIMENSION,
85 HIST_DIMENSION_ABBREV, HIST_SCALE. If FIRST is zero, checks
86 that the new histogram is compatible with already-set values
87 of those variables and emits an error if that's not so. */
88 static void
89 read_histogram_header (histogram *record,
90 FILE *ifp, const char *filename,
91 int first)
93 unsigned int profrate;
94 char n_hist_dimension[15];
95 char n_hist_dimension_abbrev;
96 double n_hist_scale;
98 if (gmon_io_read_vma (ifp, &record->lowpc)
99 || gmon_io_read_vma (ifp, &record->highpc)
100 || gmon_io_read_32 (ifp, &record->num_bins)
101 || gmon_io_read_32 (ifp, &profrate)
102 || gmon_io_read (ifp, n_hist_dimension, 15)
103 || gmon_io_read (ifp, &n_hist_dimension_abbrev, 1))
105 fprintf (stderr, _("%s: %s: unexpected end of file\n"),
106 whoami, filename);
108 done (1);
111 n_hist_scale = (double)((record->highpc - record->lowpc) / sizeof (UNIT))
112 / record->num_bins;
114 if (first)
116 /* We don't try to veryfy profrate is the same for all histogram
117 records. If we have two histogram records for the same
118 address range and profiling samples is done as often
119 as possible as opposed on timer, then the actual profrate will
120 be slightly different. Most of the time the difference does not
121 matter and insisting that profiling rate is exactly the same
122 will only create inconvenient. */
123 hz = profrate;
124 memcpy (hist_dimension, n_hist_dimension, 15);
125 hist_dimension_abbrev = n_hist_dimension_abbrev;
126 hist_scale = n_hist_scale;
128 else
130 if (strncmp (n_hist_dimension, hist_dimension, 15) != 0)
132 fprintf (stderr,
133 _("%s: dimension unit changed between histogram records\n"
134 "%s: from '%s'\n"
135 "%s: to '%s'\n"),
136 whoami, whoami, hist_dimension, whoami, n_hist_dimension);
137 done (1);
140 if (n_hist_dimension_abbrev != hist_dimension_abbrev)
142 fprintf (stderr,
143 _("%s: dimension abbreviation changed between histogram records\n"
144 "%s: from '%c'\n"
145 "%s: to '%c'\n"),
146 whoami, whoami, hist_dimension_abbrev, whoami, n_hist_dimension_abbrev);
147 done (1);
150 /* The only reason we require the same scale for histograms is that
151 there's code (notably printing code), that prints units,
152 and it would be very confusing to have one unit mean different
153 things for different functions. */
154 if (fabs (hist_scale - n_hist_scale) > 0.000001)
156 fprintf (stderr,
157 _("%s: different scales in histogram records"),
158 whoami);
159 done (1);
164 /* Read the histogram from file IFP. FILENAME is the name of IFP and
165 is provided for formatting error messages only. */
167 void
168 hist_read_rec (FILE * ifp, const char *filename)
170 bfd_vma lowpc, highpc;
171 histogram n_record;
172 histogram *record, *existing_record;
173 unsigned i;
175 /* 1. Read the header and see if there's existing record for the
176 same address range and that there are no overlapping records. */
177 read_histogram_header (&n_record, ifp, filename, num_histograms == 0);
179 existing_record = find_histogram (n_record.lowpc, n_record.highpc);
180 if (existing_record)
182 record = existing_record;
184 else
186 /* If this record overlaps, but does not completely match an existing
187 record, it's an error. */
188 lowpc = n_record.lowpc;
189 highpc = n_record.highpc;
190 hist_clip_symbol_address (&lowpc, &highpc);
191 if (lowpc != highpc)
193 fprintf (stderr,
194 _("%s: overlapping histogram records\n"),
195 whoami);
196 done (1);
199 /* This is new record. Add it to global array and allocate space for
200 the samples. */
201 histograms = xrealloc (histograms,
202 sizeof (histogram) * (num_histograms + 1));
203 memcpy (histograms + num_histograms,
204 &n_record, sizeof (histogram));
205 record = &histograms[num_histograms];
206 ++num_histograms;
208 record->sample = (int *) xmalloc (record->num_bins
209 * sizeof (record->sample[0]));
210 memset (record->sample, 0, record->num_bins * sizeof (record->sample[0]));
213 /* 2. We have either a new record (with zeroed histogram data), or an existing
214 record with some data in the histogram already. Read new data into the
215 record, adding hit counts. */
217 DBG (SAMPLEDEBUG,
218 printf ("[hist_read_rec] n_lowpc 0x%lx n_highpc 0x%lx ncnt %u\n",
219 (unsigned long) record->lowpc, (unsigned long) record->highpc,
220 record->num_bins));
222 for (i = 0; i < record->num_bins; ++i)
224 UNIT count;
225 if (fread (&count[0], sizeof (count), 1, ifp) != 1)
227 fprintf (stderr,
228 _("%s: %s: unexpected EOF after reading %u of %u samples\n"),
229 whoami, filename, i, record->num_bins);
230 done (1);
232 record->sample[i] += bfd_get_16 (core_bfd, (bfd_byte *) & count[0]);
233 DBG (SAMPLEDEBUG,
234 printf ("[hist_read_rec] 0x%lx: %u\n",
235 (unsigned long) (record->lowpc
236 + i * (record->highpc - record->lowpc)
237 / record->num_bins),
238 record->sample[i]));
243 /* Write all execution histograms file OFP. FILENAME is the name
244 of OFP and is provided for formatting error-messages only. */
246 void
247 hist_write_hist (FILE * ofp, const char *filename)
249 UNIT count;
250 unsigned int i, r;
252 for (r = 0; r < num_histograms; ++r)
254 histogram *record = &histograms[r];
256 /* Write header. */
258 if (gmon_io_write_8 (ofp, GMON_TAG_TIME_HIST)
259 || gmon_io_write_vma (ofp, record->lowpc)
260 || gmon_io_write_vma (ofp, record->highpc)
261 || gmon_io_write_32 (ofp, record->num_bins)
262 || gmon_io_write_32 (ofp, hz)
263 || gmon_io_write (ofp, hist_dimension, 15)
264 || gmon_io_write (ofp, &hist_dimension_abbrev, 1))
266 perror (filename);
267 done (1);
270 for (i = 0; i < record->num_bins; ++i)
272 bfd_put_16 (core_bfd, (bfd_vma) record->sample[i], (bfd_byte *) &count[0]);
274 if (fwrite (&count[0], sizeof (count), 1, ofp) != 1)
276 perror (filename);
277 done (1);
283 /* Calculate scaled entry point addresses (to save time in
284 hist_assign_samples), and, on architectures that have procedure
285 entry masks at the start of a function, possibly push the scaled
286 entry points over the procedure entry mask, if it turns out that
287 the entry point is in one bin and the code for a routine is in the
288 next bin. */
290 static void
291 scale_and_align_entries ()
293 Sym *sym;
294 bfd_vma bin_of_entry;
295 bfd_vma bin_of_code;
297 for (sym = symtab.base; sym < symtab.limit; sym++)
299 histogram *r = find_histogram_for_pc (sym->addr);
301 sym->hist.scaled_addr = sym->addr / sizeof (UNIT);
303 if (r)
305 bin_of_entry = (sym->hist.scaled_addr - r->lowpc) / hist_scale;
306 bin_of_code = ((sym->hist.scaled_addr + UNITS_TO_CODE - r->lowpc)
307 / hist_scale);
308 if (bin_of_entry < bin_of_code)
310 DBG (SAMPLEDEBUG,
311 printf ("[scale_and_align_entries] pushing 0x%lx to 0x%lx\n",
312 (unsigned long) sym->hist.scaled_addr,
313 (unsigned long) (sym->hist.scaled_addr
314 + UNITS_TO_CODE)));
315 sym->hist.scaled_addr += UNITS_TO_CODE;
322 /* Assign samples to the symbol to which they belong.
324 Histogram bin I covers some address range [BIN_LOWPC,BIN_HIGH_PC)
325 which may overlap one more symbol address ranges. If a symbol
326 overlaps with the bin's address range by O percent, then O percent
327 of the bin's count is credited to that symbol.
329 There are three cases as to where BIN_LOW_PC and BIN_HIGH_PC can be
330 with respect to the symbol's address range [SYM_LOW_PC,
331 SYM_HIGH_PC) as shown in the following diagram. OVERLAP computes
332 the distance (in UNITs) between the arrows, the fraction of the
333 sample that is to be credited to the symbol which starts at
334 SYM_LOW_PC.
336 sym_low_pc sym_high_pc
340 +-----------------------------------------------+
342 | ->| |<- ->| |<- ->| |<- |
343 | | | | | |
344 +---------+ +---------+ +---------+
346 ^ ^ ^ ^ ^ ^
347 | | | | | |
348 bin_low_pc bin_high_pc bin_low_pc bin_high_pc bin_low_pc bin_high_pc
350 For the VAX we assert that samples will never fall in the first two
351 bytes of any routine, since that is the entry mask, thus we call
352 scale_and_align_entries() to adjust the entry points if the entry
353 mask falls in one bin but the code for the routine doesn't start
354 until the next bin. In conjunction with the alignment of routine
355 addresses, this should allow us to have only one sample for every
356 four bytes of text space and never have any overlap (the two end
357 cases, above). */
359 static void
360 hist_assign_samples_1 (histogram *r)
362 bfd_vma bin_low_pc, bin_high_pc;
363 bfd_vma sym_low_pc, sym_high_pc;
364 bfd_vma overlap, addr;
365 unsigned int bin_count;
366 unsigned int i, j;
367 double time, credit;
369 bfd_vma lowpc = r->lowpc / sizeof (UNIT);
371 /* Iterate over all sample bins. */
372 for (i = 0, j = 1; i < r->num_bins; ++i)
374 bin_count = r->sample[i];
375 if (! bin_count)
376 continue;
378 bin_low_pc = lowpc + (bfd_vma) (hist_scale * i);
379 bin_high_pc = lowpc + (bfd_vma) (hist_scale * (i + 1));
380 time = bin_count;
382 DBG (SAMPLEDEBUG,
383 printf (
384 "[assign_samples] bin_low_pc=0x%lx, bin_high_pc=0x%lx, bin_count=%u\n",
385 (unsigned long) (sizeof (UNIT) * bin_low_pc),
386 (unsigned long) (sizeof (UNIT) * bin_high_pc),
387 bin_count));
388 total_time += time;
390 /* Credit all symbols that are covered by bin I. */
391 for (j = j - 1; j < symtab.len; ++j)
393 sym_low_pc = symtab.base[j].hist.scaled_addr;
394 sym_high_pc = symtab.base[j + 1].hist.scaled_addr;
396 /* If high end of bin is below entry address,
397 go for next bin. */
398 if (bin_high_pc < sym_low_pc)
399 break;
401 /* If low end of bin is above high end of symbol,
402 go for next symbol. */
403 if (bin_low_pc >= sym_high_pc)
404 continue;
406 overlap =
407 MIN (bin_high_pc, sym_high_pc) - MAX (bin_low_pc, sym_low_pc);
408 if (overlap > 0)
410 DBG (SAMPLEDEBUG,
411 printf (
412 "[assign_samples] [0x%lx,0x%lx) %s gets %f ticks %ld overlap\n",
413 (unsigned long) symtab.base[j].addr,
414 (unsigned long) (sizeof (UNIT) * sym_high_pc),
415 symtab.base[j].name, overlap * time / hist_scale,
416 (long) overlap));
418 addr = symtab.base[j].addr;
419 credit = overlap * time / hist_scale;
421 /* Credit symbol if it appears in INCL_FLAT or that
422 table is empty and it does not appear it in
423 EXCL_FLAT. */
424 if (sym_lookup (&syms[INCL_FLAT], addr)
425 || (syms[INCL_FLAT].len == 0
426 && !sym_lookup (&syms[EXCL_FLAT], addr)))
428 symtab.base[j].hist.time += credit;
430 else
432 total_time -= credit;
438 DBG (SAMPLEDEBUG, printf ("[assign_samples] total_time %f\n",
439 total_time));
442 /* Calls 'hist_assign_sampes_1' for all histogram records read so far. */
443 void
444 hist_assign_samples ()
446 unsigned i;
448 scale_and_align_entries ();
450 for (i = 0; i < num_histograms; ++i)
451 hist_assign_samples_1 (&histograms[i]);
455 /* Print header for flag histogram profile. */
457 static void
458 print_header (int prefix)
460 char unit[64];
462 sprintf (unit, _("%c%c/call"), prefix, hist_dimension_abbrev);
464 if (bsd_style_output)
466 printf (_("\ngranularity: each sample hit covers %ld byte(s)"),
467 (long) hist_scale * (long) sizeof (UNIT));
468 if (total_time > 0.0)
470 printf (_(" for %.2f%% of %.2f %s\n\n"),
471 100.0 / total_time, total_time / hz, hist_dimension);
474 else
476 printf (_("\nEach sample counts as %g %s.\n"), 1.0 / hz, hist_dimension);
479 if (total_time <= 0.0)
481 printf (_(" no time accumulated\n\n"));
483 /* This doesn't hurt since all the numerators will be zero. */
484 total_time = 1.0;
487 printf ("%5.5s %10.10s %8.8s %8.8s %8.8s %8.8s %-8.8s\n",
488 "% ", _("cumulative"), _("self "), "", _("self "), _("total "),
489 "");
490 printf ("%5.5s %9.9s %8.8s %8.8s %8.8s %8.8s %-8.8s\n",
491 _("time"), hist_dimension, hist_dimension, _("calls"), unit, unit,
492 _("name"));
496 static void
497 print_line (Sym *sym, double scale)
499 if (ignore_zeros && sym->ncalls == 0 && sym->hist.time == 0)
500 return;
502 accum_time += sym->hist.time;
504 if (bsd_style_output)
505 printf ("%5.1f %10.2f %8.2f",
506 total_time > 0.0 ? 100 * sym->hist.time / total_time : 0.0,
507 accum_time / hz, sym->hist.time / hz);
508 else
509 printf ("%6.2f %9.2f %8.2f",
510 total_time > 0.0 ? 100 * sym->hist.time / total_time : 0.0,
511 accum_time / hz, sym->hist.time / hz);
513 if (sym->ncalls != 0)
514 printf (" %8lu %8.2f %8.2f ",
515 sym->ncalls, scale * sym->hist.time / hz / sym->ncalls,
516 scale * (sym->hist.time + sym->cg.child_time) / hz / sym->ncalls);
517 else
518 printf (" %8.8s %8.8s %8.8s ", "", "", "");
520 if (bsd_style_output)
521 print_name (sym);
522 else
523 print_name_only (sym);
525 printf ("\n");
529 /* Compare LP and RP. The primary comparison key is execution time,
530 the secondary is number of invocation, and the tertiary is the
531 lexicographic order of the function names. */
533 static int
534 cmp_time (const PTR lp, const PTR rp)
536 const Sym *left = *(const Sym **) lp;
537 const Sym *right = *(const Sym **) rp;
538 double time_diff;
540 time_diff = right->hist.time - left->hist.time;
542 if (time_diff > 0.0)
543 return 1;
545 if (time_diff < 0.0)
546 return -1;
548 if (right->ncalls > left->ncalls)
549 return 1;
551 if (right->ncalls < left->ncalls)
552 return -1;
554 return strcmp (left->name, right->name);
558 /* Print the flat histogram profile. */
560 void
561 hist_print ()
563 Sym **time_sorted_syms, *top_dog, *sym;
564 unsigned int index;
565 unsigned log_scale;
566 double top_time, time;
567 bfd_vma addr;
569 if (first_output)
570 first_output = FALSE;
571 else
572 printf ("\f\n");
574 accum_time = 0.0;
576 if (bsd_style_output)
578 if (print_descriptions)
580 printf (_("\n\n\nflat profile:\n"));
581 flat_blurb (stdout);
584 else
586 printf (_("Flat profile:\n"));
589 /* Sort the symbol table by time (call-count and name as secondary
590 and tertiary keys). */
591 time_sorted_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
593 for (index = 0; index < symtab.len; ++index)
594 time_sorted_syms[index] = &symtab.base[index];
596 qsort (time_sorted_syms, symtab.len, sizeof (Sym *), cmp_time);
598 if (bsd_style_output)
600 log_scale = 5; /* Milli-seconds is BSD-default. */
602 else
604 /* Search for symbol with highest per-call
605 execution time and scale accordingly. */
606 log_scale = 0;
607 top_dog = 0;
608 top_time = 0.0;
610 for (index = 0; index < symtab.len; ++index)
612 sym = time_sorted_syms[index];
614 if (sym->ncalls != 0)
616 time = (sym->hist.time + sym->cg.child_time) / sym->ncalls;
618 if (time > top_time)
620 top_dog = sym;
621 top_time = time;
626 if (top_dog && top_dog->ncalls != 0 && top_time > 0.0)
628 top_time /= hz;
630 for (log_scale = 0; log_scale < ARRAY_SIZE (SItab); log_scale ++)
632 double scaled_value = SItab[log_scale].scale * top_time;
634 if (scaled_value >= 1.0 && scaled_value < 1000.0)
635 break;
640 /* For now, the dimension is always seconds. In the future, we
641 may also want to support other (pseudo-)dimensions (such as
642 I-cache misses etc.). */
643 print_header (SItab[log_scale].prefix);
645 for (index = 0; index < symtab.len; ++index)
647 addr = time_sorted_syms[index]->addr;
649 /* Print symbol if its in INCL_FLAT table or that table
650 is empty and the symbol is not in EXCL_FLAT. */
651 if (sym_lookup (&syms[INCL_FLAT], addr)
652 || (syms[INCL_FLAT].len == 0
653 && !sym_lookup (&syms[EXCL_FLAT], addr)))
654 print_line (time_sorted_syms[index], SItab[log_scale].scale);
657 free (time_sorted_syms);
659 if (print_descriptions && !bsd_style_output)
660 flat_blurb (stdout);
664 hist_check_address (unsigned address)
666 unsigned i;
668 for (i = 0; i < num_histograms; ++i)
669 if (histograms[i].lowpc <= address && address < histograms[i].highpc)
670 return 1;
672 return 0;
675 #if ! defined(min)
676 #define min(a,b) (((a)<(b)) ? (a) : (b))
677 #endif
678 #if ! defined(max)
679 #define max(a,b) (((a)>(b)) ? (a) : (b))
680 #endif
682 void
683 hist_clip_symbol_address (bfd_vma *p_lowpc, bfd_vma *p_highpc)
685 unsigned i;
686 int found = 0;
688 if (num_histograms == 0)
690 *p_highpc = *p_lowpc;
691 return;
694 for (i = 0; i < num_histograms; ++i)
696 bfd_vma common_low, common_high;
697 common_low = max (histograms[i].lowpc, *p_lowpc);
698 common_high = min (histograms[i].highpc, *p_highpc);
700 if (common_low < common_high)
702 if (found)
704 fprintf (stderr,
705 _("%s: found a symbol that covers "
706 "several histogram records"),
707 whoami);
708 done (1);
711 found = 1;
712 *p_lowpc = common_low;
713 *p_highpc = common_high;
717 if (!found)
718 *p_highpc = *p_lowpc;
721 /* Find and return exising histogram record having the same lowpc and
722 highpc as passed via the parameters. Return NULL if nothing is found.
723 The return value is valid until any new histogram is read. */
724 static histogram *
725 find_histogram (bfd_vma lowpc, bfd_vma highpc)
727 unsigned i;
728 for (i = 0; i < num_histograms; ++i)
730 if (histograms[i].lowpc == lowpc && histograms[i].highpc == highpc)
731 return &histograms[i];
733 return 0;
736 /* Given a PC, return histogram record which address range include this PC.
737 Return NULL if there's no such record. */
738 static histogram *
739 find_histogram_for_pc (bfd_vma pc)
741 unsigned i;
742 for (i = 0; i < num_histograms; ++i)
744 if (histograms[i].lowpc <= pc && pc < histograms[i].highpc)
745 return &histograms[i];
747 return 0;