2006-12-01 Paul Brook <paul@codesourcery.com>
[binutils.git] / gprof / hist.c
blob46124e8069632c9bacf71a41ba0aee873af6aaf6
1 /* hist.c - Histogram related operations.
3 Copyright 1999, 2000, 2001, 2002, 2004, 2005
4 Free Software Foundation, Inc.
6 This file is part of GNU Binutils.
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
21 02110-1301, USA. */
23 #include "libiberty.h"
24 #include "gprof.h"
25 #include "search_list.h"
26 #include "source.h"
27 #include "symtab.h"
28 #include "corefile.h"
29 #include "gmon_io.h"
30 #include "gmon_out.h"
31 #include "hist.h"
32 #include "sym_ids.h"
33 #include "utils.h"
34 #include "math.h"
35 #include "stdio.h"
36 #include "stdlib.h"
38 #define UNITS_TO_CODE (offset_to_code / sizeof(UNIT))
40 static void scale_and_align_entries (void);
41 static void print_header (int);
42 static void print_line (Sym *, double);
43 static int cmp_time (const PTR, const PTR);
45 /* Declarations of automatically generated functions to output blurbs. */
46 extern void flat_blurb (FILE * fp);
48 static histogram *find_histogram (bfd_vma lowpc, bfd_vma highpc);
49 static histogram *find_histogram_for_pc (bfd_vma pc);
51 double hist_scale;
52 static char hist_dimension[16] = "seconds";
53 static char hist_dimension_abbrev = 's';
55 static double accum_time; /* Accumulated time so far for print_line(). */
56 static double total_time; /* Total time for all routines. */
58 /* Table of SI prefixes for powers of 10 (used to automatically
59 scale some of the values in the flat profile). */
60 const struct
62 char prefix;
63 double scale;
65 SItab[] =
67 { 'T', 1e-12 }, /* tera */
68 { 'G', 1e-09 }, /* giga */
69 { 'M', 1e-06 }, /* mega */
70 { 'K', 1e-03 }, /* kilo */
71 { ' ', 1e-00 },
72 { 'm', 1e+03 }, /* milli */
73 { 'u', 1e+06 }, /* micro */
74 { 'n', 1e+09 }, /* nano */
75 { 'p', 1e+12 }, /* pico */
76 { 'f', 1e+15 }, /* femto */
77 { 'a', 1e+18 } /* ato */
80 /* Reads just the header part of histogram record into
81 *RECORD from IFP. FILENAME is the name of IFP and
82 is provided for formatting error messages only.
84 If FIRST is non-zero, sets global variables HZ, HIST_DIMENSION,
85 HIST_DIMENSION_ABBREV, HIST_SCALE. If FIRST is zero, checks
86 that the new histogram is compatible with already-set values
87 of those variables and emits an error if that's not so. */
88 static void
89 read_histogram_header (histogram *record,
90 FILE *ifp, const char *filename,
91 int first)
93 unsigned int profrate;
94 char n_hist_dimension[15];
95 char n_hist_dimension_abbrev;
96 double n_hist_scale;
98 if (gmon_io_read_vma (ifp, &record->lowpc)
99 || gmon_io_read_vma (ifp, &record->highpc)
100 || gmon_io_read_32 (ifp, &record->num_bins)
101 || gmon_io_read_32 (ifp, &profrate)
102 || gmon_io_read (ifp, n_hist_dimension, 15)
103 || gmon_io_read (ifp, &n_hist_dimension_abbrev, 1))
105 fprintf (stderr, _("%s: %s: unexpected end of file\n"),
106 whoami, filename);
108 done (1);
111 n_hist_scale = (double)((record->highpc - record->lowpc) / sizeof (UNIT))
112 / record->num_bins;
114 if (first)
116 /* We could try to verify it's the same for all records,
117 but it isn't practical, since when profiling is done
118 no by a timer but with a loop running in a stub, polling PC
119 from the target as fast as possible, the rate is not fixed. */
120 hz = profrate;
121 memcpy (hist_dimension, n_hist_dimension, 15);
122 hist_dimension_abbrev = n_hist_dimension_abbrev;
123 hist_scale = n_hist_scale;
125 else
127 if (strncmp (n_hist_dimension, hist_dimension, 15) != 0)
129 fprintf (stderr,
130 _("%s: dimension unit changed between histogram records\n"
131 "%s: from '%s'\n"
132 "%s: to '%s'\n"),
133 whoami, whoami, hist_dimension, whoami, n_hist_dimension);
134 done (1);
137 if (n_hist_dimension_abbrev != hist_dimension_abbrev)
139 fprintf (stderr,
140 _("%s: dimension abbreviation changed between histogram records\n"
141 "%s: from '%c'\n"
142 "%s: to '%c'\n"),
143 whoami, whoami, hist_dimension_abbrev, whoami, n_hist_dimension_abbrev);
144 done (1);
147 /* The only reason we require the same scale for histograms is that
148 there's code (notably printing code), that prints units,
149 and it would be very confusing to have one unit mean different
150 things for different functions. */
151 if (fabs (hist_scale - n_hist_scale) > 0.000001)
153 fprintf (stderr,
154 _("%s: different scales in histogram records"),
155 whoami);
156 done (1);
161 /* Read the histogram from file IFP. FILENAME is the name of IFP and
162 is provided for formatting error messages only. */
164 void
165 hist_read_rec (FILE * ifp, const char *filename)
167 bfd_vma lowpc, highpc;
168 histogram n_record;
169 histogram *record, *existing_record;
170 unsigned i;
172 /* 1. Read the header and see if there's existing record for the
173 same address range and that there are no overlapping records. */
174 read_histogram_header (&n_record, ifp, filename, num_histograms == 0);
176 existing_record = find_histogram (n_record.lowpc, n_record.highpc);
177 if (existing_record)
179 record = existing_record;
181 else
183 /* If this record overlaps, but does not completely match an existing
184 record, it's an error. */
185 lowpc = n_record.lowpc;
186 highpc = n_record.highpc;
187 hist_clip_symbol_address (&lowpc, &highpc);
188 if (lowpc != highpc)
190 fprintf (stderr,
191 _("%s: overlapping histogram records\n"),
192 whoami);
193 done (1);
196 /* This is new record. Add it to global array and allocate space for
197 the samples. */
198 histograms = (histogram *)realloc (histograms,
199 sizeof (histogram)
200 * (num_histograms + 1));
201 memcpy (histograms + num_histograms,
202 &n_record, sizeof (histogram));
203 record = &histograms[num_histograms];
204 ++num_histograms;
206 record->sample = (int *) xmalloc (record->num_bins
207 * sizeof (record->sample[0]));
208 memset (record->sample, 0, record->num_bins * sizeof (record->sample[0]));
211 /* 2. We have either a new record (with zeroed histogram data), or an existing
212 record with some data in the histogram already. Read new data into the
213 record, adding hit counts. */
215 DBG (SAMPLEDEBUG,
216 printf ("[hist_read_rec] n_lowpc 0x%lx n_highpc 0x%lx ncnt %u\n",
217 (unsigned long) record->lowpc, (unsigned long) record->highpc,
218 record->num_bins));
220 for (i = 0; i < record->num_bins; ++i)
222 UNIT count;
223 if (fread (&count[0], sizeof (count), 1, ifp) != 1)
225 fprintf (stderr,
226 _("%s: %s: unexpected EOF after reading %u of %u samples\n"),
227 whoami, filename, i, record->num_bins);
228 done (1);
230 record->sample[i] += bfd_get_16 (core_bfd, (bfd_byte *) & count[0]);
231 DBG (SAMPLEDEBUG,
232 printf ("[hist_read_rec] 0x%lx: %u\n",
233 (unsigned long) (record->lowpc
234 + i * (record->highpc - record->lowpc)
235 / record->num_bins),
236 record->sample[i]));
241 /* Write all execution histograms file OFP. FILENAME is the name
242 of OFP and is provided for formatting error-messages only. */
244 void
245 hist_write_hist (FILE * ofp, const char *filename)
247 UNIT count;
248 unsigned int i, r;
250 for (r = 0; r < num_histograms; ++r)
252 histogram *record = &histograms[r];
254 /* Write header. */
256 if (gmon_io_write_8 (ofp, GMON_TAG_TIME_HIST)
257 || gmon_io_write_vma (ofp, record->lowpc)
258 || gmon_io_write_vma (ofp, record->highpc)
259 || gmon_io_write_32 (ofp, record->num_bins)
260 || gmon_io_write_32 (ofp, hz)
261 || gmon_io_write (ofp, hist_dimension, 15)
262 || gmon_io_write (ofp, &hist_dimension_abbrev, 1))
264 perror (filename);
265 done (1);
268 for (i = 0; i < record->num_bins; ++i)
270 bfd_put_16 (core_bfd, (bfd_vma) record->sample[i], (bfd_byte *) &count[0]);
272 if (fwrite (&count[0], sizeof (count), 1, ofp) != 1)
274 perror (filename);
275 done (1);
281 /* Calculate scaled entry point addresses (to save time in
282 hist_assign_samples), and, on architectures that have procedure
283 entry masks at the start of a function, possibly push the scaled
284 entry points over the procedure entry mask, if it turns out that
285 the entry point is in one bin and the code for a routine is in the
286 next bin. */
288 static void
289 scale_and_align_entries ()
291 Sym *sym;
292 bfd_vma bin_of_entry;
293 bfd_vma bin_of_code;
295 for (sym = symtab.base; sym < symtab.limit; sym++)
297 sym->hist.scaled_addr = sym->addr / sizeof (UNIT);
299 histogram *r = find_histogram_for_pc (sym->addr);
301 if (r)
303 bin_of_entry = (sym->hist.scaled_addr - r->lowpc) / hist_scale;
304 bin_of_code = ((sym->hist.scaled_addr + UNITS_TO_CODE - r->lowpc)
305 / hist_scale);
306 if (bin_of_entry < bin_of_code)
308 DBG (SAMPLEDEBUG,
309 printf ("[scale_and_align_entries] pushing 0x%lx to 0x%lx\n",
310 (unsigned long) sym->hist.scaled_addr,
311 (unsigned long) (sym->hist.scaled_addr
312 + UNITS_TO_CODE)));
313 sym->hist.scaled_addr += UNITS_TO_CODE;
320 /* Assign samples to the symbol to which they belong.
322 Histogram bin I covers some address range [BIN_LOWPC,BIN_HIGH_PC)
323 which may overlap one more symbol address ranges. If a symbol
324 overlaps with the bin's address range by O percent, then O percent
325 of the bin's count is credited to that symbol.
327 There are three cases as to where BIN_LOW_PC and BIN_HIGH_PC can be
328 with respect to the symbol's address range [SYM_LOW_PC,
329 SYM_HIGH_PC) as shown in the following diagram. OVERLAP computes
330 the distance (in UNITs) between the arrows, the fraction of the
331 sample that is to be credited to the symbol which starts at
332 SYM_LOW_PC.
334 sym_low_pc sym_high_pc
338 +-----------------------------------------------+
340 | ->| |<- ->| |<- ->| |<- |
341 | | | | | |
342 +---------+ +---------+ +---------+
344 ^ ^ ^ ^ ^ ^
345 | | | | | |
346 bin_low_pc bin_high_pc bin_low_pc bin_high_pc bin_low_pc bin_high_pc
348 For the VAX we assert that samples will never fall in the first two
349 bytes of any routine, since that is the entry mask, thus we call
350 scale_and_align_entries() to adjust the entry points if the entry
351 mask falls in one bin but the code for the routine doesn't start
352 until the next bin. In conjunction with the alignment of routine
353 addresses, this should allow us to have only one sample for every
354 four bytes of text space and never have any overlap (the two end
355 cases, above). */
357 static void
358 hist_assign_samples_1 (histogram *r)
360 bfd_vma bin_low_pc, bin_high_pc;
361 bfd_vma sym_low_pc, sym_high_pc;
362 bfd_vma overlap, addr;
363 unsigned int bin_count;
364 unsigned int i, j;
365 double time, credit;
367 bfd_vma lowpc = r->lowpc / sizeof (UNIT);
369 /* Iterate over all sample bins. */
370 for (i = 0, j = 1; i < r->num_bins; ++i)
372 bin_count = r->sample[i];
373 if (! bin_count)
374 continue;
376 bin_low_pc = lowpc + (bfd_vma) (hist_scale * i);
377 bin_high_pc = lowpc + (bfd_vma) (hist_scale * (i + 1));
378 time = bin_count;
380 DBG (SAMPLEDEBUG,
381 printf (
382 "[assign_samples] bin_low_pc=0x%lx, bin_high_pc=0x%lx, bin_count=%u\n",
383 (unsigned long) (sizeof (UNIT) * bin_low_pc),
384 (unsigned long) (sizeof (UNIT) * bin_high_pc),
385 bin_count));
386 total_time += time;
388 /* Credit all symbols that are covered by bin I. */
389 for (j = j - 1; j < symtab.len; ++j)
391 sym_low_pc = symtab.base[j].hist.scaled_addr;
392 sym_high_pc = symtab.base[j + 1].hist.scaled_addr;
394 /* If high end of bin is below entry address,
395 go for next bin. */
396 if (bin_high_pc < sym_low_pc)
397 break;
399 /* If low end of bin is above high end of symbol,
400 go for next symbol. */
401 if (bin_low_pc >= sym_high_pc)
402 continue;
404 overlap =
405 MIN (bin_high_pc, sym_high_pc) - MAX (bin_low_pc, sym_low_pc);
406 if (overlap > 0)
408 DBG (SAMPLEDEBUG,
409 printf (
410 "[assign_samples] [0x%lx,0x%lx) %s gets %f ticks %ld overlap\n",
411 (unsigned long) symtab.base[j].addr,
412 (unsigned long) (sizeof (UNIT) * sym_high_pc),
413 symtab.base[j].name, overlap * time / hist_scale,
414 (long) overlap));
416 addr = symtab.base[j].addr;
417 credit = overlap * time / hist_scale;
419 /* Credit symbol if it appears in INCL_FLAT or that
420 table is empty and it does not appear it in
421 EXCL_FLAT. */
422 if (sym_lookup (&syms[INCL_FLAT], addr)
423 || (syms[INCL_FLAT].len == 0
424 && !sym_lookup (&syms[EXCL_FLAT], addr)))
426 symtab.base[j].hist.time += credit;
428 else
430 total_time -= credit;
436 DBG (SAMPLEDEBUG, printf ("[assign_samples] total_time %f\n",
437 total_time));
440 /* Calls 'hist_assign_sampes_1' for all histogram records read so far. */
441 void
442 hist_assign_samples ()
444 unsigned i;
446 scale_and_align_entries ();
448 for (i = 0; i < num_histograms; ++i)
449 hist_assign_samples_1 (&histograms[i]);
453 /* Print header for flag histogram profile. */
455 static void
456 print_header (int prefix)
458 char unit[64];
460 sprintf (unit, _("%c%c/call"), prefix, hist_dimension_abbrev);
462 if (bsd_style_output)
464 printf (_("\ngranularity: each sample hit covers %ld byte(s)"),
465 (long) hist_scale * sizeof (UNIT));
466 if (total_time > 0.0)
468 printf (_(" for %.2f%% of %.2f %s\n\n"),
469 100.0 / total_time, total_time / hz, hist_dimension);
472 else
474 printf (_("\nEach sample counts as %g %s.\n"), 1.0 / hz, hist_dimension);
477 if (total_time <= 0.0)
479 printf (_(" no time accumulated\n\n"));
481 /* This doesn't hurt since all the numerators will be zero. */
482 total_time = 1.0;
485 printf ("%5.5s %10.10s %8.8s %8.8s %8.8s %8.8s %-8.8s\n",
486 "% ", _("cumulative"), _("self "), "", _("self "), _("total "),
487 "");
488 printf ("%5.5s %9.9s %8.8s %8.8s %8.8s %8.8s %-8.8s\n",
489 _("time"), hist_dimension, hist_dimension, _("calls"), unit, unit,
490 _("name"));
494 static void
495 print_line (Sym *sym, double scale)
497 if (ignore_zeros && sym->ncalls == 0 && sym->hist.time == 0)
498 return;
500 accum_time += sym->hist.time;
502 if (bsd_style_output)
503 printf ("%5.1f %10.2f %8.2f",
504 total_time > 0.0 ? 100 * sym->hist.time / total_time : 0.0,
505 accum_time / hz, sym->hist.time / hz);
506 else
507 printf ("%6.2f %9.2f %8.2f",
508 total_time > 0.0 ? 100 * sym->hist.time / total_time : 0.0,
509 accum_time / hz, sym->hist.time / hz);
511 if (sym->ncalls != 0)
512 printf (" %8lu %8.2f %8.2f ",
513 sym->ncalls, scale * sym->hist.time / hz / sym->ncalls,
514 scale * (sym->hist.time + sym->cg.child_time) / hz / sym->ncalls);
515 else
516 printf (" %8.8s %8.8s %8.8s ", "", "", "");
518 if (bsd_style_output)
519 print_name (sym);
520 else
521 print_name_only (sym);
523 printf ("\n");
527 /* Compare LP and RP. The primary comparison key is execution time,
528 the secondary is number of invocation, and the tertiary is the
529 lexicographic order of the function names. */
531 static int
532 cmp_time (const PTR lp, const PTR rp)
534 const Sym *left = *(const Sym **) lp;
535 const Sym *right = *(const Sym **) rp;
536 double time_diff;
538 time_diff = right->hist.time - left->hist.time;
540 if (time_diff > 0.0)
541 return 1;
543 if (time_diff < 0.0)
544 return -1;
546 if (right->ncalls > left->ncalls)
547 return 1;
549 if (right->ncalls < left->ncalls)
550 return -1;
552 return strcmp (left->name, right->name);
556 /* Print the flat histogram profile. */
558 void
559 hist_print ()
561 Sym **time_sorted_syms, *top_dog, *sym;
562 unsigned int index;
563 unsigned log_scale;
564 double top_time, time;
565 bfd_vma addr;
567 if (first_output)
568 first_output = FALSE;
569 else
570 printf ("\f\n");
572 accum_time = 0.0;
574 if (bsd_style_output)
576 if (print_descriptions)
578 printf (_("\n\n\nflat profile:\n"));
579 flat_blurb (stdout);
582 else
584 printf (_("Flat profile:\n"));
587 /* Sort the symbol table by time (call-count and name as secondary
588 and tertiary keys). */
589 time_sorted_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
591 for (index = 0; index < symtab.len; ++index)
592 time_sorted_syms[index] = &symtab.base[index];
594 qsort (time_sorted_syms, symtab.len, sizeof (Sym *), cmp_time);
596 if (bsd_style_output)
598 log_scale = 5; /* Milli-seconds is BSD-default. */
600 else
602 /* Search for symbol with highest per-call
603 execution time and scale accordingly. */
604 log_scale = 0;
605 top_dog = 0;
606 top_time = 0.0;
608 for (index = 0; index < symtab.len; ++index)
610 sym = time_sorted_syms[index];
612 if (sym->ncalls != 0)
614 time = (sym->hist.time + sym->cg.child_time) / sym->ncalls;
616 if (time > top_time)
618 top_dog = sym;
619 top_time = time;
624 if (top_dog && top_dog->ncalls != 0 && top_time > 0.0)
626 top_time /= hz;
628 for (log_scale = 0; log_scale < ARRAY_SIZE (SItab); log_scale ++)
630 double scaled_value = SItab[log_scale].scale * top_time;
632 if (scaled_value >= 1.0 && scaled_value < 1000.0)
633 break;
638 /* For now, the dimension is always seconds. In the future, we
639 may also want to support other (pseudo-)dimensions (such as
640 I-cache misses etc.). */
641 print_header (SItab[log_scale].prefix);
643 for (index = 0; index < symtab.len; ++index)
645 addr = time_sorted_syms[index]->addr;
647 /* Print symbol if its in INCL_FLAT table or that table
648 is empty and the symbol is not in EXCL_FLAT. */
649 if (sym_lookup (&syms[INCL_FLAT], addr)
650 || (syms[INCL_FLAT].len == 0
651 && !sym_lookup (&syms[EXCL_FLAT], addr)))
652 print_line (time_sorted_syms[index], SItab[log_scale].scale);
655 free (time_sorted_syms);
657 if (print_descriptions && !bsd_style_output)
658 flat_blurb (stdout);
662 hist_check_address (unsigned address)
664 unsigned i;
666 for (i = 0; i < num_histograms; ++i)
667 if (histograms[i].lowpc <= address && address < histograms[i].highpc)
668 return 1;
670 return 0;
673 #if ! defined(min)
674 #define min(a,b) (((a)<(b)) ? (a) : (b))
675 #endif
676 #if ! defined(max)
677 #define max(a,b) (((a)>(b)) ? (a) : (b))
678 #endif
680 void
681 hist_clip_symbol_address (bfd_vma *p_lowpc, bfd_vma *p_highpc)
683 unsigned i;
684 int found = 0;
686 if (num_histograms == 0)
688 *p_highpc = *p_lowpc;
689 return;
692 for (i = 0; i < num_histograms; ++i)
694 bfd_vma common_low, common_high;
695 common_low = max (histograms[i].lowpc, *p_lowpc);
696 common_high = min (histograms[i].highpc, *p_highpc);
698 if (common_low < common_high)
700 if (found)
702 // FIXME: some proper diagnostics.
703 // like: "a symbol covers more than one histogram record"
704 abort ();
707 found = 1;
708 *p_lowpc = common_low;
709 *p_highpc = common_high;
713 if (!found)
714 *p_highpc = *p_lowpc;
717 /* Find and return exising histogram record having the same lowpc and
718 highpc as passed via the parameters. Return NULL if nothing is found.
719 The return value is valid until any new histogram is read. */
720 static histogram *
721 find_histogram (bfd_vma lowpc, bfd_vma highpc)
723 unsigned i;
724 for (i = 0; i < num_histograms; ++i)
726 if (histograms[i].lowpc == lowpc && histograms[i].highpc == highpc)
727 return &histograms[i];
729 return 0;
732 /* Given a PC, return histogram record which address range include this PC.
733 Return NULL if there's no such record. */
734 static histogram *
735 find_histogram_for_pc (bfd_vma pc)
737 unsigned i;
738 for (i = 0; i < num_histograms; ++i)
740 if (histograms[i].lowpc <= pc && pc < histograms[i].highpc)
741 return &histograms[i];
743 return 0;