2006-07-03 Paolo Bonzini <bonzini@gnu.org>
[binutils.git] / gprof / hist.c
blob024f6653e662194638034a85a91a9b7c790901fd
1 /* hist.c - Histogram related operations.
3 Copyright 1999, 2000, 2001, 2002, 2004, 2005
4 Free Software Foundation, Inc.
6 This file is part of GNU Binutils.
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
21 02110-1301, USA. */
23 #include "libiberty.h"
24 #include "gprof.h"
25 #include "search_list.h"
26 #include "source.h"
27 #include "symtab.h"
28 #include "corefile.h"
29 #include "gmon_io.h"
30 #include "gmon_out.h"
31 #include "hist.h"
32 #include "sym_ids.h"
33 #include "utils.h"
35 #define UNITS_TO_CODE (offset_to_code / sizeof(UNIT))
37 static void scale_and_align_entries (void);
38 static void print_header (int);
39 static void print_line (Sym *, double);
40 static int cmp_time (const PTR, const PTR);
42 /* Declarations of automatically generated functions to output blurbs. */
43 extern void flat_blurb (FILE * fp);
45 bfd_vma s_lowpc; /* Lowest address in .text. */
46 bfd_vma s_highpc = 0; /* Highest address in .text. */
47 bfd_vma lowpc, highpc; /* Same, but expressed in UNITs. */
48 unsigned int hist_num_bins = 0; /* Number of histogram samples. */
49 int *hist_sample = 0; /* Histogram samples (shorts in the file!). */
50 double hist_scale;
51 static char hist_dimension[16] = "seconds";
52 static char hist_dimension_abbrev = 's';
54 static double accum_time; /* Accumulated time so far for print_line(). */
55 static double total_time; /* Total time for all routines. */
57 /* Table of SI prefixes for powers of 10 (used to automatically
58 scale some of the values in the flat profile). */
59 const struct
61 char prefix;
62 double scale;
64 SItab[] =
66 { 'T', 1e-12 }, /* tera */
67 { 'G', 1e-09 }, /* giga */
68 { 'M', 1e-06 }, /* mega */
69 { 'K', 1e-03 }, /* kilo */
70 { ' ', 1e-00 },
71 { 'm', 1e+03 }, /* milli */
72 { 'u', 1e+06 }, /* micro */
73 { 'n', 1e+09 }, /* nano */
74 { 'p', 1e+12 }, /* pico */
75 { 'f', 1e+15 }, /* femto */
76 { 'a', 1e+18 } /* ato */
80 /* Read the histogram from file IFP. FILENAME is the name of IFP and
81 is provided for formatting error messages only. */
83 void
84 hist_read_rec (FILE * ifp, const char *filename)
86 bfd_vma n_lowpc, n_highpc;
87 unsigned int i, ncnt, profrate;
88 UNIT count;
90 if (gmon_io_read_vma (ifp, &n_lowpc)
91 || gmon_io_read_vma (ifp, &n_highpc)
92 || gmon_io_read_32 (ifp, &ncnt)
93 || gmon_io_read_32 (ifp, &profrate)
94 || gmon_io_read (ifp, hist_dimension, 15)
95 || gmon_io_read (ifp, &hist_dimension_abbrev, 1))
97 fprintf (stderr, _("%s: %s: unexpected end of file\n"),
98 whoami, filename);
100 done (1);
103 if (!s_highpc)
105 /* This is the first histogram record. */
106 s_lowpc = n_lowpc;
107 s_highpc = n_highpc;
108 lowpc = (bfd_vma) n_lowpc / sizeof (UNIT);
109 highpc = (bfd_vma) n_highpc / sizeof (UNIT);
110 hist_num_bins = ncnt;
111 hz = profrate;
114 DBG (SAMPLEDEBUG,
115 printf ("[hist_read_rec] n_lowpc 0x%lx n_highpc 0x%lx ncnt %u\n",
116 (unsigned long) n_lowpc, (unsigned long) n_highpc, ncnt);
117 printf ("[hist_read_rec] s_lowpc 0x%lx s_highpc 0x%lx nsamples %u\n",
118 (unsigned long) s_lowpc, (unsigned long) s_highpc,
119 hist_num_bins);
120 printf ("[hist_read_rec] lowpc 0x%lx highpc 0x%lx\n",
121 (unsigned long) lowpc, (unsigned long) highpc));
123 if (n_lowpc != s_lowpc || n_highpc != s_highpc
124 || ncnt != hist_num_bins || hz != (int) profrate)
126 fprintf (stderr, _("%s: `%s' is incompatible with first gmon file\n"),
127 whoami, filename);
128 done (1);
131 if (!hist_sample)
133 hist_sample = (int *) xmalloc (hist_num_bins * sizeof (hist_sample[0]));
134 memset (hist_sample, 0, hist_num_bins * sizeof (hist_sample[0]));
137 for (i = 0; i < hist_num_bins; ++i)
139 if (fread (&count[0], sizeof (count), 1, ifp) != 1)
141 fprintf (stderr,
142 _("%s: %s: unexpected EOF after reading %u of %u samples\n"),
143 whoami, filename, i, hist_num_bins);
144 done (1);
146 hist_sample[i] += bfd_get_16 (core_bfd, (bfd_byte *) & count[0]);
147 DBG (SAMPLEDEBUG,
148 printf ("[hist_read_rec] 0x%lx: %u\n",
149 (unsigned long) (n_lowpc + i * (n_highpc - n_lowpc) / ncnt),
150 hist_sample[i]));
155 /* Write execution histogram to file OFP. FILENAME is the name
156 of OFP and is provided for formatting error-messages only. */
158 void
159 hist_write_hist (FILE * ofp, const char *filename)
161 UNIT count;
162 unsigned int i;
164 /* Write header. */
166 if (gmon_io_write_8 (ofp, GMON_TAG_TIME_HIST)
167 || gmon_io_write_vma (ofp, s_lowpc)
168 || gmon_io_write_vma (ofp, s_highpc)
169 || gmon_io_write_32 (ofp, hist_num_bins)
170 || gmon_io_write_32 (ofp, hz)
171 || gmon_io_write (ofp, hist_dimension, 15)
172 || gmon_io_write (ofp, &hist_dimension_abbrev, 1))
174 perror (filename);
175 done (1);
178 for (i = 0; i < hist_num_bins; ++i)
180 bfd_put_16 (core_bfd, (bfd_vma) hist_sample[i], (bfd_byte *) &count[0]);
182 if (fwrite (&count[0], sizeof (count), 1, ofp) != 1)
184 perror (filename);
185 done (1);
191 /* Calculate scaled entry point addresses (to save time in
192 hist_assign_samples), and, on architectures that have procedure
193 entry masks at the start of a function, possibly push the scaled
194 entry points over the procedure entry mask, if it turns out that
195 the entry point is in one bin and the code for a routine is in the
196 next bin. */
198 static void
199 scale_and_align_entries ()
201 Sym *sym;
202 bfd_vma bin_of_entry;
203 bfd_vma bin_of_code;
205 for (sym = symtab.base; sym < symtab.limit; sym++)
207 sym->hist.scaled_addr = sym->addr / sizeof (UNIT);
208 bin_of_entry = (sym->hist.scaled_addr - lowpc) / hist_scale;
209 bin_of_code = ((sym->hist.scaled_addr + UNITS_TO_CODE - lowpc)
210 / hist_scale);
211 if (bin_of_entry < bin_of_code)
213 DBG (SAMPLEDEBUG,
214 printf ("[scale_and_align_entries] pushing 0x%lx to 0x%lx\n",
215 (unsigned long) sym->hist.scaled_addr,
216 (unsigned long) (sym->hist.scaled_addr
217 + UNITS_TO_CODE)));
218 sym->hist.scaled_addr += UNITS_TO_CODE;
224 /* Assign samples to the symbol to which they belong.
226 Histogram bin I covers some address range [BIN_LOWPC,BIN_HIGH_PC)
227 which may overlap one more symbol address ranges. If a symbol
228 overlaps with the bin's address range by O percent, then O percent
229 of the bin's count is credited to that symbol.
231 There are three cases as to where BIN_LOW_PC and BIN_HIGH_PC can be
232 with respect to the symbol's address range [SYM_LOW_PC,
233 SYM_HIGH_PC) as shown in the following diagram. OVERLAP computes
234 the distance (in UNITs) between the arrows, the fraction of the
235 sample that is to be credited to the symbol which starts at
236 SYM_LOW_PC.
238 sym_low_pc sym_high_pc
242 +-----------------------------------------------+
244 | ->| |<- ->| |<- ->| |<- |
245 | | | | | |
246 +---------+ +---------+ +---------+
248 ^ ^ ^ ^ ^ ^
249 | | | | | |
250 bin_low_pc bin_high_pc bin_low_pc bin_high_pc bin_low_pc bin_high_pc
252 For the VAX we assert that samples will never fall in the first two
253 bytes of any routine, since that is the entry mask, thus we call
254 scale_and_align_entries() to adjust the entry points if the entry
255 mask falls in one bin but the code for the routine doesn't start
256 until the next bin. In conjunction with the alignment of routine
257 addresses, this should allow us to have only one sample for every
258 four bytes of text space and never have any overlap (the two end
259 cases, above). */
261 void
262 hist_assign_samples ()
264 bfd_vma bin_low_pc, bin_high_pc;
265 bfd_vma sym_low_pc, sym_high_pc;
266 bfd_vma overlap, addr;
267 unsigned int bin_count;
268 unsigned int i, j;
269 double time, credit;
271 /* Read samples and assign to symbols. */
272 hist_scale = highpc - lowpc;
273 hist_scale /= hist_num_bins;
274 scale_and_align_entries ();
276 /* Iterate over all sample bins. */
277 for (i = 0, j = 1; i < hist_num_bins; ++i)
279 bin_count = hist_sample[i];
280 if (! bin_count)
281 continue;
283 bin_low_pc = lowpc + (bfd_vma) (hist_scale * i);
284 bin_high_pc = lowpc + (bfd_vma) (hist_scale * (i + 1));
285 time = bin_count;
287 DBG (SAMPLEDEBUG,
288 printf (
289 "[assign_samples] bin_low_pc=0x%lx, bin_high_pc=0x%lx, bin_count=%u\n",
290 (unsigned long) (sizeof (UNIT) * bin_low_pc),
291 (unsigned long) (sizeof (UNIT) * bin_high_pc),
292 bin_count));
293 total_time += time;
295 /* Credit all symbols that are covered by bin I. */
296 for (j = j - 1; j < symtab.len; ++j)
298 sym_low_pc = symtab.base[j].hist.scaled_addr;
299 sym_high_pc = symtab.base[j + 1].hist.scaled_addr;
301 /* If high end of bin is below entry address,
302 go for next bin. */
303 if (bin_high_pc < sym_low_pc)
304 break;
306 /* If low end of bin is above high end of symbol,
307 go for next symbol. */
308 if (bin_low_pc >= sym_high_pc)
309 continue;
311 overlap =
312 MIN (bin_high_pc, sym_high_pc) - MAX (bin_low_pc, sym_low_pc);
313 if (overlap > 0)
315 DBG (SAMPLEDEBUG,
316 printf (
317 "[assign_samples] [0x%lx,0x%lx) %s gets %f ticks %ld overlap\n",
318 (unsigned long) symtab.base[j].addr,
319 (unsigned long) (sizeof (UNIT) * sym_high_pc),
320 symtab.base[j].name, overlap * time / hist_scale,
321 (long) overlap));
323 addr = symtab.base[j].addr;
324 credit = overlap * time / hist_scale;
326 /* Credit symbol if it appears in INCL_FLAT or that
327 table is empty and it does not appear it in
328 EXCL_FLAT. */
329 if (sym_lookup (&syms[INCL_FLAT], addr)
330 || (syms[INCL_FLAT].len == 0
331 && !sym_lookup (&syms[EXCL_FLAT], addr)))
333 symtab.base[j].hist.time += credit;
335 else
337 total_time -= credit;
343 DBG (SAMPLEDEBUG, printf ("[assign_samples] total_time %f\n",
344 total_time));
348 /* Print header for flag histogram profile. */
350 static void
351 print_header (int prefix)
353 char unit[64];
355 sprintf (unit, _("%c%c/call"), prefix, hist_dimension_abbrev);
357 if (bsd_style_output)
359 printf (_("\ngranularity: each sample hit covers %ld byte(s)"),
360 (long) hist_scale * sizeof (UNIT));
361 if (total_time > 0.0)
363 printf (_(" for %.2f%% of %.2f %s\n\n"),
364 100.0 / total_time, total_time / hz, hist_dimension);
367 else
369 printf (_("\nEach sample counts as %g %s.\n"), 1.0 / hz, hist_dimension);
372 if (total_time <= 0.0)
374 printf (_(" no time accumulated\n\n"));
376 /* This doesn't hurt since all the numerators will be zero. */
377 total_time = 1.0;
380 printf ("%5.5s %10.10s %8.8s %8.8s %8.8s %8.8s %-8.8s\n",
381 "% ", _("cumulative"), _("self "), "", _("self "), _("total "),
382 "");
383 printf ("%5.5s %9.9s %8.8s %8.8s %8.8s %8.8s %-8.8s\n",
384 _("time"), hist_dimension, hist_dimension, _("calls"), unit, unit,
385 _("name"));
389 static void
390 print_line (Sym *sym, double scale)
392 if (ignore_zeros && sym->ncalls == 0 && sym->hist.time == 0)
393 return;
395 accum_time += sym->hist.time;
397 if (bsd_style_output)
398 printf ("%5.1f %10.2f %8.2f",
399 total_time > 0.0 ? 100 * sym->hist.time / total_time : 0.0,
400 accum_time / hz, sym->hist.time / hz);
401 else
402 printf ("%6.2f %9.2f %8.2f",
403 total_time > 0.0 ? 100 * sym->hist.time / total_time : 0.0,
404 accum_time / hz, sym->hist.time / hz);
406 if (sym->ncalls != 0)
407 printf (" %8lu %8.2f %8.2f ",
408 sym->ncalls, scale * sym->hist.time / hz / sym->ncalls,
409 scale * (sym->hist.time + sym->cg.child_time) / hz / sym->ncalls);
410 else
411 printf (" %8.8s %8.8s %8.8s ", "", "", "");
413 if (bsd_style_output)
414 print_name (sym);
415 else
416 print_name_only (sym);
418 printf ("\n");
422 /* Compare LP and RP. The primary comparison key is execution time,
423 the secondary is number of invocation, and the tertiary is the
424 lexicographic order of the function names. */
426 static int
427 cmp_time (const PTR lp, const PTR rp)
429 const Sym *left = *(const Sym **) lp;
430 const Sym *right = *(const Sym **) rp;
431 double time_diff;
433 time_diff = right->hist.time - left->hist.time;
435 if (time_diff > 0.0)
436 return 1;
438 if (time_diff < 0.0)
439 return -1;
441 if (right->ncalls > left->ncalls)
442 return 1;
444 if (right->ncalls < left->ncalls)
445 return -1;
447 return strcmp (left->name, right->name);
451 /* Print the flat histogram profile. */
453 void
454 hist_print ()
456 Sym **time_sorted_syms, *top_dog, *sym;
457 unsigned int index;
458 unsigned log_scale;
459 double top_time, time;
460 bfd_vma addr;
462 if (first_output)
463 first_output = FALSE;
464 else
465 printf ("\f\n");
467 accum_time = 0.0;
469 if (bsd_style_output)
471 if (print_descriptions)
473 printf (_("\n\n\nflat profile:\n"));
474 flat_blurb (stdout);
477 else
479 printf (_("Flat profile:\n"));
482 /* Sort the symbol table by time (call-count and name as secondary
483 and tertiary keys). */
484 time_sorted_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
486 for (index = 0; index < symtab.len; ++index)
487 time_sorted_syms[index] = &symtab.base[index];
489 qsort (time_sorted_syms, symtab.len, sizeof (Sym *), cmp_time);
491 if (bsd_style_output)
493 log_scale = 5; /* Milli-seconds is BSD-default. */
495 else
497 /* Search for symbol with highest per-call
498 execution time and scale accordingly. */
499 log_scale = 0;
500 top_dog = 0;
501 top_time = 0.0;
503 for (index = 0; index < symtab.len; ++index)
505 sym = time_sorted_syms[index];
507 if (sym->ncalls != 0)
509 time = (sym->hist.time + sym->cg.child_time) / sym->ncalls;
511 if (time > top_time)
513 top_dog = sym;
514 top_time = time;
519 if (top_dog && top_dog->ncalls != 0 && top_time > 0.0)
521 top_time /= hz;
523 for (log_scale = 0; log_scale < ARRAY_SIZE (SItab); log_scale ++)
525 double scaled_value = SItab[log_scale].scale * top_time;
527 if (scaled_value >= 1.0 && scaled_value < 1000.0)
528 break;
533 /* For now, the dimension is always seconds. In the future, we
534 may also want to support other (pseudo-)dimensions (such as
535 I-cache misses etc.). */
536 print_header (SItab[log_scale].prefix);
538 for (index = 0; index < symtab.len; ++index)
540 addr = time_sorted_syms[index]->addr;
542 /* Print symbol if its in INCL_FLAT table or that table
543 is empty and the symbol is not in EXCL_FLAT. */
544 if (sym_lookup (&syms[INCL_FLAT], addr)
545 || (syms[INCL_FLAT].len == 0
546 && !sym_lookup (&syms[EXCL_FLAT], addr)))
547 print_line (time_sorted_syms[index], SItab[log_scale].scale);
550 free (time_sorted_syms);
552 if (print_descriptions && !bsd_style_output)
553 flat_blurb (stdout);