[PATCH 40/57][Arm][OBJDUMP] Add support for MVE instructions: vdup, veor, vfma, vfms...
[binutils-gdb.git] / gprof / hist.c
blob2fc2ad45f32f059156ca2c03a85c5e8f18145209
1 /* hist.c - Histogram related operations.
3 Copyright (C) 1999-2019 Free Software Foundation, Inc.
5 This file is part of GNU Binutils.
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
20 02110-1301, USA. */
22 #include "gprof.h"
23 #include "libiberty.h"
24 #include "search_list.h"
25 #include "source.h"
26 #include "symtab.h"
27 #include "corefile.h"
28 #include "gmon_io.h"
29 #include "gmon_out.h"
30 #include "hist.h"
31 #include "sym_ids.h"
32 #include "utils.h"
33 #include "math.h"
34 #include "stdio.h"
35 #include "stdlib.h"
37 #define UNITS_TO_CODE (offset_to_code / sizeof(UNIT))
39 static void scale_and_align_entries (void);
40 static void print_header (int);
41 static void print_line (Sym *, double);
42 static int cmp_time (const PTR, const PTR);
44 /* Declarations of automatically generated functions to output blurbs. */
45 extern void flat_blurb (FILE * fp);
47 static histogram *find_histogram (bfd_vma lowpc, bfd_vma highpc);
48 static histogram *find_histogram_for_pc (bfd_vma pc);
50 histogram * histograms;
51 unsigned num_histograms;
52 double hist_scale;
53 static char hist_dimension[16] = "seconds";
54 static char hist_dimension_abbrev = 's';
56 static double accum_time; /* Accumulated time so far for print_line(). */
57 static double total_time; /* Total time for all routines. */
59 /* Table of SI prefixes for powers of 10 (used to automatically
60 scale some of the values in the flat profile). */
61 const struct
63 char prefix;
64 double scale;
66 SItab[] =
68 { 'T', 1e-12 }, /* tera */
69 { 'G', 1e-09 }, /* giga */
70 { 'M', 1e-06 }, /* mega */
71 { 'K', 1e-03 }, /* kilo */
72 { ' ', 1e-00 },
73 { 'm', 1e+03 }, /* milli */
74 { 'u', 1e+06 }, /* micro */
75 { 'n', 1e+09 }, /* nano */
76 { 'p', 1e+12 }, /* pico */
77 { 'f', 1e+15 }, /* femto */
78 { 'a', 1e+18 } /* ato */
81 /* Reads just the header part of histogram record into
82 *RECORD from IFP. FILENAME is the name of IFP and
83 is provided for formatting error messages only.
85 If FIRST is non-zero, sets global variables HZ, HIST_DIMENSION,
86 HIST_DIMENSION_ABBREV, HIST_SCALE. If FIRST is zero, checks
87 that the new histogram is compatible with already-set values
88 of those variables and emits an error if that's not so. */
89 static void
90 read_histogram_header (histogram *record,
91 FILE *ifp, const char *filename,
92 int first)
94 unsigned int profrate;
95 char n_hist_dimension[15];
96 char n_hist_dimension_abbrev;
97 double n_hist_scale;
99 if (gmon_io_read_vma (ifp, &record->lowpc)
100 || gmon_io_read_vma (ifp, &record->highpc)
101 || gmon_io_read_32 (ifp, &record->num_bins)
102 || gmon_io_read_32 (ifp, &profrate)
103 || gmon_io_read (ifp, n_hist_dimension, 15)
104 || gmon_io_read (ifp, &n_hist_dimension_abbrev, 1))
106 fprintf (stderr, _("%s: %s: unexpected end of file\n"),
107 whoami, filename);
109 done (1);
112 n_hist_scale = (double)((record->highpc - record->lowpc) / sizeof (UNIT))
113 / record->num_bins;
115 if (first)
117 /* We don't try to veryfy profrate is the same for all histogram
118 records. If we have two histogram records for the same
119 address range and profiling samples is done as often
120 as possible as opposed on timer, then the actual profrate will
121 be slightly different. Most of the time the difference does not
122 matter and insisting that profiling rate is exactly the same
123 will only create inconvenient. */
124 hz = profrate;
125 memcpy (hist_dimension, n_hist_dimension, 15);
126 hist_dimension_abbrev = n_hist_dimension_abbrev;
127 hist_scale = n_hist_scale;
129 else
131 if (strncmp (n_hist_dimension, hist_dimension, 15) != 0)
133 fprintf (stderr,
134 _("%s: dimension unit changed between histogram records\n"
135 "%s: from '%s'\n"
136 "%s: to '%s'\n"),
137 whoami, whoami, hist_dimension, whoami, n_hist_dimension);
138 done (1);
141 if (n_hist_dimension_abbrev != hist_dimension_abbrev)
143 fprintf (stderr,
144 _("%s: dimension abbreviation changed between histogram records\n"
145 "%s: from '%c'\n"
146 "%s: to '%c'\n"),
147 whoami, whoami, hist_dimension_abbrev, whoami, n_hist_dimension_abbrev);
148 done (1);
151 /* The only reason we require the same scale for histograms is that
152 there's code (notably printing code), that prints units,
153 and it would be very confusing to have one unit mean different
154 things for different functions. */
155 if (fabs (hist_scale - n_hist_scale) > 0.000001)
157 fprintf (stderr,
158 _("%s: different scales in histogram records"),
159 whoami);
160 done (1);
165 /* Read the histogram from file IFP. FILENAME is the name of IFP and
166 is provided for formatting error messages only. */
168 void
169 hist_read_rec (FILE * ifp, const char *filename)
171 bfd_vma lowpc, highpc;
172 histogram n_record;
173 histogram *record, *existing_record;
174 unsigned i;
176 /* 1. Read the header and see if there's existing record for the
177 same address range and that there are no overlapping records. */
178 read_histogram_header (&n_record, ifp, filename, num_histograms == 0);
180 existing_record = find_histogram (n_record.lowpc, n_record.highpc);
181 if (existing_record)
183 record = existing_record;
185 else
187 /* If this record overlaps, but does not completely match an existing
188 record, it's an error. */
189 lowpc = n_record.lowpc;
190 highpc = n_record.highpc;
191 hist_clip_symbol_address (&lowpc, &highpc);
192 if (lowpc != highpc)
194 fprintf (stderr,
195 _("%s: overlapping histogram records\n"),
196 whoami);
197 done (1);
200 /* This is new record. Add it to global array and allocate space for
201 the samples. */
202 histograms = (struct histogram *)
203 xrealloc (histograms, sizeof (histogram) * (num_histograms + 1));
204 memcpy (histograms + num_histograms,
205 &n_record, sizeof (histogram));
206 record = &histograms[num_histograms];
207 ++num_histograms;
209 record->sample = (int *) xmalloc (record->num_bins
210 * sizeof (record->sample[0]));
211 memset (record->sample, 0, record->num_bins * sizeof (record->sample[0]));
214 /* 2. We have either a new record (with zeroed histogram data), or an existing
215 record with some data in the histogram already. Read new data into the
216 record, adding hit counts. */
218 DBG (SAMPLEDEBUG,
219 printf ("[hist_read_rec] n_lowpc 0x%lx n_highpc 0x%lx ncnt %u\n",
220 (unsigned long) record->lowpc, (unsigned long) record->highpc,
221 record->num_bins));
223 for (i = 0; i < record->num_bins; ++i)
225 UNIT count;
226 if (fread (&count[0], sizeof (count), 1, ifp) != 1)
228 fprintf (stderr,
229 _("%s: %s: unexpected EOF after reading %u of %u samples\n"),
230 whoami, filename, i, record->num_bins);
231 done (1);
233 record->sample[i] += bfd_get_16 (core_bfd, (bfd_byte *) & count[0]);
234 DBG (SAMPLEDEBUG,
235 printf ("[hist_read_rec] 0x%lx: %u\n",
236 (unsigned long) (record->lowpc
237 + i * (record->highpc - record->lowpc)
238 / record->num_bins),
239 record->sample[i]));
244 /* Write all execution histograms file OFP. FILENAME is the name
245 of OFP and is provided for formatting error-messages only. */
247 void
248 hist_write_hist (FILE * ofp, const char *filename)
250 UNIT count;
251 unsigned int i, r;
253 for (r = 0; r < num_histograms; ++r)
255 histogram *record = &histograms[r];
257 /* Write header. */
259 if (gmon_io_write_8 (ofp, GMON_TAG_TIME_HIST)
260 || gmon_io_write_vma (ofp, record->lowpc)
261 || gmon_io_write_vma (ofp, record->highpc)
262 || gmon_io_write_32 (ofp, record->num_bins)
263 || gmon_io_write_32 (ofp, hz)
264 || gmon_io_write (ofp, hist_dimension, 15)
265 || gmon_io_write (ofp, &hist_dimension_abbrev, 1))
267 perror (filename);
268 done (1);
271 for (i = 0; i < record->num_bins; ++i)
273 bfd_put_16 (core_bfd, (bfd_vma) record->sample[i], (bfd_byte *) &count[0]);
275 if (fwrite (&count[0], sizeof (count), 1, ofp) != 1)
277 perror (filename);
278 done (1);
284 /* Calculate scaled entry point addresses (to save time in
285 hist_assign_samples), and, on architectures that have procedure
286 entry masks at the start of a function, possibly push the scaled
287 entry points over the procedure entry mask, if it turns out that
288 the entry point is in one bin and the code for a routine is in the
289 next bin. */
291 static void
292 scale_and_align_entries (void)
294 Sym *sym;
295 bfd_vma bin_of_entry;
296 bfd_vma bin_of_code;
298 for (sym = symtab.base; sym < symtab.limit; sym++)
300 histogram *r = find_histogram_for_pc (sym->addr);
302 sym->hist.scaled_addr = sym->addr / sizeof (UNIT);
304 if (r)
306 bin_of_entry = (sym->hist.scaled_addr - r->lowpc) / hist_scale;
307 bin_of_code = ((sym->hist.scaled_addr + UNITS_TO_CODE - r->lowpc)
308 / hist_scale);
309 if (bin_of_entry < bin_of_code)
311 DBG (SAMPLEDEBUG,
312 printf ("[scale_and_align_entries] pushing 0x%lx to 0x%lx\n",
313 (unsigned long) sym->hist.scaled_addr,
314 (unsigned long) (sym->hist.scaled_addr
315 + UNITS_TO_CODE)));
316 sym->hist.scaled_addr += UNITS_TO_CODE;
323 /* Assign samples to the symbol to which they belong.
325 Histogram bin I covers some address range [BIN_LOWPC,BIN_HIGH_PC)
326 which may overlap one more symbol address ranges. If a symbol
327 overlaps with the bin's address range by O percent, then O percent
328 of the bin's count is credited to that symbol.
330 There are three cases as to where BIN_LOW_PC and BIN_HIGH_PC can be
331 with respect to the symbol's address range [SYM_LOW_PC,
332 SYM_HIGH_PC) as shown in the following diagram. OVERLAP computes
333 the distance (in UNITs) between the arrows, the fraction of the
334 sample that is to be credited to the symbol which starts at
335 SYM_LOW_PC.
337 sym_low_pc sym_high_pc
341 +-----------------------------------------------+
343 | ->| |<- ->| |<- ->| |<- |
344 | | | | | |
345 +---------+ +---------+ +---------+
347 ^ ^ ^ ^ ^ ^
348 | | | | | |
349 bin_low_pc bin_high_pc bin_low_pc bin_high_pc bin_low_pc bin_high_pc
351 For the VAX we assert that samples will never fall in the first two
352 bytes of any routine, since that is the entry mask, thus we call
353 scale_and_align_entries() to adjust the entry points if the entry
354 mask falls in one bin but the code for the routine doesn't start
355 until the next bin. In conjunction with the alignment of routine
356 addresses, this should allow us to have only one sample for every
357 four bytes of text space and never have any overlap (the two end
358 cases, above). */
360 static void
361 hist_assign_samples_1 (histogram *r)
363 bfd_vma bin_low_pc, bin_high_pc;
364 bfd_vma sym_low_pc, sym_high_pc;
365 bfd_vma overlap, addr;
366 unsigned int bin_count;
367 unsigned int i, j, k;
368 double count_time, credit;
370 bfd_vma lowpc = r->lowpc / sizeof (UNIT);
372 /* Iterate over all sample bins. */
373 for (i = 0, k = 1; i < r->num_bins; ++i)
375 bin_count = r->sample[i];
376 if (! bin_count)
377 continue;
379 bin_low_pc = lowpc + (bfd_vma) (hist_scale * i);
380 bin_high_pc = lowpc + (bfd_vma) (hist_scale * (i + 1));
381 count_time = bin_count;
383 DBG (SAMPLEDEBUG,
384 printf (
385 "[assign_samples] bin_low_pc=0x%lx, bin_high_pc=0x%lx, bin_count=%u\n",
386 (unsigned long) (sizeof (UNIT) * bin_low_pc),
387 (unsigned long) (sizeof (UNIT) * bin_high_pc),
388 bin_count));
389 total_time += count_time;
391 /* Credit all symbols that are covered by bin I.
393 PR gprof/13325: Make sure that K does not get decremented
394 and J will never be less than 0. */
395 for (j = k - 1; j < symtab.len; k = ++j)
397 sym_low_pc = symtab.base[j].hist.scaled_addr;
398 sym_high_pc = symtab.base[j + 1].hist.scaled_addr;
400 /* If high end of bin is below entry address,
401 go for next bin. */
402 if (bin_high_pc < sym_low_pc)
403 break;
405 /* If low end of bin is above high end of symbol,
406 go for next symbol. */
407 if (bin_low_pc >= sym_high_pc)
408 continue;
410 overlap =
411 MIN (bin_high_pc, sym_high_pc) - MAX (bin_low_pc, sym_low_pc);
412 if (overlap > 0)
414 DBG (SAMPLEDEBUG,
415 printf (
416 "[assign_samples] [0x%lx,0x%lx) %s gets %f ticks %ld overlap\n",
417 (unsigned long) symtab.base[j].addr,
418 (unsigned long) (sizeof (UNIT) * sym_high_pc),
419 symtab.base[j].name, overlap * count_time / hist_scale,
420 (long) overlap));
422 addr = symtab.base[j].addr;
423 credit = overlap * count_time / hist_scale;
425 /* Credit symbol if it appears in INCL_FLAT or that
426 table is empty and it does not appear it in
427 EXCL_FLAT. */
428 if (sym_lookup (&syms[INCL_FLAT], addr)
429 || (syms[INCL_FLAT].len == 0
430 && !sym_lookup (&syms[EXCL_FLAT], addr)))
432 symtab.base[j].hist.time += credit;
434 else
436 total_time -= credit;
442 DBG (SAMPLEDEBUG, printf ("[assign_samples] total_time %f\n",
443 total_time));
446 /* Calls 'hist_assign_sampes_1' for all histogram records read so far. */
447 void
448 hist_assign_samples (void)
450 unsigned i;
452 scale_and_align_entries ();
454 for (i = 0; i < num_histograms; ++i)
455 hist_assign_samples_1 (&histograms[i]);
459 /* Print header for flag histogram profile. */
461 static void
462 print_header (int prefix)
464 char unit[64];
466 sprintf (unit, _("%c%c/call"), prefix, hist_dimension_abbrev);
468 if (bsd_style_output)
470 printf (_("\ngranularity: each sample hit covers %ld byte(s)"),
471 (long) hist_scale * (long) sizeof (UNIT));
472 if (total_time > 0.0)
474 printf (_(" for %.2f%% of %.2f %s\n\n"),
475 100.0 / total_time, total_time / hz, hist_dimension);
478 else
480 printf (_("\nEach sample counts as %g %s.\n"), 1.0 / hz, hist_dimension);
483 if (total_time <= 0.0)
485 printf (_(" no time accumulated\n\n"));
487 /* This doesn't hurt since all the numerators will be zero. */
488 total_time = 1.0;
491 printf ("%5.5s %10.10s %8.8s %8.8s %8.8s %8.8s %-8.8s\n",
492 "% ", _("cumulative"), _("self "), "", _("self "), _("total "),
493 "");
494 printf ("%5.5s %9.9s %8.8s %8.8s %8.8s %8.8s %-8.8s\n",
495 _("time"), hist_dimension, hist_dimension, _("calls"), unit, unit,
496 _("name"));
500 static void
501 print_line (Sym *sym, double scale)
503 if (ignore_zeros && sym->ncalls == 0 && sym->hist.time == 0)
504 return;
506 accum_time += sym->hist.time;
508 if (bsd_style_output)
509 printf ("%5.1f %10.2f %8.2f",
510 total_time > 0.0 ? 100 * sym->hist.time / total_time : 0.0,
511 accum_time / hz, sym->hist.time / hz);
512 else
513 printf ("%6.2f %9.2f %8.2f",
514 total_time > 0.0 ? 100 * sym->hist.time / total_time : 0.0,
515 accum_time / hz, sym->hist.time / hz);
517 if (sym->ncalls != 0)
518 printf (" %8lu %8.2f %8.2f ",
519 sym->ncalls, scale * sym->hist.time / hz / sym->ncalls,
520 scale * (sym->hist.time + sym->cg.child_time) / hz / sym->ncalls);
521 else
522 printf (" %8.8s %8.8s %8.8s ", "", "", "");
524 if (bsd_style_output)
525 print_name (sym);
526 else
527 print_name_only (sym);
529 printf ("\n");
533 /* Compare LP and RP. The primary comparison key is execution time,
534 the secondary is number of invocation, and the tertiary is the
535 lexicographic order of the function names. */
537 static int
538 cmp_time (const PTR lp, const PTR rp)
540 const Sym *left = *(const Sym **) lp;
541 const Sym *right = *(const Sym **) rp;
542 double time_diff;
544 time_diff = right->hist.time - left->hist.time;
546 if (time_diff > 0.0)
547 return 1;
549 if (time_diff < 0.0)
550 return -1;
552 if (right->ncalls > left->ncalls)
553 return 1;
555 if (right->ncalls < left->ncalls)
556 return -1;
558 return strcmp (left->name, right->name);
562 /* Print the flat histogram profile. */
564 void
565 hist_print (void)
567 Sym **time_sorted_syms, *top_dog, *sym;
568 unsigned int sym_index;
569 unsigned log_scale;
570 double top_time;
571 bfd_vma addr;
573 if (first_output)
574 first_output = FALSE;
575 else
576 printf ("\f\n");
578 accum_time = 0.0;
580 if (bsd_style_output)
582 if (print_descriptions)
584 printf (_("\n\n\nflat profile:\n"));
585 flat_blurb (stdout);
588 else
590 printf (_("Flat profile:\n"));
593 /* Sort the symbol table by time (call-count and name as secondary
594 and tertiary keys). */
595 time_sorted_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
597 for (sym_index = 0; sym_index < symtab.len; ++sym_index)
598 time_sorted_syms[sym_index] = &symtab.base[sym_index];
600 qsort (time_sorted_syms, symtab.len, sizeof (Sym *), cmp_time);
602 if (bsd_style_output)
604 log_scale = 5; /* Milli-seconds is BSD-default. */
606 else
608 /* Search for symbol with highest per-call
609 execution time and scale accordingly. */
610 log_scale = 0;
611 top_dog = 0;
612 top_time = 0.0;
614 for (sym_index = 0; sym_index < symtab.len; ++sym_index)
616 sym = time_sorted_syms[sym_index];
618 if (sym->ncalls != 0)
620 double call_time;
622 call_time = (sym->hist.time + sym->cg.child_time) / sym->ncalls;
624 if (call_time > top_time)
626 top_dog = sym;
627 top_time = call_time;
632 if (top_dog && top_dog->ncalls != 0 && top_time > 0.0)
634 top_time /= hz;
636 for (log_scale = 0; log_scale < ARRAY_SIZE (SItab); log_scale ++)
638 double scaled_value = SItab[log_scale].scale * top_time;
640 if (scaled_value >= 1.0 && scaled_value < 1000.0)
641 break;
646 /* For now, the dimension is always seconds. In the future, we
647 may also want to support other (pseudo-)dimensions (such as
648 I-cache misses etc.). */
649 print_header (SItab[log_scale].prefix);
651 for (sym_index = 0; sym_index < symtab.len; ++sym_index)
653 addr = time_sorted_syms[sym_index]->addr;
655 /* Print symbol if its in INCL_FLAT table or that table
656 is empty and the symbol is not in EXCL_FLAT. */
657 if (sym_lookup (&syms[INCL_FLAT], addr)
658 || (syms[INCL_FLAT].len == 0
659 && !sym_lookup (&syms[EXCL_FLAT], addr)))
660 print_line (time_sorted_syms[sym_index], SItab[log_scale].scale);
663 free (time_sorted_syms);
665 if (print_descriptions && !bsd_style_output)
666 flat_blurb (stdout);
670 hist_check_address (unsigned address)
672 unsigned i;
674 for (i = 0; i < num_histograms; ++i)
675 if (histograms[i].lowpc <= address && address < histograms[i].highpc)
676 return 1;
678 return 0;
681 #if ! defined(min)
682 #define min(a,b) (((a)<(b)) ? (a) : (b))
683 #endif
684 #if ! defined(max)
685 #define max(a,b) (((a)>(b)) ? (a) : (b))
686 #endif
688 void
689 hist_clip_symbol_address (bfd_vma *p_lowpc, bfd_vma *p_highpc)
691 unsigned i;
692 int found = 0;
694 if (num_histograms == 0)
696 *p_highpc = *p_lowpc;
697 return;
700 for (i = 0; i < num_histograms; ++i)
702 bfd_vma common_low, common_high;
703 common_low = max (histograms[i].lowpc, *p_lowpc);
704 common_high = min (histograms[i].highpc, *p_highpc);
706 if (common_low < common_high)
708 if (found)
710 fprintf (stderr,
711 _("%s: found a symbol that covers "
712 "several histogram records"),
713 whoami);
714 done (1);
717 found = 1;
718 *p_lowpc = common_low;
719 *p_highpc = common_high;
723 if (!found)
724 *p_highpc = *p_lowpc;
727 /* Find and return exising histogram record having the same lowpc and
728 highpc as passed via the parameters. Return NULL if nothing is found.
729 The return value is valid until any new histogram is read. */
730 static histogram *
731 find_histogram (bfd_vma lowpc, bfd_vma highpc)
733 unsigned i;
734 for (i = 0; i < num_histograms; ++i)
736 if (histograms[i].lowpc == lowpc && histograms[i].highpc == highpc)
737 return &histograms[i];
739 return 0;
742 /* Given a PC, return histogram record which address range include this PC.
743 Return NULL if there's no such record. */
744 static histogram *
745 find_histogram_for_pc (bfd_vma pc)
747 unsigned i;
748 for (i = 0; i < num_histograms; ++i)
750 if (histograms[i].lowpc <= pc && pc < histograms[i].highpc)
751 return &histograms[i];
753 return 0;