* config/tc-arm.c (arm_cpus): Add entry for ARM Cortex-M0.
[binutils-gdb.git] / gprof / cg_print.c
blob58ff60841f175cdc3091b833cdb0599e57b556a6
1 /* cg_print.c - Print routines for displaying call graphs.
3 Copyright 2000, 2001, 2002, 2004, 2007 Free Software Foundation, Inc.
5 This file is part of GNU Binutils.
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
20 02110-1301, USA. */
22 #include "gprof.h"
23 #include "libiberty.h"
24 #include "search_list.h"
25 #include "source.h"
26 #include "symtab.h"
27 #include "cg_arcs.h"
28 #include "cg_print.h"
29 #include "hist.h"
30 #include "utils.h"
31 #include "corefile.h"
33 /* Return value of comparison functions used to sort tables. */
34 #define LESSTHAN -1
35 #define EQUALTO 0
36 #define GREATERTHAN 1
38 static void print_header (void);
39 static void print_cycle (Sym *);
40 static int cmp_member (Sym *, Sym *);
41 static void sort_members (Sym *);
42 static void print_members (Sym *);
43 static int cmp_arc (Arc *, Arc *);
44 static void sort_parents (Sym *);
45 static void print_parents (Sym *);
46 static void sort_children (Sym *);
47 static void print_children (Sym *);
48 static void print_line (Sym *);
49 static int cmp_name (const PTR, const PTR);
50 static int cmp_arc_count (const PTR, const PTR);
51 static int cmp_fun_nuses (const PTR, const PTR);
52 static void order_and_dump_functions_by_arcs
53 (Arc **, unsigned long, int, Arc **, unsigned long *);
55 /* Declarations of automatically generated functions to output blurbs. */
56 extern void bsd_callg_blurb (FILE * fp);
57 extern void fsf_callg_blurb (FILE * fp);
59 double print_time = 0.0;
62 static void
63 print_header ()
65 if (first_output)
66 first_output = FALSE;
67 else
68 printf ("\f\n");
70 if (!bsd_style_output)
72 if (print_descriptions)
73 printf (_("\t\t Call graph (explanation follows)\n\n"));
74 else
75 printf (_("\t\t\tCall graph\n\n"));
78 printf (_("\ngranularity: each sample hit covers %ld byte(s)"),
79 (long) hist_scale * (long) sizeof (UNIT));
81 if (print_time > 0.0)
82 printf (_(" for %.2f%% of %.2f seconds\n\n"),
83 100.0 / print_time, print_time / hz);
84 else
86 printf (_(" no time propagated\n\n"));
88 /* This doesn't hurt, since all the numerators will be 0.0. */
89 print_time = 1.0;
92 if (bsd_style_output)
94 printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s %-8.8s\n",
95 "", "", "", "", _("called"), _("total"), _("parents"));
96 printf ("%-6.6s %5.5s %7.7s %11.11s %7.7s+%-7.7s %-8.8s\t%5.5s\n",
97 _("index"), _("%time"), _("self"), _("descendants"),
98 _("called"), _("self"), _("name"), _("index"));
99 printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s %-8.8s\n",
100 "", "", "", "", _("called"), _("total"), _("children"));
101 printf ("\n");
103 else
105 printf (_("index %% time self children called name\n"));
109 /* Print a cycle header. */
111 static void
112 print_cycle (Sym *cyc)
114 char buf[BUFSIZ];
116 sprintf (buf, "[%d]", cyc->cg.index);
117 printf (bsd_style_output
118 ? "%-6.6s %5.1f %7.2f %11.2f %7lu"
119 : "%-6.6s %5.1f %7.2f %7.2f %7lu", buf,
120 100 * (cyc->cg.prop.self + cyc->cg.prop.child) / print_time,
121 cyc->cg.prop.self / hz, cyc->cg.prop.child / hz, cyc->ncalls);
123 if (cyc->cg.self_calls != 0)
124 printf ("+%-7lu", cyc->cg.self_calls);
125 else
126 printf (" %7.7s", "");
128 printf (_(" <cycle %d as a whole> [%d]\n"), cyc->cg.cyc.num, cyc->cg.index);
131 /* Compare LEFT and RIGHT membmer. Major comparison key is
132 CG.PROP.SELF+CG.PROP.CHILD, secondary key is NCALLS+CG.SELF_CALLS. */
134 static int
135 cmp_member (Sym *left, Sym *right)
137 double left_time = left->cg.prop.self + left->cg.prop.child;
138 double right_time = right->cg.prop.self + right->cg.prop.child;
139 unsigned long left_calls = left->ncalls + left->cg.self_calls;
140 unsigned long right_calls = right->ncalls + right->cg.self_calls;
142 if (left_time > right_time)
143 return GREATERTHAN;
145 if (left_time < right_time)
146 return LESSTHAN;
148 if (left_calls > right_calls)
149 return GREATERTHAN;
151 if (left_calls < right_calls)
152 return LESSTHAN;
154 return EQUALTO;
157 /* Sort members of a cycle. */
159 static void
160 sort_members (Sym *cyc)
162 Sym *todo, *doing, *prev;
164 /* Detach cycle members from cyclehead,
165 and insertion sort them back on. */
166 todo = cyc->cg.cyc.next;
167 cyc->cg.cyc.next = 0;
169 for (doing = todo; doing != NULL; doing = todo)
171 todo = doing->cg.cyc.next;
173 for (prev = cyc; prev->cg.cyc.next; prev = prev->cg.cyc.next)
175 if (cmp_member (doing, prev->cg.cyc.next) == GREATERTHAN)
176 break;
179 doing->cg.cyc.next = prev->cg.cyc.next;
180 prev->cg.cyc.next = doing;
184 /* Print the members of a cycle. */
186 static void
187 print_members (Sym *cyc)
189 Sym *member;
191 sort_members (cyc);
193 for (member = cyc->cg.cyc.next; member; member = member->cg.cyc.next)
195 printf (bsd_style_output
196 ? "%6.6s %5.5s %7.2f %11.2f %7lu"
197 : "%6.6s %5.5s %7.2f %7.2f %7lu",
198 "", "", member->cg.prop.self / hz, member->cg.prop.child / hz,
199 member->ncalls);
201 if (member->cg.self_calls != 0)
202 printf ("+%-7lu", member->cg.self_calls);
203 else
204 printf (" %7.7s", "");
206 printf (" ");
207 print_name (member);
208 printf ("\n");
212 /* Compare two arcs to/from the same child/parent.
213 - if one arc is a self arc, it's least.
214 - if one arc is within a cycle, it's less than.
215 - if both arcs are within a cycle, compare arc counts.
216 - if neither arc is within a cycle, compare with
217 time + child_time as major key
218 arc count as minor key. */
220 static int
221 cmp_arc (Arc *left, Arc *right)
223 Sym *left_parent = left->parent;
224 Sym *left_child = left->child;
225 Sym *right_parent = right->parent;
226 Sym *right_child = right->child;
227 double left_time, right_time;
229 DBG (TIMEDEBUG,
230 printf ("[cmp_arc] ");
231 print_name (left_parent);
232 printf (" calls ");
233 print_name (left_child);
234 printf (" %f + %f %lu/%lu\n", left->time, left->child_time,
235 left->count, left_child->ncalls);
236 printf ("[cmp_arc] ");
237 print_name (right_parent);
238 printf (" calls ");
239 print_name (right_child);
240 printf (" %f + %f %lu/%lu\n", right->time, right->child_time,
241 right->count, right_child->ncalls);
242 printf ("\n");
245 if (left_parent == left_child)
246 return LESSTHAN; /* Left is a self call. */
248 if (right_parent == right_child)
249 return GREATERTHAN; /* Right is a self call. */
251 if (left_parent->cg.cyc.num != 0 && left_child->cg.cyc.num != 0
252 && left_parent->cg.cyc.num == left_child->cg.cyc.num)
254 /* Left is a call within a cycle. */
255 if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0
256 && right_parent->cg.cyc.num == right_child->cg.cyc.num)
258 /* Right is a call within the cycle, too. */
259 if (left->count < right->count)
260 return LESSTHAN;
262 if (left->count > right->count)
263 return GREATERTHAN;
265 return EQUALTO;
267 else
269 /* Right isn't a call within the cycle. */
270 return LESSTHAN;
273 else
275 /* Left isn't a call within a cycle. */
276 if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0
277 && right_parent->cg.cyc.num == right_child->cg.cyc.num)
279 /* Right is a call within a cycle. */
280 return GREATERTHAN;
282 else
284 /* Neither is a call within a cycle. */
285 left_time = left->time + left->child_time;
286 right_time = right->time + right->child_time;
288 if (left_time < right_time)
289 return LESSTHAN;
291 if (left_time > right_time)
292 return GREATERTHAN;
294 if (left->count < right->count)
295 return LESSTHAN;
297 if (left->count > right->count)
298 return GREATERTHAN;
300 return EQUALTO;
306 static void
307 sort_parents (Sym * child)
309 Arc *arc, *detached, sorted, *prev;
311 /* Unlink parents from child, then insertion sort back on to
312 sorted's parents.
313 *arc the arc you have detached and are inserting.
314 *detached the rest of the arcs to be sorted.
315 sorted arc list onto which you insertion sort.
316 *prev arc before the arc you are comparing. */
317 sorted.next_parent = 0;
319 for (arc = child->cg.parents; arc; arc = detached)
321 detached = arc->next_parent;
323 /* Consider *arc as disconnected; insert it into sorted. */
324 for (prev = &sorted; prev->next_parent; prev = prev->next_parent)
326 if (cmp_arc (arc, prev->next_parent) != GREATERTHAN)
327 break;
330 arc->next_parent = prev->next_parent;
331 prev->next_parent = arc;
334 /* Reattach sorted arcs to child. */
335 child->cg.parents = sorted.next_parent;
339 static void
340 print_parents (Sym *child)
342 Sym *parent;
343 Arc *arc;
344 Sym *cycle_head;
346 if (child->cg.cyc.head != 0)
347 cycle_head = child->cg.cyc.head;
348 else
349 cycle_head = child;
351 if (!child->cg.parents)
353 printf (bsd_style_output
354 ? _("%6.6s %5.5s %7.7s %11.11s %7.7s %7.7s <spontaneous>\n")
355 : _("%6.6s %5.5s %7.7s %7.7s %7.7s %7.7s <spontaneous>\n"),
356 "", "", "", "", "", "");
357 return;
360 sort_parents (child);
362 for (arc = child->cg.parents; arc; arc = arc->next_parent)
364 parent = arc->parent;
365 if (child == parent || (child->cg.cyc.num != 0
366 && parent->cg.cyc.num == child->cg.cyc.num))
368 /* Selfcall or call among siblings. */
369 printf (bsd_style_output
370 ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s "
371 : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s ",
372 "", "", "", "",
373 arc->count, "");
374 print_name (parent);
375 printf ("\n");
377 else
379 /* Regular parent of child. */
380 printf (bsd_style_output
381 ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu "
382 : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu ",
383 "", "",
384 arc->time / hz, arc->child_time / hz,
385 arc->count, cycle_head->ncalls);
386 print_name (parent);
387 printf ("\n");
393 static void
394 sort_children (Sym *parent)
396 Arc *arc, *detached, sorted, *prev;
398 /* Unlink children from parent, then insertion sort back on to
399 sorted's children.
400 *arc the arc you have detached and are inserting.
401 *detached the rest of the arcs to be sorted.
402 sorted arc list onto which you insertion sort.
403 *prev arc before the arc you are comparing. */
404 sorted.next_child = 0;
406 for (arc = parent->cg.children; arc; arc = detached)
408 detached = arc->next_child;
410 /* Consider *arc as disconnected; insert it into sorted. */
411 for (prev = &sorted; prev->next_child; prev = prev->next_child)
413 if (cmp_arc (arc, prev->next_child) != LESSTHAN)
414 break;
417 arc->next_child = prev->next_child;
418 prev->next_child = arc;
421 /* Reattach sorted children to parent. */
422 parent->cg.children = sorted.next_child;
426 static void
427 print_children (Sym *parent)
429 Sym *child;
430 Arc *arc;
432 sort_children (parent);
433 arc = parent->cg.children;
435 for (arc = parent->cg.children; arc; arc = arc->next_child)
437 child = arc->child;
438 if (child == parent || (child->cg.cyc.num != 0
439 && child->cg.cyc.num == parent->cg.cyc.num))
441 /* Self call or call to sibling. */
442 printf (bsd_style_output
443 ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s "
444 : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s ",
445 "", "", "", "", arc->count, "");
446 print_name (child);
447 printf ("\n");
449 else
451 /* Regular child of parent. */
452 printf (bsd_style_output
453 ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu "
454 : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu ",
455 "", "",
456 arc->time / hz, arc->child_time / hz,
457 arc->count, child->cg.cyc.head->ncalls);
458 print_name (child);
459 printf ("\n");
465 static void
466 print_line (Sym *np)
468 char buf[BUFSIZ];
470 sprintf (buf, "[%d]", np->cg.index);
471 printf (bsd_style_output
472 ? "%-6.6s %5.1f %7.2f %11.2f"
473 : "%-6.6s %5.1f %7.2f %7.2f", buf,
474 100 * (np->cg.prop.self + np->cg.prop.child) / print_time,
475 np->cg.prop.self / hz, np->cg.prop.child / hz);
477 if ((np->ncalls + np->cg.self_calls) != 0)
479 printf (" %7lu", np->ncalls);
481 if (np->cg.self_calls != 0)
482 printf ("+%-7lu ", np->cg.self_calls);
483 else
484 printf (" %7.7s ", "");
486 else
488 printf (" %7.7s %7.7s ", "", "");
491 print_name (np);
492 printf ("\n");
496 /* Print dynamic call graph. */
498 void
499 cg_print (Sym ** timesortsym)
501 unsigned int index;
502 Sym *parent;
504 if (print_descriptions && bsd_style_output)
505 bsd_callg_blurb (stdout);
507 print_header ();
509 for (index = 0; index < symtab.len + num_cycles; ++index)
511 parent = timesortsym[index];
513 if ((ignore_zeros && parent->ncalls == 0
514 && parent->cg.self_calls == 0 && parent->cg.prop.self == 0
515 && parent->cg.prop.child == 0)
516 || !parent->cg.print_flag
517 || (line_granularity && ! parent->is_func))
518 continue;
520 if (!parent->name && parent->cg.cyc.num != 0)
522 /* Cycle header. */
523 print_cycle (parent);
524 print_members (parent);
526 else
528 print_parents (parent);
529 print_line (parent);
530 print_children (parent);
533 if (bsd_style_output)
534 printf ("\n");
536 printf ("-----------------------------------------------\n");
538 if (bsd_style_output)
539 printf ("\n");
542 free (timesortsym);
544 if (print_descriptions && !bsd_style_output)
545 fsf_callg_blurb (stdout);
549 static int
550 cmp_name (const PTR left, const PTR right)
552 const Sym **npp1 = (const Sym **) left;
553 const Sym **npp2 = (const Sym **) right;
555 return strcmp ((*npp1)->name, (*npp2)->name);
559 void
560 cg_print_index ()
562 unsigned int index;
563 unsigned int nnames, todo, i, j;
564 int col, starting_col;
565 Sym **name_sorted_syms, *sym;
566 const char *filename;
567 char buf[20];
568 int column_width = (output_width - 1) / 3; /* Don't write in last col! */
570 /* Now, sort regular function name
571 alphabetically to create an index. */
572 name_sorted_syms = (Sym **) xmalloc ((symtab.len + num_cycles) * sizeof (Sym *));
574 for (index = 0, nnames = 0; index < symtab.len; index++)
576 if (ignore_zeros && symtab.base[index].ncalls == 0
577 && symtab.base[index].hist.time == 0)
578 continue;
580 name_sorted_syms[nnames++] = &symtab.base[index];
583 qsort (name_sorted_syms, nnames, sizeof (Sym *), cmp_name);
585 for (index = 1, todo = nnames; index <= num_cycles; index++)
586 name_sorted_syms[todo++] = &cycle_header[index];
588 printf ("\f\n");
589 printf (_("Index by function name\n\n"));
590 index = (todo + 2) / 3;
592 for (i = 0; i < index; i++)
594 col = 0;
595 starting_col = 0;
597 for (j = i; j < todo; j += index)
599 sym = name_sorted_syms[j];
601 if (sym->cg.print_flag)
602 sprintf (buf, "[%d]", sym->cg.index);
603 else
604 sprintf (buf, "(%d)", sym->cg.index);
606 if (j < nnames)
608 if (bsd_style_output)
610 printf ("%6.6s %-19.19s", buf, sym->name);
612 else
614 col += strlen (buf);
616 for (; col < starting_col + 5; ++col)
617 putchar (' ');
619 printf (" %s ", buf);
620 col += print_name_only (sym);
622 if (!line_granularity && sym->is_static && sym->file)
624 filename = sym->file->name;
626 if (!print_path)
628 filename = strrchr (filename, '/');
630 if (filename)
631 ++filename;
632 else
633 filename = sym->file->name;
636 printf (" (%s)", filename);
637 col += strlen (filename) + 3;
641 else
643 if (bsd_style_output)
645 printf ("%6.6s ", buf);
646 sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num);
647 printf ("%-19.19s", buf);
649 else
651 col += strlen (buf);
652 for (; col < starting_col + 5; ++col)
653 putchar (' ');
654 printf (" %s ", buf);
655 sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num);
656 printf ("%s", buf);
657 col += strlen (buf);
661 starting_col += column_width;
664 printf ("\n");
667 free (name_sorted_syms);
670 /* Compare two arcs based on their usage counts.
671 We want to sort in descending order. */
673 static int
674 cmp_arc_count (const PTR left, const PTR right)
676 const Arc **npp1 = (const Arc **) left;
677 const Arc **npp2 = (const Arc **) right;
679 if ((*npp1)->count > (*npp2)->count)
680 return -1;
681 else if ((*npp1)->count < (*npp2)->count)
682 return 1;
683 else
684 return 0;
687 /* Compare two funtions based on their usage counts.
688 We want to sort in descending order. */
690 static int
691 cmp_fun_nuses (const PTR left, const PTR right)
693 const Sym **npp1 = (const Sym **) left;
694 const Sym **npp2 = (const Sym **) right;
696 if ((*npp1)->nuses > (*npp2)->nuses)
697 return -1;
698 else if ((*npp1)->nuses < (*npp2)->nuses)
699 return 1;
700 else
701 return 0;
704 /* Print a suggested function ordering based on the profiling data.
706 We perform 4 major steps when ordering functions:
708 * Group unused functions together and place them at the
709 end of the function order.
711 * Search the highest use arcs (those which account for 90% of
712 the total arc count) for functions which have several parents.
714 Group those with the most call sites together (currently the
715 top 1.25% which have at least five different call sites).
717 These are emitted at the start of the function order.
719 * Use a greedy placement algorithm to place functions which
720 occur in the top 99% of the arcs in the profile. Some provisions
721 are made to handle high usage arcs where the parent and/or
722 child has already been placed.
724 * Run the same greedy placement algorithm on the remaining
725 arcs to place the leftover functions.
728 The various "magic numbers" should (one day) be tuneable by command
729 line options. They were arrived at by benchmarking a few applications
730 with various values to see which values produced better overall function
731 orderings.
733 Of course, profiling errors, machine limitations (PA long calls), and
734 poor cutoff values for the placement algorithm may limit the usefullness
735 of the resulting function order. Improvements would be greatly appreciated.
737 Suggestions:
739 * Place the functions with many callers near the middle of the
740 list to reduce long calls.
742 * Propagate arc usage changes as functions are placed. Ie if
743 func1 and func2 are placed together, arcs to/from those arcs
744 to the same parent/child should be combined, then resort the
745 arcs to choose the next one.
747 * Implement some global positioning algorithm to place the
748 chains made by the greedy local positioning algorithm. Probably
749 by examining arcs which haven't been placed yet to tie two
750 chains together.
752 * Take a function's size and time into account in the algorithm;
753 size in particular is important on the PA (long calls). Placing
754 many small functions onto their own page may be wise.
756 * Use better profiling information; many published algorithms
757 are based on call sequences through time, rather than just
758 arc counts.
760 * Prodecure cloning could improve performance when a small number
761 of arcs account for most of the calls to a particular function.
763 * Use relocation information to avoid moving unused functions
764 completely out of the code stream; this would avoid severe lossage
765 when the profile data bears little resemblance to actual runs.
767 * Propagation of arc usages should also improve .o link line
768 ordering which shares the same arc placement algorithm with
769 the function ordering code (in fact it is a degenerate case
770 of function ordering). */
772 void
773 cg_print_function_ordering ()
775 unsigned long index, used, unused, scratch_index;
776 unsigned long unplaced_arc_count, high_arc_count, scratch_arc_count;
777 #ifdef __GNUC__
778 unsigned long long total_arcs, tmp_arcs_count;
779 #else
780 unsigned long total_arcs, tmp_arcs_count;
781 #endif
782 Sym **unused_syms, **used_syms, **scratch_syms;
783 Arc **unplaced_arcs, **high_arcs, **scratch_arcs;
785 index = 0;
786 used = 0;
787 unused = 0;
788 scratch_index = 0;
789 unplaced_arc_count = 0;
790 high_arc_count = 0;
791 scratch_arc_count = 0;
793 /* First group all the unused functions together. */
794 unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
795 used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
796 scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
797 high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
798 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
799 unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
801 /* Walk through all the functions; mark those which are never
802 called as placed (we'll emit them as a group later). */
803 for (index = 0, used = 0, unused = 0; index < symtab.len; index++)
805 if (symtab.base[index].ncalls == 0)
807 unused_syms[unused++] = &symtab.base[index];
808 symtab.base[index].has_been_placed = 1;
810 else
812 used_syms[used++] = &symtab.base[index];
813 symtab.base[index].has_been_placed = 0;
814 symtab.base[index].next = 0;
815 symtab.base[index].prev = 0;
816 symtab.base[index].nuses = 0;
820 /* Sort the arcs from most used to least used. */
821 qsort (arcs, numarcs, sizeof (Arc *), cmp_arc_count);
823 /* Compute the total arc count. Also mark arcs as unplaced.
825 Note we don't compensate for overflow if that happens!
826 Overflow is much less likely when this file is compiled
827 with GCC as it can double-wide integers via long long. */
828 total_arcs = 0;
829 for (index = 0; index < numarcs; index++)
831 total_arcs += arcs[index]->count;
832 arcs[index]->has_been_placed = 0;
835 /* We want to pull out those functions which are referenced
836 by many highly used arcs and emit them as a group. This
837 could probably use some tuning. */
838 tmp_arcs_count = 0;
839 for (index = 0; index < numarcs; index++)
841 tmp_arcs_count += arcs[index]->count;
843 /* Count how many times each parent and child are used up
844 to our threshhold of arcs (90%). */
845 if ((double)tmp_arcs_count / (double)total_arcs > 0.90)
846 break;
848 arcs[index]->child->nuses++;
851 /* Now sort a temporary symbol table based on the number of
852 times each function was used in the highest used arcs. */
853 memcpy (scratch_syms, used_syms, used * sizeof (Sym *));
854 qsort (scratch_syms, used, sizeof (Sym *), cmp_fun_nuses);
856 /* Now pick out those symbols we're going to emit as
857 a group. We take up to 1.25% of the used symbols. */
858 for (index = 0; index < used / 80; index++)
860 Sym *sym = scratch_syms[index];
861 Arc *arc;
863 /* If we hit symbols that aren't used from many call sites,
864 then we can quit. We choose five as the low limit for
865 no particular reason. */
866 if (sym->nuses == 5)
867 break;
869 /* We're going to need the arcs between these functions.
870 Unfortunately, we don't know all these functions
871 until we're done. So we keep track of all the arcs
872 to the functions we care about, then prune out those
873 which are uninteresting.
875 An interesting variation would be to quit when we found
876 multi-call site functions which account for some percentage
877 of the arcs. */
878 arc = sym->cg.children;
880 while (arc)
882 if (arc->parent != arc->child)
883 scratch_arcs[scratch_arc_count++] = arc;
884 arc->has_been_placed = 1;
885 arc = arc->next_child;
888 arc = sym->cg.parents;
890 while (arc)
892 if (arc->parent != arc->child)
893 scratch_arcs[scratch_arc_count++] = arc;
894 arc->has_been_placed = 1;
895 arc = arc->next_parent;
898 /* Keep track of how many symbols we're going to place. */
899 scratch_index = index;
901 /* A lie, but it makes identifying
902 these functions easier later. */
903 sym->has_been_placed = 1;
906 /* Now walk through the temporary arcs and copy
907 those we care about into the high arcs array. */
908 for (index = 0; index < scratch_arc_count; index++)
910 Arc *arc = scratch_arcs[index];
912 /* If this arc refers to highly used functions, then
913 then we want to keep it. */
914 if (arc->child->has_been_placed
915 && arc->parent->has_been_placed)
917 high_arcs[high_arc_count++] = scratch_arcs[index];
919 /* We need to turn of has_been_placed since we're going to
920 use the main arc placement algorithm on these arcs. */
921 arc->child->has_been_placed = 0;
922 arc->parent->has_been_placed = 0;
926 /* Dump the multi-site high usage functions which are not
927 going to be ordered by the main ordering algorithm. */
928 for (index = 0; index < scratch_index; index++)
930 if (scratch_syms[index]->has_been_placed)
931 printf ("%s\n", scratch_syms[index]->name);
934 /* Now we can order the multi-site high use
935 functions based on the arcs between them. */
936 qsort (high_arcs, high_arc_count, sizeof (Arc *), cmp_arc_count);
937 order_and_dump_functions_by_arcs (high_arcs, high_arc_count, 1,
938 unplaced_arcs, &unplaced_arc_count);
940 /* Order and dump the high use functions left,
941 these typically have only a few call sites. */
942 order_and_dump_functions_by_arcs (arcs, numarcs, 0,
943 unplaced_arcs, &unplaced_arc_count);
945 /* Now place the rarely used functions. */
946 order_and_dump_functions_by_arcs (unplaced_arcs, unplaced_arc_count, 1,
947 scratch_arcs, &scratch_arc_count);
949 /* Output any functions not emitted by the order_and_dump calls. */
950 for (index = 0; index < used; index++)
951 if (used_syms[index]->has_been_placed == 0)
952 printf("%s\n", used_syms[index]->name);
954 /* Output the unused functions. */
955 for (index = 0; index < unused; index++)
956 printf("%s\n", unused_syms[index]->name);
958 unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
959 used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
960 scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
961 high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
962 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
963 unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
965 free (unused_syms);
966 free (used_syms);
967 free (scratch_syms);
968 free (high_arcs);
969 free (scratch_arcs);
970 free (unplaced_arcs);
973 /* Place functions based on the arcs in THE_ARCS with ARC_COUNT entries;
974 place unused arcs into UNPLACED_ARCS/UNPLACED_ARC_COUNT.
976 If ALL is nonzero, then place all functions referenced by THE_ARCS,
977 else only place those referenced in the top 99% of the arcs in THE_ARCS. */
979 #define MOST 0.99
980 static void
981 order_and_dump_functions_by_arcs (the_arcs, arc_count, all,
982 unplaced_arcs, unplaced_arc_count)
983 Arc **the_arcs;
984 unsigned long arc_count;
985 int all;
986 Arc **unplaced_arcs;
987 unsigned long *unplaced_arc_count;
989 #ifdef __GNUC__
990 unsigned long long tmp_arcs, total_arcs;
991 #else
992 unsigned long tmp_arcs, total_arcs;
993 #endif
994 unsigned int index;
996 /* If needed, compute the total arc count.
998 Note we don't compensate for overflow if that happens! */
999 if (! all)
1001 total_arcs = 0;
1002 for (index = 0; index < arc_count; index++)
1003 total_arcs += the_arcs[index]->count;
1005 else
1006 total_arcs = 0;
1008 tmp_arcs = 0;
1010 for (index = 0; index < arc_count; index++)
1012 Sym *sym1, *sym2;
1013 Sym *child, *parent;
1015 tmp_arcs += the_arcs[index]->count;
1017 /* Ignore this arc if it's already been placed. */
1018 if (the_arcs[index]->has_been_placed)
1019 continue;
1021 child = the_arcs[index]->child;
1022 parent = the_arcs[index]->parent;
1024 /* If we're not using all arcs, and this is a rarely used
1025 arc, then put it on the unplaced_arc list. Similarly
1026 if both the parent and child of this arc have been placed. */
1027 if ((! all && (double)tmp_arcs / (double)total_arcs > MOST)
1028 || child->has_been_placed || parent->has_been_placed)
1030 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[index];
1031 continue;
1034 /* If all slots in the parent and child are full, then there isn't
1035 anything we can do right now. We'll place this arc on the
1036 unplaced arc list in the hope that a global positioning
1037 algorithm can use it to place function chains. */
1038 if (parent->next && parent->prev && child->next && child->prev)
1040 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[index];
1041 continue;
1044 /* If the parent is unattached, then find the closest
1045 place to attach it onto child's chain. Similarly
1046 for the opposite case. */
1047 if (!parent->next && !parent->prev)
1049 int next_count = 0;
1050 int prev_count = 0;
1051 Sym *prev = child;
1052 Sym *next = child;
1054 /* Walk to the beginning and end of the child's chain. */
1055 while (next->next)
1057 next = next->next;
1058 next_count++;
1061 while (prev->prev)
1063 prev = prev->prev;
1064 prev_count++;
1067 /* Choose the closest. */
1068 child = next_count < prev_count ? next : prev;
1070 else if (! child->next && !child->prev)
1072 int next_count = 0;
1073 int prev_count = 0;
1074 Sym *prev = parent;
1075 Sym *next = parent;
1077 while (next->next)
1079 next = next->next;
1080 next_count++;
1083 while (prev->prev)
1085 prev = prev->prev;
1086 prev_count++;
1089 parent = prev_count < next_count ? prev : next;
1091 else
1093 /* Couldn't find anywhere to attach the functions,
1094 put the arc on the unplaced arc list. */
1095 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[index];
1096 continue;
1099 /* Make sure we don't tie two ends together. */
1100 sym1 = parent;
1101 if (sym1->next)
1102 while (sym1->next)
1103 sym1 = sym1->next;
1104 else
1105 while (sym1->prev)
1106 sym1 = sym1->prev;
1108 sym2 = child;
1109 if (sym2->next)
1110 while (sym2->next)
1111 sym2 = sym2->next;
1112 else
1113 while (sym2->prev)
1114 sym2 = sym2->prev;
1116 if (sym1 == child
1117 && sym2 == parent)
1119 /* This would tie two ends together. */
1120 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[index];
1121 continue;
1124 if (parent->next)
1126 /* Must attach to the parent's prev field. */
1127 if (! child->next)
1129 /* parent-prev and child-next */
1130 parent->prev = child;
1131 child->next = parent;
1132 the_arcs[index]->has_been_placed = 1;
1135 else if (parent->prev)
1137 /* Must attach to the parent's next field. */
1138 if (! child->prev)
1140 /* parent-next and child-prev */
1141 parent->next = child;
1142 child->prev = parent;
1143 the_arcs[index]->has_been_placed = 1;
1146 else
1148 /* Can attach to either field in the parent, depends
1149 on where we've got space in the child. */
1150 if (child->prev)
1152 /* parent-prev and child-next. */
1153 parent->prev = child;
1154 child->next = parent;
1155 the_arcs[index]->has_been_placed = 1;
1157 else
1159 /* parent-next and child-prev. */
1160 parent->next = child;
1161 child->prev = parent;
1162 the_arcs[index]->has_been_placed = 1;
1167 /* Dump the chains of functions we've made. */
1168 for (index = 0; index < arc_count; index++)
1170 Sym *sym;
1171 if (the_arcs[index]->parent->has_been_placed
1172 || the_arcs[index]->child->has_been_placed)
1173 continue;
1175 sym = the_arcs[index]->parent;
1177 /* If this symbol isn't attached to any other
1178 symbols, then we've got a rarely used arc.
1180 Skip it for now, we'll deal with them later. */
1181 if (sym->next == NULL
1182 && sym->prev == NULL)
1183 continue;
1185 /* Get to the start of this chain. */
1186 while (sym->prev)
1187 sym = sym->prev;
1189 while (sym)
1191 /* Mark it as placed. */
1192 sym->has_been_placed = 1;
1193 printf ("%s\n", sym->name);
1194 sym = sym->next;
1198 /* If we want to place all the arcs, then output
1199 those which weren't placed by the main algorithm. */
1200 if (all)
1201 for (index = 0; index < arc_count; index++)
1203 Sym *sym;
1204 if (the_arcs[index]->parent->has_been_placed
1205 || the_arcs[index]->child->has_been_placed)
1206 continue;
1208 sym = the_arcs[index]->parent;
1210 sym->has_been_placed = 1;
1211 printf ("%s\n", sym->name);
1215 /* Print a suggested .o ordering for files on a link line based
1216 on profiling information. This uses the function placement
1217 code for the bulk of its work. */
1219 void
1220 cg_print_file_ordering ()
1222 unsigned long scratch_arc_count, index;
1223 Arc **scratch_arcs;
1224 char *last;
1226 scratch_arc_count = 0;
1228 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
1229 for (index = 0; index < numarcs; index++)
1231 if (! arcs[index]->parent->mapped
1232 || ! arcs[index]->child->mapped)
1233 arcs[index]->has_been_placed = 1;
1236 order_and_dump_functions_by_arcs (arcs, numarcs, 0,
1237 scratch_arcs, &scratch_arc_count);
1239 /* Output .o's not handled by the main placement algorithm. */
1240 for (index = 0; index < symtab.len; index++)
1242 if (symtab.base[index].mapped
1243 && ! symtab.base[index].has_been_placed)
1244 printf ("%s\n", symtab.base[index].name);
1247 /* Now output any .o's that didn't have any text symbols. */
1248 last = NULL;
1249 for (index = 0; index < symbol_map_count; index++)
1251 unsigned int index2;
1253 /* Don't bother searching if this symbol
1254 is the same as the previous one. */
1255 if (last && !strcmp (last, symbol_map[index].file_name))
1256 continue;
1258 for (index2 = 0; index2 < symtab.len; index2++)
1260 if (! symtab.base[index2].mapped)
1261 continue;
1263 if (!strcmp (symtab.base[index2].name, symbol_map[index].file_name))
1264 break;
1267 /* If we didn't find it in the symbol table, then it must
1268 be a .o with no text symbols. Output it last. */
1269 if (index2 == symtab.len)
1270 printf ("%s\n", symbol_map[index].file_name);
1271 last = symbol_map[index].file_name;