/* Branch prediction routines for the GNU compiler.
   Copyright (C) 2000-2024 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* References:

   [1] "Branch Prediction for Free"
       Ball and Larus; PLDI '93.
   [2] "Static Branch Frequency and Program Profile Analysis"
       Wu and Larus; MICRO-27.
   [3] "Corpus-based Static Branch Prediction"
       Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "memmodel.h"
#include "emit-rtl.h"
#include "cgraph.h"
#include "coverage.h"
#include "diagnostic-core.h"
#include "gimple-predict.h"
#include "fold-const.h"
#include "calls.h"
#include "cfganal.h"
#include "profile.h"
#include "sreal.h"
#include "cfgloop.h"
#include "gimple-iterator.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "ipa-utils.h"
#include "gimple-pretty-print.h"
#include "selftest.h"
#include "cfgrtl.h"
#include "stringpool.h"
#include "attribs.h"
/* Enum with reasons why a predictor is ignored.  */

enum predictor_reason
{
  REASON_NONE,
  REASON_IGNORED,
  REASON_SINGLE_EDGE_DUPLICATE,
  REASON_EDGE_PAIR_DUPLICATE
};

/* String messages for the aforementioned enum.  */

static const char *reason_messages[] = {"", " (ignored)",
    " (single edge duplicate)", " (edge pair duplicate)"};
static void combine_predictions_for_insn (rtx_insn *, basic_block);
static void dump_prediction (FILE *, enum br_predictor, int, basic_block,
			     enum predictor_reason, edge);
static void predict_paths_leading_to (basic_block, enum br_predictor,
				      enum prediction,
				      class loop *in_loop = NULL);
static void predict_paths_leading_to_edge (edge, enum br_predictor,
					   enum prediction,
					   class loop *in_loop = NULL);
static bool can_predict_insn_p (const rtx_insn *);
static HOST_WIDE_INT get_predictor_value (br_predictor, HOST_WIDE_INT);
static void determine_unlikely_bbs ();
static void estimate_bb_frequencies ();
/* Information we hold about each branch predictor.
   Filled using information from predict.def.  */

struct predictor_info
{
  const char *const name;	/* Name used in the debugging dumps.  */
  const int hitrate;		/* Expected hitrate used by
				   predict_insn_def call.  */
  const int flags;
};

/* Use given predictor without Dempster-Shafer theory if it matches
   using first_match heuristics.  */
#define PRED_FLAG_FIRST_MATCH 1

/* Recompute hitrate in percent to our representation.  */

#define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
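
/* For instance, assuming REG_BR_PROB_BASE is 10000, HITRATE (99)
   yields 9900, i.e. a 99% hitrate in the fixed-point scale.  */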
#define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
static const struct predictor_info predictor_info[]= {
#include "predict.def"

  /* Upper bound on predictors.  */
  {NULL, 0, 0}
};
#undef DEF_PREDICTOR
static gcov_type min_count = -1;

/* Determine the threshold for hot BB counts.  */

gcov_type
get_hot_bb_threshold ()
{
  if (min_count == -1)
    {
      const int hot_frac = param_hot_bb_count_fraction;
      const gcov_type min_hot_count
	= hot_frac
	  ? profile_info->sum_max / hot_frac
	  : (gcov_type) profile_count::max_count;
      set_hot_bb_threshold (min_hot_count);
      if (dump_file)
	fprintf (dump_file, "Setting hotness threshold to %" PRId64 ".\n",
		 min_hot_count);
    }
  return min_count;
}

/* Set the threshold for hot BB counts.  */

void
set_hot_bb_threshold (gcov_type min)
{
  min_count = min;
}
/* Return TRUE if COUNT is considered to be hot in function FUN.  */

bool
maybe_hot_count_p (struct function *fun, profile_count count)
{
  if (!count.initialized_p ())
    return true;
  if (count.ipa () == profile_count::zero ())
    return false;
  if (!count.ipa_p ())
    {
      struct cgraph_node *node = cgraph_node::get (fun->decl);
      if (!profile_info || profile_status_for_fn (fun) != PROFILE_READ)
	{
	  if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
	    return false;
	  if (node->frequency == NODE_FREQUENCY_HOT)
	    return true;
	}
      if (profile_status_for_fn (fun) == PROFILE_ABSENT)
	return true;
      if (node->frequency == NODE_FREQUENCY_EXECUTED_ONCE
	  && count < (ENTRY_BLOCK_PTR_FOR_FN (fun)->count.apply_scale (2, 3)))
	return false;
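      /* I.e. the block is considered hot only if it executes at least
	 once per param_hot_bb_frequency_fraction invocations of the
	 function.  */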
      if (count * param_hot_bb_frequency_fraction
	  < ENTRY_BLOCK_PTR_FOR_FN (fun)->count)
	return false;
      return true;
    }
  /* Code executed at most once is not hot.  */
  if (count <= MAX (profile_info ? profile_info->runs : 1, 1))
    return false;
  return (count >= get_hot_bb_threshold ());
}
/* Return true if basic block BB of function FUN can be CPU intensive
   and should thus be optimized for maximum performance.  */

bool
maybe_hot_bb_p (struct function *fun, const_basic_block bb)
{
  gcc_checking_assert (fun);
  return maybe_hot_count_p (fun, bb->count);
}

/* Return true if edge E can be CPU intensive and should thus be optimized
   for maximum performance.  */

bool
maybe_hot_edge_p (edge e)
{
  return maybe_hot_count_p (cfun, e->count ());
}
/* Return true if COUNT is considered to be never executed in function FUN
   or if function FUN is considered so in the static profile.  */

static bool
probably_never_executed (struct function *fun, profile_count count)
{
  gcc_checking_assert (fun);
  if (count.ipa () == profile_count::zero ())
    return true;
  /* Do not trust adjusted counts.  Inlining can produce low adjusted
     counts that would make us drop the code into the cold section; such
     counts are not safe even with a read profile and may lead us to
     placing code that actually gets executed into the cold section of
     the binary, which is not desirable.  */
  if (count.precise_p () && profile_status_for_fn (fun) == PROFILE_READ)
    {
      const int unlikely_frac = param_unlikely_bb_count_fraction;
      if (count * unlikely_frac >= profile_info->runs)
	return false;
      return true;
    }
  if ((!profile_info || profile_status_for_fn (fun) != PROFILE_READ)
      && (cgraph_node::get (fun->decl)->frequency
	  == NODE_FREQUENCY_UNLIKELY_EXECUTED))
    return true;
  return false;
}
/* Return true if basic block BB of function FUN is probably never executed.  */

bool
probably_never_executed_bb_p (struct function *fun, const_basic_block bb)
{
  return probably_never_executed (fun, bb->count);
}

/* Return true if edge E is unlikely executed for obvious reasons.  */

static bool
unlikely_executed_edge_p (edge e)
{
  return (e->src->count == profile_count::zero ()
	  || e->probability == profile_probability::never ())
	 || (e->flags & (EDGE_EH | EDGE_FAKE));
}

/* Return true if edge E of function FUN is probably never executed.  */

bool
probably_never_executed_edge_p (struct function *fun, edge e)
{
  if (unlikely_executed_edge_p (e))
    return true;
  return probably_never_executed (fun, e->count ());
}
/* Return the level at which function FUN should be optimized for size.  */

optimize_size_level
optimize_function_for_size_p (struct function *fun)
{
  if (!fun || !fun->decl)
    return optimize_size ? OPTIMIZE_SIZE_MAX : OPTIMIZE_SIZE_NO;
  cgraph_node *n = cgraph_node::get (fun->decl);
  if (n)
    return n->optimize_for_size_p ();
  return OPTIMIZE_SIZE_NO;
}
/* Return true if function FUN should always be optimized for speed.  */

bool
optimize_function_for_speed_p (struct function *fun)
{
  return !optimize_function_for_size_p (fun);
}

/* Return the optimization type that should be used for function FUN.  */

optimization_type
function_optimization_type (struct function *fun)
{
  return (optimize_function_for_speed_p (fun)
	  ? OPTIMIZE_FOR_SPEED
	  : OPTIMIZE_FOR_SIZE);
}
/* Return the level at which basic block BB should be optimized for size.  */

optimize_size_level
optimize_bb_for_size_p (const_basic_block bb)
{
  enum optimize_size_level ret = optimize_function_for_size_p (cfun);

  if (bb && ret < OPTIMIZE_SIZE_MAX && bb->count == profile_count::zero ())
    ret = OPTIMIZE_SIZE_MAX;
  if (bb && ret < OPTIMIZE_SIZE_BALANCED && !maybe_hot_bb_p (cfun, bb))
    ret = OPTIMIZE_SIZE_BALANCED;
  return ret;
}

/* Return TRUE if basic block BB should be optimized for speed.  */

bool
optimize_bb_for_speed_p (const_basic_block bb)
{
  return !optimize_bb_for_size_p (bb);
}

/* Return the optimization type that should be used for basic block BB.  */

optimization_type
bb_optimization_type (const_basic_block bb)
{
  return (optimize_bb_for_speed_p (bb)
	  ? OPTIMIZE_FOR_SPEED
	  : OPTIMIZE_FOR_SIZE);
}
/* Return the level at which edge E should be optimized for size.  */

optimize_size_level
optimize_edge_for_size_p (edge e)
{
  enum optimize_size_level ret = optimize_function_for_size_p (cfun);

  if (ret < OPTIMIZE_SIZE_MAX && unlikely_executed_edge_p (e))
    ret = OPTIMIZE_SIZE_MAX;
  if (ret < OPTIMIZE_SIZE_BALANCED && !maybe_hot_edge_p (e))
    ret = OPTIMIZE_SIZE_BALANCED;
  return ret;
}

/* Return TRUE if edge E should be optimized for speed.  */

bool
optimize_edge_for_speed_p (edge e)
{
  return !optimize_edge_for_size_p (e);
}
/* Return the level at which the instruction currently being expanded
   should be optimized for size.  */

optimize_size_level
optimize_insn_for_size_p (void)
{
  enum optimize_size_level ret = optimize_function_for_size_p (cfun);
  if (ret < OPTIMIZE_SIZE_BALANCED && !crtl->maybe_hot_insn_p)
    ret = OPTIMIZE_SIZE_BALANCED;
  return ret;
}

/* Return TRUE if the instruction currently being expanded should be
   optimized for speed.  */

bool
optimize_insn_for_speed_p (void)
{
  return !optimize_insn_for_size_p ();
}

/* Return the optimization type that should be used for the current
   instruction.  */

optimization_type
insn_optimization_type ()
{
  return (optimize_insn_for_speed_p ()
	  ? OPTIMIZE_FOR_SPEED
	  : OPTIMIZE_FOR_SIZE);
}
/* Return the level at which LOOP should be optimized for size.  */

optimize_size_level
optimize_loop_for_size_p (class loop *loop)
{
  return optimize_bb_for_size_p (loop->header);
}

/* Return TRUE if LOOP should be optimized for speed.  */

bool
optimize_loop_for_speed_p (class loop *loop)
{
  return optimize_bb_for_speed_p (loop->header);
}
/* Return TRUE if the nest rooted at LOOP should be optimized for speed.  */

bool
optimize_loop_nest_for_speed_p (class loop *loop)
{
  class loop *l = loop;
  if (optimize_loop_for_speed_p (loop))
    return true;
  l = loop->inner;
  while (l && l != loop)
    {
      if (optimize_loop_for_speed_p (l))
	return true;
      if (l->inner)
	l = l->inner;
      else if (l->next)
	l = l->next;
      else
	{
	  while (l != loop && !l->next)
	    l = loop_outer (l);
	  if (l != loop)
	    l = l->next;
	}
    }
  return false;
}
/* Return the level at which the nest rooted at LOOP should be optimized
   for size.  */

optimize_size_level
optimize_loop_nest_for_size_p (class loop *loop)
{
  enum optimize_size_level ret = optimize_loop_for_size_p (loop);
  class loop *l = loop;

  l = loop->inner;
  while (l && l != loop)
    {
      if (ret == OPTIMIZE_SIZE_NO)
	break;
      ret = MIN (optimize_loop_for_size_p (l), ret);
      if (l->inner)
	l = l->inner;
      else if (l->next)
	l = l->next;
      else
	{
	  while (l != loop && !l->next)
	    l = loop_outer (l);
	  if (l != loop)
	    l = l->next;
	}
    }
  return ret;
}
/* Return true if edge E is likely to be well predictable by branch
   predictor.  */

bool
predictable_edge_p (edge e)
{
  if (!e->probability.initialized_p ())
    return false;
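  /* For example, with --param predictable-branch-outcome=2 an edge is
     considered predictable when its probability is at most 2% or at
     least 98%.  */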
  if ((e->probability.to_reg_br_prob_base ()
       <= param_predictable_branch_outcome * REG_BR_PROB_BASE / 100)
      || (REG_BR_PROB_BASE - e->probability.to_reg_br_prob_base ()
	  <= param_predictable_branch_outcome * REG_BR_PROB_BASE / 100))
    return true;
  return false;
}
/* Set RTL expansion for BB profile.  */

void
rtl_profile_for_bb (basic_block bb)
{
  crtl->maybe_hot_insn_p = maybe_hot_bb_p (cfun, bb);
}

/* Set RTL expansion for edge profile.  */

void
rtl_profile_for_edge (edge e)
{
  crtl->maybe_hot_insn_p = maybe_hot_edge_p (e);
}

/* Set RTL expansion to default mode (i.e. when profile info is not known).  */
void
default_rtl_profile (void)
{
  crtl->maybe_hot_insn_p = true;
}
/* Return true if one of the outgoing edges is already predicted by
   PREDICTOR.  */

bool
rtl_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
{
  rtx note;
  if (!INSN_P (BB_END (bb)))
    return false;
  for (note = REG_NOTES (BB_END (bb)); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PRED
	&& INTVAL (XEXP (XEXP (note, 0), 0)) == (int) predictor)
      return true;
  return false;
}
/* Structure representing predictions in tree level.  */

struct edge_prediction {
  struct edge_prediction *ep_next;
  edge ep_edge;
  enum br_predictor ep_predictor;
  int ep_probability;
};

/* This map contains for a basic block the list of predictions for the
   outgoing edges.  */

static hash_map<const_basic_block, edge_prediction *> *bb_predictions;

/* Global cache for expr_expected_value.  */

struct expected_value
{
  tree val;
  enum br_predictor predictor;
  HOST_WIDE_INT probability;
};
static hash_map<int_hash<unsigned, 0>, expected_value> *ssa_expected_value;
/* Return true if one of the outgoing edges is already predicted by
   PREDICTOR.  */

bool
gimple_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
{
  struct edge_prediction *i;
  edge_prediction **preds = bb_predictions->get (bb);

  if (!preds)
    return false;

  for (i = *preds; i; i = i->ep_next)
    if (i->ep_predictor == predictor)
      return true;
  return false;
}
/* Return true if edge E is already predicted by PREDICTOR with the
   outcome TAKEN.  */

bool
edge_predicted_by_p (edge e, enum br_predictor predictor, bool taken)
{
  struct edge_prediction *i;
  basic_block bb = e->src;
  edge_prediction **preds = bb_predictions->get (bb);
  if (!preds)
    return false;

  int probability = predictor_info[(int) predictor].hitrate;

  if (taken != TAKEN)
    probability = REG_BR_PROB_BASE - probability;

  for (i = *preds; i; i = i->ep_next)
    if (i->ep_predictor == predictor
	&& i->ep_edge == e
	&& i->ep_probability == probability)
      return true;
  return false;
}
/* Return true when the probability of edge E is reliable.  */
bool
edge_probability_reliable_p (const_edge e)
{
  return e->probability.probably_reliable_p ();
}

/* Same predicate as edge_probability_reliable_p, working on notes.  */
bool
br_prob_note_reliable_p (const_rtx note)
{
  gcc_assert (REG_NOTE_KIND (note) == REG_BR_PROB);
  return profile_probability::from_reg_br_prob_note
	   (XINT (note, 0)).probably_reliable_p ();
}
static void
predict_insn (rtx_insn *insn, enum br_predictor predictor, int probability)
{
  gcc_assert (any_condjump_p (insn));
  if (!flag_guess_branch_prob)
    return;

  add_reg_note (insn, REG_BR_PRED,
		gen_rtx_CONCAT (VOIDmode,
				GEN_INT ((int) predictor),
				GEN_INT ((int) probability)));
}

/* Predict insn by given predictor.  */

void
predict_insn_def (rtx_insn *insn, enum br_predictor predictor,
		  enum prediction taken)
{
  int probability = predictor_info[(int) predictor].hitrate;
  gcc_assert (probability != PROB_UNINITIALIZED);

  if (taken != TAKEN)
    probability = REG_BR_PROB_BASE - probability;

  predict_insn (insn, predictor, probability);
}
/* Predict edge E with given probability if possible.  */

void
rtl_predict_edge (edge e, enum br_predictor predictor, int probability)
{
  rtx_insn *last_insn;
  last_insn = BB_END (e->src);

  /* We can store the branch prediction information only about
     conditional jumps.  */
  if (!any_condjump_p (last_insn))
    return;

  /* We always store probability of branching.  */
  if (e->flags & EDGE_FALLTHRU)
    probability = REG_BR_PROB_BASE - probability;

  predict_insn (last_insn, predictor, probability);
}
/* Predict edge E with the given PROBABILITY.  */
void
gimple_predict_edge (edge e, enum br_predictor predictor, int probability)
{
  if (e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
      && EDGE_COUNT (e->src->succs) > 1
      && flag_guess_branch_prob
      && optimize)
    {
      struct edge_prediction *i = XNEW (struct edge_prediction);
      edge_prediction *&preds = bb_predictions->get_or_insert (e->src);

      i->ep_next = preds;
      preds = i;
      i->ep_probability = probability;
      i->ep_predictor = predictor;
      i->ep_edge = e;
    }
}
/* Filter edge predictions PREDS by a function FILTER: if FILTER returns
   false, the prediction is removed.
   DATA are passed to the filter function.  */

static void
filter_predictions (edge_prediction **preds,
		    bool (*filter) (edge_prediction *, void *), void *data)
{
  if (!bb_predictions)
    return;

  if (preds)
    {
      struct edge_prediction **prediction = preds;
      struct edge_prediction *next;

      while (*prediction)
	{
	  if ((*filter) (*prediction, data))
	    prediction = &((*prediction)->ep_next);
	  else
	    {
	      next = (*prediction)->ep_next;
	      free (*prediction);
	      *prediction = next;
	    }
	}
    }
}
/* Filter function predicate that returns true for an edge prediction P
   if its edge is not equal to DATA.  */

static bool
not_equal_edge_p (edge_prediction *p, void *data)
{
  return p->ep_edge != (edge) data;
}

/* Remove all predictions on given basic block that are attached
   to edge E.  */
void
remove_predictions_associated_with_edge (edge e)
{
  if (!bb_predictions)
    return;

  edge_prediction **preds = bb_predictions->get (e->src);
  filter_predictions (preds, not_equal_edge_p, e);
}
/* Clears the list of predictions stored for BB.  */

static void
clear_bb_predictions (basic_block bb)
{
  edge_prediction **preds = bb_predictions->get (bb);
  struct edge_prediction *pred, *next;

  if (!preds)
    return;

  for (pred = *preds; pred; pred = next)
    {
      next = pred->ep_next;
      free (pred);
    }
  *preds = NULL;
}
/* Return true when we can store prediction on insn INSN.
   At the moment we represent predictions only on conditional
   jumps, not on computed jumps or other complicated cases.  */
static bool
can_predict_insn_p (const rtx_insn *insn)
{
  return (JUMP_P (insn)
	  && any_condjump_p (insn)
	  && EDGE_COUNT (BLOCK_FOR_INSN (insn)->succs) >= 2);
}
/* Predict edge E by given predictor if possible.  */

void
predict_edge_def (edge e, enum br_predictor predictor,
		  enum prediction taken)
{
  int probability = predictor_info[(int) predictor].hitrate;

  if (taken != TAKEN)
    probability = REG_BR_PROB_BASE - probability;

  predict_edge (e, predictor, probability);
}
/* Invert all branch predictions or probability notes in the INSN.  This needs
   to be done each time we invert the condition used by the jump.  */

void
invert_br_probabilities (rtx insn)
{
  rtx note;

  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PROB)
      XINT (note, 0) = profile_probability::from_reg_br_prob_note
			 (XINT (note, 0)).invert ().to_reg_br_prob_note ();
    else if (REG_NOTE_KIND (note) == REG_BR_PRED)
      XEXP (XEXP (note, 0), 1)
	= GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
}
/* Dump information about the branch prediction to the output file.  */

static void
dump_prediction (FILE *file, enum br_predictor predictor, int probability,
		 basic_block bb, enum predictor_reason reason = REASON_NONE,
		 edge ep_edge = NULL)
{
  edge e = ep_edge;
  edge_iterator ei;

  if (!file)
    return;

  if (e == NULL)
    FOR_EACH_EDGE (e, ei, bb->succs)
      if (! (e->flags & EDGE_FALLTHRU))
	break;

  char edge_info_str[128];
  if (ep_edge)
    sprintf (edge_info_str, " of edge %d->%d", ep_edge->src->index,
	     ep_edge->dest->index);
  else
    edge_info_str[0] = '\0';

  fprintf (file, "  %s heuristics%s%s: %.2f%%",
	   predictor_info[predictor].name,
	   edge_info_str, reason_messages[reason],
	   probability * 100.0 / REG_BR_PROB_BASE);

  if (bb->count.initialized_p ())
    {
      fprintf (file, "  exec ");
      bb->count.dump (file);
      if (e && e->count ().initialized_p () && bb->count.to_gcov_type ())
	{
	  fprintf (file, " hit ");
	  e->count ().dump (file);
	  fprintf (file, " (%.1f%%)", e->count ().to_gcov_type () * 100.0
		   / bb->count.to_gcov_type ());
	}
    }

  fprintf (file, "\n");

  /* Print output that can be easily read by the analyze_brprob.py script.
     We are interested only in counts that are read from GCDA files.  */
  if (dump_file && (dump_flags & TDF_DETAILS)
      && bb->count.precise_p ()
      && reason == REASON_NONE)
    fprintf (file, ";;heuristics;%s;%" PRId64 ";%" PRId64 ";%.1f;\n",
	     predictor_info[predictor].name,
	     bb->count.to_gcov_type (), e->count ().to_gcov_type (),
	     probability * 100.0 / REG_BR_PROB_BASE);
}
/* Return true if STMT is known to be unlikely executed.  */

static bool
unlikely_executed_stmt_p (gimple *stmt)
{
  if (!is_gimple_call (stmt))
    return false;
  /* NORETURN attribute alone is not strong enough: exit() may be quite
     likely executed once during program run.  */
  if (gimple_call_fntype (stmt)
      && lookup_attribute ("cold",
			   TYPE_ATTRIBUTES (gimple_call_fntype (stmt)))
      && !lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl)))
    return true;
  tree decl = gimple_call_fndecl (stmt);
  if (!decl)
    return false;
  if (lookup_attribute ("cold", DECL_ATTRIBUTES (decl))
      && !lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl)))
    return true;

  cgraph_node *n = cgraph_node::get (decl);
  if (!n)
    return false;

  availability avail;
  n = n->ultimate_alias_target (&avail);
  if (avail < AVAIL_AVAILABLE)
    return false;
  if (!n->analyzed
      || n->decl == current_function_decl)
    return false;
  return n->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED;
}
/* Return true if BB is unlikely executed.  */

static bool
unlikely_executed_bb_p (basic_block bb)
{
  if (bb->count == profile_count::zero ())
    return true;
  if (bb == ENTRY_BLOCK_PTR_FOR_FN (cfun) || bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
    return false;
  for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
       !gsi_end_p (gsi); gsi_next (&gsi))
    {
      if (unlikely_executed_stmt_p (gsi_stmt (gsi)))
	return true;
      if (stmt_can_terminate_bb_p (gsi_stmt (gsi)))
	return false;
    }
  return false;
}
/* We cannot predict the probabilities of outgoing edges of bb.  Set them
   evenly and hope for the best.  If UNLIKELY_EDGES is not null, distribute
   the probability evenly over all edges not mentioned in the set; the
   edges in the set are given PROB_VERY_UNLIKELY probability.  Similarly
   for LIKELY_EDGES: if we have exactly one likely edge, make the other
   edges predicted as not probable.  */
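/* For instance, a block with four successor edges, one of which is in
   UNLIKELY_EDGES, gives that edge PROB_VERY_UNLIKELY and splits the
   remaining probability evenly among the other three edges.  */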
static void
set_even_probabilities (basic_block bb,
			hash_set<edge> *unlikely_edges = NULL,
			hash_set<edge_prediction *> *likely_edges = NULL)
{
  unsigned nedges = 0, unlikely_count = 0;
  edge e = NULL;
  edge_iterator ei;
  profile_probability all = profile_probability::always ();

  FOR_EACH_EDGE (e, ei, bb->succs)
    if (e->probability.initialized_p ())
      all -= e->probability;
    else if (!unlikely_executed_edge_p (e))
      {
	nedges++;
	if (unlikely_edges != NULL && unlikely_edges->contains (e))
	  {
	    all -= profile_probability::very_unlikely ();
	    unlikely_count++;
	  }
      }

  /* Make the distribution even if all edges are unlikely.  */
  unsigned likely_count = likely_edges ? likely_edges->elements () : 0;
  if (unlikely_count == nedges)
    {
      unlikely_edges = NULL;
      unlikely_count = 0;
    }

  /* If we have one likely edge, then use its probability and distribute
     remaining probabilities as even.  */
  if (likely_count == 1)
    {
      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->probability.initialized_p ())
	  ;
	else if (!unlikely_executed_edge_p (e))
	  {
	    edge_prediction *prediction = *likely_edges->begin ();
	    int p = prediction->ep_probability;
	    profile_probability prob
	      = profile_probability::from_reg_br_prob_base (p);

	    if (prediction->ep_edge == e)
	      e->probability = prob;
	    else if (unlikely_edges != NULL && unlikely_edges->contains (e))
	      e->probability = profile_probability::very_unlikely ();
	    else
	      {
		profile_probability remainder = prob.invert ();
		remainder -= (profile_probability::very_unlikely ()
			      * unlikely_count);
		int count = nedges - unlikely_count - 1;
		gcc_assert (count >= 0);

		e->probability = remainder / count;
	      }
	  }
	else
	  e->probability = profile_probability::never ();
    }
  else
    {
      /* Make all unlikely edges unlikely and the rest will have even
	 probability.  */
      unsigned scale = nedges - unlikely_count;
      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->probability.initialized_p ())
	  ;
	else if (!unlikely_executed_edge_p (e))
	  {
	    if (unlikely_edges != NULL && unlikely_edges->contains (e))
	      e->probability = profile_probability::very_unlikely ();
	    else
	      e->probability = all / scale;
	  }
	else
	  e->probability = profile_probability::never ();
    }
}
/* Add REG_BR_PROB note to JUMP with PROB.  */

void
add_reg_br_prob_note (rtx_insn *jump, profile_probability prob)
{
  gcc_checking_assert (JUMP_P (jump) && !find_reg_note (jump, REG_BR_PROB, 0));
  add_int_reg_note (jump, REG_BR_PROB, prob.to_reg_br_prob_note ());
}
/* Combine all REG_BR_PRED notes into single probability and attach REG_BR_PROB
   note if not already present.  Remove now useless REG_BR_PRED notes.  */

static void
combine_predictions_for_insn (rtx_insn *insn, basic_block bb)
{
  rtx prob_note;
  rtx *pnote;
  rtx note;
  int best_probability = PROB_EVEN;
  enum br_predictor best_predictor = END_PREDICTORS;
  int combined_probability = REG_BR_PROB_BASE / 2;
  int d;
  bool first_match = false;
  bool found = false;

  if (!can_predict_insn_p (insn))
    {
      set_even_probabilities (bb);
      return;
    }

  prob_note = find_reg_note (insn, REG_BR_PROB, 0);
  pnote = &REG_NOTES (insn);
  if (dump_file)
    fprintf (dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
	     bb->index);

  /* We implement "first match" heuristics and use probability guessed
     by the predictor with the smallest index.  */
  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PRED)
      {
	enum br_predictor predictor = ((enum br_predictor)
				       INTVAL (XEXP (XEXP (note, 0), 0)));
	int probability = INTVAL (XEXP (XEXP (note, 0), 1));

	found = true;
	if (best_predictor > predictor
	    && predictor_info[predictor].flags & PRED_FLAG_FIRST_MATCH)
	  best_probability = probability, best_predictor = predictor;

	d = (combined_probability * probability
	     + (REG_BR_PROB_BASE - combined_probability)
	     * (REG_BR_PROB_BASE - probability));

	/* Use FP math to avoid overflows of 32bit integers.  */
	if (d == 0)
	  /* If one probability is 0% and one 100%, avoid division by zero.  */
	  combined_probability = REG_BR_PROB_BASE / 2;
	else
	  combined_probability = (((double) combined_probability) * probability
				  * REG_BR_PROB_BASE / d + 0.5);
      }
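
  /* A worked example of the Dempster-Shafer combination above, assuming
     REG_BR_PROB_BASE is 10000: combining predictions of 90% (9000) and
     80% (8000) gives d = 9000 * 8000 + 1000 * 2000 and a combined
     probability of 9000.0 * 8000 * 10000 / d + 0.5 == 9730, i.e. two
     agreeing predictors reinforce each other.  */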
  /* Decide which heuristic to use.  In case we didn't match anything,
     use no_prediction heuristic, in case we did match, use either
     first match or Dempster-Shafer theory depending on the flags.  */

  if (best_predictor != END_PREDICTORS)
    first_match = true;

  if (!found)
    dump_prediction (dump_file, PRED_NO_PREDICTION,
		     combined_probability, bb);
  else
    {
      if (!first_match)
	dump_prediction (dump_file, PRED_DS_THEORY, combined_probability,
			 bb, !first_match ? REASON_NONE : REASON_IGNORED);
      else
	dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability,
			 bb, first_match ? REASON_NONE : REASON_IGNORED);
    }

  if (first_match)
    combined_probability = best_probability;
  dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb);

  while (*pnote)
    {
      if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
	{
	  enum br_predictor predictor = ((enum br_predictor)
					 INTVAL (XEXP (XEXP (*pnote, 0), 0)));
	  int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));

	  dump_prediction (dump_file, predictor, probability, bb,
			   (!first_match || best_predictor == predictor)
			   ? REASON_NONE : REASON_IGNORED);
	  *pnote = XEXP (*pnote, 1);
	}
      else
	pnote = &XEXP (*pnote, 1);
    }

  if (!prob_note)
    {
      profile_probability p
	= profile_probability::from_reg_br_prob_base (combined_probability);
      add_reg_br_prob_note (insn, p);

      /* Save the prediction into CFG in case we are seeing non-degenerated
	 conditional jump.  */
      if (!single_succ_p (bb))
	{
	  BRANCH_EDGE (bb)->probability = p;
	  FALLTHRU_EDGE (bb)->probability
	    = BRANCH_EDGE (bb)->probability.invert ();
	}
    }
  else if (!single_succ_p (bb))
    {
      profile_probability prob = profile_probability::from_reg_br_prob_note
				   (XINT (prob_note, 0));

      BRANCH_EDGE (bb)->probability = prob;
      FALLTHRU_EDGE (bb)->probability = prob.invert ();
    }
  else
    single_succ_edge (bb)->probability = profile_probability::always ();
}
/* Edge prediction hash traits.  */

struct predictor_hash: pointer_hash <edge_prediction>
{
  static inline hashval_t hash (const edge_prediction *);
  static inline bool equal (const edge_prediction *, const edge_prediction *);
};

/* Calculate hash value of an edge prediction P based on predictor and
   normalized probability.  */

inline hashval_t
predictor_hash::hash (const edge_prediction *p)
{
  inchash::hash hstate;
  hstate.add_int (p->ep_predictor);

  int prob = p->ep_probability;
  if (prob > REG_BR_PROB_BASE / 2)
    prob = REG_BR_PROB_BASE - prob;

  hstate.add_int (prob);

  return hstate.end ();
}

/* Return true if edge predictions P1 and P2 use the same predictor and
   have equal (or opposite) probability.  */

inline bool
predictor_hash::equal (const edge_prediction *p1, const edge_prediction *p2)
{
  return (p1->ep_predictor == p2->ep_predictor
	  && (p1->ep_probability == p2->ep_probability
	      || p1->ep_probability == REG_BR_PROB_BASE - p2->ep_probability));
}

struct predictor_hash_traits: predictor_hash,
  typed_noop_remove <edge_prediction *> {};
/* Return true if edge prediction P is not in DATA hash set.  */

static bool
not_removed_prediction_p (edge_prediction *p, void *data)
{
  hash_set<edge_prediction *> *remove = (hash_set<edge_prediction *> *) data;
  return !remove->contains (p);
}
/* Prune predictions for a basic block BB.  Currently we do the following
   clean-up steps:

   1) remove duplicate predictions that are guessed with the same probability
      (different from 1/2) for both edges
   2) remove duplicates for a prediction that belongs with the same probability
      to a single edge  */

static void
prune_predictions_for_bb (basic_block bb)
{
  edge_prediction **preds = bb_predictions->get (bb);

  if (preds)
    {
      hash_table <predictor_hash_traits> s (13);
      hash_set <edge_prediction *> remove;

      /* Step 1: identify predictors that should be removed.  */
      for (edge_prediction *pred = *preds; pred; pred = pred->ep_next)
	{
	  edge_prediction *existing = s.find (pred);
	  if (existing)
	    {
	      if (pred->ep_edge == existing->ep_edge
		  && pred->ep_probability == existing->ep_probability)
		{
		  /* Remove a duplicate predictor.  */
		  dump_prediction (dump_file, pred->ep_predictor,
				   pred->ep_probability, bb,
				   REASON_SINGLE_EDGE_DUPLICATE, pred->ep_edge);

		  remove.add (pred);
		}
	      else if (pred->ep_edge != existing->ep_edge
		       && pred->ep_probability == existing->ep_probability
		       && pred->ep_probability != REG_BR_PROB_BASE / 2)
		{
		  /* Remove both predictors as they predict the same
		     for both edges.  */
		  dump_prediction (dump_file, existing->ep_predictor,
				   pred->ep_probability, bb,
				   REASON_EDGE_PAIR_DUPLICATE,
				   existing->ep_edge);
		  dump_prediction (dump_file, pred->ep_predictor,
				   pred->ep_probability, bb,
				   REASON_EDGE_PAIR_DUPLICATE,
				   pred->ep_edge);

		  remove.add (existing);
		  remove.add (pred);
		}
	    }

	  edge_prediction **slot2 = s.find_slot (pred, INSERT);
	  *slot2 = pred;
	}

      /* Step 2: Remove predictors.  */
      filter_predictions (preds, not_removed_prediction_p, &remove);
    }
}
/* Combine predictions into single probability and store them into CFG.
   Remove now useless prediction entries.
   If DRY_RUN is set, only produce dumps and do not modify profile.  */

static void
combine_predictions_for_bb (basic_block bb, bool dry_run)
{
  int best_probability = PROB_EVEN;
  enum br_predictor best_predictor = END_PREDICTORS;
  int combined_probability = REG_BR_PROB_BASE / 2;
  int d;
  bool first_match = false;
  bool found = false;
  struct edge_prediction *pred;
  int nedges = 0;
  edge e, first = NULL, second = NULL;
  edge_iterator ei;
  int nzero = 0;
  int nunknown = 0;

  FOR_EACH_EDGE (e, ei, bb->succs)
    {
      if (!unlikely_executed_edge_p (e))
	{
	  nedges ++;
	  if (first && !second)
	    second = e;
	  if (!first)
	    first = e;
	}
      else if (!e->probability.initialized_p ())
	e->probability = profile_probability::never ();
      if (!e->probability.initialized_p ())
	nunknown++;
      else if (e->probability == profile_probability::never ())
	nzero++;
    }

  /* When there is no successor or only one choice, prediction is easy.

     When we have a basic block with more than 2 successors, the situation
     is more complicated as DS theory cannot be used literally.
     More precisely, let's assume we predicted edge e1 with probability p1,
     thus: m1({b1}) = p1.  As we're going to combine more than 2 edges, we
     need to find probability of e.g. m1({b2}), which we don't know.
     The only approximation is to equally distribute 1-p1 to all edges
     different from b1.

     According to numbers we've got from the SPEC2006 benchmark, there's only
     one interesting reliable predictor (noreturn call), which can be
     handled with a bit easier approach.  */
  if (nedges != 2)
    {
      hash_set<edge> unlikely_edges (4);
      hash_set<edge_prediction *> likely_edges (4);

      /* Identify all edges that have a probability close to very unlikely.
	 Doing the same for the very likely case is not worth it, as no
	 such probability occurs in the SPEC2006 benchmark.  */
      edge_prediction **preds = bb_predictions->get (bb);
      if (preds)
	for (pred = *preds; pred; pred = pred->ep_next)
	  {
	    if (pred->ep_probability <= PROB_VERY_UNLIKELY
		|| pred->ep_predictor == PRED_COLD_LABEL)
	      unlikely_edges.add (pred->ep_edge);
	    else if (pred->ep_probability >= PROB_VERY_LIKELY
		     || pred->ep_predictor == PRED_BUILTIN_EXPECT
		     || pred->ep_predictor == PRED_HOT_LABEL)
	      likely_edges.add (pred);
	  }

      /* It can happen that an edge is both in likely_edges and unlikely_edges.
	 Clear both sets in that situation.  */
      for (hash_set<edge_prediction *>::iterator it = likely_edges.begin ();
	   it != likely_edges.end (); ++it)
	if (unlikely_edges.contains ((*it)->ep_edge))
	  {
	    likely_edges.empty ();
	    unlikely_edges.empty ();
	    break;
	  }

      if (!dry_run)
	set_even_probabilities (bb, &unlikely_edges, &likely_edges);
      clear_bb_predictions (bb);
      if (dump_file)
	{
	  fprintf (dump_file, "Predictions for bb %i\n", bb->index);
	  if (unlikely_edges.is_empty ())
	    fprintf (dump_file,
		     "%i edges in bb %i predicted to even probabilities\n",
		     nedges, bb->index);
	  else
	    {
	      fprintf (dump_file,
		       "%i edges in bb %i predicted with some unlikely edges\n",
		       nedges, bb->index);
	      FOR_EACH_EDGE (e, ei, bb->succs)
		if (!unlikely_executed_edge_p (e))
		  dump_prediction (dump_file, PRED_COMBINED,
				   e->probability.to_reg_br_prob_base (),
				   bb, REASON_NONE, e);
	    }
	}
      return;
    }
  if (dump_file)
    fprintf (dump_file, "Predictions for bb %i\n", bb->index);

  prune_predictions_for_bb (bb);

  edge_prediction **preds = bb_predictions->get (bb);

  if (preds)
    {
      /* We implement "first match" heuristics and use probability guessed
	 by the predictor with the smallest index.  */
      for (pred = *preds; pred; pred = pred->ep_next)
	{
	  enum br_predictor predictor = pred->ep_predictor;
	  int probability = pred->ep_probability;

	  if (pred->ep_edge != first)
	    probability = REG_BR_PROB_BASE - probability;

	  found = true;
	  /* First match heuristics would be wildly confused if we predicted
	     both directions.  */
	  if (best_predictor > predictor
	      && predictor_info[predictor].flags & PRED_FLAG_FIRST_MATCH)
	    {
	      struct edge_prediction *pred2;
	      int prob = probability;

	      for (pred2 = (struct edge_prediction *) *preds;
		   pred2; pred2 = pred2->ep_next)
		if (pred2 != pred && pred2->ep_predictor == pred->ep_predictor)
		  {
		    int probability2 = pred2->ep_probability;

		    if (pred2->ep_edge != first)
		      probability2 = REG_BR_PROB_BASE - probability2;

		    if ((probability < REG_BR_PROB_BASE / 2) !=
			(probability2 < REG_BR_PROB_BASE / 2))
		      break;

		    /* If the same predictor later gave better result, go for it!  */
		    if ((probability >= REG_BR_PROB_BASE / 2
			 && (probability2 > probability))
			|| (probability <= REG_BR_PROB_BASE / 2
			    && (probability2 < probability)))
		      prob = probability2;
		  }
	      if (!pred2)
		best_probability = prob, best_predictor = predictor;
	    }

	  d = (combined_probability * probability
	       + (REG_BR_PROB_BASE - combined_probability)
	       * (REG_BR_PROB_BASE - probability));

	  /* Use FP math to avoid overflows of 32bit integers.  */
	  if (d == 0)
	    /* If one probability is 0% and one 100%, avoid division by zero.  */
	    combined_probability = REG_BR_PROB_BASE / 2;
	  else
	    combined_probability = (((double) combined_probability)
				    * probability
				    * REG_BR_PROB_BASE / d + 0.5);
	}
    }
  /* Decide which heuristic to use.  In case we didn't match anything,
     use no_prediction heuristic, in case we did match, use either
     first match or Dempster-Shafer theory depending on the flags.  */

  if (best_predictor != END_PREDICTORS)
    first_match = true;

  if (!found)
    dump_prediction (dump_file, PRED_NO_PREDICTION, combined_probability, bb);
  else
    {
      if (!first_match)
	dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, bb,
			 !first_match ? REASON_NONE : REASON_IGNORED);
      else
	dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, bb,
			 first_match ? REASON_NONE : REASON_IGNORED);
    }

  if (first_match)
    combined_probability = best_probability;
  dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb);

  if (preds)
    {
      for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
	{
	  enum br_predictor predictor = pred->ep_predictor;
	  int probability = pred->ep_probability;

	  dump_prediction (dump_file, predictor, probability, bb,
			   (!first_match || best_predictor == predictor)
			   ? REASON_NONE : REASON_IGNORED, pred->ep_edge);
	}
    }
  clear_bb_predictions (bb);

  /* If we have only one successor which is unknown, we can compute missing
     probability.  */
  if (nunknown == 1)
    {
      profile_probability prob = profile_probability::always ();
      edge missing = NULL;

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->probability.initialized_p ())
	  prob -= e->probability;
	else if (missing == NULL)
	  missing = e;
	else
	  gcc_unreachable ();
      missing->probability = prob;
    }
  /* If nothing is unknown, we have nothing to update.  */
  else if (!nunknown && nzero != (int) EDGE_COUNT (bb->succs))
    ;
  else if (!dry_run)
    {
      first->probability
	= profile_probability::from_reg_br_prob_base (combined_probability);
      second->probability = first->probability.invert ();
    }
}
/* Check if T1 and T2 satisfy the IV_COMPARE condition.
   Return the SSA_NAME if the condition is satisfied, NULL otherwise.

   T1 and T2 should be one of the following cases:
     1. T1 is SSA_NAME, T2 is NULL
     2. T1 is SSA_NAME, T2 is INTEGER_CST between [-4, 4]
     3. T2 is SSA_NAME, T1 is INTEGER_CST between [-4, 4]  */

static tree
strips_small_constant (tree t1, tree t2)
{
  tree ret = NULL;
  int value = 0;

  if (!t1)
    return NULL;
  else if (TREE_CODE (t1) == SSA_NAME)
    ret = t1;
  else if (tree_fits_shwi_p (t1))
    value = tree_to_shwi (t1);
  else
    return NULL;

  if (!t2)
    return ret;
  else if (tree_fits_shwi_p (t2))
    value = tree_to_shwi (t2);
  else if (TREE_CODE (t2) == SSA_NAME)
    {
      if (ret)
	return NULL;
      else
	ret = t2;
    }

  if (value <= 4 && value >= -4)
    return ret;
  else
    return NULL;
}
/* Return the SSA_NAME in T or T's operands.
   Return NULL if SSA_NAME cannot be found.  */

static tree
get_base_value (tree t)
{
  if (TREE_CODE (t) == SSA_NAME)
    return t;

  if (!BINARY_CLASS_P (t))
    return NULL;

  switch (TREE_OPERAND_LENGTH (t))
    {
    case 1:
      return strips_small_constant (TREE_OPERAND (t, 0), NULL);
    case 2:
      return strips_small_constant (TREE_OPERAND (t, 0),
				    TREE_OPERAND (t, 1));
    default:
      return NULL;
    }
}
/* Check the compare STMT in LOOP.  If it compares an induction
   variable to a loop invariant, return true, and save
   LOOP_INVARIANT, COMPARE_CODE and LOOP_STEP.
   Otherwise return false and set LOOP_INVARIANT to NULL.  */

static bool
is_comparison_with_loop_invariant_p (gcond *stmt, class loop *loop,
				     tree *loop_invariant,
				     enum tree_code *compare_code,
				     tree *loop_step,
				     tree *loop_iv_base)
{
  tree op0, op1, bound, base;
  affine_iv iv0, iv1;
  enum tree_code code;
  tree step;

  code = gimple_cond_code (stmt);
  *loop_invariant = NULL;

  switch (code)
    {
    case GT_EXPR:
    case GE_EXPR:
    case NE_EXPR:
    case LT_EXPR:
    case LE_EXPR:
    case EQ_EXPR:
      break;

    default:
      return false;
    }

  op0 = gimple_cond_lhs (stmt);
  op1 = gimple_cond_rhs (stmt);

  if ((TREE_CODE (op0) != SSA_NAME && TREE_CODE (op0) != INTEGER_CST)
      || (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op1) != INTEGER_CST))
    return false;
  if (!simple_iv (loop, loop_containing_stmt (stmt), op0, &iv0, true))
    return false;
  if (!simple_iv (loop, loop_containing_stmt (stmt), op1, &iv1, true))
    return false;
  if (TREE_CODE (iv0.step) != INTEGER_CST
      || TREE_CODE (iv1.step) != INTEGER_CST)
    return false;
  if ((integer_zerop (iv0.step) && integer_zerop (iv1.step))
      || (!integer_zerop (iv0.step) && !integer_zerop (iv1.step)))
    return false;

  if (integer_zerop (iv0.step))
    {
      if (code != NE_EXPR && code != EQ_EXPR)
	code = invert_tree_comparison (code, false);
      bound = iv0.base;
      base = iv1.base;
      if (tree_fits_shwi_p (iv1.step))
	step = iv1.step;
      else
	return false;
    }
  else
    {
      bound = iv1.base;
      base = iv0.base;
      if (tree_fits_shwi_p (iv0.step))
	step = iv0.step;
      else
	return false;
    }

  if (TREE_CODE (bound) != INTEGER_CST)
    bound = get_base_value (bound);
  if (!bound)
    return false;
  if (TREE_CODE (base) != INTEGER_CST)
    base = get_base_value (base);
  if (!base)
    return false;

  *loop_invariant = bound;
  *compare_code = code;
  *loop_step = step;
  *loop_iv_base = base;
  return true;
}
/* Compare two SSA_NAMEs: returns TRUE if T1 and T2 are value coherent.  */

static bool
expr_coherent_p (tree t1, tree t2)
{
  gimple *stmt;
  tree ssa_name_1 = NULL;
  tree ssa_name_2 = NULL;

  gcc_assert (TREE_CODE (t1) == SSA_NAME || TREE_CODE (t1) == INTEGER_CST);
  gcc_assert (TREE_CODE (t2) == SSA_NAME || TREE_CODE (t2) == INTEGER_CST);

  if (t1 == t2)
    return true;

  if (TREE_CODE (t1) == INTEGER_CST && TREE_CODE (t2) == INTEGER_CST)
    return true;
  if (TREE_CODE (t1) == INTEGER_CST || TREE_CODE (t2) == INTEGER_CST)
    return false;

  /* Check to see if t1 is expressed/defined with t2.  */
  stmt = SSA_NAME_DEF_STMT (t1);
  gcc_assert (stmt != NULL);
  if (is_gimple_assign (stmt))
    {
      ssa_name_1 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
      if (ssa_name_1 && ssa_name_1 == t2)
	return true;
    }

  /* Check to see if t2 is expressed/defined with t1.  */
  stmt = SSA_NAME_DEF_STMT (t2);
  gcc_assert (stmt != NULL);
  if (is_gimple_assign (stmt))
    {
      ssa_name_2 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
      if (ssa_name_2 && ssa_name_2 == t1)
	return true;
    }

  /* Compare if t1 and t2's def_stmts are identical.  */
  if (ssa_name_2 != NULL && ssa_name_1 == ssa_name_2)
    return true;
  else
    return false;
}
/* Return true if BB is predicted by one of the loop heuristics.  */

static bool
predicted_by_loop_heuristics_p (basic_block bb)
{
  struct edge_prediction *i;
  edge_prediction **preds = bb_predictions->get (bb);

  if (!preds)
    return false;

  for (i = *preds; i; i = i->ep_next)
    if (i->ep_predictor == PRED_LOOP_ITERATIONS_GUESSED
	|| i->ep_predictor == PRED_LOOP_ITERATIONS_MAX
	|| i->ep_predictor == PRED_LOOP_ITERATIONS
	|| i->ep_predictor == PRED_LOOP_EXIT
	|| i->ep_predictor == PRED_LOOP_EXIT_WITH_RECURSION
	|| i->ep_predictor == PRED_LOOP_EXTRA_EXIT)
      return true;
  return false;
}
/* Predict branch probability of BB when BB contains a branch that compares
   an induction variable in LOOP with LOOP_IV_BASE_VAR to LOOP_BOUND_VAR.  The
   loop exit is compared using LOOP_BOUND_CODE, with step of LOOP_BOUND_STEP.

   E.g.
     for (int i = 0; i < bound; i++) {
       if (i < bound - 2)
	 computation_1();
       else
	 computation_2();
     }

   In this loop, we will predict the branch inside the loop to be taken.  */

static void
predict_iv_comparison (class loop *loop, basic_block bb,
		       tree loop_bound_var,
		       tree loop_iv_base_var,
		       enum tree_code loop_bound_code,
		       int loop_bound_step)
{
  tree compare_var, compare_base;
  enum tree_code compare_code;
  tree compare_step_var;
  edge then_edge;
  edge_iterator ei;

  if (predicted_by_loop_heuristics_p (bb))
    return;

  gcond *stmt = safe_dyn_cast <gcond *> (*gsi_last_bb (bb));
  if (!stmt)
    return;
  if (!is_comparison_with_loop_invariant_p (stmt,
					    loop, &compare_var,
					    &compare_code,
					    &compare_step_var,
					    &compare_base))
    return;

  /* Find the taken edge.  */
  FOR_EACH_EDGE (then_edge, ei, bb->succs)
    if (then_edge->flags & EDGE_TRUE_VALUE)
      break;

  /* When comparing an IV to a loop invariant, NE is more likely to be
     taken while EQ is more likely to be not-taken.  */
  if (compare_code == NE_EXPR)
    {
      predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      return;
    }
  else if (compare_code == EQ_EXPR)
    {
      predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
      return;
    }
  if (!expr_coherent_p (loop_iv_base_var, compare_base))
    return;

  /* If loop bound, base and compare bound are all constants, we can
     calculate the probability directly.  */
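  /* For instance, for (i = 0; i < 50; i++) if (i < 48): loop_count is
     50 and compare_count is 48, so the branch is predicted taken with
     probability 48/50 = 96%.  */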
  if (tree_fits_shwi_p (loop_bound_var)
      && tree_fits_shwi_p (compare_var)
      && tree_fits_shwi_p (compare_base))
    {
      int probability;
      wi::overflow_type overflow;
      bool overall_overflow = false;
      widest_int compare_count, tem;

      /* (loop_bound - base) / compare_step */
      tem = wi::sub (wi::to_widest (loop_bound_var),
		     wi::to_widest (compare_base), SIGNED, &overflow);
      overall_overflow |= overflow;
      widest_int loop_count = wi::div_trunc (tem,
					     wi::to_widest (compare_step_var),
					     SIGNED, &overflow);
      overall_overflow |= overflow;

      if (!wi::neg_p (wi::to_widest (compare_step_var))
	  ^ (compare_code == LT_EXPR || compare_code == LE_EXPR))
	{
	  /* (loop_bound - compare_bound) / compare_step */
	  tem = wi::sub (wi::to_widest (loop_bound_var),
			 wi::to_widest (compare_var), SIGNED, &overflow);
	  overall_overflow |= overflow;
	  compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
					 SIGNED, &overflow);
	  overall_overflow |= overflow;
	}
      else
	{
	  /* (compare_bound - base) / compare_step */
	  tem = wi::sub (wi::to_widest (compare_var),
			 wi::to_widest (compare_base), SIGNED, &overflow);
	  overall_overflow |= overflow;
	  compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
					 SIGNED, &overflow);
	  overall_overflow |= overflow;
	}
      if (compare_code == LE_EXPR || compare_code == GE_EXPR)
	++compare_count;
      if (loop_bound_code == LE_EXPR || loop_bound_code == GE_EXPR)
	++loop_count;
      if (wi::neg_p (compare_count))
	compare_count = 0;
      if (wi::neg_p (loop_count))
	loop_count = 0;
      if (loop_count == 0)
	probability = 0;
      else if (wi::cmps (compare_count, loop_count) == 1)
	probability = REG_BR_PROB_BASE;
      else
	{
	  tem = compare_count * REG_BR_PROB_BASE;
	  tem = wi::udiv_trunc (tem, loop_count);
	  probability = tem.to_uhwi ();
	}

      /* FIXME: The branch prediction seems broken.  It has only 20% hitrate.  */
      if (!overall_overflow)
	predict_edge (then_edge, PRED_LOOP_IV_COMPARE, probability);

      return;
    }
  if (expr_coherent_p (loop_bound_var, compare_var))
    {
      if ((loop_bound_code == LT_EXPR || loop_bound_code == LE_EXPR)
	  && (compare_code == LT_EXPR || compare_code == LE_EXPR))
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else if ((loop_bound_code == GT_EXPR || loop_bound_code == GE_EXPR)
	       && (compare_code == GT_EXPR || compare_code == GE_EXPR))
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else if (loop_bound_code == NE_EXPR)
	{
	  /* If the loop backedge condition is "(i != bound)", we do
	     the comparison based on the step of IV:
	     * step < 0 : backedge condition is like (i > bound)
	     * step > 0 : backedge condition is like (i < bound)  */
	  gcc_assert (loop_bound_step != 0);
	  if (loop_bound_step > 0
	      && (compare_code == LT_EXPR
		  || compare_code == LE_EXPR))
	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
	  else if (loop_bound_step < 0
		   && (compare_code == GT_EXPR
		       || compare_code == GE_EXPR))
	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
	  else
	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
	}
      else
	/* The branch is predicted not-taken if loop_bound_code is
	   opposite to compare_code.  */
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
    }
  else if (expr_coherent_p (loop_iv_base_var, compare_var))
    {
      /* For cases like:
	   for (i = s; i < h; i++)
	     if (i > s + 2) ....
	 the branch should be predicted taken.  */
      if (loop_bound_step > 0
	  && (compare_code == GT_EXPR || compare_code == GE_EXPR))
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else if (loop_bound_step < 0
	       && (compare_code == LT_EXPR || compare_code == LE_EXPR))
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
    }
}
/* Predict for extra loop exits that will lead to EXIT_EDGE.  The extra loop
   exits result from short-circuit conditions that will generate an
   if_tmp.  E.g.:

     if (foo() || global > 10)
       break;

   This will be translated into:

   BB3:
     loop header...
   BB4:
     if foo() goto BB6 else goto BB5
   BB5:
     if global > 10 goto BB6 else goto BB7
   BB6:
     goto BB7
   BB7:
     iftmp = (PHI 0(BB5), 1(BB6))
     if iftmp == 1 goto BB8 else goto BB3
   BB8:
     outside of the loop...

   The edge BB7->BB8 is a loop exit because BB8 is outside of the loop.
   From the dataflow, we can infer that BB4->BB6 and BB5->BB6 are also loop
   exits.  This function takes BB7->BB8 as input, and finds out the extra loop
   exits to predict them using PRED_LOOP_EXTRA_EXIT.  */
static void
predict_extra_loop_exits (class loop *loop, edge exit_edge)
{
  unsigned i;
  bool check_value_one;
  gimple *lhs_def_stmt;
  gphi *phi_stmt;
  tree cmp_rhs, cmp_lhs;

  gcond *cmp_stmt = safe_dyn_cast <gcond *> (*gsi_last_bb (exit_edge->src));
  if (!cmp_stmt)
    return;

  cmp_rhs = gimple_cond_rhs (cmp_stmt);
  cmp_lhs = gimple_cond_lhs (cmp_stmt);
  if (!TREE_CONSTANT (cmp_rhs)
      || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
    return;
  if (TREE_CODE (cmp_lhs) != SSA_NAME)
    return;
  /* If check_value_one is true, only the phi_args with value '1' will lead
     to loop exit.  Otherwise, only the phi_args with value '0' will lead to
     loop exit.  */
  check_value_one = (((integer_onep (cmp_rhs))
		      ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
		     ^ ((exit_edge->flags & EDGE_TRUE_VALUE) != 0));
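  /* For example, for 'if (iftmp == 1)' with the exit on the true edge,
     check_value_one is true and only the phi arguments equal to 1 lead
     to the exit.  */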
  lhs_def_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
  if (!lhs_def_stmt)
    return;

  phi_stmt = dyn_cast <gphi *> (lhs_def_stmt);
  if (!phi_stmt)
    return;

  for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
    {
      edge e1;
      edge_iterator ei;
      tree val = gimple_phi_arg_def (phi_stmt, i);
      edge e = gimple_phi_arg_edge (phi_stmt, i);

      if (!TREE_CONSTANT (val) || !(integer_zerop (val) || integer_onep (val)))
	continue;
      if ((check_value_one ^ integer_onep (val)) == 1)
	continue;
      if (EDGE_COUNT (e->src->succs) != 1)
	{
	  predict_paths_leading_to_edge (e, PRED_LOOP_EXTRA_EXIT, NOT_TAKEN,
					 loop);
	  continue;
	}

      FOR_EACH_EDGE (e1, ei, e->src->preds)
	predict_paths_leading_to_edge (e1, PRED_LOOP_EXTRA_EXIT, NOT_TAKEN,
				       loop);
    }
}
/* Predict edge probabilities by exploiting loop structure.  */

static void
predict_loops (void)
{
  basic_block bb;
  hash_set <class loop *> with_recursion (10);

  FOR_EACH_BB_FN (bb, cfun)
    {
      gimple_stmt_iterator gsi;
      tree decl;

      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	if (is_gimple_call (gsi_stmt (gsi))
	    && (decl = gimple_call_fndecl (gsi_stmt (gsi))) != NULL
	    && recursive_call_p (current_function_decl, decl))
	  {
	    class loop *loop = bb->loop_father;
	    while (loop && !with_recursion.add (loop))
	      loop = loop_outer (loop);
	  }
    }
  /* Try to predict out blocks in a loop that are not part of a
     natural loop.  */
  for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
    {
      basic_block bb, *bbs;
      unsigned j, n_exits = 0;
      class tree_niter_desc niter_desc;
      edge ex;
      class nb_iter_bound *nb_iter;
      enum tree_code loop_bound_code = ERROR_MARK;
      tree loop_bound_step = NULL;
      tree loop_bound_var = NULL;
      tree loop_iv_base = NULL;
      gcond *stmt = NULL;
      bool recursion = with_recursion.contains (loop);

      auto_vec<edge> exits = get_loop_exit_edges (loop);
      FOR_EACH_VEC_ELT (exits, j, ex)
	if (!unlikely_executed_edge_p (ex) && !(ex->flags & EDGE_ABNORMAL_CALL))
	  n_exits ++;
      if (!n_exits)
	continue;

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Predicting loop %i%s with %i exits.\n",
		 loop->num, recursion ? " (with recursion)" : "", n_exits);
      if (dump_file && (dump_flags & TDF_DETAILS)
	  && max_loop_iterations_int (loop) >= 0)
	{
	  fprintf (dump_file,
		   "Loop %d iterates at most %i times.\n", loop->num,
		   (int) max_loop_iterations_int (loop));
	}
      if (dump_file && (dump_flags & TDF_DETAILS)
	  && likely_max_loop_iterations_int (loop) >= 0)
	{
	  fprintf (dump_file, "Loop %d likely iterates at most %i times.\n",
		   loop->num, (int) likely_max_loop_iterations_int (loop));
	}
2007 FOR_EACH_VEC_ELT (exits, j, ex)
2009 tree niter = NULL;
2010 HOST_WIDE_INT nitercst;
2011 int max = param_max_predicted_iterations;
2012 int probability;
2013 enum br_predictor predictor;
2014 widest_int nit;
2016 if (unlikely_executed_edge_p (ex)
2017 || (ex->flags & EDGE_ABNORMAL_CALL))
2018 continue;
2019 /* Loop heuristics do not expect the exit conditional to be inside an
2020 inner loop. We predict from the innermost to the outermost loop. */
2021 if (predicted_by_loop_heuristics_p (ex->src))
2023 if (dump_file && (dump_flags & TDF_DETAILS))
2024 fprintf (dump_file, "Skipping exit %i->%i because "
2025 "it is already predicted.\n",
2026 ex->src->index, ex->dest->index);
2027 continue;
2029 predict_extra_loop_exits (loop, ex);
2031 if (number_of_iterations_exit (loop, ex, &niter_desc, false, false))
2032 niter = niter_desc.niter;
2033 if (!niter || TREE_CODE (niter_desc.niter) != INTEGER_CST)
2034 niter = loop_niter_by_eval (loop, ex);
2035 if (dump_file && (dump_flags & TDF_DETAILS)
2036 && TREE_CODE (niter) == INTEGER_CST)
2038 fprintf (dump_file, "Exit %i->%i %d iterates ",
2039 ex->src->index, ex->dest->index,
2040 loop->num);
2041 print_generic_expr (dump_file, niter, TDF_SLIM);
2042 fprintf (dump_file, " times.\n");
2045 if (TREE_CODE (niter) == INTEGER_CST)
2047 if (tree_fits_uhwi_p (niter)
2048 && max
2049 && compare_tree_int (niter, max - 1) == -1)
2050 nitercst = tree_to_uhwi (niter) + 1;
2051 else
2052 nitercst = max;
2053 predictor = PRED_LOOP_ITERATIONS;
2055 /* If we have just one exit and we can derive some information about
2056 the number of iterations of the loop from the statements inside
2057 the loop, use it to predict this exit. */
2058 else if (n_exits == 1
2059 && estimated_stmt_executions (loop, &nit))
2061 if (wi::gtu_p (nit, max))
2062 nitercst = max;
2063 else
2064 nitercst = nit.to_shwi ();
2065 predictor = PRED_LOOP_ITERATIONS_GUESSED;
2067 /* If we have a likely upper bound, trust it for very small iteration
2068 counts. Such loops would otherwise get mispredicted by the standard
2069 LOOP_EXIT heuristics. */
2070 else if (n_exits == 1
2071 && likely_max_stmt_executions (loop, &nit)
2072 && wi::ltu_p (nit,
2073 RDIV (REG_BR_PROB_BASE,
2074 REG_BR_PROB_BASE
2075 - predictor_info
2076 [recursion
2077 ? PRED_LOOP_EXIT_WITH_RECURSION
2078 : PRED_LOOP_EXIT].hitrate)))
2080 nitercst = nit.to_shwi ();
2081 predictor = PRED_LOOP_ITERATIONS_MAX;
2083 else
2085 if (dump_file && (dump_flags & TDF_DETAILS))
2086 fprintf (dump_file, "Nothing known about exit %i->%i.\n",
2087 ex->src->index, ex->dest->index);
2088 continue;
2091 if (dump_file && (dump_flags & TDF_DETAILS))
2092 fprintf (dump_file, "Recording prediction to %i iterations by %s.\n",
2093 (int)nitercst, predictor_info[predictor].name);
2094 /* If the prediction for number of iterations is zero, do not
2095 predict the exit edges. */
2096 if (nitercst == 0)
2097 continue;
2099 probability = RDIV (REG_BR_PROB_BASE, nitercst);
2100 predict_edge (ex, predictor, probability);
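/* Illustration of the computation above: an exit from a loop predicted
   to iterate 100 times gets probability
   RDIV (REG_BR_PROB_BASE, 100), i.e. 1% of REG_BR_PROB_BASE, so the
   exit is taken on roughly one iteration in a hundred.  */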
2103 /* Find information about loop bound variables. */
2104 for (nb_iter = loop->bounds; nb_iter;
2105 nb_iter = nb_iter->next)
2106 if (nb_iter->stmt
2107 && gimple_code (nb_iter->stmt) == GIMPLE_COND)
2109 stmt = as_a <gcond *> (nb_iter->stmt);
2110 break;
2112 if (!stmt)
2113 stmt = safe_dyn_cast <gcond *> (*gsi_last_bb (loop->header));
2114 if (stmt)
2115 is_comparison_with_loop_invariant_p (stmt, loop,
2116 &loop_bound_var,
2117 &loop_bound_code,
2118 &loop_bound_step,
2119 &loop_iv_base);
2121 bbs = get_loop_body (loop);
2123 for (j = 0; j < loop->num_nodes; j++)
2125 edge e;
2126 edge_iterator ei;
2128 bb = bbs[j];
2130 /* Bypass loop heuristics on continue statements. These
2131 statements construct loops via "non-loop" constructs
2132 in the source language and are better handled
2133 separately. */
2134 if (predicted_by_p (bb, PRED_CONTINUE))
2136 if (dump_file && (dump_flags & TDF_DETAILS))
2137 fprintf (dump_file, "BB %i predicted by continue.\n",
2138 bb->index);
2139 continue;
2142 /* If we already used more reliable loop exit predictors, do not
2143 bother with PRED_LOOP_EXIT. */
2144 if (!predicted_by_loop_heuristics_p (bb))
2146 /* For a loop with many exits we don't want to predict all exits
2147 with a fairly large probability, because if all exits are
2148 considered in a row, the loop would be predicted to iterate
2149 almost never. The code dividing the probability by the number of
2150 exits is very rough. It should compute the number of exits
2151 taken in each path through the function (not the overall number
2152 of exits, which might be a lot higher for loops with wide switch
2153 statements in them) and compute the n-th root.
2155 We limit the minimal probability to 2% to prevent
2156 EDGE_PROBABILITY_RELIABLE from trusting the branch prediction,
2157 as this was causing a regression in the perl benchmark containing such
2158 a wide loop. */
2160 int probability = ((REG_BR_PROB_BASE
2161 - predictor_info
2162 [recursion
2163 ? PRED_LOOP_EXIT_WITH_RECURSION
2164 : PRED_LOOP_EXIT].hitrate)
2165 / n_exits);
2166 if (probability < HITRATE (2))
2167 probability = HITRATE (2);
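/* Worked example, assuming an illustrative PRED_LOOP_EXIT hitrate of
   HITRATE (85): REG_BR_PROB_BASE minus the hitrate leaves 15% to
   distribute, so a loop with four exits would give each exit ~3.75%,
   while a loop with dozens of exits is clamped so that every exit
   keeps at least HITRATE (2) = 2%.  */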
2168 FOR_EACH_EDGE (e, ei, bb->succs)
2169 if (e->dest->index < NUM_FIXED_BLOCKS
2170 || !flow_bb_inside_loop_p (loop, e->dest))
2172 if (dump_file && (dump_flags & TDF_DETAILS))
2173 fprintf (dump_file,
2174 "Predicting exit %i->%i with prob %i.\n",
2175 e->src->index, e->dest->index, probability);
2176 predict_edge (e,
2177 recursion ? PRED_LOOP_EXIT_WITH_RECURSION
2178 : PRED_LOOP_EXIT, probability);
2181 if (loop_bound_var)
2182 predict_iv_comparison (loop, bb, loop_bound_var, loop_iv_base,
2183 loop_bound_code,
2184 tree_to_shwi (loop_bound_step));
2187 /* In the following code
2188 for (loop1)
2189 if (cond)
2190 for (loop2)
2191 body;
2192 guess that cond is unlikely. */
2193 if (loop_outer (loop)->num)
2195 basic_block bb = NULL;
2196 edge preheader_edge = loop_preheader_edge (loop);
2198 if (single_pred_p (preheader_edge->src)
2199 && single_succ_p (preheader_edge->src))
2200 preheader_edge = single_pred_edge (preheader_edge->src);
2202 /* Pattern match fortran loop preheader:
2203 _16 = BUILTIN_EXPECT (_15, 1, PRED_FORTRAN_LOOP_PREHEADER);
2204 _17 = (logical(kind=4)) _16;
2205 if (_17 != 0)
2206 goto <bb 11>;
2207 else
2208 goto <bb 13>;
2210 Loop guard branch prediction says nothing about duplicated loop
2211 headers produced by the Fortran frontend, and in this case we want
2212 to predict paths leading to this preheader. */
2214 gcond *stmt
2215 = safe_dyn_cast <gcond *> (*gsi_last_bb (preheader_edge->src));
2216 if (stmt
2217 && gimple_cond_code (stmt) == NE_EXPR
2218 && TREE_CODE (gimple_cond_lhs (stmt)) == SSA_NAME
2219 && integer_zerop (gimple_cond_rhs (stmt)))
2221 gimple *call_stmt = SSA_NAME_DEF_STMT (gimple_cond_lhs (stmt));
2222 if (gimple_code (call_stmt) == GIMPLE_ASSIGN
2223 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (call_stmt))
2224 && TREE_CODE (gimple_assign_rhs1 (call_stmt)) == SSA_NAME)
2225 call_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (call_stmt));
2226 if (gimple_call_internal_p (call_stmt, IFN_BUILTIN_EXPECT)
2227 && TREE_CODE (gimple_call_arg (call_stmt, 2)) == INTEGER_CST
2228 && tree_fits_uhwi_p (gimple_call_arg (call_stmt, 2))
2229 && tree_to_uhwi (gimple_call_arg (call_stmt, 2))
2230 == PRED_FORTRAN_LOOP_PREHEADER)
2231 bb = preheader_edge->src;
2233 if (!bb)
2235 if (!dominated_by_p (CDI_DOMINATORS,
2236 loop_outer (loop)->latch, loop->header))
2237 predict_paths_leading_to_edge (loop_preheader_edge (loop),
2238 recursion
2239 ? PRED_LOOP_GUARD_WITH_RECURSION
2240 : PRED_LOOP_GUARD,
2241 NOT_TAKEN,
2242 loop_outer (loop));
2244 else
2246 if (!dominated_by_p (CDI_DOMINATORS,
2247 loop_outer (loop)->latch, bb))
2248 predict_paths_leading_to (bb,
2249 recursion
2250 ? PRED_LOOP_GUARD_WITH_RECURSION
2251 : PRED_LOOP_GUARD,
2252 NOT_TAKEN,
2253 loop_outer (loop));
2257 /* Free basic blocks from get_loop_body. */
2258 free (bbs);
2262 /* Attempt to predict probabilities of BB outgoing edges using local
2263 properties. */
2264 static void
2265 bb_estimate_probability_locally (basic_block bb)
2267 rtx_insn *last_insn = BB_END (bb);
2268 rtx cond;
2270 if (! can_predict_insn_p (last_insn))
2271 return;
2272 cond = get_condition (last_insn, NULL, false, false);
2273 if (! cond)
2274 return;
2276 /* Try "pointer heuristic."
2277 A comparison ptr == 0 is predicted as false.
2278 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
2279 if (COMPARISON_P (cond)
2280 && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
2281 || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
2283 if (GET_CODE (cond) == EQ)
2284 predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
2285 else if (GET_CODE (cond) == NE)
2286 predict_insn_def (last_insn, PRED_POINTER, TAKEN);
2288 else
2290 /* Try "opcode heuristic."
2291 EQ tests are usually false and NE tests are usually true. Also,
2292 most quantities are positive, so we can make the appropriate guesses
2293 about signed comparisons against zero. */
2294 switch (GET_CODE (cond))
2296 case CONST_INT:
2297 /* Unconditional branch. */
2298 predict_insn_def (last_insn, PRED_UNCONDITIONAL,
2299 cond == const0_rtx ? NOT_TAKEN : TAKEN);
2300 break;
2302 case EQ:
2303 case UNEQ:
2304 /* Floating point comparisons appear to behave in a very
2305 unpredictable way because of the special role of equality tests in
2306 FP code. */
2307 if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
2309 /* Comparisons with 0 are often used for booleans and there is
2310 nothing useful to predict about them. */
2311 else if (XEXP (cond, 1) == const0_rtx
2312 || XEXP (cond, 0) == const0_rtx)
2314 else
2315 predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
2316 break;
2318 case NE:
2319 case LTGT:
2320 /* Floating point comparisons appear to behave in a very
2321 unpredictable way because of the special role of equality tests in
2322 FP code. */
2323 if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
2325 /* Comparisons with 0 are often used for booleans and there is
2326 nothing useful to predict about them. */
2327 else if (XEXP (cond, 1) == const0_rtx
2328 || XEXP (cond, 0) == const0_rtx)
2330 else
2331 predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
2332 break;
2334 case ORDERED:
2335 predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
2336 break;
2338 case UNORDERED:
2339 predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
2340 break;
2342 case LE:
2343 case LT:
2344 if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
2345 || XEXP (cond, 1) == constm1_rtx)
2346 predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
2347 break;
2349 case GE:
2350 case GT:
2351 if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
2352 || XEXP (cond, 1) == constm1_rtx)
2353 predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
2354 break;
2356 default:
2357 break;
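/* As an illustration of the opcode heuristic, source code such as

     if (count > 0)
       process (count);

   (with "process" a placeholder) compares a presumably positive
   quantity against zero, so the GT case above predicts the branch
   TAKEN via PRED_OPCODE_POSITIVE.  */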
2361 /* Set edge->probability for each successor edge of BB. */
2362 void
2363 guess_outgoing_edge_probabilities (basic_block bb)
2365 bb_estimate_probability_locally (bb);
2366 combine_predictions_for_insn (BB_END (bb), bb);
2369 static tree expr_expected_value (tree, enum br_predictor *predictor,
2370 HOST_WIDE_INT *probability);
2372 /* Helper function for expr_expected_value. */
2374 static tree
2375 expr_expected_value_1 (tree type, tree op0, enum tree_code code,
2376 tree op1, enum br_predictor *predictor,
2377 HOST_WIDE_INT *probability)
2379 gimple *def;
2381 /* Reset returned probability value. */
2382 *probability = -1;
2383 *predictor = PRED_UNCONDITIONAL;
2385 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
2387 if (TREE_CONSTANT (op0))
2388 return op0;
2390 if (code == IMAGPART_EXPR)
2392 if (TREE_CODE (TREE_OPERAND (op0, 0)) == SSA_NAME)
2394 def = SSA_NAME_DEF_STMT (TREE_OPERAND (op0, 0));
2395 if (is_gimple_call (def)
2396 && gimple_call_internal_p (def)
2397 && (gimple_call_internal_fn (def)
2398 == IFN_ATOMIC_COMPARE_EXCHANGE))
2400 /* Assume that any given atomic operation has low contention,
2401 and thus the compare-and-swap operation succeeds. */
2402 *predictor = PRED_COMPARE_AND_SWAP;
2403 return build_one_cst (TREE_TYPE (op0));
2408 if (code != SSA_NAME)
2409 return NULL_TREE;
2411 def = SSA_NAME_DEF_STMT (op0);
2413 /* If we were already here, break the infinite cycle. */
2414 bool existed_p;
2415 expected_value *res
2416 = &ssa_expected_value->get_or_insert (SSA_NAME_VERSION (op0),
2417 &existed_p);
2418 if (existed_p)
2420 *probability = res->probability;
2421 *predictor = res->predictor;
2422 return res->val;
2424 res->val = NULL_TREE;
2425 res->predictor = *predictor;
2426 res->probability = *probability;
2428 if (gphi *phi = dyn_cast <gphi *> (def))
2430 /* For the PHI node to have an expected value, all of its arguments
2431 must agree on the same expected value. */
2432 int i, n = gimple_phi_num_args (phi);
2433 tree val = NULL;
2434 bool has_nonzero_edge = false;
2436 /* If we already proved that a given edge is unlikely, we do not need
2437 to handle merging of the probabilities. */
2438 for (i = 0; i < n && !has_nonzero_edge; i++)
2440 tree arg = PHI_ARG_DEF (phi, i);
2441 if (arg == PHI_RESULT (phi))
2442 continue;
2443 profile_count cnt = gimple_phi_arg_edge (phi, i)->count ();
2444 if (!cnt.initialized_p () || cnt.nonzero_p ())
2445 has_nonzero_edge = true;
2448 for (i = 0; i < n; i++)
2450 tree arg = PHI_ARG_DEF (phi, i);
2451 enum br_predictor predictor2;
2453 /* Skip self-referring arguments, since they do not change the
2454 expected value. */
2455 if (arg == PHI_RESULT (phi))
2456 continue;
2458 /* Skip edges which we already predicted as executing
2459 zero times. */
2460 if (has_nonzero_edge)
2462 profile_count cnt = gimple_phi_arg_edge (phi, i)->count ();
2463 if (cnt.initialized_p () && !cnt.nonzero_p ())
2464 continue;
2466 HOST_WIDE_INT probability2;
2467 tree new_val = expr_expected_value (arg, &predictor2,
2468 &probability2);
2469 /* If we know nothing about value, give up. */
2470 if (!new_val)
2471 return NULL;
2473 /* If this is the first edge, trust its prediction. */
2474 if (!val)
2476 val = new_val;
2477 *predictor = predictor2;
2478 *probability = probability2;
2479 continue;
2481 /* If there are two different values, give up. */
2482 if (!operand_equal_p (val, new_val, false))
2483 return NULL;
2485 int p1 = get_predictor_value (*predictor, *probability);
2486 int p2 = get_predictor_value (predictor2, probability2);
2487 /* If both predictors agree, it does not matter from which
2488 edge we enter the basic block. */
2489 if (*predictor == predictor2 && p1 == p2)
2490 continue;
2491 /* The general case has no precise solution, since we do not
2492 know the probabilities of the incoming edges yet.
2493 Still, if the value is predicted over all incoming edges, we
2494 can hope it will indeed be the case. Conservatively
2495 downgrade the prediction quality (so first-match merging is not
2496 performed) and take the least successful prediction. */
2498 *predictor = PRED_COMBINED_VALUE_PREDICTIONS_PHI;
2499 *probability = MIN (p1, p2);
2502 res = ssa_expected_value->get (SSA_NAME_VERSION (op0));
2503 res->val = val;
2504 res->predictor = *predictor;
2505 res->probability = *probability;
2506 return val;
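/* Sketch of the PHI merging above (illustrative user code only):

     v = c ? __builtin_expect (x, 1) : __builtin_expect (y, 1);
     if (v)
       ...

   Both PHI arguments have expected value 1 with the same predictor,
   so 1 is returned for the PHI result; had the two arms used
   different predictors, the result would be downgraded to
   PRED_COMBINED_VALUE_PREDICTIONS_PHI with the lower probability.  */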
2508 if (is_gimple_assign (def))
2510 if (gimple_assign_lhs (def) != op0)
2511 return NULL;
2513 tree val = expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def)),
2514 gimple_assign_rhs1 (def),
2515 gimple_assign_rhs_code (def),
2516 gimple_assign_rhs2 (def),
2517 predictor, probability);
2518 if (val)
2520 res = ssa_expected_value->get (SSA_NAME_VERSION (op0));
2521 res->val = val;
2522 res->predictor = *predictor;
2523 res->probability = *probability;
2525 return val;
2528 if (is_gimple_call (def))
2530 tree decl = gimple_call_fndecl (def);
2531 if (!decl)
2533 if (gimple_call_internal_p (def)
2534 && gimple_call_internal_fn (def) == IFN_BUILTIN_EXPECT)
2536 gcc_assert (gimple_call_num_args (def) == 3);
2537 tree val = gimple_call_arg (def, 0);
2538 if (TREE_CONSTANT (val))
2539 return val;
2540 tree val2 = gimple_call_arg (def, 2);
2541 gcc_assert (TREE_CODE (val2) == INTEGER_CST
2542 && tree_fits_uhwi_p (val2)
2543 && tree_to_uhwi (val2) < END_PREDICTORS);
2544 *predictor = (enum br_predictor) tree_to_uhwi (val2);
2545 if (*predictor == PRED_BUILTIN_EXPECT)
2546 *probability
2547 = HITRATE (param_builtin_expect_probability);
2548 val = gimple_call_arg (def, 1);
2549 res->val = val;
2550 res->predictor = *predictor;
2551 res->probability = *probability;
2552 return val;
2554 return NULL;
2557 if (DECL_IS_MALLOC (decl) || DECL_IS_OPERATOR_NEW_P (decl))
2559 if (predictor)
2560 *predictor = PRED_MALLOC_NONNULL;
2561 /* FIXME: This is wrong and we need to convert the logic
2562 to value ranges. This makes the predictor assume that
2563 malloc always returns (size_t)1, which is not the same
2564 as returning non-NULL. */
2565 tree val = fold_convert (type, boolean_true_node);
2566 res->val = val;
2567 res->predictor = *predictor;
2568 res->probability = *probability;
2569 return val;
2572 if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
2573 switch (DECL_FUNCTION_CODE (decl))
2575 case BUILT_IN_EXPECT:
2577 tree val;
2578 if (gimple_call_num_args (def) != 2)
2579 return NULL;
2580 val = gimple_call_arg (def, 0);
2581 if (TREE_CONSTANT (val))
2582 return val;
2583 *predictor = PRED_BUILTIN_EXPECT;
2584 *probability
2585 = HITRATE (param_builtin_expect_probability);
2586 val = gimple_call_arg (def, 1);
2587 res->val = val;
2588 res->predictor = *predictor;
2589 res->probability = *probability;
2590 return val;
2592 case BUILT_IN_EXPECT_WITH_PROBABILITY:
2594 tree val;
2595 if (gimple_call_num_args (def) != 3)
2596 return NULL;
2597 val = gimple_call_arg (def, 0);
2598 if (TREE_CONSTANT (val))
2600 res->val = val;
2601 res->predictor = *predictor;
2602 res->probability = *probability;
2603 return val;
2605 /* Compute final probability as:
2606 probability * REG_BR_PROB_BASE. */
2607 tree prob = gimple_call_arg (def, 2);
2608 tree t = TREE_TYPE (prob);
2609 tree base = build_int_cst (integer_type_node,
2610 REG_BR_PROB_BASE);
2611 base = build_real_from_int_cst (t, base);
2612 tree r = fold_build2_initializer_loc (UNKNOWN_LOCATION,
2613 MULT_EXPR, t, prob, base);
2614 if (TREE_CODE (r) != REAL_CST)
2616 error_at (gimple_location (def),
2617 "probability %qE must be "
2618 "constant floating-point expression", prob);
2619 return NULL;
2621 HOST_WIDE_INT probi
2622 = real_to_integer (TREE_REAL_CST_PTR (r));
2623 if (probi >= 0 && probi <= REG_BR_PROB_BASE)
2625 *predictor = PRED_BUILTIN_EXPECT_WITH_PROBABILITY;
2626 *probability = probi;
2628 else
2629 error_at (gimple_location (def),
2630 "probability %qE is outside "
2631 "the range [0.0, 1.0]", prob);
2633 val = gimple_call_arg (def, 1);
2634 res->val = val;
2635 res->predictor = *predictor;
2636 res->probability = *probability;
2637 return val;
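/* Worked example for the conversion above (illustrative user code):

     if (__builtin_expect_with_probability (x > 0, 1, 0.9))
       ...

   Here prob is 0.9, so r folds to 0.9 * REG_BR_PROB_BASE = 9000;
   probi = 9000 passes the range check and the branch is predicted
   with 90% probability.  */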
2640 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_N:
2641 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_1:
2642 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_2:
2643 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_4:
2644 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_8:
2645 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_16:
2646 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE:
2647 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_N:
2648 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1:
2649 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_2:
2650 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4:
2651 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8:
2652 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16:
2653 /* Assume that any given atomic operation has low contention,
2654 and thus the compare-and-swap operation succeeds. */
2655 *predictor = PRED_COMPARE_AND_SWAP;
2656 res->val = boolean_true_node;
2657 res->predictor = *predictor;
2658 res->probability = *probability;
2659 return boolean_true_node;
2660 case BUILT_IN_REALLOC:
2661 case BUILT_IN_GOMP_REALLOC:
2662 if (predictor)
2663 *predictor = PRED_MALLOC_NONNULL;
2664 /* FIXME: This is wrong and we need to convert the logic
2665 to value ranges. */
2666 res->val = fold_convert (type, boolean_true_node);
2667 res->predictor = *predictor;
2668 res->probability = *probability;
2669 return res->val;
2670 default:
2671 break;
2675 return NULL;
2678 if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
2680 tree res;
2681 tree nop0 = op0;
2682 tree nop1 = op1;
2684 /* First handle the situation where a single operand is enough to
2685 determine the final value. In this case we can do a better job by
2686 avoiding prediction merging. */
2687 if (TREE_CODE (op0) != INTEGER_CST)
2689 /* See if expected value of op0 is good enough to determine the result. */
2690 nop0 = expr_expected_value (op0, predictor, probability);
2691 if (nop0
2692 && (res = fold_build2 (code, type, nop0, op1)) != NULL
2693 && TREE_CODE (res) == INTEGER_CST)
2694 /* We are now getting a conservative probability. Consider for
2695 example:
2696 op0 * op1
2697 If op0 is 0 with probability p, then we ignore the
2698 possibility that op0 != 0 and op1 == 0. It does not seem to be
2699 worthwhile to downgrade prediction quality for this. */
2700 return res;
2701 if (!nop0)
2702 nop0 = op0;
2704 enum br_predictor predictor2 = PRED_UNCONDITIONAL;
2705 HOST_WIDE_INT probability2 = -1;
2706 if (TREE_CODE (op1) != INTEGER_CST)
2708 /* See if expected value of op1 is good enough to determine the result. */
2709 nop1 = expr_expected_value (op1, &predictor2, &probability2);
2710 if (nop1
2711 && (res = fold_build2 (code, type, op0, nop1)) != NULL
2712 && TREE_CODE (res) == INTEGER_CST)
2714 /* As above, we now get a conservative probability. */
2715 *predictor = predictor2;
2716 *probability = probability2;
2717 return res;
2719 if (!nop1)
2720 nop1 = op1;
2722 /* We already checked whether folding one of the arguments to a
2723 constant is good enough. Consequently, failing to fold both means
2724 that we will not succeed in determining the value. */
2725 if (nop0 == op0 || nop1 == op1)
2726 return NULL;
2727 /* Finally see if we have two known values. */
2728 res = fold_build2 (code, type, nop0, nop1);
2729 if (TREE_CODE (res) == INTEGER_CST)
2731 HOST_WIDE_INT p1 = get_predictor_value (*predictor, *probability);
2732 HOST_WIDE_INT p2 = get_predictor_value (predictor2, probability2);
2734 /* If one of the predictions is sure, such as PRED_UNCONDITIONAL, we
2735 can ignore it. */
2736 if (p2 == PROB_ALWAYS)
2737 return res;
2738 if (p1 == PROB_ALWAYS)
2740 *predictor = predictor2;
2741 *probability = probability2;
2742 return res;
2744 /* Combine the binary predictions.
2745 Since we do not know about the independence of the predictors, we
2746 cannot determine the value precisely. */
2747 *probability = RDIV (p1 * p2, REG_BR_PROB_BASE);
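/* E.g. two 90% predictions (9000 each) combine to
   RDIV (9000 * 9000, REG_BR_PROB_BASE) = 8100, i.e. 81%.  */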
2748 /* If we no longer track useful information, give up. */
2749 if (!*probability)
2750 return NULL;
2751 /* Otherwise mark that the prediction is a result of combining
2752 different heuristics, since we do not want it to participate
2753 in first-match merging. It is no longer reliable since
2754 we do not know whether the probabilities are independent. */
2755 *predictor = PRED_COMBINED_VALUE_PREDICTIONS;
2757 return res;
2759 return NULL;
2761 if (get_gimple_rhs_class (code) == GIMPLE_UNARY_RHS)
2763 tree res;
2764 op0 = expr_expected_value (op0, predictor, probability);
2765 if (!op0)
2766 return NULL;
2767 res = fold_build1 (code, type, op0);
2768 if (TREE_CONSTANT (res))
2769 return res;
2770 return NULL;
2772 return NULL;
2775 /* Return the constant that EXPR will likely have at execution time, or
2776 NULL if unknown. The function is used by the builtin_expect branch
2777 predictor, so the evidence must come from this construct and possible
2779 additional constant folding. We may want to implement a more involved
2780 value guess (such as value range propagation based prediction), but such
2781 tricks shall go to a new implementation. */
2783 static tree
2784 expr_expected_value (tree expr, enum br_predictor *predictor,
2785 HOST_WIDE_INT *probability)
2787 enum tree_code code;
2788 tree op0, op1;
2790 if (TREE_CONSTANT (expr))
2792 *predictor = PRED_UNCONDITIONAL;
2793 *probability = -1;
2794 return expr;
2797 extract_ops_from_tree (expr, &code, &op0, &op1);
2798 return expr_expected_value_1 (TREE_TYPE (expr),
2799 op0, code, op1, predictor, probability);
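/* For illustration, in

     if (__builtin_expect (x, 0))
       ...

   the condition has expected value 0, so the then-edge receives
   REG_BR_PROB_BASE minus the predictor's value, i.e. 10% with the
   default param_builtin_expect_probability of 90.  */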
2803 /* Return the probability of a PREDICTOR. If the predictor has a variable
2804 probability, return the passed PROBABILITY. */
2806 static HOST_WIDE_INT
2807 get_predictor_value (br_predictor predictor, HOST_WIDE_INT probability)
2809 switch (predictor)
2811 case PRED_BUILTIN_EXPECT:
2812 case PRED_BUILTIN_EXPECT_WITH_PROBABILITY:
2813 case PRED_COMBINED_VALUE_PREDICTIONS_PHI:
2814 case PRED_COMBINED_VALUE_PREDICTIONS:
2815 gcc_assert (probability != -1);
2816 return probability;
2817 default:
2818 gcc_assert (probability == -1);
2819 return predictor_info[(int) predictor].hitrate;
2823 /* Predict using opcode of the last statement in basic block. */
2824 static void
2825 tree_predict_by_opcode (basic_block bb)
2827 edge then_edge;
2828 tree op0, op1;
2829 tree type;
2830 tree val;
2831 enum tree_code cmp;
2832 edge_iterator ei;
2833 enum br_predictor predictor;
2834 HOST_WIDE_INT probability;
2836 gimple *stmt = *gsi_last_bb (bb);
2837 if (!stmt)
2838 return;
2840 if (gswitch *sw = dyn_cast <gswitch *> (stmt))
2842 tree index = gimple_switch_index (sw);
2843 tree val = expr_expected_value (index, &predictor, &probability);
2844 if (val && TREE_CODE (val) == INTEGER_CST)
2846 edge e = find_taken_edge_switch_expr (sw, val);
2847 if (predictor == PRED_BUILTIN_EXPECT)
2849 int percent = param_builtin_expect_probability;
2850 gcc_assert (percent >= 0 && percent <= 100);
2851 predict_edge (e, PRED_BUILTIN_EXPECT,
2852 HITRATE (percent));
2854 else
2855 predict_edge_def (e, predictor, TAKEN);
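/* Sketch of user code this handles (for illustration):

     switch (__builtin_expect (x, 3))
       {
       case 3: ...
       }

   The switch index has expected value 3, so the edge leading to
   "case 3" is the one predicted taken.  */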
2859 if (gimple_code (stmt) != GIMPLE_COND)
2860 return;
2861 FOR_EACH_EDGE (then_edge, ei, bb->succs)
2862 if (then_edge->flags & EDGE_TRUE_VALUE)
2863 break;
2864 op0 = gimple_cond_lhs (stmt);
2865 op1 = gimple_cond_rhs (stmt);
2866 cmp = gimple_cond_code (stmt);
2867 type = TREE_TYPE (op0);
2868 val = expr_expected_value_1 (boolean_type_node, op0, cmp, op1,
2869 &predictor, &probability);
2870 if (val && TREE_CODE (val) == INTEGER_CST)
2872 HOST_WIDE_INT prob = get_predictor_value (predictor, probability);
2873 if (integer_zerop (val))
2874 prob = REG_BR_PROB_BASE - prob;
2875 predict_edge (then_edge, predictor, prob);
2877 /* Try "pointer heuristic."
2878 A comparison ptr == 0 is predicted as false.
2879 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
2880 if (POINTER_TYPE_P (type))
2882 if (cmp == EQ_EXPR)
2883 predict_edge_def (then_edge, PRED_TREE_POINTER, NOT_TAKEN);
2884 else if (cmp == NE_EXPR)
2885 predict_edge_def (then_edge, PRED_TREE_POINTER, TAKEN);
2887 else
2889 /* Try "opcode heuristic."
2890 EQ tests are usually false and NE tests are usually true. Also,
2891 most quantities are positive, so we can make the appropriate guesses
2892 about signed comparisons against zero. */
2893 switch (cmp)
2895 case EQ_EXPR:
2896 case UNEQ_EXPR:
2897 /* Floating point comparisons appear to behave in a very
2898 unpredictable way because of the special role of equality tests in
2899 FP code. */
2900 if (FLOAT_TYPE_P (type))
2902 /* Comparisons with 0 are often used for booleans and there is
2903 nothing useful to predict about them. */
2904 else if (integer_zerop (op0) || integer_zerop (op1))
2906 else
2907 predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, NOT_TAKEN);
2908 break;
2910 case NE_EXPR:
2911 case LTGT_EXPR:
2912 /* Floating point comparisons appear to behave in a very
2913 unpredictable way because of the special role of equality tests in
2914 FP code. */
2915 if (FLOAT_TYPE_P (type))
2917 /* Comparisons with 0 are often used for booleans and there is
2918 nothing useful to predict about them. */
2919 else if (integer_zerop (op0)
2920 || integer_zerop (op1))
2922 else
2923 predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, TAKEN);
2924 break;
2926 case ORDERED_EXPR:
2927 predict_edge_def (then_edge, PRED_TREE_FPOPCODE, TAKEN);
2928 break;
2930 case UNORDERED_EXPR:
2931 predict_edge_def (then_edge, PRED_TREE_FPOPCODE, NOT_TAKEN);
2932 break;
2934 case LE_EXPR:
2935 case LT_EXPR:
2936 if (integer_zerop (op1)
2937 || integer_onep (op1)
2938 || integer_all_onesp (op1)
2939 || real_zerop (op1)
2940 || real_onep (op1)
2941 || real_minus_onep (op1))
2942 predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, NOT_TAKEN);
2943 break;
2945 case GE_EXPR:
2946 case GT_EXPR:
2947 if (integer_zerop (op1)
2948 || integer_onep (op1)
2949 || integer_all_onesp (op1)
2950 || real_zerop (op1)
2951 || real_onep (op1)
2952 || real_minus_onep (op1))
2953 predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, TAKEN);
2954 break;
2956 default:
2957 break;
2962 /* Returns TRUE if STMT is an exit(0)-like statement. */
2963 static bool
2964 is_exit_with_zero_arg (const gimple *stmt)
2966 /* This is not exit, _exit or _Exit. */
2967 if (!gimple_call_builtin_p (stmt, BUILT_IN_EXIT)
2968 && !gimple_call_builtin_p (stmt, BUILT_IN__EXIT)
2969 && !gimple_call_builtin_p (stmt, BUILT_IN__EXIT2))
2970 return false;
2972 /* The argument is an integer zero. */
2973 return integer_zerop (gimple_call_arg (stmt, 0));
2976 /* Try to guess whether the return value means an error code. */
2978 static enum br_predictor
2979 return_prediction (tree val, enum prediction *prediction)
2981 /* VOID. */
2982 if (!val)
2983 return PRED_NO_PREDICTION;
2984 /* Different heuristics for pointers and scalars. */
2985 if (POINTER_TYPE_P (TREE_TYPE (val)))
2987 /* NULL is usually not returned. */
2988 if (integer_zerop (val))
2990 *prediction = NOT_TAKEN;
2991 return PRED_NULL_RETURN;
2994 else if (INTEGRAL_TYPE_P (TREE_TYPE (val)))
2996 /* Negative return values are often used to indicate
2997 errors. */
2998 if (TREE_CODE (val) == INTEGER_CST
2999 && tree_int_cst_sgn (val) < 0)
3001 *prediction = NOT_TAKEN;
3002 return PRED_NEGATIVE_RETURN;
3004 /* Constant return values seem to be commonly taken.
3005 Zero/one often represent booleans, so exclude them from the
3006 heuristics. */
3007 if (TREE_CONSTANT (val)
3008 && (!integer_zerop (val) && !integer_onep (val)))
3010 *prediction = NOT_TAKEN;
3011 return PRED_CONST_RETURN;
3014 return PRED_NO_PREDICTION;
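/* Examples of the heuristics above: "return NULL;" gives
   PRED_NULL_RETURN / NOT_TAKEN, "return -1;" gives
   PRED_NEGATIVE_RETURN / NOT_TAKEN, and "return 42;" gives
   PRED_CONST_RETURN / NOT_TAKEN, while returning 0 or 1 from an
   integral function yields no prediction.  */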
3017 /* Return zero if phi result could have values other than -1, 0 or 1,
3018 otherwise return a bitmask, with bits 0, 1 and 2 set if -1, 0 and 1
3019 values are used or likely. */
3021 static int
3022 zero_one_minusone (gphi *phi, int limit)
3024 int phi_num_args = gimple_phi_num_args (phi);
3025 int ret = 0;
3026 for (int i = 0; i < phi_num_args; i++)
3028 tree t = PHI_ARG_DEF (phi, i);
3029 if (TREE_CODE (t) != INTEGER_CST)
3030 continue;
3031 wide_int w = wi::to_wide (t);
3032 if (w == -1)
3033 ret |= 1;
3034 else if (w == 0)
3035 ret |= 2;
3036 else if (w == 1)
3037 ret |= 4;
3038 else
3039 return 0;
3041 for (int i = 0; i < phi_num_args; i++)
3043 tree t = PHI_ARG_DEF (phi, i);
3044 if (TREE_CODE (t) == INTEGER_CST)
3045 continue;
3046 if (TREE_CODE (t) != SSA_NAME)
3047 return 0;
3048 gimple *g = SSA_NAME_DEF_STMT (t);
3049 if (gimple_code (g) == GIMPLE_PHI && limit > 0)
3050 if (int r = zero_one_minusone (as_a <gphi *> (g), limit - 1))
3052 ret |= r;
3053 continue;
3055 if (!is_gimple_assign (g))
3056 return 0;
3057 if (gimple_assign_cast_p (g))
3059 tree rhs1 = gimple_assign_rhs1 (g);
3060 if (TREE_CODE (rhs1) != SSA_NAME
3061 || !INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
3062 || TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
3063 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
3064 return 0;
3065 ret |= (2 | 4);
3066 continue;
3068 if (TREE_CODE_CLASS (gimple_assign_rhs_code (g)) != tcc_comparison)
3069 return 0;
3070 ret |= (2 | 4);
3072 return ret;
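/* E.g. a PHI merging the constants -1, 0 and 1 (as in a three-way
   compare) yields 1 | 2 | 4 = 7, while a PHI that may carry the
   value 2 yields 0.  */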
3075 /* Find the basic block with return expression and look up for possible
3076 return value trying to apply RETURN_PREDICTION heuristics. */
3077 static void
3078 apply_return_prediction (void)
3080 greturn *return_stmt = NULL;
3081 tree return_val;
3082 edge e;
3083 gphi *phi;
3084 int phi_num_args, i;
3085 enum br_predictor pred;
3086 enum prediction direction;
3087 edge_iterator ei;
3089 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
3091 if (greturn *last = safe_dyn_cast <greturn *> (*gsi_last_bb (e->src)))
3093 return_stmt = last;
3094 break;
3097 if (!e)
3098 return;
3099 return_val = gimple_return_retval (return_stmt);
3100 if (!return_val)
3101 return;
3102 if (TREE_CODE (return_val) != SSA_NAME
3103 || !SSA_NAME_DEF_STMT (return_val)
3104 || gimple_code (SSA_NAME_DEF_STMT (return_val)) != GIMPLE_PHI)
3105 return;
3106 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (return_val));
3107 phi_num_args = gimple_phi_num_args (phi);
3108 pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction);
3110 /* Avoid the case where the function returns -1, 0 and 1 values and
3111 nothing else. Those could be qsort etc. comparison functions
3112 where the negative return isn't less probable than positive.
3113 For this, require that the function returns at least -1 or 1,
3114 or -1 and a boolean value or comparison result, so that functions
3115 returning just -1 and 0 are treated as if -1 represents an error value. */
3116 if (INTEGRAL_TYPE_P (TREE_TYPE (return_val))
3117 && !TYPE_UNSIGNED (TREE_TYPE (return_val))
3118 && TYPE_PRECISION (TREE_TYPE (return_val)) > 1)
3119 if (int r = zero_one_minusone (phi, 3))
3120 if ((r & (1 | 4)) == (1 | 4))
3121 return;
3123 /* Avoid the degenerate case where all return values from the function
3124 belong to the same category (i.e. they are all positive constants),
3125 in which case we can hardly say anything about them. */
3126 for (i = 1; i < phi_num_args; i++)
3127 if (pred != return_prediction (PHI_ARG_DEF (phi, i), &direction))
3128 break;
3129 if (i != phi_num_args)
3130 for (i = 0; i < phi_num_args; i++)
3132 pred = return_prediction (PHI_ARG_DEF (phi, i), &direction);
3133 if (pred != PRED_NO_PREDICTION)
3134 predict_paths_leading_to_edge (gimple_phi_arg_edge (phi, i), pred,
3135 direction);
3139 /* Look for basic blocks that contain unlikely-to-happen events
3140 (such as noreturn calls) and mark all paths leading to the execution
3141 of these basic blocks as unlikely. */
3143 static void
3144 tree_bb_level_predictions (void)
3146 basic_block bb;
3147 bool has_return_edges = false;
3148 edge e;
3149 edge_iterator ei;
3151 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
3152 if (!unlikely_executed_edge_p (e) && !(e->flags & EDGE_ABNORMAL_CALL))
3154 has_return_edges = true;
3155 break;
3158 apply_return_prediction ();
3160 FOR_EACH_BB_FN (bb, cfun)
3162 gimple_stmt_iterator gsi;
3164 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3166 gimple *stmt = gsi_stmt (gsi);
3167 tree decl;
3169 if (is_gimple_call (stmt))
3171 if (gimple_call_noreturn_p (stmt)
3172 && has_return_edges
3173 && !is_exit_with_zero_arg (stmt))
3174 predict_paths_leading_to (bb, PRED_NORETURN,
3175 NOT_TAKEN);
3176 decl = gimple_call_fndecl (stmt);
3177 if (decl
3178 && lookup_attribute ("cold",
3179 DECL_ATTRIBUTES (decl)))
3180 predict_paths_leading_to (bb, PRED_COLD_FUNCTION,
3181 NOT_TAKEN);
3182 if (decl && recursive_call_p (current_function_decl, decl))
3183 predict_paths_leading_to (bb, PRED_RECURSIVE_CALL,
3184 NOT_TAKEN);
3186 else if (gimple_code (stmt) == GIMPLE_PREDICT)
3188 predict_paths_leading_to (bb, gimple_predict_predictor (stmt),
3189 gimple_predict_outcome (stmt));
3190 /* Keep GIMPLE_PREDICT around so early inlining will propagate
3191 hints to callers. */
3197 /* Callback for hash_map::traverse, asserts that the pointer map is
3198 empty. */
3200 bool
3201 assert_is_empty (const_basic_block const &, edge_prediction *const &value,
3202 void *)
3204 gcc_assert (!value);
3205 return true;
3208 /* Predict branch probabilities and estimate profile for basic block BB.
3209 When LOCAL_ONLY is set do not use any global properties of CFG. */
3211 static void
3212 tree_estimate_probability_bb (basic_block bb, bool local_only)
3214 edge e;
3215 edge_iterator ei;
3217 FOR_EACH_EDGE (e, ei, bb->succs)
3219 /* Look for a block we are guarding (i.e. we dominate it,
3220 but it doesn't postdominate us). */
3221 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) && e->dest != bb
3222 && !local_only
3223 && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
3224 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
3226 gimple_stmt_iterator bi;
3228 /* The call heuristic claims that a guarded function call
3229 is improbable. This is because such calls are often used
3230 to signal exceptional situations such as printing error
3231 messages. */
3232 for (bi = gsi_start_bb (e->dest); !gsi_end_p (bi);
3233 gsi_next (&bi))
3235 gimple *stmt = gsi_stmt (bi);
3236 if (is_gimple_call (stmt)
3237 && !gimple_inexpensive_call_p (as_a <gcall *> (stmt))
3238 /* Constant and pure calls are hardly used to signal
3239 something exceptional. */
3240 && gimple_has_side_effects (stmt))
3242 if (gimple_call_fndecl (stmt))
3243 predict_edge_def (e, PRED_CALL, NOT_TAKEN);
3244 else if (virtual_method_call_p (gimple_call_fn (stmt)))
3245 predict_edge_def (e, PRED_POLYMORPHIC_CALL, NOT_TAKEN);
3246 else
3247 predict_edge_def (e, PRED_INDIR_CALL, TAKEN);
3248 break;
3253 tree_predict_by_opcode (bb);
3256 /* Predict branch probabilities and estimate profile of the tree CFG.
3257 This function can be called from the loop optimizers to recompute
3258 the profile information.
3259 If DRY_RUN is set, do not modify CFG and only produce dump files. */
3261 void
3262 tree_estimate_probability (bool dry_run)
3264 basic_block bb;
3266 connect_infinite_loops_to_exit ();
3267 /* We use loop_niter_by_eval, which requires that the loops have
3268 preheaders. */
3269 create_preheaders (CP_SIMPLE_PREHEADERS);
3270 calculate_dominance_info (CDI_POST_DOMINATORS);
3271 /* Decide which edges are known to be unlikely. This improves later
3272 branch prediction. */
3273 determine_unlikely_bbs ();
3275 bb_predictions = new hash_map<const_basic_block, edge_prediction *>;
3276 ssa_expected_value = new hash_map<int_hash<unsigned, 0>, expected_value>;
3278 tree_bb_level_predictions ();
3279 record_loop_exits ();
3281 if (number_of_loops (cfun) > 1)
3282 predict_loops ();
3284 FOR_EACH_BB_FN (bb, cfun)
3285 tree_estimate_probability_bb (bb, false);
3287 FOR_EACH_BB_FN (bb, cfun)
3288 combine_predictions_for_bb (bb, dry_run);
3290 if (flag_checking)
3291 bb_predictions->traverse<void *, assert_is_empty> (NULL);
3293 delete bb_predictions;
3294 bb_predictions = NULL;
3295 delete ssa_expected_value;
3296 ssa_expected_value = NULL;
3298 if (!dry_run
3299 && profile_status_for_fn (cfun) != PROFILE_READ)
3300 estimate_bb_frequencies ();
3301 free_dominance_info (CDI_POST_DOMINATORS);
3302 remove_fake_exit_edges ();
3305 /* Set edge->probability for each successor edge of BB. */
3306 void
3307 tree_guess_outgoing_edge_probabilities (basic_block bb)
3309 bb_predictions = new hash_map<const_basic_block, edge_prediction *>;
3310 ssa_expected_value = new hash_map<int_hash<unsigned, 0>, expected_value>;
3311 tree_estimate_probability_bb (bb, true);
3312 combine_predictions_for_bb (bb, false);
3313 if (flag_checking)
3314 bb_predictions->traverse<void *, assert_is_empty> (NULL);
3315 delete bb_predictions;
3316 bb_predictions = NULL;
3317 delete ssa_expected_value;
3318 ssa_expected_value = NULL;
3321 /* Filter function predicate that returns true for an edge prediction P
3322 unless P is a PRED_LOOP_GUARD prediction on the edge DATA. */
3324 static bool
3325 not_loop_guard_equal_edge_p (edge_prediction *p, void *data)
3327 return p->ep_edge != (edge)data || p->ep_predictor != PRED_LOOP_GUARD;
3330 /* Predict edge E with PRED unless it is already predicted by some predictor
3331 considered equivalent. */
3333 static void
3334 maybe_predict_edge (edge e, enum br_predictor pred, enum prediction taken)
3336 if (edge_predicted_by_p (e, pred, taken))
3337 return;
3338 if (pred == PRED_LOOP_GUARD
3339 && edge_predicted_by_p (e, PRED_LOOP_GUARD_WITH_RECURSION, taken))
3340 return;
3341 /* Consider PRED_LOOP_GUARD_WITH_RECURSION superior to LOOP_GUARD. */
3342 if (pred == PRED_LOOP_GUARD_WITH_RECURSION)
3344 edge_prediction **preds = bb_predictions->get (e->src);
3345 if (preds)
3346 filter_predictions (preds, not_loop_guard_equal_edge_p, e);
3348 predict_edge_def (e, pred, taken);
3350 /* Predict by PRED the edges into CUR whose sources are not postdominated
3351 by BB, and recurse to all postdominators of CUR. */
3353 static void
3354 predict_paths_for_bb (basic_block cur, basic_block bb,
3355 enum br_predictor pred,
3356 enum prediction taken,
3357 bitmap visited, class loop *in_loop = NULL)
3359 edge e;
3360 edge_iterator ei;
3361 basic_block son;
3363 /* If we exited the loop or CUR is unconditional in the loop, there is
3364 nothing to do. */
3365 if (in_loop
3366 && (!flow_bb_inside_loop_p (in_loop, cur)
3367 || dominated_by_p (CDI_DOMINATORS, in_loop->latch, cur)))
3368 return;
3370 /* We are looking for all edges forming an edge cut induced by the
3371 set of all blocks postdominated by BB. */
3372 FOR_EACH_EDGE (e, ei, cur->preds)
3373 if (e->src->index >= NUM_FIXED_BLOCKS
3374 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, bb))
3376 edge e2;
3377 edge_iterator ei2;
3378 bool found = false;
3380 /* Ignore fake edges and EH edges; we predict them as not taken anyway. */
3381 if (unlikely_executed_edge_p (e))
3382 continue;
3383 gcc_assert (bb == cur || dominated_by_p (CDI_POST_DOMINATORS, cur, bb));
3385 /* See if there is an edge from e->src that is not abnormal
3386 and does not lead to BB and does not exit the loop. */
3387 FOR_EACH_EDGE (e2, ei2, e->src->succs)
3388 if (e2 != e
3389 && !unlikely_executed_edge_p (e2)
3390 && !dominated_by_p (CDI_POST_DOMINATORS, e2->dest, bb)
3391 && (!in_loop || !loop_exit_edge_p (in_loop, e2)))
3393 found = true;
3394 break;
3397 /* If there is a non-abnormal path leaving e->src, predict the edge
3398 using the predictor. Otherwise we need to look for paths
3399 leading to e->src.
3401 The second case may lead to an infinite loop when we are predicting
3402 regions that are only reachable by abnormal edges. We simply
3403 prevent visiting a given BB twice. */
3404 if (found)
3405 maybe_predict_edge (e, pred, taken);
3406 else if (bitmap_set_bit (visited, e->src->index))
3407 predict_paths_for_bb (e->src, e->src, pred, taken, visited, in_loop);
3409 for (son = first_dom_son (CDI_POST_DOMINATORS, cur);
3410 son;
3411 son = next_dom_son (CDI_POST_DOMINATORS, son))
3412 predict_paths_for_bb (son, bb, pred, taken, visited, in_loop);
3415 /* Set the branch probabilities of paths leading to BB according to
3416 PRED and TAKEN. */
3418 static void
3419 predict_paths_leading_to (basic_block bb, enum br_predictor pred,
3420 enum prediction taken, class loop *in_loop)
3422 predict_paths_for_bb (bb, bb, pred, taken, auto_bitmap (), in_loop);
3425 /* Like predict_paths_leading_to but take edge instead of basic block. */
3427 static void
3428 predict_paths_leading_to_edge (edge e, enum br_predictor pred,
3429 enum prediction taken, class loop *in_loop)
3431 bool has_nonloop_edge = false;
3432 edge_iterator ei;
3433 edge e2;
3435 basic_block bb = e->src;
3436 FOR_EACH_EDGE (e2, ei, bb->succs)
3437 if (e2->dest != e->src && e2->dest != e->dest
3438 && !unlikely_executed_edge_p (e2)
3439 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e2->dest))
3441 has_nonloop_edge = true;
3442 break;
3445 if (!has_nonloop_edge)
3446 predict_paths_for_bb (bb, bb, pred, taken, auto_bitmap (), in_loop);
3447 else
3448 maybe_predict_edge (e, pred, taken);
3451 /* This is used to carry information about basic blocks. It is
3452 attached to the AUX field of the standard CFG block. */
3454 class block_info
3456 public:
3457 /* Estimated frequency of execution of basic_block. */
3458 sreal frequency;
3460 /* To keep queue of basic blocks to process. */
3461 basic_block next;
3463 /* Number of predecessors we need to visit first. */
3464 int npredecessors;
3467 /* Similar information for edges. */
3468 class edge_prob_info
3470 public:
3471 /* In case the edge is a loopback edge, the probability that the edge
3472 will be reached provided that the header is. The estimated number of
3473 iterations of the loop can then be computed as 1 / (1 - back_edge_prob). */
3474 sreal back_edge_prob;
3475 /* True if the edge is a loopback edge in the natural loop. */
3476 unsigned int back_edge:1;
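/* E.g. a back edge with back_edge_prob == 0.9 implies the loop is
   expected to iterate 1 / (1 - 0.9) = 10 times.  */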
3479 #define BLOCK_INFO(B) ((block_info *) (B)->aux)
3480 #undef EDGE_INFO
3481 #define EDGE_INFO(E) ((edge_prob_info *) (E)->aux)
3483 /* Helper function for estimate_bb_frequencies.
3484 Propagate the frequencies in blocks marked in
3485 TOVISIT, starting in HEAD. */
3487 static void
3488 propagate_freq (basic_block head, bitmap tovisit,
3489 sreal max_cyclic_prob)
3491 basic_block bb;
3492 basic_block last;
3493 unsigned i;
3494 edge e;
3495 basic_block nextbb;
3496 bitmap_iterator bi;
3498 /* For each basic block we need to visit, count the number of its
3499 predecessors that we need to visit first. */
3500 EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi)
3502 edge_iterator ei;
3503 int count = 0;
3505 bb = BASIC_BLOCK_FOR_FN (cfun, i);
3507 FOR_EACH_EDGE (e, ei, bb->preds)
3509 bool visit = bitmap_bit_p (tovisit, e->src->index);
3511 if (visit && !(e->flags & EDGE_DFS_BACK))
3512 count++;
3513 else if (visit && dump_file && !EDGE_INFO (e)->back_edge)
3514 fprintf (dump_file,
3515 "Irreducible region hit, ignoring edge to %i->%i\n",
3516 e->src->index, bb->index);
3518 BLOCK_INFO (bb)->npredecessors = count;
3519 /* When the function never returns, we will never process the exit block. */
3520 if (!count && bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
3521 bb->count = profile_count::zero ();
3524 BLOCK_INFO (head)->frequency = 1;
3525 last = head;
3526 for (bb = head; bb; bb = nextbb)
3528 edge_iterator ei;
3529 sreal cyclic_probability = 0;
3530 sreal frequency = 0;
3532 nextbb = BLOCK_INFO (bb)->next;
3533 BLOCK_INFO (bb)->next = NULL;
3535 /* Compute frequency of basic block. */
3536 if (bb != head)
3538 if (flag_checking)
3539 FOR_EACH_EDGE (e, ei, bb->preds)
3540 gcc_assert (!bitmap_bit_p (tovisit, e->src->index)
3541 || (e->flags & EDGE_DFS_BACK));
3543 FOR_EACH_EDGE (e, ei, bb->preds)
3544 if (EDGE_INFO (e)->back_edge)
3545 cyclic_probability += EDGE_INFO (e)->back_edge_prob;
3546 else if (!(e->flags & EDGE_DFS_BACK))
3548 /* FIXME: Graphite is producing edges with no profile. Once
3549 this is fixed, drop this. */
3550 sreal tmp = e->probability.initialized_p () ?
3551 e->probability.to_sreal () : 0;
3552 frequency += tmp * BLOCK_INFO (e->src)->frequency;
3555 if (cyclic_probability == 0)
3557 BLOCK_INFO (bb)->frequency = frequency;
3559 else
3561 if (cyclic_probability > max_cyclic_prob)
3563 if (dump_file)
3564 fprintf (dump_file,
3565 "cyclic probability of bb %i is %f (capped to %f)"
3566 "; turning freq %f",
3567 bb->index, cyclic_probability.to_double (),
3568 max_cyclic_prob.to_double (),
3569 frequency.to_double ());
3571 cyclic_probability = max_cyclic_prob;
3573 else if (dump_file)
3574 fprintf (dump_file,
3575 "cyclic probability of bb %i is %f; turning freq %f",
3576 bb->index, cyclic_probability.to_double (),
3577 frequency.to_double ());
3579 BLOCK_INFO (bb)->frequency = frequency
3580 / (sreal (1) - cyclic_probability);
3581 if (dump_file)
3582 fprintf (dump_file, " to %f\n",
3583 BLOCK_INFO (bb)->frequency.to_double ());
3587 bitmap_clear_bit (tovisit, bb->index);
3589 e = find_edge (bb, head);
3590 if (e)
3592 /* FIXME: Graphite is producing edges with no profile. Once
3593 this is fixed, drop this. */
3594 sreal tmp = e->probability.initialized_p () ?
3595 e->probability.to_sreal () : 0;
3596 EDGE_INFO (e)->back_edge_prob = tmp * BLOCK_INFO (bb)->frequency;
3599 /* Propagate to successor blocks. */
3600 FOR_EACH_EDGE (e, ei, bb->succs)
3601 if (!(e->flags & EDGE_DFS_BACK)
3602 && BLOCK_INFO (e->dest)->npredecessors)
3604 BLOCK_INFO (e->dest)->npredecessors--;
3605 if (!BLOCK_INFO (e->dest)->npredecessors)
3607 if (!nextbb)
3608 nextbb = e->dest;
3609 else
3610 BLOCK_INFO (last)->next = e->dest;
3612 last = e->dest;
3618 /* Estimate frequencies in loops at the same nest level. */
3620 static void
3621 estimate_loops_at_level (class loop *first_loop, sreal max_cyclic_prob)
3623 class loop *loop;
3625 for (loop = first_loop; loop; loop = loop->next)
3627 edge e;
3628 basic_block *bbs;
3629 unsigned i;
3630 auto_bitmap tovisit;
3632 estimate_loops_at_level (loop->inner, max_cyclic_prob);
3634 /* Find current loop back edge and mark it. */
3635 e = loop_latch_edge (loop);
3636 EDGE_INFO (e)->back_edge = 1;
3638 bbs = get_loop_body (loop);
3639 for (i = 0; i < loop->num_nodes; i++)
3640 bitmap_set_bit (tovisit, bbs[i]->index);
3641 free (bbs);
3642 propagate_freq (loop->header, tovisit, max_cyclic_prob);
3646 /* Propagates frequencies through structure of loops. */
3648 static void
3649 estimate_loops (void)
3651 auto_bitmap tovisit;
3652 basic_block bb;
3653 sreal max_cyclic_prob = (sreal)1
3654 - (sreal)1 / (param_max_predicted_iterations + 1);
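/* E.g. with param_max_predicted_iterations == 100 (the default),
   max_cyclic_prob is 1 - 1/101 = 100/101, so propagate_freq never
   scales a loop frequency up by more than a factor of 101.  */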
3656 /* Start by estimating the frequencies in the loops. */
3657 if (number_of_loops (cfun) > 1)
3658 estimate_loops_at_level (current_loops->tree_root->inner, max_cyclic_prob);
3660 /* Now propagate the frequencies through all the blocks. */
3661 FOR_ALL_BB_FN (bb, cfun)
3663 bitmap_set_bit (tovisit, bb->index);
3665 propagate_freq (ENTRY_BLOCK_PTR_FOR_FN (cfun), tovisit, max_cyclic_prob);
3668 /* Drop the profile for NODE to guessed, and update its frequency based on
3669 whether it is expected to be hot given the CALL_COUNT. */
3671 static void
3672 drop_profile (struct cgraph_node *node, profile_count call_count)
3674 struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
3675 /* In the case where this was called by another function with a
3676 dropped profile, call_count will be 0. Since there are no
3677 non-zero call counts to this function, we don't know for sure
3678 whether it is hot, and therefore it will be marked normal below. */
3679 bool hot = maybe_hot_count_p (NULL, call_count);
3681 if (dump_file)
3682 fprintf (dump_file,
3683 "Dropping 0 profile for %s. %s based on calls.\n",
3684 node->dump_name (),
3685 hot ? "Function is hot" : "Function is normal");
3686 /* We only expect to miss profiles for functions that are reached
3687 via non-zero call edges in cases where the function may have
3688 been linked from another module or library (COMDATs and extern
3689 templates). See the comments below for handle_missing_profiles.
3690 Also, only warn in cases where the missing counts exceed the
3691 number of training runs. In certain cases with an execv followed
3692 by a no-return call the profile for the no-return call is not
3693 dumped and there can be a mismatch. */
3694 if (!DECL_COMDAT (node->decl) && !DECL_EXTERNAL (node->decl)
3695 && call_count > profile_info->runs)
3697 if (flag_profile_correction)
3699 if (dump_file)
3700 fprintf (dump_file,
3701 "Missing counts for called function %s\n",
3702 node->dump_name ());
3704 else
3705 warning (0, "Missing counts for called function %s",
3706 node->dump_name ());
3709 basic_block bb;
3710 if (opt_for_fn (node->decl, flag_guess_branch_prob))
3712 bool clear_zeros
3713 = !ENTRY_BLOCK_PTR_FOR_FN (fn)->count.nonzero_p ();
3714 FOR_ALL_BB_FN (bb, fn)
3715 if (clear_zeros || !(bb->count == profile_count::zero ()))
3716 bb->count = bb->count.guessed_local ();
3717 fn->cfg->count_max = fn->cfg->count_max.guessed_local ();
3719 else
3721 FOR_ALL_BB_FN (bb, fn)
3722 bb->count = profile_count::uninitialized ();
3723 fn->cfg->count_max = profile_count::uninitialized ();
3726 struct cgraph_edge *e;
3727 for (e = node->callees; e; e = e->next_callee)
3728 e->count = gimple_bb (e->call_stmt)->count;
3729 for (e = node->indirect_calls; e; e = e->next_callee)
3730 e->count = gimple_bb (e->call_stmt)->count;
3731 node->count = ENTRY_BLOCK_PTR_FOR_FN (fn)->count;
3733 profile_status_for_fn (fn)
3734 = (flag_guess_branch_prob ? PROFILE_GUESSED : PROFILE_ABSENT);
3735 node->frequency
3736 = hot ? NODE_FREQUENCY_HOT : NODE_FREQUENCY_NORMAL;
3739 /* In the case of COMDAT routines, multiple object files will contain the same
3740 function and the linker will select one for the binary. In that case
3741 all the other copies from the profile-instrumented binary will be missing
3742 profile counts. Look for cases where this happened, due to non-zero
3743 call counts going to 0-count functions, and drop the profile to guessed
3744 so that we can use the estimated probabilities and avoid optimizing only
3745 for size.
3747 The other case where the profile may be missing is when the routine
3748 is not going to be emitted to the object file, e.g. for "extern template"
3749 class methods. Those will be marked DECL_EXTERNAL. Emit a warning in
3750 all other cases of non-zero calls to 0-count functions. */
3752 void
3753 handle_missing_profiles (void)
3755 const int unlikely_frac = param_unlikely_bb_count_fraction;
3756 struct cgraph_node *node;
3757 auto_vec<struct cgraph_node *, 64> worklist;
3759 /* See if a 0-count function has non-0-count callers. In this case we
3760 lost some profile. Drop its function profile to PROFILE_GUESSED. */
3761 FOR_EACH_DEFINED_FUNCTION (node)
3763 struct cgraph_edge *e;
3764 profile_count call_count = profile_count::zero ();
3765 gcov_type max_tp_first_run = 0;
3766 struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
3768 if (node->count.ipa ().nonzero_p ())
3769 continue;
3770 for (e = node->callers; e; e = e->next_caller)
3771 if (e->count.ipa ().initialized_p () && e->count.ipa () > 0)
3773 call_count = call_count + e->count.ipa ();
3775 if (e->caller->tp_first_run > max_tp_first_run)
3776 max_tp_first_run = e->caller->tp_first_run;
3779 /* If the time profile is missing, assign the maximum that comes from
3780 the caller functions. */
3781 if (!node->tp_first_run && max_tp_first_run)
3782 node->tp_first_run = max_tp_first_run + 1;
3784 if (call_count > 0
3785 && fn && fn->cfg
3786 && call_count * unlikely_frac >= profile_info->runs)
3788 drop_profile (node, call_count);
3789 worklist.safe_push (node);
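/* E.g. with param_unlikely_bb_count_fraction == 20 and
   profile_info->runs == 100, a function whose local profile is zero
   but which has at least five incoming IPA calls gets its profile
   dropped here.  */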
3793 /* Propagate the profile dropping to other 0-count COMDATs that are
3794 potentially called by COMDATs we already dropped the profile on. */
3795 while (worklist.length () > 0)
3797 struct cgraph_edge *e;
3799 node = worklist.pop ();
3800 for (e = node->callees; e; e = e->next_callee)
3802 struct cgraph_node *callee = e->callee;
3803 struct function *fn = DECL_STRUCT_FUNCTION (callee->decl);
3805 if (!(e->count.ipa () == profile_count::zero ())
3806 && callee->count.ipa ().nonzero_p ())
3807 continue;
3808 if ((DECL_COMDAT (callee->decl) || DECL_EXTERNAL (callee->decl))
3809 && fn && fn->cfg
3810 && profile_status_for_fn (fn) == PROFILE_READ)
3812 drop_profile (node, profile_count::zero ());
3813 worklist.safe_push (callee);
3819 /* Update the stored maximal basic block count of the function.
3820 Return nonzero iff there was any nonzero IPA execution count. */
3822 bool
3823 update_max_bb_count (void)
3825 profile_count true_count_max = profile_count::uninitialized ();
3826 basic_block bb;
3828 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
3829 true_count_max = true_count_max.max (bb->count);
3831 cfun->cfg->count_max = true_count_max;
3833 return true_count_max.ipa ().nonzero_p ();
3836 /* Return true if the function is likely to be expensive, so there is no
3837 point in optimizing the prologue or epilogue or in doing inlining at the
3838 expense of code size growth. THRESHOLD is the limit on the number of
3839 instructions the function can execute on average to still not be expensive. */
3841 bool
3842 expensive_function_p (int threshold)
3844 basic_block bb;
3846 /* If the profile was scaled in such a way that the entry block has count 0,
3847 then the function is definitely taking a lot of time. */
3848 if (!ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.nonzero_p ())
3849 return true;
3851 profile_count limit = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count * threshold;
3852 profile_count sum = profile_count::zero ();
3853 FOR_EACH_BB_FN (bb, cfun)
3855 rtx_insn *insn;
3857 if (!bb->count.initialized_p ())
3859 if (dump_file)
3860 fprintf (dump_file, "Function is considered expensive because"
3861 " count of bb %i is not initialized\n", bb->index);
3862 return true;
3865 FOR_BB_INSNS (bb, insn)
3866 if (active_insn_p (insn))
3868 sum += bb->count;
3869 if (sum > limit)
3870 return true;
3874 return false;
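/* Worked example: with an entry count of 1000 and THRESHOLD == 50,
   limit is 50000; the function is deemed expensive once the counts
   of its active insns sum beyond that, i.e. once it executes more
   than 50 insns per invocation on average.  */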
3877 /* All basic blocks that are reachable only from unlikely basic blocks are
3878 unlikely. */
3880 void
3881 propagate_unlikely_bbs_forward (void)
3883 auto_vec<basic_block, 64> worklist;
3884 basic_block bb;
3885 edge_iterator ei;
3886 edge e;
3888 if (!(ENTRY_BLOCK_PTR_FOR_FN (cfun)->count == profile_count::zero ()))
3890 ENTRY_BLOCK_PTR_FOR_FN (cfun)->aux = (void *)(size_t) 1;
3891 worklist.safe_push (ENTRY_BLOCK_PTR_FOR_FN (cfun));
3893 while (worklist.length () > 0)
3895 bb = worklist.pop ();
3896 FOR_EACH_EDGE (e, ei, bb->succs)
3897 if (!(e->count () == profile_count::zero ())
3898 && !(e->dest->count == profile_count::zero ())
3899 && !e->dest->aux)
3901 e->dest->aux = (void *)(size_t) 1;
3902 worklist.safe_push (e->dest);
3907 FOR_ALL_BB_FN (bb, cfun)
3909 if (!bb->aux)
3911 if (!(bb->count == profile_count::zero ())
3912 && (dump_file && (dump_flags & TDF_DETAILS)))
3913 fprintf (dump_file,
3914 "Basic block %i is marked unlikely by forward prop\n",
3915 bb->index);
3916 bb->count = profile_count::zero ();
3918 else
3919 bb->aux = NULL;
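
/* Editorial sketch, not part of GCC: the pass above is plain worklist
   reachability restricted to edges and blocks whose counts are not known to
   be zero; whatever stays unvisited is forced to count zero.  A standalone
   rendition over a hypothetical adjacency-list CFG (assumes <vector>):  */

static void
sketch_forward_prop (int entry, int nblocks,
		     const std::vector<std::vector<int> > &succs,
		     std::vector<bool> &unlikely)
{
  std::vector<bool> reached (nblocks, false);
  std::vector<int> worklist;
  reached[entry] = true;
  worklist.push_back (entry);
  while (!worklist.empty ())
    {
      int bb = worklist.back ();
      worklist.pop_back ();
      for (int dest : succs[bb])
	/* Never walk into blocks already known to be unlikely.  */
	if (!unlikely[dest] && !reached[dest])
	  {
	    reached[dest] = true;
	    worklist.push_back (dest);
	  }
    }
  for (int bb = 0; bb < nblocks; bb++)
    if (!reached[bb])
      unlikely[bb] = true;	/* Reachable only via unlikely blocks.  */
}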
3923 /* Determine basic blocks/edges that are known to be unlikely executed and set
3924 their counters to zero.
3925    This is done by first identifying obviously unlikely BBs/edges and then
3926 propagating in both directions. */
3928 static void
3929 determine_unlikely_bbs ()
3931 basic_block bb;
3932 auto_vec<basic_block, 64> worklist;
3933 edge_iterator ei;
3934 edge e;
3936 FOR_EACH_BB_FN (bb, cfun)
3938 if (!(bb->count == profile_count::zero ())
3939 && unlikely_executed_bb_p (bb))
3941 if (dump_file && (dump_flags & TDF_DETAILS))
3942 fprintf (dump_file, "Basic block %i is locally unlikely\n",
3943 bb->index);
3944 bb->count = profile_count::zero ();
3947 FOR_EACH_EDGE (e, ei, bb->succs)
3948 if (!(e->probability == profile_probability::never ())
3949 && unlikely_executed_edge_p (e))
3951 if (dump_file && (dump_flags & TDF_DETAILS))
3952 fprintf (dump_file, "Edge %i->%i is locally unlikely\n",
3953 bb->index, e->dest->index);
3954 e->probability = profile_probability::never ();
3957 gcc_checking_assert (!bb->aux);
3959 propagate_unlikely_bbs_forward ();
3961 auto_vec<int, 64> nsuccs;
3962 nsuccs.safe_grow_cleared (last_basic_block_for_fn (cfun), true);
3963 FOR_ALL_BB_FN (bb, cfun)
3964 if (!(bb->count == profile_count::zero ())
3965 && bb != EXIT_BLOCK_PTR_FOR_FN (cfun))
3967 nsuccs[bb->index] = 0;
3968 FOR_EACH_EDGE (e, ei, bb->succs)
3969 if (!(e->probability == profile_probability::never ())
3970 && !(e->dest->count == profile_count::zero ()))
3971 nsuccs[bb->index]++;
3972 if (!nsuccs[bb->index])
3973 worklist.safe_push (bb);
3975 while (worklist.length () > 0)
3977 bb = worklist.pop ();
3978 if (bb->count == profile_count::zero ())
3979 continue;
3980 if (bb != ENTRY_BLOCK_PTR_FOR_FN (cfun))
3982 bool found = false;
3983 for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
3984 !gsi_end_p (gsi); gsi_next (&gsi))
3985 if (stmt_can_terminate_bb_p (gsi_stmt (gsi))
3986 /* stmt_can_terminate_bb_p special cases noreturns because it
3987 assumes that fake edges are created. We want to know that
3988 noreturn alone does not imply BB to be unlikely. */
3989 || (is_gimple_call (gsi_stmt (gsi))
3990 && (gimple_call_flags (gsi_stmt (gsi)) & ECF_NORETURN)))
3992 found = true;
3993 break;
3995 if (found)
3996 continue;
3998 if (dump_file && (dump_flags & TDF_DETAILS))
3999 fprintf (dump_file,
4000 "Basic block %i is marked unlikely by backward prop\n",
4001 bb->index);
4002 bb->count = profile_count::zero ();
4003 FOR_EACH_EDGE (e, ei, bb->preds)
4004 if (!(e->probability == profile_probability::never ()))
4006 if (!(e->src->count == profile_count::zero ()))
4008 gcc_checking_assert (nsuccs[e->src->index] > 0);
4009 nsuccs[e->src->index]--;
4010 if (!nsuccs[e->src->index])
4011 worklist.safe_push (e->src);
4015 /* Finally all edges from non-0 regions to 0 are unlikely. */
4016 FOR_ALL_BB_FN (bb, cfun)
4018 if (!(bb->count == profile_count::zero ()))
4019 FOR_EACH_EDGE (e, ei, bb->succs)
4020 if (!(e->probability == profile_probability::never ())
4021 && e->dest->count == profile_count::zero ())
4023 if (dump_file && (dump_flags & TDF_DETAILS))
4024 fprintf (dump_file, "Edge %i->%i is unlikely because "
4025 "it enters unlikely block\n",
4026 bb->index, e->dest->index);
4027 e->probability = profile_probability::never ();
4030 edge other = NULL;
4032 FOR_EACH_EDGE (e, ei, bb->succs)
4033 if (e->probability == profile_probability::never ())
4035 else if (other)
4037 other = NULL;
4038 break;
4040 else
4041 other = e;
4042 if (other
4043 && !(other->probability == profile_probability::always ()))
4045 if (dump_file && (dump_flags & TDF_DETAILS))
4046 fprintf (dump_file, "Edge %i->%i is locally likely\n",
4047 bb->index, other->dest->index);
4048 other->probability = profile_probability::always ();
4051 if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count == profile_count::zero ())
4052 cgraph_node::get (current_function_decl)->count = profile_count::zero ();
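
/* Editorial sketch, not part of GCC: the backward half above is Kahn-style
   elimination on the reversed "still likely" graph.  nsuccs[bb] counts the
   outgoing edges that can still be taken; when it reaches zero and the block
   cannot terminate execution by itself, the block becomes unlikely and each
   predecessor loses one viable successor in turn (assumes <vector>; names
   are hypothetical):  */

static void
sketch_backward_prop (int nblocks,
		      const std::vector<std::vector<int> > &preds,
		      std::vector<int> &nsuccs,
		      const std::vector<bool> &can_terminate,
		      std::vector<bool> &unlikely)
{
  std::vector<int> worklist;
  for (int bb = 0; bb < nblocks; bb++)
    if (!unlikely[bb] && nsuccs[bb] == 0 && !can_terminate[bb])
      worklist.push_back (bb);
  while (!worklist.empty ())
    {
      int bb = worklist.back ();
      worklist.pop_back ();
      unlikely[bb] = true;
      /* Every predecessor just lost one viable successor edge.  */
      for (int src : preds[bb])
	if (!unlikely[src] && --nsuccs[src] == 0 && !can_terminate[src])
	  worklist.push_back (src);
    }
}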
4055 /* Estimate and propagate basic block frequencies using the given branch
4056 probabilities. */
4058 static void
4059 estimate_bb_frequencies ()
4061 basic_block bb;
4062 sreal freq_max;
4064 determine_unlikely_bbs ();
4066 mark_dfs_back_edges ();
4068 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun))->probability =
4069 profile_probability::always ();
4071 /* Set up block info for each basic block. */
4072 alloc_aux_for_blocks (sizeof (block_info));
4073 alloc_aux_for_edges (sizeof (edge_prob_info));
4074 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
4076 edge e;
4077 edge_iterator ei;
4079 FOR_EACH_EDGE (e, ei, bb->succs)
4081 /* FIXME: Graphite is producing edges with no profile. Once
4082 this is fixed, drop this. */
4083 if (e->probability.initialized_p ())
4084 EDGE_INFO (e)->back_edge_prob
4085 = e->probability.to_sreal ();
4086 else
4087 /* back_edge_prob = 0.5 */
4088 EDGE_INFO (e)->back_edge_prob = sreal (1, -1);
4092 /* First compute frequencies locally for each loop from innermost
4093 to outermost to examine frequencies for back edges. */
4094 estimate_loops ();
4096 freq_max = 0;
4097 FOR_EACH_BB_FN (bb, cfun)
4098 if (freq_max < BLOCK_INFO (bb)->frequency)
4099 freq_max = BLOCK_INFO (bb)->frequency;
4101 /* Scaling frequencies up to maximal profile count may result in
4102 frequent overflows especially when inlining loops.
4103      Small scaling results in unnecessary precision loss.  Stay in
4104 the half of the (exponential) range. */
4105 freq_max = (sreal (1) << (profile_count::n_bits / 2)) / freq_max;
4106 if (freq_max < 16)
4107 freq_max = 16;
4108 profile_count ipa_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa ();
4109 cfun->cfg->count_max = profile_count::uninitialized ();
4110 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
4112 sreal tmp = BLOCK_INFO (bb)->frequency;
4113 if (tmp >= 1)
4115 gimple_stmt_iterator gsi;
4116 tree decl;
4118	  /* Self recursive calls cannot have frequency greater than 1
4119	     or the program will never terminate.  This will result in an
4120 inconsistent bb profile but it is better than greatly confusing
4121 IPA cost metrics. */
4122 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4123 if (is_gimple_call (gsi_stmt (gsi))
4124 && (decl = gimple_call_fndecl (gsi_stmt (gsi))) != NULL
4125 && recursive_call_p (current_function_decl, decl))
4127 if (dump_file)
4128 fprintf (dump_file, "Dropping frequency of recursive call"
4129 " in bb %i from %f\n", bb->index,
4130 tmp.to_double ());
4131 tmp = (sreal)9 / (sreal)10;
4132 break;
4135 tmp = tmp * freq_max;
4136 profile_count count = profile_count::from_gcov_type (tmp.to_nearest_int ());
4138 /* If we have profile feedback in which this function was never
4139 executed, then preserve this info. */
4140 if (!(bb->count == profile_count::zero ()))
4141 bb->count = count.guessed_local ().combine_with_ipa_count (ipa_count);
4142 cfun->cfg->count_max = cfun->cfg->count_max.max (bb->count);
4145 free_aux_for_blocks ();
4146 free_aux_for_edges ();
4147 compute_function_frequency ();
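
/* Editorial note, not part of GCC: a worked instance of the scaling above.
   profile_count currently keeps counts in 61 bits, so the anchor is
   2^(61/2) == 2^30.  If the hottest block's estimated frequency is 1000,
   the scale becomes 2^30 / 1000, roughly 1073741, putting the hottest count
   near 2^30 with equal headroom against overflow above and precision loss
   below.  A minimal sketch of that computation:  */

static double
sketch_freq_scale (double freq_max)
{
  /* 61 is profile_count::n_bits at the time of writing; hard-coded here
     purely for illustration.  */
  double scale = (double) (1ull << (61 / 2)) / freq_max;
  return scale < 16 ? 16 : scale;	/* Keep a sane minimum scale.  */
}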
4150 /* Decide whether function is hot, cold or unlikely executed. */
4151 void
4152 compute_function_frequency (void)
4154 basic_block bb;
4155 struct cgraph_node *node = cgraph_node::get (current_function_decl);
4157 if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
4158 || MAIN_NAME_P (DECL_NAME (current_function_decl)))
4159 node->only_called_at_startup = true;
4160 if (DECL_STATIC_DESTRUCTOR (current_function_decl))
4161 node->only_called_at_exit = true;
4163 if (!ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa_p ())
4165 int flags = flags_from_decl_or_type (current_function_decl);
4166 if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
4167 != NULL)
4168 node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
4169 else if (lookup_attribute ("hot", DECL_ATTRIBUTES (current_function_decl))
4170 != NULL)
4171 node->frequency = NODE_FREQUENCY_HOT;
4172 else if (flags & ECF_NORETURN)
4173 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
4174 else if (MAIN_NAME_P (DECL_NAME (current_function_decl)))
4175 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
4176 else if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
4177 || DECL_STATIC_DESTRUCTOR (current_function_decl))
4178 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
4179 return;
4182 node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
4183 if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
4184 == NULL)
4185 warn_function_cold (current_function_decl);
4187   if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa () == profile_count::zero ())
4187 return;
4188 FOR_EACH_BB_FN (bb, cfun)
4190 if (maybe_hot_bb_p (cfun, bb))
4192 node->frequency = NODE_FREQUENCY_HOT;
4193 return;
4195 if (!probably_never_executed_bb_p (cfun, bb))
4196 node->frequency = NODE_FREQUENCY_NORMAL;
4200 /* Build PREDICT_EXPR. */
4201 tree
4202 build_predict_expr (enum br_predictor predictor, enum prediction taken)
4204 tree t = build1 (PREDICT_EXPR, void_type_node,
4205 build_int_cst (integer_type_node, predictor));
4206 SET_PREDICT_EXPR_OUTCOME (t, taken);
4207 return t;
4210 const char *
4211 predictor_name (enum br_predictor predictor)
4213 return predictor_info[predictor].name;
4216 /* Predict branch probabilities and estimate profile of the tree CFG. */
4218 namespace {
4220 const pass_data pass_data_profile =
4222 GIMPLE_PASS, /* type */
4223 "profile_estimate", /* name */
4224 OPTGROUP_NONE, /* optinfo_flags */
4225 TV_BRANCH_PROB, /* tv_id */
4226 PROP_cfg, /* properties_required */
4227 0, /* properties_provided */
4228 0, /* properties_destroyed */
4229 0, /* todo_flags_start */
4230 0, /* todo_flags_finish */
4233 class pass_profile : public gimple_opt_pass
4235 public:
4236 pass_profile (gcc::context *ctxt)
4237 : gimple_opt_pass (pass_data_profile, ctxt)
4240 /* opt_pass methods: */
4241 bool gate (function *) final override { return flag_guess_branch_prob; }
4242 unsigned int execute (function *) final override;
4244 }; // class pass_profile
4246 unsigned int
4247 pass_profile::execute (function *fun)
4249 unsigned nb_loops;
4251 if (profile_status_for_fn (cfun) == PROFILE_GUESSED)
4252 return 0;
4254 loop_optimizer_init (LOOPS_NORMAL);
4255 if (dump_file && (dump_flags & TDF_DETAILS))
4256 flow_loops_dump (dump_file, NULL, 0);
4258 nb_loops = number_of_loops (fun);
4259 if (nb_loops > 1)
4260 scev_initialize ();
4262 tree_estimate_probability (false);
4263 cfun->cfg->full_profile = true;
4265 if (nb_loops > 1)
4266 scev_finalize ();
4268 loop_optimizer_finalize ();
4269 if (dump_file && (dump_flags & TDF_DETAILS))
4270 gimple_dump_cfg (dump_file, dump_flags);
4271 if (profile_status_for_fn (fun) == PROFILE_ABSENT)
4272 profile_status_for_fn (fun) = PROFILE_GUESSED;
4273 if (dump_file && (dump_flags & TDF_DETAILS))
4275 sreal iterations;
4276 for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
4277 if (expected_loop_iterations_by_profile (loop, &iterations))
4278 fprintf (dump_file, "Loop %d got predicted to iterate %f times.\n",
4279 loop->num, iterations.to_double ());
4281 return 0;
4284 } // anon namespace
4286 gimple_opt_pass *
4287 make_pass_profile (gcc::context *ctxt)
4289 return new pass_profile (ctxt);
4292 /* Return true when PRED predictor should be removed after early
4293    tree passes.  Most of the predictors are beneficial to keep, since
4294    early inlining can also distribute them into callers' bodies.  */
4296 static bool
4297 strip_predictor_early (enum br_predictor pred)
4299 switch (pred)
4301 case PRED_TREE_EARLY_RETURN:
4302 return true;
4303 default:
4304 return false;
4308 /* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements
4309 we no longer need. EARLY is set to true when called from early
4310 optimizations. */
4312 unsigned int
4313 strip_predict_hints (function *fun, bool early)
4315 basic_block bb;
4316 gimple *ass_stmt;
4317 tree var;
4318 bool changed = false;
4320 FOR_EACH_BB_FN (bb, fun)
4322 gimple_stmt_iterator bi;
4323 for (bi = gsi_start_bb (bb); !gsi_end_p (bi);)
4325 gimple *stmt = gsi_stmt (bi);
4327 if (gimple_code (stmt) == GIMPLE_PREDICT)
4329 if (!early
4330 || strip_predictor_early (gimple_predict_predictor (stmt)))
4332 gsi_remove (&bi, true);
4333 changed = true;
4334 continue;
4337 else if (is_gimple_call (stmt))
4339 tree fndecl = gimple_call_fndecl (stmt);
4341 if (!early
4342 && ((fndecl != NULL_TREE
4343 && fndecl_built_in_p (fndecl, BUILT_IN_EXPECT)
4344 && gimple_call_num_args (stmt) == 2)
4345 || (fndecl != NULL_TREE
4346 && fndecl_built_in_p (fndecl,
4347 BUILT_IN_EXPECT_WITH_PROBABILITY)
4348 && gimple_call_num_args (stmt) == 3)
4349 || (gimple_call_internal_p (stmt)
4350 && gimple_call_internal_fn (stmt) == IFN_BUILTIN_EXPECT)))
4352 var = gimple_call_lhs (stmt);
4353 changed = true;
4354 if (var)
4356 ass_stmt
4357 = gimple_build_assign (var, gimple_call_arg (stmt, 0));
4358 gsi_replace (&bi, ass_stmt, true);
4360 else
4362 gsi_remove (&bi, true);
4363 continue;
4367 gsi_next (&bi);
4370 return changed ? TODO_cleanup_cfg : 0;
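
/* Editorial example, not part of GCC: given user code such as

     if (__builtin_expect (ptr == 0, 0))
       handle_error ();

   the hint has already been consumed by the predictors, so the late run of
   this pass rewrites the call into a plain copy of its first argument,

     tmp = (ptr == 0);
     if (tmp)
       handle_error ();

   and no __builtin_expect call survives into later passes.  */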
4373 namespace {
4375 const pass_data pass_data_strip_predict_hints =
4377 GIMPLE_PASS, /* type */
4378 "*strip_predict_hints", /* name */
4379 OPTGROUP_NONE, /* optinfo_flags */
4380 TV_BRANCH_PROB, /* tv_id */
4381 PROP_cfg, /* properties_required */
4382 0, /* properties_provided */
4383 0, /* properties_destroyed */
4384 0, /* todo_flags_start */
4385 0, /* todo_flags_finish */
4388 class pass_strip_predict_hints : public gimple_opt_pass
4390 public:
4391 pass_strip_predict_hints (gcc::context *ctxt)
4392 : gimple_opt_pass (pass_data_strip_predict_hints, ctxt)
4395 /* opt_pass methods: */
4396 opt_pass * clone () final override
4398 return new pass_strip_predict_hints (m_ctxt);
4400 void set_pass_param (unsigned int n, bool param) final override
4402 gcc_assert (n == 0);
4403 early_p = param;
4406 unsigned int execute (function *) final override;
4408 private:
4409 bool early_p;
4411 }; // class pass_strip_predict_hints
4413 unsigned int
4414 pass_strip_predict_hints::execute (function *fun)
4416 return strip_predict_hints (fun, early_p);
4419 } // anon namespace
4421 gimple_opt_pass *
4422 make_pass_strip_predict_hints (gcc::context *ctxt)
4424 return new pass_strip_predict_hints (ctxt);
4427 /* Rebuild function frequencies.  Passes are in general expected to
4428    maintain profile by hand; however, in some cases this is not possible:
4429    for example, when inlining several functions with loops, frequencies
4430    might run out of scale and thus need to be recomputed.  */
4432 void
4433 rebuild_frequencies (void)
4435 /* If we have no profile, do nothing. Note that after inlining
4436 profile_status_for_fn may not represent the actual presence/absence of
4437 profile. */
4438 if (profile_status_for_fn (cfun) == PROFILE_ABSENT
4439 && !ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.initialized_p ())
4440 return;
4443 /* See if everything is OK and update count_max. */
4444 basic_block bb;
4445 bool inconsistency_found = false;
4446   bool uninitialized_probability_found = false;
4447 bool uninitialized_count_found = false;
4449 cfun->cfg->count_max = profile_count::uninitialized ();
4450 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
4452 cfun->cfg->count_max = cfun->cfg->count_max.max (bb->count);
4453       /* Uninitialized count may be the result of inlining or an omission
4454	  in an optimization pass.  */
4455 if (!bb->count.initialized_p ())
4457 uninitialized_count_found = true;
4458 if (dump_file)
4459 fprintf (dump_file, "BB %i has uninitialized count\n",
4460 bb->index);
4462 if (bb != ENTRY_BLOCK_PTR_FOR_FN (cfun)
4463	  && (!uninitialized_probability_found || !inconsistency_found))
4465 profile_count sum = profile_count::zero ();
4466 edge e;
4467 edge_iterator ei;
4469 FOR_EACH_EDGE (e, ei, bb->preds)
4471 sum += e->count ();
4472	      /* Uninitialized probability may be the result of inlining or an
4473		 omission in an optimization pass.  */
4474 if (!e->probability.initialized_p ())
4476 if (dump_file)
4477 fprintf (dump_file,
4478 "Edge %i->%i has uninitialized probability\n",
4479 e->src->index, e->dest->index);
4482 if (sum.differs_from_p (bb->count))
4484 if (dump_file)
4485 fprintf (dump_file,
4486 "BB %i has invalid sum of incomming counts\n",
4487 bb->index);
4488 inconsistency_found = true;
4493 /* If everything is OK, do not re-propagate frequencies. */
4494 if (!inconsistency_found
4495       && (!uninitialized_count_found || uninitialized_probability_found)
4496 && !cfun->cfg->count_max.very_large_p ())
4498 if (dump_file)
4499 fprintf (dump_file, "Profile is consistent\n");
4500 return;
4502   /* Do not re-propagate if we have profile feedback.  Even if the profile is
4503      inconsistent from previous transformations, it is probably more realistic
4504      for the hot part of the program than the result of repropagating.
4506      Consider an example where we previously had
4508      if (test)
4509        then [large probability for true]
4511      and we later proved that test is always 0.  In this case, if the profile
4512      was read correctly, we must have duplicated the conditional (for example
4513      by inlining) into a context where test is false.  From profile feedback
4514      we know that most executions of the conditional were true, so the
4515      important copy is not the one we are looking at.
4517      Propagating from probabilities would make the profile look consistent,
4518      but because probabilities after code duplication may not be
4519      representative for a given run, we would only propagate the error further.  */
4520 if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa ().nonzero_p ()
4521 && !uninitialized_count_found)
4523 if (dump_file)
4524 fprintf (dump_file,
4525 "Profile is inconsistent but read from profile feedback;"
4526 " not rebuilding\n");
4527 return;
4530 loop_optimizer_init (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS);
4531 connect_infinite_loops_to_exit ();
4532 estimate_bb_frequencies ();
4533 remove_fake_exit_edges ();
4534 loop_optimizer_finalize ();
4535 if (dump_file)
4536 fprintf (dump_file, "Rebuilt basic block counts\n");
4538 return;
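
/* Editorial sketch, not part of GCC: the check above tests the
   flow-conservation invariant that every block except the entry has a count
   equal to the sum of its incoming edge counts.  A standalone version with
   hypothetical plain-integer counts (assumes <vector> and <cstdint>):  */

struct sketch_edge { int src, dest; uint64_t count; };

static bool
sketch_flow_consistent_p (int entry, int nblocks,
			  const std::vector<sketch_edge> &edges,
			  const std::vector<uint64_t> &bb_count)
{
  std::vector<uint64_t> in_sum (nblocks, 0);
  for (const sketch_edge &e : edges)
    in_sum[e.dest] += e.count;
  for (int bb = 0; bb < nblocks; bb++)
    /* A mismatch is what rebuild_frequencies reports as inconsistency.  */
    if (bb != entry && in_sum[bb] != bb_count[bb])
      return false;
  return true;
}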
4541 namespace {
4543 const pass_data pass_data_rebuild_frequencies =
4545 GIMPLE_PASS, /* type */
4546 "rebuild_frequencies", /* name */
4547 OPTGROUP_NONE, /* optinfo_flags */
4548 TV_REBUILD_FREQUENCIES, /* tv_id */
4549 PROP_cfg, /* properties_required */
4550 0, /* properties_provided */
4551 0, /* properties_destroyed */
4552 0, /* todo_flags_start */
4553 0, /* todo_flags_finish */
4556 class pass_rebuild_frequencies : public gimple_opt_pass
4558 public:
4559 pass_rebuild_frequencies (gcc::context *ctxt)
4560 : gimple_opt_pass (pass_data_rebuild_frequencies, ctxt)
4563 /* opt_pass methods: */
4564 opt_pass * clone () final override
4566 return new pass_rebuild_frequencies (m_ctxt);
4568 void set_pass_param (unsigned int n, bool param) final override
4570 gcc_assert (n == 0);
4571 early_p = param;
4574 unsigned int execute (function *) final override
4576 rebuild_frequencies ();
4577 return 0;
4580 private:
4581 bool early_p;
4583 }; // class pass_rebuild_frequencies
4585 } // anon namespace
4587 gimple_opt_pass *
4588 make_pass_rebuild_frequencies (gcc::context *ctxt)
4590 return new pass_rebuild_frequencies (ctxt);
4593 /* Perform a dry run of the branch prediction pass and report a comparison
4594    of the predicted and real profile into the dump file.  */
4596 void
4597 report_predictor_hitrates (void)
4599 unsigned nb_loops;
4601 loop_optimizer_init (LOOPS_NORMAL);
4602 if (dump_file && (dump_flags & TDF_DETAILS))
4603 flow_loops_dump (dump_file, NULL, 0);
4605 nb_loops = number_of_loops (cfun);
4606 if (nb_loops > 1)
4607 scev_initialize ();
4609 tree_estimate_probability (true);
4611 if (nb_loops > 1)
4612 scev_finalize ();
4614 loop_optimizer_finalize ();
4617 /* Force edge E to be cold.
4618    If IMPOSSIBLE is true, force the edge to have count and probability 0;
4619    otherwise keep a low probability to represent a possible error in the
4620    guess.  This is used e.g. when we predict a loop to likely iterate a
4621    given number of times but are not 100% sure.
4623    This function locally updates the profile without attempting to keep
4624    global consistency, which cannot be reached in full generality without a
4625    full profile rebuild from probabilities alone.  Doing so is not necessarily
4626    a good idea because frequencies and counts may be more realistic than probabilities.
4628    In some cases (such as the elimination of early exits during full loop
4629    unrolling) the caller can ensure that the profile will become consistent
4630    afterwards.  */
4632 void
4633 force_edge_cold (edge e, bool impossible)
4635 profile_count count_sum = profile_count::zero ();
4636 profile_probability prob_sum = profile_probability::never ();
4637 edge_iterator ei;
4638 edge e2;
4639 bool uninitialized_exit = false;
4641 /* When branch probability guesses are not known, then do nothing. */
4642 if (!impossible && !e->count ().initialized_p ())
4643 return;
4645 profile_probability goal = (impossible ? profile_probability::never ()
4646 : profile_probability::very_unlikely ());
4648   /* If the edge is already improbable or cold, just return.  */
4649 if (e->probability <= goal
4650 && (!impossible || e->count () == profile_count::zero ()))
4651 return;
4652 FOR_EACH_EDGE (e2, ei, e->src->succs)
4653 if (e2 != e)
4655	if (e2->flags & EDGE_FAKE)
4656 continue;
4657 if (e2->count ().initialized_p ())
4658 count_sum += e2->count ();
4659 if (e2->probability.initialized_p ())
4660 prob_sum += e2->probability;
4661 else
4662 uninitialized_exit = true;
4665 /* If we are not guessing profiles but have some other edges out,
4666 just assume the control flow goes elsewhere. */
4667 if (uninitialized_exit)
4668 e->probability = goal;
4669   /* If there are other edges out of e->src, redistribute probability
4670      there.  */
4671 else if (prob_sum > profile_probability::never ())
4673 if (dump_file && (dump_flags & TDF_DETAILS))
4675 fprintf (dump_file, "Making edge %i->%i %s by redistributing "
4676 "probability to other edges. Original probability: ",
4677 e->src->index, e->dest->index,
4678 impossible ? "impossible" : "cold");
4679 e->probability.dump (dump_file);
4680 fprintf (dump_file, "\n");
4682 set_edge_probability_and_rescale_others (e, goal);
4683 if (current_ir_type () != IR_GIMPLE
4684 && e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun))
4685 update_br_prob_note (e->src);
4687 /* If all edges out of e->src are unlikely, the basic block itself
4688 is unlikely. */
4689 else
4691 if (prob_sum == profile_probability::never ())
4692 e->probability = profile_probability::always ();
4693 else
4695 if (impossible)
4696 e->probability = profile_probability::never ();
4697 /* If BB has some edges out that are not impossible, we cannot
4698 assume that BB itself is. */
4699 impossible = false;
4701 if (current_ir_type () != IR_GIMPLE
4702 && e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun))
4703 update_br_prob_note (e->src);
4704 if (e->src->count == profile_count::zero ())
4705 return;
4706 if (count_sum == profile_count::zero () && impossible)
4708 bool found = false;
4709 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
4711 else if (current_ir_type () == IR_GIMPLE)
4712 for (gimple_stmt_iterator gsi = gsi_start_bb (e->src);
4713 !gsi_end_p (gsi); gsi_next (&gsi))
4715 if (stmt_can_terminate_bb_p (gsi_stmt (gsi)))
4717 found = true;
4718 break;
4721 /* FIXME: Implement RTL path. */
4722 else
4723 found = true;
4724 if (!found)
4726 if (dump_file && (dump_flags & TDF_DETAILS))
4727 fprintf (dump_file,
4728 "Making bb %i impossible and dropping count to 0.\n",
4729 e->src->index);
4730 e->src->count = profile_count::zero ();
4731 FOR_EACH_EDGE (e2, ei, e->src->preds)
4732 force_edge_cold (e2, impossible);
4733 return;
4737   /* If we did not adjust anything, the source basic block has no likely
4738      edges leaving in the other direction.  In that case force that bb cold,
4739      too.  This is in general a difficult task to do, but handle the special
4740      case when BB has only one predecessor.  This is the common case when we
4741      are updating the profile after loop transforms.  */
4742 if (!(prob_sum > profile_probability::never ())
4743 && count_sum == profile_count::zero ()
4744 && single_pred_p (e->src) && e->src->count.to_frequency (cfun)
4745 > (impossible ? 0 : 1))
4747 int old_frequency = e->src->count.to_frequency (cfun);
4748 if (dump_file && (dump_flags & TDF_DETAILS))
4749 fprintf (dump_file, "Making bb %i %s.\n", e->src->index,
4750 impossible ? "impossible" : "cold");
4751 int new_frequency = MIN (e->src->count.to_frequency (cfun),
4752 impossible ? 0 : 1);
4753 if (impossible)
4754 e->src->count = profile_count::zero ();
4755 else
4756 e->src->count = e->count ().apply_scale (new_frequency,
4757 old_frequency);
4758 force_edge_cold (single_pred_edge (e->src), impossible);
4760 else if (dump_file && (dump_flags & TDF_DETAILS)
4761 && maybe_hot_bb_p (cfun, e->src))
4762 fprintf (dump_file, "Giving up on making bb %i %s.\n", e->src->index,
4763 impossible ? "impossible" : "cold");
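
/* Editorial example, not part of GCC: a transform that proves an exit
   cannot be taken, e.g. eliminating early exits when fully unrolling a loop
   with a known trip count, would call

     force_edge_cold (exit_edge, /*impossible=*/true);

   while a pass that merely predicts the exit to be rare keeps a small
   nonzero probability:

     force_edge_cold (exit_edge, /*impossible=*/false);

   (exit_edge is a hypothetical variable name.)  */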
4767 #if CHECKING_P
4769 namespace selftest {
4771 /* Test that the probability values of the predictors defined in
4772    predict.def are within the range (50, 100].  */
4774 struct branch_predictor
4776 const char *name;
4777 int probability;
4780 #define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) { NAME, HITRATE },
4782 static void
4783 test_prediction_value_range ()
4785 branch_predictor predictors[] = {
4786 #include "predict.def"
4787 { NULL, PROB_UNINITIALIZED }
4790 for (unsigned i = 0; predictors[i].name != NULL; i++)
4792 if (predictors[i].probability == PROB_UNINITIALIZED)
4793 continue;
4795 unsigned p = 100 * predictors[i].probability / REG_BR_PROB_BASE;
4796 ASSERT_TRUE (p >= 50 && p <= 100);
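
/* Editorial example, not part of GCC: for a predictor declared with
   HITRATE (90) the stored probability is (90 * 10000 + 50) / 100 == 9000,
   REG_BR_PROB_BASE being 10000, so the loop above recovers
   p == 100 * 9000 / 10000 == 90, which indeed lies in (50, 100].  */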
4800 #undef DEF_PREDICTOR
4802 /* Run all of the selftests within this file.  */
4804 void
4805 predict_cc_tests ()
4807 test_prediction_value_range ();
4810 } // namespace selftest
4811 #endif /* CHECKING_P. */