/* Branch prediction routines for the GNU compiler.
   Copyright (C) 2000-2025 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* References:

   [1] "Branch Prediction for Free"
       Ball and Larus; PLDI '93.
   [2] "Static Branch Frequency and Program Profile Analysis"
       Wu and Larus; MICRO-27.
   [3] "Corpus-based Static Branch Prediction"
       Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "memmodel.h"
#include "emit-rtl.h"
#include "cgraph.h"
#include "coverage.h"
#include "diagnostic-core.h"
#include "gimple-predict.h"
#include "fold-const.h"
#include "calls.h"
#include "cfganal.h"
#include "profile.h"
#include "sreal.h"
#include "cfgloop.h"
#include "gimple-iterator.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "ipa-utils.h"
#include "gimple-pretty-print.h"
#include "selftest.h"
#include "cfgrtl.h"
#include "stringpool.h"
#include "attribs.h"
/* Enum with reasons why a predictor is ignored.  */

enum predictor_reason
{
  REASON_NONE,
  REASON_IGNORED,
  REASON_SINGLE_EDGE_DUPLICATE,
  REASON_EDGE_PAIR_DUPLICATE
};

/* String messages for the aforementioned enum.  */

static const char *reason_messages[] = {"", " (ignored)",
    " (single edge duplicate)", " (edge pair duplicate)"};
static void combine_predictions_for_insn (rtx_insn *, basic_block);
static void dump_prediction (FILE *, enum br_predictor, int, basic_block,
			     enum predictor_reason, edge);
static void predict_paths_leading_to (basic_block, enum br_predictor,
				      enum prediction,
				      class loop *in_loop = NULL);
static void predict_paths_leading_to_edge (edge, enum br_predictor,
					   enum prediction,
					   class loop *in_loop = NULL);
static bool can_predict_insn_p (const rtx_insn *);
static HOST_WIDE_INT get_predictor_value (br_predictor, HOST_WIDE_INT);
static void determine_unlikely_bbs ();
static void estimate_bb_frequencies ();
/* Information we hold about each branch predictor.
   Filled using information from predict.def.  */

struct predictor_info
{
  const char *const name;	/* Name used in the debugging dumps.  */
  const int hitrate;		/* Expected hitrate used by
				   predict_insn_def call.  */
  const int flags;
};

/* Use given predictor without Dempster-Shafer theory if it matches
   using first_match heuristics.  */
#define PRED_FLAG_FIRST_MATCH 1

/* Convert hitrate given in percent to our representation.  */

#define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
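
/* Editorial note: a worked example of the HITRATE conversion, assuming
   the usual REG_BR_PROB_BASE of 10000.  HITRATE (79) expands to
   (int) (79 * 10000 + 50) / 100 == 7900, i.e. a 79% hitrate is stored
   as 7900 fixed-point units, with the + 50 providing rounding.  */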
#define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
static const struct predictor_info predictor_info[] = {
#include "predict.def"

  /* Upper bound on predictors.  */
  {NULL, 0, 0}
};
#undef DEF_PREDICTOR
static gcov_type min_count = -1;

/* Determine the threshold for hot BB counts.  */

gcov_type
get_hot_bb_threshold ()
{
  if (min_count == -1)
    {
      const int hot_frac = param_hot_bb_count_fraction;
      const gcov_type min_hot_count
	= hot_frac
	  ? profile_info->sum_max / hot_frac
	  : (gcov_type) profile_count::max_count;
      set_hot_bb_threshold (min_hot_count);
      if (dump_file)
	fprintf (dump_file, "Setting hotness threshold to %" PRId64 ".\n",
		 min_hot_count);
    }
  return min_count;
}
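
/* Editorial note: a worked example of the threshold computation above,
   assuming the default --param hot-bb-count-fraction=10000.  If the
   most frequently executed block of the training run (sum_max) ran
   2000000 times, the hot threshold becomes 2000000 / 10000 == 200, so
   only blocks executed at least 200 times count as hot.  */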
/* Set the threshold for hot BB counts.  */

void
set_hot_bb_threshold (gcov_type min)
{
  min_count = min;
}
/* Return TRUE if COUNT is considered to be hot in function FUN.  */

bool
maybe_hot_count_p (struct function *fun, profile_count count)
{
  if (!count.initialized_p ())
    return true;
  if (count.ipa () == profile_count::zero ())
    return false;
  if (!count.ipa_p ())
    {
      struct cgraph_node *node = cgraph_node::get (fun->decl);
      if (!profile_info || profile_status_for_fn (fun) != PROFILE_READ)
	{
	  if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
	    return false;
	  if (node->frequency == NODE_FREQUENCY_HOT)
	    return true;
	}
      if (profile_status_for_fn (fun) == PROFILE_ABSENT)
	return true;
      if (node->frequency == NODE_FREQUENCY_EXECUTED_ONCE
	  && count < (ENTRY_BLOCK_PTR_FOR_FN (fun)->count.apply_scale (2, 3)))
	return false;
      if (count * param_hot_bb_frequency_fraction
	  < ENTRY_BLOCK_PTR_FOR_FN (fun)->count)
	return false;
      return true;
    }
  /* Code executed at most once is not hot.  */
  if (count <= MAX (profile_info ? profile_info->runs : 1, 1))
    return false;
  return (count >= get_hot_bb_threshold ());
}
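
/* Editorial note: a worked example of the guessed-profile test above,
   assuming the default --param hot-bb-frequency-fraction=1000.  A block
   whose count times 1000 still falls below the entry block count, i.e.
   a block executed less than once per thousand invocations of the
   function, is not considered maybe-hot.  */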
/* Return true if basic block BB of function FUN can be CPU intensive
   and should thus be optimized for maximum performance.  */

bool
maybe_hot_bb_p (struct function *fun, const_basic_block bb)
{
  gcc_checking_assert (fun);
  return maybe_hot_count_p (fun, bb->count);
}

/* Return true if edge E can be CPU intensive and should thus be optimized
   for maximum performance.  */

bool
maybe_hot_edge_p (edge e)
{
  return maybe_hot_count_p (cfun, e->count ());
}
/* Return true if COUNT is considered to be never executed in function FUN
   or if function FUN is considered so in the static profile.  */

static bool
probably_never_executed (struct function *fun, profile_count count)
{
  gcc_checking_assert (fun);
  if (count.ipa () == profile_count::zero ())
    return true;
  /* Do not trust adjusted counts.  Otherwise inlining may leave us with
     code that has a low execution count and would be dropped into the
     cold section.  Such low counts are not safe even with a read profile
     and may lead us to place code that actually gets executed into the
     cold section of the binary, which is not desirable.  */
  if (count.precise_p () && profile_status_for_fn (fun) == PROFILE_READ)
    {
      const int unlikely_frac = param_unlikely_bb_count_fraction;
      if (count * unlikely_frac >= profile_info->runs)
	return false;
      return true;
    }
  if ((!profile_info || profile_status_for_fn (fun) != PROFILE_READ)
      && (cgraph_node::get (fun->decl)->frequency
	  == NODE_FREQUENCY_UNLIKELY_EXECUTED))
    return true;
  return false;
}
/* Return true if basic block BB of function FUN is probably never executed.  */

bool
probably_never_executed_bb_p (struct function *fun, const_basic_block bb)
{
  return probably_never_executed (fun, bb->count);
}

/* Return true if edge E is unlikely executed for obvious reasons.  */

static bool
unlikely_executed_edge_p (edge e)
{
  return (e->src->count == profile_count::zero ()
	  || e->probability == profile_probability::never ())
	 || (e->flags & (EDGE_EH | EDGE_FAKE));
}

/* Return true if edge E of function FUN is probably never executed.  */

bool
probably_never_executed_edge_p (struct function *fun, edge e)
{
  if (unlikely_executed_edge_p (e))
    return true;
  return probably_never_executed (fun, e->count ());
}
/* Return the level at which function FUN should be optimized for size.  */

optimize_size_level
optimize_function_for_size_p (struct function *fun)
{
  if (!fun || !fun->decl)
    return optimize_size ? OPTIMIZE_SIZE_MAX : OPTIMIZE_SIZE_NO;
  cgraph_node *n = cgraph_node::get (fun->decl);
  if (n)
    return n->optimize_for_size_p ();
  return OPTIMIZE_SIZE_NO;
}

/* Return true if function FUN should always be optimized for speed.  */

bool
optimize_function_for_speed_p (struct function *fun)
{
  return !optimize_function_for_size_p (fun);
}

/* Return the optimization type that should be used for function FUN.  */

optimization_type
function_optimization_type (struct function *fun)
{
  return (optimize_function_for_speed_p (fun)
	  ? OPTIMIZE_FOR_SPEED
	  : OPTIMIZE_FOR_SIZE);
}
/* Return the level at which basic block BB should be optimized for size.  */

optimize_size_level
optimize_bb_for_size_p (const_basic_block bb)
{
  enum optimize_size_level ret = optimize_function_for_size_p (cfun);

  if (bb && ret < OPTIMIZE_SIZE_MAX && bb->count == profile_count::zero ())
    ret = OPTIMIZE_SIZE_MAX;
  if (bb && ret < OPTIMIZE_SIZE_BALANCED && !maybe_hot_bb_p (cfun, bb))
    ret = OPTIMIZE_SIZE_BALANCED;
  return ret;
}

/* Return TRUE if basic block BB should be optimized for speed.  */

bool
optimize_bb_for_speed_p (const_basic_block bb)
{
  return !optimize_bb_for_size_p (bb);
}

/* Return the optimization type that should be used for basic block BB.  */

optimization_type
bb_optimization_type (const_basic_block bb)
{
  return (optimize_bb_for_speed_p (bb)
	  ? OPTIMIZE_FOR_SPEED
	  : OPTIMIZE_FOR_SIZE);
}
/* Return the level at which edge E should be optimized for size.  */

optimize_size_level
optimize_edge_for_size_p (edge e)
{
  enum optimize_size_level ret = optimize_function_for_size_p (cfun);

  if (ret < OPTIMIZE_SIZE_MAX && unlikely_executed_edge_p (e))
    ret = OPTIMIZE_SIZE_MAX;
  if (ret < OPTIMIZE_SIZE_BALANCED && !maybe_hot_edge_p (e))
    ret = OPTIMIZE_SIZE_BALANCED;
  return ret;
}

/* Return TRUE if edge E should be optimized for speed.  */

bool
optimize_edge_for_speed_p (edge e)
{
  return !optimize_edge_for_size_p (e);
}

/* Return the level at which the current function should be optimized
   for size.  */

optimize_size_level
optimize_insn_for_size_p (void)
{
  enum optimize_size_level ret = optimize_function_for_size_p (cfun);
  if (ret < OPTIMIZE_SIZE_BALANCED && !crtl->maybe_hot_insn_p)
    ret = OPTIMIZE_SIZE_BALANCED;
  return ret;
}

/* Return TRUE if the current function is optimized for speed.  */

bool
optimize_insn_for_speed_p (void)
{
  return !optimize_insn_for_size_p ();
}

/* Return the optimization type that should be used for the current
   instruction.  */

optimization_type
insn_optimization_type ()
{
  return (optimize_insn_for_speed_p ()
	  ? OPTIMIZE_FOR_SPEED
	  : OPTIMIZE_FOR_SIZE);
}
/* Return the level at which LOOP should be optimized for size.  */

optimize_size_level
optimize_loop_for_size_p (class loop *loop)
{
  return optimize_bb_for_size_p (loop->header);
}

/* Return TRUE if LOOP should be optimized for speed.  */

bool
optimize_loop_for_speed_p (class loop *loop)
{
  return optimize_bb_for_speed_p (loop->header);
}

/* Return TRUE if nest rooted at LOOP should be optimized for speed.  */

bool
optimize_loop_nest_for_speed_p (class loop *loop)
{
  class loop *l = loop;
  if (optimize_loop_for_speed_p (loop))
    return true;
  l = loop->inner;
  while (l && l != loop)
    {
      if (optimize_loop_for_speed_p (l))
	return true;
      if (l->inner)
	l = l->inner;
      else if (l->next)
	l = l->next;
      else
	{
	  while (l != loop && !l->next)
	    l = loop_outer (l);
	  if (l != loop)
	    l = l->next;
	}
    }
  return false;
}

/* Return the level at which nest rooted at LOOP should be optimized
   for size.  */

optimize_size_level
optimize_loop_nest_for_size_p (class loop *loop)
{
  enum optimize_size_level ret = optimize_loop_for_size_p (loop);
  class loop *l = loop;

  l = loop->inner;
  while (l && l != loop)
    {
      if (ret == OPTIMIZE_SIZE_NO)
	break;
      ret = MIN (optimize_loop_for_size_p (l), ret);
      if (l->inner)
	l = l->inner;
      else if (l->next)
	l = l->next;
      else
	{
	  while (l != loop && !l->next)
	    l = loop_outer (l);
	  if (l != loop)
	    l = l->next;
	}
    }
  return ret;
}
/* Return true if edge E is likely to be well predictable by branch
   predictor.  */

bool
predictable_edge_p (edge e)
{
  if (!e->probability.initialized_p ())
    return false;
  if ((e->probability.to_reg_br_prob_base ()
       <= param_predictable_branch_outcome * REG_BR_PROB_BASE / 100)
      || (REG_BR_PROB_BASE - e->probability.to_reg_br_prob_base ()
	  <= param_predictable_branch_outcome * REG_BR_PROB_BASE / 100))
    return true;
  return false;
}
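
/* Editorial note: assuming the default --param predictable-branch-outcome=2,
   the test above accepts an edge whose probability is at most 2% or at
   least 98%, i.e. a branch whose outcome is nearly always the same and
   therefore easy for hardware branch predictors.  */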
/* Set RTL expansion for BB profile.  */

void
rtl_profile_for_bb (basic_block bb)
{
  crtl->maybe_hot_insn_p = maybe_hot_bb_p (cfun, bb);
}

/* Set RTL expansion for edge profile.  */

void
rtl_profile_for_edge (edge e)
{
  crtl->maybe_hot_insn_p = maybe_hot_edge_p (e);
}

/* Set RTL expansion to default mode (i.e. when profile info is not known).  */
void
default_rtl_profile (void)
{
  crtl->maybe_hot_insn_p = true;
}
/* Return true if one of the outgoing edges is already predicted by
   PREDICTOR.  */

bool
rtl_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
{
  rtx note;
  if (!INSN_P (BB_END (bb)))
    return false;
  for (note = REG_NOTES (BB_END (bb)); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PRED
	&& INTVAL (XEXP (XEXP (note, 0), 0)) == (int) predictor)
      return true;
  return false;
}
/* Structure representing predictions in tree level.  */

struct edge_prediction {
  struct edge_prediction *ep_next;
  edge ep_edge;
  enum br_predictor ep_predictor;
  int ep_probability;
};

/* This map contains for a basic block the list of predictions for the
   outgoing edges.  */

static hash_map<const_basic_block, edge_prediction *> *bb_predictions;

/* Global cache for expr_expected_value.  */

struct expected_value
{
  tree val;
  enum br_predictor predictor;
  HOST_WIDE_INT probability;
};
static hash_map<int_hash<unsigned, 0>, expected_value> *ssa_expected_value;
/* Return true if one of the outgoing edges is already predicted by
   PREDICTOR.  */

bool
gimple_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
{
  struct edge_prediction *i;
  edge_prediction **preds = bb_predictions->get (bb);

  if (!preds)
    return false;

  for (i = *preds; i; i = i->ep_next)
    if (i->ep_predictor == predictor)
      return true;
  return false;
}
/* Return true if edge E is already predicted by PREDICTOR with the
   outcome TAKEN.  */

bool
edge_predicted_by_p (edge e, enum br_predictor predictor, bool taken)
{
  struct edge_prediction *i;
  basic_block bb = e->src;
  edge_prediction **preds = bb_predictions->get (bb);
  if (!preds)
    return false;

  int probability = predictor_info[(int) predictor].hitrate;

  if (taken != TAKEN)
    probability = REG_BR_PROB_BASE - probability;

  for (i = *preds; i; i = i->ep_next)
    if (i->ep_predictor == predictor
	&& i->ep_edge == e
	&& i->ep_probability == probability)
      return true;
  return false;
}
/* Return true if the probability of edge E is reliable.  */
bool
edge_probability_reliable_p (const_edge e)
{
  return e->probability.probably_reliable_p ();
}

/* Same predicate as edge_probability_reliable_p, working on notes.  */
bool
br_prob_note_reliable_p (const_rtx note)
{
  gcc_assert (REG_NOTE_KIND (note) == REG_BR_PROB);
  return profile_probability::from_reg_br_prob_note
		 (XINT (note, 0)).probably_reliable_p ();
}
static void
predict_insn (rtx_insn *insn, enum br_predictor predictor, int probability)
{
  gcc_assert (any_condjump_p (insn));
  if (!flag_guess_branch_prob)
    return;

  add_reg_note (insn, REG_BR_PRED,
		gen_rtx_CONCAT (VOIDmode,
				GEN_INT ((int) predictor),
				GEN_INT ((int) probability)));
}

/* Predict insn by given predictor.  */

void
predict_insn_def (rtx_insn *insn, enum br_predictor predictor,
		  enum prediction taken)
{
  int probability = predictor_info[(int) predictor].hitrate;
  gcc_assert (probability != PROB_UNINITIALIZED);

  if (taken != TAKEN)
    probability = REG_BR_PROB_BASE - probability;

  predict_insn (insn, predictor, probability);
}
/* Predict edge E with given probability if possible.  */

void
rtl_predict_edge (edge e, enum br_predictor predictor, int probability)
{
  rtx_insn *last_insn;
  last_insn = BB_END (e->src);

  /* We can store the branch prediction information only about
     conditional jumps.  */
  if (!any_condjump_p (last_insn))
    return;

  /* We always store probability of branching.  */
  if (e->flags & EDGE_FALLTHRU)
    probability = REG_BR_PROB_BASE - probability;

  predict_insn (last_insn, predictor, probability);
}
/* Predict edge E with the given PROBABILITY.  */
void
gimple_predict_edge (edge e, enum br_predictor predictor, int probability)
{
  if (e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
      && EDGE_COUNT (e->src->succs) > 1
      && flag_guess_branch_prob
      && optimize)
    {
      struct edge_prediction *i = XNEW (struct edge_prediction);
      edge_prediction *&preds = bb_predictions->get_or_insert (e->src);

      i->ep_next = preds;
      preds = i;
      i->ep_probability = probability;
      i->ep_predictor = predictor;
      i->ep_edge = e;
    }
}
/* Filter edge predictions PREDS by a function FILTER: if FILTER returns
   false, the prediction is removed.
   DATA are passed to the filter function.  */

static void
filter_predictions (edge_prediction **preds,
		    bool (*filter) (edge_prediction *, void *), void *data)
{
  if (!bb_predictions)
    return;

  if (preds)
    {
      struct edge_prediction **prediction = preds;
      struct edge_prediction *next;

      while (*prediction)
	{
	  if ((*filter) (*prediction, data))
	    prediction = &((*prediction)->ep_next);
	  else
	    {
	      next = (*prediction)->ep_next;
	      free (*prediction);
	      *prediction = next;
	    }
	}
    }
}
/* Filter function predicate that returns true for an edge prediction P
   if its edge is not equal to DATA.  */

static bool
not_equal_edge_p (edge_prediction *p, void *data)
{
  return p->ep_edge != (edge) data;
}

/* Remove all predictions on given basic block that are attached
   to edge E.  */
void
remove_predictions_associated_with_edge (edge e)
{
  if (!bb_predictions)
    return;

  edge_prediction **preds = bb_predictions->get (e->src);
  filter_predictions (preds, not_equal_edge_p, e);
}
/* Clears the list of predictions stored for BB.  */

static void
clear_bb_predictions (basic_block bb)
{
  edge_prediction **preds = bb_predictions->get (bb);
  struct edge_prediction *pred, *next;

  if (!preds)
    return;

  for (pred = *preds; pred; pred = next)
    {
      next = pred->ep_next;
      free (pred);
    }
  *preds = NULL;
}
/* Return true when we can store prediction on insn INSN.
   At the moment we represent predictions only on conditional
   jumps, not on computed jumps or other complicated cases.  */
static bool
can_predict_insn_p (const rtx_insn *insn)
{
  return (JUMP_P (insn)
	  && any_condjump_p (insn)
	  && EDGE_COUNT (BLOCK_FOR_INSN (insn)->succs) >= 2);
}

/* Predict edge E by given predictor if possible.  */

void
predict_edge_def (edge e, enum br_predictor predictor,
		  enum prediction taken)
{
  int probability = predictor_info[(int) predictor].hitrate;

  if (taken != TAKEN)
    probability = REG_BR_PROB_BASE - probability;

  predict_edge (e, predictor, probability);
}
/* Invert all branch predictions or probability notes in the INSN.  This needs
   to be done each time we invert the condition used by the jump.  */

void
invert_br_probabilities (rtx insn)
{
  rtx note;

  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PROB)
      XINT (note, 0) = profile_probability::from_reg_br_prob_note
			 (XINT (note, 0)).invert ().to_reg_br_prob_note ();
    else if (REG_NOTE_KIND (note) == REG_BR_PRED)
      XEXP (XEXP (note, 0), 1)
	= GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
}
/* Dump information about the branch prediction to the output file.  */

static void
dump_prediction (FILE *file, enum br_predictor predictor, int probability,
		 basic_block bb, enum predictor_reason reason = REASON_NONE,
		 edge ep_edge = NULL)
{
  edge e = ep_edge;
  edge_iterator ei;

  if (!file)
    return;

  if (e == NULL)
    FOR_EACH_EDGE (e, ei, bb->succs)
      if (! (e->flags & EDGE_FALLTHRU))
	break;

  char edge_info_str[128];
  if (ep_edge)
    sprintf (edge_info_str, " of edge %d->%d", ep_edge->src->index,
	     ep_edge->dest->index);
  else
    edge_info_str[0] = '\0';

  fprintf (file, "  %s heuristics%s%s: %.2f%%",
	   predictor_info[predictor].name,
	   edge_info_str, reason_messages[reason],
	   probability * 100.0 / REG_BR_PROB_BASE);

  if (bb->count.initialized_p ())
    {
      fprintf (file, "  exec ");
      bb->count.dump (file);
      if (e && e->count ().initialized_p () && bb->count.to_gcov_type ())
	{
	  fprintf (file, " hit ");
	  e->count ().dump (file);
	  fprintf (file, " (%.1f%%)", e->count ().to_gcov_type () * 100.0
		   / bb->count.to_gcov_type ());
	}
    }

  fprintf (file, "\n");

  /* Print output that can be easily read by the analyze_brprob.py script.
     We are interested only in counts that are read from GCDA files.  */
  if (dump_file && (dump_flags & TDF_DETAILS)
      && bb->count.precise_p ()
      && reason == REASON_NONE)
    {
      fprintf (file, ";;heuristics;%s;%" PRId64 ";%" PRId64 ";%.1f;\n",
	       predictor_info[predictor].name,
	       bb->count.to_gcov_type (), e->count ().to_gcov_type (),
	       probability * 100.0 / REG_BR_PROB_BASE);
    }
}
/* Return true if STMT is known to be unlikely executed.  */

static bool
unlikely_executed_stmt_p (gimple *stmt)
{
  if (!is_gimple_call (stmt))
    return false;
  /* NORETURN attribute alone is not strong enough: exit() may be quite
     likely executed once during program run.  */
  if (gimple_call_fntype (stmt)
      && lookup_attribute ("cold",
			   TYPE_ATTRIBUTES (gimple_call_fntype (stmt)))
      && !lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl)))
    return true;
  tree decl = gimple_call_fndecl (stmt);
  if (!decl)
    return false;
  if (lookup_attribute ("cold", DECL_ATTRIBUTES (decl))
      && !lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl)))
    return true;

  cgraph_node *n = cgraph_node::get (decl);
  if (!n)
    return false;

  availability avail;
  n = n->ultimate_alias_target (&avail);
  if (avail < AVAIL_AVAILABLE)
    return false;
  if (!n->analyzed
      || n->decl == current_function_decl)
    return false;
  return n->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED;
}
/* Return true if BB is unlikely executed.  */

static bool
unlikely_executed_bb_p (basic_block bb)
{
  if (bb->count == profile_count::zero ())
    return true;
  if (bb == ENTRY_BLOCK_PTR_FOR_FN (cfun) || bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
    return false;
  for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
       !gsi_end_p (gsi); gsi_next (&gsi))
    {
      if (unlikely_executed_stmt_p (gsi_stmt (gsi)))
	return true;
      if (stmt_can_terminate_bb_p (gsi_stmt (gsi)))
	return false;
    }
  return false;
}
/* We cannot predict the probabilities of outgoing edges of BB.  Set them
   evenly and hope for the best.  If UNLIKELY_EDGES is not null, distribute
   even probability for all edges not mentioned in the set.  These edges
   are given PROB_VERY_UNLIKELY probability.  Similarly for LIKELY_EDGES,
   if we have exactly one likely edge, make the other edges predicted
   as not probable.  */

static void
set_even_probabilities (basic_block bb,
			hash_set<edge> *unlikely_edges = NULL,
			hash_set<edge_prediction *> *likely_edges = NULL)
{
  unsigned nedges = 0, unlikely_count = 0;
  edge e = NULL;
  edge_iterator ei;
  profile_probability all = profile_probability::always ();

  FOR_EACH_EDGE (e, ei, bb->succs)
    if (e->probability.initialized_p ())
      all -= e->probability;
    else if (!unlikely_executed_edge_p (e))
      {
	nedges++;
	if (unlikely_edges != NULL && unlikely_edges->contains (e))
	  {
	    all -= profile_probability::very_unlikely ();
	    unlikely_count++;
	  }
      }

  /* Make the distribution even if all edges are unlikely.  */
  unsigned likely_count = likely_edges ? likely_edges->elements () : 0;
  if (unlikely_count == nedges)
    {
      unlikely_edges = NULL;
      unlikely_count = 0;
    }

  /* If we have one likely edge, then use its probability and distribute
     remaining probabilities as even.  */
  if (likely_count == 1)
    {
      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->probability.initialized_p ())
	  ;
	else if (!unlikely_executed_edge_p (e))
	  {
	    edge_prediction *prediction = *likely_edges->begin ();
	    int p = prediction->ep_probability;
	    profile_probability prob
	      = profile_probability::from_reg_br_prob_base (p);

	    if (prediction->ep_edge == e)
	      e->probability = prob;
	    else if (unlikely_edges != NULL && unlikely_edges->contains (e))
	      e->probability = profile_probability::very_unlikely ();
	    else
	      {
		profile_probability remainder = prob.invert ();
		remainder -= (profile_probability::very_unlikely ()
			      * unlikely_count);
		int count = nedges - unlikely_count - 1;
		gcc_assert (count >= 0);

		e->probability = remainder / count;
	      }
	  }
	else
	  e->probability = profile_probability::never ();
    }
  else
    {
      /* Make all unlikely edges unlikely and the rest will have even
	 probability.  */
      unsigned scale = nedges - unlikely_count;
      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->probability.initialized_p ())
	  ;
	else if (!unlikely_executed_edge_p (e))
	  {
	    if (unlikely_edges != NULL && unlikely_edges->contains (e))
	      e->probability = profile_probability::very_unlikely ();
	    else
	      e->probability = all / scale;
	  }
	else
	  e->probability = profile_probability::never ();
    }
}
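
/* Editorial note: a worked example for set_even_probabilities.  For a
   four-successor block where exactly one edge is in UNLIKELY_EDGES and
   no edge is in LIKELY_EDGES, the unlikely edge is assigned
   profile_probability::very_unlikely () and each of the remaining three
   edges receives one third of the leftover probability mass.  */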
/* Add REG_BR_PROB note to JUMP with PROB.  */

void
add_reg_br_prob_note (rtx_insn *jump, profile_probability prob)
{
  gcc_checking_assert (JUMP_P (jump) && !find_reg_note (jump, REG_BR_PROB, 0));
  add_int_reg_note (jump, REG_BR_PROB, prob.to_reg_br_prob_note ());
}
/* Combine all REG_BR_PRED notes into single probability and attach REG_BR_PROB
   note if not already present.  Remove now useless REG_BR_PRED notes.  */

static void
combine_predictions_for_insn (rtx_insn *insn, basic_block bb)
{
  rtx prob_note;
  rtx *pnote;
  rtx note;
  int best_probability = PROB_EVEN;
  enum br_predictor best_predictor = END_PREDICTORS;
  int combined_probability = REG_BR_PROB_BASE / 2;
  int d;
  bool first_match = false;
  bool found = false;

  if (!can_predict_insn_p (insn))
    {
      set_even_probabilities (bb);
      return;
    }

  prob_note = find_reg_note (insn, REG_BR_PROB, 0);
  pnote = &REG_NOTES (insn);
  if (dump_file)
    fprintf (dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
	     bb->index);

  /* We implement "first match" heuristics and use probability guessed
     by predictor with smallest index.  */
  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PRED)
      {
	enum br_predictor predictor = ((enum br_predictor)
				       INTVAL (XEXP (XEXP (note, 0), 0)));
	int probability = INTVAL (XEXP (XEXP (note, 0), 1));

	found = true;
	if (best_predictor > predictor
	    && predictor_info[predictor].flags & PRED_FLAG_FIRST_MATCH)
	  best_probability = probability, best_predictor = predictor;

	d = (combined_probability * probability
	     + (REG_BR_PROB_BASE - combined_probability)
	     * (REG_BR_PROB_BASE - probability));

	/* Use FP math to avoid overflows of 32bit integers.  */
	if (d == 0)
	  /* If one probability is 0% and one 100%, avoid division by zero.  */
	  combined_probability = REG_BR_PROB_BASE / 2;
	else
	  combined_probability = (((double) combined_probability) * probability
				  * REG_BR_PROB_BASE / d + 0.5);
      }

  /* Decide which heuristic to use.  In case we didn't match anything,
     use no_prediction heuristic, in case we did match, use either
     first match or Dempster-Shafer theory depending on the flags.  */

  if (best_predictor != END_PREDICTORS)
    first_match = true;

  if (!found)
    dump_prediction (dump_file, PRED_NO_PREDICTION,
		     combined_probability, bb);
  else
    {
      if (!first_match)
	dump_prediction (dump_file, PRED_DS_THEORY, combined_probability,
			 bb, !first_match ? REASON_NONE : REASON_IGNORED);
      else
	dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability,
			 bb, first_match ? REASON_NONE : REASON_IGNORED);
    }

  if (first_match)
    combined_probability = best_probability;
  dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb);

  while (*pnote)
    {
      if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
	{
	  enum br_predictor predictor = ((enum br_predictor)
					 INTVAL (XEXP (XEXP (*pnote, 0), 0)));
	  int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));

	  dump_prediction (dump_file, predictor, probability, bb,
			   (!first_match || best_predictor == predictor)
			   ? REASON_NONE : REASON_IGNORED);
	  *pnote = XEXP (*pnote, 1);
	}
      else
	pnote = &XEXP (*pnote, 1);
    }

  if (!prob_note)
    {
      profile_probability p
	= profile_probability::from_reg_br_prob_base (combined_probability);
      add_reg_br_prob_note (insn, p);

      /* Save the prediction into CFG in case we are seeing non-degenerate
	 conditional jump.  */
      if (!single_succ_p (bb))
	{
	  BRANCH_EDGE (bb)->probability = p;
	  FALLTHRU_EDGE (bb)->probability
	    = BRANCH_EDGE (bb)->probability.invert ();
	}
    }
  else if (!single_succ_p (bb))
    {
      profile_probability prob = profile_probability::from_reg_br_prob_note
				   (XINT (prob_note, 0));

      BRANCH_EDGE (bb)->probability = prob;
      FALLTHRU_EDGE (bb)->probability = prob.invert ();
    }
  else
    single_succ_edge (bb)->probability = profile_probability::always ();
}
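
/* Editorial note: a worked example of the Dempster-Shafer combination
   above, assuming REG_BR_PROB_BASE == 10000.  Starting from the neutral
   5000 (50%), combining with a predictor saying 8000 (80%) gives
     d = 5000*8000 + 5000*2000 = 50000000
     combined = 5000 * 8000 * 10000 / d = 8000,
   and combining with a second, agreeing 8000 prediction gives
     d = 8000*8000 + 2000*2000 = 68000000
     combined = 8000 * 8000 * 10000 / d ~= 9412 (94.1%),
   i.e. two independent 80% predictors reinforce each other.  */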
/* Edge prediction hash traits.  */

struct predictor_hash: pointer_hash <edge_prediction>
{

  static inline hashval_t hash (const edge_prediction *);
  static inline bool equal (const edge_prediction *, const edge_prediction *);
};

/* Calculate hash value of an edge prediction P based on predictor and
   normalized probability.  */

inline hashval_t
predictor_hash::hash (const edge_prediction *p)
{
  inchash::hash hstate;
  hstate.add_int (p->ep_predictor);

  int prob = p->ep_probability;
  if (prob > REG_BR_PROB_BASE / 2)
    prob = REG_BR_PROB_BASE - prob;

  hstate.add_int (prob);

  return hstate.end ();
}

/* Return true if edge predictions P1 and P2 use the same predictor and
   have equal (or opposed) probability.  */

inline bool
predictor_hash::equal (const edge_prediction *p1, const edge_prediction *p2)
{
  return (p1->ep_predictor == p2->ep_predictor
	  && (p1->ep_probability == p2->ep_probability
	      || p1->ep_probability == REG_BR_PROB_BASE - p2->ep_probability));
}

struct predictor_hash_traits: predictor_hash,
  typed_noop_remove <edge_prediction *> {};
/* Return true if edge prediction P is not in DATA hash set.  */

static bool
not_removed_prediction_p (edge_prediction *p, void *data)
{
  hash_set<edge_prediction *> *remove = (hash_set<edge_prediction *> *) data;
  return !remove->contains (p);
}

/* Prune predictions for a basic block BB.  Currently we do the following
   clean-up steps:

   1) remove duplicate predictions that guess with the same probability
      (different from 1/2) for both edges
   2) remove duplicates of a prediction that attaches the same probability
      to a single edge

  */

static void
prune_predictions_for_bb (basic_block bb)
{
  edge_prediction **preds = bb_predictions->get (bb);

  if (preds)
    {
      hash_table <predictor_hash_traits> s (13);
      hash_set <edge_prediction *> remove;

      /* Step 1: identify predictors that should be removed.  */
      for (edge_prediction *pred = *preds; pred; pred = pred->ep_next)
	{
	  edge_prediction *existing = s.find (pred);
	  if (existing)
	    {
	      if (pred->ep_edge == existing->ep_edge
		  && pred->ep_probability == existing->ep_probability)
		{
		  /* Remove a duplicate predictor.  */
		  dump_prediction (dump_file, pred->ep_predictor,
				   pred->ep_probability, bb,
				   REASON_SINGLE_EDGE_DUPLICATE, pred->ep_edge);

		  remove.add (pred);
		}
	      else if (pred->ep_edge != existing->ep_edge
		       && pred->ep_probability == existing->ep_probability
		       && pred->ep_probability != REG_BR_PROB_BASE / 2)
		{
		  /* Remove both predictors as they predict the same
		     for both edges.  */
		  dump_prediction (dump_file, existing->ep_predictor,
				   pred->ep_probability, bb,
				   REASON_EDGE_PAIR_DUPLICATE,
				   existing->ep_edge);
		  dump_prediction (dump_file, pred->ep_predictor,
				   pred->ep_probability, bb,
				   REASON_EDGE_PAIR_DUPLICATE,
				   pred->ep_edge);

		  remove.add (existing);
		  remove.add (pred);
		}
	    }

	  edge_prediction **slot2 = s.find_slot (pred, INSERT);
	  *slot2 = pred;
	}

      /* Step 2: Remove predictors.  */
      filter_predictions (preds, not_removed_prediction_p, &remove);
    }
}
/* Combine predictions into single probability and store them into CFG.
   Remove now useless prediction entries.
   If DRY_RUN is set, only produce dumps and do not modify profile.  */

static void
combine_predictions_for_bb (basic_block bb, bool dry_run)
{
  int best_probability = PROB_EVEN;
  enum br_predictor best_predictor = END_PREDICTORS;
  int combined_probability = REG_BR_PROB_BASE / 2;
  int d;
  bool first_match = false;
  bool found = false;
  struct edge_prediction *pred;
  int nedges = 0;
  edge e, first = NULL, second = NULL;
  edge_iterator ei;
  int nzero = 0;
  int nunknown = 0;

  FOR_EACH_EDGE (e, ei, bb->succs)
    {
      if (!unlikely_executed_edge_p (e))
	{
	  nedges++;
	  if (first && !second)
	    second = e;
	  if (!first)
	    first = e;
	}
      else if (!e->probability.initialized_p ())
	e->probability = profile_probability::never ();
      if (!e->probability.initialized_p ())
	nunknown++;
      else if (e->probability == profile_probability::never ())
	nzero++;
    }

  /* When there is no successor or only one choice, prediction is easy.

     When we have a basic block with more than 2 successors, the situation
     is more complicated as DS theory cannot be used literally.
     More precisely, let's assume we predicted edge e1 with probability p1,
     thus: m1({b1}) = p1.  As we're going to combine more than 2 edges, we
     need to find probability of e.g. m1({b2}), which we don't know.
     The only approximation is to equally distribute 1-p1 to all edges
     different from b1.

     According to numbers we've got from the SPEC2006 benchmark, there's only
     one interesting reliable predictor (noreturn call), which can be
     handled with a simpler approach.  */
  if (nedges != 2)
    {
      hash_set<edge> unlikely_edges (4);
      hash_set<edge_prediction *> likely_edges (4);

      /* Identify all edges that have a probability close to very unlikely.
	 Doing the same for the very likely direction did not appear
	 worthwhile, as no such probability shows up in the SPEC2006
	 benchmark.  */
      edge_prediction **preds = bb_predictions->get (bb);
      if (preds)
	for (pred = *preds; pred; pred = pred->ep_next)
	  {
	    if (pred->ep_probability <= PROB_VERY_UNLIKELY
		|| pred->ep_predictor == PRED_COLD_LABEL)
	      unlikely_edges.add (pred->ep_edge);
	    else if (pred->ep_probability >= PROB_VERY_LIKELY
		     || pred->ep_predictor == PRED_BUILTIN_EXPECT
		     || pred->ep_predictor == PRED_HOT_LABEL)
	      likely_edges.add (pred);
	  }

      /* It can happen that an edge is both in likely_edges and unlikely_edges.
	 Clear both sets in that situation.  */
      for (hash_set<edge_prediction *>::iterator it = likely_edges.begin ();
	   it != likely_edges.end (); ++it)
	if (unlikely_edges.contains ((*it)->ep_edge))
	  {
	    likely_edges.empty ();
	    unlikely_edges.empty ();
	    break;
	  }

      if (!dry_run)
	set_even_probabilities (bb, &unlikely_edges, &likely_edges);
      clear_bb_predictions (bb);
      if (dump_file)
	{
	  fprintf (dump_file, "Predictions for bb %i\n", bb->index);
	  if (unlikely_edges.is_empty ())
	    fprintf (dump_file,
		     "%i edges in bb %i predicted to even probabilities\n",
		     nedges, bb->index);
	  else
	    {
	      fprintf (dump_file,
		       "%i edges in bb %i predicted with some unlikely edges\n",
		       nedges, bb->index);
	      FOR_EACH_EDGE (e, ei, bb->succs)
		if (!unlikely_executed_edge_p (e))
		  dump_prediction (dump_file, PRED_COMBINED,
		    e->probability.to_reg_br_prob_base (), bb, REASON_NONE, e);
	    }
	}
      return;
    }

  if (dump_file)
    fprintf (dump_file, "Predictions for bb %i\n", bb->index);

  prune_predictions_for_bb (bb);

  edge_prediction **preds = bb_predictions->get (bb);

  if (preds)
    {
      /* We implement "first match" heuristics and use probability guessed
	 by predictor with smallest index.  */
      for (pred = *preds; pred; pred = pred->ep_next)
	{
	  enum br_predictor predictor = pred->ep_predictor;
	  int probability = pred->ep_probability;

	  if (pred->ep_edge != first)
	    probability = REG_BR_PROB_BASE - probability;

	  found = true;
	  /* First match heuristics would be wildly confused if we predicted
	     both directions.  */
	  if (best_predictor > predictor
	      && predictor_info[predictor].flags & PRED_FLAG_FIRST_MATCH)
	    {
	      struct edge_prediction *pred2;
	      int prob = probability;

	      for (pred2 = (struct edge_prediction *) *preds;
		   pred2; pred2 = pred2->ep_next)
		if (pred2 != pred && pred2->ep_predictor == pred->ep_predictor)
		  {
		    int probability2 = pred2->ep_probability;

		    if (pred2->ep_edge != first)
		      probability2 = REG_BR_PROB_BASE - probability2;

		    if ((probability < REG_BR_PROB_BASE / 2) !=
			(probability2 < REG_BR_PROB_BASE / 2))
		      break;

		    /* If the same predictor later gave better result, go for it!  */
		    if ((probability >= REG_BR_PROB_BASE / 2
			 && (probability2 > probability))
			|| (probability <= REG_BR_PROB_BASE / 2
			    && (probability2 < probability)))
		      prob = probability2;
		  }
	      if (!pred2)
		best_probability = prob, best_predictor = predictor;
	    }

	  d = (combined_probability * probability
	       + (REG_BR_PROB_BASE - combined_probability)
	       * (REG_BR_PROB_BASE - probability));

	  /* Use FP math to avoid overflows of 32bit integers.  */
	  if (d == 0)
	    /* If one probability is 0% and one 100%, avoid division by zero.  */
	    combined_probability = REG_BR_PROB_BASE / 2;
	  else
	    combined_probability = (((double) combined_probability)
				    * probability
				    * REG_BR_PROB_BASE / d + 0.5);
	}
    }

  /* Decide which heuristic to use.  In case we didn't match anything,
     use no_prediction heuristic, in case we did match, use either
     first match or Dempster-Shafer theory depending on the flags.  */

  if (best_predictor != END_PREDICTORS)
    first_match = true;

  if (!found)
    dump_prediction (dump_file, PRED_NO_PREDICTION, combined_probability, bb);
  else
    {
      if (!first_match)
	dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, bb,
			 !first_match ? REASON_NONE : REASON_IGNORED);
      else
	dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, bb,
			 first_match ? REASON_NONE : REASON_IGNORED);
    }

  if (first_match)
    combined_probability = best_probability;
  dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb);

  if (preds)
    {
      for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
	{
	  enum br_predictor predictor = pred->ep_predictor;
	  int probability = pred->ep_probability;

	  dump_prediction (dump_file, predictor, probability, bb,
			   (!first_match || best_predictor == predictor)
			   ? REASON_NONE : REASON_IGNORED, pred->ep_edge);
	}
    }
  clear_bb_predictions (bb);

  /* If we have only one successor which is unknown, we can compute missing
     probability.  */
  if (nunknown == 1)
    {
      profile_probability prob = profile_probability::always ();
      edge missing = NULL;

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->probability.initialized_p ())
	  prob -= e->probability;
	else if (missing == NULL)
	  missing = e;
	else
	  gcc_unreachable ();
      missing->probability = prob;
    }
  /* If nothing is unknown, we have nothing to update.  */
  else if (!nunknown && nzero != (int) EDGE_COUNT (bb->succs))
    ;
  else if (!dry_run)
    {
      first->probability
	= profile_probability::from_reg_br_prob_base (combined_probability);
      second->probability = first->probability.invert ();
    }
}
/* Check if T1 and T2 satisfy the IV_COMPARE condition.
   Return the SSA_NAME if the condition satisfies, NULL otherwise.

   T1 and T2 should be one of the following cases:
     1. T1 is SSA_NAME, T2 is NULL
     2. T1 is SSA_NAME, T2 is INTEGER_CST between [-4, 4]
     3. T2 is SSA_NAME, T1 is INTEGER_CST between [-4, 4]  */

static tree
strips_small_constant (tree t1, tree t2)
{
  tree ret = NULL;
  int value = 0;

  if (!t1)
    return NULL;
  else if (TREE_CODE (t1) == SSA_NAME)
    ret = t1;
  else if (tree_fits_shwi_p (t1))
    value = tree_to_shwi (t1);
  else
    return NULL;

  if (!t2)
    return ret;
  else if (tree_fits_shwi_p (t2))
    value = tree_to_shwi (t2);
  else if (TREE_CODE (t2) == SSA_NAME)
    {
      if (ret)
	return NULL;
      else
	ret = t2;
    }

  if (value <= 4 && value >= -4)
    return ret;
  else
    return NULL;
}
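
/* Editorial note: an illustrative use of strips_small_constant.  For an
   expression like i_1 + 2, strips_small_constant (i_1, 2) returns i_1,
   while for i_1 + 100 it returns NULL because the constant falls
   outside the [-4, 4] range.  */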
/* Return the SSA_NAME in T or T's operands.
   Return NULL if SSA_NAME cannot be found.  */

static tree
get_base_value (tree t)
{
  if (TREE_CODE (t) == SSA_NAME)
    return t;

  if (!BINARY_CLASS_P (t))
    return NULL;

  switch (TREE_OPERAND_LENGTH (t))
    {
    case 1:
      return strips_small_constant (TREE_OPERAND (t, 0), NULL);
    case 2:
      return strips_small_constant (TREE_OPERAND (t, 0),
				    TREE_OPERAND (t, 1));
    default:
      return NULL;
    }
}
/* Check the compare STMT in LOOP.  If it compares an induction
   variable to a loop invariant, return true, and save
   LOOP_INVARIANT, COMPARE_CODE and LOOP_STEP.
   Otherwise return false and set LOOP_INVARIANT to NULL.  */

static bool
is_comparison_with_loop_invariant_p (gcond *stmt, class loop *loop,
				     tree *loop_invariant,
				     enum tree_code *compare_code,
				     tree *loop_step,
				     tree *loop_iv_base)
{
  tree op0, op1, bound, base;
  affine_iv iv0, iv1;
  enum tree_code code;
  tree step;

  code = gimple_cond_code (stmt);
  *loop_invariant = NULL;

  switch (code)
    {
    case GT_EXPR:
    case GE_EXPR:
    case NE_EXPR:
    case LT_EXPR:
    case LE_EXPR:
    case EQ_EXPR:
      break;

    default:
      return false;
    }

  op0 = gimple_cond_lhs (stmt);
  op1 = gimple_cond_rhs (stmt);

  if ((TREE_CODE (op0) != SSA_NAME && TREE_CODE (op0) != INTEGER_CST)
      || (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op1) != INTEGER_CST))
    return false;
  if (!simple_iv (loop, loop_containing_stmt (stmt), op0, &iv0, true))
    return false;
  if (!simple_iv (loop, loop_containing_stmt (stmt), op1, &iv1, true))
    return false;
  if (TREE_CODE (iv0.step) != INTEGER_CST
      || TREE_CODE (iv1.step) != INTEGER_CST)
    return false;
  if ((integer_zerop (iv0.step) && integer_zerop (iv1.step))
      || (!integer_zerop (iv0.step) && !integer_zerop (iv1.step)))
    return false;

  if (integer_zerop (iv0.step))
    {
      if (code != NE_EXPR && code != EQ_EXPR)
	code = invert_tree_comparison (code, false);
      bound = iv0.base;
      base = iv1.base;
      if (tree_fits_shwi_p (iv1.step))
	step = iv1.step;
      else
	return false;
    }
  else
    {
      bound = iv1.base;
      base = iv0.base;
      if (tree_fits_shwi_p (iv0.step))
	step = iv0.step;
      else
	return false;
    }

  if (TREE_CODE (bound) != INTEGER_CST)
    bound = get_base_value (bound);
  if (!bound)
    return false;
  if (TREE_CODE (base) != INTEGER_CST)
    base = get_base_value (base);
  if (!base)
    return false;

  *loop_invariant = bound;
  *compare_code = code;
  *loop_step = step;
  *loop_iv_base = base;
  return true;
}
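
/* Editorial note: an illustrative example for the function above.  For
   a condition "if (i_5 < n_3)" where i_5 is an IV stepping by 1 and n_3
   is loop invariant, it returns true with LOOP_INVARIANT = n_3,
   COMPARE_CODE = LT_EXPR, LOOP_STEP = 1 and LOOP_IV_BASE set to the
   initial value of i.  */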
/* Compare two SSA_NAMEs: returns TRUE if T1 and T2 are value coherent.  */

static bool
expr_coherent_p (tree t1, tree t2)
{
  gimple *stmt;
  tree ssa_name_1 = NULL;
  tree ssa_name_2 = NULL;

  gcc_assert (TREE_CODE (t1) == SSA_NAME || TREE_CODE (t1) == INTEGER_CST);
  gcc_assert (TREE_CODE (t2) == SSA_NAME || TREE_CODE (t2) == INTEGER_CST);

  if (t1 == t2)
    return true;

  if (TREE_CODE (t1) == INTEGER_CST && TREE_CODE (t2) == INTEGER_CST)
    return true;
  if (TREE_CODE (t1) == INTEGER_CST || TREE_CODE (t2) == INTEGER_CST)
    return false;

  /* Check to see if t1 is expressed/defined with t2.  */
  stmt = SSA_NAME_DEF_STMT (t1);
  gcc_assert (stmt != NULL);
  if (is_gimple_assign (stmt))
    {
      ssa_name_1 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
      if (ssa_name_1 && ssa_name_1 == t2)
	return true;
    }

  /* Check to see if t2 is expressed/defined with t1.  */
  stmt = SSA_NAME_DEF_STMT (t2);
  gcc_assert (stmt != NULL);
  if (is_gimple_assign (stmt))
    {
      ssa_name_2 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
      if (ssa_name_2 && ssa_name_2 == t1)
	return true;
    }

  /* Compare if t1 and t2's def_stmts are identical.  */
  if (ssa_name_2 != NULL && ssa_name_1 == ssa_name_2)
    return true;
  else
    return false;
}
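
/* Editorial note: an illustrative example of value coherence.  Given
   the assignment "j_2 = i_1 + 4;", expr_coherent_p (i_1, j_2) returns
   true because j_2 is defined in terms of i_1; two unrelated SSA names
   yield false.  */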
/* Return true if BB is predicted by one of the loop heuristics.  */

static bool
predicted_by_loop_heuristics_p (basic_block bb)
{
  struct edge_prediction *i;
  edge_prediction **preds = bb_predictions->get (bb);

  if (!preds)
    return false;

  for (i = *preds; i; i = i->ep_next)
    if (i->ep_predictor == PRED_LOOP_ITERATIONS_GUESSED
	|| i->ep_predictor == PRED_LOOP_ITERATIONS_MAX
	|| i->ep_predictor == PRED_LOOP_ITERATIONS
	|| i->ep_predictor == PRED_LOOP_EXIT
	|| i->ep_predictor == PRED_LOOP_EXIT_WITH_RECURSION
	|| i->ep_predictor == PRED_LOOP_EXTRA_EXIT)
      return true;
  return false;
}
/* Predict branch probability of BB when BB contains a branch that compares
   an induction variable in LOOP with LOOP_IV_BASE_VAR to LOOP_BOUND_VAR.  The
   loop exit is compared using LOOP_BOUND_CODE, with step of LOOP_BOUND_STEP.

   E.g.
     for (int i = 0; i < bound; i++) {
       if (i < bound - 2)
	 computation_1();
       else
	 computation_2();
     }

   In this loop, we will predict the branch inside the loop to be taken.  */

static void
predict_iv_comparison (class loop *loop, basic_block bb,
		       tree loop_bound_var,
		       tree loop_iv_base_var,
		       enum tree_code loop_bound_code,
		       int loop_bound_step)
{
  tree compare_var, compare_base;
  enum tree_code compare_code;
  tree compare_step_var;
  edge then_edge;
  edge_iterator ei;

  if (predicted_by_loop_heuristics_p (bb))
    return;

  gcond *stmt = safe_dyn_cast <gcond *> (*gsi_last_bb (bb));
  if (!stmt)
    return;
  if (!is_comparison_with_loop_invariant_p (stmt,
					    loop, &compare_var,
					    &compare_code,
					    &compare_step_var,
					    &compare_base))
    return;

  /* Find the taken edge.  */
  FOR_EACH_EDGE (then_edge, ei, bb->succs)
    if (then_edge->flags & EDGE_TRUE_VALUE)
      break;

  /* When comparing an IV to a loop invariant, NE is more likely to be
     taken while EQ is more likely to be not-taken.  */
  if (compare_code == NE_EXPR)
    {
      predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      return;
    }
  else if (compare_code == EQ_EXPR)
    {
      predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
      return;
    }

  if (!expr_coherent_p (loop_iv_base_var, compare_base))
    return;

  /* If loop bound, base and compare bound are all constants, we can
     calculate the probability directly.  */
  if (tree_fits_shwi_p (loop_bound_var)
      && tree_fits_shwi_p (compare_var)
      && tree_fits_shwi_p (compare_base))
    {
      int probability;
      wi::overflow_type overflow;
      bool overall_overflow = false;
      widest_int compare_count, tem;

      /* (loop_bound - base) / compare_step */
      tem = wi::sub (wi::to_widest (loop_bound_var),
		     wi::to_widest (compare_base), SIGNED, &overflow);
      overall_overflow |= overflow;
      widest_int loop_count = wi::div_trunc (tem,
					     wi::to_widest (compare_step_var),
					     SIGNED, &overflow);
      overall_overflow |= overflow;

      if (!wi::neg_p (wi::to_widest (compare_step_var))
	  ^ (compare_code == LT_EXPR || compare_code == LE_EXPR))
	{
	  /* (loop_bound - compare_bound) / compare_step */
	  tem = wi::sub (wi::to_widest (loop_bound_var),
			 wi::to_widest (compare_var), SIGNED, &overflow);
	  overall_overflow |= overflow;
	  compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
					 SIGNED, &overflow);
	  overall_overflow |= overflow;
	}
      else
	{
	  /* (compare_bound - base) / compare_step */
	  tem = wi::sub (wi::to_widest (compare_var),
			 wi::to_widest (compare_base), SIGNED, &overflow);
	  overall_overflow |= overflow;
	  compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
					 SIGNED, &overflow);
	  overall_overflow |= overflow;
	}
      if (compare_code == LE_EXPR || compare_code == GE_EXPR)
	++compare_count;
      if (loop_bound_code == LE_EXPR || loop_bound_code == GE_EXPR)
	++loop_count;
      if (wi::neg_p (compare_count))
	compare_count = 0;
      if (wi::neg_p (loop_count))
	loop_count = 0;
      if (loop_count == 0)
	probability = 0;
      else if (wi::cmps (compare_count, loop_count) == 1)
	probability = REG_BR_PROB_BASE;
      else
	{
	  tem = compare_count * REG_BR_PROB_BASE;
	  tem = wi::udiv_trunc (tem, loop_count);
	  probability = tem.to_uhwi ();
	}

      /* FIXME: The branch prediction seems broken.  It has only 20% hitrate.  */
      if (!overall_overflow)
	predict_edge (then_edge, PRED_LOOP_IV_COMPARE, probability);

      return;
    }

  if (expr_coherent_p (loop_bound_var, compare_var))
    {
      if ((loop_bound_code == LT_EXPR || loop_bound_code == LE_EXPR)
	  && (compare_code == LT_EXPR || compare_code == LE_EXPR))
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else if ((loop_bound_code == GT_EXPR || loop_bound_code == GE_EXPR)
	       && (compare_code == GT_EXPR || compare_code == GE_EXPR))
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else if (loop_bound_code == NE_EXPR)
	{
	  /* If the loop backedge condition is "(i != bound)", we do
	     the comparison based on the step of IV:
	     * step < 0 : backedge condition is like (i > bound)
	     * step > 0 : backedge condition is like (i < bound)  */
	  gcc_assert (loop_bound_step != 0);
	  if (loop_bound_step > 0
	      && (compare_code == LT_EXPR
		  || compare_code == LE_EXPR))
	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
	  else if (loop_bound_step < 0
		   && (compare_code == GT_EXPR
		       || compare_code == GE_EXPR))
	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
	  else
	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
	}
      else
	/* The branch is predicted not-taken if loop_bound_code is
	   opposite with compare_code.  */
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
    }
  else if (expr_coherent_p (loop_iv_base_var, compare_var))
    {
      /* For cases like:
	   for (i = s; i < h; i++)
	     if (i > s + 2) ....
	 The branch should be predicted taken.  */
      if (loop_bound_step > 0
	  && (compare_code == GT_EXPR || compare_code == GE_EXPR))
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else if (loop_bound_step < 0
	       && (compare_code == LT_EXPR || compare_code == LE_EXPR))
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
    }
}
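
/* Editorial note: a worked example of the all-constant case above.  For
   "for (i = 0; i < 100; i++) if (i < 30) ..." with step 1, loop_count
   is (100 - 0) / 1 = 100 and compare_count is (30 - 0) / 1 = 30, so the
   inner branch is predicted taken with probability
   30 * REG_BR_PROB_BASE / 100, i.e. 30%.  */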
/* Predict for extra loop exits that will lead to EXIT_EDGE.  The extra loop
   exits result from short-circuit conditions that will generate an
   if_tmp.  E.g.:

     if (foo() || global > 10)
       break;

   This will be translated into:

   BB3:
     loop header...
   BB4:
     if foo() goto BB6 else goto BB5
   BB5:
     if global > 10 goto BB6 else goto BB7
   BB6:
     goto BB7
   BB7:
     iftmp = (PHI 0(BB5), 1(BB6))
     if iftmp == 1 goto BB8 else goto BB3
   BB8:
     outside of the loop...

   The edge BB7->BB8 is loop exit because BB8 is outside of the loop.
   From the dataflow, we can infer that BB4->BB6 and BB5->BB6 are also loop
   exits.  This function takes BB7->BB8 as input, and finds out the extra loop
   exits to predict them using PRED_LOOP_EXTRA_EXIT.  */

static void
predict_extra_loop_exits (class loop *loop, edge exit_edge)
{
  unsigned i;
  bool check_value_one;
  gimple *lhs_def_stmt;
  gphi *phi_stmt;
  tree cmp_rhs, cmp_lhs;

  gcond *cmp_stmt = safe_dyn_cast <gcond *> (*gsi_last_bb (exit_edge->src));
  if (!cmp_stmt)
    return;

  cmp_rhs = gimple_cond_rhs (cmp_stmt);
  cmp_lhs = gimple_cond_lhs (cmp_stmt);
  if (!TREE_CONSTANT (cmp_rhs)
      || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
    return;
  if (TREE_CODE (cmp_lhs) != SSA_NAME)
    return;

  /* If check_value_one is true, only the phi_args with value '1' will lead
     to loop exit.  Otherwise, only the phi_args with value '0' will lead to
     loop exit.  */
  check_value_one = (((integer_onep (cmp_rhs))
		    ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
		    ^ ((exit_edge->flags & EDGE_TRUE_VALUE) != 0));

  lhs_def_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
  if (!lhs_def_stmt)
    return;

  phi_stmt = dyn_cast <gphi *> (lhs_def_stmt);
  if (!phi_stmt)
    return;

  for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
    {
      edge e1;
      edge_iterator ei;
      tree val = gimple_phi_arg_def (phi_stmt, i);
      edge e = gimple_phi_arg_edge (phi_stmt, i);

      if (!TREE_CONSTANT (val) || !(integer_zerop (val) || integer_onep (val)))
	continue;
      if ((check_value_one ^ integer_onep (val)) == 1)
	continue;
      if (EDGE_COUNT (e->src->succs) != 1)
	{
	  predict_paths_leading_to_edge (e, PRED_LOOP_EXTRA_EXIT, NOT_TAKEN,
					 loop);
	  continue;
	}

      FOR_EACH_EDGE (e1, ei, e->src->preds)
	predict_paths_leading_to_edge (e1, PRED_LOOP_EXTRA_EXIT, NOT_TAKEN,
				       loop);
    }
}
1942 /* Predict edge probabilities by exploiting loop structure. */
1944 static void
1945 predict_loops (void)
1947 basic_block bb;
1948 hash_set <class loop *> with_recursion(10);
1950 FOR_EACH_BB_FN (bb, cfun)
1952 gimple_stmt_iterator gsi;
1953 tree decl;
1955 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1956 if (is_gimple_call (gsi_stmt (gsi))
1957 && (decl = gimple_call_fndecl (gsi_stmt (gsi))) != NULL
1958 && recursive_call_p (current_function_decl, decl))
1960 class loop *loop = bb->loop_father;
1961 while (loop && !with_recursion.add (loop))
1962 loop = loop_outer (loop);
1966 /* Try to predict out blocks in a loop that are not part of a
1967 natural loop. */
1968 for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
1970 basic_block bb, *bbs;
1971 unsigned j, n_exits = 0;
1972 class tree_niter_desc niter_desc;
1973 edge ex;
1974 class nb_iter_bound *nb_iter;
1975 enum tree_code loop_bound_code = ERROR_MARK;
1976 tree loop_bound_step = NULL;
1977 tree loop_bound_var = NULL;
1978 tree loop_iv_base = NULL;
1979 gcond *stmt = NULL;
1980 bool recursion = with_recursion.contains (loop);
1982 auto_vec<edge> exits = get_loop_exit_edges (loop);
1983 FOR_EACH_VEC_ELT (exits, j, ex)
1984 if (!unlikely_executed_edge_p (ex) && !(ex->flags & EDGE_ABNORMAL_CALL))
1985 n_exits ++;
1986 if (!n_exits)
1987 continue;
1989 if (dump_file && (dump_flags & TDF_DETAILS))
1990 fprintf (dump_file, "Predicting loop %i%s with %i exits.\n",
1991 loop->num, recursion ? " (with recursion)" : "", n_exits);
1992 if (dump_file && (dump_flags & TDF_DETAILS)
1993 && max_loop_iterations_int (loop) >= 0)
1995 fprintf (dump_file,
1996 "Loop %d iterates at most %i times.\n", loop->num,
1997 (int)max_loop_iterations_int (loop));
1999 if (dump_file && (dump_flags & TDF_DETAILS)
2000 && likely_max_loop_iterations_int (loop) >= 0)
2002 fprintf (dump_file, "Loop %d likely iterates at most %i times.\n",
2003 loop->num, (int)likely_max_loop_iterations_int (loop));
2006 FOR_EACH_VEC_ELT (exits, j, ex)
2008 tree niter = NULL;
2009 HOST_WIDE_INT nitercst;
2010 int max = param_max_predicted_iterations;
2011 int probability;
2012 enum br_predictor predictor;
2013 widest_int nit;
2015 if (unlikely_executed_edge_p (ex)
2016 || (ex->flags & EDGE_ABNORMAL_CALL))
2017 continue;
2018 /* Loop heuristics do not expect the exit conditional to be inside
2019 an inner loop. We predict from innermost to outermost loop. */
2020 if (predicted_by_loop_heuristics_p (ex->src))
2022 if (dump_file && (dump_flags & TDF_DETAILS))
2023 fprintf (dump_file, "Skipping exit %i->%i because "
2024 "it is already predicted.\n",
2025 ex->src->index, ex->dest->index);
2026 continue;
2028 predict_extra_loop_exits (loop, ex);
2030 if (number_of_iterations_exit (loop, ex, &niter_desc, false, false))
2031 niter = niter_desc.niter;
2032 if (!niter || TREE_CODE (niter_desc.niter) != INTEGER_CST)
2033 niter = loop_niter_by_eval (loop, ex);
2034 if (dump_file && (dump_flags & TDF_DETAILS)
2035 && TREE_CODE (niter) == INTEGER_CST)
2037 fprintf (dump_file, "Exit %i->%i %d iterates ",
2038 ex->src->index, ex->dest->index,
2039 loop->num);
2040 print_generic_expr (dump_file, niter, TDF_SLIM);
2041 fprintf (dump_file, " times.\n");
2044 if (TREE_CODE (niter) == INTEGER_CST)
2046 if (tree_fits_uhwi_p (niter)
2047 && max
2048 && compare_tree_int (niter, max - 1) == -1)
2049 nitercst = tree_to_uhwi (niter) + 1;
2050 else
2051 nitercst = max;
2052 predictor = PRED_LOOP_ITERATIONS;
2054 /* If we have just one exit and we can derive some information about
2055 the number of iterations of the loop from the statements inside
2056 the loop, use it to predict this exit. */
2057 else if (n_exits == 1
2058 && estimated_stmt_executions (loop, &nit))
2060 if (wi::gtu_p (nit, max))
2061 nitercst = max;
2062 else
2063 nitercst = nit.to_shwi ();
2064 predictor = PRED_LOOP_ITERATIONS_GUESSED;
2066 /* If we have a likely upper bound, trust it for very small iteration
2067 counts. Such loops would otherwise get mispredicted by the standard
2068 LOOP_EXIT heuristics. */
2069 else if (n_exits == 1
2070 && likely_max_stmt_executions (loop, &nit)
2071 && wi::ltu_p (nit,
2072 RDIV (REG_BR_PROB_BASE,
2073 REG_BR_PROB_BASE
2074 - predictor_info
2075 [recursion
2076 ? PRED_LOOP_EXIT_WITH_RECURSION
2077 : PRED_LOOP_EXIT].hitrate)))
2079 nitercst = nit.to_shwi ();
2080 predictor = PRED_LOOP_ITERATIONS_MAX;
2082 else
2084 if (dump_file && (dump_flags & TDF_DETAILS))
2085 fprintf (dump_file, "Nothing known about exit %i->%i.\n",
2086 ex->src->index, ex->dest->index);
2087 continue;
2090 if (dump_file && (dump_flags & TDF_DETAILS))
2091 fprintf (dump_file, "Recording prediction to %i iterations by %s.\n",
2092 (int)nitercst, predictor_info[predictor].name);
2093 /* If the prediction for number of iterations is zero, do not
2094 predict the exit edges. */
2095 if (nitercst == 0)
2096 continue;
2098 probability = RDIV (REG_BR_PROB_BASE, nitercst);
2099 predict_edge (ex, predictor, probability);
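/* Editorial worked example, not part of predict.cc: with nitercst == 10,
   the exit edge receives probability RDIV (REG_BR_PROB_BASE, 10), i.e.
   1000 out of REG_BR_PROB_BASE (10000): an exit test executed roughly
   ten times per loop entry is expected to be taken once.  */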
2102 /* Find information about loop bound variables. */
2103 for (nb_iter = loop->bounds; nb_iter;
2104 nb_iter = nb_iter->next)
2105 if (nb_iter->stmt
2106 && gimple_code (nb_iter->stmt) == GIMPLE_COND)
2108 stmt = as_a <gcond *> (nb_iter->stmt);
2109 break;
2111 if (!stmt)
2112 stmt = safe_dyn_cast <gcond *> (*gsi_last_bb (loop->header));
2113 if (stmt)
2114 is_comparison_with_loop_invariant_p (stmt, loop,
2115 &loop_bound_var,
2116 &loop_bound_code,
2117 &loop_bound_step,
2118 &loop_iv_base);
2120 bbs = get_loop_body (loop);
2122 for (j = 0; j < loop->num_nodes; j++)
2124 edge e;
2125 edge_iterator ei;
2127 bb = bbs[j];
2129 /* Bypass loop heuristics on continue statements. These
2130 statements construct loops via "non-loop" constructs
2131 in the source language and are better handled
2132 separately. */
2133 if (predicted_by_p (bb, PRED_CONTINUE))
2135 if (dump_file && (dump_flags & TDF_DETAILS))
2136 fprintf (dump_file, "BB %i predicted by continue.\n",
2137 bb->index);
2138 continue;
2141 /* If we already used more reliable loop exit predictors, do not
2142 bother with PRED_LOOP_EXIT. */
2143 if (!predicted_by_loop_heuristics_p (bb))
2145 /* For a loop with many exits we don't want to predict all exits
2146 with a pretty large probability, because if all exits are
2147 considered in a row, the loop would be predicted to iterate
2148 almost never. The code dividing the probability by the number
2149 of exits is very rough. It should compute the number of exits
2150 taken in each path through the function (not the overall number
2151 of exits, which might be a lot higher for loops with wide switch
2152 statements in them) and compute the n-th root.
2154 We limit the minimal probability to 2% to avoid
2155 EDGE_PROBABILITY_RELIABLE from trusting the branch prediction
2156 as this was causing a regression in the perl benchmark containing
2157 such a wide loop. */
2159 int probability = ((REG_BR_PROB_BASE
2160 - predictor_info
2161 [recursion
2162 ? PRED_LOOP_EXIT_WITH_RECURSION
2163 : PRED_LOOP_EXIT].hitrate)
2164 / n_exits);
2165 if (probability < HITRATE (2))
2166 probability = HITRATE (2);
2167 FOR_EACH_EDGE (e, ei, bb->succs)
2168 if (e->dest->index < NUM_FIXED_BLOCKS
2169 || !flow_bb_inside_loop_p (loop, e->dest))
2171 if (dump_file && (dump_flags & TDF_DETAILS))
2172 fprintf (dump_file,
2173 "Predicting exit %i->%i with prob %i.\n",
2174 e->src->index, e->dest->index, probability);
2175 predict_edge (e,
2176 recursion ? PRED_LOOP_EXIT_WITH_RECURSION
2177 : PRED_LOOP_EXIT, probability);
2180 if (loop_bound_var)
2181 predict_iv_comparison (loop, bb, loop_bound_var, loop_iv_base,
2182 loop_bound_code,
2183 tree_to_shwi (loop_bound_step));
2186 /* In the following code
2187 for (loop1)
2188 if (cond)
2189 for (loop2)
2190 body;
2191 guess that cond is unlikely. */
2192 if (loop_outer (loop)->num)
2194 basic_block bb = NULL;
2195 edge preheader_edge = loop_preheader_edge (loop);
2197 if (single_pred_p (preheader_edge->src)
2198 && single_succ_p (preheader_edge->src))
2199 preheader_edge = single_pred_edge (preheader_edge->src);
2201 /* Pattern match Fortran loop preheader:
2202 _16 = BUILTIN_EXPECT (_15, 1, PRED_FORTRAN_LOOP_PREHEADER);
2203 _17 = (logical(kind=4)) _16;
2204 if (_17 != 0)
2205 goto <bb 11>;
2206 else
2207 goto <bb 13>;
2209 Loop guard branch prediction says nothing about duplicated loop
2210 headers produced by the Fortran front end, and in this case we want
2211 to predict paths leading to this preheader. */
2213 gcond *stmt
2214 = safe_dyn_cast <gcond *> (*gsi_last_bb (preheader_edge->src));
2215 if (stmt
2216 && gimple_cond_code (stmt) == NE_EXPR
2217 && TREE_CODE (gimple_cond_lhs (stmt)) == SSA_NAME
2218 && integer_zerop (gimple_cond_rhs (stmt)))
2220 gimple *call_stmt = SSA_NAME_DEF_STMT (gimple_cond_lhs (stmt));
2221 if (gimple_code (call_stmt) == GIMPLE_ASSIGN
2222 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (call_stmt))
2223 && TREE_CODE (gimple_assign_rhs1 (call_stmt)) == SSA_NAME)
2224 call_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (call_stmt));
2225 if (gimple_call_internal_p (call_stmt, IFN_BUILTIN_EXPECT)
2226 && TREE_CODE (gimple_call_arg (call_stmt, 2)) == INTEGER_CST
2227 && tree_fits_uhwi_p (gimple_call_arg (call_stmt, 2))
2228 && tree_to_uhwi (gimple_call_arg (call_stmt, 2))
2229 == PRED_FORTRAN_LOOP_PREHEADER)
2230 bb = preheader_edge->src;
2232 if (!bb)
2234 if (!dominated_by_p (CDI_DOMINATORS,
2235 loop_outer (loop)->latch, loop->header))
2236 predict_paths_leading_to_edge (loop_preheader_edge (loop),
2237 recursion
2238 ? PRED_LOOP_GUARD_WITH_RECURSION
2239 : PRED_LOOP_GUARD,
2240 NOT_TAKEN,
2241 loop_outer (loop));
2243 else
2245 if (!dominated_by_p (CDI_DOMINATORS,
2246 loop_outer (loop)->latch, bb))
2247 predict_paths_leading_to (bb,
2248 recursion
2249 ? PRED_LOOP_GUARD_WITH_RECURSION
2250 : PRED_LOOP_GUARD,
2251 NOT_TAKEN,
2252 loop_outer (loop));
2256 /* Free basic blocks from get_loop_body. */
2257 free (bbs);
2261 /* Attempt to predict probabilities of BB outgoing edges using local
2262 properties. */
2263 static void
2264 bb_estimate_probability_locally (basic_block bb)
2266 rtx_insn *last_insn = BB_END (bb);
2267 rtx cond;
2269 if (! can_predict_insn_p (last_insn))
2270 return;
2271 cond = get_condition (last_insn, NULL, false, false);
2272 if (! cond)
2273 return;
2275 /* Try "pointer heuristic."
2276 A comparison ptr == 0 is predicted as false.
2277 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
2278 if (COMPARISON_P (cond)
2279 && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
2280 || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
2282 if (GET_CODE (cond) == EQ)
2283 predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
2284 else if (GET_CODE (cond) == NE)
2285 predict_insn_def (last_insn, PRED_POINTER, TAKEN);
2287 else
2289 /* Try "opcode heuristic."
2290 EQ tests are usually false and NE tests are usually true. Also,
2291 most quantities are positive, so we can make the appropriate guesses
2292 about signed comparisons against zero. */
2293 switch (GET_CODE (cond))
2295 case CONST_INT:
2296 /* Unconditional branch. */
2297 predict_insn_def (last_insn, PRED_UNCONDITIONAL,
2298 cond == const0_rtx ? NOT_TAKEN : TAKEN);
2299 break;
2301 case EQ:
2302 case UNEQ:
2303 /* Floating point comparisons appear to behave in a very
2304 unpredictable way because of the special role of = tests in
2305 FP code. */
2306 if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
2308 /* Comparisons with 0 are often used for booleans and there is
2309 nothing useful to predict about them. */
2310 else if (XEXP (cond, 1) == const0_rtx
2311 || XEXP (cond, 0) == const0_rtx)
2313 else
2314 predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
2315 break;
2317 case NE:
2318 case LTGT:
2319 /* Floating point comparisons appear to behave in a very
2320 unpredictable way because of the special role of = tests in
2321 FP code. */
2322 if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
2324 /* Comparisons with 0 are often used for booleans and there is
2325 nothing useful to predict about them. */
2326 else if (XEXP (cond, 1) == const0_rtx
2327 || XEXP (cond, 0) == const0_rtx)
2329 else
2330 predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
2331 break;
2333 case ORDERED:
2334 predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
2335 break;
2337 case UNORDERED:
2338 predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
2339 break;
2341 case LE:
2342 case LT:
2343 if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
2344 || XEXP (cond, 1) == constm1_rtx)
2345 predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
2346 break;
2348 case GE:
2349 case GT:
2350 if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
2351 || XEXP (cond, 1) == constm1_rtx)
2352 predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
2353 break;
2355 default:
2356 break;
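/* Editorial sketch, not part of predict.cc: typical C sources the RTL
   heuristics above react to (an illustration, not an exhaustive list):

     if (p == 0)    ...   pointer heuristic: predicted not taken
     if (n != 0)    ...   opcode heuristic (NE): predicted taken
     if (x > 0)     ...   positive-value heuristic: predicted taken

   The GIMPLE-level counterpart lives in tree_predict_by_opcode.  */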
2360 /* Set edge->probability for each successor edge of BB. */
2361 void
2362 guess_outgoing_edge_probabilities (basic_block bb)
2364 bb_estimate_probability_locally (bb);
2365 combine_predictions_for_insn (BB_END (bb), bb);
2368 static tree expr_expected_value (tree, enum br_predictor *predictor,
2369 HOST_WIDE_INT *probability);
2371 /* Helper function for expr_expected_value. */
2373 static tree
2374 expr_expected_value_1 (tree type, tree op0, enum tree_code code,
2375 tree op1, enum br_predictor *predictor,
2376 HOST_WIDE_INT *probability)
2378 gimple *def;
2380 /* Reset returned probability value. */
2381 *probability = -1;
2382 *predictor = PRED_UNCONDITIONAL;
2384 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
2386 if (TREE_CONSTANT (op0))
2387 return op0;
2389 if (code == IMAGPART_EXPR)
2391 if (TREE_CODE (TREE_OPERAND (op0, 0)) == SSA_NAME)
2393 def = SSA_NAME_DEF_STMT (TREE_OPERAND (op0, 0));
2394 if (is_gimple_call (def)
2395 && gimple_call_internal_p (def)
2396 && (gimple_call_internal_fn (def)
2397 == IFN_ATOMIC_COMPARE_EXCHANGE))
2399 /* Assume that any given atomic operation has low contention,
2400 and thus the compare-and-swap operation succeeds. */
2401 *predictor = PRED_COMPARE_AND_SWAP;
2402 return build_one_cst (TREE_TYPE (op0));
2407 if (code != SSA_NAME)
2408 return NULL_TREE;
2410 def = SSA_NAME_DEF_STMT (op0);
2412 /* If we were already here, break the infinite cycle. */
2413 bool existed_p;
2414 expected_value *res
2415 = &ssa_expected_value->get_or_insert (SSA_NAME_VERSION (op0),
2416 &existed_p);
2417 if (existed_p)
2419 *probability = res->probability;
2420 *predictor = res->predictor;
2421 return res->val;
2423 res->val = NULL_TREE;
2424 res->predictor = *predictor;
2425 res->probability = *probability;
2427 if (gphi *phi = dyn_cast <gphi *> (def))
2429 /* All the arguments of the PHI node must have the same constant
2430 expected value. */
2431 int i, n = gimple_phi_num_args (phi);
2432 tree val = NULL;
2433 bool has_nonzero_edge = false;
2435 /* If we already proved that a given edge is unlikely, we do not need
2436 to handle merging of the probabilities. */
2437 for (i = 0; i < n && !has_nonzero_edge; i++)
2439 tree arg = PHI_ARG_DEF (phi, i);
2440 if (arg == PHI_RESULT (phi))
2441 continue;
2442 profile_count cnt = gimple_phi_arg_edge (phi, i)->count ();
2443 if (!cnt.initialized_p () || cnt.nonzero_p ())
2444 has_nonzero_edge = true;
2447 for (i = 0; i < n; i++)
2449 tree arg = PHI_ARG_DEF (phi, i);
2450 enum br_predictor predictor2;
2452 /* Skip self-referring parameters, since they do not change the
2453 expected value. */
2454 if (arg == PHI_RESULT (phi))
2455 continue;
2457 /* Skip edges which we already predicted as executing
2458 zero times. */
2459 if (has_nonzero_edge)
2461 profile_count cnt = gimple_phi_arg_edge (phi, i)->count ();
2462 if (cnt.initialized_p () && !cnt.nonzero_p ())
2463 continue;
2465 HOST_WIDE_INT probability2;
2466 tree new_val = expr_expected_value (arg, &predictor2,
2467 &probability2);
2468 /* If we know nothing about value, give up. */
2469 if (!new_val)
2470 return NULL;
2472 /* If this is the first edge, trust its prediction. */
2473 if (!val)
2475 val = new_val;
2476 *predictor = predictor2;
2477 *probability = probability2;
2478 continue;
2480 /* If there are two different values, give up. */
2481 if (!operand_equal_p (val, new_val, false))
2482 return NULL;
2484 int p1 = get_predictor_value (*predictor, *probability);
2485 int p2 = get_predictor_value (predictor2, probability2);
2486 /* If both predictors agree, it does not matter from which
2487 edge we enter the basic block. */
2488 if (*predictor == predictor2 && p1 == p2)
2489 continue;
2490 /* The general case has no precise solution, since we do not
2491 know probabilities of incoming edges yet.
2492 Still, if the value is predicted over all incoming edges, we
2493 can hope it will indeed be the case. Conservatively
2494 downgrade the prediction quality (so first-match merging is not
2495 performed) and take the least successful prediction. */
2497 *predictor = PRED_COMBINED_VALUE_PREDICTIONS_PHI;
2498 *probability = MIN (p1, p2);
2501 res = ssa_expected_value->get (SSA_NAME_VERSION (op0));
2502 res->val = val;
2503 res->predictor = *predictor;
2504 res->probability = *probability;
2505 return val;
2507 if (is_gimple_assign (def))
2509 if (gimple_assign_lhs (def) != op0)
2510 return NULL;
2512 tree val = expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def)),
2513 gimple_assign_rhs1 (def),
2514 gimple_assign_rhs_code (def),
2515 gimple_assign_rhs2 (def),
2516 predictor, probability);
2517 if (val)
2519 res = ssa_expected_value->get (SSA_NAME_VERSION (op0));
2520 res->val = val;
2521 res->predictor = *predictor;
2522 res->probability = *probability;
2524 return val;
2527 if (is_gimple_call (def))
2529 tree decl = gimple_call_fndecl (def);
2530 if (!decl)
2532 if (gimple_call_internal_p (def)
2533 && gimple_call_internal_fn (def) == IFN_BUILTIN_EXPECT)
2535 gcc_assert (gimple_call_num_args (def) == 3);
2536 tree val = gimple_call_arg (def, 0);
2537 if (TREE_CONSTANT (val))
2538 return val;
2539 tree val2 = gimple_call_arg (def, 2);
2540 gcc_assert (TREE_CODE (val2) == INTEGER_CST
2541 && tree_fits_uhwi_p (val2)
2542 && tree_to_uhwi (val2) < END_PREDICTORS);
2543 *predictor = (enum br_predictor) tree_to_uhwi (val2);
2544 if (*predictor == PRED_BUILTIN_EXPECT)
2545 *probability
2546 = HITRATE (param_builtin_expect_probability);
2547 val = gimple_call_arg (def, 1);
2548 res->val = val;
2549 res->predictor = *predictor;
2550 res->probability = *probability;
2551 return val;
2553 return NULL;
2556 if (DECL_IS_MALLOC (decl) || DECL_IS_OPERATOR_NEW_P (decl))
2558 if (predictor)
2559 *predictor = PRED_MALLOC_NONNULL;
2560 /* FIXME: This is wrong and we need to convert the logic
2561 to value ranges. This makes the predictor assume that
2562 malloc always returns (size_t)1, which is not the same
2563 as returning non-NULL. */
2564 tree val = fold_convert (type, boolean_true_node);
2565 res->val = val;
2566 res->predictor = *predictor;
2567 res->probability = *probability;
2568 return val;
2571 if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
2572 switch (DECL_FUNCTION_CODE (decl))
2574 case BUILT_IN_EXPECT:
2576 tree val;
2577 if (gimple_call_num_args (def) != 2)
2578 return NULL;
2579 val = gimple_call_arg (def, 0);
2580 if (TREE_CONSTANT (val))
2581 return val;
2582 *predictor = PRED_BUILTIN_EXPECT;
2583 *probability
2584 = HITRATE (param_builtin_expect_probability);
2585 val = gimple_call_arg (def, 1);
2586 res->val = val;
2587 res->predictor = *predictor;
2588 res->probability = *probability;
2589 return val;
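/* Editorial sketch, not part of predict.cc: the construct handled by
   the BUILT_IN_EXPECT case above.

     if (__builtin_expect (x == 0, 0))
       handle_error ();

   handle_error is a hypothetical function used only for illustration.
   The second argument becomes VAL; the branch gets PRED_BUILTIN_EXPECT
   with HITRATE (param_builtin_expect_probability), 90% by default.  */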
2591 case BUILT_IN_EXPECT_WITH_PROBABILITY:
2593 tree val;
2594 if (gimple_call_num_args (def) != 3)
2595 return NULL;
2596 val = gimple_call_arg (def, 0);
2597 if (TREE_CONSTANT (val))
2599 res->val = val;
2600 res->predictor = *predictor;
2601 res->probability = *probability;
2602 return val;
2604 /* Compute final probability as:
2605 probability * REG_BR_PROB_BASE. */
2606 tree prob = gimple_call_arg (def, 2);
2607 tree t = TREE_TYPE (prob);
2608 tree base = build_int_cst (integer_type_node,
2609 REG_BR_PROB_BASE);
2610 base = build_real_from_int_cst (t, base);
2611 tree r = fold_build2_initializer_loc (UNKNOWN_LOCATION,
2612 MULT_EXPR, t, prob, base);
2613 if (TREE_CODE (r) != REAL_CST)
2615 error_at (gimple_location (def),
2616 "probability %qE must be "
2617 "constant floating-point expression", prob);
2618 return NULL;
2620 HOST_WIDE_INT probi
2621 = real_to_integer (TREE_REAL_CST_PTR (r));
2622 if (probi >= 0 && probi <= REG_BR_PROB_BASE)
2624 *predictor = PRED_BUILTIN_EXPECT_WITH_PROBABILITY;
2625 *probability = probi;
2627 else
2628 error_at (gimple_location (def),
2629 "probability %qE is outside "
2630 "the range [0.0, 1.0]", prob);
2632 val = gimple_call_arg (def, 1);
2633 res->val = val;
2634 res->predictor = *predictor;
2635 res->probability = *probability;
2636 return val;
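/* Editorial worked example, not part of predict.cc: for

     if (__builtin_expect_with_probability (x == 0, 1, 0.9))
       ...

   the probability argument 0.9 is multiplied by REG_BR_PROB_BASE
   (10000) and folded, giving probi == 9000, which is within
   [0, REG_BR_PROB_BASE] and becomes *probability above.  */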
2639 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_N:
2640 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_1:
2641 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_2:
2642 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_4:
2643 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_8:
2644 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_16:
2645 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE:
2646 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_N:
2647 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1:
2648 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_2:
2649 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4:
2650 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8:
2651 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16:
2652 /* Assume that any given atomic operation has low contention,
2653 and thus the compare-and-swap operation succeeds. */
2654 *predictor = PRED_COMPARE_AND_SWAP;
2655 res->val = boolean_true_node;
2656 res->predictor = *predictor;
2657 res->probability = *probability;
2658 return boolean_true_node;
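/* Editorial sketch, not part of predict.cc: a CAS loop whose success
   test is covered by PRED_COMPARE_AND_SWAP above.

     long old = __atomic_load_n (p, __ATOMIC_RELAXED);
     while (!__atomic_compare_exchange_n (p, &old, old + 1, 0,
                                          __ATOMIC_SEQ_CST,
                                          __ATOMIC_RELAXED))
       ;

   Under the low-contention assumption the exchange is predicted to
   succeed, so the retry loop is predicted to exit immediately.  */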
2659 case BUILT_IN_REALLOC:
2660 case BUILT_IN_GOMP_REALLOC:
2661 if (predictor)
2662 *predictor = PRED_MALLOC_NONNULL;
2663 /* FIXME: This is wrong and we need to convert the logic
2664 to value ranges. */
2665 res->val = fold_convert (type, boolean_true_node);
2666 res->predictor = *predictor;
2667 res->probability = *probability;
2668 return res->val;
2669 default:
2670 break;
2674 return NULL;
2677 if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
2679 tree res;
2680 tree nop0 = op0;
2681 tree nop1 = op1;
2683 /* First handle the situation where a single op is enough to determine
2684 the final value. In this case we can do a better job by avoiding
2685 prediction merging. */
2686 if (TREE_CODE (op0) != INTEGER_CST)
2688 /* See if expected value of op0 is good enough to determine the result. */
2689 nop0 = expr_expected_value (op0, predictor, probability);
2690 if (nop0
2691 && (res = fold_build2 (code, type, nop0, op1)) != NULL
2692 && TREE_CODE (res) == INTEGER_CST)
2693 /* We are now getting a conservative probability. Consider for
2694 example:
2695 op0 * op1
2696 If op0 is 0 with probability p, then we will ignore the
2697 possibility that op0 != 0 and op1 == 0. It does not seem to be
2698 worthwhile to downgrade prediction quality for this. */
2699 return res;
2700 if (!nop0)
2701 nop0 = op0;
2703 enum br_predictor predictor2 = PRED_UNCONDITIONAL;
2704 HOST_WIDE_INT probability2 = -1;
2705 if (TREE_CODE (op1) != INTEGER_CST)
2707 /* See if expected value of op1 is good enough to determine the result. */
2708 nop1 = expr_expected_value (op1, &predictor2, &probability2);
2709 if (nop1
2710 && (res = fold_build2 (code, type, op0, nop1)) != NULL
2711 && TREE_CODE (res) == INTEGER_CST)
2713 /* Similarly to the above, we now get a conservative probability. */
2714 *predictor = predictor2;
2715 *probability = probability2;
2716 return res;
2718 if (!nop1)
2719 nop1 = op1;
2721 /* We already checked if folding one of the arguments to a constant is
2722 good enough. Consequently, failing to fold both means that we will
2723 not succeed in determining the value. */
2724 if (nop0 == op0 || nop1 == op1)
2725 return NULL;
2726 /* Finally see if we have two known values. */
2727 res = fold_build2 (code, type, nop0, nop1);
2728 if (TREE_CODE (res) == INTEGER_CST)
2730 HOST_WIDE_INT p1 = get_predictor_value (*predictor, *probability);
2731 HOST_WIDE_INT p2 = get_predictor_value (predictor2, probability2);
2733 /* If one of the predictions is sure, such as PRED_UNCONDITIONAL, we
2734 can ignore it. */
2735 if (p2 == PROB_ALWAYS)
2736 return res;
2737 if (p1 == PROB_ALWAYS)
2739 *predictor = predictor2;
2740 *probability = probability2;
2741 return res;
2743 /* Combine binary predictions.
2744 Since we do not know about the independence of the predictors, we
2745 cannot determine the value precisely. */
2746 *probability = RDIV (p1 * p2, REG_BR_PROB_BASE);
2747 /* If we no longer track useful information, give up. */
2748 if (!*probability)
2749 return NULL;
2750 /* Otherwise mark that the prediction is a result of combining
2751 different heuristics, since we do not want it to participate
2752 in first-match merging. It is no longer reliable since
2753 we do not know if the probabilities are independent. */
2754 *predictor = PRED_COMBINED_VALUE_PREDICTIONS;
2756 return res;
2758 return NULL;
2760 if (get_gimple_rhs_class (code) == GIMPLE_UNARY_RHS)
2762 tree res;
2763 op0 = expr_expected_value (op0, predictor, probability);
2764 if (!op0)
2765 return NULL;
2766 res = fold_build1 (code, type, op0);
2767 if (TREE_CONSTANT (res))
2768 return res;
2769 return NULL;
2771 return NULL;
2774 /* Return the constant that EXPR will likely have at execution time, or
2775 NULL if unknown. The function is used by the builtin_expect branch
2776 predictor, so the evidence must come from this construct and possible
2778 additional constant folding. We may want to implement a more involved
2779 value guess (such as value range propagation based prediction), but
2780 such tricks shall go to a new implementation. */
2782 static tree
2783 expr_expected_value (tree expr, enum br_predictor *predictor,
2784 HOST_WIDE_INT *probability)
2786 enum tree_code code;
2787 tree op0, op1;
2789 if (TREE_CONSTANT (expr))
2791 *predictor = PRED_UNCONDITIONAL;
2792 *probability = -1;
2793 return expr;
2796 extract_ops_from_tree (expr, &code, &op0, &op1);
2797 return expr_expected_value_1 (TREE_TYPE (expr),
2798 op0, code, op1, predictor, probability);
2802 /* Return the probability of a PREDICTOR. If the predictor has a
2803 variable probability, return the passed PROBABILITY. */
2805 static HOST_WIDE_INT
2806 get_predictor_value (br_predictor predictor, HOST_WIDE_INT probability)
2808 switch (predictor)
2810 case PRED_BUILTIN_EXPECT:
2811 case PRED_BUILTIN_EXPECT_WITH_PROBABILITY:
2812 case PRED_COMBINED_VALUE_PREDICTIONS_PHI:
2813 case PRED_COMBINED_VALUE_PREDICTIONS:
2814 gcc_assert (probability != -1);
2815 return probability;
2816 default:
2817 gcc_assert (probability == -1);
2818 return predictor_info[(int) predictor].hitrate;
2822 /* Predict using opcode of the last statement in basic block. */
2823 static void
2824 tree_predict_by_opcode (basic_block bb)
2826 edge then_edge;
2827 tree op0, op1;
2828 tree type;
2829 tree val;
2830 enum tree_code cmp;
2831 edge_iterator ei;
2832 enum br_predictor predictor;
2833 HOST_WIDE_INT probability;
2835 gimple *stmt = *gsi_last_bb (bb);
2836 if (!stmt)
2837 return;
2839 if (gswitch *sw = dyn_cast <gswitch *> (stmt))
2841 tree index = gimple_switch_index (sw);
2842 tree val = expr_expected_value (index, &predictor, &probability);
2843 if (val && TREE_CODE (val) == INTEGER_CST)
2845 edge e = find_taken_edge_switch_expr (sw, val);
2846 if (predictor == PRED_BUILTIN_EXPECT)
2848 int percent = param_builtin_expect_probability;
2849 gcc_assert (percent >= 0 && percent <= 100);
2850 predict_edge (e, PRED_BUILTIN_EXPECT,
2851 HITRATE (percent));
2853 else
2854 predict_edge_def (e, predictor, TAKEN);
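/* Editorial sketch, not part of predict.cc: an expected switch index
   routes the prediction to a single case edge.

     switch (__builtin_expect (cmd, CMD_READ))
       {
       case CMD_READ: ...
       }

   CMD_READ is a hypothetical constant used only for illustration;
   find_taken_edge_switch_expr resolves its edge, predicted TAKEN.  */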
2858 if (gimple_code (stmt) != GIMPLE_COND)
2859 return;
2860 FOR_EACH_EDGE (then_edge, ei, bb->succs)
2861 if (then_edge->flags & EDGE_TRUE_VALUE)
2862 break;
2863 op0 = gimple_cond_lhs (stmt);
2864 op1 = gimple_cond_rhs (stmt);
2865 cmp = gimple_cond_code (stmt);
2866 type = TREE_TYPE (op0);
2867 val = expr_expected_value_1 (boolean_type_node, op0, cmp, op1,
2868 &predictor, &probability);
2869 if (val && TREE_CODE (val) == INTEGER_CST)
2871 HOST_WIDE_INT prob = get_predictor_value (predictor, probability);
2872 if (integer_zerop (val))
2873 prob = REG_BR_PROB_BASE - prob;
2874 predict_edge (then_edge, predictor, prob);
2876 /* Try "pointer heuristic."
2877 A comparison ptr == 0 is predicted as false.
2878 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
2879 if (POINTER_TYPE_P (type))
2881 if (cmp == EQ_EXPR)
2882 predict_edge_def (then_edge, PRED_TREE_POINTER, NOT_TAKEN);
2883 else if (cmp == NE_EXPR)
2884 predict_edge_def (then_edge, PRED_TREE_POINTER, TAKEN);
2886 else
2888 /* Try "opcode heuristic."
2889 EQ tests are usually false and NE tests are usually true. Also,
2890 most quantities are positive, so we can make the appropriate guesses
2891 about signed comparisons against zero. */
2892 switch (cmp)
2894 case EQ_EXPR:
2895 case UNEQ_EXPR:
2896 /* Floating point comparisons appear to behave in a very
2897 unpredictable way because of the special role of = tests in
2898 FP code. */
2899 if (FLOAT_TYPE_P (type))
2901 /* Comparisons with 0 are often used for booleans and there is
2902 nothing useful to predict about them. */
2903 else if (integer_zerop (op0) || integer_zerop (op1))
2905 else
2906 predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, NOT_TAKEN);
2907 break;
2909 case NE_EXPR:
2910 case LTGT_EXPR:
2911 /* Floating point comparisons appear to behave in a very
2912 unpredictable way because of the special role of = tests in
2913 FP code. */
2914 if (FLOAT_TYPE_P (type))
2916 /* Comparisons with 0 are often used for booleans and there is
2917 nothing useful to predict about them. */
2918 else if (integer_zerop (op0)
2919 || integer_zerop (op1))
2921 else
2922 predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, TAKEN);
2923 break;
2925 case ORDERED_EXPR:
2926 predict_edge_def (then_edge, PRED_TREE_FPOPCODE, TAKEN);
2927 break;
2929 case UNORDERED_EXPR:
2930 predict_edge_def (then_edge, PRED_TREE_FPOPCODE, NOT_TAKEN);
2931 break;
2933 case LE_EXPR:
2934 case LT_EXPR:
2935 if (integer_zerop (op1)
2936 || integer_onep (op1)
2937 || integer_all_onesp (op1)
2938 || real_zerop (op1)
2939 || real_onep (op1)
2940 || real_minus_onep (op1))
2941 predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, NOT_TAKEN);
2942 break;
2944 case GE_EXPR:
2945 case GT_EXPR:
2946 if (integer_zerop (op1)
2947 || integer_onep (op1)
2948 || integer_all_onesp (op1)
2949 || real_zerop (op1)
2950 || real_onep (op1)
2951 || real_minus_onep (op1))
2952 predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, TAKEN);
2953 break;
2955 default:
2956 break;
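/* Editorial sketch, not part of predict.cc: GIMPLE conditions matched
   by the opcode heuristics above, mirroring the RTL version in
   bb_estimate_probability_locally:

     if (a == b)   ...   PRED_TREE_OPCODE_NONEQUAL: predicted not taken
     if (x < 0)    ...   PRED_TREE_OPCODE_POSITIVE: predicted not taken
     if (p != 0)   ...   PRED_TREE_POINTER (p a pointer): predicted taken
*/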
2960 /* Returns TRUE if STMT is an exit(0)-like statement. */
2962 static bool
2963 is_exit_with_zero_arg (const gimple *stmt)
2965 /* This is not exit, _exit or _Exit. */
2966 if (!gimple_call_builtin_p (stmt, BUILT_IN_EXIT)
2967 && !gimple_call_builtin_p (stmt, BUILT_IN__EXIT)
2968 && !gimple_call_builtin_p (stmt, BUILT_IN__EXIT2))
2969 return false;
2971 /* The argument is an integer zero. */
2972 return integer_zerop (gimple_call_arg (stmt, 0));
2975 /* Try to guess whether the return value means an error code. */
2977 static enum br_predictor
2978 return_prediction (tree val, enum prediction *prediction)
2980 /* VOID. */
2981 if (!val)
2982 return PRED_NO_PREDICTION;
2983 /* Different heuristics for pointers and scalars. */
2984 if (POINTER_TYPE_P (TREE_TYPE (val)))
2986 /* NULL is usually not returned. */
2987 if (integer_zerop (val))
2989 *prediction = NOT_TAKEN;
2990 return PRED_NULL_RETURN;
2993 else if (INTEGRAL_TYPE_P (TREE_TYPE (val)))
2995 /* Negative return values are often used to indicate
2996 errors. */
2997 if (TREE_CODE (val) == INTEGER_CST
2998 && tree_int_cst_sgn (val) < 0)
3000 *prediction = NOT_TAKEN;
3001 return PRED_NEGATIVE_RETURN;
3003 /* Constant return values seem to be commonly taken.
3004 Zero/one often represent booleans, so exclude them from the
3005 heuristics. */
3006 if (TREE_CONSTANT (val)
3007 && (!integer_zerop (val) && !integer_onep (val)))
3009 *prediction = NOT_TAKEN;
3010 return PRED_CONST_RETURN;
3013 return PRED_NO_PREDICTION;
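/* Editorial sketch, not part of predict.cc: return conventions the
   heuristics above key on.

     void *lookup (int k) { ... return NULL; }  PRED_NULL_RETURN
     int open_it (void)   { ... return -1; }    PRED_NEGATIVE_RETURN
     int kind (void)      { ... return 42; }    PRED_CONST_RETURN

   lookup, open_it and kind are hypothetical; in each case the path
   returning the shown value is predicted NOT_TAKEN.  */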
3016 /* Return zero if phi result could have values other than -1, 0 or 1,
3017 otherwise return a bitmask, with bits 0, 1 and 2 set if -1, 0 and 1
3018 values are used or likely. */
3020 static int
3021 zero_one_minusone (gphi *phi, int limit)
3023 int phi_num_args = gimple_phi_num_args (phi);
3024 int ret = 0;
3025 for (int i = 0; i < phi_num_args; i++)
3027 tree t = PHI_ARG_DEF (phi, i);
3028 if (TREE_CODE (t) != INTEGER_CST)
3029 continue;
3030 wide_int w = wi::to_wide (t);
3031 if (w == -1)
3032 ret |= 1;
3033 else if (w == 0)
3034 ret |= 2;
3035 else if (w == 1)
3036 ret |= 4;
3037 else
3038 return 0;
3040 for (int i = 0; i < phi_num_args; i++)
3042 tree t = PHI_ARG_DEF (phi, i);
3043 if (TREE_CODE (t) == INTEGER_CST)
3044 continue;
3045 if (TREE_CODE (t) != SSA_NAME)
3046 return 0;
3047 gimple *g = SSA_NAME_DEF_STMT (t);
3048 if (gimple_code (g) == GIMPLE_PHI && limit > 0)
3049 if (int r = zero_one_minusone (as_a <gphi *> (g), limit - 1))
3051 ret |= r;
3052 continue;
3054 if (!is_gimple_assign (g))
3055 return 0;
3056 if (gimple_assign_cast_p (g))
3058 tree rhs1 = gimple_assign_rhs1 (g);
3059 if (TREE_CODE (rhs1) != SSA_NAME
3060 || !INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
3061 || TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
3062 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
3063 return 0;
3064 ret |= (2 | 4);
3065 continue;
3067 if (TREE_CODE_CLASS (gimple_assign_rhs_code (g)) != tcc_comparison)
3068 return 0;
3069 ret |= (2 | 4);
3071 return ret;
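/* Editorial sketch, not part of predict.cc: a qsort-style comparator
   whose return PHI makes zero_one_minusone return the bitmask 1|2|4,
   letting apply_return_prediction below skip the negative-return
   heuristic for it.

     static int
     cmp_int (const void *a, const void *b)
     {
       int x = *(const int *) a, y = *(const int *) b;
       if (x < y) return -1;
       if (x > y) return 1;
       return 0;
     }
*/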
3074 /* Find the basic block with return expression and look up for possible
3075 return value trying to apply RETURN_PREDICTION heuristics. */
3076 static void
3077 apply_return_prediction (void)
3079 greturn *return_stmt = NULL;
3080 tree return_val;
3081 edge e;
3082 gphi *phi;
3083 int phi_num_args, i;
3084 enum br_predictor pred;
3085 enum prediction direction;
3086 edge_iterator ei;
3088 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
3090 if (greturn *last = safe_dyn_cast <greturn *> (*gsi_last_bb (e->src)))
3092 return_stmt = last;
3093 break;
3096 if (!e)
3097 return;
3098 return_val = gimple_return_retval (return_stmt);
3099 if (!return_val)
3100 return;
3101 if (TREE_CODE (return_val) != SSA_NAME
3102 || !SSA_NAME_DEF_STMT (return_val)
3103 || gimple_code (SSA_NAME_DEF_STMT (return_val)) != GIMPLE_PHI)
3104 return;
3105 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (return_val));
3106 phi_num_args = gimple_phi_num_args (phi);
3107 pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction);
3109 /* Avoid the case where the function returns -1, 0 and 1 values and
3110 nothing else. Those could be qsort etc. comparison functions
3111 where the negative return isn't less probable than positive.
3112 For this require that the function returns at least -1 or 1
3113 or -1 and a boolean value or comparison result, so that functions
3114 returning just -1 and 0 are treated as if -1 represents error value. */
3115 if (INTEGRAL_TYPE_P (TREE_TYPE (return_val))
3116 && !TYPE_UNSIGNED (TREE_TYPE (return_val))
3117 && TYPE_PRECISION (TREE_TYPE (return_val)) > 1)
3118 if (int r = zero_one_minusone (phi, 3))
3119 if ((r & (1 | 4)) == (1 | 4))
3120 return;
3122 /* Avoid the degenerate case where all return values from the function
3123 belong to the same category (i.e. they are all positive constants)
3124 so we can hardly say anything about them. */
3125 for (i = 1; i < phi_num_args; i++)
3126 if (pred != return_prediction (PHI_ARG_DEF (phi, i), &direction))
3127 break;
3128 if (i != phi_num_args)
3129 for (i = 0; i < phi_num_args; i++)
3131 pred = return_prediction (PHI_ARG_DEF (phi, i), &direction);
3132 if (pred != PRED_NO_PREDICTION)
3133 predict_paths_leading_to_edge (gimple_phi_arg_edge (phi, i), pred,
3134 direction);
3138 /* Look for basic blocks that contain unlikely-to-happen events
3139 (such as noreturn calls) and mark all paths leading to execution
3140 of these basic blocks as unlikely. */
3142 static void
3143 tree_bb_level_predictions (void)
3145 basic_block bb;
3146 bool has_return_edges = false;
3147 edge e;
3148 edge_iterator ei;
3150 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
3151 if (!unlikely_executed_edge_p (e) && !(e->flags & EDGE_ABNORMAL_CALL))
3153 has_return_edges = true;
3154 break;
3157 apply_return_prediction ();
3159 FOR_EACH_BB_FN (bb, cfun)
3161 gimple_stmt_iterator gsi;
3163 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3165 gimple *stmt = gsi_stmt (gsi);
3166 tree decl;
3168 if (is_gimple_call (stmt))
3170 if (gimple_call_noreturn_p (stmt)
3171 && has_return_edges
3172 && !is_exit_with_zero_arg (stmt))
3173 predict_paths_leading_to (bb, PRED_NORETURN,
3174 NOT_TAKEN);
3175 decl = gimple_call_fndecl (stmt);
3176 if (decl
3177 && lookup_attribute ("cold",
3178 DECL_ATTRIBUTES (decl)))
3179 predict_paths_leading_to (bb, PRED_COLD_FUNCTION,
3180 NOT_TAKEN);
3181 if (decl && recursive_call_p (current_function_decl, decl))
3182 predict_paths_leading_to (bb, PRED_RECURSIVE_CALL,
3183 NOT_TAKEN);
3185 else if (gimple_code (stmt) == GIMPLE_PREDICT)
3187 predict_paths_leading_to (bb, gimple_predict_predictor (stmt),
3188 gimple_predict_outcome (stmt));
3189 /* Keep GIMPLE_PREDICT around so early inlining will propagate
3190 hints to callers. */
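/* Editorial sketch, not part of predict.cc: block-level predictors
   applied by tree_bb_level_predictions.

     extern void fatal (const char *) __attribute__ ((noreturn));
     extern void trace (void) __attribute__ ((cold));

     if (broken)
       fatal ("boom");    paths to this call: PRED_NORETURN, NOT_TAKEN
     if (verbose)
       trace ();          paths to this call: PRED_COLD_FUNCTION, NOT_TAKEN

   fatal and trace are hypothetical declarations for illustration.  */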
3196 /* Callback for hash_map::traverse, asserts that the pointer map is
3197 empty. */
3199 bool
3200 assert_is_empty (const_basic_block const &, edge_prediction *const &value,
3201 void *)
3203 gcc_assert (!value);
3204 return true;
3207 /* Predict branch probabilities and estimate profile for basic block BB.
3208 When LOCAL_ONLY is set do not use any global properties of CFG. */
3210 static void
3211 tree_estimate_probability_bb (basic_block bb, bool local_only)
3213 edge e;
3214 edge_iterator ei;
3216 FOR_EACH_EDGE (e, ei, bb->succs)
3218 /* Look for a block we are guarding (i.e. we dominate it,
3219 but it doesn't postdominate us). */
3220 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) && e->dest != bb
3221 && !local_only
3222 && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
3223 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
3225 gimple_stmt_iterator bi;
3227 /* The call heuristic claims that a guarded function call
3228 is improbable. This is because such calls are often used
3229 to signal exceptional situations such as printing error
3230 messages. */
3231 for (bi = gsi_start_bb (e->dest); !gsi_end_p (bi);
3232 gsi_next (&bi))
3234 gimple *stmt = gsi_stmt (bi);
3235 if (is_gimple_call (stmt)
3236 && !gimple_inexpensive_call_p (as_a <gcall *> (stmt))
3237 /* Constant and pure calls are hardly used to signal
3238 something exceptional. */
3239 && gimple_has_side_effects (stmt))
3241 if (gimple_call_fndecl (stmt))
3242 predict_edge_def (e, PRED_CALL, NOT_TAKEN);
3243 else if (virtual_method_call_p (gimple_call_fn (stmt)))
3244 predict_edge_def (e, PRED_POLYMORPHIC_CALL, NOT_TAKEN);
3245 else
3246 predict_edge_def (e, PRED_INDIR_CALL, TAKEN);
3247 break;
3252 tree_predict_by_opcode (bb);
3255 /* Predict branch probabilities and estimate profile of the tree CFG.
3256 This function can be called from the loop optimizers to recompute
3257 the profile information.
3258 If DRY_RUN is set, do not modify CFG and only produce dump files. */
3260 void
3261 tree_estimate_probability (bool dry_run)
3263 basic_block bb;
3265 connect_infinite_loops_to_exit ();
3266 /* We use loop_niter_by_eval, which requires that the loops have
3267 preheaders. */
3268 create_preheaders (CP_SIMPLE_PREHEADERS);
3269 calculate_dominance_info (CDI_POST_DOMINATORS);
3270 /* Decide which edges are known to be unlikely. This improves later
3271 branch prediction. */
3272 determine_unlikely_bbs ();
3274 bb_predictions = new hash_map<const_basic_block, edge_prediction *>;
3275 ssa_expected_value = new hash_map<int_hash<unsigned, 0>, expected_value>;
3277 tree_bb_level_predictions ();
3278 record_loop_exits ();
3280 if (number_of_loops (cfun) > 1)
3281 predict_loops ();
3283 FOR_EACH_BB_FN (bb, cfun)
3284 tree_estimate_probability_bb (bb, false);
3286 FOR_EACH_BB_FN (bb, cfun)
3287 combine_predictions_for_bb (bb, dry_run);
3289 if (flag_checking)
3290 bb_predictions->traverse<void *, assert_is_empty> (NULL);
3292 delete bb_predictions;
3293 bb_predictions = NULL;
3294 delete ssa_expected_value;
3295 ssa_expected_value = NULL;
3297 if (!dry_run
3298 && profile_status_for_fn (cfun) != PROFILE_READ)
3299 estimate_bb_frequencies ();
3300 free_dominance_info (CDI_POST_DOMINATORS);
3301 remove_fake_exit_edges ();
3304 /* Set edge->probability for each successor edge of BB. */
3305 void
3306 tree_guess_outgoing_edge_probabilities (basic_block bb)
3308 bb_predictions = new hash_map<const_basic_block, edge_prediction *>;
3309 ssa_expected_value = new hash_map<int_hash<unsigned, 0>, expected_value>;
3310 tree_estimate_probability_bb (bb, true);
3311 combine_predictions_for_bb (bb, false);
3312 if (flag_checking)
3313 bb_predictions->traverse<void *, assert_is_empty> (NULL);
3314 delete bb_predictions;
3315 bb_predictions = NULL;
3316 delete ssa_expected_value;
3317 ssa_expected_value = NULL;
3320 /* Filter function predicate that returns true for an edge prediction P
3321 unless P is a PRED_LOOP_GUARD prediction on the edge DATA. */
3323 static bool
3324 not_loop_guard_equal_edge_p (edge_prediction *p, void *data)
3326 return p->ep_edge != (edge)data || p->ep_predictor != PRED_LOOP_GUARD;
3329 /* Predict edge E with PRED unless it is already predicted by some predictor
3330 considered equivalent. */
3332 static void
3333 maybe_predict_edge (edge e, enum br_predictor pred, enum prediction taken)
3335 if (edge_predicted_by_p (e, pred, taken))
3336 return;
3337 if (pred == PRED_LOOP_GUARD
3338 && edge_predicted_by_p (e, PRED_LOOP_GUARD_WITH_RECURSION, taken))
3339 return;
3340 /* Consider PRED_LOOP_GUARD_WITH_RECURSION superior to LOOP_GUARD. */
3341 if (pred == PRED_LOOP_GUARD_WITH_RECURSION)
3343 edge_prediction **preds = bb_predictions->get (e->src);
3344 if (preds)
3345 filter_predictions (preds, not_loop_guard_equal_edge_p, e);
3347 predict_edge_def (e, pred, taken);
3349 /* Predict edges to successors of CUR whose sources are not postdominated
3350 by BB with PRED, and recurse to all postdominators. */
3352 static void
3353 predict_paths_for_bb (basic_block cur, basic_block bb,
3354 enum br_predictor pred,
3355 enum prediction taken,
3356 bitmap visited, class loop *in_loop = NULL)
3358 edge e;
3359 edge_iterator ei;
3360 basic_block son;
3362 /* If we exited the loop or CUR is unconditional in the loop, there is
3363 nothing to do. */
3364 if (in_loop
3365 && (!flow_bb_inside_loop_p (in_loop, cur)
3366 || dominated_by_p (CDI_DOMINATORS, in_loop->latch, cur)))
3367 return;
3369 /* We are looking for all edges forming an edge cut induced by
3370 the set of all blocks postdominated by BB. */
3371 FOR_EACH_EDGE (e, ei, cur->preds)
3372 if (e->src->index >= NUM_FIXED_BLOCKS
3373 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, bb))
3375 edge e2;
3376 edge_iterator ei2;
3377 bool found = false;
3379 /* Ignore fake and EH edges; we predict them as not taken anyway. */
3380 if (unlikely_executed_edge_p (e))
3381 continue;
3382 gcc_assert (bb == cur || dominated_by_p (CDI_POST_DOMINATORS, cur, bb));
3384 /* See if there is an edge from e->src that is not abnormal
3385 and does not lead to BB and does not exit the loop. */
3386 FOR_EACH_EDGE (e2, ei2, e->src->succs)
3387 if (e2 != e
3388 && !unlikely_executed_edge_p (e2)
3389 && !dominated_by_p (CDI_POST_DOMINATORS, e2->dest, bb)
3390 && (!in_loop || !loop_exit_edge_p (in_loop, e2)))
3392 found = true;
3393 break;
3396 /* If there is a non-abnormal path leaving e->src, predict the edge
3397 using the predictor. Otherwise we need to look for paths
3398 leading to e->src.
3400 The second case may lead to an infinite loop when we are predicting
3401 regions that are only reachable by abnormal edges. We simply
3402 prevent visiting a given BB twice. */
3403 if (found)
3404 maybe_predict_edge (e, pred, taken);
3405 else if (bitmap_set_bit (visited, e->src->index))
3406 predict_paths_for_bb (e->src, e->src, pred, taken, visited, in_loop);
3408 for (son = first_dom_son (CDI_POST_DOMINATORS, cur);
3409 son;
3410 son = next_dom_son (CDI_POST_DOMINATORS, son))
3411 predict_paths_for_bb (son, bb, pred, taken, visited, in_loop);
3414 /* Set branch probabilities on paths leading to BB according to PRED
3415 and TAKEN. */
3417 static void
3418 predict_paths_leading_to (basic_block bb, enum br_predictor pred,
3419 enum prediction taken, class loop *in_loop)
3421 predict_paths_for_bb (bb, bb, pred, taken, auto_bitmap (), in_loop);
3424 /* Like predict_paths_leading_to but take edge instead of basic block. */
3426 static void
3427 predict_paths_leading_to_edge (edge e, enum br_predictor pred,
3428 enum prediction taken, class loop *in_loop)
3430 bool has_nonloop_edge = false;
3431 edge_iterator ei;
3432 edge e2;
3434 basic_block bb = e->src;
3435 FOR_EACH_EDGE (e2, ei, bb->succs)
3436 if (e2->dest != e->src && e2->dest != e->dest
3437 && !unlikely_executed_edge_p (e2)
3438 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e2->dest))
3440 has_nonloop_edge = true;
3441 break;
3444 if (!has_nonloop_edge)
3445 predict_paths_for_bb (bb, bb, pred, taken, auto_bitmap (), in_loop);
3446 else
3447 maybe_predict_edge (e, pred, taken);
3450 /* This is used to carry information about basic blocks. It is
3451 attached to the AUX field of the standard CFG block. */
3453 class block_info
3455 public:
3456 /* Estimated frequency of execution of basic_block. */
3457 sreal frequency;
3459 /* To keep queue of basic blocks to process. */
3460 basic_block next;
3462 /* Number of predecessors we need to visit first. */
3463 int npredecessors;
3466 /* Similar information for edges. */
3467 class edge_prob_info
3469 public:
3470 /* In case the edge is a loopback edge, the probability that the edge
3471 will be reached provided the header is. The estimated number of
3472 iterations of the loop can then be computed as 1 / (1 - back_edge_prob). */
3473 sreal back_edge_prob;
3474 /* True if the edge is a loopback edge in the natural loop. */
3475 unsigned int back_edge:1;
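/* Editorial worked example, not part of predict.cc: if a loop's latch
   edge is taken with probability 0.9, back_edge_prob is 0.9 and the
   loop is expected to iterate 1 / (1 - 0.9) = 10 times, so
   propagate_freq below scales the header frequency by 10, subject to
   the max_cyclic_prob cap derived from param_max_predicted_iterations.  */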
3478 #define BLOCK_INFO(B) ((block_info *) (B)->aux)
3479 #undef EDGE_INFO
3480 #define EDGE_INFO(E) ((edge_prob_info *) (E)->aux)
3482 /* Helper function for estimate_bb_frequencies.
3483 Propagate the frequencies in blocks marked in
3484 TOVISIT, starting in HEAD. */
3486 static void
3487 propagate_freq (basic_block head, bitmap tovisit,
3488 sreal max_cyclic_prob)
3490 basic_block bb;
3491 basic_block last;
3492 unsigned i;
3493 edge e;
3494 basic_block nextbb;
3495 bitmap_iterator bi;
3497 /* For each basic block we need to visit, count the number of its
3498 predecessors we need to visit first. */
3499 EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi)
3501 edge_iterator ei;
3502 int count = 0;
3504 bb = BASIC_BLOCK_FOR_FN (cfun, i);
3506 FOR_EACH_EDGE (e, ei, bb->preds)
3508 bool visit = bitmap_bit_p (tovisit, e->src->index);
3510 if (visit && !(e->flags & EDGE_DFS_BACK))
3511 count++;
3512 else if (visit && dump_file && !EDGE_INFO (e)->back_edge)
3513 fprintf (dump_file,
3514 "Irreducible region hit, ignoring edge to %i->%i\n",
3515 e->src->index, bb->index);
3517 BLOCK_INFO (bb)->npredecessors = count;
3518 /* When the function never returns, we will never process the exit block. */
3519 if (!count && bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
3520 bb->count = profile_count::zero ();
3523 BLOCK_INFO (head)->frequency = 1;
3524 last = head;
3525 for (bb = head; bb; bb = nextbb)
3527 edge_iterator ei;
3528 sreal cyclic_probability = 0;
3529 sreal frequency = 0;
3531 nextbb = BLOCK_INFO (bb)->next;
3532 BLOCK_INFO (bb)->next = NULL;
3534 /* Compute frequency of basic block. */
3535 if (bb != head)
3537 if (flag_checking)
3538 FOR_EACH_EDGE (e, ei, bb->preds)
3539 gcc_assert (!bitmap_bit_p (tovisit, e->src->index)
3540 || (e->flags & EDGE_DFS_BACK));
3542 FOR_EACH_EDGE (e, ei, bb->preds)
3543 if (EDGE_INFO (e)->back_edge)
3544 cyclic_probability += EDGE_INFO (e)->back_edge_prob;
3545 else if (!(e->flags & EDGE_DFS_BACK))
3547 /* FIXME: Graphite is producing edges with no profile. Once
3548 this is fixed, drop this. */
3549 sreal tmp = e->probability.initialized_p () ?
3550 e->probability.to_sreal () : 0;
3551 frequency += tmp * BLOCK_INFO (e->src)->frequency;
3554 if (cyclic_probability == 0)
3556 BLOCK_INFO (bb)->frequency = frequency;
3558 else
3560 if (cyclic_probability > max_cyclic_prob)
3562 if (dump_file)
3563 fprintf (dump_file,
3564 "cyclic probability of bb %i is %f (capped to %f)"
3565 "; turning freq %f",
3566 bb->index, cyclic_probability.to_double (),
3567 max_cyclic_prob.to_double (),
3568 frequency.to_double ());
3570 cyclic_probability = max_cyclic_prob;
3572 else if (dump_file)
3573 fprintf (dump_file,
3574 "cyclic probability of bb %i is %f; turning freq %f",
3575 bb->index, cyclic_probability.to_double (),
3576 frequency.to_double ());
3578 BLOCK_INFO (bb)->frequency = frequency
3579 / (sreal (1) - cyclic_probability);
3580 if (dump_file)
3581 fprintf (dump_file, " to %f\n",
3582 BLOCK_INFO (bb)->frequency.to_double ());
3586 bitmap_clear_bit (tovisit, bb->index);
3588 e = find_edge (bb, head);
3589 if (e)
3591 /* FIXME: Graphite is producing edges with no profile. Once
3592 this is fixed, drop this. */
3593 sreal tmp = e->probability.initialized_p () ?
3594 e->probability.to_sreal () : 0;
3595 EDGE_INFO (e)->back_edge_prob = tmp * BLOCK_INFO (bb)->frequency;
3598 /* Propagate to successor blocks. */
3599 FOR_EACH_EDGE (e, ei, bb->succs)
3600 if (!(e->flags & EDGE_DFS_BACK)
3601 && BLOCK_INFO (e->dest)->npredecessors)
3603 BLOCK_INFO (e->dest)->npredecessors--;
3604 if (!BLOCK_INFO (e->dest)->npredecessors)
3606 if (!nextbb)
3607 nextbb = e->dest;
3608 else
3609 BLOCK_INFO (last)->next = e->dest;
3611 last = e->dest;
3617 /* Estimate frequencies in loops at the same nest level. */
3619 static void
3620 estimate_loops_at_level (class loop *first_loop, sreal max_cyclic_prob)
3622 class loop *loop;
3624 for (loop = first_loop; loop; loop = loop->next)
3626 edge e;
3627 basic_block *bbs;
3628 unsigned i;
3629 auto_bitmap tovisit;
3631 estimate_loops_at_level (loop->inner, max_cyclic_prob);
3633 /* Find current loop back edge and mark it. */
3634 e = loop_latch_edge (loop);
3635 EDGE_INFO (e)->back_edge = 1;
3637 bbs = get_loop_body (loop);
3638 for (i = 0; i < loop->num_nodes; i++)
3639 bitmap_set_bit (tovisit, bbs[i]->index);
3640 free (bbs);
3641 propagate_freq (loop->header, tovisit, max_cyclic_prob);
3645 /* Propagates frequencies through structure of loops. */
3647 static void
3648 estimate_loops (void)
3650 auto_bitmap tovisit;
3651 basic_block bb;
3652 sreal max_cyclic_prob = (sreal)1
3653 - (sreal)1 / (param_max_predicted_iterations + 1);
3655 /* Start by estimating the frequencies in the loops. */
3656 if (number_of_loops (cfun) > 1)
3657 estimate_loops_at_level (current_loops->tree_root->inner, max_cyclic_prob);
3659 /* Now propagate the frequencies through all the blocks. */
3660 FOR_ALL_BB_FN (bb, cfun)
3662 bitmap_set_bit (tovisit, bb->index);
3664 propagate_freq (ENTRY_BLOCK_PTR_FOR_FN (cfun), tovisit, max_cyclic_prob);
3667 /* Drop the profile for NODE to guessed, and update its frequency based on
3668 whether it is expected to be hot given the CALL_COUNT. */
3670 static void
3671 drop_profile (struct cgraph_node *node, profile_count call_count)
3673 struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
3674 /* In the case where this was called by another function with a
3675 dropped profile, call_count will be 0. Since there are no
3676 non-zero call counts to this function, we don't know for sure
3677 whether it is hot, and therefore it will be marked normal below. */
3678 bool hot = maybe_hot_count_p (NULL, call_count);
3680 if (dump_file)
3681 fprintf (dump_file,
3682 "Dropping 0 profile for %s. %s based on calls.\n",
3683 node->dump_name (),
3684 hot ? "Function is hot" : "Function is normal");
3685 /* We only expect to miss profiles for functions that are reached
3686 via non-zero call edges in cases where the function may have
3687 been linked from another module or library (COMDATs and extern
3688 templates). See the comments below for handle_missing_profiles.
3689 Also, only warn in cases where the missing counts exceed the
3690 number of training runs. In certain cases with an execv followed
3691 by a no-return call the profile for the no-return call is not
3692 dumped and there can be a mismatch. */
3693 if (!DECL_COMDAT (node->decl) && !DECL_EXTERNAL (node->decl)
3694 && call_count > profile_info->runs)
3696 if (flag_profile_correction)
3698 if (dump_file)
3699 fprintf (dump_file,
3700 "Missing counts for called function %s\n",
3701 node->dump_name ());
3703 else
3704 warning (0, "Missing counts for called function %s",
3705 node->dump_name ());
3708 basic_block bb;
3709 if (opt_for_fn (node->decl, flag_guess_branch_prob))
3711 bool clear_zeros
3712 = !ENTRY_BLOCK_PTR_FOR_FN (fn)->count.nonzero_p ();
3713 FOR_ALL_BB_FN (bb, fn)
3714 if (clear_zeros || !(bb->count == profile_count::zero ()))
3715 bb->count = bb->count.guessed_local ();
3716 fn->cfg->count_max = fn->cfg->count_max.guessed_local ();
3718 else
3720 FOR_ALL_BB_FN (bb, fn)
3721 bb->count = profile_count::uninitialized ();
3722 fn->cfg->count_max = profile_count::uninitialized ();
3725 struct cgraph_edge *e;
3726 for (e = node->callees; e; e = e->next_callee)
3727 e->count = gimple_bb (e->call_stmt)->count;
3728 for (e = node->indirect_calls; e; e = e->next_callee)
3729 e->count = gimple_bb (e->call_stmt)->count;
3730 node->count = ENTRY_BLOCK_PTR_FOR_FN (fn)->count;
3732 profile_status_for_fn (fn)
3733 = (flag_guess_branch_prob ? PROFILE_GUESSED : PROFILE_ABSENT);
3734 node->frequency
3735 = hot ? NODE_FREQUENCY_HOT : NODE_FREQUENCY_NORMAL;
3738 /* In the case of COMDAT routines, multiple object files will contain the same
3739 function and the linker will select one for the binary. In that case
3740 all the other copies from the profile-instrumented binary will be missing
3741 profile counts. Look for cases where this happened, due to non-zero
3742 call counts going to 0-count functions, and drop the profile to guessed
3743 so that we can use the estimated probabilities and avoid optimizing only
3744 for size.
3746 The other case where the profile may be missing is when the routine
3747 is not going to be emitted to the object file, e.g. for "extern template"
3748 class methods. Those will be marked DECL_EXTERNAL. Emit a warning in
3749 all other cases of non-zero calls to 0-count functions. */
3751 void
3752 handle_missing_profiles (void)
3754 const int unlikely_frac = param_unlikely_bb_count_fraction;
3755 struct cgraph_node *node;
3756 auto_vec<struct cgraph_node *, 64> worklist;
3758 /* See if a 0-count function has non-0 count callers. In this case we
3759 lost some profile. Drop its function profile to PROFILE_GUESSED. */
3760 FOR_EACH_DEFINED_FUNCTION (node)
3762 struct cgraph_edge *e;
3763 profile_count call_count = profile_count::zero ();
3764 gcov_type max_tp_first_run = 0;
3765 struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
3767 if (node->count.ipa ().nonzero_p ())
3768 continue;
3769 for (e = node->callers; e; e = e->next_caller)
3770 if (e->count.ipa ().initialized_p () && e->count.ipa () > 0)
3772 call_count = call_count + e->count.ipa ();
3774 if (e->caller->tp_first_run > max_tp_first_run)
3775 max_tp_first_run = e->caller->tp_first_run;
3778 /* If the time profile is missing, assign the maximum that comes from
3779 the caller functions. */
3780 if (!node->tp_first_run && max_tp_first_run)
3781 node->tp_first_run = max_tp_first_run + 1;
3783 if (call_count > 0
3784 && fn && fn->cfg
3785 && call_count * unlikely_frac >= profile_info->runs)
3787 drop_profile (node, call_count);
3788 worklist.safe_push (node);
3792 /* Propagate the profile dropping to other 0-count COMDATs that are
3793 potentially called by COMDATs we already dropped the profile on. */
3794 while (worklist.length () > 0)
3796 struct cgraph_edge *e;
3798 node = worklist.pop ();
3799 for (e = node->callees; e; e = e->next_callee)
3801 struct cgraph_node *callee = e->callee;
3802 struct function *fn = DECL_STRUCT_FUNCTION (callee->decl);
3804 if (!(e->count.ipa () == profile_count::zero ())
3805 && callee->count.ipa ().nonzero_p ())
3806 continue;
3807 if ((DECL_COMDAT (callee->decl) || DECL_EXTERNAL (callee->decl))
3808 && fn && fn->cfg
3809 && profile_status_for_fn (fn) == PROFILE_READ)
3811 drop_profile (node, profile_count::zero ());
3812 worklist.safe_push (callee);
3818 /* Convert counts measured by profile-driven feedback to frequencies.
3819 Return nonzero iff there was any nonzero execution count. */
3821 bool
3822 update_max_bb_count (void)
3824 profile_count true_count_max = profile_count::uninitialized ();
3825 basic_block bb;
3827 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
3828 true_count_max = true_count_max.max (bb->count);
3830 cfun->cfg->count_max = true_count_max;
3832 return true_count_max.ipa ().nonzero_p ();
3835 /* Return true if the function is likely to be expensive, so there is no
3836 point optimizing the prologue or epilogue or doing inlining at the
3837 expense of code size growth. THRESHOLD is the limit on the number of
3838 instructions the function can execute on average to not be expensive. */
3840 bool
3841 expensive_function_p (int threshold)
3843 basic_block bb;
3845 /* If the profile was scaled in a way that the entry block has count 0,
3846 then the function is definitely taking a lot of time. */
3847 if (!ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.nonzero_p ())
3848 return true;
3850 profile_count limit = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count * threshold;
3851 profile_count sum = profile_count::zero ();
3852 FOR_EACH_BB_FN (bb, cfun)
3854 rtx_insn *insn;
3856 if (!bb->count.initialized_p ())
3858 if (dump_file)
3859 fprintf (dump_file, "Function is considered expensive because"
3860 " count of bb %i is not initialized\n", bb->index);
3861 return true;
3864 FOR_BB_INSNS (bb, insn)
3865 if (active_insn_p (insn))
3867 sum += bb->count;
3868 if (sum > limit)
3869 return true;
3873 return false;
3876 /* All basic blocks that are reachable only from unlikely basic blocks are
3877 unlikely. */
3879 void
3880 propagate_unlikely_bbs_forward (void)
3882 auto_vec<basic_block, 64> worklist;
3883 basic_block bb;
3884 edge_iterator ei;
3885 edge e;
3887 if (!(ENTRY_BLOCK_PTR_FOR_FN (cfun)->count == profile_count::zero ()))
3889 ENTRY_BLOCK_PTR_FOR_FN (cfun)->aux = (void *)(size_t) 1;
3890 worklist.safe_push (ENTRY_BLOCK_PTR_FOR_FN (cfun));
3892 while (worklist.length () > 0)
3894 bb = worklist.pop ();
3895 FOR_EACH_EDGE (e, ei, bb->succs)
3896 if (!(e->count () == profile_count::zero ())
3897 && !(e->dest->count == profile_count::zero ())
3898 && !e->dest->aux)
3900 e->dest->aux = (void *)(size_t) 1;
3901 worklist.safe_push (e->dest);
3906 FOR_ALL_BB_FN (bb, cfun)
3908 if (!bb->aux)
3910 if (!(bb->count == profile_count::zero ())
3911 && (dump_file && (dump_flags & TDF_DETAILS)))
3912 fprintf (dump_file,
3913 "Basic block %i is marked unlikely by forward prop\n",
3914 bb->index);
3915 bb->count = profile_count::zero ();
3917 else
3918 bb->aux = NULL;
3922 /* Determine basic blocks/edges that are known to be unlikely executed and
3923 set their counters to zero.
3924 This is done by first identifying obviously unlikely BBs/edges and then
3925 propagating in both directions. */
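
/* A minimal sketch of the propagation on a toy CFG:

      ENTRY -> A -> B -> EXIT
                \-> C   (say C calls a cold function such as abort)

   C is locally unlikely, so its count becomes zero.  Forward propagation
   keeps A and B, which stay reachable over non-zero edges; backward
   propagation additionally zeroes a block all of whose non-"never"
   successor edges lead to zero-count blocks, unless the block contains a
   statement that can terminate it.  */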
3927 static void
3928 determine_unlikely_bbs ()
3930 basic_block bb;
3931 auto_vec<basic_block, 64> worklist;
3932 edge_iterator ei;
3933 edge e;
3935 FOR_EACH_BB_FN (bb, cfun)
3937 if (!(bb->count == profile_count::zero ())
3938 && unlikely_executed_bb_p (bb))
3940 if (dump_file && (dump_flags & TDF_DETAILS))
3941 fprintf (dump_file, "Basic block %i is locally unlikely\n",
3942 bb->index);
3943 bb->count = profile_count::zero ();
3946 FOR_EACH_EDGE (e, ei, bb->succs)
3947 if (!(e->probability == profile_probability::never ())
3948 && unlikely_executed_edge_p (e))
3950 if (dump_file && (dump_flags & TDF_DETAILS))
3951 fprintf (dump_file, "Edge %i->%i is locally unlikely\n",
3952 bb->index, e->dest->index);
3953 e->probability = profile_probability::never ();
3956 gcc_checking_assert (!bb->aux);
3958 propagate_unlikely_bbs_forward ();
3960 auto_vec<int, 64> nsuccs;
3961 nsuccs.safe_grow_cleared (last_basic_block_for_fn (cfun), true);
3962 FOR_ALL_BB_FN (bb, cfun)
3963 if (!(bb->count == profile_count::zero ())
3964 && bb != EXIT_BLOCK_PTR_FOR_FN (cfun))
3966 nsuccs[bb->index] = 0;
3967 FOR_EACH_EDGE (e, ei, bb->succs)
3968 if (!(e->probability == profile_probability::never ())
3969 && !(e->dest->count == profile_count::zero ()))
3970 nsuccs[bb->index]++;
3971 if (!nsuccs[bb->index])
3972 worklist.safe_push (bb);
3974 while (worklist.length () > 0)
3976 bb = worklist.pop ();
3977 if (bb->count == profile_count::zero ())
3978 continue;
3979 if (bb != ENTRY_BLOCK_PTR_FOR_FN (cfun))
3981 bool found = false;
3982 for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
3983 !gsi_end_p (gsi); gsi_next (&gsi))
3984 if (stmt_can_terminate_bb_p (gsi_stmt (gsi))
3985 /* stmt_can_terminate_bb_p special-cases noreturns because it
3986 assumes that fake edges are created. We want to know that
3987 noreturn alone does not imply that the BB is unlikely. */
3988 || (is_gimple_call (gsi_stmt (gsi))
3989 && (gimple_call_flags (gsi_stmt (gsi)) & ECF_NORETURN)))
3991 found = true;
3992 break;
3994 if (found)
3995 continue;
3997 if (dump_file && (dump_flags & TDF_DETAILS))
3998 fprintf (dump_file,
3999 "Basic block %i is marked unlikely by backward prop\n",
4000 bb->index);
4001 bb->count = profile_count::zero ();
4002 FOR_EACH_EDGE (e, ei, bb->preds)
4003 if (!(e->probability == profile_probability::never ()))
4005 if (!(e->src->count == profile_count::zero ()))
4007 gcc_checking_assert (nsuccs[e->src->index] > 0);
4008 nsuccs[e->src->index]--;
4009 if (!nsuccs[e->src->index])
4010 worklist.safe_push (e->src);
4014 /* Finally all edges from non-0 regions to 0 are unlikely. */
4015 FOR_ALL_BB_FN (bb, cfun)
4017 if (!(bb->count == profile_count::zero ()))
4018 FOR_EACH_EDGE (e, ei, bb->succs)
4019 if (!(e->probability == profile_probability::never ())
4020 && e->dest->count == profile_count::zero ())
4022 if (dump_file && (dump_flags & TDF_DETAILS))
4023 fprintf (dump_file, "Edge %i->%i is unlikely because "
4024 "it enters unlikely block\n",
4025 bb->index, e->dest->index);
4026 e->probability = profile_probability::never ();
4029 edge other = NULL;
4031 FOR_EACH_EDGE (e, ei, bb->succs)
4032 if (e->probability == profile_probability::never ())
4034 else if (other)
4036 other = NULL;
4037 break;
4039 else
4040 other = e;
4041 if (other
4042 && !(other->probability == profile_probability::always ()))
4044 if (dump_file && (dump_flags & TDF_DETAILS))
4045 fprintf (dump_file, "Edge %i->%i is locally likely\n",
4046 bb->index, other->dest->index);
4047 other->probability = profile_probability::always ();
4050 if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count == profile_count::zero ())
4051 cgraph_node::get (current_function_decl)->count = profile_count::zero ();
4054 /* Estimate and propagate basic block frequencies using the given branch
4055 probabilities. */
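
/* A worked sketch of the scaling done below (values illustrative): if the
   largest frequency computed by estimate_loops is 1000, all frequencies
   are multiplied by 2^(n_bits / 2) / 1000 so that the hottest block sits
   near the middle of the exponential range of profile_count.  This keeps
   later count arithmetic away from both overflow and the precision loss
   a too-small scale would cause.  */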
4057 static void
4058 estimate_bb_frequencies ()
4060 basic_block bb;
4061 sreal freq_max;
4063 determine_unlikely_bbs ();
4065 mark_dfs_back_edges ();
4067 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun))->probability =
4068 profile_probability::always ();
4070 /* Set up block info for each basic block. */
4071 alloc_aux_for_blocks (sizeof (block_info));
4072 alloc_aux_for_edges (sizeof (edge_prob_info));
4073 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
4075 edge e;
4076 edge_iterator ei;
4078 FOR_EACH_EDGE (e, ei, bb->succs)
4080 /* FIXME: Graphite is producing edges with no profile. Once
4081 this is fixed, drop this. */
4082 if (e->probability.initialized_p ())
4083 EDGE_INFO (e)->back_edge_prob
4084 = e->probability.to_sreal ();
4085 else
4086 /* back_edge_prob = 0.5 */
4087 EDGE_INFO (e)->back_edge_prob = sreal (1, -1);
4091 /* First compute frequencies locally for each loop from innermost
4092 to outermost to examine frequencies for back edges. */
4093 estimate_loops ();
4095 freq_max = 0;
4096 FOR_EACH_BB_FN (bb, cfun)
4097 if (freq_max < BLOCK_INFO (bb)->frequency)
4098 freq_max = BLOCK_INFO (bb)->frequency;
4100 /* Scaling frequencies up to the maximal profile count may result in
4101 frequent overflows, especially when inlining loops.
4102 Small scaling results in unnecessary precision loss. Stay in
4103 the middle of the (exponential) range. */
4104 freq_max = (sreal (1) << (profile_count::n_bits / 2)) / freq_max;
4105 if (freq_max < 16)
4106 freq_max = 16;
4107 profile_count ipa_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa ();
4108 cfun->cfg->count_max = profile_count::uninitialized ();
4109 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
4111 sreal tmp = BLOCK_INFO (bb)->frequency;
4112 if (tmp >= 1)
4114 gimple_stmt_iterator gsi;
4115 tree decl;
4117 /* Self-recursive calls cannot have a frequency greater than 1,
4118 or the program would never terminate. This results in an
4119 inconsistent bb profile, but it is better than greatly confusing
4120 IPA cost metrics. */
4121 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4122 if (is_gimple_call (gsi_stmt (gsi))
4123 && (decl = gimple_call_fndecl (gsi_stmt (gsi))) != NULL
4124 && recursive_call_p (current_function_decl, decl))
4126 if (dump_file)
4127 fprintf (dump_file, "Dropping frequency of recursive call"
4128 " in bb %i from %f\n", bb->index,
4129 tmp.to_double ());
4130 tmp = (sreal)9 / (sreal)10;
4131 break;
4134 tmp = tmp * freq_max;
4135 profile_count count = profile_count::from_gcov_type (tmp.to_nearest_int ());
4137 /* If we have profile feedback in which this function was never
4138 executed, then preserve this info. */
4139 if (!(bb->count == profile_count::zero ()))
4140 bb->count = count.guessed_local ().combine_with_ipa_count (ipa_count);
4141 cfun->cfg->count_max = cfun->cfg->count_max.max (bb->count);
4144 free_aux_for_blocks ();
4145 free_aux_for_edges ();
4146 compute_function_frequency ();
4149 /* Decide whether function is hot, cold or unlikely executed. */
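
/* Usage note (illustrative): with a guessed profile, a function declared

     __attribute__ ((cold)) void report_failure (const char *);

   is classified as NODE_FREQUENCY_UNLIKELY_EXECUTED below, while the
   "hot" attribute forces NODE_FREQUENCY_HOT regardless of the estimated
   block counts; with real (IPA) profile feedback the hot/cold attributes
   no longer override the measured counts.  */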
4150 void
4151 compute_function_frequency (void)
4153 basic_block bb;
4154 struct cgraph_node *node = cgraph_node::get (current_function_decl);
4156 if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
4157 || MAIN_NAME_P (DECL_NAME (current_function_decl)))
4158 node->only_called_at_startup = true;
4159 if (DECL_STATIC_DESTRUCTOR (current_function_decl))
4160 node->only_called_at_exit = true;
4162 if (!ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa_p ())
4164 int flags = flags_from_decl_or_type (current_function_decl);
4165 if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
4166 != NULL)
4167 node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
4168 else if (lookup_attribute ("hot", DECL_ATTRIBUTES (current_function_decl))
4169 != NULL)
4170 node->frequency = NODE_FREQUENCY_HOT;
4171 else if (flags & ECF_NORETURN)
4172 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
4173 else if (MAIN_NAME_P (DECL_NAME (current_function_decl)))
4174 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
4175 else if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
4176 || DECL_STATIC_DESTRUCTOR (current_function_decl))
4177 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
4178 return;
4181 node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
4182 if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
4183 == NULL)
4184 warn_function_cold (current_function_decl);
4185 if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa () == profile_count::zero ())
4186 return;
4187 FOR_EACH_BB_FN (bb, cfun)
4189 if (maybe_hot_bb_p (cfun, bb))
4191 node->frequency = NODE_FREQUENCY_HOT;
4192 return;
4194 if (!probably_never_executed_bb_p (cfun, bb))
4195 node->frequency = NODE_FREQUENCY_NORMAL;
4199 /* Build PREDICT_EXPR. */
4200 tree
4201 build_predict_expr (enum br_predictor predictor, enum prediction taken)
4203 tree t = build1 (PREDICT_EXPR, void_type_node,
4204 build_int_cst (integer_type_node, predictor));
4205 SET_PREDICT_EXPR_OUTCOME (t, taken);
4206 return t;
4209 const char *
4210 predictor_name (enum br_predictor predictor)
4212 return predictor_info[predictor].name;
4215 /* Predict branch probabilities and estimate profile of the tree CFG. */
4217 namespace {
4219 const pass_data pass_data_profile =
4221 GIMPLE_PASS, /* type */
4222 "profile_estimate", /* name */
4223 OPTGROUP_NONE, /* optinfo_flags */
4224 TV_BRANCH_PROB, /* tv_id */
4225 PROP_cfg, /* properties_required */
4226 0, /* properties_provided */
4227 0, /* properties_destroyed */
4228 0, /* todo_flags_start */
4229 0, /* todo_flags_finish */
4232 class pass_profile : public gimple_opt_pass
4234 public:
4235 pass_profile (gcc::context *ctxt)
4236 : gimple_opt_pass (pass_data_profile, ctxt)
4239 /* opt_pass methods: */
4240 bool gate (function *) final override { return flag_guess_branch_prob; }
4241 unsigned int execute (function *) final override;
4243 }; // class pass_profile
4245 unsigned int
4246 pass_profile::execute (function *fun)
4248 unsigned nb_loops;
4250 if (profile_status_for_fn (cfun) == PROFILE_GUESSED)
4251 return 0;
4253 loop_optimizer_init (LOOPS_NORMAL);
4254 if (dump_file && (dump_flags & TDF_DETAILS))
4255 flow_loops_dump (dump_file, NULL, 0);
4257 nb_loops = number_of_loops (fun);
4258 if (nb_loops > 1)
4259 scev_initialize ();
4261 tree_estimate_probability (false);
4262 cfun->cfg->full_profile = true;
4264 if (nb_loops > 1)
4265 scev_finalize ();
4267 loop_optimizer_finalize ();
4268 if (dump_file && (dump_flags & TDF_DETAILS))
4269 gimple_dump_cfg (dump_file, dump_flags);
4270 if (profile_status_for_fn (fun) == PROFILE_ABSENT)
4271 profile_status_for_fn (fun) = PROFILE_GUESSED;
4272 if (dump_file && (dump_flags & TDF_DETAILS))
4274 sreal iterations;
4275 for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
4276 if (expected_loop_iterations_by_profile (loop, &iterations))
4277 fprintf (dump_file, "Loop %d got predicted to iterate %f times.\n",
4278 loop->num, iterations.to_double ());
4280 return 0;
4283 } // anon namespace
4285 gimple_opt_pass *
4286 make_pass_profile (gcc::context *ctxt)
4288 return new pass_profile (ctxt);
4291 /* Return true when PRED predictor should be removed after early
4292 tree passes. Most of the predictors are beneficial to survive,
4293 as early inlining can also distribute them into callers' bodies. */
4295 static bool
4296 strip_predictor_early (enum br_predictor pred)
4298 switch (pred)
4300 case PRED_TREE_EARLY_RETURN:
4301 return true;
4302 default:
4303 return false;
4307 /* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements
4308 we no longer need. EARLY is set to true when called from early
4309 optimizations. */
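
/* For example (illustrative GIMPLE), the statement

     x_1 = __builtin_expect (cond_2, 1);

   is rewritten to the plain copy "x_1 = cond_2;", and GIMPLE_PREDICT
   statements are deleted; in the early variant only predictors for which
   strip_predictor_early returns true (currently PRED_TREE_EARLY_RETURN)
   are removed.  */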
4311 unsigned int
4312 strip_predict_hints (function *fun, bool early)
4314 basic_block bb;
4315 gimple *ass_stmt;
4316 tree var;
4317 bool changed = false;
4319 FOR_EACH_BB_FN (bb, fun)
4321 gimple_stmt_iterator bi;
4322 for (bi = gsi_start_bb (bb); !gsi_end_p (bi);)
4324 gimple *stmt = gsi_stmt (bi);
4326 if (gimple_code (stmt) == GIMPLE_PREDICT)
4328 if (!early
4329 || strip_predictor_early (gimple_predict_predictor (stmt)))
4331 gsi_remove (&bi, true);
4332 changed = true;
4333 continue;
4336 else if (is_gimple_call (stmt))
4338 tree fndecl = gimple_call_fndecl (stmt);
4340 if (!early
4341 && ((fndecl != NULL_TREE
4342 && fndecl_built_in_p (fndecl, BUILT_IN_EXPECT)
4343 && gimple_call_num_args (stmt) == 2)
4344 || (fndecl != NULL_TREE
4345 && fndecl_built_in_p (fndecl,
4346 BUILT_IN_EXPECT_WITH_PROBABILITY)
4347 && gimple_call_num_args (stmt) == 3)
4348 || (gimple_call_internal_p (stmt)
4349 && gimple_call_internal_fn (stmt) == IFN_BUILTIN_EXPECT)))
4351 var = gimple_call_lhs (stmt);
4352 changed = true;
4353 if (var)
4355 ass_stmt
4356 = gimple_build_assign (var, gimple_call_arg (stmt, 0));
4357 gsi_replace (&bi, ass_stmt, true);
4359 else
4361 gsi_remove (&bi, true);
4362 continue;
4366 gsi_next (&bi);
4369 return changed ? TODO_cleanup_cfg : 0;
4372 namespace {
4374 const pass_data pass_data_strip_predict_hints =
4376 GIMPLE_PASS, /* type */
4377 "*strip_predict_hints", /* name */
4378 OPTGROUP_NONE, /* optinfo_flags */
4379 TV_BRANCH_PROB, /* tv_id */
4380 PROP_cfg, /* properties_required */
4381 0, /* properties_provided */
4382 0, /* properties_destroyed */
4383 0, /* todo_flags_start */
4384 0, /* todo_flags_finish */
4387 class pass_strip_predict_hints : public gimple_opt_pass
4389 public:
4390 pass_strip_predict_hints (gcc::context *ctxt)
4391 : gimple_opt_pass (pass_data_strip_predict_hints, ctxt)
4394 /* opt_pass methods: */
4395 opt_pass * clone () final override
4397 return new pass_strip_predict_hints (m_ctxt);
4399 void set_pass_param (unsigned int n, bool param) final override
4401 gcc_assert (n == 0);
4402 early_p = param;
4405 unsigned int execute (function *) final override;
4407 private:
4408 bool early_p;
4410 }; // class pass_strip_predict_hints
4412 unsigned int
4413 pass_strip_predict_hints::execute (function *fun)
4415 return strip_predict_hints (fun, early_p);
4418 } // anon namespace
4420 gimple_opt_pass *
4421 make_pass_strip_predict_hints (gcc::context *ctxt)
4423 return new pass_strip_predict_hints (ctxt);
4426 /* Rebuild function frequencies. Passes are in general expected to
4427 maintain the profile by hand; however, in some cases this is not possible:
4428 for example, when inlining several functions with loops, frequencies might
4429 run out of scale and thus need to be recomputed. */
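
/* A hypothetical scenario: inlining a callee whose loop iterates ~10^3
   times into a loop that itself iterates ~10^3 times scales the inner
   counts by ~10^6; after a few such steps count_max trips very_large_p
   below and the profile is recomputed from branch probabilities.  */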
4431 void
4432 rebuild_frequencies (void)
4434 /* If we have no profile, do nothing. Note that after inlining
4435 profile_status_for_fn may not represent the actual presence/absence of
4436 profile. */
4437 if (profile_status_for_fn (cfun) == PROFILE_ABSENT
4438 && !ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.initialized_p ())
4439 return;
4442 /* See if everything is OK and update count_max. */
4443 basic_block bb;
4444 bool inconsistency_found = false;
4445 bool uninitialized_probability_found = false;
4446 bool uninitialized_count_found = false;
4448 cfun->cfg->count_max = profile_count::uninitialized ();
4449 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
4451 cfun->cfg->count_max = cfun->cfg->count_max.max (bb->count);
4452 /* An uninitialized count may be the result of inlining or an omission
4453 in an optimization pass. */
4454 if (!bb->count.initialized_p ())
4456 uninitialized_count_found = true;
4457 if (dump_file)
4458 fprintf (dump_file, "BB %i has uninitialized count\n",
4459 bb->index);
4461 if (bb != ENTRY_BLOCK_PTR_FOR_FN (cfun)
4462 && (!uninitialized_probability_found || !inconsistency_found))
4464 profile_count sum = profile_count::zero ();
4465 edge e;
4466 edge_iterator ei;
4468 FOR_EACH_EDGE (e, ei, bb->preds)
4470 sum += e->count ();
4471 /* An uninitialized probability may be the result of inlining or an
4472 omission in an optimization pass. */
4473 if (!e->probability.initialized_p ())
4474 uninitialized_probability_found = true;
4475 if (dump_file)
4476 fprintf (dump_file,
4477 "Edge %i->%i has uninitialized probability\n",
4478 e->src->index, e->dest->index);
4481 if (sum.differs_from_p (bb->count))
4483 if (dump_file)
4484 fprintf (dump_file,
4485 "BB %i has invalid sum of incomming counts\n",
4486 bb->index);
4487 inconsistency_found = true;
4492 /* If everything is OK, do not re-propagate frequencies. */
4493 if (!inconsistency_found
4494 && (!uninitialized_count_found || uninitialized_probability_found)
4495 && !cfun->cfg->count_max.very_large_p ())
4497 if (dump_file)
4498 fprintf (dump_file, "Profile is consistent\n");
4499 return;
4501 /* Do not re-propagate if we have profile feedback. Even if the profile is
4502 inconsistent from previous transformations, it is probably more realistic
4503 for the hot part of the program than the result of repropagating.
4505 Consider an example where we previously had
4507 if (test)
4508 then [large probability for true]
4510 and we later proved that test is always 0. In this case, if the profile
4511 was read correctly, we must have duplicated the conditional (for example
4512 by inlining) into a context where test is false. From the profile
4513 feedback we know that most executions of the conditional had test true,
4514 so the important copy is not the one we are looking at.
4516 Propagating from probabilities would make the profile look consistent,
4517 but because probabilities after code duplication may not be
4518 representative for a given run, we would only propagate the error further. */
4519 if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa ().nonzero_p ()
4520 && !uninitialized_count_found)
4522 if (dump_file)
4523 fprintf (dump_file,
4524 "Profile is inconsistent but read from profile feedback;"
4525 " not rebuilding\n");
4526 return;
4529 loop_optimizer_init (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS);
4530 connect_infinite_loops_to_exit ();
4531 estimate_bb_frequencies ();
4532 remove_fake_exit_edges ();
4533 loop_optimizer_finalize ();
4534 if (dump_file)
4535 fprintf (dump_file, "Rebuilt basic block counts\n");
4537 return;
4540 namespace {
4542 const pass_data pass_data_rebuild_frequencies =
4544 GIMPLE_PASS, /* type */
4545 "rebuild_frequencies", /* name */
4546 OPTGROUP_NONE, /* optinfo_flags */
4547 TV_REBUILD_FREQUENCIES, /* tv_id */
4548 PROP_cfg, /* properties_required */
4549 0, /* properties_provided */
4550 0, /* properties_destroyed */
4551 0, /* todo_flags_start */
4552 0, /* todo_flags_finish */
4555 class pass_rebuild_frequencies : public gimple_opt_pass
4557 public:
4558 pass_rebuild_frequencies (gcc::context *ctxt)
4559 : gimple_opt_pass (pass_data_rebuild_frequencies, ctxt)
4562 /* opt_pass methods: */
4563 opt_pass * clone () final override
4565 return new pass_rebuild_frequencies (m_ctxt);
4567 void set_pass_param (unsigned int n, bool param) final override
4569 gcc_assert (n == 0);
4570 early_p = param;
4573 unsigned int execute (function *) final override
4575 rebuild_frequencies ();
4576 return 0;
4579 private:
4580 bool early_p;
4582 }; // class pass_rebuild_frequencies
4584 } // anon namespace
4586 gimple_opt_pass *
4587 make_pass_rebuild_frequencies (gcc::context *ctxt)
4589 return new pass_rebuild_frequencies (ctxt);
4592 /* Perform a dry run of the branch prediction pass and report a comparison
4593 of the predicted and real profile into the dump file. */
4595 void
4596 report_predictor_hitrates (void)
4598 unsigned nb_loops;
4600 loop_optimizer_init (LOOPS_NORMAL);
4601 if (dump_file && (dump_flags & TDF_DETAILS))
4602 flow_loops_dump (dump_file, NULL, 0);
4604 nb_loops = number_of_loops (cfun);
4605 if (nb_loops > 1)
4606 scev_initialize ();
4608 tree_estimate_probability (true);
4610 if (nb_loops > 1)
4611 scev_finalize ();
4613 loop_optimizer_finalize ();
4616 /* Force edge E to be cold.
4617 If IMPOSSIBLE is true, force the edge to have count and probability 0;
4618 otherwise keep a low probability to represent a possible error in the
4619 guess. This is used e.g. when we predict that a loop likely iterates a
4620 given number of times but are not 100% sure.
4622 This function locally updates the profile without attempting to keep
4623 global consistency, which cannot be reached in full generality without
4624 a full profile rebuild from probabilities alone. Doing so is not always
4625 a good idea because frequencies and counts may be more realistic than probabilities.
4627 In some cases (such as the elimination of early exits during full loop
4628 unrolling) the caller can ensure that the profile will become
4629 consistent afterwards. */
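
/* For illustration: on a two-way conditional with probabilities 90%/10%,
   forcing the 10% edge with IMPOSSIBLE set rescales it to never (0%) and
   its sibling to always (100%); with IMPOSSIBLE clear it is merely
   lowered to very_unlikely and the difference is redistributed to the
   sibling edge.  */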
4631 void
4632 force_edge_cold (edge e, bool impossible)
4634 profile_count count_sum = profile_count::zero ();
4635 profile_probability prob_sum = profile_probability::never ();
4636 edge_iterator ei;
4637 edge e2;
4638 bool uninitialized_exit = false;
4640 /* When branch probability guesses are not known, then do nothing. */
4641 if (!impossible && !e->count ().initialized_p ())
4642 return;
4644 profile_probability goal = (impossible ? profile_probability::never ()
4645 : profile_probability::very_unlikely ());
4647 /* If the edge is already improbable or cold, just return. */
4648 if (e->probability <= goal
4649 && (!impossible || e->count () == profile_count::zero ()))
4650 return;
4651 FOR_EACH_EDGE (e2, ei, e->src->succs)
4652 if (e2 != e)
4654 if (e->flags & EDGE_FAKE)
4655 continue;
4656 if (e2->count ().initialized_p ())
4657 count_sum += e2->count ();
4658 if (e2->probability.initialized_p ())
4659 prob_sum += e2->probability;
4660 else
4661 uninitialized_exit = true;
4664 /* If we are not guessing profiles but have some other edges out,
4665 just assume the control flow goes elsewhere. */
4666 if (uninitialized_exit)
4667 e->probability = goal;
4668 /* If there are other edges out of e->src, redistribute the probability
4669 there. */
4670 else if (prob_sum > profile_probability::never ())
4672 if (dump_file && (dump_flags & TDF_DETAILS))
4674 fprintf (dump_file, "Making edge %i->%i %s by redistributing "
4675 "probability to other edges. Original probability: ",
4676 e->src->index, e->dest->index,
4677 impossible ? "impossible" : "cold");
4678 e->probability.dump (dump_file);
4679 fprintf (dump_file, "\n");
4681 set_edge_probability_and_rescale_others (e, goal);
4682 if (current_ir_type () != IR_GIMPLE
4683 && e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun))
4684 update_br_prob_note (e->src);
4686 /* If all edges out of e->src are unlikely, the basic block itself
4687 is unlikely. */
4688 else
4690 if (prob_sum == profile_probability::never ())
4691 e->probability = profile_probability::always ();
4692 else
4694 if (impossible)
4695 e->probability = profile_probability::never ();
4696 /* If BB has some edges out that are not impossible, we cannot
4697 assume that BB itself is. */
4698 impossible = false;
4700 if (current_ir_type () != IR_GIMPLE
4701 && e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun))
4702 update_br_prob_note (e->src);
4703 if (e->src->count == profile_count::zero ())
4704 return;
4705 if (count_sum == profile_count::zero () && impossible)
4707 bool found = false;
4708 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
4710 else if (current_ir_type () == IR_GIMPLE)
4711 for (gimple_stmt_iterator gsi = gsi_start_bb (e->src);
4712 !gsi_end_p (gsi); gsi_next (&gsi))
4714 if (stmt_can_terminate_bb_p (gsi_stmt (gsi)))
4716 found = true;
4717 break;
4720 /* FIXME: Implement RTL path. */
4721 else
4722 found = true;
4723 if (!found)
4725 if (dump_file && (dump_flags & TDF_DETAILS))
4726 fprintf (dump_file,
4727 "Making bb %i impossible and dropping count to 0.\n",
4728 e->src->index);
4729 e->src->count = profile_count::zero ();
4730 FOR_EACH_EDGE (e2, ei, e->src->preds)
4731 force_edge_cold (e2, impossible);
4732 return;
4736 /* If we did not adjust anything, the source basic block has no likely
4737 edges leaving in the other direction. In that case force that bb cold,
4738 too. This in general is a difficult task to do, but handle the special
4739 case when BB has only one predecessor. This is the common case when we
4740 are updating the profile after loop transforms. */
4741 if (!(prob_sum > profile_probability::never ())
4742 && count_sum == profile_count::zero ()
4743 && single_pred_p (e->src) && e->src->count.to_frequency (cfun)
4744 > (impossible ? 0 : 1))
4746 int old_frequency = e->src->count.to_frequency (cfun);
4747 if (dump_file && (dump_flags & TDF_DETAILS))
4748 fprintf (dump_file, "Making bb %i %s.\n", e->src->index,
4749 impossible ? "impossible" : "cold");
4750 int new_frequency = MIN (e->src->count.to_frequency (cfun),
4751 impossible ? 0 : 1);
4752 if (impossible)
4753 e->src->count = profile_count::zero ();
4754 else
4755 e->src->count = e->count ().apply_scale (new_frequency,
4756 old_frequency);
4757 force_edge_cold (single_pred_edge (e->src), impossible);
4759 else if (dump_file && (dump_flags & TDF_DETAILS)
4760 && maybe_hot_bb_p (cfun, e->src))
4761 fprintf (dump_file, "Giving up on making bb %i %s.\n", e->src->index,
4762 impossible ? "impossible" : "cold");
4766 #if CHECKING_P
4768 namespace selftest {
4770 /* Test that the hitrates of the predictors defined in predict.def are
4771 within the range [50, 100]. */
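
/* E.g. with REG_BR_PROB_BASE equal to 10000, a predictor defined with
   HITRATE (90) stores 9000, and the check below recovers
   100 * 9000 / 10000 = 90, which lies within [50, 100].  */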
4773 struct branch_predictor
4775 const char *name;
4776 int probability;
4779 #define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) { NAME, HITRATE },
4781 static void
4782 test_prediction_value_range ()
4784 branch_predictor predictors[] = {
4785 #include "predict.def"
4786 { NULL, PROB_UNINITIALIZED }
4789 for (unsigned i = 0; predictors[i].name != NULL; i++)
4791 if (predictors[i].probability == PROB_UNINITIALIZED)
4792 continue;
4794 unsigned p = 100 * predictors[i].probability / REG_BR_PROB_BASE;
4795 ASSERT_TRUE (p >= 50 && p <= 100);
4799 #undef DEF_PREDICTOR
4801 /* Run all of the selftests within this file. */
4803 void
4804 predict_cc_tests ()
4806 test_prediction_value_range ();
4809 } // namespace selftest
4810 #endif /* CHECKING_P. */