2 Copyright (C) 2005-2024 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
21 #define INCLUDE_MEMORY
23 #include "coretypes.h"
28 #include "fold-const.h"
30 #include "gimple-iterator.h"
32 #include "tree-ssa-threadupdate.h"
33 #include "tree-ssa-loop.h"
35 #include "tree-pass.h"
36 #include "gimple-ssa.h"
37 #include "tree-phinodes.h"
38 #include "tree-inline.h"
39 #include "tree-vectorizer.h"
40 #include "value-range.h"
41 #include "gimple-range.h"
42 #include "tree-ssa-threadedge.h"
43 #include "gimple-range-path.h"
45 #include "tree-cfgcleanup.h"
46 #include "tree-pretty-print.h"
50 // Path registry for the backwards threader. After all paths have been
51 // registered with register_path(), thread_through_all_blocks() is called
// Thin adaptor over the generic backward jump-thread registry.
// NOTE(review): this extraction is missing lines (access specifiers,
// braces) — verify against the upstream file before editing code.
54 class back_threader_registry
: public back_jt_path_registry
// Record a candidate path (stored in reverse order) with TAKEN as the
// exit edge; returns whether the registry accepted it.
57 bool register_path (const vec
<basic_block
> &, edge taken
);
60 // Class to abstract the profitability code for the backwards threader.
62 class back_threader_profitability
// Constructor captures whether we optimize for speed and inspects the
// final statement of the path (see the out-of-line definition below).
65 back_threader_profitability (bool speed_p
, gimple
*stmt
);
// Cheap early check: may the path, possibly after extension, be
// profitable?  Sets the *bool out-param (large-non-FSM indicator).
66 bool possibly_profitable_path_p (const vec
<basic_block
> &, bool *);
// Final profitability verdict for a complete path with exit TAKEN.
67 bool profitable_path_p (const vec
<basic_block
> &,
68 edge taken
, bool *irreducible_loop
);
// Insn-count credit for removing the exit jump (set in the ctor).
71 int m_exit_jump_benefit
;
// TRUE when the threaded exit statement is a switch/computed goto.
72 bool m_threaded_multiway_branch
;
73 // The following are computed by possibly_profitable_path_p
74 bool m_threaded_through_latch
;
75 bool m_multiway_branch_in_path
;
76 bool m_contains_hot_bb
;
// Constructor: classify the path's final statement and pre-compute the
// instruction-count benefit of deleting the exit jump.
// NOTE(review): lines are missing from this extraction (e.g. the
// second parameter and member-init list) — confirm against upstream.
80 back_threader_profitability::back_threader_profitability (bool speed_p
,
// A switch or computed goto at the exit is a multiway branch.
84 m_threaded_multiway_branch
= (gimple_code (last
) == GIMPLE_SWITCH
85 || gimple_code (last
) == GIMPLE_GOTO
);
86 // The forward threader has estimate_threading_killed_stmts, in
87 // particular it estimates further DCE from eliminating the exit
89 m_exit_jump_benefit
= estimate_num_insns (last
, &eni_size_weights
);
92 // Back threader flags.
94 // Generate fast code at the expense of code size.
96 // Resolve unknown SSAs on entry to a threading path. If set, use the
97 // ranger. If not, assume all ranges on entry to a path are VARYING.
// Main driver class for the backward threader.
// NOTE(review): the `class back_threader` header line and access
// specifiers are missing from this extraction.
103 back_threader (function
*fun
, unsigned flags
, bool first
);
// Thread all suitable blocks in the function; returns TODO_* flags.
105 unsigned thread_blocks ();
// Entry point for analyzing one block with a multi-way exit.
107 void maybe_thread_block (basic_block bb
);
// Debug-counter gate; see the out-of-line definition.
108 bool debug_counter ();
109 edge
maybe_register_path (back_threader_profitability
&);
110 void maybe_register_path_dump (edge taken_edge
);
// Recursive backward search extending m_path.
111 void find_paths_to_names (basic_block bb
, bitmap imports
, unsigned,
112 back_threader_profitability
&);
// Resolve the outgoing edge of PATH (NULL / UNREACHABLE_EDGE / edge).
113 edge
find_taken_edge (const vec
<basic_block
> &path
);
114 edge
find_taken_edge_cond (const vec
<basic_block
> &path
, gcond
*);
115 edge
find_taken_edge_switch (const vec
<basic_block
> &path
, gswitch
*);
116 virtual void debug ();
117 virtual void dump (FILE *out
);
119 back_threader_registry m_registry
;
121 // Current path being analyzed.
122 auto_vec
<basic_block
> m_path
;
123 // Hash to mark visited BBs while analyzing a path.
124 hash_set
<basic_block
> m_visited_bbs
;
125 // The set of SSA names, any of which could potentially change the
126 // value of the final conditional in a path.
127 auto_bitmap m_imports
;
128 // The last statement in the path.
130 // Marker to differentiate unreachable edges.
131 static const edge UNREACHABLE_EDGE
;
132 // Set to TRUE if unknown SSA names along a path should be resolved
133 // with the ranger. Otherwise, unknown SSA names are assumed to be
134 // VARYING. Setting to true is more precise but slower.
136 // Ranger for the path solver.
137 gimple_ranger
*m_ranger
;
139 // Set to TRUE for the first of each thread[12] pass or the first of
140 // each threadfull[12] pass. This is used to differentiate between
141 // the different threading passes so we can set up debug counters.
145 // Used to differentiate unreachable edges, so we may stop the search
146 // in the given direction.
// Sentinel value: an impossible pointer, never dereferenced.
147 const edge
back_threader::UNREACHABLE_EDGE
= (edge
) -1;
// Constructor: set up loop structures, DFS back-edge marks (only needed
// by the resolving path solver), and allocate the ranger.
// NOTE(review): member-initializer lines are missing from this extraction.
149 back_threader::back_threader (function
*fun
, unsigned flags
, bool first
)
// Speed mode requires canonical loop form for latch detection.
152 if (flags
& BT_SPEED
)
153 loop_optimizer_init (LOOPS_HAVE_PREHEADERS
| LOOPS_HAVE_SIMPLE_LATCHES
);
155 loop_optimizer_init (AVOID_CFG_MODIFICATIONS
);
161 // The path solver needs EDGE_DFS_BACK in resolving mode.
162 if (flags
& BT_RESOLVE
)
163 mark_dfs_back_edges ();
// Owned ranger; released in the destructor (not visible here — verify).
165 m_ranger
= new gimple_ranger
;
// Destructor: tear down the loop optimizer state initialized in the ctor.
// NOTE(review): extraction appears to omit the delete of m_ranger — verify.
168 back_threader::~back_threader ()
171 loop_optimizer_finalize ();
174 // A wrapper for the various debug counters for the threading passes.
175 // Returns TRUE if it's OK to register the current threading path.
179 back_threader::debug_counter ()
181 // The ethread pass is mostly harmless ;-).
// Without BT_SPEED this is the early (ethread) pass: no counter gating.
182 if ((m_flags
& BT_SPEED
) == 0)
// Resolving mode maps to the threadfull[12] counters...
185 if (m_flags
& BT_RESOLVE
)
187 if (m_first
&& !dbg_cnt (back_threadfull1
))
190 if (!m_first
&& !dbg_cnt (back_threadfull2
))
// ...otherwise the plain thread[12] counters.
195 if (m_first
&& !dbg_cnt (back_thread1
))
198 if (!m_first
&& !dbg_cnt (back_thread2
))
// Print PATH to DUMP_FILE in forward order (the vector stores blocks in
// reverse, hence the downward loop), as "bb1->bb2->...".
205 dump_path (FILE *dump_file
, const vec
<basic_block
> &path
)
207 for (unsigned i
= path
.length (); i
> 0; --i
)
209 basic_block bb
= path
[i
- 1];
210 fprintf (dump_file
, "%d", bb
->index
);
// Separator between blocks (guard condition lost in this extraction).
212 fprintf (dump_file
, "->");
216 // Dump details of an attempt to register a path.
219 back_threader::maybe_register_path_dump (edge taken
)
// Nothing to report for an empty path.
221 if (m_path
.is_empty ())
224 fprintf (dump_file
, "path: ");
225 dump_path (dump_file
, m_path
);
226 fprintf (dump_file
, "->");
// Three outcomes: unreachable, success (with destination), rejected.
228 if (taken
== UNREACHABLE_EDGE
)
229 fprintf (dump_file
, "xx REJECTED (unreachable)\n");
231 fprintf (dump_file
, "%d SUCCESS\n", taken
->dest
->index
);
233 fprintf (dump_file
, "xx REJECTED\n");
236 // If an outgoing edge can be determined out of the current path,
237 // register it for jump threading and return the taken edge.
239 // Return NULL if it is unprofitable to thread this path, or the
240 // outgoing edge is unknown. Return UNREACHABLE_EDGE if the path is
// unreachable (see UNREACHABLE_EDGE sentinel above).
244 back_threader::maybe_register_path (back_threader_profitability
&profit
)
246 edge taken_edge
= find_taken_edge (m_path
);
// Only a concrete, reachable edge is worth registering.
248 if (taken_edge
&& taken_edge
!= UNREACHABLE_EDGE
)
250 bool irreducible
= false;
251 if (profit
.profitable_path_p (m_path
, taken_edge
, &irreducible
)
253 && m_registry
.register_path (m_path
, taken_edge
))
// Threading may invalidate recorded loop versioning assumptions.
256 vect_free_loop_info_assumptions (m_path
[0]->loop_father
);
262 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
263 maybe_register_path_dump (taken_edge
);
268 // Return the known taken edge out of a path. If the path can be
269 // determined to be unreachable, return UNREACHABLE_EDGE. If no
270 // outgoing edge can be calculated, return NULL.
273 back_threader::find_taken_edge (const vec
<basic_block
> &path
)
275 gcc_checking_assert (path
.length () > 1);
// Dispatch on the kind of the final control statement.
276 switch (gimple_code (m_last_stmt
))
279 return find_taken_edge_cond (path
, as_a
<gcond
*> (m_last_stmt
));
282 return find_taken_edge_switch (path
, as_a
<gswitch
*> (m_last_stmt
));
289 // Same as find_taken_edge, but for paths ending in a switch.
292 back_threader::find_taken_edge_switch (const vec
<basic_block
> &path
,
295 tree name
= gimple_switch_index (sw
);
// Solve the range of the switch index along this specific path.
298 path_range_query
solver (*m_ranger
, path
, m_imports
, m_flags
& BT_RESOLVE
);
299 solver
.range_of_expr (r
, name
, sw
);
// An undefined range means the path cannot be taken at all.
301 if (r
.undefined_p ())
302 return UNREACHABLE_EDGE
;
// Map the computed range onto a unique case label, if any.
307 tree label
= find_case_label_range (sw
, &r
);
311 return find_edge (gimple_bb (sw
), label_to_block (cfun
, CASE_LABEL (label
)));
314 // Same as find_taken_edge, but for paths ending in a GIMPLE_COND.
317 back_threader::find_taken_edge_cond (const vec
<basic_block
> &path
,
// Solve the conditional's result along this specific path.
322 path_range_query
solver (*m_ranger
, path
, m_imports
, m_flags
& BT_RESOLVE
);
323 solver
.range_of_stmt (r
, cond
);
// The solver can prove the whole path dead independently of R.
325 if (solver
.unreachable_path_p ())
326 return UNREACHABLE_EDGE
;
328 int_range
<2> true_range
= range_true ();
329 int_range
<2> false_range
= range_false ();
// Only a definitely-true or definitely-false result picks an edge.
331 if (r
== true_range
|| r
== false_range
)
333 edge e_true
, e_false
;
334 basic_block bb
= gimple_bb (cond
);
335 extract_true_false_edges_from_block (bb
, &e_true
, &e_false
);
336 return r
== true_range
? e_true
: e_false
;
341 // Find jump threading paths to any of the SSA names in the
342 // INTERESTING bitmap, and register any such paths.
344 // BB is the current path being processed.
346 // OVERALL_PATHS is the search space up to this block
// NOTE(review): this extraction is missing lines throughout the body
// (braces, some conditions); verify against upstream before editing.
349 back_threader::find_paths_to_names (basic_block bb
, bitmap interesting
,
350 unsigned overall_paths
,
351 back_threader_profitability
&profit
)
// Avoid cycles: bail if BB is already on the path being explored.
353 if (m_visited_bbs
.add (bb
))
356 m_path
.safe_push (bb
);
358 // Try to resolve the path without looking back. Avoid resolving paths
359 // we know are large but are not (yet) recognized as Finite State Machine.
360 // ??? Ideally we'd explore the cheapest path to the loop backedge here,
361 // avoiding the exponential greedy search and only start that from there.
362 // Precomputing a path-size-to-immediate-dominator-of-successor for each
363 // edge might help here. Alternatively copying divergent control flow
364 // on the way to the backedge could be worthwhile.
366 if (m_path
.length () > 1
367 && (!profit
.possibly_profitable_path_p (m_path
, &large_non_fsm
)
369 && maybe_register_path (profit
))))
372 // The backwards thread copier cannot copy blocks that do not belong
373 // to the same loop, so when the new source of the path entry no
374 // longer belongs to it we don't need to search further.
375 else if (m_path
[0]->loop_father
!= bb
->loop_father
)
378 // Continue looking for ways to extend the path but limit the
379 // search space along a branch
380 else if ((overall_paths
= overall_paths
* EDGE_COUNT (bb
->preds
))
381 <= (unsigned)param_max_jump_thread_paths
)
383 // For further greedy searching we want to remove interesting
384 // names defined in BB but add ones on the PHI edges for the
385 // respective edges and adding imports from those stmts.
386 // We do this by starting with all names
387 // not defined in BB as interesting, collecting a list of
388 // interesting PHIs in BB on the fly. Then we iterate over
389 // predecessor edges, adding interesting PHI edge defs to
390 // the set of interesting names to consider when processing it.
391 auto_bitmap new_interesting
;
392 auto_vec
<int, 16> new_imports
;
393 auto_vec
<gphi
*, 4> interesting_phis
;
396 auto_vec
<tree
, 16> worklist
;
// Seed the worklist/new-interesting sets from INTERESTING.
397 EXECUTE_IF_SET_IN_BITMAP (interesting
, 0, i
, bi
)
399 tree name
= ssa_name (i
);
400 gimple
*def_stmt
= SSA_NAME_DEF_STMT (name
);
401 /* Imports remain interesting. */
402 if (gimple_bb (def_stmt
) != bb
)
404 bitmap_set_bit (new_interesting
, i
);
407 worklist
.quick_push (name
);
// Transitively process names defined in BB.
408 while (!worklist
.is_empty ())
410 tree name
= worklist
.pop ();
411 gimple
*def_stmt
= SSA_NAME_DEF_STMT (name
);
412 /* Newly discovered imports are interesting. */
413 if (gimple_bb (def_stmt
) != bb
)
415 bitmap_set_bit (new_interesting
, SSA_NAME_VERSION (name
));
418 /* Local PHIs participate in renaming below. */
419 if (gphi
*phi
= dyn_cast
<gphi
*> (def_stmt
))
421 tree res
= gimple_phi_result (phi
);
422 if (!SSA_NAME_OCCURS_IN_ABNORMAL_PHI (res
))
423 interesting_phis
.safe_push (phi
);
425 /* For other local defs process their uses, amending
426 imports on the way. */
430 unsigned lim
= gimple_range_ssa_names (ssa
, 3, def_stmt
);
431 for (unsigned j
= 0; j
< lim
; ++j
)
435 && bitmap_set_bit (m_imports
,
436 SSA_NAME_VERSION (rhs
)))
438 new_imports
.safe_push (SSA_NAME_VERSION (rhs
));
439 worklist
.safe_push (rhs
);
// Recurse into predecessors if anything remains to chase.
445 if (!bitmap_empty_p (new_interesting
)
446 || !interesting_phis
.is_empty ())
448 auto_vec
<int, 4> unwind (interesting_phis
.length ());
449 auto_vec
<int, 4> imports_unwind (interesting_phis
.length ());
452 FOR_EACH_EDGE (e
, iter
, bb
->preds
)
454 if (e
->flags
& EDGE_ABNORMAL
455 // This is like path_crosses_loops in profitable_path_p but
456 // more restrictive to avoid peeling off loop iterations (see
457 // tree-ssa/pr14341.c for an example).
458 // ??? Note this restriction only applied when visiting an
459 // interesting PHI with the former resolve_phi.
460 || (!interesting_phis
.is_empty ()
461 && m_path
[0]->loop_father
!= e
->src
->loop_father
))
// Rename interesting PHIs to their argument on edge E.
463 for (gphi
*phi
: interesting_phis
)
465 tree def
= PHI_ARG_DEF_FROM_EDGE (phi
, e
);
466 if (TREE_CODE (def
) == SSA_NAME
)
468 int ver
= SSA_NAME_VERSION (def
);
469 if (bitmap_set_bit (new_interesting
, ver
))
471 if (bitmap_set_bit (m_imports
, ver
))
472 imports_unwind
.quick_push (ver
);
473 unwind
.quick_push (ver
);
477 find_paths_to_names (e
->src
, new_interesting
, overall_paths
,
479 // Restore new_interesting.
480 for (int def
: unwind
)
481 bitmap_clear_bit (new_interesting
, def
);
483 // Restore m_imports as well.
484 for (int def
: imports_unwind
)
485 bitmap_clear_bit (m_imports
, def
);
486 imports_unwind
.truncate (0);
489 /* m_imports tracks all interesting names on the path, so when
490 backtracking we have to restore it. */
491 for (int j
: new_imports
)
492 bitmap_clear_bit (m_imports
, j
);
494 else if (dump_file
&& (dump_flags
& TDF_DETAILS
))
495 fprintf (dump_file
, " FAIL: Search space limit %d reached.\n",
496 param_max_jump_thread_paths
);
498 // Reset things to their original state.
500 m_visited_bbs
.remove (bb
);
503 // Search backwards from BB looking for paths where the final
504 // conditional maybe threaded to a successor block. Record such paths
505 // for jump threading.
508 back_threader::maybe_thread_block (basic_block bb
)
// Nothing to thread without multiple outgoing edges.
510 if (EDGE_COUNT (bb
->succs
) <= 1)
513 gimple
*stmt
= *gsi_last_bb (bb
);
// Only conditionals and switches can be threaded.
517 enum gimple_code code
= gimple_code (stmt
);
518 if (code
!= GIMPLE_SWITCH
519 && code
!= GIMPLE_COND
)
523 m_visited_bbs
.empty ();
526 // We compute imports of the path during discovery starting
527 // just with names used in the conditional.
528 bitmap_clear (m_imports
);
531 FOR_EACH_SSA_TREE_OPERAND (name
, stmt
, iter
, SSA_OP_USE
)
// Skip names the ranger cannot reason about.
533 if (!gimple_range_ssa_p (name
))
535 bitmap_set_bit (m_imports
, SSA_NAME_VERSION (name
));
538 // Interesting is the set of imports we have not yet seen
539 // the definition of. So while imports only grow, the
540 // set of interesting defs dwindles and once empty we can
542 auto_bitmap interesting
;
543 bitmap_copy (interesting
, m_imports
);
544 back_threader_profitability
profit (m_flags
& BT_SPEED
, stmt
);
545 find_paths_to_names (bb
, interesting
, 1, profit
);
// Debug helper: print PATH to stderr followed by a newline.
549 debug (const vec
<basic_block
> &path
)
551 dump_path (stderr
, path
);
552 fputc ('\n', stderr
);
// Dump the names in m_imports (candidates for pre-computation) to OUT.
556 back_threader::dump (FILE *out
)
558 fprintf (out
, "\nCandidates for pre-computation:\n");
559 fprintf (out
, "===================================\n");
564 EXECUTE_IF_SET_IN_BITMAP (m_imports
, 0, i
, bi
)
566 tree name
= ssa_name (i
);
567 print_generic_expr (out
, name
, TDF_NONE
);
// Debug entry point; body missing from this extraction (presumably
// forwards to dump (stderr) — verify against upstream).
573 back_threader::debug ()
578 /* Examine jump threading path PATH and return TRUE if it is possibly
579 profitable to thread it, otherwise return FALSE. If this function
580 returns TRUE profitable_path_p might not be satisfied but when
581 the path is extended it might be. In particular indicate in
582 *LARGE_NON_FSM whether the thread is too large for a non-FSM thread
583 but would be OK if we extend the path to cover the loop backedge.
585 ?? It seems we should be able to loosen some of the restrictions in
586 this function after loop optimizations have run. */
589 back_threader_profitability::possibly_profitable_path_p
590 (const vec
<basic_block
> &m_path
,
593 gcc_checking_assert (!m_path
.is_empty ());
595 /* We can have an empty path here (excluding the DEF block) when the
596 statement that makes a conditional generate a compile-time
597 constant result is in the same block as the conditional.
599 That's not really a jump threading opportunity, but instead is
600 simple cprop & simplification. We could handle it here if we
601 wanted by wiring up all the incoming edges. If we run this
602 early in IPA, that might be worth doing. For now we just
604 if (m_path
.length () <= 1)
607 gimple_stmt_iterator gsi
;
608 loop_p loop
= m_path
[0]->loop_father
;
610 // We recompute the following, when we rewrite possibly_profitable_path_p
611 // to work incrementally on added BBs we have to unwind them on backtracking
613 m_threaded_through_latch
= false;
614 m_multiway_branch_in_path
= false;
615 m_contains_hot_bb
= false;
617 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
618 fprintf (dump_file
, "Checking profitability of path (backwards): ");
620 /* Count the number of instructions on the path: as these instructions
621 will have to be duplicated, we will not record the path if there
622 are too many instructions on the path. Also check that all the
623 blocks in the path belong to a single loop. */
624 for (unsigned j
= 0; j
< m_path
.length (); j
++)
626 basic_block bb
= m_path
[j
];
628 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
629 fprintf (dump_file
, " bb:%i", bb
->index
);
630 /* Remember, blocks in the path are stored in opposite order in
631 the PATH array. The last entry in the array represents the
632 block with an outgoing edge that we will redirect to the jump
633 threading path. Thus we don't care how many statements are
634 in that block because it will not be copied or whether or not
635 it ends in a multiway branch. */
636 if (j
< m_path
.length () - 1)
638 int orig_n_insns
= m_n_insns
;
639 if (!m_contains_hot_bb
&& m_speed_p
)
640 m_contains_hot_bb
|= optimize_bb_for_speed_p (bb
);
641 for (gsi
= gsi_after_labels (bb
);
643 gsi_next_nondebug (&gsi
))
645 /* Do not allow OpenACC loop markers and __builtin_constant_p on
646 threading paths. The latter is disallowed, because an
647 expression might be constant on two threading paths, and
648 become non-constant (i.e.: phi) when they merge. */
649 gimple
*stmt
= gsi_stmt (gsi
);
650 if (gimple_call_internal_p (stmt
, IFN_UNIQUE
)
651 || gimple_call_builtin_p (stmt
, BUILT_IN_CONSTANT_P
))
653 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
654 fputc ('\n', dump_file
);
657 /* Do not count empty statements and labels. */
658 if (gimple_code (stmt
) != GIMPLE_NOP
659 && !is_gimple_debug (stmt
))
660 m_n_insns
+= estimate_num_insns (stmt
, &eni_size_weights
);
662 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
663 fprintf (dump_file
, " (%i insns)", m_n_insns
-orig_n_insns
);
665 /* We do not look at the block with the threaded branch
666 in this loop. So if any block with a last statement that
667 is a GIMPLE_SWITCH or GIMPLE_GOTO is seen, then we have a
668 multiway branch on our path.
670 The block in PATH[0] is special, it's the block where we're
671 going to be able to eliminate its branch. */
674 gimple
*last
= *gsi_last_bb (bb
);
676 && (gimple_code (last
) == GIMPLE_SWITCH
677 || gimple_code (last
) == GIMPLE_GOTO
))
678 m_multiway_branch_in_path
= true;
682 /* Note if we thread through the latch, we will want to include
683 the last entry in the array when determining if we thread
684 through the loop latch. */
685 if (loop
->latch
== bb
)
687 m_threaded_through_latch
= true;
688 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
689 fprintf (dump_file
, " (latch)");
693 /* We are going to remove the control statement at the end of the
694 last block in the threading path. So don't count it against our
696 m_n_insns
-= m_exit_jump_benefit
;
698 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
699 fprintf (dump_file
, "\n Control statement insns: %i\n"
700 " Overall: %i insns\n",
701 m_exit_jump_benefit
, m_n_insns
);
703 /* Threading is profitable if the path duplicated is hot but also
704 in a case we separate cold path from hot path and permit optimization
705 of the hot path later. Be on the aggressive side here. In some testcases,
706 as in PR 78407 this leads to noticeable improvements. */
// Hard cap: path too large to duplicate at all.
709 if (m_n_insns
>= param_max_fsm_thread_path_insns
)
711 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
712 fprintf (dump_file
, " FAIL: Jump-thread path not considered: "
713 "the number of instructions on the path "
714 "exceeds PARAM_MAX_FSM_THREAD_PATH_INSNS.\n");
// ENTRY is the edge into the path (the path vector is reversed).
717 edge entry
= find_edge (m_path
[m_path
.length () - 1],
718 m_path
[m_path
.length () - 2]);
719 if (probably_never_executed_edge_p (cfun
, entry
))
721 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
722 fprintf (dump_file
, " FAIL: Jump-thread path not considered: "
723 "path entry is probably never executed.\n");
727 else if (m_n_insns
> 1)
729 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
730 fprintf (dump_file
, " FAIL: Jump-thread path not considered: "
731 "duplication of %i insns is needed and optimizing for size.\n",
736 /* The generic copier used by the backthreader does not re-use an
737 existing threading path to reduce code duplication. So for that
738 case, drastically reduce the number of statements we are allowed
739 to copy. We don't know yet whether we will thread through the latch
740 so we have to be permissive and continue threading, but indicate
741 to the caller the thread, if final, wouldn't be profitable. */
742 if ((!m_threaded_multiway_branch
744 || loop
->latch
->index
== EXIT_BLOCK
)
745 && (m_n_insns
* param_fsm_scale_path_stmts
746 >= param_max_jump_thread_duplication_stmts
))
748 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
750 " FAIL: Did not thread around loop and would copy too "
751 "many statements.\n");
// Tell the caller whether only an FSM-style (latch + multiway) thread
// would justify a path of this size.
754 *large_non_fsm
= (!(m_threaded_through_latch
&& m_threaded_multiway_branch
)
755 && (m_n_insns
* param_fsm_scale_path_stmts
756 >= param_max_jump_thread_duplication_stmts
));
758 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
759 fputc ('\n', dump_file
);
763 /* Examine jump threading path PATH and return TRUE if it is profitable to
764 thread it, otherwise return FALSE.
766 The taken edge out of the path is TAKEN_EDGE.
768 CREATES_IRREDUCIBLE_LOOP is set to TRUE if threading this path
769 would create an irreducible loop.
771 ?? It seems we should be able to loosen some of the restrictions in
772 this function after loop optimizations have run. */
775 back_threader_profitability::profitable_path_p (const vec
<basic_block
> &m_path
,
777 bool *creates_irreducible_loop
)
779 // We can assume that possibly_profitable_path_p holds here
781 loop_p loop
= m_path
[0]->loop_father
;
783 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
784 fprintf (dump_file
, "Checking profitability of path (backwards): ");
786 /* If this path threaded through the loop latch back into the
787 same loop and the destination does not dominate the loop
788 latch, then this thread would create an irreducible loop. */
789 *creates_irreducible_loop
= false;
790 if (m_threaded_through_latch
791 && loop
== taken_edge
->dest
->loop_father
792 && (determine_bb_domination_status (loop
, taken_edge
->dest
)
793 == DOMST_NONDOMINATING
))
794 *creates_irreducible_loop
= true;
796 /* Threading is profitable if the path duplicated is hot but also
797 in a case we separate cold path from hot path and permit optimization
798 of the hot path later. Be on the aggressive side here. In some testcases,
799 as in PR 78407 this leads to noticeable improvements. */
801 && (optimize_edge_for_speed_p (taken_edge
) || m_contains_hot_bb
))
803 if (probably_never_executed_edge_p (cfun
, taken_edge
))
805 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
806 fprintf (dump_file
, " FAIL: Jump-thread path not considered: "
807 "path leads to probably never executed edge.\n");
811 else if (m_n_insns
> 1)
813 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
814 fprintf (dump_file
, " FAIL: Jump-thread path not considered: "
815 "duplication of %i insns is needed and optimizing for size.\n",
820 /* We avoid creating irreducible inner loops unless we thread through
821 a multiway branch, in which case we have deemed it worth losing
822 other loop optimizations later.
824 We also consider it worth creating an irreducible inner loop after
825 loop optimizations if the number of copied statement is low. */
826 if (!m_threaded_multiway_branch
827 && *creates_irreducible_loop
828 && (!(cfun
->curr_properties
& PROP_loop_opts_done
)
829 || (m_n_insns
* param_fsm_scale_path_stmts
830 >= param_max_jump_thread_duplication_stmts
)))
832 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
834 " FAIL: Would create irreducible loop early without "
835 "threading multiway branch.\n");
836 /* We compute creates_irreducible_loop only late. */
840 /* The generic copier used by the backthreader does not re-use an
841 existing threading path to reduce code duplication. So for that
842 case, drastically reduce the number of statements we are allowed
844 if (!(m_threaded_through_latch
&& m_threaded_multiway_branch
)
845 && (m_n_insns
* param_fsm_scale_path_stmts
846 >= param_max_jump_thread_duplication_stmts
))
848 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
850 " FAIL: Did not thread around loop and would copy too "
851 "many statements.\n");
855 /* When there is a multi-way branch on the path, then threading can
856 explode the CFG due to duplicating the edges for that multi-way
857 branch. So like above, only allow a multi-way branch on the path
858 if we actually thread a multi-way branch. */
859 if (!m_threaded_multiway_branch
&& m_multiway_branch_in_path
)
861 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
863 " FAIL: Thread through multiway branch without threading "
864 "a multiway branch.\n");
868 /* Threading through an empty latch would cause code to be added to
869 the latch. This could alter the loop form sufficiently to cause
870 loop optimizations to fail. Disable these threads until after
871 loop optimizations have run. */
872 if ((m_threaded_through_latch
|| taken_edge
->dest
== loop
->latch
)
873 && !(cfun
->curr_properties
& PROP_loop_opts_done
)
874 && empty_block_p (loop
->latch
))
876 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
878 " FAIL: Thread through latch before loop opts would create "
879 "non-empty latch\n");
882 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
883 fputc ('\n', dump_file
);
888 /* The current path PATH is a vector of blocks forming a jump threading
889 path in reverse order. TAKEN_EDGE is the edge taken from path[0].
891 Convert the current path into the form used by register_jump_thread and
894 Return TRUE if successful or FALSE otherwise. */
897 back_threader_registry::register_path (const vec
<basic_block
> &m_path
,
900 vec
<jump_thread_edge
*> *jump_thread_path
= allocate_thread_path ();
902 // The generic copier ignores the edge type. We can build the
903 // thread edges with any type.
// Walk the path from entry towards exit (the vector is reversed).
904 for (unsigned int j
= 0; j
+ 1 < m_path
.length (); j
++)
906 basic_block bb1
= m_path
[m_path
.length () - j
- 1];
907 basic_block bb2
= m_path
[m_path
.length () - j
- 2];
909 edge e
= find_edge (bb1
, bb2
);
911 push_edge (jump_thread_path
, e
, EDGE_COPY_SRC_BLOCK
);
// Terminate the thread with the taken edge; its source isn't copied.
914 push_edge (jump_thread_path
, taken_edge
, EDGE_NO_COPY_SRC_BLOCK
);
915 return register_jump_thread (jump_thread_path
);
918 // Thread all suitable paths in the current function.
920 // Return TODO_flags.
923 back_threader::thread_blocks ()
// Only blocks with a multi-way exit are candidates.
926 FOR_EACH_BB_FN (bb
, m_fun
)
927 if (EDGE_COUNT (bb
->succs
) > 1)
928 maybe_thread_block (bb
);
// Perform all registered CFG updates at once.
930 bool changed
= m_registry
.thread_through_all_blocks (true);
932 if (m_flags
& BT_SPEED
)
933 return changed
? TODO_cleanup_cfg
: 0;
// Pass metadata for the early (size-oriented) jump threading pass.
// NOTE(review): several initializer lines (name, optinfo flags) are
// missing from this extraction.
940 const pass_data pass_data_early_thread_jumps
=
945 TV_TREE_SSA_THREAD_JUMPS
,
946 ( PROP_cfg
| PROP_ssa
),
950 ( TODO_cleanup_cfg
| TODO_update_ssa
),
// Pass metadata for the regular jump threading pass.
953 const pass_data pass_data_thread_jumps
=
958 TV_TREE_SSA_THREAD_JUMPS
,
959 ( PROP_cfg
| PROP_ssa
),
// Pass metadata for the fully-resolving jump threading pass.
966 const pass_data pass_data_thread_jumps_full
=
971 TV_TREE_SSA_THREAD_JUMPS
,
972 ( PROP_cfg
| PROP_ssa
),
979 // Early jump threading pass optimizing for size.
980 class pass_early_thread_jumps
: public gimple_opt_pass
983 pass_early_thread_jumps (gcc::context
*ctxt
)
984 : gimple_opt_pass (pass_data_early_thread_jumps
, ctxt
)
987 opt_pass
* clone () override
989 return new pass_early_thread_jumps (m_ctxt
);
991 void set_pass_param (unsigned int, bool param
) override
995 bool gate (function
*) override
997 return flag_thread_jumps
;
// Run the backward threader with no flags (no speed, no resolving).
999 unsigned int execute (function
*fun
) override
1001 back_threader
threader (fun
, BT_NONE
, m_first
);
1002 return threader
.thread_blocks ();
1008 // Jump threading pass without resolving of unknown SSAs.
1009 class pass_thread_jumps
: public gimple_opt_pass
1012 pass_thread_jumps (gcc::context
*ctxt
)
1013 : gimple_opt_pass (pass_data_thread_jumps
, ctxt
)
1015 opt_pass
* clone (void) override
1017 return new pass_thread_jumps (m_ctxt
);
1019 void set_pass_param (unsigned int, bool param
) override
1023 bool gate (function
*) override
1025 return flag_thread_jumps
&& flag_expensive_optimizations
;
// BT_SPEED only: optimize for speed, VARYING entry ranges.
1027 unsigned int execute (function
*fun
) override
1029 back_threader
threader (fun
, BT_SPEED
, m_first
);
1030 return threader
.thread_blocks ();
1036 // Jump threading pass that fully resolves unknown SSAs.
1037 class pass_thread_jumps_full
: public gimple_opt_pass
1040 pass_thread_jumps_full (gcc::context
*ctxt
)
1041 : gimple_opt_pass (pass_data_thread_jumps_full
, ctxt
)
1043 opt_pass
* clone (void) override
1045 return new pass_thread_jumps_full (m_ctxt
);
1047 void set_pass_param (unsigned int, bool param
) override
1051 bool gate (function
*) override
1053 return flag_thread_jumps
&& flag_expensive_optimizations
;
// BT_SPEED | BT_RESOLVE: use the ranger for entry ranges too.
1055 unsigned int execute (function
*fun
) override
1057 back_threader
threader (fun
, BT_SPEED
| BT_RESOLVE
, m_first
);
1058 return threader
.thread_blocks ();
// Factory used by the pass manager.
1067 make_pass_thread_jumps (gcc::context
*ctxt
)
1069 return new pass_thread_jumps (ctxt
);
// Factory used by the pass manager.
1073 make_pass_thread_jumps_full (gcc::context
*ctxt
)
1075 return new pass_thread_jumps_full (ctxt
);
// Factory used by the pass manager.
1079 make_pass_early_thread_jumps (gcc::context
*ctxt
)
1081 return new pass_early_thread_jumps (ctxt
);