/* Loop header copying on trees.
   Copyright (C) 2004-2025 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "gimple-ssa.h"
#include "gimple-iterator.h"
#include "tree-cfg.h"
#include "tree-into-ssa.h"
#include "cfgloop.h"
#include "tree-inline.h"
#include "tree-ssa-threadedge.h"
#include "tree-ssa-sccvn.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "value-range.h"
#include "gimple-range.h"
#include "gimple-range-path.h"
#include "gimple-pretty-print.h"
#include "cfganal.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-scalar-evolution.h"
/* Return path query instance for testing ranges of statements
   in headers of LOOP contained in basic block BB.
   Use RANGER instance.  */

static path_range_query *
get_range_query (class loop *loop,
		 basic_block bb,
		 gimple_ranger &ranger)
{
  auto_vec<basic_block, 8> path;
  for (; bb != loop->header; bb = single_pred_edge (bb)->src)
    path.safe_push (bb);
  path.safe_push (loop->header);
  path.safe_push (loop_preheader_edge (loop)->src);
  return new path_range_query (ranger, path);
}
/* Return the loop exit edge of the conditional in BB if that exit is
   statically known to not be taken in the first iteration of the loop,
   and NULL otherwise.
   Formulate the correct ranger query to RANGER.  */

static edge
static_loop_exit (class loop *l, basic_block bb, gimple_ranger &ranger,
		  path_range_query *&query)
{
  gcond *last = safe_dyn_cast <gcond *> (*gsi_last_bb (bb));
  edge ret_e;

  if (!last)
    return NULL;

  edge true_e, false_e;
  extract_true_false_edges_from_block (bb, &true_e, &false_e);

  /* If neither edge is the exit edge, this is not a case we'd like to
     special-case.  */
  if (!loop_exit_edge_p (l, true_e) && !loop_exit_edge_p (l, false_e))
    return NULL;

  int_range<1> desired_static_range;
  if (loop_exit_edge_p (l, true_e))
    {
      desired_static_range = range_false ();
      ret_e = true_e;
    }
  else
    {
      desired_static_range = range_true ();
      ret_e = false_e;
    }

  if (!query)
    query = get_range_query (l, gimple_bb (last), ranger);

  int_range<2> r;
  if (!query->range_of_stmt (r, last))
    return NULL;
  return r == desired_static_range ? ret_e : NULL;
}
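/* Hypothetical illustration (editorial, not from a testcase): for a loop
   like "for (i = 0; i != n; i++)" the header test in the first iteration
   is "0 != n", so if the range of n on the path from the preheader is
   known to exclude 0, the exit edge is statically not taken and is
   returned above.  */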
/* Return true if STMT is static in LOOP.  This means that its value
   is constant in the first iteration.
   Use RANGER and formulate query cached in QUERY.  */

static bool
loop_static_stmt_p (class loop *loop,
		    gimple_ranger &ranger,
		    path_range_query *&query,
		    gimple *stmt)
{
  tree type = gimple_range_type (stmt);
  if (!type || !value_range::supports_type_p (type))
    return false;

  if (!query)
    query = get_range_query (loop, gimple_bb (stmt), ranger);

  value_range r (gimple_range_type (stmt));
  if (!query->range_of_stmt (r, stmt))
    return false;
  return r.singleton_p ();
}
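/* Note: the three predicates below read the classification that
   should_duplicate_loop_header_p stores in the uid of each defining
   statement: 1 marks loop invariant statements, 2 marks static
   statements (constant in the first iteration) and 4 marks statements
   combining a static and an invariant operand.  */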
/* Return true if OP is invariant.  */

static bool
loop_invariant_op_p (class loop *loop,
		     tree op)
{
  if (is_gimple_min_invariant (op))
    return true;
  if (SSA_NAME_IS_DEFAULT_DEF (op)
      || !flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (op))))
    return true;
  return gimple_uid (SSA_NAME_DEF_STMT (op)) & 1;
}
/* Return true if OP is static in LOOP; that is, its value is constant
   in the first iteration.  */

static bool
loop_static_op_p (class loop *loop, tree op)
{
  /* Always check for invariant first.  */
  gcc_checking_assert (!is_gimple_min_invariant (op)
		       && !SSA_NAME_IS_DEFAULT_DEF (op)
		       && flow_bb_inside_loop_p (loop,
						 gimple_bb (SSA_NAME_DEF_STMT (op))));
  return gimple_uid (SSA_NAME_DEF_STMT (op)) & 2;
}
/* Return true if OP combines outcome of static and
   loop invariant conditional.  */

static bool
loop_combined_static_and_iv_p (class loop *loop,
			       tree op)
{
  /* Always check for invariant first.  */
  gcc_checking_assert (!is_gimple_min_invariant (op)
		       && !SSA_NAME_IS_DEFAULT_DEF (op)
		       && flow_bb_inside_loop_p (loop,
						 gimple_bb (SSA_NAME_DEF_STMT (op))));
  return gimple_uid (SSA_NAME_DEF_STMT (op)) & 4;
}
/* Decision about possibility of copying a given header.  */

enum ch_decision
{
  /* We do not want to copy this header.  */
  ch_impossible,
  /* We can copy it if it enables wins.  */
  ch_possible,
  /* We can "copy" it if it enables wins and doing
     so will introduce no new code.  */
  ch_possible_zero_cost,
  /* We want to copy.  */
  ch_win,
  /* We want to copy and we will eliminate loop exit.  */
  ch_win_invariant_exit
};
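/* Hypothetical example: in "while (i < n && n != 0)" the header testing
   the invariant condition "n != 0" can be copied and its in-loop exit
   eliminated (ch_win_invariant_exit), while a header whose exit ranger
   proves is not taken in the first iteration and which contains no
   other costed statements copies for free (ch_possible_zero_cost).  */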
/* Check whether we should duplicate HEADER of LOOP.  At most *LIMIT
   instructions should be duplicated, limit is decreased by the actual
   amount.  */

static ch_decision
should_duplicate_loop_header_p (basic_block header, class loop *loop,
				gimple_ranger *ranger,
				int *limit,
				hash_set <edge> *invariant_exits,
				hash_set <edge> *static_exits)
{
  gimple_stmt_iterator bsi;

  gcc_assert (!header->aux);

  gcc_assert (EDGE_COUNT (header->succs) > 0);
  if (single_succ_p (header))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  Not duplicating bb %i: it is single succ.\n",
		 header->index);
      return ch_impossible;
    }
  if (flow_bb_inside_loop_p (loop, EDGE_SUCC (header, 0)->dest)
      && flow_bb_inside_loop_p (loop, EDGE_SUCC (header, 1)->dest))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  Not duplicating bb %i: both successors are in loop.\n",
		 header->index);
      return ch_impossible;
    }

  /* If this is not the original loop header, we want it to have just
     one predecessor in order to match the && pattern.  */
  if (header != loop->header && !single_pred_p (header))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  Not duplicating bb %i: it has multiple predecessors.\n",
		 header->index);
      return ch_impossible;
    }
  gcond *last = safe_dyn_cast <gcond *> (*gsi_last_bb (header));
  if (!last)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  Not duplicating bb %i: it does not end by conditional.\n",
		 header->index);
      return ch_impossible;
    }

  path_range_query *query = NULL;
  for (gphi_iterator psi = gsi_start_phis (header); !gsi_end_p (psi);
       gsi_next (&psi))
    if (!virtual_operand_p (gimple_phi_result (psi.phi ())))
      {
	bool static_p = loop_static_stmt_p (loop, *ranger,
					    query, psi.phi ());
	gimple_set_uid (psi.phi (), static_p ? 2 : 0);
      }
  bool code_size_cost = false;
  /* Count number of instructions and punt on calls.
     Populate stmts INV flag to later apply heuristics to the
     kind of conditions we want to copy.  */
  for (bsi = gsi_start_bb (header); !gsi_end_p (bsi); gsi_next (&bsi))
    {
      gimple *last = gsi_stmt (bsi);

      if (gimple_code (last) == GIMPLE_LABEL)
	continue;

      if (is_gimple_debug (last))
	continue;

      if (gimple_code (last) == GIMPLE_COND)
	break;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "  Analyzing: ");
	  print_gimple_stmt (dump_file, last, 0, TDF_SLIM);
	}

      if (gimple_code (last) == GIMPLE_CALL
	  && (!gimple_inexpensive_call_p (as_a <gcall *> (last))
	      /* IFN_LOOP_DIST_ALIAS means that inner loop is distributed
		 at current loop's header.  Don't copy in this case.  */
	      || gimple_call_internal_p (last, IFN_LOOP_DIST_ALIAS)))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file,
		     "  Not duplicating bb %i: it contains call.\n",
		     header->index);
	  if (query)
	    delete query;
	  return ch_impossible;
	}
      /* Classify whether the stmt is invariant in the loop.  */
      gimple_set_uid (last, 0);
      if (!gimple_vuse (last)
	  && gimple_code (last) != GIMPLE_ASM
	  && (gimple_code (last) != GIMPLE_CALL
	      || gimple_call_flags (last) & ECF_CONST))
	{
	  bool inv = true, static_p = false;
	  ssa_op_iter i;
	  tree op;
	  FOR_EACH_SSA_TREE_OPERAND (op, last, i, SSA_OP_USE)
	    if (!loop_invariant_op_p (loop, op))
	      inv = false;
	  /* Assume that code is reasonably optimized and invariant
	     constants are already identified.  */
	  if (!inv)
	    static_p = loop_static_stmt_p (loop, *ranger, query, last);
	  gimple_set_uid (last, (inv ? 1 : 0) | (static_p ? 2 : 0));
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      if (inv)
		fprintf (dump_file, "    Stmt is loop invariant\n");
	      if (static_p)
		fprintf (dump_file, "    Stmt is static "
			 "(constant in the first iteration)\n");
	    }
	  /* Loop invariants will be optimized out in loop body after
	     duplication; do not account invariant computation in code
	     size costs.

	     Similarly static computations will be optimized out in the
	     peeled copy.  */
	  if (inv || static_p)
	    continue;
	  /* Match the following:
	     _1 = i_1 < 10	<- static condition
	     _2 = n != 0	<- loop invariant condition
	     _3 = _1 & _2	<- combined static and iv statement.  */
	  tree_code code;
	  if (gimple_code (last) == GIMPLE_ASSIGN
	      && ((code = gimple_assign_rhs_code (last)) == BIT_AND_EXPR
		  || code == BIT_IOR_EXPR || code == BIT_XOR_EXPR))
	    {
	      tree op1 = gimple_assign_rhs1 (last);
	      tree op2 = gimple_assign_rhs2 (last);

	      if ((loop_invariant_op_p (loop, op1)
		   || loop_combined_static_and_iv_p (loop, op1)
		   || loop_static_op_p (loop, op1))
		  && (loop_invariant_op_p (loop, op2)
		      || loop_combined_static_and_iv_p (loop, op2)
		      || loop_static_op_p (loop, op2)))
		{
		  /* Duplicating loop header with combined conditional will
		     remove this statement in each copy.  But we account for
		     that later when seeing that condition.

		     Note that this may be overly optimistic for bit operations
		     where the static parameter may still result in a
		     non-trivial statement.  */
		  gimple_set_uid (last, 4);
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file,
			     "    Stmt combines static and invariant op\n");
		  continue;
		}
	    }
	}
      int insns = estimate_num_insns (last, &eni_size_weights);
      *limit -= insns;
      if (insns)
	code_size_cost = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  Accounting stmt as %i insns\n", insns);
      if (*limit < 0)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file,
		     "  Not duplicating bb %i: it contains too many insns.\n",
		     header->index);
	  if (query)
	    delete query;
	  return ch_impossible;
	}
    }
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  Analyzing: ");
      print_gimple_stmt (dump_file, last, 0, TDF_SLIM);
    }

  /* If the condition tests a non-IV loop variant we do not want to rotate
     the loop further.  Unless this is the original loop header.  */
  tree lhs = gimple_cond_lhs (last);
  tree rhs = gimple_cond_rhs (last);
  bool lhs_invariant = loop_invariant_op_p (loop, lhs);
  bool rhs_invariant = loop_invariant_op_p (loop, rhs);

  /* Combined conditional is a result of if combining:

     _1 = i_1 < 10	<- static condition
     _2 = n != 0	<- loop invariant condition
     _3 = _1 & _2	<- combined static and iv statement
     if (_3 != 0)	<- combined conditional

     Combined conditionals will not be optimized out in either copy.
     However the duplicated header simplifies to:

     if (n != 0)

     while the copy in the loop body simplifies to:

     if (i_1 < 10)

     So effectively the resulting code sequence will be of same length as
     the original code.

     Combined conditionals are never static or invariant, so save some work
     below.  */
  if (lhs_invariant != rhs_invariant
      && (lhs_invariant
	  || loop_combined_static_and_iv_p (loop, lhs))
      && (rhs_invariant
	  || loop_combined_static_and_iv_p (loop, rhs)))
    {
      if (query)
	delete query;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  Conditional combines static and invariant op.\n");
      return ch_win;
    }
  edge static_exit = static_loop_exit (loop, header, *ranger, query);
  if (query)
    delete query;

  if (static_exit && static_exits)
    {
      static_exits->add (static_exit);
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "    Will eliminate peeled conditional in bb %d.\n",
		 static_exit->src->index);
      /* Still look for invariant exits; exit may be both.  */
    }
  if (lhs_invariant && rhs_invariant)
    {
      if (invariant_exits)
	{
	  edge e;
	  edge_iterator ei;
	  FOR_EACH_EDGE (e, ei, header->succs)
	    if (loop_exit_edge_p (loop, e))
	      {
		if (dump_file && (dump_flags & TDF_DETAILS))
		  fprintf (dump_file,
			   "    Will eliminate invariant exit %i->%i\n",
			   e->src->index, e->dest->index);
		invariant_exits->add (e);
	      }
	}
      return ch_win_invariant_exit;
    }
  /* If the static exit fully optimizes out, it is a win to "duplicate"
     it.

     TODO: Even if duplication costs some size we may opt to do so in case
     exit probability is significant enough (do partial peeling).  */
  if (static_exit)
    return !code_size_cost ? ch_possible_zero_cost : ch_possible;

  /* We were not able to prove that the conditional will be eliminated.  */
  int insns = estimate_num_insns (last, &eni_size_weights);
  *limit -= insns;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file,
	     "  Accounting stmt as %i insns\n", insns);
  if (*limit < 0)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  Not duplicating bb %i: it contains too many insns.\n",
		 header->index);
      return ch_impossible;
    }

  return ch_possible;
}
/* Checks whether LOOP is a do-while style loop.  */

static bool
do_while_loop_p (class loop *loop)
{
  gimple *stmt = last_nondebug_stmt (loop->latch);

  /* If the latch of the loop is not empty, it is not a do-while loop.  */
  if (stmt
      && gimple_code (stmt) != GIMPLE_LABEL)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Loop %i is not do-while loop: latch is not empty.\n",
		 loop->num);
      return false;
    }

  /* If the latch does not have a single predecessor, it is not a
     do-while loop.  */
  if (!single_pred_p (loop->latch))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Loop %i is not do-while loop: latch has multiple "
		 "predecessors.\n", loop->num);
      return false;
    }
  basic_block pred = single_pred (loop->latch);

  /* If the latch predecessor doesn't exit the loop, it is not a
     do-while loop.  */
  if (!loop_exits_from_bb_p (loop, pred))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Loop %i is not do-while loop: latch predecessor "
		 "does not exit loop.\n", loop->num);
      return false;
    }
  gcond *last = safe_dyn_cast <gcond *> (*gsi_last_bb (pred));
  if (last
      && (gimple_cond_lhs (last) == boolean_false_node
	  || gimple_cond_lhs (last) == boolean_true_node)
      && gimple_cond_rhs (last) == boolean_false_node)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Loop %i is not do-while loop: latch predecessor "
		 "contains exit we optimized out.\n", loop->num);
      return false;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Loop %i is do-while loop\n", loop->num);

  return true;
}
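/* Illustrative CFG sketch (editorial) of a do-while style loop accepted
   above: the latch is empty and its single predecessor ends in the exit
   test:

     body:
       ...
       if (exit_cond) goto out;	<- latch predecessor exits the loop
     latch:			<- empty latch
       goto body;  */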
/* Update profile after header copying of LOOP.
   REGION is the original (in loop) sequence, REGION_COPY is the
   duplicated header (now outside of loop).  N_REGION is number of
   bbs duplicated.
   ELIMINATED_EDGE is edge to be removed from duplicated sequence.
   INVARIANT_EXITS are edges in the loop body to be eliminated
   since they are loop invariants.

   So we expect the following:

   // region_copy_start entry will be scaled to entry_count
     if (cond1)       <- this condition will become false
			 and we update probabilities
       goto loop_exit;
     if (cond2)       <- this condition is loop invariant
       goto loop_exit;
     goto loop_header <- this will be redirected to loop.
   // region_copy_end
   loop:

     ...

     if (cond1)       <- we need to update probability here
       goto loop_exit;
     if (cond2)       <- and determine scaling factor here.
			 moreover cond2 is now always true
       goto loop_exit;

     ...

   Adding support for more exits can be done similarly,
   but only consumer so far is tree-ssa-loop-ch and it uses only this
   to handle the common case of peeling headers which have
   conditionals known to be always true upon entry.  */
static void
update_profile_after_ch (class loop *loop,
			 basic_block *region, basic_block *region_copy,
			 unsigned n_region,
			 hash_set <edge> *invariant_exits,
			 hash_set <edge> *static_exits,
			 profile_count entry_count)
{
  for (unsigned int i = 0; i < n_region; i++)
    {
      edge exit_e, exit_e_copy, e, e_copy;
      if (EDGE_COUNT (region[i]->succs) == 1)
	{
	  region_copy[i]->count = entry_count;
	  region[i]->count -= entry_count;
	  continue;
	}

      gcc_checking_assert (EDGE_COUNT (region[i]->succs) == 2);
      if (loop_exit_edge_p (loop,
			    EDGE_SUCC (region[i], 0)))
	{
	  exit_e = EDGE_SUCC (region[i], 0);
	  exit_e_copy = EDGE_SUCC (region_copy[i], 0);
	  e = EDGE_SUCC (region[i], 1);
	  e_copy = EDGE_SUCC (region_copy[i], 1);
	}
      else
	{
	  exit_e = EDGE_SUCC (region[i], 1);
	  exit_e_copy = EDGE_SUCC (region_copy[i], 1);
	  e = EDGE_SUCC (region[i], 0);
	  e_copy = EDGE_SUCC (region_copy[i], 0);
	}
      gcc_assert (i == n_region - 1
		  || (e->dest == region[i + 1]
		      && e_copy->dest == region_copy[i + 1]));
      region_copy[i]->count = entry_count;
      profile_count exit_e_count = exit_e->count ();
      bool was_static = false;
      if (static_exits->contains (exit_e))
	{
	  /* Update profile and the conditional.
	     CFG update is done by caller.  */
	  static_exits->remove (exit_e);
	  was_static = true;
	  e_copy->probability = profile_probability::always ();
	  exit_e_copy->probability = profile_probability::never ();
	  gcond *cond_stmt
	    = as_a <gcond *>(*gsi_last_bb (region_copy[i]));
	  if (e_copy->flags & EDGE_TRUE_VALUE)
	    gimple_cond_make_true (cond_stmt);
	  else
	    gimple_cond_make_false (cond_stmt);
	  update_stmt (cond_stmt);
	  /* Header copying is a special case of jump threading, so use
	     common code to update loop body exit condition.  */
	  update_bb_profile_for_threading (region[i], entry_count, e);
	}
      else
	region[i]->count -= region_copy[i]->count;
      if (invariant_exits->contains (exit_e))
	{
	  invariant_exits->remove (exit_e);
	  /* All exits will happen in exit_e_copy which is out of the
	     loop, so increase probability accordingly.
	     If the edge is eliminated_edge we already corrected
	     profile above.  */
	  if (entry_count.nonzero_p () && !was_static)
	    set_edge_probability_and_rescale_others
		    (exit_e_copy, exit_e_count.probability_in
							(entry_count));
	  /* Eliminate in-loop conditional.  */
	  e->probability = profile_probability::always ();
	  exit_e->probability = profile_probability::never ();
	  gcond *cond_stmt = as_a <gcond *>(*gsi_last_bb (region[i]));
	  if (e->flags & EDGE_TRUE_VALUE)
	    gimple_cond_make_true (cond_stmt);
	  else
	    gimple_cond_make_false (cond_stmt);
	  update_stmt (cond_stmt);
	}
      entry_count = e_copy->count ();
    }
  /* Be sure that we have seen all invariant exit edges we are supposed
     to update.  We may have recorded some static exits we decided not
     to duplicate.  */
  gcc_checking_assert (invariant_exits->is_empty ());
}
namespace {

/* Common superclass for both header-copying phases.  */
class ch_base : public gimple_opt_pass
{
protected:
  ch_base (pass_data data, gcc::context *ctxt)
    : gimple_opt_pass (data, ctxt)
  {}

  /* Copies headers of all loops in FUN for which process_loop_p is true.  */
  unsigned int copy_headers (function *fun);

  /* Return true to copy headers of LOOP or false to skip.  */
  virtual bool process_loop_p (class loop *loop) = 0;
}; // class ch_base

const pass_data pass_data_ch =
{
  GIMPLE_PASS, /* type */
  "ch", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_TREE_CH, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_ch : public ch_base
{
public:
  pass_ch (gcc::context *ctxt)
    : ch_base (pass_data_ch, ctxt)
  {}

  /* opt_pass methods: */
  bool gate (function *) final override { return flag_tree_ch != 0; }

  /* Initialize and finalize loop structures, copying headers in between.  */
  unsigned int execute (function *) final override;

  opt_pass * clone () final override { return new pass_ch (m_ctxt); }

protected:
  /* ch_base method: */
  bool process_loop_p (class loop *loop) final override;
}; // class pass_ch

const pass_data pass_data_ch_vect =
{
  GIMPLE_PASS, /* type */
  "ch_vect", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_TREE_CH, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

/* This is a more aggressive version of the same pass, designed to run just
   before if-conversion and vectorization, to put more loops into the form
   required for those phases.  */
class pass_ch_vect : public ch_base
{
public:
  pass_ch_vect (gcc::context *ctxt)
    : ch_base (pass_data_ch_vect, ctxt)
  {}

  /* opt_pass methods: */
  bool gate (function *fun) final override
  {
    return flag_tree_ch != 0
	   && (flag_tree_loop_vectorize != 0 || fun->has_force_vectorize_loops);
  }

  /* Just copy headers, no initialization/finalization of loop structures.  */
  unsigned int execute (function *) final override;

protected:
  /* ch_base method: */
  bool process_loop_p (class loop *loop) final override;
}; // class pass_ch_vect
/* Sort comparator to order loops in the specified order.  */

static int
ch_order_loops (const void *a_, const void *b_, void *order_)
{
  int *order = (int *)order_;
  const class loop *a = *(const class loop * const *)a_;
  const class loop *b = *(const class loop * const *)b_;
  if (a->num == b->num)
    return 0;
  if (order[a->num] < order[b->num])
    return -1;
  return 1;
}
/* For all loops, copy the condition at the end of the loop body in front
   of the loop.  This is beneficial since it increases efficiency of
   code motion optimizations.  It also saves one jump on entry to the loop.  */
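/* For illustration (editorial sketch): for a loop of the form

     while (cond)
       body;

   header copying yields the equivalent of

     if (cond)
       do
	 body;
       while (cond);

   so the copied test guards loop entry and the in-loop test executes at
   the end of the body.  */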
unsigned int
ch_base::copy_headers (function *fun)
{
  basic_block *bbs, *copied_bbs;
  unsigned bbs_size;
  bool changed = false;

  if (number_of_loops (fun) <= 1)
    return 0;

  bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (fun));
  copied_bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (fun));
  bbs_size = n_basic_blocks_for_fn (fun);

  struct candidate
    {
      class loop *loop;
      unsigned int nheaders;
      hash_set <edge> *invariant_exits, *static_exits;
    };
  auto_vec<struct candidate> candidates;
  auto_vec<std::pair<edge, loop_p> > copied;
  auto_vec<class loop *> loops_to_unloop;
  auto_vec<int> loops_to_unloop_nunroll;

  mark_dfs_back_edges ();
  gimple_ranger *ranger = new gimple_ranger;
  for (auto loop : loops_list (cfun, 0))
    {
      int initial_limit = optimize_loop_for_speed_p (loop)
			  ? param_max_loop_header_insns : 0;
      int remaining_limit = initial_limit;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Analyzing loop %i\n", loop->num);

      /* If the loop is already a do-while style one (either because it was
	 written as such, or because jump threading transformed it into one),
	 we might be in fact peeling the first iteration of the loop.  This
	 in general is not a good idea.  Also avoid touching infinite loops.  */
      if (!loop_has_exit_edges (loop)
	  || !process_loop_p (loop))
	continue;

      basic_block header = loop->header;
      estimate_numbers_of_iterations (loop);
      if (!get_max_loop_iterations_int (loop))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Loop %d never loops.\n", loop->num);
	  scale_loop_profile (loop, profile_probability::always (), 0);
	  loops_to_unloop.safe_push (loop);
	  loops_to_unloop_nunroll.safe_push (0);
	  continue;
	}

      /* Iterate the header copying up to limit; this takes care of the cases
	 like while (a && b) {...}, where we want to have both of the conditions
	 copied.  TODO -- handle while (a || b) - like cases, by not requiring
	 the header to have just a single successor and copying up to
	 postdominator.  */
      unsigned int nheaders = 0;
      unsigned int last_win_nheaders = 0;
      bool last_win_invariant_exit = false;
      ch_decision ret;
      auto_vec <ch_decision, 32> decision;
      hash_set <edge> *invariant_exits = new hash_set <edge>;
      hash_set <edge> *static_exits = new hash_set <edge>;
      while ((ret = should_duplicate_loop_header_p (header, loop, ranger,
						    &remaining_limit,
						    invariant_exits,
						    static_exits))
	     != ch_impossible)
	{
	  nheaders++;
	  decision.safe_push (ret);
	  if (ret >= ch_win)
	    {
	      last_win_nheaders = nheaders;
	      last_win_invariant_exit = (ret == ch_win_invariant_exit);
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "  Duplicating bb %i is a win\n",
			 header->index);
	    }
	  else if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "  May duplicate bb %i\n", header->index);

	  /* Find a successor of header that is inside a loop; i.e. the new
	     header after the condition is copied.  */
	  if (flow_bb_inside_loop_p (loop, EDGE_SUCC (header, 0)->dest))
	    header = EDGE_SUCC (header, 0)->dest;
	  else
	    header = EDGE_SUCC (header, 1)->dest;
	}

      /* Try to turn loop into do-while.  This means ensuring that
	 last duplicated header will not have loop invariant exit.  */
      if (last_win_nheaders && last_win_invariant_exit
	  && nheaders > last_win_nheaders)
	{
	  last_win_nheaders++;
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file,
		     "  Duplicating additional BB to obtain do-while loop\n");
	}
      else if (!last_win_nheaders && nheaders && !do_while_loop_p (loop))
	{
	  last_win_nheaders = 1;
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file,
		     "  Duplicating header BB to obtain do-while loop\n");
	}
      /* "Duplicate" all BBs with zero cost following last basic blocks we
	 decided to copy.  */
      while (last_win_nheaders < decision.length ()
	     && decision[last_win_nheaders] == ch_possible_zero_cost)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file,
		     "  Duplicating extra bb is a win; it has zero cost\n");
	  last_win_nheaders++;
	}

      if (last_win_nheaders)
	candidates.safe_push ({loop, last_win_nheaders,
			       invariant_exits, static_exits});
      else
	{
	  delete invariant_exits;
	  delete static_exits;
	}
    }
  /* Do not use ranger after we change the IL and have not updated SSA.  */
  delete ranger;

  for (auto candidate : candidates)
    {
      class loop *loop = candidate.loop;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Copying headers of loop %i\n", loop->num);

      basic_block header = loop->header;
      edge nonexit = NULL;
      edge exit = NULL;
      unsigned int n_bbs = 0;
      int nexits = 0;
      profile_count exit_count = profile_count::zero ();
      profile_count entry_count = profile_count::zero ();
      edge e;
      edge_iterator ei;
      FOR_EACH_EDGE (e, ei, loop->header->preds)
	if (e->src != loop->latch)
	  entry_count += e->count ();
      while (n_bbs < candidate.nheaders)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "  Will duplicate bb %i\n", header->index);
	  /* Find a successor of header that is inside a loop; i.e. the new
	     header after the condition is copied.  */
	  if (flow_bb_inside_loop_p (loop, EDGE_SUCC (header, 0)->dest))
	    {
	      nonexit = EDGE_SUCC (header, 0);
	      exit = EDGE_SUCC (header, 1);
	    }
	  else
	    {
	      nonexit = EDGE_SUCC (header, 1);
	      exit = EDGE_SUCC (header, 0);
	    }
	  exit_count += exit->count ();
	  nexits++;
	  bbs[n_bbs++] = header;
	  gcc_assert (bbs_size > n_bbs);
	  header = nonexit->dest;
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Duplicating header of the loop %d up to edge %d->%d\n",
		 loop->num, exit->src->index, exit->dest->index);

      /* Ensure that the header will have just the latch as a predecessor
	 inside the loop.  */
      if (!single_pred_p (nonexit->dest))
	{
	  header = split_edge (nonexit);
	  exit = single_pred_edge (header);
	}

      edge entry = loop_preheader_edge (loop);

      propagate_threaded_block_debug_into (nonexit->dest, entry->dest);
      if (!gimple_duplicate_seme_region (entry, exit, bbs, n_bbs, copied_bbs,
					 true))
	{
	  delete candidate.static_exits;
	  delete candidate.invariant_exits;
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Duplication failed.\n");
	  continue;
	}
      if (loop->header->count.initialized_p ())
	update_profile_after_ch (loop, bbs, copied_bbs, n_bbs,
				 candidate.invariant_exits,
				 candidate.static_exits,
				 entry_count);
      delete candidate.static_exits;
      delete candidate.invariant_exits;
      copied.safe_push (std::make_pair (entry, loop));
      /* If the loop has the form "for (i = j; i < j + 10; i++)" then
	 this copying can introduce a case where we rely on undefined
	 signed overflow to eliminate the preheader condition, because
	 we assume that "j < j + 10" is true.  We don't want to warn
	 about that case for -Wstrict-overflow, because in general we
	 don't warn about overflow involving loops.  Prevent the
	 warning by setting the no_warning flag in the condition.  */
      if (warn_strict_overflow > 0)
	{
	  unsigned int i;

	  for (i = 0; i < n_bbs; ++i)
	    {
	      gimple_stmt_iterator bsi;

	      for (bsi = gsi_start_bb (copied_bbs[i]);
		   !gsi_end_p (bsi);
		   gsi_next (&bsi))
		{
		  gimple *stmt = gsi_stmt (bsi);
		  if (gimple_code (stmt) == GIMPLE_COND)
		    {
		      tree lhs = gimple_cond_lhs (stmt);
		      if (gimple_cond_code (stmt) != EQ_EXPR
			  && gimple_cond_code (stmt) != NE_EXPR
			  && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
			  && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs)))
			suppress_warning (stmt, OPT_Wstrict_overflow_);
		    }
		  else if (is_gimple_assign (stmt))
		    {
		      enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
		      tree rhs1 = gimple_assign_rhs1 (stmt);
		      if (TREE_CODE_CLASS (rhs_code) == tcc_comparison
			  && rhs_code != EQ_EXPR
			  && rhs_code != NE_EXPR
			  && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
			  && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (rhs1)))
			suppress_warning (stmt, OPT_Wstrict_overflow_);
		    }
		}
	    }
	}
      /* Update header of the loop.  */
      loop->header = header;
      /* Find correct latch.  We only duplicate chain of conditionals so
	 there should be precisely two edges to the new header.  One entry
	 edge and one to latch.  */
      FOR_EACH_EDGE (e, ei, loop->header->preds)
	if (header != e->src)
	  {
	    loop->latch = e->src;
	    break;
	  }
      /* Ensure that the latch is simple.  */
      if (!single_succ_p (loop_latch_edge (loop)->src))
	split_edge (loop_latch_edge (loop));

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  if (do_while_loop_p (loop))
	    fprintf (dump_file, "Loop %d is now do-while loop.\n", loop->num);
	  else
	    fprintf (dump_file, "Loop %d is still not do-while loop.\n",
		     loop->num);
	  fprintf (dump_file, "Exit count: ");
	  exit_count.dump (dump_file);
	  fprintf (dump_file, "\nEntry count: ");
	  entry_count.dump (dump_file);
	  fprintf (dump_file, "\n");
	}
      /* We possibly decreased number of iterations by 1.  */
      auto_vec<edge> exits = get_loop_exit_edges (loop);
      bool precise = (nexits == (int) exits.length ());
      /* Check that loop may not terminate in other way than via
	 basic blocks we duplicated.  */
      if (precise)
	{
	  basic_block *bbs = get_loop_body (loop);
	  for (unsigned i = 0; i < loop->num_nodes && precise; ++i)
	    {
	      basic_block bb = bbs[i];
	      bool found_exit = false;
	      FOR_EACH_EDGE (e, ei, bb->succs)
		if (!flow_bb_inside_loop_p (loop, e->dest))
		  {
		    found_exit = true;
		    break;
		  }
	      /* If BB has exit, it was duplicated.  */
	      if (found_exit)
		continue;
	      /* Give up on irreducible loops.  */
	      if (bb->flags & BB_IRREDUCIBLE_LOOP)
		{
		  precise = false;
		  break;
		}
	      /* Check that inner loops are finite.  */
	      for (class loop *l = bb->loop_father; l != loop && precise;
		   l = loop_outer (l))
		if (!l->finite_p)
		  precise = false;
	      /* Verify that there is no statement that may terminate
		 execution in a way not visible to CFG.  */
	      for (gimple_stmt_iterator bsi = gsi_start_bb (bb);
		   !gsi_end_p (bsi); gsi_next (&bsi))
		if (stmt_can_terminate_bb_p (gsi_stmt (bsi)))
		  precise = false;
	    }
	  free (bbs);
	}
      if (precise
	  && get_max_loop_iterations_int (loop) == 1)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Loop %d no longer loops.\n", loop->num);
	  scale_loop_profile (loop, profile_probability::always (), 0);
	  loops_to_unloop.safe_push (loop);
	  loops_to_unloop_nunroll.safe_push (0);
	}
      else if (precise)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file,
		     "Peeled all exits:"
		     " decreased number of iterations of loop %d by 1.\n",
		     loop->num);
	  adjust_loop_info_after_peeling (loop, 1, true);
	}
      else if (exit_count >= entry_count.apply_scale (9, 10))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file,
		     "Peeled likely exits: likely decreased number "
		     "of iterations of loop %d by 1.\n", loop->num);
	  adjust_loop_info_after_peeling (loop, 1, false);
	}
      else if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Not decreased number"
		 " of iterations of loop %d; likely exits remains.\n",
		 loop->num);

      changed = true;
    }

  if (changed)
    {
      update_ssa (TODO_update_ssa);
      /* After updating SSA form perform CSE on the loop header
	 copies.  This is esp. required for the pass before
	 vectorization since nothing cleans up copied exit tests
	 that can now be simplified.  CSE from the entry of the
	 region we copied till all loop exit blocks but not
	 entering the loop itself.  */
      for (unsigned i = 0; i < copied.length (); ++i)
	{
	  edge entry = copied[i].first;
	  loop_p loop = copied[i].second;
	  auto_vec<edge> exit_edges = get_loop_exit_edges (loop);
	  bitmap exit_bbs = BITMAP_ALLOC (NULL);
	  for (unsigned j = 0; j < exit_edges.length (); ++j)
	    bitmap_set_bit (exit_bbs, exit_edges[j]->dest->index);
	  bitmap_set_bit (exit_bbs, loop->header->index);
	  do_rpo_vn (cfun, entry, exit_bbs);
	  BITMAP_FREE (exit_bbs);
	}
    }
  if (!loops_to_unloop.is_empty ())
    {
      /* Make sure loops are ordered inner to outer for unlooping.  */
      if (loops_to_unloop.length () != 1)
	{
	  auto_vec<int, 8> order;
	  order.safe_grow (number_of_loops (cfun), true);
	  int i = 0;
	  for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
	    order[loop->num] = i++;
	  loops_to_unloop.sort (ch_order_loops, order.address ());
	}
      bool irred_invalidated;
      auto_bitmap lc_invalidated;
      auto_vec<edge> edges_to_remove;
      unloop_loops (loops_to_unloop, loops_to_unloop_nunroll, edges_to_remove,
		    lc_invalidated, &irred_invalidated);
      if (loops_state_satisfies_p (fun, LOOP_CLOSED_SSA)
	  && !bitmap_empty_p (lc_invalidated))
	rewrite_into_loop_closed_ssa (NULL, 0);
      changed = true;
    }
  free (bbs);
  free (copied_bbs);

  return changed ? TODO_cleanup_cfg : 0;
}
/* Initialize the loop structures we need, and finalize after.  */

unsigned int
pass_ch::execute (function *fun)
{
  loop_optimizer_init (LOOPS_NORMAL
		       | LOOPS_HAVE_RECORDED_EXITS);

  unsigned int res = copy_headers (fun);

  loop_optimizer_finalize ();
  return res;
}

/* Assume an earlier phase has already initialized all the loop structures
   that we need here (and perhaps others too), and that these will be
   finalized by later passes.  */

unsigned int
pass_ch_vect::execute (function *fun)
{
  return copy_headers (fun);
}
/* Apply header copying according to a very simple test of do-while shape.  */

bool
pass_ch::process_loop_p (class loop *)
{
  return true;
}

/* Apply header-copying to loops where we might enable vectorization.  */

bool
pass_ch_vect::process_loop_p (class loop *loop)
{
  if (!flag_tree_loop_vectorize && !loop->force_vectorize)
    return false;

  if (loop->dont_vectorize)
    return false;

  /* The vectorizer won't handle anything with multiple exits, so skip.  */
  edge exit = single_exit (loop);
  if (!exit)
    return false;

  if (!do_while_loop_p (loop))
    return true;

  return false;
}

} // anon namespace

gimple_opt_pass *
make_pass_ch_vect (gcc::context *ctxt)
{
  return new pass_ch_vect (ctxt);
}

gimple_opt_pass *
make_pass_ch (gcc::context *ctxt)
{
  return new pass_ch (ctxt);
}