/* Induction variable canonicalization and loop peeling.
   Copyright (C) 2004-2025 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass detects the loops that iterate a constant number of times,
   adds a canonical induction variable (step -1, tested against 0)
   and replaces the exit test.  This enables the less powerful rtl
   level analysis to use this information.

   This might spoil the code in some cases (by increasing register pressure).
   Note that in the case the new variable is not needed, ivopts will get rid
   of it, so it might only be a problem when there are no other linear induction
   variables.  In that case the created optimization possibilities are likely
   to pay off.

   We also perform
     - complete unrolling (or peeling) when the loop rolls few enough
       times
     - simple peeling (i.e. copying few initial iterations prior to the loop)
       when the number of iterations estimate is known (typically from the
       profile feedback).  */
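/* As a sketch of the first transformation (the loop below is illustrative
   only and is not taken from any testcase), a counted loop such as

     for (i = 0; i < n; i++)
       body ();

   gets a new induction variable that starts (roughly) at the number of
   iterations, steps by -1 and is tested against 0 on the exit edge:

     ivtmp = n;
     do
       {
         body ();
         ivtmp = ivtmp - 1;
       }
     while (ivtmp != 0);

   The original counter stays around; if it ends up unused, ivopts removes
   it later, as noted above.  */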
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tree-pass.h"
#include "gimple-pretty-print.h"
#include "fold-const.h"
#include "gimple-iterator.h"
#include "gimple-fold.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "tree-into-ssa.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "tree-inline.h"
#include "tree-cfgcleanup.h"
#include "tree-ssa-sccvn.h"
#include "tree-vectorizer.h" /* For find_loop_location */
/* Specifies types of loops that may be unrolled.  */

enum unroll_level
{
  UL_SINGLE_ITER,	/* Only loops that exit immediately in the first
			   iteration.  */
  UL_NO_GROWTH,		/* Only loops whose unrolling will not cause increase
			   in code size.  */
  UL_ALL		/* All suitable loops.  */
};
/* Adds a canonical induction variable to LOOP iterating NITER times.  EXIT
   is the exit edge whose condition is replaced.  The ssa versions of the new
   IV before and after increment will be stored in VAR_BEFORE and VAR_AFTER
   if they are not NULL.  */

void
create_canonical_iv (class loop *loop, edge exit, tree niter,
		     tree *var_before = NULL, tree *var_after = NULL)
{
  edge in;
  tree type, var;
  gcond *cond;
  gimple_stmt_iterator incr_at;
  enum tree_code cmp;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Added canonical iv to loop %d, ", loop->num);
      print_generic_expr (dump_file, niter, TDF_SLIM);
      fprintf (dump_file, " iterations.\n");
    }

  cond = as_a <gcond *> (*gsi_last_bb (exit->src));
  in = EDGE_SUCC (exit->src, 0);
  if (in == exit)
    in = EDGE_SUCC (exit->src, 1);

  /* Note that we do not need to worry about overflows, since
     type of niter is always unsigned and all comparisons are
     just for equality/nonequality -- i.e. everything works
     with modulo arithmetic.  */

  type = TREE_TYPE (niter);
  niter = fold_build2 (PLUS_EXPR, type,
		       niter,
		       build_int_cst (type, 1));
  incr_at = gsi_last_bb (in->src);
  create_iv (niter, PLUS_EXPR,
	     build_int_cst (type, -1),
	     NULL_TREE, loop,
	     &incr_at, false, var_before, &var);
  if (var_after)
    *var_after = var;

  cmp = (exit->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
  gimple_cond_set_code (cond, cmp);
  gimple_cond_set_lhs (cond, var);
  gimple_cond_set_rhs (cond, build_int_cst (type, 0));
  update_stmt (cond);
}
/* Describe size of loop as detected by tree_estimate_loop_size.  */
struct loop_size
{
  /* Number of instructions in the loop.  */
  int overall;

  /* Number of instructions that will be likely optimized out in
     peeled iterations of loop (i.e. computation based on induction
     variable where induction variable starts at known constant).  */
  int eliminated_by_peeling;

  /* Number of instructions that cannot be further optimized in the
     peeled loop, for example volatile accesses.  */
  int not_eliminatable_after_peeling;

  /* Same statistics for last iteration of loop: it is smaller because
     instructions after exit are not executed.  */
  int last_iteration;
  int last_iteration_eliminated_by_peeling;
  int last_iteration_not_eliminatable_after_peeling;

  /* If some IV computation will become constant.  */
  bool constant_iv;

  /* Number of call stmts that are not a builtin and are pure or const
     present on the hot path.  */
  int num_pure_calls_on_hot_path;
  /* Number of call stmts that are not a builtin and are not pure nor const
     present on the hot path.  */
  int num_non_pure_calls_on_hot_path;
  /* Number of statements other than calls in the loop.  */
  int non_call_stmts_on_hot_path;
  /* Number of branches seen on the hot path.  */
  int num_branches_on_hot_path;
};
/* Return true if OP in STMT will be constant after peeling LOOP.  */

static bool
constant_after_peeling (tree op, gimple *stmt, class loop *loop)
{
  if (CONSTANT_CLASS_P (op))
    return true;

  /* Get at the actual SSA operand.  */
  if (handled_component_p (op)
      && TREE_CODE (TREE_OPERAND (op, 0)) == SSA_NAME)
    op = TREE_OPERAND (op, 0);

  /* We can still fold accesses to constant arrays when index is known.  */
  if (TREE_CODE (op) != SSA_NAME)
    {
      tree base = op;

      /* First make fast look if we see constant array inside.  */
      while (handled_component_p (base))
	base = TREE_OPERAND (base, 0);
      if ((DECL_P (base)
	   && ctor_for_folding (base) != error_mark_node)
	  || CONSTANT_CLASS_P (base))
	{
	  /* If so, see if we understand all the indices.  */
	  base = op;
	  while (handled_component_p (base))
	    {
	      if (TREE_CODE (base) == ARRAY_REF
		  && !constant_after_peeling (TREE_OPERAND (base, 1), stmt, loop))
		return false;
	      base = TREE_OPERAND (base, 0);
	    }
	  return true;
	}
      return false;
    }

  /* Induction variables are constants when defined in loop.  */
  if (loop_containing_stmt (stmt) != loop)
    return false;

  tree ev = analyze_scalar_evolution (loop, op);
  if (chrec_contains_undetermined (ev)
      || chrec_contains_symbols (ev))
    {
      if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (op)))
	{
	  gphi *phi = nullptr;
	  gassign *ass = nullptr;

	  if (is_a <gassign *> (SSA_NAME_DEF_STMT (op)))
	    {
	      ass = as_a <gassign *> (SSA_NAME_DEF_STMT (op));
	      if (TREE_CODE (gimple_assign_rhs1 (ass)) == SSA_NAME)
		phi = dyn_cast <gphi *>
			(SSA_NAME_DEF_STMT (gimple_assign_rhs1 (ass)));
	    }
	  else if (is_a <gphi *> (SSA_NAME_DEF_STMT (op)))
	    {
	      phi = as_a <gphi *> (SSA_NAME_DEF_STMT (op));
	      if (gimple_bb (phi) == loop->header)
		{
		  tree def = gimple_phi_arg_def_from_edge
			       (phi, loop_latch_edge (loop));
		  if (TREE_CODE (def) == SSA_NAME
		      && is_a <gassign *> (SSA_NAME_DEF_STMT (def)))
		    ass = as_a <gassign *> (SSA_NAME_DEF_STMT (def));
		}
	    }
	  if (ass && phi)
	    {
	      tree rhs1 = gimple_assign_rhs1 (ass);
	      if (gimple_assign_rhs_class (ass) == GIMPLE_BINARY_RHS
		  && CONSTANT_CLASS_P (gimple_assign_rhs2 (ass))
		  && rhs1 == gimple_phi_result (phi)
		  && gimple_bb (phi) == loop->header
		  && (gimple_phi_arg_def_from_edge (phi, loop_latch_edge (loop))
		      == gimple_assign_lhs (ass))
		  && (CONSTANT_CLASS_P (gimple_phi_arg_def_from_edge
					  (phi, loop_preheader_edge (loop)))))
		return true;
	    }
	}
      return false;
    }
  return true;
}
/* Computes an estimated number of insns in LOOP.
   EXIT (if non-NULL) is an exit edge that will be eliminated in all but last
   iteration of the loop.
   EDGE_TO_CANCEL (if non-NULL) is a non-exit edge eliminated in the last
   iteration of the loop.
   Return results in SIZE, estimating benefits for complete unrolling exiting
   by EXIT.  Stop estimating after UPPER_BOUND is met.  Return true in this
   case.  */

static bool
tree_estimate_loop_size (class loop *loop, edge exit, edge edge_to_cancel,
			 struct loop_size *size, int upper_bound)
{
  basic_block *body = get_loop_body (loop);
  gimple_stmt_iterator gsi;
  unsigned int i;
  bool after_exit;
  auto_vec<basic_block> path = get_loop_hot_path (loop);

  size->overall = 0;
  size->eliminated_by_peeling = 0;
  size->not_eliminatable_after_peeling = 0;
  size->last_iteration = 0;
  size->last_iteration_eliminated_by_peeling = 0;
  size->last_iteration_not_eliminatable_after_peeling = 0;
  size->num_pure_calls_on_hot_path = 0;
  size->num_non_pure_calls_on_hot_path = 0;
  size->non_call_stmts_on_hot_path = 0;
  size->num_branches_on_hot_path = 0;
  size->constant_iv = 0;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Estimating sizes for loop %i\n", loop->num);
  for (i = 0; i < loop->num_nodes; i++)
    {
      if (edge_to_cancel && body[i] != edge_to_cancel->src
	  && dominated_by_p (CDI_DOMINATORS, body[i], edge_to_cancel->src))
	after_exit = true;
      else
	after_exit = false;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, " BB: %i, after_exit: %i\n", body[i]->index,
		 after_exit);

      for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  int num = estimate_num_insns (stmt, &eni_size_weights);
	  bool not_eliminatable_after_peeling = false;
	  bool likely_eliminated = false;
	  bool likely_eliminated_last = false;
	  bool likely_eliminated_peeled = false;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, " size: %3i ", num);
	      print_gimple_stmt (dump_file, gsi_stmt (gsi), 0);
	    }

	  /* Look for reasons why we might optimize this stmt away.  */

	  if (gimple_has_side_effects (stmt))
	    not_eliminatable_after_peeling = true;
	  else
	    {
	      /* Exit conditional.  */
	      if (exit && body[i] == exit->src
		  && stmt == *gsi_last_bb (exit->src))
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, " Exit condition will be eliminated "
			     "in peeled copies.\n");
		  likely_eliminated_peeled = true;
		}
	      if (edge_to_cancel && body[i] == edge_to_cancel->src
		  && stmt == *gsi_last_bb (edge_to_cancel->src))
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, " Exit condition will be eliminated "
			     "in last copy.\n");
		  likely_eliminated_last = true;
		}
	      /* Sets of IV variables.  */
	      if (gimple_code (stmt) == GIMPLE_ASSIGN
		  && constant_after_peeling (gimple_assign_lhs (stmt), stmt, loop))
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, " Induction variable computation will"
			     " be folded away.\n");
		  likely_eliminated = true;
		}
	      /* Assignments of IV variables.  */
	      else if (gimple_code (stmt) == GIMPLE_ASSIGN
		       && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME
		       && constant_after_peeling (gimple_assign_rhs1 (stmt),
						  stmt, loop)
		       && (gimple_assign_rhs_class (stmt) != GIMPLE_BINARY_RHS
			   || constant_after_peeling (gimple_assign_rhs2 (stmt),
						      stmt, loop))
		       && gimple_assign_rhs_class (stmt) != GIMPLE_TERNARY_RHS)
		{
		  size->constant_iv = true;
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file,
			     " Constant expression will be folded away.\n");
		  likely_eliminated = true;
		}
	      /* Conditionals.  */
	      else if ((gimple_code (stmt) == GIMPLE_COND
			&& constant_after_peeling (gimple_cond_lhs (stmt), stmt,
						   loop)
			&& constant_after_peeling (gimple_cond_rhs (stmt), stmt,
						   loop)
			/* We don't simplify all constant compares so make sure
			   they are not both constant already.  See PR70288.  */
			&& (! is_gimple_min_invariant (gimple_cond_lhs (stmt))
			    || ! is_gimple_min_invariant
				   (gimple_cond_rhs (stmt))))
		       || (gimple_code (stmt) == GIMPLE_SWITCH
			   && constant_after_peeling (gimple_switch_index (
							as_a <gswitch *> (stmt)),
						      stmt, loop)
			   && ! is_gimple_min_invariant
				  (gimple_switch_index
				     (as_a <gswitch *> (stmt)))))
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, " Constant conditional.\n");
		  likely_eliminated = true;
		}
	    }

	  size->overall += num;
	  if (likely_eliminated || likely_eliminated_peeled)
	    size->eliminated_by_peeling += num;
	  if (not_eliminatable_after_peeling)
	    size->not_eliminatable_after_peeling += num;
	  if (!after_exit)
	    {
	      size->last_iteration += num;
	      if (likely_eliminated || likely_eliminated_last)
		size->last_iteration_eliminated_by_peeling += num;
	      if (not_eliminatable_after_peeling)
		size->last_iteration_not_eliminatable_after_peeling += num;
	    }
	  if ((size->overall * 3 / 2 - size->eliminated_by_peeling
	       - size->last_iteration_eliminated_by_peeling) > upper_bound)
	    {
	      free (body);
	      return true;
	    }
	}
    }
  while (path.length ())
    {
      basic_block bb = path.pop ();
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  if (gimple_code (stmt) == GIMPLE_CALL
	      && !gimple_inexpensive_call_p (as_a <gcall *> (stmt)))
	    {
	      int flags = gimple_call_flags (stmt);
	      if (flags & (ECF_PURE | ECF_CONST))
		size->num_pure_calls_on_hot_path++;
	      else
		size->num_non_pure_calls_on_hot_path++;
	      size->num_branches_on_hot_path++;
	    }
	  /* Count inexpensive calls as non-calls, because they will likely
	     expand inline.  */
	  else if (gimple_code (stmt) != GIMPLE_DEBUG)
	    size->non_call_stmts_on_hot_path++;
	  if (((gimple_code (stmt) == GIMPLE_COND
		&& (!constant_after_peeling (gimple_cond_lhs (stmt), stmt, loop)
		    || !constant_after_peeling (gimple_cond_rhs (stmt), stmt,
						loop)))
	       || (gimple_code (stmt) == GIMPLE_SWITCH
		   && !constant_after_peeling (gimple_switch_index (
						 as_a <gswitch *> (stmt)),
					       stmt, loop)))
	      && (!exit || bb != exit->src))
	    size->num_branches_on_hot_path++;
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "size: %i-%i, last_iteration: %i-%i\n", size->overall,
	     size->eliminated_by_peeling, size->last_iteration,
	     size->last_iteration_eliminated_by_peeling);

  free (body);
  return false;
}
/* Estimate number of insns of completely unrolled loop.
   It is (NUNROLL + 1) * size of loop body, taking into account
   the fact that in the last copy everything after the exit conditional
   is dead and that some instructions will be eliminated after
   peeling.  Set *EST_ELIMINATED to the number of stmts that could be
   optimistically eliminated by followup transforms.  */
static unsigned HOST_WIDE_INT
estimated_unrolled_size (struct loop_size *size,
			 unsigned HOST_WIDE_INT *est_eliminated,
			 unsigned HOST_WIDE_INT nunroll)
{
  HOST_WIDE_INT unr_insns = ((nunroll)
			     * (HOST_WIDE_INT) (size->overall
						- size->eliminated_by_peeling));
  HOST_WIDE_INT not_elim
    = ((nunroll) * (HOST_WIDE_INT) size->not_eliminatable_after_peeling);
  unr_insns += size->last_iteration - size->last_iteration_eliminated_by_peeling;
  not_elim += size->last_iteration_not_eliminatable_after_peeling;

  /* Testcases rely on rounding up, so do not write as
     (unr_insns - not_elim) / 3.  */
  *est_eliminated = unr_insns - not_elim - (unr_insns - not_elim) * 2 / 3;
  return unr_insns;
}
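/* A worked example (the numbers are invented for this sketch, not taken from
   any testcase): with size->overall = 10, size->eliminated_by_peeling = 4,
   size->last_iteration = 8, size->last_iteration_eliminated_by_peeling = 6,
   no non-eliminatable statements and nunroll = 3, the estimate is
   unr_insns = 3 * (10 - 4) + (8 - 6) = 20 and
   *est_eliminated = 20 - 20 * 2 / 3 = 7, i.e. roughly a third of the
   unrolled sequence is assumed to be cleaned up by followup transforms.  */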
/* Loop LOOP is known to not loop.  See if there is an edge in the loop
   body that can be removed to make the loop always exit and at
   the same time it does not make any code potentially executed
   during the last iteration dead.

   After complete unrolling we still may get rid of the conditional
   on the exit in the last copy even if we have no idea what it does.
   This is quite a common case for loops of the form

     int a[5];
     for (i = 0; i < b; i++)
       a[i] = 1;

   Here we prove the loop to iterate 5 times but we do not know
   it from the induction variable.

   For now we handle only the simple case where there is an exit condition
   just before the latch block and the latch block contains no statements
   with side effects that may otherwise terminate the execution of the loop
   (such as by EH or by terminating the program or longjmp).

   In the general case we may want to cancel the paths leading to statements
   loop-niter identified as having undefined effect in the last iteration.
   The other cases are hopefully rare and will be cleaned up later.  */

static edge
loop_edge_to_cancel (class loop *loop)
{
  unsigned i;
  edge edge_to_cancel;
  gimple_stmt_iterator gsi;

  /* We want only one predecessor of the loop.  */
  if (EDGE_COUNT (loop->latch->preds) > 1)
    return NULL;

  auto_vec<edge> exits = get_loop_exit_edges (loop);

  FOR_EACH_VEC_ELT (exits, i, edge_to_cancel)
    {
      /* Find the other edge than the loop exit
	 leaving the conditional.  */
      if (EDGE_COUNT (edge_to_cancel->src->succs) != 2)
	continue;
      if (EDGE_SUCC (edge_to_cancel->src, 0) == edge_to_cancel)
	edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 1);
      else
	edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 0);

      /* We only can handle conditionals.  */
      if (!(edge_to_cancel->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
	continue;

      /* We should never have conditionals in the loop latch.  */
      gcc_assert (edge_to_cancel->dest != loop->header);

      /* Check that it leads to loop latch.  */
      if (edge_to_cancel->dest != loop->latch)
	continue;

      /* Verify that the code in loop latch does nothing that may end program
	 execution without really reaching the exit.  This may include
	 non-pure/const function calls, EH statements, volatile ASMs etc.  */
      for (gsi = gsi_start_bb (loop->latch); !gsi_end_p (gsi); gsi_next (&gsi))
	if (gimple_has_side_effects (gsi_stmt (gsi)))
	  return NULL;
      return edge_to_cancel;
    }
  return NULL;
}
/* Remove all tests for exits that are known to be taken after LOOP was
   peeled NPEELED times.  Put gcc_unreachable before every statement
   known to not be executed.  */

static bool
remove_exits_and_undefined_stmts (class loop *loop, unsigned int npeeled)
{
  class nb_iter_bound *elt;
  bool changed = false;

  for (elt = loop->bounds; elt; elt = elt->next)
    {
      /* If statement is known to be undefined after peeling, turn it
	 into unreachable (or trap when debugging experience is supposed
	 to be good).  */
      if (!elt->is_exit
	  && wi::ltu_p (elt->bound, npeeled))
	{
	  gimple_stmt_iterator gsi = gsi_for_stmt (elt->stmt);
	  location_t loc = gimple_location (elt->stmt);
	  gcall *stmt = gimple_build_builtin_unreachable (loc);
	  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
	  split_block (gimple_bb (stmt), stmt);
	  changed = true;
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Forced statement unreachable: ");
	      print_gimple_stmt (dump_file, elt->stmt, 0);
	    }
	}
      /* If we know the exit will be taken after peeling, update.  */
      else if (elt->is_exit
	       && wi::leu_p (elt->bound, npeeled))
	{
	  basic_block bb = gimple_bb (elt->stmt);
	  edge exit_edge = EDGE_SUCC (bb, 0);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Forced exit to be taken: ");
	      print_gimple_stmt (dump_file, elt->stmt, 0);
	    }
	  if (!loop_exit_edge_p (loop, exit_edge))
	    exit_edge = EDGE_SUCC (bb, 1);
	  exit_edge->probability = profile_probability::always ();
	  gcc_checking_assert (loop_exit_edge_p (loop, exit_edge));
	  gcond *cond_stmt = as_a <gcond *> (elt->stmt);
	  if (exit_edge->flags & EDGE_TRUE_VALUE)
	    gimple_cond_make_true (cond_stmt);
	  else
	    gimple_cond_make_false (cond_stmt);
	  update_stmt (cond_stmt);
	  changed = true;
	}
    }
  return changed;
}
/* Remove all exits that are known to be never taken because of the loop bound
   discovered.  */

static bool
remove_redundant_iv_tests (class loop *loop)
{
  class nb_iter_bound *elt;
  bool changed = false;

  if (!loop->any_upper_bound)
    return false;
  for (elt = loop->bounds; elt; elt = elt->next)
    {
      /* Exit is pointless if it won't be taken before loop reaches
	 upper bound.  */
      if (elt->is_exit && loop->any_upper_bound
	  && wi::ltu_p (loop->nb_iterations_upper_bound, elt->bound))
	{
	  basic_block bb = gimple_bb (elt->stmt);
	  edge exit_edge = EDGE_SUCC (bb, 0);
	  class tree_niter_desc niter;

	  if (!loop_exit_edge_p (loop, exit_edge))
	    exit_edge = EDGE_SUCC (bb, 1);

	  /* Only when we know the actual number of iterations, not
	     just a bound, we can remove the exit.  */
	  if (!number_of_iterations_exit (loop, exit_edge,
					  &niter, false, false)
	      || !integer_onep (niter.assumptions)
	      || !integer_zerop (niter.may_be_zero)
	      || TREE_CODE (niter.niter) != INTEGER_CST
	      || !wi::ltu_p (widest_int::from (loop->nb_iterations_upper_bound,
					       SIGNED),
			     wi::to_widest (niter.niter)))
	    continue;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Removed pointless exit: ");
	      print_gimple_stmt (dump_file, elt->stmt, 0);
	    }
	  gcond *cond_stmt = as_a <gcond *> (elt->stmt);
	  if (exit_edge->flags & EDGE_TRUE_VALUE)
	    gimple_cond_make_false (cond_stmt);
	  else
	    gimple_cond_make_true (cond_stmt);
	  update_stmt (cond_stmt);
	  changed = true;
	}
    }
  return changed;
}
/* Stores loops that will be unlooped and edges that will be removed
   after we process whole loop tree.  */
static vec<loop_p> loops_to_unloop;
static vec<int> loops_to_unloop_nunroll;
static vec<edge> edges_to_remove;
/* Stores loops that have been peeled.  */
static bitmap peeled_loops;
/* Cancel all fully unrolled loops by putting __builtin_unreachable
   on the latch edge.
   We do it after all unrolling since unlooping moves basic blocks
   across loop boundaries trashing loop closed SSA form as well
   as SCEV info needed to be intact during unrolling.

   IRRED_INVALIDATED is used to bookkeep if information about
   irreducible regions may become invalid as a result
   of the transformation.
   LOOP_CLOSED_SSA_INVALIDATED is used to bookkeep the case
   when we need to go into loop closed SSA form.  */

void
unloop_loops (vec<class loop *> &loops_to_unloop,
	      vec<int> &loops_to_unloop_nunroll,
	      vec<edge> &edges_to_remove,
	      bitmap loop_closed_ssa_invalidated,
	      bool *irred_invalidated)
{
  while (loops_to_unloop.length ())
    {
      class loop *loop = loops_to_unloop.pop ();
      int n_unroll = loops_to_unloop_nunroll.pop ();
      basic_block latch = loop->latch;
      edge latch_edge = loop_latch_edge (loop);
      int flags = latch_edge->flags;
      location_t locus = latch_edge->goto_locus;
      gcall *stmt;
      gimple_stmt_iterator gsi;

      remove_exits_and_undefined_stmts (loop, n_unroll);

      /* Unloop destroys the latch edge.  */
      unloop (loop, irred_invalidated, loop_closed_ssa_invalidated);

      /* Create new basic block for the latch edge destination and wire
	 it in.  */
      stmt = gimple_build_builtin_unreachable (locus);
      latch_edge = make_edge (latch, create_basic_block (NULL, NULL, latch), flags);
      latch_edge->probability = profile_probability::never ();
      latch_edge->flags |= flags;
      latch_edge->goto_locus = locus;

      add_bb_to_loop (latch_edge->dest, current_loops->tree_root);
      latch_edge->dest->count = profile_count::zero ();
      set_immediate_dominator (CDI_DOMINATORS, latch_edge->dest, latch_edge->src);

      gsi = gsi_start_bb (latch_edge->dest);
      gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
    }

  /* Remove edges in peeled copies.  Given remove_path removes dominated
     regions we need to cope with removal of already removed paths.  */
  unsigned i;
  edge e;
  auto_vec<int, 20> src_bbs;
  src_bbs.reserve_exact (edges_to_remove.length ());
  FOR_EACH_VEC_ELT (edges_to_remove, i, e)
    src_bbs.quick_push (e->src->index);
  FOR_EACH_VEC_ELT (edges_to_remove, i, e)
    if (BASIC_BLOCK_FOR_FN (cfun, src_bbs[i]))
      {
	bool ok = remove_path (e, irred_invalidated,
			       loop_closed_ssa_invalidated);
	gcc_assert (ok);
      }
  edges_to_remove.release ();
}
/* Tries to unroll LOOP completely, i.e. NITER times.
   UL determines which loops we are allowed to unroll.
   EXIT is the exit of the loop that should be eliminated.
   MAXITER specifies the bound on the number of iterations, -1 if it is
   not known or too large for HOST_WIDE_INT.  The location
   LOCUS corresponding to the loop is used when emitting
   a summary of the unroll to the dump file.  */

static bool
try_unroll_loop_completely (class loop *loop,
			    edge exit, tree niter, bool may_be_zero,
			    enum unroll_level ul,
			    HOST_WIDE_INT maxiter,
			    dump_user_location_t locus, bool allow_peel,
			    bool cunrolli)
{
  unsigned HOST_WIDE_INT n_unroll = 0;
  bool n_unroll_found = false;
  edge edge_to_cancel = NULL;

  /* See if we proved number of iterations to be low constant.

     EXIT is an edge that will be removed in all but last iteration of
     the loop.

     EDGE_TO_CANCEL is an edge that will be removed from the last iteration
     of the unrolled sequence and is expected to make the final loop not
     rolling.

     If the number of executions of the loop is determined by a standard
     induction variable test, then EXIT and EDGE_TO_CANCEL are the two edges
     leaving from the iv test.  */
  if (tree_fits_uhwi_p (niter))
    {
      n_unroll = tree_to_uhwi (niter);
      n_unroll_found = true;
      edge_to_cancel = EDGE_SUCC (exit->src, 0);
      if (edge_to_cancel == exit)
	edge_to_cancel = EDGE_SUCC (exit->src, 1);
    }
  /* We do not know the number of iterations and thus we cannot eliminate
     the EXIT edge.  */
  else
    exit = NULL;

  /* See if we can improve our estimate by using recorded loop bounds.  */
  if ((maxiter == 0 || ul != UL_SINGLE_ITER)
      && maxiter >= 0
      && (!n_unroll_found || (unsigned HOST_WIDE_INT)maxiter < n_unroll))
    {
      n_unroll = maxiter;
      n_unroll_found = true;
      /* Loop terminates before the IV variable test, so we cannot
	 remove it in the last iteration.  */
      edge_to_cancel = NULL;
      /* If we do not allow peeling and we iterate just allow cases
	 that do not grow code.  */
      if (!allow_peel && maxiter != 0)
	ul = UL_NO_GROWTH;
    }

  if (!n_unroll_found)
    return false;

  if (!loop->unroll
      && n_unroll > (unsigned) param_max_completely_peel_times)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Not unrolling loop %d "
		 "(--param max-completely-peel-times limit reached).\n",
		 loop->num);
      return false;
    }

  if (!edge_to_cancel)
    edge_to_cancel = loop_edge_to_cancel (loop);

  if (n_unroll)
    {
      if (ul == UL_SINGLE_ITER)
	return false;

      if (loop->unroll)
	{
	  /* If the unrolling factor is too large, bail out.  */
	  if (n_unroll > (unsigned)loop->unroll)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "Not unrolling loop %d: "
			 "user didn't want it unrolled completely.\n",
			 loop->num);
	      return false;
	    }
	}
      else
	{
	  struct loop_size size;
	  /* EXIT can be removed only if we are sure it passes first N_UNROLL
	     iterations.  */
	  bool remove_exit = (exit && niter
			      && TREE_CODE (niter) == INTEGER_CST
			      && wi::leu_p (n_unroll, wi::to_widest (niter)));
	  bool large
	    = tree_estimate_loop_size
		(loop, remove_exit ? exit : NULL, edge_to_cancel, &size,
		 param_max_completely_peeled_insns);
	  if (large)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "Not unrolling loop %d: it is too large.\n",
			 loop->num);
	      return false;
	    }

	  unsigned HOST_WIDE_INT ninsns = size.overall;
	  unsigned HOST_WIDE_INT est_eliminated;
	  unsigned HOST_WIDE_INT unr_insns
	    = estimated_unrolled_size (&size, &est_eliminated, n_unroll);
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "  Loop size: %d\n", (int) ninsns);
	      fprintf (dump_file, "  Estimated size after unrolling: %d-%d\n",
		       (int) unr_insns, (int) est_eliminated);
	    }

	  /* If the code is going to shrink, we don't need to be extra
	     cautious on guessing if the unrolling is going to be
	     profitable.
	     Move from estimated_unrolled_size to unroll small loops.  */
	  if (unr_insns - est_eliminated
	      /* If there is IV variable that will become constant, we
		 save one instruction in the loop prologue we do not
		 account otherwise.  */
	      <= ninsns + (size.constant_iv != false))
	    ;
	  /* We unroll only inner loops, because we do not consider it
	     profitable otherwise.  We still can cancel loopback edge
	     of not rolling loop; this is always a good idea.  */
	  else if (ul == UL_NO_GROWTH)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "Not unrolling loop %d: size would grow.\n",
			 loop->num);
	      return false;
	    }
	  /* Outer loops tend to be less interesting candidates for
	     complete unrolling unless we can do a lot of propagation
	     into the inner loop body.  For now we disable outer loop
	     unrolling when the code would grow.  */
	  else if (loop->inner)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "Not unrolling loop %d: "
			 "it is not innermost and code would grow.\n",
			 loop->num);
	      return false;
	    }
	  /* If there is call on a hot path through the loop, then
	     there is most probably not much to optimize.  */
	  else if (size.num_non_pure_calls_on_hot_path)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "Not unrolling loop %d: "
			 "contains call and code would grow.\n",
			 loop->num);
	      return false;
	    }
	  /* If there is pure/const call in the function, then we can
	     still optimize the unrolled loop body if it contains some
	     other interesting code than the calls and code storing or
	     cumulating the return value.  */
	  else if (size.num_pure_calls_on_hot_path
		   /* One IV increment, one test, one ivtmp store and
		      one useful stmt.  That is about minimal loop
		      doing pure call.  */
		   && (size.non_call_stmts_on_hot_path
		       <= 3 + size.num_pure_calls_on_hot_path))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "Not unrolling loop %d: "
			 "contains just pure calls and code would grow.\n",
			 loop->num);
	      return false;
	    }
	  /* Complete unrolling is major win when control flow is
	     removed and one big basic block is created.  If the loop
	     contains control flow the optimization may still be a win
	     because of eliminating the loop overhead but it also may
	     blow the branch predictor tables.  Limit number of
	     branches on the hot path through the peeled sequence.  */
	  else if (size.num_branches_on_hot_path * (int)n_unroll
		   > param_max_peel_branches)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "Not unrolling loop %d: "
			 "number of branches on hot path in the unrolled "
			 "sequence reaches --param max-peel-branches limit.\n",
			 loop->num);
	      return false;
	    }
	  /* Move 2 / 3 reduction from estimated_unrolled_size, but don't reduce
	     unrolled size for innermost loop.
	     1) It could increase register pressure.
	     2) Big loop after completely unroll may not be vectorized
		by BB vectorizer.  */
	  else if ((cunrolli ? unr_insns : unr_insns - est_eliminated)
		   > (unsigned) param_max_completely_peeled_insns)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "Not unrolling loop %d: "
			 "number of insns in the unrolled sequence reaches "
			 "--param max-completely-peeled-insns limit.\n",
			 loop->num);
	      return false;
	    }
	}

      if (!dbg_cnt (gimple_unroll))
	return false;

      initialize_original_copy_tables ();
      auto_sbitmap wont_exit (n_unroll + 1);
      if (exit && niter
	  && TREE_CODE (niter) == INTEGER_CST
	  && wi::leu_p (n_unroll, wi::to_widest (niter)))
	{
	  bitmap_ones (wont_exit);
	  if (wi::eq_p (wi::to_widest (niter), n_unroll)
	      || edge_to_cancel)
	    bitmap_clear_bit (wont_exit, 0);
	}
      else
	{
	  exit = NULL;
	  bitmap_clear (wont_exit);
	}
      if (may_be_zero)
	bitmap_clear_bit (wont_exit, 1);

      /* If loop was originally estimated to iterate too many times,
	 reduce the profile to avoid new profile inconsistencies.  */
      scale_loop_profile (loop, profile_probability::always (), n_unroll);

      if (!gimple_duplicate_loop_body_to_header_edge (
	    loop, loop_preheader_edge (loop), n_unroll, wont_exit, exit,
	    &edges_to_remove,
	    DLTHE_FLAG_UPDATE_FREQ | DLTHE_FLAG_COMPLETTE_PEEL))
	{
	  free_original_copy_tables ();
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Failed to duplicate the loop\n");
	  return false;
	}

      free_original_copy_tables ();
    }
  else
    scale_loop_profile (loop, profile_probability::always (), 0);

  /* Remove the conditional from the last copy of the loop.  */
  if (edge_to_cancel)
    {
      gcond *cond = as_a <gcond *> (*gsi_last_bb (edge_to_cancel->src));
      force_edge_cold (edge_to_cancel, true);
      if (edge_to_cancel->flags & EDGE_TRUE_VALUE)
	gimple_cond_make_false (cond);
      else
	gimple_cond_make_true (cond);
      update_stmt (cond);
      /* Do not remove the path, as doing so may remove outer loop and
	 confuse bookkeeping code in tree_unroll_loops_completely.  */
    }

  /* Store the loop for later unlooping and exit removal.  */
  loops_to_unloop.safe_push (loop);
  loops_to_unloop_nunroll.safe_push (n_unroll);

  if (dump_enabled_p ())
    {
      if (!n_unroll)
	dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus,
			 "loop turned into non-loop; it never loops\n");
      else
	{
	  dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus,
			   "loop with %d iterations completely unrolled",
			   (int) n_unroll);
	  if (loop->header->count.initialized_p ())
	    dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS,
			 " (header execution count %d)",
			 (int)loop->header->count.to_gcov_type ());
	  dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, "\n");
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (exit)
	fprintf (dump_file, "Exit condition of peeled iterations was "
		 "eliminated.\n");
      if (edge_to_cancel)
	fprintf (dump_file, "Last iteration exit edge was proved true.\n");
      else
	fprintf (dump_file, "Latch of last iteration was marked by "
		 "__builtin_unreachable ().\n");
    }

  return true;
}
/* Return number of instructions after peeling.  */
static unsigned HOST_WIDE_INT
estimated_peeled_sequence_size (struct loop_size *size,
				unsigned HOST_WIDE_INT npeel)
{
  return MAX (npeel * (HOST_WIDE_INT) (size->overall
				       - size->eliminated_by_peeling), 1);
}
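/* A quick illustrative calculation (numbers invented for this sketch): with
   size->overall = 12, size->eliminated_by_peeling = 5 and npeel = 3, the
   peeled sequence is estimated at 3 * (12 - 5) = 21 insns; the MAX with 1
   only guards the degenerate case where everything is estimated away.  */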
/* Update loop estimates after peeling LOOP by NPEEL.
   If PRECISE is false only likely exits were duplicated and thus
   do not update any estimates that are supposed to be always reliable.  */
void
adjust_loop_info_after_peeling (class loop *loop, int npeel, bool precise)
{
  if (loop->any_estimate)
    {
      /* Since peeling is mostly about loops where first few
	 iterations are special, it is not quite correct to
	 assume that the remaining iterations will behave
	 the same way.  However we do not have better info
	 so update the estimate, since it is likely better
	 than keeping it as it is.

	 Remove it if it looks wrong.

	 TODO: We likely want to special case the situation where
	 peeling is optimizing out exit edges and only update
	 estimates here.  */
      if (wi::leu_p (npeel, loop->nb_iterations_estimate))
	loop->nb_iterations_estimate -= npeel;
      else
	loop->any_estimate = false;
    }
  if (loop->any_upper_bound && precise)
    {
      if (wi::leu_p (npeel, loop->nb_iterations_upper_bound))
	loop->nb_iterations_upper_bound -= npeel;
      else
	/* Peeling maximal number of iterations or more
	   makes no sense and is a bug.
	   We should peel completely.  */
	gcc_unreachable ();
    }
  if (loop->any_likely_upper_bound)
    {
      if (wi::leu_p (npeel, loop->nb_iterations_likely_upper_bound))
	loop->nb_iterations_likely_upper_bound -= npeel;
      else
	{
	  loop->any_estimate = true;
	  loop->nb_iterations_estimate = 0;
	  loop->nb_iterations_likely_upper_bound = 0;
	}
    }
}
/* If the loop is expected to iterate N times and is
   small enough, duplicate the loop body N+1 times before
   the loop itself.  This way the hot path will never
   enter the loop.
   Parameters are the same as for try_unroll_loops_completely.  */
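/* As an illustration (hypothetical loop, not taken from the GCC sources),
   peeling a loop that is estimated to iterate once

     while (cond ())
       body ();

   produces, roughly,

     if (cond ())
       {
         body ();
         if (cond ())
           {
             body ();
             while (cond ())
               body ();
           }
       }

   so the expected executions of BODY run in straight-line code and the
   remaining loop is only entered when the estimate was too low.  */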
static bool
try_peel_loop (class loop *loop,
	       edge exit, tree niter, bool may_be_zero,
	       HOST_WIDE_INT maxiter)
{
  HOST_WIDE_INT npeel;
  struct loop_size size;
  int peeled_size;

  if (!flag_peel_loops
      || param_max_peel_times <= 0
      || !peeled_loops)
    return false;

  if (bitmap_bit_p (peeled_loops, loop->num))
    {
      if (dump_file)
	fprintf (dump_file, "Not peeling: loop is already peeled\n");
      return false;
    }

  /* We don't peel loops that will be unrolled as this can duplicate a
     loop more times than the user requested.  */
  if (loop->unroll)
    {
      if (dump_file)
	fprintf (dump_file, "Not peeling: user didn't want it peeled.\n");
      return false;
    }

  /* Peel only innermost loops.
     While the code is perfectly capable of peeling non-innermost loops,
     the heuristics would probably need some improvements.  */
  if (loop->inner)
    {
      if (dump_file)
	fprintf (dump_file, "Not peeling: outer loop\n");
      return false;
    }

  if (!optimize_loop_for_speed_p (loop))
    {
      if (dump_file)
	fprintf (dump_file, "Not peeling: cold loop\n");
      return false;
    }

  /* Check if there is an estimate on the number of iterations.  */
  npeel = estimated_loop_iterations_int (loop);
  if (npeel < 0)
    npeel = likely_max_loop_iterations_int (loop);
  if (npeel < 0)
    {
      if (dump_file)
	fprintf (dump_file, "Not peeling: number of iterations is not "
		 "estimated\n");
      return false;
    }
  if (maxiter >= 0 && maxiter <= npeel)
    {
      if (dump_file)
	fprintf (dump_file, "Not peeling: upper bound is known so can "
		 "unroll completely\n");
      return false;
    }

  /* We want to peel estimated number of iterations + 1 (so we never
     enter the loop on quick path).  Check against PARAM_MAX_PEEL_TIMES
     and be sure to avoid overflows.  */
  if (npeel > param_max_peel_times - 1)
    {
      if (dump_file)
	fprintf (dump_file, "Not peeling: rolls too much "
		 "(%i + 1 > --param max-peel-times)\n", (int) npeel);
      return false;
    }
  npeel++;

  /* Check peeled loops size.  */
  tree_estimate_loop_size (loop, exit, NULL, &size,
			   param_max_peeled_insns);
  if ((peeled_size = estimated_peeled_sequence_size (&size, (int) npeel))
      > param_max_peeled_insns)
    {
      if (dump_file)
	fprintf (dump_file, "Not peeling: peeled sequence size is too large "
		 "(%i insns > --param max-peel-insns)", peeled_size);
      return false;
    }

  if (!dbg_cnt (gimple_unroll))
    return false;

  /* Duplicate possibly eliminating the exits.  */
  initialize_original_copy_tables ();
  auto_sbitmap wont_exit (npeel + 1);
  if (exit && niter
      && TREE_CODE (niter) == INTEGER_CST
      && wi::leu_p (npeel, wi::to_widest (niter)))
    {
      bitmap_ones (wont_exit);
      bitmap_clear_bit (wont_exit, 0);
    }
  else
    {
      exit = NULL;
      bitmap_clear (wont_exit);
    }
  if (may_be_zero)
    bitmap_clear_bit (wont_exit, 1);

  if (!gimple_duplicate_loop_body_to_header_edge (
	loop, loop_preheader_edge (loop), npeel, wont_exit, exit,
	&edges_to_remove, DLTHE_FLAG_UPDATE_FREQ))
    {
      free_original_copy_tables ();
      return false;
    }
  free_original_copy_tables ();
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Peeled loop %d, %i times.\n",
	       loop->num, (int) npeel);
    }
  adjust_loop_info_after_peeling (loop, npeel, true);

  bitmap_set_bit (peeled_loops, loop->num);
  return true;
}
/* Adds a canonical induction variable to LOOP if suitable.
   CREATE_IV is true if we may create a new iv.  UL determines
   which loops we are allowed to completely unroll.  If TRY_EVAL is true, we
   try to determine the number of iterations of a loop by direct evaluation.
   Returns true if cfg is changed.  */

static bool
canonicalize_loop_induction_variables (class loop *loop,
				       bool create_iv, enum unroll_level ul,
				       bool try_eval, bool allow_peel,
				       const_sbitmap innermost,
				       bool cunrolli)
{
  edge exit = NULL;
  tree niter;
  HOST_WIDE_INT maxiter;
  bool modified = false;
  class tree_niter_desc niter_desc;
  bool may_be_zero = false;
  bool by_eval = false;

  /* For unrolling allow conditional constant or zero iterations, thus
     perform loop-header copying on-the-fly.  */
  exit = single_exit (loop);
  niter = chrec_dont_know;
  if (exit && number_of_iterations_exit (loop, exit, &niter_desc, false))
    {
      niter = niter_desc.niter;
      may_be_zero
	= niter_desc.may_be_zero && !integer_zerop (niter_desc.may_be_zero);
    }
  if (TREE_CODE (niter) != INTEGER_CST)
    {
      /* For non-constant niter fold may_be_zero into niter again.  */
      if (may_be_zero)
	{
	  if (COMPARISON_CLASS_P (niter_desc.may_be_zero))
	    niter = fold_build3 (COND_EXPR, TREE_TYPE (niter),
				 niter_desc.may_be_zero,
				 build_int_cst (TREE_TYPE (niter), 0), niter);
	  else
	    niter = chrec_dont_know;
	  may_be_zero = false;
	}

      /* If the loop has more than one exit, try checking all of them
	 for # of iterations determinable through scev.  */
      if (!exit)
	niter = find_loop_niter (loop, &exit);

      /* Finally if everything else fails, try brute force evaluation.  */
      if (try_eval
	  && (chrec_contains_undetermined (niter)
	      || TREE_CODE (niter) != INTEGER_CST))
	{
	  niter = find_loop_niter_by_eval (loop, &exit);
	  if (TREE_CODE (niter) == INTEGER_CST)
	    by_eval = true;
	}

      if (TREE_CODE (niter) != INTEGER_CST)
	exit = NULL;
    }

  /* We work exceptionally hard here to estimate the bound
     by find_loop_niter_by_eval.  Be sure to keep it for future.  */
  if (niter && TREE_CODE (niter) == INTEGER_CST)
    {
      auto_vec<edge> exits = get_loop_exit_edges (loop);
      record_niter_bound (loop, wi::to_widest (niter),
			  exit == single_likely_exit (loop, exits), true);
    }

  /* Force re-computation of loop bounds so we can remove redundant exits.  */
  maxiter = max_loop_iterations_int (loop);

  if (dump_file && (dump_flags & TDF_DETAILS)
      && TREE_CODE (niter) == INTEGER_CST)
    {
      fprintf (dump_file, "Loop %d iterates ", loop->num);
      print_generic_expr (dump_file, niter, TDF_SLIM);
      fprintf (dump_file, " times.\n");
    }
  if (dump_file && (dump_flags & TDF_DETAILS)
      && maxiter >= 0)
    fprintf (dump_file, "Loop %d iterates at most %i times.\n", loop->num,
	     (int)maxiter);
  if (dump_file && (dump_flags & TDF_DETAILS)
      && likely_max_loop_iterations_int (loop) >= 0)
    fprintf (dump_file, "Loop %d likely iterates at most %i times.\n",
	     loop->num, (int)likely_max_loop_iterations_int (loop));

  /* Remove exits that are known to be never taken based on loop bound.
     Needs to be called after compilation of max_loop_iterations_int that
     populates the loop bounds.  */
  modified |= remove_redundant_iv_tests (loop);

  dump_user_location_t locus = find_loop_location (loop);

  bool innermost_cunrolli_p
    = cunrolli
      && (unsigned) loop->num < SBITMAP_SIZE (innermost)
      && bitmap_bit_p (innermost, loop->num);

  if (try_unroll_loop_completely (loop, exit, niter, may_be_zero, ul,
				  maxiter, locus, allow_peel,
				  innermost_cunrolli_p))
    return true;

  if ((create_iv || by_eval)
      && niter && !chrec_contains_undetermined (niter)
      && exit && just_once_each_iteration_p (loop, exit->src))
    {
      tree iv_niter = niter;
      if (may_be_zero)
	{
	  if (COMPARISON_CLASS_P (niter_desc.may_be_zero))
	    iv_niter = fold_build3 (COND_EXPR, TREE_TYPE (iv_niter),
				    niter_desc.may_be_zero,
				    build_int_cst (TREE_TYPE (iv_niter), 0),
				    iv_niter);
	  else
	    iv_niter = NULL_TREE;
	}
      if (iv_niter)
	create_canonical_iv (loop, exit, iv_niter);
    }

  if (ul == UL_ALL)
    modified |= try_peel_loop (loop, exit, niter, may_be_zero, maxiter);

  return modified;
}
/* The main entry point of the pass.  Adds canonical induction variables
   to the suitable loops.  */

static unsigned int
canonicalize_induction_variables (void)
{
  bool changed = false;
  bool irred_invalidated = false;
  bitmap loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);
  auto_sbitmap innermost (number_of_loops (cfun));
  bitmap_clear (innermost);

  estimate_numbers_of_iterations (cfun);

  for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
    {
      changed
	|= canonicalize_loop_induction_variables (loop,
						  true, UL_SINGLE_ITER,
						  true, false,
						  (const_sbitmap) innermost,
						  false);
    }
  gcc_assert (!need_ssa_update_p (cfun));

  unloop_loops (loops_to_unloop, loops_to_unloop_nunroll, edges_to_remove,
		loop_closed_ssa_invalidated, &irred_invalidated);
  loops_to_unloop.release ();
  loops_to_unloop_nunroll.release ();
  if (irred_invalidated
      && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
    mark_irreducible_loops ();

  /* Clean up the information about numbers of iterations, since brute force
     evaluation could reveal new information.  */
  free_numbers_of_iterations_estimates (cfun);

  if (!bitmap_empty_p (loop_closed_ssa_invalidated))
    {
      gcc_checking_assert (loops_state_satisfies_p (LOOP_CLOSED_SSA));
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
    }
  BITMAP_FREE (loop_closed_ssa_invalidated);

  if (changed)
    return TODO_cleanup_cfg;
  return 0;
}
/* Process loops from innermost to outer, stopping at the innermost
   loop we unrolled.  */

static bool
tree_unroll_loops_completely_1 (bool may_increase_size, bool unroll_outer,
				bitmap father_bbs, class loop *loop,
				const_sbitmap innermost, bool cunrolli)
{
  class loop *loop_father;
  bool changed = false;
  class loop *inner;
  enum unroll_level ul;
  unsigned num = number_of_loops (cfun);

  /* Process inner loops first.  Don't walk loops added by the recursive
     calls because SSA form is not up-to-date.  They can be handled in the
     next iteration.  */
  bitmap child_father_bbs = NULL;
  for (inner = loop->inner; inner != NULL; inner = inner->next)
    if ((unsigned) inner->num < num)
      {
	if (!child_father_bbs)
	  child_father_bbs = BITMAP_ALLOC (NULL);
	if (tree_unroll_loops_completely_1 (may_increase_size, unroll_outer,
					    child_father_bbs, inner,
					    innermost, cunrolli))
	  {
	    bitmap_ior_into (father_bbs, child_father_bbs);
	    bitmap_clear (child_father_bbs);
	    changed = true;
	  }
      }
  if (child_father_bbs)
    BITMAP_FREE (child_father_bbs);

  /* If we changed an inner loop we cannot process outer loops in this
     iteration because SSA form is not up-to-date.  Continue with
     siblings of outer loops instead.  */
  if (changed)
    {
      /* If we are recorded as father clear all other fathers that
	 are necessarily covered already to avoid redundant work.  */
      if (bitmap_bit_p (father_bbs, loop->header->index))
	{
	  bitmap_clear (father_bbs);
	  bitmap_set_bit (father_bbs, loop->header->index);
	}
      return true;
    }

  /* Don't unroll #pragma omp simd loops until the vectorizer
     attempts to vectorize those.  */
  if (loop->force_vectorize)
    return false;

  /* Try to unroll this loop.  */
  loop_father = loop_outer (loop);
  if (!loop_father)
    return false;

  if (loop->unroll > 1)
    ul = UL_ALL;
  else if (may_increase_size && optimize_loop_nest_for_speed_p (loop)
	   /* Unroll outermost loops only if asked to do so or they do
	      not cause code growth.  */
	   && (unroll_outer || loop_outer (loop_father)))
    ul = UL_ALL;
  else
    ul = UL_NO_GROWTH;

  if (canonicalize_loop_induction_variables
	(loop, false, ul, !flag_tree_loop_ivcanon || cunrolli, unroll_outer,
	 innermost, cunrolli))
    {
      /* If we'll continue unrolling, we need to propagate constants
	 within the new basic blocks to fold away induction variable
	 computations; otherwise, the size might blow up before the
	 iteration is complete and the IR eventually cleaned up.  */
      if (loop_outer (loop_father))
	{
	  /* Once we process our father we will have processed
	     the fathers of our children as well, so avoid doing
	     redundant work and clear fathers we've gathered so far.  */
	  bitmap_clear (father_bbs);
	  bitmap_set_bit (father_bbs, loop_father->header->index);
	}
      else if (unroll_outer)
	/* Trigger scalar cleanup once any outermost loop gets unrolled.  */
	cfun->pending_TODOs |= PENDING_TODO_force_next_scalar_cleanup;

      return true;
    }

  return false;
}
/* Unroll LOOPS completely if they iterate just few times.  Unless
   MAY_INCREASE_SIZE is true, perform the unrolling only if the
   size of the code does not increase.
   CUNROLLI is true when the pass is cunrolli.  */

static unsigned int
tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer, bool cunrolli)
{
  bitmap father_bbs = BITMAP_ALLOC (NULL);
  bool changed;
  int iteration = 0;
  bool irred_invalidated = false;
  auto_sbitmap innermost (number_of_loops (cfun));
  bitmap_clear (innermost);

  estimate_numbers_of_iterations (cfun);

  /* Mark all innermost loops at the beginning.  */
  for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
    if (!loop->inner)
      bitmap_set_bit (innermost, loop->num);

  do
    {
      changed = false;
      bitmap loop_closed_ssa_invalidated = NULL;

      if (loops_state_satisfies_p (LOOP_CLOSED_SSA))
	loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);

      free_numbers_of_iterations_estimates (cfun);
      estimate_numbers_of_iterations (cfun);

      changed = tree_unroll_loops_completely_1 (may_increase_size,
						unroll_outer, father_bbs,
						current_loops->tree_root,
						(const_sbitmap) innermost,
						cunrolli);
      if (changed)
	{
	  unsigned i;

	  unloop_loops (loops_to_unloop, loops_to_unloop_nunroll,
			edges_to_remove, loop_closed_ssa_invalidated,
			&irred_invalidated);
	  loops_to_unloop.release ();
	  loops_to_unloop_nunroll.release ();

	  /* We cannot use TODO_update_ssa_no_phi because VOPS gets confused.  */
	  if (loop_closed_ssa_invalidated
	      && !bitmap_empty_p (loop_closed_ssa_invalidated))
	    rewrite_into_loop_closed_ssa (loop_closed_ssa_invalidated,
					  TODO_update_ssa);
	  else
	    update_ssa (TODO_update_ssa);

	  /* father_bbs is a bitmap of loop father header BB indices.
	     Translate that to what non-root loops these BBs belong to now.  */
	  bitmap_iterator bi;
	  bitmap fathers = BITMAP_ALLOC (NULL);
	  EXECUTE_IF_SET_IN_BITMAP (father_bbs, 0, i, bi)
	    {
	      basic_block unrolled_loop_bb = BASIC_BLOCK_FOR_FN (cfun, i);
	      if (! unrolled_loop_bb)
		continue;
	      if (loop_outer (unrolled_loop_bb->loop_father))
		bitmap_set_bit (fathers,
				unrolled_loop_bb->loop_father->num);
	    }
	  bitmap_clear (father_bbs);
	  /* Propagate the constants within the new basic blocks.  */
	  EXECUTE_IF_SET_IN_BITMAP (fathers, 0, i, bi)
	    {
	      loop_p father = get_loop (cfun, i);
	      bitmap exit_bbs = BITMAP_ALLOC (NULL);
	      loop_exit *exit = father->exits->next;
	      while (exit->e)
		{
		  bitmap_set_bit (exit_bbs, exit->e->dest->index);
		  exit = exit->next;
		}
	      do_rpo_vn (cfun, loop_preheader_edge (father), exit_bbs);
	    }
	  BITMAP_FREE (fathers);

	  /* Clean up the information about numbers of iterations, since
	     complete unrolling might have invalidated it.  */
	  scev_reset ();

	  /* This will take care of removing completely unrolled loops
	     from the loop structures so we can continue unrolling now
	     innermost loops.  */
	  if (cleanup_tree_cfg ())
	    update_ssa (TODO_update_ssa_only_virtuals);

	  if (flag_checking && loops_state_satisfies_p (LOOP_CLOSED_SSA))
	    verify_loop_closed_ssa (true);
	}
      if (loop_closed_ssa_invalidated)
	BITMAP_FREE (loop_closed_ssa_invalidated);
    }
  while (changed
	 && ++iteration <= param_max_unroll_iterations);

  BITMAP_FREE (father_bbs);

  if (irred_invalidated
      && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
    mark_irreducible_loops ();

  return 0;
}
/* Canonical induction variable creation pass.  */

namespace {

const pass_data pass_data_iv_canon =
{
  GIMPLE_PASS, /* type */
  "ivcanon", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_TREE_LOOP_IVCANON, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_iv_canon : public gimple_opt_pass
{
public:
  pass_iv_canon (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_iv_canon, ctxt)
  {}

  /* opt_pass methods: */
  bool gate (function *) final override { return flag_tree_loop_ivcanon != 0; }
  unsigned int execute (function *fun) final override;

}; // class pass_iv_canon

unsigned int
pass_iv_canon::execute (function *fun)
{
  if (number_of_loops (fun) <= 1)
    return 0;

  return canonicalize_induction_variables ();
}

} // anon namespace

gimple_opt_pass *
make_pass_iv_canon (gcc::context *ctxt)
{
  return new pass_iv_canon (ctxt);
}
/* Complete unrolling of loops.  */

namespace {

const pass_data pass_data_complete_unroll =
{
  GIMPLE_PASS, /* type */
  "cunroll", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_COMPLETE_UNROLL, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_complete_unroll : public gimple_opt_pass
{
public:
  pass_complete_unroll (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_complete_unroll, ctxt)
  {}

  /* opt_pass methods: */
  unsigned int execute (function *) final override;

}; // class pass_complete_unroll

unsigned int
pass_complete_unroll::execute (function *fun)
{
  if (number_of_loops (fun) <= 1)
    return 0;

  /* If we ever decide to run loop peeling more than once, we will need to
     track loops already peeled in loop structures themselves to avoid
     re-peeling the same loop multiple times.  */
  if (flag_peel_loops)
    peeled_loops = BITMAP_ALLOC (NULL);
  unsigned int val = tree_unroll_loops_completely (flag_cunroll_grow_size, true, false);
  if (peeled_loops)
    {
      BITMAP_FREE (peeled_loops);
      peeled_loops = NULL;
    }
  return val;
}

} // anon namespace

gimple_opt_pass *
make_pass_complete_unroll (gcc::context *ctxt)
{
  return new pass_complete_unroll (ctxt);
}
/* Complete unrolling of inner loops.  */

namespace {

const pass_data pass_data_complete_unrolli =
{
  GIMPLE_PASS, /* type */
  "cunrolli", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_COMPLETE_UNROLL, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_complete_unrolli : public gimple_opt_pass
{
public:
  pass_complete_unrolli (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_complete_unrolli, ctxt)
  {}

  /* opt_pass methods: */
  bool gate (function *) final override { return optimize >= 2; }
  unsigned int execute (function *) final override;

}; // class pass_complete_unrolli

unsigned int
pass_complete_unrolli::execute (function *fun)
{
  unsigned ret = 0;

  loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
  if (number_of_loops (fun) > 1)
    {
      scev_initialize ();
      ret = tree_unroll_loops_completely (optimize >= 3, false, true);
      scev_finalize ();
    }
  loop_optimizer_finalize ();

  return ret;
}

} // anon namespace

gimple_opt_pass *
make_pass_complete_unrolli (gcc::context *ctxt)
{
  return new pass_complete_unrolli (ctxt);
}