gcc/tree-ssa-loop-ivcanon.cc
1 /* Induction variable canonicalization and loop peeling.
2 Copyright (C) 2004-2025 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* This pass detects the loops that iterate a constant number of times,
21 adds a canonical induction variable (step -1, tested against 0)
22 and replaces the exit test. This enables the less powerful rtl
23 level analysis to use this information.
25 This might spoil the code in some cases (by increasing register pressure).
26 Note that in the case the new variable is not needed, ivopts will get rid
27 of it, so it might only be a problem when there are no other linear induction
28 variables. In that case the created optimization possibilities are likely
29 to pay off.
31 We also perform
32 - complete unrolling (or peeling) when the loop rolls few enough
33 times
34 - simple peeling (i.e. copying a few initial iterations before the loop)
35 when the estimated number of iterations is known (typically from
36 the profile info). */
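/* As an illustrative, source-level sketch (the pass itself works on GIMPLE;
   the names below are made up), a counted loop such as

     for (i = 0; i < 100; i++)
       a[i] = i;

   conceptually gains a canonical IV counting down to zero and has its exit
   test replaced:

     ivtmp = 100;
     do
       {
         a[i] = i;
         i++;
         ivtmp--;
       }
     while (ivtmp != 0);

   which lets the later RTL-level analysis read the iteration count directly
   off the exit test.  */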
38 #include "config.h"
39 #include "system.h"
40 #include "coretypes.h"
41 #include "backend.h"
42 #include "tree.h"
43 #include "gimple.h"
44 #include "cfghooks.h"
45 #include "tree-pass.h"
46 #include "ssa.h"
47 #include "cgraph.h"
48 #include "gimple-pretty-print.h"
49 #include "fold-const.h"
50 #include "profile.h"
51 #include "gimple-iterator.h"
52 #include "gimple-fold.h"
53 #include "tree-eh.h"
54 #include "tree-cfg.h"
55 #include "tree-ssa-loop-manip.h"
56 #include "tree-ssa-loop-niter.h"
57 #include "tree-ssa-loop.h"
58 #include "tree-into-ssa.h"
59 #include "cfgloop.h"
60 #include "tree-chrec.h"
61 #include "tree-scalar-evolution.h"
62 #include "tree-inline.h"
63 #include "tree-cfgcleanup.h"
64 #include "builtins.h"
65 #include "tree-ssa-sccvn.h"
66 #include "tree-vectorizer.h" /* For find_loop_location */
67 #include "dbgcnt.h"
69 /* Specifies types of loops that may be unrolled. */
71 enum unroll_level
73 UL_SINGLE_ITER, /* Only loops that exit immediately in the first
74 iteration. */
75 UL_NO_GROWTH, /* Only loops whose unrolling will not cause increase
76 of code size. */
77 UL_ALL /* All suitable loops. */
80 /* Adds a canonical induction variable to LOOP iterating NITER times. EXIT
81 is the exit edge whose condition is replaced. The ssa versions of the new
82 IV before and after increment will be stored in VAR_BEFORE and VAR_AFTER
83 if they are not NULL. */
85 void
86 create_canonical_iv (class loop *loop, edge exit, tree niter,
87 tree *var_before = NULL, tree *var_after = NULL)
89 edge in;
90 tree type, var;
91 gcond *cond;
92 gimple_stmt_iterator incr_at;
93 enum tree_code cmp;
95 if (dump_file && (dump_flags & TDF_DETAILS))
97 fprintf (dump_file, "Added canonical iv to loop %d, ", loop->num);
98 print_generic_expr (dump_file, niter, TDF_SLIM);
99 fprintf (dump_file, " iterations.\n");
102 cond = as_a <gcond *> (*gsi_last_bb (exit->src));
103 in = EDGE_SUCC (exit->src, 0);
104 if (in == exit)
105 in = EDGE_SUCC (exit->src, 1);
107 /* Note that we do not need to worry about overflows, since
108 type of niter is always unsigned and all comparisons are
109 just for equality/nonequality -- i.e. everything works
110 with modulo arithmetic. */
112 type = TREE_TYPE (niter);
113 niter = fold_build2 (PLUS_EXPR, type,
114 niter,
115 build_int_cst (type, 1));
116 incr_at = gsi_last_bb (in->src);
117 create_iv (niter, PLUS_EXPR,
118 build_int_cst (type, -1),
119 NULL_TREE, loop,
120 &incr_at, false, var_before, &var);
121 if (var_after)
122 *var_after = var;
124 cmp = (exit->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
125 gimple_cond_set_code (cond, cmp);
126 gimple_cond_set_lhs (cond, var);
127 gimple_cond_set_rhs (cond, build_int_cst (type, 0));
128 update_stmt (cond);
131 /* Describe size of loop as detected by tree_estimate_loop_size. */
132 struct loop_size
134 /* Number of instructions in the loop. */
135 int overall;
137 /* Number of instructions that will likely be optimized out in
138 peeled iterations of the loop (i.e. computations based on an induction
139 variable that starts at a known constant). */
140 int eliminated_by_peeling;
142 /* Number of instructions that cannot be further optimized in the
143 peeled loop, for example volatile accesses. */
144 int not_eliminatable_after_peeling;
146 /* Same statistics for last iteration of loop: it is smaller because
147 instructions after exit are not executed. */
148 int last_iteration;
149 int last_iteration_eliminated_by_peeling;
150 int last_iteration_not_eliminatable_after_peeling;
152 /* If some IV computation will become constant. */
153 bool constant_iv;
155 /* Number of call stmts that are not a builtin and are pure or const
156 present on the hot path. */
157 int num_pure_calls_on_hot_path;
158 /* Number of call stmts that are not a builtin and are not pure nor const
159 present on the hot path. */
160 int num_non_pure_calls_on_hot_path;
161 /* Number of statements other than calls in the loop. */
162 int non_call_stmts_on_hot_path;
163 /* Number of branches seen on the hot path. */
164 int num_branches_on_hot_path;
167 /* Return true if OP in STMT will be constant after peeling LOOP. */
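/* An illustrative example (not exhaustive): in

     static const int tab[4] = { 2, 3, 5, 7 };
     for (i = 0; i < 4; i++)
       sum += tab[i];

   both the induction variable I and the load from TAB[I] become constant in
   each peeled copy, since I starts from a known constant and TAB has a
   constant initializer visible through ctor_for_folding.  */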
169 static bool
170 constant_after_peeling (tree op, gimple *stmt, class loop *loop)
172 if (CONSTANT_CLASS_P (op))
173 return true;
175 /* Get at the actual SSA operand. */
176 if (handled_component_p (op)
177 && TREE_CODE (TREE_OPERAND (op, 0)) == SSA_NAME)
178 op = TREE_OPERAND (op, 0);
180 /* We can still fold accesses to constant arrays when index is known. */
181 if (TREE_CODE (op) != SSA_NAME)
183 tree base = op;
185 /* First take a quick look to see whether there is a constant array inside. */
186 while (handled_component_p (base))
187 base = TREE_OPERAND (base, 0);
188 if ((DECL_P (base)
189 && ctor_for_folding (base) != error_mark_node)
190 || CONSTANT_CLASS_P (base))
192 /* If so, see if we understand all the indices. */
193 base = op;
194 while (handled_component_p (base))
196 if (TREE_CODE (base) == ARRAY_REF
197 && !constant_after_peeling (TREE_OPERAND (base, 1), stmt, loop))
198 return false;
199 base = TREE_OPERAND (base, 0);
201 return true;
203 return false;
206 /* Induction variables are constants when defined in loop. */
207 if (loop_containing_stmt (stmt) != loop)
208 return false;
209 tree ev = analyze_scalar_evolution (loop, op);
210 if (chrec_contains_undetermined (ev)
211 || chrec_contains_symbols (ev))
213 if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (op)))
215 gassign *ass = nullptr;
216 gphi *phi = nullptr;
217 if (is_a <gassign *> (SSA_NAME_DEF_STMT (op)))
219 ass = as_a <gassign *> (SSA_NAME_DEF_STMT (op));
220 if (TREE_CODE (gimple_assign_rhs1 (ass)) == SSA_NAME)
221 phi = dyn_cast <gphi *>
222 (SSA_NAME_DEF_STMT (gimple_assign_rhs1 (ass)));
224 else if (is_a <gphi *> (SSA_NAME_DEF_STMT (op)))
226 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (op));
227 if (gimple_bb (phi) == loop->header)
229 tree def = gimple_phi_arg_def_from_edge
230 (phi, loop_latch_edge (loop));
231 if (TREE_CODE (def) == SSA_NAME
232 && is_a <gassign *> (SSA_NAME_DEF_STMT (def)))
233 ass = as_a <gassign *> (SSA_NAME_DEF_STMT (def));
236 if (ass && phi)
238 tree rhs1 = gimple_assign_rhs1 (ass);
239 if (gimple_assign_rhs_class (ass) == GIMPLE_BINARY_RHS
240 && CONSTANT_CLASS_P (gimple_assign_rhs2 (ass))
241 && rhs1 == gimple_phi_result (phi)
242 && gimple_bb (phi) == loop->header
243 && (gimple_phi_arg_def_from_edge (phi, loop_latch_edge (loop))
244 == gimple_assign_lhs (ass))
245 && (CONSTANT_CLASS_P (gimple_phi_arg_def_from_edge
246 (phi, loop_preheader_edge (loop)))))
247 return true;
250 return false;
252 return true;
255 /* Computes an estimated number of insns in LOOP.
256 EXIT (if non-NULL) is an exit edge that will be eliminated in all but the last
257 iteration of the loop.
258 EDGE_TO_CANCEL (if non-NULL) is a non-exit edge eliminated in the last iteration
259 of the loop.
260 Return results in SIZE, estimating the benefit of complete unrolling exiting by EXIT.
261 Stop estimating after UPPER_BOUND is met. Return true in this case. */
263 static bool
264 tree_estimate_loop_size (class loop *loop, edge exit, edge edge_to_cancel,
265 struct loop_size *size, int upper_bound)
267 basic_block *body = get_loop_body (loop);
268 gimple_stmt_iterator gsi;
269 unsigned int i;
270 bool after_exit;
271 auto_vec<basic_block> path = get_loop_hot_path (loop);
273 size->overall = 0;
274 size->eliminated_by_peeling = 0;
275 size->not_eliminatable_after_peeling = 0;
276 size->last_iteration = 0;
277 size->last_iteration_eliminated_by_peeling = 0;
278 size->last_iteration_not_eliminatable_after_peeling = 0;
279 size->num_pure_calls_on_hot_path = 0;
280 size->num_non_pure_calls_on_hot_path = 0;
281 size->non_call_stmts_on_hot_path = 0;
282 size->num_branches_on_hot_path = 0;
283 size->constant_iv = 0;
285 if (dump_file && (dump_flags & TDF_DETAILS))
286 fprintf (dump_file, "Estimating sizes for loop %i\n", loop->num);
287 for (i = 0; i < loop->num_nodes; i++)
289 if (edge_to_cancel && body[i] != edge_to_cancel->src
290 && dominated_by_p (CDI_DOMINATORS, body[i], edge_to_cancel->src))
291 after_exit = true;
292 else
293 after_exit = false;
294 if (dump_file && (dump_flags & TDF_DETAILS))
295 fprintf (dump_file, " BB: %i, after_exit: %i\n", body[i]->index,
296 after_exit);
298 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
300 gimple *stmt = gsi_stmt (gsi);
301 int num = estimate_num_insns (stmt, &eni_size_weights);
302 bool not_eliminatable_after_peeling = false;
303 bool likely_eliminated = false;
304 bool likely_eliminated_last = false;
305 bool likely_eliminated_peeled = false;
307 if (dump_file && (dump_flags & TDF_DETAILS))
309 fprintf (dump_file, " size: %3i ", num);
310 print_gimple_stmt (dump_file, gsi_stmt (gsi), 0);
313 /* Look for reasons why we might optimize this stmt away. */
315 if (gimple_has_side_effects (stmt))
316 not_eliminatable_after_peeling = true;
317 else
319 /* Exit conditional. */
320 if (exit && body[i] == exit->src
321 && stmt == *gsi_last_bb (exit->src))
323 if (dump_file && (dump_flags & TDF_DETAILS))
324 fprintf (dump_file, " Exit condition will be eliminated "
325 "in peeled copies.\n");
326 likely_eliminated_peeled = true;
328 if (edge_to_cancel && body[i] == edge_to_cancel->src
329 && stmt == *gsi_last_bb (edge_to_cancel->src))
331 if (dump_file && (dump_flags & TDF_DETAILS))
332 fprintf (dump_file, " Exit condition will be eliminated "
333 "in last copy.\n");
334 likely_eliminated_last = true;
336 /* Sets of IV variables */
337 if (gimple_code (stmt) == GIMPLE_ASSIGN
338 && constant_after_peeling (gimple_assign_lhs (stmt), stmt, loop))
340 if (dump_file && (dump_flags & TDF_DETAILS))
341 fprintf (dump_file, " Induction variable computation will"
342 " be folded away.\n");
343 likely_eliminated = true;
345 /* Assignments of IV variables. */
346 else if (gimple_code (stmt) == GIMPLE_ASSIGN
347 && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME
348 && constant_after_peeling (gimple_assign_rhs1 (stmt),
349 stmt, loop)
350 && (gimple_assign_rhs_class (stmt) != GIMPLE_BINARY_RHS
351 || constant_after_peeling (gimple_assign_rhs2 (stmt),
352 stmt, loop))
353 && gimple_assign_rhs_class (stmt) != GIMPLE_TERNARY_RHS)
355 size->constant_iv = true;
356 if (dump_file && (dump_flags & TDF_DETAILS))
357 fprintf (dump_file,
358 " Constant expression will be folded away.\n");
359 likely_eliminated = true;
361 /* Conditionals. */
362 else if ((gimple_code (stmt) == GIMPLE_COND
363 && constant_after_peeling (gimple_cond_lhs (stmt), stmt,
364 loop)
365 && constant_after_peeling (gimple_cond_rhs (stmt), stmt,
366 loop)
367 /* We don't simplify all constant compares so make sure
368 they are not both constant already. See PR70288. */
369 && (! is_gimple_min_invariant (gimple_cond_lhs (stmt))
370 || ! is_gimple_min_invariant
371 (gimple_cond_rhs (stmt))))
372 || (gimple_code (stmt) == GIMPLE_SWITCH
373 && constant_after_peeling (gimple_switch_index (
374 as_a <gswitch *>
375 (stmt)),
376 stmt, loop)
377 && ! is_gimple_min_invariant
378 (gimple_switch_index
379 (as_a <gswitch *> (stmt)))))
381 if (dump_file && (dump_flags & TDF_DETAILS))
382 fprintf (dump_file, " Constant conditional.\n");
383 likely_eliminated = true;
387 size->overall += num;
388 if (likely_eliminated || likely_eliminated_peeled)
389 size->eliminated_by_peeling += num;
390 if (not_eliminatable_after_peeling)
391 size->not_eliminatable_after_peeling += num;
392 if (!after_exit)
394 size->last_iteration += num;
395 if (likely_eliminated || likely_eliminated_last)
396 size->last_iteration_eliminated_by_peeling += num;
397 if (not_eliminatable_after_peeling)
398 size->last_iteration_not_eliminatable_after_peeling += num;
400 if ((size->overall * 3 / 2 - size->eliminated_by_peeling
401 - size->last_iteration_eliminated_by_peeling) > upper_bound)
403 free (body);
404 return true;
408 while (path.length ())
410 basic_block bb = path.pop ();
411 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
413 gimple *stmt = gsi_stmt (gsi);
414 if (gimple_code (stmt) == GIMPLE_CALL
415 && !gimple_inexpensive_call_p (as_a <gcall *> (stmt)))
417 int flags = gimple_call_flags (stmt);
418 if (flags & (ECF_PURE | ECF_CONST))
419 size->num_pure_calls_on_hot_path++;
420 else
421 size->num_non_pure_calls_on_hot_path++;
422 size->num_branches_on_hot_path ++;
424 /* Count inexpensive calls as non-calls, because they will likely
425 expand inline. */
426 else if (gimple_code (stmt) != GIMPLE_DEBUG)
427 size->non_call_stmts_on_hot_path++;
428 if (((gimple_code (stmt) == GIMPLE_COND
429 && (!constant_after_peeling (gimple_cond_lhs (stmt), stmt, loop)
430 || !constant_after_peeling (gimple_cond_rhs (stmt), stmt,
431 loop)))
432 || (gimple_code (stmt) == GIMPLE_SWITCH
433 && !constant_after_peeling (gimple_switch_index (
434 as_a <gswitch *> (stmt)),
435 stmt, loop)))
436 && (!exit || bb != exit->src))
437 size->num_branches_on_hot_path++;
441 if (dump_file && (dump_flags & TDF_DETAILS))
442 fprintf (dump_file, "size: %i-%i, last_iteration: %i-%i\n", size->overall,
443 size->eliminated_by_peeling, size->last_iteration,
444 size->last_iteration_eliminated_by_peeling);
446 free (body);
447 return false;
450 /* Estimate number of insns of completely unrolled loop.
451 It is (NUNROLL + 1) * size of loop body, taking into account
452 the fact that in last copy everything after exit conditional
453 is dead and that some instructions will be eliminated after
454 peeling. Set *EST_ELIMINATED to the number of stmts that could be
455 optimistically eliminated by followup transforms. */
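/* A worked example with made-up numbers: for SIZE->overall == 10,
   SIZE->eliminated_by_peeling == 4, SIZE->last_iteration == 8,
   SIZE->last_iteration_eliminated_by_peeling == 4 and NUNROLL == 4 the
   estimate is 4 * (10 - 4) + (8 - 4) == 28 insns, and (with nothing marked
   as not eliminatable) about one third of that, rounded up, is reported
   back through *EST_ELIMINATED.  */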
456 static unsigned HOST_WIDE_INT
457 estimated_unrolled_size (struct loop_size *size,
458 unsigned HOST_WIDE_INT *est_eliminated,
459 unsigned HOST_WIDE_INT nunroll)
461 HOST_WIDE_INT unr_insns = ((nunroll)
462 * (HOST_WIDE_INT) (size->overall
463 - size->eliminated_by_peeling));
464 HOST_WIDE_INT not_elim
465 = ((nunroll) * (HOST_WIDE_INT) size->not_eliminatable_after_peeling);
466 unr_insns += size->last_iteration - size->last_iteration_eliminated_by_peeling;
467 not_elim += size->last_iteration_not_eliminatable_after_peeling;
469 /* Testcases rely on rounding up, so do not write as
470 (unr_insns - not_elim) / 3. */
471 *est_eliminated = unr_insns - not_elim - (unr_insns - not_elim) * 2 / 3;
472 return unr_insns;
475 /* Loop LOOP is known to not loop. See if there is an edge in the loop
476 body that can be removed to make the loop always exit without
477 making any code that is potentially executed during the last
478 iteration dead.
480 After complete unrolling we still may get rid of the conditional
481 on the exit in the last copy even if we have no idea what it does.
482 This is a quite common case for loops of the form
484 int a[5];
485 for (i=0;i<b;i++)
486 a[i]=0;
488 Here we prove that the loop iterates 5 times, but we do not know
489 it from the induction variable.
491 For now we handle only the simple case where there is an exit condition
492 just before the latch block and the latch block contains no statements
493 with side effects that may otherwise terminate the execution of the loop
494 (such as by EH, by terminating the program, or by longjmp).
496 In the general case we may want to cancel the paths leading to statements
497 that loop-niter identified as having undefined effect in the last iteration.
498 The other cases are hopefully rare and will be cleaned up later. */
500 static edge
501 loop_edge_to_cancel (class loop *loop)
503 unsigned i;
504 edge edge_to_cancel;
505 gimple_stmt_iterator gsi;
507 /* We want only one predecessor of the loop latch. */
508 if (EDGE_COUNT (loop->latch->preds) > 1)
509 return NULL;
511 auto_vec<edge> exits = get_loop_exit_edges (loop);
513 FOR_EACH_VEC_ELT (exits, i, edge_to_cancel)
515 /* Find the edge other than the loop exit
516 leaving the conditional. */
517 if (EDGE_COUNT (edge_to_cancel->src->succs) != 2)
518 continue;
519 if (EDGE_SUCC (edge_to_cancel->src, 0) == edge_to_cancel)
520 edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 1);
521 else
522 edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 0);
524 /* We can only handle conditionals. */
525 if (!(edge_to_cancel->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
526 continue;
528 /* We should never have conditionals in the loop latch. */
529 gcc_assert (edge_to_cancel->dest != loop->header);
531 /* Check that it leads to loop latch. */
532 if (edge_to_cancel->dest != loop->latch)
533 continue;
535 /* Verify that the code in loop latch does nothing that may end program
536 execution without really reaching the exit. This may include
537 non-pure/const function calls, EH statements, volatile ASMs etc. */
538 for (gsi = gsi_start_bb (loop->latch); !gsi_end_p (gsi); gsi_next (&gsi))
539 if (gimple_has_side_effects (gsi_stmt (gsi)))
540 return NULL;
541 return edge_to_cancel;
543 return NULL;
546 /* Remove all tests for exits that are known to be taken after LOOP was
547 peeled NPEELED times. Put __builtin_unreachable before every statement
548 known to not be executed. */
550 static bool
551 remove_exits_and_undefined_stmts (class loop *loop, unsigned int npeeled)
553 class nb_iter_bound *elt;
554 bool changed = false;
556 for (elt = loop->bounds; elt; elt = elt->next)
558 /* If statement is known to be undefined after peeling, turn it
559 into unreachable (or trap when debugging experience is supposed
560 to be good). */
561 if (!elt->is_exit
562 && wi::ltu_p (elt->bound, npeeled))
564 gimple_stmt_iterator gsi = gsi_for_stmt (elt->stmt);
565 location_t loc = gimple_location (elt->stmt);
566 gcall *stmt = gimple_build_builtin_unreachable (loc);
567 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
568 split_block (gimple_bb (stmt), stmt);
569 changed = true;
570 if (dump_file && (dump_flags & TDF_DETAILS))
572 fprintf (dump_file, "Forced statement unreachable: ");
573 print_gimple_stmt (dump_file, elt->stmt, 0);
576 /* If we know the exit will be taken after peeling, update. */
577 else if (elt->is_exit
578 && wi::leu_p (elt->bound, npeeled))
580 basic_block bb = gimple_bb (elt->stmt);
581 edge exit_edge = EDGE_SUCC (bb, 0);
583 if (dump_file && (dump_flags & TDF_DETAILS))
585 fprintf (dump_file, "Forced exit to be taken: ");
586 print_gimple_stmt (dump_file, elt->stmt, 0);
588 if (!loop_exit_edge_p (loop, exit_edge))
589 exit_edge = EDGE_SUCC (bb, 1);
590 exit_edge->probability = profile_probability::always ();
591 gcc_checking_assert (loop_exit_edge_p (loop, exit_edge));
592 gcond *cond_stmt = as_a <gcond *> (elt->stmt);
593 if (exit_edge->flags & EDGE_TRUE_VALUE)
594 gimple_cond_make_true (cond_stmt);
595 else
596 gimple_cond_make_false (cond_stmt);
597 update_stmt (cond_stmt);
598 changed = true;
601 return changed;
604 /* Remove all exits that are known to be never taken because of the loop bound
605 discovered. */
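/* For instance (an illustrative sketch), if the loop is known to iterate at
   most 4 times, an exit that could only be taken on iteration 1000, e.g.

     if (i == 1000)
       break;

   is pointless; its condition is folded so the exit is never taken.  */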
607 static bool
608 remove_redundant_iv_tests (class loop *loop)
610 class nb_iter_bound *elt;
611 bool changed = false;
613 if (!loop->any_upper_bound)
614 return false;
615 for (elt = loop->bounds; elt; elt = elt->next)
617 /* Exit is pointless if it won't be taken before loop reaches
618 upper bound. */
619 if (elt->is_exit && loop->any_upper_bound
620 && wi::ltu_p (loop->nb_iterations_upper_bound, elt->bound))
622 basic_block bb = gimple_bb (elt->stmt);
623 edge exit_edge = EDGE_SUCC (bb, 0);
624 class tree_niter_desc niter;
626 if (!loop_exit_edge_p (loop, exit_edge))
627 exit_edge = EDGE_SUCC (bb, 1);
629 /* Only when we know the actual number of iterations, not
630 just a bound, can we remove the exit. */
631 if (!number_of_iterations_exit (loop, exit_edge,
632 &niter, false, false)
633 || !integer_onep (niter.assumptions)
634 || !integer_zerop (niter.may_be_zero)
635 || !niter.niter
636 || TREE_CODE (niter.niter) != INTEGER_CST
637 || !wi::ltu_p (widest_int::from (loop->nb_iterations_upper_bound,
638 SIGNED),
639 wi::to_widest (niter.niter)))
640 continue;
642 if (dump_file && (dump_flags & TDF_DETAILS))
644 fprintf (dump_file, "Removed pointless exit: ");
645 print_gimple_stmt (dump_file, elt->stmt, 0);
647 gcond *cond_stmt = as_a <gcond *> (elt->stmt);
648 if (exit_edge->flags & EDGE_TRUE_VALUE)
649 gimple_cond_make_false (cond_stmt);
650 else
651 gimple_cond_make_true (cond_stmt);
652 update_stmt (cond_stmt);
653 changed = true;
656 return changed;
659 /* Stores loops that will be unlooped and edges that will be removed
660 after we process the whole loop tree. */
661 static vec<loop_p> loops_to_unloop;
662 static vec<int> loops_to_unloop_nunroll;
663 static vec<edge> edges_to_remove;
664 /* Stores loops that have been peeled. */
665 static bitmap peeled_loops;
667 /* Cancel all fully unrolled loops by putting __builtin_unreachable
668 on the latch edge.
669 We do it after all unrolling since unlooping moves basic blocks
670 across loop boundaries, trashing loop closed SSA form as well
671 as SCEV info that needs to be intact during unrolling.
673 IRRED_INVALIDATED is used to record whether information about
674 irreducible regions may become invalid as a result
675 of the transformation.
676 LOOP_CLOSED_SSA_INVALIDATED is used to record the case
677 when we need to go into loop closed SSA form. */
679 void
680 unloop_loops (vec<class loop *> &loops_to_unloop,
681 vec<int> &loops_to_unloop_nunroll,
682 vec<edge> &edges_to_remove,
683 bitmap loop_closed_ssa_invalidated,
684 bool *irred_invalidated)
686 while (loops_to_unloop.length ())
688 class loop *loop = loops_to_unloop.pop ();
689 int n_unroll = loops_to_unloop_nunroll.pop ();
690 basic_block latch = loop->latch;
691 edge latch_edge = loop_latch_edge (loop);
692 int flags = latch_edge->flags;
693 location_t locus = latch_edge->goto_locus;
694 gcall *stmt;
695 gimple_stmt_iterator gsi;
697 remove_exits_and_undefined_stmts (loop, n_unroll);
699 /* Unloop destroys the latch edge. */
700 unloop (loop, irred_invalidated, loop_closed_ssa_invalidated);
702 /* Create new basic block for the latch edge destination and wire
703 it in. */
704 stmt = gimple_build_builtin_unreachable (locus);
705 latch_edge = make_edge (latch, create_basic_block (NULL, NULL, latch), flags);
706 latch_edge->probability = profile_probability::never ();
707 latch_edge->flags |= flags;
708 latch_edge->goto_locus = locus;
710 add_bb_to_loop (latch_edge->dest, current_loops->tree_root);
711 latch_edge->dest->count = profile_count::zero ();
712 set_immediate_dominator (CDI_DOMINATORS, latch_edge->dest, latch_edge->src);
714 gsi = gsi_start_bb (latch_edge->dest);
715 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
718 /* Remove edges in peeled copies. Given that remove_path removes dominated
719 regions, we need to cope with the removal of already removed paths. */
720 unsigned i;
721 edge e;
722 auto_vec<int, 20> src_bbs;
723 src_bbs.reserve_exact (edges_to_remove.length ());
724 FOR_EACH_VEC_ELT (edges_to_remove, i, e)
725 src_bbs.quick_push (e->src->index);
726 FOR_EACH_VEC_ELT (edges_to_remove, i, e)
727 if (BASIC_BLOCK_FOR_FN (cfun, src_bbs[i]))
729 bool ok = remove_path (e, irred_invalidated,
730 loop_closed_ssa_invalidated);
731 gcc_assert (ok);
733 edges_to_remove.release ();
736 /* Tries to unroll LOOP completely, i.e. NITER times.
737 UL determines which loops we are allowed to unroll.
738 EXIT is the exit of the loop that should be eliminated.
739 MAXITER specifies a bound on the number of iterations, -1 if it is
740 not known or too large for HOST_WIDE_INT. The location
741 LOCUS corresponding to the loop is used when emitting
742 a summary of the unroll to the dump file. */
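/* As a source-level illustration (the actual transform is on GIMPLE),
   complete unrolling turns

     for (i = 0; i < 4; i++)
       a[i] = b[i];

   into the straight-line sequence

     a[0] = b[0];
     a[1] = b[1];
     a[2] = b[2];
     a[3] = b[3];

   with the exit test removed entirely.  */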
744 static bool
745 try_unroll_loop_completely (class loop *loop,
746 edge exit, tree niter, bool may_be_zero,
747 enum unroll_level ul,
748 HOST_WIDE_INT maxiter,
749 dump_user_location_t locus, bool allow_peel,
750 bool cunrolli)
752 unsigned HOST_WIDE_INT n_unroll = 0;
753 bool n_unroll_found = false;
754 edge edge_to_cancel = NULL;
756 /* See if we proved the number of iterations to be a low constant.
758 EXIT is an edge that will be removed in all but last iteration of
759 the loop.
761 EDGE_TO_CANCEL is an edge that will be removed from the last iteration
762 of the unrolled sequence and is expected to make the final loop not
763 rolling.
765 If the number of executions of the loop is determined by a standard induction
766 variable test, then EXIT and EDGE_TO_CANCEL are the two edges leaving
767 from the iv test. */
768 if (tree_fits_uhwi_p (niter))
770 n_unroll = tree_to_uhwi (niter);
771 n_unroll_found = true;
772 edge_to_cancel = EDGE_SUCC (exit->src, 0);
773 if (edge_to_cancel == exit)
774 edge_to_cancel = EDGE_SUCC (exit->src, 1);
776 /* We do not know the number of iterations and thus we cannot eliminate
777 the EXIT edge. */
778 else
779 exit = NULL;
781 /* See if we can improve our estimate by using recorded loop bounds. */
782 if ((maxiter == 0 || ul != UL_SINGLE_ITER)
783 && maxiter >= 0
784 && (!n_unroll_found || (unsigned HOST_WIDE_INT)maxiter < n_unroll))
786 n_unroll = maxiter;
787 n_unroll_found = true;
788 /* Loop terminates before the IV variable test, so we cannot
789 remove it in the last iteration. */
790 edge_to_cancel = NULL;
791 /* If we do not allow peeling and the loop still iterates, allow only
792 cases that do not grow code. */
793 if (!allow_peel && maxiter != 0)
794 ul = UL_NO_GROWTH;
797 if (!n_unroll_found)
798 return false;
800 if (!loop->unroll
801 && n_unroll > (unsigned) param_max_completely_peel_times)
803 if (dump_file && (dump_flags & TDF_DETAILS))
804 fprintf (dump_file, "Not unrolling loop %d "
805 "(--param max-completely-peel-times limit reached).\n",
806 loop->num);
807 return false;
810 if (!edge_to_cancel)
811 edge_to_cancel = loop_edge_to_cancel (loop);
813 if (n_unroll)
815 if (ul == UL_SINGLE_ITER)
816 return false;
818 if (loop->unroll)
820 /* If the unrolling factor is too large, bail out. */
821 if (n_unroll > (unsigned)loop->unroll)
823 if (dump_file && (dump_flags & TDF_DETAILS))
824 fprintf (dump_file,
825 "Not unrolling loop %d: "
826 "user didn't want it unrolled completely.\n",
827 loop->num);
828 return false;
831 else
833 struct loop_size size;
834 /* EXIT can be removed only if we are sure it passes first N_UNROLL
835 iterations. */
836 bool remove_exit = (exit && niter
837 && TREE_CODE (niter) == INTEGER_CST
838 && wi::leu_p (n_unroll, wi::to_widest (niter)));
839 bool large
840 = tree_estimate_loop_size
841 (loop, remove_exit ? exit : NULL, edge_to_cancel, &size,
842 param_max_completely_peeled_insns);
843 if (large)
845 if (dump_file && (dump_flags & TDF_DETAILS))
846 fprintf (dump_file, "Not unrolling loop %d: it is too large.\n",
847 loop->num);
848 return false;
851 unsigned HOST_WIDE_INT ninsns = size.overall;
852 unsigned HOST_WIDE_INT est_eliminated;
853 unsigned HOST_WIDE_INT unr_insns
854 = estimated_unrolled_size (&size, &est_eliminated, n_unroll);
855 if (dump_file && (dump_flags & TDF_DETAILS))
857 fprintf (dump_file, " Loop size: %d\n", (int) ninsns);
858 fprintf (dump_file, " Estimated size after unrolling: %d-%d\n",
859 (int) unr_insns, (int) est_eliminated);
862 /* If the code is going to shrink, we don't need to be extra
863 cautious on guessing if the unrolling is going to be
864 profitable.
865 Move from estimated_unrolled_size to unroll small loops. */
866 if (unr_insns - est_eliminated
867 /* If there is an IV variable that will become constant, we
868 save one instruction in the loop prologue that we do not
869 otherwise account for. */
870 <= ninsns + (size.constant_iv != false))
872 /* We unroll only inner loops, because we do not consider it
873 profitable otherwise. We can still cancel the loopback edge
874 of a loop that does not roll; this is always a good idea. */
875 else if (ul == UL_NO_GROWTH)
877 if (dump_file && (dump_flags & TDF_DETAILS))
878 fprintf (dump_file, "Not unrolling loop %d: size would grow.\n",
879 loop->num);
880 return false;
882 /* Outer loops tend to be less interesting candidates for
883 complete unrolling unless we can do a lot of propagation
884 into the inner loop body. For now we disable outer loop
885 unrolling when the code would grow. */
886 else if (loop->inner)
888 if (dump_file && (dump_flags & TDF_DETAILS))
889 fprintf (dump_file, "Not unrolling loop %d: "
890 "it is not innermost and code would grow.\n",
891 loop->num);
892 return false;
894 /* If there is a call on a hot path through the loop, then
895 there is most probably not much to optimize. */
896 else if (size.num_non_pure_calls_on_hot_path)
898 if (dump_file && (dump_flags & TDF_DETAILS))
899 fprintf (dump_file, "Not unrolling loop %d: "
900 "contains call and code would grow.\n",
901 loop->num);
902 return false;
904 /* If there is a pure/const call in the function, then we can
905 still optimize the unrolled loop body if it contains some
906 interesting code other than the calls and the code storing or
907 accumulating the return value.
908 else if (size.num_pure_calls_on_hot_path
909 /* One IV increment, one test, one ivtmp store and
910 one useful stmt. That is about the minimal loop
911 doing a pure call. */
912 && (size.non_call_stmts_on_hot_path
913 <= 3 + size.num_pure_calls_on_hot_path))
915 if (dump_file && (dump_flags & TDF_DETAILS))
916 fprintf (dump_file, "Not unrolling loop %d: "
917 "contains just pure calls and code would grow.\n",
918 loop->num);
919 return false;
921 /* Complete unrolling is major win when control flow is
922 removed and one big basic block is created. If the loop
923 contains control flow the optimization may still be a win
924 because of eliminating the loop overhead but it also may
925 blow the branch predictor tables. Limit number of
926 branches on the hot path through the peeled sequence. */
927 else if (size.num_branches_on_hot_path * (int)n_unroll
928 > param_max_peel_branches)
930 if (dump_file && (dump_flags & TDF_DETAILS))
931 fprintf (dump_file, "Not unrolling loop %d: "
932 "number of branches on hot path in the unrolled "
933 "sequence reaches --param max-peel-branches limit.\n",
934 loop->num);
935 return false;
937 /* Move the 2 / 3 reduction from estimated_unrolled_size, but don't reduce
938 the unrolled size for the innermost loop.
939 1) It could increase register pressure.
940 2) A big loop after complete unrolling may not be vectorized
941 by the BB vectorizer. */
942 else if ((cunrolli ? unr_insns : unr_insns - est_eliminated)
943 > (unsigned) param_max_completely_peeled_insns)
945 if (dump_file && (dump_flags & TDF_DETAILS))
946 fprintf (dump_file, "Not unrolling loop %d: "
947 "number of insns in the unrolled sequence reaches "
948 "--param max-completely-peeled-insns limit.\n",
949 loop->num);
950 return false;
954 if (!dbg_cnt (gimple_unroll))
955 return false;
957 initialize_original_copy_tables ();
958 auto_sbitmap wont_exit (n_unroll + 1);
959 if (exit && niter
960 && TREE_CODE (niter) == INTEGER_CST
961 && wi::leu_p (n_unroll, wi::to_widest (niter)))
963 bitmap_ones (wont_exit);
964 if (wi::eq_p (wi::to_widest (niter), n_unroll)
965 || edge_to_cancel)
966 bitmap_clear_bit (wont_exit, 0);
968 else
970 exit = NULL;
971 bitmap_clear (wont_exit);
973 if (may_be_zero)
974 bitmap_clear_bit (wont_exit, 1);
976 /* If loop was originally estimated to iterate too many times,
977 reduce the profile to avoid new profile inconsistencies. */
978 scale_loop_profile (loop, profile_probability::always (), n_unroll);
980 if (!gimple_duplicate_loop_body_to_header_edge (
981 loop, loop_preheader_edge (loop), n_unroll, wont_exit, exit,
982 &edges_to_remove,
983 DLTHE_FLAG_UPDATE_FREQ | DLTHE_FLAG_COMPLETTE_PEEL))
985 free_original_copy_tables ();
986 if (dump_file && (dump_flags & TDF_DETAILS))
987 fprintf (dump_file, "Failed to duplicate the loop\n");
988 return false;
991 free_original_copy_tables ();
993 else
994 scale_loop_profile (loop, profile_probability::always (), 0);
996 /* Remove the conditional from the last copy of the loop. */
997 if (edge_to_cancel)
999 gcond *cond = as_a <gcond *> (*gsi_last_bb (edge_to_cancel->src));
1000 force_edge_cold (edge_to_cancel, true);
1001 if (edge_to_cancel->flags & EDGE_TRUE_VALUE)
1002 gimple_cond_make_false (cond);
1003 else
1004 gimple_cond_make_true (cond);
1005 update_stmt (cond);
1006 /* Do not remove the path, as doing so may remove outer loop and
1007 confuse bookkeeping code in tree_unroll_loops_completely. */
1010 /* Store the loop for later unlooping and exit removal. */
1011 loops_to_unloop.safe_push (loop);
1012 loops_to_unloop_nunroll.safe_push (n_unroll);
1014 if (dump_enabled_p ())
1016 if (!n_unroll)
1017 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus,
1018 "loop turned into non-loop; it never loops\n");
1019 else
1021 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus,
1022 "loop with %d iterations completely unrolled",
1023 (int) n_unroll);
1024 if (loop->header->count.initialized_p ())
1025 dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS,
1026 " (header execution count %d)",
1027 (int)loop->header->count.to_gcov_type ());
1028 dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, "\n");
1032 if (dump_file && (dump_flags & TDF_DETAILS))
1034 if (exit)
1035 fprintf (dump_file, "Exit condition of peeled iterations was "
1036 "eliminated.\n");
1037 if (edge_to_cancel)
1038 fprintf (dump_file, "Last iteration exit edge was proved true.\n");
1039 else
1040 fprintf (dump_file, "Latch of last iteration was marked by "
1041 "__builtin_unreachable ().\n");
1044 return true;
1047 /* Return number of instructions after peeling. */
1048 static unsigned HOST_WIDE_INT
1049 estimated_peeled_sequence_size (struct loop_size *size,
1050 unsigned HOST_WIDE_INT npeel)
1052 return MAX (npeel * (HOST_WIDE_INT) (size->overall
1053 - size->eliminated_by_peeling), 1);
1056 /* Update loop estimates after peeling LOOP by NPEEL.
1057 If PRECISE is false only likely exits were duplicated and thus
1058 we do not update any estimates that are supposed to be always reliable. */
1059 void
1060 adjust_loop_info_after_peeling (class loop *loop, int npeel, bool precise)
1062 if (loop->any_estimate)
1064 /* Since peeling is mostly about loops where the first few
1065 iterations are special, it is not quite correct to
1066 assume that the remaining iterations will behave
1067 the same way. However we do not have better info,
1068 so update the estimate, since it is likely better
1069 than keeping it as it is.
1071 Remove it if it looks wrong.
1073 TODO: We likely want to special case the situation where
1074 peeling is optimizing out exit edges and only update
1075 estimates here. */
1076 if (wi::leu_p (npeel, loop->nb_iterations_estimate))
1077 loop->nb_iterations_estimate -= npeel;
1078 else
1079 loop->any_estimate = false;
1081 if (loop->any_upper_bound && precise)
1083 if (wi::leu_p (npeel, loop->nb_iterations_upper_bound))
1084 loop->nb_iterations_upper_bound -= npeel;
1085 else
1087 /* Peeling maximal number of iterations or more
1088 makes no sense and is a bug.
1089 We should peel completely. */
1090 gcc_unreachable ();
1093 if (loop->any_likely_upper_bound)
1095 if (wi::leu_p (npeel, loop->nb_iterations_likely_upper_bound))
1096 loop->nb_iterations_likely_upper_bound -= npeel;
1097 else
1099 loop->any_estimate = true;
1100 loop->nb_iterations_estimate = 0;
1101 loop->nb_iterations_likely_upper_bound = 0;
1106 /* If the loop is expected to iterate N times and is
1107 small enough, duplicate the loop body N+1 times before
1108 the loop itself. This way the hot path will never
1109 enter the loop.
1110 Parameters are the same as for try_unroll_loop_completely. */
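/* As an illustrative sketch, if the profile says a loop usually iterates
   twice, peeling places three guarded copies of the body in front of the
   loop (the loop itself remains, but the hot path no longer enters it):

     if (!cond) goto done;
     body;
     if (!cond) goto done;
     body;
     if (!cond) goto done;
     body;
     while (cond)
       body;
     done:;  */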
1112 static bool
1113 try_peel_loop (class loop *loop,
1114 edge exit, tree niter, bool may_be_zero,
1115 HOST_WIDE_INT maxiter)
1117 HOST_WIDE_INT npeel;
1118 struct loop_size size;
1119 int peeled_size;
1121 if (!flag_peel_loops
1122 || param_max_peel_times <= 0
1123 || !peeled_loops)
1124 return false;
1126 if (bitmap_bit_p (peeled_loops, loop->num))
1128 if (dump_file)
1129 fprintf (dump_file, "Not peeling: loop is already peeled\n");
1130 return false;
1133 /* We don't peel loops that will be unrolled as this can duplicate a
1134 loop more times than the user requested. */
1135 if (loop->unroll)
1137 if (dump_file)
1138 fprintf (dump_file, "Not peeling: user didn't want it peeled.\n");
1139 return false;
1142 /* Peel only innermost loops.
1143 While the code is perfectly capable of peeling non-innermost loops,
1144 the heuristics would probably need some improvements. */
1145 if (loop->inner)
1147 if (dump_file)
1148 fprintf (dump_file, "Not peeling: outer loop\n");
1149 return false;
1152 if (!optimize_loop_for_speed_p (loop))
1154 if (dump_file)
1155 fprintf (dump_file, "Not peeling: cold loop\n");
1156 return false;
1159 /* Check if there is an estimate on the number of iterations. */
1160 npeel = estimated_loop_iterations_int (loop);
1161 if (npeel < 0)
1162 npeel = likely_max_loop_iterations_int (loop);
1163 if (npeel < 0)
1165 if (dump_file)
1166 fprintf (dump_file, "Not peeling: number of iterations is not "
1167 "estimated\n");
1168 return false;
1170 if (maxiter >= 0 && maxiter <= npeel)
1172 if (dump_file)
1173 fprintf (dump_file, "Not peeling: upper bound is known so can "
1174 "unroll completely\n");
1175 return false;
1178 /* We want to peel the estimated number of iterations + 1 (so we never
1179 enter the loop on the quick path). Check against PARAM_MAX_PEEL_TIMES
1180 and be sure to avoid overflows. */
1181 if (npeel > param_max_peel_times - 1)
1183 if (dump_file)
1184 fprintf (dump_file, "Not peeling: rolls too much "
1185 "(%i + 1 > --param max-peel-times)\n", (int) npeel);
1186 return false;
1188 npeel++;
1190 /* Check peeled loops size. */
1191 tree_estimate_loop_size (loop, exit, NULL, &size,
1192 param_max_peeled_insns);
1193 if ((peeled_size = estimated_peeled_sequence_size (&size, (int) npeel))
1194 > param_max_peeled_insns)
1196 if (dump_file)
1197 fprintf (dump_file, "Not peeling: peeled sequence size is too large "
1198 "(%i insns > --param max-peel-insns)", peeled_size);
1199 return false;
1202 if (!dbg_cnt (gimple_unroll))
1203 return false;
1205 /* Duplicate possibly eliminating the exits. */
1206 initialize_original_copy_tables ();
1207 auto_sbitmap wont_exit (npeel + 1);
1208 if (exit && niter
1209 && TREE_CODE (niter) == INTEGER_CST
1210 && wi::leu_p (npeel, wi::to_widest (niter)))
1212 bitmap_ones (wont_exit);
1213 bitmap_clear_bit (wont_exit, 0);
1215 else
1217 exit = NULL;
1218 bitmap_clear (wont_exit);
1220 if (may_be_zero)
1221 bitmap_clear_bit (wont_exit, 1);
1223 if (!gimple_duplicate_loop_body_to_header_edge (
1224 loop, loop_preheader_edge (loop), npeel, wont_exit, exit,
1225 &edges_to_remove, DLTHE_FLAG_UPDATE_FREQ))
1227 free_original_copy_tables ();
1228 return false;
1230 free_original_copy_tables ();
1231 if (dump_file && (dump_flags & TDF_DETAILS))
1233 fprintf (dump_file, "Peeled loop %d, %i times.\n",
1234 loop->num, (int) npeel);
1236 adjust_loop_info_after_peeling (loop, npeel, true);
1238 bitmap_set_bit (peeled_loops, loop->num);
1239 return true;
1241 /* Adds a canonical induction variable to LOOP if suitable.
1242 CREATE_IV is true if we may create a new iv. UL determines
1243 which loops we are allowed to completely unroll. If TRY_EVAL is true, we try
1244 to determine the number of iterations of a loop by direct evaluation.
1245 Returns true if cfg is changed. */
1247 static bool
1248 canonicalize_loop_induction_variables (class loop *loop,
1249 bool create_iv, enum unroll_level ul,
1250 bool try_eval, bool allow_peel,
1251 const_sbitmap innermost,
1252 bool cunrolli)
1254 edge exit = NULL;
1255 tree niter;
1256 HOST_WIDE_INT maxiter;
1257 bool modified = false;
1258 class tree_niter_desc niter_desc;
1259 bool may_be_zero = false;
1260 bool by_eval = false;
1262 /* For unrolling allow conditional constant or zero iterations, thus
1263 perform loop-header copying on-the-fly. */
1264 exit = single_exit (loop);
1265 niter = chrec_dont_know;
1266 if (exit && number_of_iterations_exit (loop, exit, &niter_desc, false))
1268 niter = niter_desc.niter;
1269 may_be_zero
1270 = niter_desc.may_be_zero && !integer_zerop (niter_desc.may_be_zero);
1272 if (TREE_CODE (niter) != INTEGER_CST)
1274 /* For non-constant niter fold may_be_zero into niter again. */
1275 if (may_be_zero)
1277 if (COMPARISON_CLASS_P (niter_desc.may_be_zero))
1278 niter = fold_build3 (COND_EXPR, TREE_TYPE (niter),
1279 niter_desc.may_be_zero,
1280 build_int_cst (TREE_TYPE (niter), 0), niter);
1281 else
1282 niter = chrec_dont_know;
1283 may_be_zero = false;
1286 /* If the loop has more than one exit, try checking all of them
1287 for # of iterations determinable through scev. */
1288 if (!exit)
1289 niter = find_loop_niter (loop, &exit);
1291 /* Finally if everything else fails, try brute force evaluation. */
1292 if (try_eval
1293 && (chrec_contains_undetermined (niter)
1294 || TREE_CODE (niter) != INTEGER_CST))
1296 niter = find_loop_niter_by_eval (loop, &exit);
1297 if (TREE_CODE (niter) == INTEGER_CST)
1298 by_eval = true;
1301 if (TREE_CODE (niter) != INTEGER_CST)
1302 exit = NULL;
1305 /* We work exceptionally hard here to estimate the bound
1306 by find_loop_niter_by_eval. Be sure to keep it for future. */
1307 if (niter && TREE_CODE (niter) == INTEGER_CST)
1309 auto_vec<edge> exits = get_loop_exit_edges (loop);
1310 record_niter_bound (loop, wi::to_widest (niter),
1311 exit == single_likely_exit (loop, exits), true);
1314 /* Force re-computation of loop bounds so we can remove redundant exits. */
1315 maxiter = max_loop_iterations_int (loop);
1317 if (dump_file && (dump_flags & TDF_DETAILS)
1318 && TREE_CODE (niter) == INTEGER_CST)
1320 fprintf (dump_file, "Loop %d iterates ", loop->num);
1321 print_generic_expr (dump_file, niter, TDF_SLIM);
1322 fprintf (dump_file, " times.\n");
1324 if (dump_file && (dump_flags & TDF_DETAILS)
1325 && maxiter >= 0)
1327 fprintf (dump_file, "Loop %d iterates at most %i times.\n", loop->num,
1328 (int)maxiter);
1330 if (dump_file && (dump_flags & TDF_DETAILS)
1331 && likely_max_loop_iterations_int (loop) >= 0)
1333 fprintf (dump_file, "Loop %d likely iterates at most %i times.\n",
1334 loop->num, (int)likely_max_loop_iterations_int (loop));
1337 /* Remove exits that are known to be never taken based on loop bound.
1338 Needs to be called after the computation of max_loop_iterations_int, which
1339 populates the loop bounds. */
1340 modified |= remove_redundant_iv_tests (loop);
1342 dump_user_location_t locus = find_loop_location (loop);
1344 bool innermost_cunrolli_p
1345 = cunrolli
1346 && (unsigned) loop->num < SBITMAP_SIZE (innermost)
1347 && bitmap_bit_p (innermost, loop->num);
1349 if (try_unroll_loop_completely (loop, exit, niter, may_be_zero, ul,
1350 maxiter, locus, allow_peel,
1351 innermost_cunrolli_p))
1352 return true;
1354 if ((create_iv || by_eval)
1355 && niter && !chrec_contains_undetermined (niter)
1356 && exit && just_once_each_iteration_p (loop, exit->src))
1358 tree iv_niter = niter;
1359 if (may_be_zero)
1361 if (COMPARISON_CLASS_P (niter_desc.may_be_zero))
1362 iv_niter = fold_build3 (COND_EXPR, TREE_TYPE (iv_niter),
1363 niter_desc.may_be_zero,
1364 build_int_cst (TREE_TYPE (iv_niter), 0),
1365 iv_niter);
1366 else
1367 iv_niter = NULL_TREE;
1369 if (iv_niter)
1370 create_canonical_iv (loop, exit, iv_niter);
1373 if (ul == UL_ALL)
1374 modified |= try_peel_loop (loop, exit, niter, may_be_zero, maxiter);
1376 return modified;
1379 /* The main entry point of the pass. Adds canonical induction variables
1380 to the suitable loops. */
1382 unsigned int
1383 canonicalize_induction_variables (void)
1385 bool changed = false;
1386 bool irred_invalidated = false;
1387 bitmap loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);
1388 auto_sbitmap innermost (number_of_loops (cfun));
1389 bitmap_clear (innermost);
1391 estimate_numbers_of_iterations (cfun);
1393 for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
1395 changed
1396 |= canonicalize_loop_induction_variables (loop,
1397 true, UL_SINGLE_ITER,
1398 true, false,
1399 (const_sbitmap) innermost,
1400 false);
1402 gcc_assert (!need_ssa_update_p (cfun));
1404 unloop_loops (loops_to_unloop, loops_to_unloop_nunroll, edges_to_remove,
1405 loop_closed_ssa_invalidated, &irred_invalidated);
1406 loops_to_unloop.release ();
1407 loops_to_unloop_nunroll.release ();
1408 if (irred_invalidated
1409 && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
1410 mark_irreducible_loops ();
1412 /* Clean up the information about numbers of iterations, since brute force
1413 evaluation could reveal new information. */
1414 free_numbers_of_iterations_estimates (cfun);
1415 scev_reset ();
1417 if (!bitmap_empty_p (loop_closed_ssa_invalidated))
1419 gcc_checking_assert (loops_state_satisfies_p (LOOP_CLOSED_SSA));
1420 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1422 BITMAP_FREE (loop_closed_ssa_invalidated);
1424 if (changed)
1425 return TODO_cleanup_cfg;
1426 return 0;
1429 /* Process loops from innermost to outer, stopping at the innermost
1430 loop we unrolled. */
1432 static bool
1433 tree_unroll_loops_completely_1 (bool may_increase_size, bool unroll_outer,
1434 bitmap father_bbs, class loop *loop,
1435 const_sbitmap innermost, bool cunrolli)
1437 class loop *loop_father;
1438 bool changed = false;
1439 class loop *inner;
1440 enum unroll_level ul;
1441 unsigned num = number_of_loops (cfun);
1443 /* Process inner loops first. Don't walk loops added by the recursive
1444 calls because SSA form is not up-to-date. They can be handled in the
1445 next iteration. */
1446 bitmap child_father_bbs = NULL;
1447 for (inner = loop->inner; inner != NULL; inner = inner->next)
1448 if ((unsigned) inner->num < num)
1450 if (!child_father_bbs)
1451 child_father_bbs = BITMAP_ALLOC (NULL);
1452 if (tree_unroll_loops_completely_1 (may_increase_size, unroll_outer,
1453 child_father_bbs, inner,
1454 innermost, cunrolli))
1456 bitmap_ior_into (father_bbs, child_father_bbs);
1457 bitmap_clear (child_father_bbs);
1458 changed = true;
1461 if (child_father_bbs)
1462 BITMAP_FREE (child_father_bbs);
1464 /* If we changed an inner loop we cannot process outer loops in this
1465 iteration because SSA form is not up-to-date. Continue with
1466 siblings of outer loops instead. */
1467 if (changed)
1469 /* If we are recorded as a father, clear all other fathers that
1470 are necessarily covered already to avoid redundant work. */
1471 if (bitmap_bit_p (father_bbs, loop->header->index))
1473 bitmap_clear (father_bbs);
1474 bitmap_set_bit (father_bbs, loop->header->index);
1476 return true;
1479 /* Don't unroll #pragma omp simd loops until the vectorizer
1480 attempts to vectorize those. */
1481 if (loop->force_vectorize)
1482 return false;
1484 /* Try to unroll this loop. */
1485 loop_father = loop_outer (loop);
1486 if (!loop_father)
1487 return false;
1489 if (loop->unroll > 1)
1490 ul = UL_ALL;
1491 else if (may_increase_size && optimize_loop_nest_for_speed_p (loop)
1492 /* Unroll outermost loops only if asked to do so or they do
1493 not cause code growth. */
1494 && (unroll_outer || loop_outer (loop_father)))
1495 ul = UL_ALL;
1496 else
1497 ul = UL_NO_GROWTH;
1499 if (canonicalize_loop_induction_variables
1500 (loop, false, ul, !flag_tree_loop_ivcanon || cunrolli, unroll_outer,
1501 innermost, cunrolli))
1503 /* If we'll continue unrolling, we need to propagate constants
1504 within the new basic blocks to fold away induction variable
1505 computations; otherwise, the size might blow up before the
1506 iteration is complete and the IR eventually cleaned up. */
1507 if (loop_outer (loop_father))
1509 /* Once we process our father we will have processed
1510 the fathers of our children as well, so avoid doing
1511 redundant work and clear fathers we've gathered so far. */
1512 bitmap_clear (father_bbs);
1513 bitmap_set_bit (father_bbs, loop_father->header->index);
1515 else if (unroll_outer)
1516 /* Trigger scalar cleanup once any outermost loop gets unrolled. */
1517 cfun->pending_TODOs |= PENDING_TODO_force_next_scalar_cleanup;
1519 return true;
1522 return false;
1525 /* Unroll LOOPS completely if they iterate just a few times. Unless
1526 MAY_INCREASE_SIZE is true, perform the unrolling only if the
1527 size of the code does not increase.
1528 CUNROLLI is true when the pass is cunrolli. */
1530 static unsigned int
1531 tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer, bool cunrolli)
1533 bitmap father_bbs = BITMAP_ALLOC (NULL);
1534 bool changed;
1535 int iteration = 0;
1536 bool irred_invalidated = false;
1537 auto_sbitmap innermost (number_of_loops (cfun));
1538 bitmap_clear (innermost);
1540 estimate_numbers_of_iterations (cfun);
1542 /* Mark all innermost loops at the beginning. */
1543 for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
1545 if (!loop->inner)
1546 bitmap_set_bit (innermost, loop->num);
1551 changed = false;
1552 bitmap loop_closed_ssa_invalidated = NULL;
1554 if (loops_state_satisfies_p (LOOP_CLOSED_SSA))
1555 loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);
1557 free_numbers_of_iterations_estimates (cfun);
1558 estimate_numbers_of_iterations (cfun);
1560 changed = tree_unroll_loops_completely_1 (may_increase_size,
1561 unroll_outer, father_bbs,
1562 current_loops->tree_root,
1563 (const_sbitmap) innermost,
1564 cunrolli);
1565 if (changed)
1567 unsigned i;
1569 unloop_loops (loops_to_unloop, loops_to_unloop_nunroll,
1570 edges_to_remove, loop_closed_ssa_invalidated,
1571 &irred_invalidated);
1572 loops_to_unloop.release ();
1573 loops_to_unloop_nunroll.release ();
1575 /* We cannot use TODO_update_ssa_no_phi because VOPS gets confused. */
1576 if (loop_closed_ssa_invalidated
1577 && !bitmap_empty_p (loop_closed_ssa_invalidated))
1578 rewrite_into_loop_closed_ssa (loop_closed_ssa_invalidated,
1579 TODO_update_ssa);
1580 else
1581 update_ssa (TODO_update_ssa);
1583 /* father_bbs is a bitmap of loop father header BB indices.
1584 Translate that to what non-root loops these BBs belong to now. */
1585 bitmap_iterator bi;
1586 bitmap fathers = BITMAP_ALLOC (NULL);
1587 EXECUTE_IF_SET_IN_BITMAP (father_bbs, 0, i, bi)
1589 basic_block unrolled_loop_bb = BASIC_BLOCK_FOR_FN (cfun, i);
1590 if (! unrolled_loop_bb)
1591 continue;
1592 if (loop_outer (unrolled_loop_bb->loop_father))
1593 bitmap_set_bit (fathers,
1594 unrolled_loop_bb->loop_father->num);
1596 bitmap_clear (father_bbs);
1597 /* Propagate the constants within the new basic blocks. */
1598 EXECUTE_IF_SET_IN_BITMAP (fathers, 0, i, bi)
1600 loop_p father = get_loop (cfun, i);
1601 bitmap exit_bbs = BITMAP_ALLOC (NULL);
1602 loop_exit *exit = father->exits->next;
1603 while (exit->e)
1605 bitmap_set_bit (exit_bbs, exit->e->dest->index);
1606 exit = exit->next;
1608 do_rpo_vn (cfun, loop_preheader_edge (father), exit_bbs);
1610 BITMAP_FREE (fathers);
1612 /* Clean up the information about numbers of iterations, since
1613 complete unrolling might have invalidated it. */
1614 scev_reset ();
1616 /* This will take care of removing completely unrolled loops
1617 from the loop structures so we can continue unrolling the
1618 now-innermost loops. */
1619 if (cleanup_tree_cfg ())
1620 update_ssa (TODO_update_ssa_only_virtuals);
1622 if (flag_checking && loops_state_satisfies_p (LOOP_CLOSED_SSA))
1623 verify_loop_closed_ssa (true);
1625 if (loop_closed_ssa_invalidated)
1626 BITMAP_FREE (loop_closed_ssa_invalidated);
1628 while (changed
1629 && ++iteration <= param_max_unroll_iterations);
1631 BITMAP_FREE (father_bbs);
1633 if (irred_invalidated
1634 && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
1635 mark_irreducible_loops ();
1637 return 0;
1640 /* Canonical induction variable creation pass. */
1642 namespace {
1644 const pass_data pass_data_iv_canon =
1646 GIMPLE_PASS, /* type */
1647 "ivcanon", /* name */
1648 OPTGROUP_LOOP, /* optinfo_flags */
1649 TV_TREE_LOOP_IVCANON, /* tv_id */
1650 ( PROP_cfg | PROP_ssa ), /* properties_required */
1651 0, /* properties_provided */
1652 0, /* properties_destroyed */
1653 0, /* todo_flags_start */
1654 0, /* todo_flags_finish */
1657 class pass_iv_canon : public gimple_opt_pass
1659 public:
1660 pass_iv_canon (gcc::context *ctxt)
1661 : gimple_opt_pass (pass_data_iv_canon, ctxt)
1664 /* opt_pass methods: */
1665 bool gate (function *) final override { return flag_tree_loop_ivcanon != 0; }
1666 unsigned int execute (function *fun) final override;
1668 }; // class pass_iv_canon
1670 unsigned int
1671 pass_iv_canon::execute (function *fun)
1673 if (number_of_loops (fun) <= 1)
1674 return 0;
1676 return canonicalize_induction_variables ();
1679 } // anon namespace
1681 gimple_opt_pass *
1682 make_pass_iv_canon (gcc::context *ctxt)
1684 return new pass_iv_canon (ctxt);
1687 /* Complete unrolling of loops. */
1689 namespace {
1691 const pass_data pass_data_complete_unroll =
1693 GIMPLE_PASS, /* type */
1694 "cunroll", /* name */
1695 OPTGROUP_LOOP, /* optinfo_flags */
1696 TV_COMPLETE_UNROLL, /* tv_id */
1697 ( PROP_cfg | PROP_ssa ), /* properties_required */
1698 0, /* properties_provided */
1699 0, /* properties_destroyed */
1700 0, /* todo_flags_start */
1701 0, /* todo_flags_finish */
1704 class pass_complete_unroll : public gimple_opt_pass
1706 public:
1707 pass_complete_unroll (gcc::context *ctxt)
1708 : gimple_opt_pass (pass_data_complete_unroll, ctxt)
1711 /* opt_pass methods: */
1712 unsigned int execute (function *) final override;
1714 }; // class pass_complete_unroll
1716 unsigned int
1717 pass_complete_unroll::execute (function *fun)
1719 if (number_of_loops (fun) <= 1)
1720 return 0;
1722 /* If we ever decide to run loop peeling more than once, we will need to
1723 track loops already peeled in loop structures themselves to avoid
1724 re-peeling the same loop multiple times. */
1725 if (flag_peel_loops)
1726 peeled_loops = BITMAP_ALLOC (NULL);
1727 unsigned int val = tree_unroll_loops_completely (flag_cunroll_grow_size, true, false);
1728 if (peeled_loops)
1730 BITMAP_FREE (peeled_loops);
1731 peeled_loops = NULL;
1733 return val;
1736 } // anon namespace
1738 gimple_opt_pass *
1739 make_pass_complete_unroll (gcc::context *ctxt)
1741 return new pass_complete_unroll (ctxt);
1744 /* Complete unrolling of inner loops. */
1746 namespace {
1748 const pass_data pass_data_complete_unrolli =
1750 GIMPLE_PASS, /* type */
1751 "cunrolli", /* name */
1752 OPTGROUP_LOOP, /* optinfo_flags */
1753 TV_COMPLETE_UNROLL, /* tv_id */
1754 ( PROP_cfg | PROP_ssa ), /* properties_required */
1755 0, /* properties_provided */
1756 0, /* properties_destroyed */
1757 0, /* todo_flags_start */
1758 0, /* todo_flags_finish */
1761 class pass_complete_unrolli : public gimple_opt_pass
1763 public:
1764 pass_complete_unrolli (gcc::context *ctxt)
1765 : gimple_opt_pass (pass_data_complete_unrolli, ctxt)
1768 /* opt_pass methods: */
1769 bool gate (function *) final override { return optimize >= 2; }
1770 unsigned int execute (function *) final override;
1772 }; // class pass_complete_unrolli
1774 unsigned int
1775 pass_complete_unrolli::execute (function *fun)
1777 unsigned ret = 0;
1779 loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
1780 if (number_of_loops (fun) > 1)
1782 scev_initialize ();
1783 ret = tree_unroll_loops_completely (optimize >= 3, false, true);
1784 scev_finalize ();
1786 loop_optimizer_finalize ();
1788 return ret;
1791 } // anon namespace
1793 gimple_opt_pass *
1794 make_pass_complete_unrolli (gcc::context *ctxt)
1796 return new pass_complete_unrolli (ctxt);