/* Loop splitting.
   Copyright (C) 2015-2025 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "tree-pass.h"
#include "ssa.h"
#include "fold-const.h"
#include "tree-cfg.h"
#include "tree-ssa.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "tree-ssa-loop-manip.h"
#include "tree-into-ssa.h"
#include "tree-inline.h"
#include "tree-cfgcleanup.h"
#include "cfgloop.h"
#include "tree-scalar-evolution.h"
#include "gimple-iterator.h"
#include "gimple-pretty-print.h"
#include "cfghooks.h"
#include "gimple-fold.h"
#include "gimplify-me.h"
#include "print-tree.h"
#include "value-query.h"
#include "sreal.h"
/* This file implements two kinds of loop splitting.

   One transformation of loops like:

   for (i = 0; i < 100; i++)
     {
       if (i < 50)
         A;
       else
         B;
     }

   into:

   for (i = 0; i < 50; i++)
     {
       A;
     }
   for (; i < 100; i++)
     {
       B;
     }

   */
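/* An illustrative source-level sketch (not part of GCC itself; the
   function and variable names below are made up): with -fsplit-loops,
   a guard that divides the iteration space is hoisted into the loop
   bounds, so

     void f (int *a, int *b, int n)
     {
       for (int i = 0; i < n; i++)
	 {
	   if (i < 50)
	     a[i] = i;
	   else
	     b[i] = i;
	 }
     }

   conceptually becomes

     void f (int *a, int *b, int n)
     {
       int i;
       for (i = 0; i < (n < 50 ? n : 50); i++)
	 a[i] = i;
       for (; i < n; i++)
	 b[i] = i;
     }

   where the first bound is clamped (via min/max) so that the split
   loops never iterate more often than the original; the clamping is
   computed by compute_new_first_bound below.  */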
/* Check whether BB inside LOOP is a potential iteration space split
   point, i.e. ends with a condition like "IV < comp", which is true
   on one side of the iteration space and false on the other, and the
   split point can be computed.  If so, return the comparison
   induction variable (and fill in IV with its affine descriptor),
   also returning the border point in *BORDER and the comparison code
   in *GUARD_CODE; otherwise return NULL_TREE.  */

static tree
split_at_bb_p (class loop *loop, basic_block bb, tree *border, affine_iv *iv,
	       enum tree_code *guard_code)
{
  gcond *stmt;
  affine_iv iv2;

  /* BB must end in a simple conditional jump.  */
  stmt = safe_dyn_cast <gcond *> (*gsi_last_bb (bb));
  if (!stmt)
    return NULL_TREE;

  enum tree_code code = gimple_cond_code (stmt);

  if (loop_exits_from_bb_p (loop, bb))
    return NULL_TREE;

  tree op0 = gimple_cond_lhs (stmt);
  tree op1 = gimple_cond_rhs (stmt);
  class loop *useloop = loop_containing_stmt (stmt);

  if (!simple_iv (loop, useloop, op0, iv, false))
    return NULL_TREE;
  if (!simple_iv (loop, useloop, op1, &iv2, false))
    return NULL_TREE;

  /* Make it so that the first argument of the condition is
     the looping one.  */
  if (!integer_zerop (iv2.step))
    {
      std::swap (op0, op1);
      std::swap (*iv, iv2);
      code = swap_tree_comparison (code);
      gimple_cond_set_condition (stmt, code, op0, op1);
      update_stmt (stmt);
    }
  else if (integer_zerop (iv->step))
    return NULL_TREE;
  if (!integer_zerop (iv2.step))
    return NULL_TREE;
  if (!iv->no_overflow)
    return NULL_TREE;

  /* Only handle relational comparisons; for equality and non-equality
     we'd have to split the loop into two loops and a middle statement.  */
  switch (code)
    {
    case LT_EXPR:
    case LE_EXPR:
    case GT_EXPR:
    case GE_EXPR:
      break;
    case NE_EXPR:
    case EQ_EXPR:
      /* If the test checks for the first iteration, we can handle NE/EQ
	 with only one split loop.  */
      if (operand_equal_p (iv->base, iv2.base, 0))
	{
	  if (code == EQ_EXPR)
	    code = !tree_int_cst_sign_bit (iv->step) ? LE_EXPR : GE_EXPR;
	  else
	    code = !tree_int_cst_sign_bit (iv->step) ? GT_EXPR : LT_EXPR;
	  break;
	}
      /* Similarly when the test checks for the minimal or maximal
	 value of the range.  */
      else
	{
	  value_range r (TREE_TYPE (op0));
	  get_global_range_query ()->range_of_expr (r, op0, stmt);
	  if (!r.varying_p () && !r.undefined_p ()
	      && TREE_CODE (op1) == INTEGER_CST)
	    {
	      wide_int val = wi::to_wide (op1);
	      if (known_eq (val, wi::to_wide (r.lbound ())))
		{
		  code = (code == EQ_EXPR) ? LE_EXPR : GT_EXPR;
		  break;
		}
	      else if (known_eq (val, wi::to_wide (r.ubound ())))
		{
		  code = (code == EQ_EXPR) ? GE_EXPR : LT_EXPR;
		  break;
		}
	    }
	}
      /* TODO: We can compare with the exit condition; it seems that testing
	 for the last iteration is a common case.  */
      return NULL_TREE;
    default:
      return NULL_TREE;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Found potential split point: ");
      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
      fprintf (dump_file, " { ");
      print_generic_expr (dump_file, iv->base, TDF_SLIM);
      fprintf (dump_file, " + I*");
      print_generic_expr (dump_file, iv->step, TDF_SLIM);
      fprintf (dump_file, " } %s ", get_tree_code_name (code));
      print_generic_expr (dump_file, iv2.base, TDF_SLIM);
      fprintf (dump_file, "\n");
    }

  *border = iv2.base;
  *guard_code = code;
  return op0;
}
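/* Illustrative examples (not from the sources) of guards the function
   above accepts or rejects, for a loop "for (i = 0; i < n; i++)":

     if (i < 30)      accepted: IV on the LHS, loop-invariant border
     if (42 > i)      accepted after canonicalization: operands and
		      comparison code are swapped so the IV comes first
     if (i == 0)      accepted: an EQ/NE test against the IV's initial
		      value is rewritten to LE/GT above
     if (i != j)      rejected when j is also an IV evolving in LOOP
		      (both steps are non-zero)
     if (i * i < n)   rejected: the LHS is not a simple affine IV

   The returned tree is the IV operand; *BORDER and *GUARD_CODE then
   describe the comparison against the invariant side.  */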
/* Given a GUARD conditional stmt inside LOOP, which we want to make always
   true or false depending on INITIAL_TRUE, and adjusted values NEXTVAL
   (a post-increment IV) and NEWBOUND (the comparator) adjust the loop
   exit test statement to loop back only if the GUARD statement will
   also be true/false in the next iteration.  */

static void
patch_loop_exit (class loop *loop, tree_code guard_code, tree nextval,
		 tree newbound, bool initial_true)
{
  edge exit = single_exit (loop);
  gcond *stmt = as_a <gcond *> (*gsi_last_bb (exit->src));
  gimple_cond_set_condition (stmt, guard_code, nextval, newbound);
  update_stmt (stmt);

  edge stay = EDGE_SUCC (exit->src, EDGE_SUCC (exit->src, 0) == exit);

  exit->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
  stay->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);

  if (initial_true)
    {
      exit->flags |= EDGE_FALSE_VALUE;
      stay->flags |= EDGE_TRUE_VALUE;
    }
  else
    {
      exit->flags |= EDGE_TRUE_VALUE;
      stay->flags |= EDGE_FALSE_VALUE;
    }
}
/* Given an induction variable GUARD_IV, and its affine descriptor IV,
   find the loop phi node in LOOP defining it directly, or create
   such a phi node.  Return that phi node.  */

static gphi *
find_or_create_guard_phi (class loop *loop, tree guard_iv, affine_iv * /*iv*/)
{
  gimple *def = SSA_NAME_DEF_STMT (guard_iv);
  gphi *phi;
  if ((phi = dyn_cast <gphi *> (def))
      && gimple_bb (phi) == loop->header)
    return phi;

  /* XXX Create the PHI instead.  */
  return NULL;
}
/* Returns true if the exit values of all loop phi nodes can be
   determined easily (i.e. that connect_loop_phis can determine them).  */

static bool
easy_exit_values (class loop *loop)
{
  edge exit = single_exit (loop);
  edge latch = loop_latch_edge (loop);
  gphi_iterator psi;

  /* Currently we regard the exit values as easy if they are the same
     as the value over the backedge.  Which is the case if the definition
     of the backedge value dominates the exit edge.  */
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      gphi *phi = psi.phi ();
      tree next = PHI_ARG_DEF_FROM_EDGE (phi, latch);
      basic_block bb;
      if (TREE_CODE (next) == SSA_NAME
	  && (bb = gimple_bb (SSA_NAME_DEF_STMT (next)))
	  && !dominated_by_p (CDI_DOMINATORS, exit->src, bb))
	return false;
    }

  return true;
}
/* This function updates the SSA form after connect_loops made a new
   edge NEW_E leading from LOOP1 exit to LOOP2 (via an intermediate
   conditional).  I.e. the second loop can now be entered either
   via the original entry or via NEW_E, so the entry values of LOOP2
   phi nodes are either the original ones or those at the exit
   of LOOP1.  Insert new phi nodes in LOOP2 pre-header reflecting
   this.  The loops need to fulfill easy_exit_values().  */

static void
connect_loop_phis (class loop *loop1, class loop *loop2, edge new_e)
{
  basic_block rest = loop_preheader_edge (loop2)->src;
  gcc_assert (new_e->dest == rest);
  edge skip_first = EDGE_PRED (rest, EDGE_PRED (rest, 0) == new_e);

  edge firste = loop_preheader_edge (loop1);
  edge seconde = loop_preheader_edge (loop2);
  edge firstn = loop_latch_edge (loop1);
  gphi_iterator psi_first, psi_second;
  for (psi_first = gsi_start_phis (loop1->header),
       psi_second = gsi_start_phis (loop2->header);
       !gsi_end_p (psi_first);
       gsi_next (&psi_first), gsi_next (&psi_second))
    {
      tree init, next, new_init;
      use_operand_p op;
      gphi *phi_first = psi_first.phi ();
      gphi *phi_second = psi_second.phi ();

      init = PHI_ARG_DEF_FROM_EDGE (phi_first, firste);
      next = PHI_ARG_DEF_FROM_EDGE (phi_first, firstn);
      op = PHI_ARG_DEF_PTR_FROM_EDGE (phi_second, seconde);
      gcc_assert (operand_equal_for_phi_arg_p (init, USE_FROM_PTR (op)));

      /* Prefer using the original variable as a base for the new ssa name.
	 This is necessary for virtual ops, and useful in order to avoid
	 losing debug info for real ops.  */
      if (TREE_CODE (next) == SSA_NAME
	  && useless_type_conversion_p (TREE_TYPE (next),
					TREE_TYPE (init)))
	new_init = copy_ssa_name (next);
      else if (TREE_CODE (init) == SSA_NAME
	       && useless_type_conversion_p (TREE_TYPE (init),
					     TREE_TYPE (next)))
	new_init = copy_ssa_name (init);
      else if (useless_type_conversion_p (TREE_TYPE (next),
					  TREE_TYPE (init)))
	new_init = make_temp_ssa_name (TREE_TYPE (next), NULL,
				       "unrinittmp");
      else
	new_init = make_temp_ssa_name (TREE_TYPE (init), NULL,
				       "unrinittmp");

      gphi *newphi = create_phi_node (new_init, rest);
      add_phi_arg (newphi, init, skip_first, UNKNOWN_LOCATION);
      add_phi_arg (newphi, next, new_e, UNKNOWN_LOCATION);
      SET_USE (op, new_init);
    }
}
/* The two loops LOOP1 and LOOP2 were just created by loop versioning,
   they are still equivalent and placed in two arms of a diamond, like so:

               .-------if (cond)-------.
               v                       v
             pre1                    pre2
               |                       |
       .--->h1                         h2<----.
       |    |                          |      |
       |   ex1---.              .---ex2       |
       |  /      |              |     \       |
       '--l1     X              |     l2------'
                 |              |
                 '---->join<----'

   This function transforms the program such that LOOP1 is conditionally
   falling through to LOOP2, or skipping it.  This is done by splitting
   the ex1->join edge at X in the diagram above, and inserting a condition
   whose one arm goes to pre2, resulting in this situation:

               .-------if (cond)-------.
               v                       v
             pre1        .---------->pre2
               |         |             |
       .--->h1           |             h2<----.
       |    |            |             |      |
       |   ex1---.       |      .---ex2       |
       |  /      v       |      |     \       |
       '--l1   skip------'      |     l2------'
                 |              |
                 '---->join<----'

   The condition used is the exit condition of LOOP1, which effectively means
   that when the first loop exits (for whatever reason) but the real original
   exit expression is still false, the second loop will be entered.
   The function returns the new edge cond->pre2.

   This doesn't update the SSA form, see connect_loop_phis for that.  */

static edge
connect_loops (class loop *loop1, class loop *loop2)
{
  edge exit = single_exit (loop1);
  basic_block skip_bb = split_edge (exit);
  gcond *skip_stmt;
  gimple_stmt_iterator gsi;
  edge new_e, skip_e;

  gcond *stmt = as_a <gcond *> (*gsi_last_bb (exit->src));
  skip_stmt = gimple_build_cond (gimple_cond_code (stmt),
				 gimple_cond_lhs (stmt),
				 gimple_cond_rhs (stmt),
				 NULL_TREE, NULL_TREE);
  gsi = gsi_last_bb (skip_bb);
  gsi_insert_after (&gsi, skip_stmt, GSI_NEW_STMT);

  skip_e = EDGE_SUCC (skip_bb, 0);
  skip_e->flags &= ~EDGE_FALLTHRU;
  new_e = make_edge (skip_bb, loop_preheader_edge (loop2)->src, 0);
  if (exit->flags & EDGE_TRUE_VALUE)
    {
      skip_e->flags |= EDGE_TRUE_VALUE;
      new_e->flags |= EDGE_FALSE_VALUE;
    }
  else
    {
      skip_e->flags |= EDGE_FALSE_VALUE;
      new_e->flags |= EDGE_TRUE_VALUE;
    }

  new_e->probability = profile_probability::very_likely ();
  skip_e->probability = new_e->probability.invert ();

  return new_e;
}
/* This returns the new bound for iterations given the original iteration
   space in NITER, an arbitrary new bound BORDER, assumed to be some
   comparison value with a different IV, the initial value GUARD_INIT of
   that other IV, and the comparison code GUARD_CODE that compares
   that other IV with BORDER.  We return an SSA name, and place any
   necessary statements for that computation into *STMTS.

   For example for such a loop:

     for (i = beg, j = guard_init; i < end; i++, j++)
       if (j < border)  // this is supposed to be true/false
	 ...

   we want to return a new bound (on j) that makes the loop iterate
   as long as the condition j < border stays true.  We also don't want
   to iterate more often than the original loop, so we have to introduce
   some cut-off as well (via min/max), effectively resulting in:

     newend = min (end + guard_init - beg, border)
     for (i = beg, j = guard_init; j < newend; i++, j++)
       if (j < border)
	 ...

   Depending on the direction of the IVs and whether the exit tests
   are strict or non-strict we need to use MIN or MAX,
   and add or subtract 1.  This routine computes newend above.  */

static tree
compute_new_first_bound (gimple_seq *stmts, class tree_niter_desc *niter,
			 tree border,
			 enum tree_code guard_code, tree guard_init)
{
  /* The niter structure contains the after-increment IV, we need
     the loop-enter base, so subtract STEP once.  */
  tree controlbase = force_gimple_operand (niter->control.base,
					   stmts, true, NULL_TREE);
  tree controlstep = niter->control.step;
  tree enddiff;
  if (POINTER_TYPE_P (TREE_TYPE (controlbase)))
    {
      controlstep = gimple_build (stmts, NEGATE_EXPR,
				  TREE_TYPE (controlstep), controlstep);
      enddiff = gimple_build (stmts, POINTER_PLUS_EXPR,
			      TREE_TYPE (controlbase),
			      controlbase, controlstep);
    }
  else
    enddiff = gimple_build (stmts, MINUS_EXPR,
			    TREE_TYPE (controlbase),
			    controlbase, controlstep);

  /* Compute end-beg.  */
  gimple_seq stmts2;
  tree end = force_gimple_operand (niter->bound, &stmts2,
				   true, NULL_TREE);
  gimple_seq_add_seq_without_update (stmts, stmts2);
  if (POINTER_TYPE_P (TREE_TYPE (enddiff)))
    {
      tree tem = gimple_convert (stmts, sizetype, enddiff);
      tem = gimple_build (stmts, NEGATE_EXPR, sizetype, tem);
      enddiff = gimple_build (stmts, POINTER_PLUS_EXPR,
			      TREE_TYPE (enddiff),
			      end, tem);
    }
  else
    enddiff = gimple_build (stmts, MINUS_EXPR, TREE_TYPE (enddiff),
			    end, enddiff);

  /* Compute guard_init + (end-beg).  */
  tree newbound;
  enddiff = gimple_convert (stmts, TREE_TYPE (guard_init), enddiff);
  if (POINTER_TYPE_P (TREE_TYPE (guard_init)))
    {
      enddiff = gimple_convert (stmts, sizetype, enddiff);
      newbound = gimple_build (stmts, POINTER_PLUS_EXPR,
			       TREE_TYPE (guard_init),
			       guard_init, enddiff);
    }
  else
    newbound = gimple_build (stmts, PLUS_EXPR, TREE_TYPE (guard_init),
			     guard_init, enddiff);

  /* Depending on the direction of the IVs the new bound for the first
     loop is the minimum or maximum of old bound and border.
     Also, if the guard condition isn't strictly less or greater,
     we need to adjust the bound.  */
  int addbound = 0;
  enum tree_code minmax;
  if (niter->cmp == LT_EXPR)
    {
      /* GT and LE are the same, inverted.  */
      if (guard_code == GT_EXPR || guard_code == LE_EXPR)
	addbound = -1;
      minmax = MIN_EXPR;
    }
  else
    {
      gcc_assert (niter->cmp == GT_EXPR);
      if (guard_code == GE_EXPR || guard_code == LT_EXPR)
	addbound = 1;
      minmax = MAX_EXPR;
    }

  if (addbound)
    {
      tree type2 = TREE_TYPE (newbound);
      if (POINTER_TYPE_P (type2))
	type2 = sizetype;
      newbound = gimple_build (stmts,
			       POINTER_TYPE_P (TREE_TYPE (newbound))
			       ? POINTER_PLUS_EXPR : PLUS_EXPR,
			       TREE_TYPE (newbound),
			       newbound,
			       build_int_cst (type2, addbound));
    }

  tree newend = gimple_build (stmts, minmax, TREE_TYPE (border),
			      border, newbound);
  return newend;
}
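/* A worked instance of the computation above (illustration only, not
   from the sources):

     for (i = 0, j = 10; i < 100; i++, j++)
       if (j <= 50)   // guard_code == LE_EXPR
	 ...

   Here beg = 0, end = 100, guard_init = 10, border = 50 and
   niter->cmp == LT_EXPR, so minmax = MIN_EXPR and addbound = -1
   (LE is non-strict):

     newbound = 10 + (100 - 0) - 1 = 109
     newend   = MIN (50, 109)      = 50

   The first loop then runs the iterations with j in [10, 50]
   (i.e. i in [0, 40]) and the second loop handles the rest.  */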
/* Fix the bb counts of the two loops after the split, based on the split
   edge probability.  Don't adjust the bbs dominated by the true branch of
   the split condition, to avoid scaling counts of 1 down to 0.  */

static void
fix_loop_bb_probability (class loop *loop1, class loop *loop2, edge true_edge,
			 edge false_edge)
{
  /* Proportion the first loop's bb counts except those dominated by the
     true branch, to avoid dropping counts of 1 to 0.  */
  basic_block *bbs1, *bbs2;
  bbs1 = get_loop_body (loop1);
  unsigned j;
  for (j = 0; j < loop1->num_nodes; j++)
    if (bbs1[j] == loop1->latch
	/* Watch for the case where the true conditional is empty.  */
	|| !single_pred_p (true_edge->dest)
	|| !dominated_by_p (CDI_DOMINATORS, bbs1[j], true_edge->dest))
      bbs1[j]->count
	= bbs1[j]->count.apply_probability (true_edge->probability);
  free (bbs1);

  /* Proportion the second loop's bb counts except those dominated by the
     false branch, to avoid dropping counts of 1 to 0.  */
  basic_block bbi_copy = get_bb_copy (false_edge->dest);
  bbs2 = get_loop_body (loop2);
  for (j = 0; j < loop2->num_nodes; j++)
    if (bbs2[j] == loop2->latch
	/* Watch for the case where the false conditional is empty.  */
	|| !single_pred_p (bbi_copy)
	|| !dominated_by_p (CDI_DOMINATORS, bbs2[j], bbi_copy))
      bbs2[j]->count
	= bbs2[j]->count.apply_probability (true_edge->probability.invert ());
  free (bbs2);
}
/* Checks if LOOP1 contains a conditional block whose condition depends
   on which side of the iteration space it is on, and if so splits the
   iteration space into two loops.  Returns true if the loop was split.
   LOOP1 must have a single exit whose iteration descriptor can be
   computed.  */

static bool
split_loop (class loop *loop1)
{
  class tree_niter_desc niter;
  basic_block *bbs;
  unsigned i;
  bool changed = false;
  tree guard_iv;
  tree border = NULL_TREE;
  affine_iv iv;
  edge exit1;

  if (!(exit1 = single_exit (loop1))
      || EDGE_COUNT (exit1->src->succs) != 2
      /* ??? We could handle non-empty latches when we split the latch edge
	 (not the exit edge), and put the new exit condition in the new block.
	 OTOH this executes some code unconditionally that might have been
	 skipped by the original exit before.  */
      || !empty_block_p (loop1->latch)
      || !easy_exit_values (loop1)
      || !number_of_iterations_exit (loop1, exit1, &niter, false, true)
      || niter.cmp == ERROR_MARK)
    return false;
  if (niter.cmp == NE_EXPR)
    {
      if (!niter.control.no_overflow)
	return false;
      if (tree_int_cst_sign_bit (niter.control.step))
	niter.cmp = GT_EXPR;
      else
	niter.cmp = LT_EXPR;
    }

  bbs = get_loop_body (loop1);

  if (!can_copy_bbs_p (bbs, loop1->num_nodes))
    {
      free (bbs);
      return false;
    }

  /* Find a splitting opportunity.  */
  enum tree_code guard_code;
  for (i = 0; i < loop1->num_nodes; i++)
    if ((guard_iv = split_at_bb_p (loop1, bbs[i], &border, &iv, &guard_code)))
      {
	/* Handling opposite steps is not implemented yet.  Neither
	   is handling different step sizes.  */
	if ((tree_int_cst_sign_bit (iv.step)
	     != tree_int_cst_sign_bit (niter.control.step))
	    || !tree_int_cst_equal (iv.step, niter.control.step))
	  continue;

	/* Find a loop PHI node that defines guard_iv directly,
	   or create one doing that.  */
	gphi *phi = find_or_create_guard_phi (loop1, guard_iv, &iv);
	if (!phi)
	  continue;
	const tree phi_result = gimple_phi_result (phi);
	gcond *guard_stmt = as_a<gcond *> (*gsi_last_bb (bbs[i]));
	tree guard_init = PHI_ARG_DEF_FROM_EDGE (phi,
						 loop_preheader_edge (loop1));

	/* Loop splitting is implemented by versioning the loop, placing
	   the new loop after the old loop, making the first loop iterate
	   as long as the conditional stays true (or false) and letting the
	   second (new) loop handle the rest of the iterations.

	   First we need to determine if the condition will start being true
	   or false in the first loop.  */
	bool initial_true;
	switch (guard_code)
	  {
	  case LT_EXPR:
	  case LE_EXPR:
	    initial_true = !tree_int_cst_sign_bit (iv.step);
	    break;
	  case GT_EXPR:
	  case GE_EXPR:
	    initial_true = tree_int_cst_sign_bit (iv.step);
	    break;
	  default:
	    gcc_unreachable ();
	  }

	/* Build a condition that will skip the first loop when the
	   guard condition won't ever be true (or false).  */
	gimple_seq stmts2;
	border = force_gimple_operand (border, &stmts2, true, NULL_TREE);
	if (stmts2)
	  {
	    /* When the split condition is not always evaluated make sure
	       to rewrite it to defined overflow.  */
	    if (!dominated_by_p (CDI_DOMINATORS, exit1->src, bbs[i]))
	      {
		gimple_stmt_iterator gsi;
		gsi = gsi_start (stmts2);
		while (!gsi_end_p (gsi))
		  {
		    gimple *stmt = gsi_stmt (gsi);
		    if (is_gimple_assign (stmt)
			&& arith_code_with_undefined_signed_overflow
			     (gimple_assign_rhs_code (stmt)))
		      rewrite_to_defined_overflow (&gsi);
		    gsi_next (&gsi);
		  }
	      }
	    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop1),
					      stmts2);
	  }
	tree cond = fold_build2 (guard_code, boolean_type_node,
				 guard_init, border);
	if (!initial_true)
	  cond = fold_build1 (TRUTH_NOT_EXPR, boolean_type_node, cond);

	edge true_edge, false_edge;
	extract_true_false_edges_from_block (bbs[i], &true_edge, &false_edge);

	/* Now version the loop, placing loop2 after loop1 connecting
	   them, and fix up SSA form for that.  */
	initialize_original_copy_tables ();
	basic_block cond_bb;

	profile_probability loop1_prob
	  = integer_onep (cond) ? profile_probability::always ()
				: true_edge->probability;
	/* TODO: It is commonly the case that we know that both loops will be
	   entered.  very_likely below is the probability that the second loop
	   will be entered given by connect_loops.  We should work out the
	   common case where it is always true.  */
	class loop *loop2 = loop_version (loop1, cond, &cond_bb,
					  loop1_prob,
					  /* Pass always as we will later
					     redirect first loop to second
					     loop.  */
					  profile_probability::always (),
					  profile_probability::always (),
					  profile_probability::very_likely (),
					  true);
	gcc_assert (loop2);

	/* The phi address may have changed due to being resized in
	   loop_version (), so reobtain it.  */
	phi = as_a<gphi *> (SSA_NAME_DEF_STMT (phi_result));
	gcc_checking_assert (gimple_bb (phi) == loop1->header);

	/* Correct the probability of edge cond_bb->preheader_of_loop2.  */
	single_pred_edge
	  (loop_preheader_edge (loop2)->src)->probability
	    = loop1_prob.invert ();

	fix_loop_bb_probability (loop1, loop2, true_edge, false_edge);

	/* If the conditional we split on has a reliable profile and both
	   preconditionals of loop1 and loop2 are constant true, we can
	   just redistribute the iteration counts to the split loops.

	   If the conditionals we insert before loop1 or loop2 are non-trivial
	   they increase the expected loop count, so account for this
	   accordingly.  If we do not know the probability of the split
	   conditional, avoid reducing the loop estimates, since we do not
	   really know how they are split between the two new loops.  Keep
	   the original estimate since it is likely better than dropping it
	   completely.

	   TODO: If we know that one of the new loops has a constant
	   number of iterations, we can do better.  We could also update
	   upper bounds.  */
	if (loop1->any_estimate
	    && wi::fits_shwi_p (loop1->nb_iterations_estimate))
	  {
	    sreal scale = true_edge->probability.reliable_p ()
			  ? true_edge->probability.to_sreal () : (sreal)1;
	    sreal scale2 = false_edge->probability.reliable_p ()
			   ? false_edge->probability.to_sreal () : (sreal)1;
	    sreal div1 = loop1_prob.initialized_p ()
			 ? loop1_prob.to_sreal () : (sreal)1/(sreal)2;
	    /* +1 to get header iterations rather than latch iterations and
	       then -1 to convert back.  */
	    if (div1 != 0)
	      loop1->nb_iterations_estimate
		= MAX ((((sreal)loop1->nb_iterations_estimate.to_shwi () + 1)
			* scale / div1).to_nearest_int () - 1, 0);
	    else
	      loop1->any_estimate = false;
	    loop2->nb_iterations_estimate
	      = MAX ((((sreal)loop2->nb_iterations_estimate.to_shwi () + 1)
		      * scale2
		      / profile_probability::very_likely ().to_sreal ())
		     .to_nearest_int () - 1, 0);
	  }
	update_loop_exit_probability_scale_dom_bbs (loop1);
	update_loop_exit_probability_scale_dom_bbs (loop2);

	edge new_e = connect_loops (loop1, loop2);
	connect_loop_phis (loop1, loop2, new_e);

	/* The iterations of the second loop are now already
	   exactly those that the first loop didn't do, but the
	   iteration space of the first loop is still the original one.
	   Compute the new bound for the guarding IV and patch the
	   loop exit to use it instead of original IV and bound.  */
	gimple_seq stmts = NULL;
	tree newend = compute_new_first_bound (&stmts, &niter, border,
					       guard_code, guard_init);
	if (stmts)
	  gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop1),
					    stmts);
	tree guard_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop1));
	patch_loop_exit (loop1, guard_code, guard_next, newend, initial_true);

	/* Finally patch out the two copies of the condition to be always
	   true/false (or opposite).  */
	gcond *force_true = as_a<gcond *> (*gsi_last_bb (bbs[i]));
	gcond *force_false
	  = as_a<gcond *> (*gsi_last_bb (get_bb_copy (bbs[i])));
	if (!initial_true)
	  std::swap (force_true, force_false);
	gimple_cond_make_true (force_true);
	gimple_cond_make_false (force_false);
	update_stmt (force_true);
	update_stmt (force_false);

	free_original_copy_tables ();

	changed = true;
	if (dump_file && (dump_flags & TDF_DETAILS))
	  fprintf (dump_file, ";; Loop split.\n");

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, guard_stmt,
			   "loop split\n");

	/* Only deal with the first opportunity.  */
	break;
      }

  free (bbs);
  return changed;
}
/* Another transformation of loops like:

   for (i = INIT (); CHECK (i); i = NEXT ())
     {
       if (expr (a_1, a_2, ..., a_n))  // expr is pure
	 a_j = ...;  // change at least one a_j
       else
	 S;	     // not change any a_j
     }

   into:

   for (i = INIT (); CHECK (i); i = NEXT ())
     {
       if (expr (a_1, a_2, ..., a_n))
	 a_j = ...;
       else
	 {
	   i = NEXT ();
	   break;
	 }
     }

   for (; CHECK (i); i = NEXT ())
     {
       S;
     }
   */
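/* A concrete sketch of the pattern (illustrative only; the names are
   made up): once SEEN has been set, the condition below never changes
   again, so it is semi-invariant and the loop can be split as above.

     bool seen = false;
     for (int i = 0; i < n; i++)
       {
	 if (!seen)		// expr over SEEN only
	   seen = init (i);	// may change SEEN: the variant branch
	 else
	   consume (i);		// never changes SEEN: the invariant branch
       }

   After the split, the second loop's copy of "if (!seen)" is folded to
   a constant false, so it only runs consume (i).  */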
/* Data structure to hold temporary information during loop split upon
   semi-invariant conditional statement.  */
class split_info {
public:
  /* Array of all basic blocks in a loop, returned by get_loop_body().  */
  basic_block *bbs;

  /* All memory store/clobber statements in a loop.  */
  auto_vec<gimple *> memory_stores;

  /* Whether above memory stores vector has been filled.  */
  int need_init;

  /* Control dependencies of basic blocks in a loop.  */
  auto_vec<hash_set<basic_block> *> control_deps;

  split_info () : bbs (NULL), need_init (true) { }

  ~split_info ()
    {
      if (bbs)
	free (bbs);

      for (unsigned i = 0; i < control_deps.length (); i++)
	delete control_deps[i];
    }
};
/* Find all statements with memory-write effect in LOOP, including memory
   stores and non-pure function calls, and keep those in a vector.  This
   work is done only once, since the vector should stay constant during
   the analysis stage of semi-invariant conditions.  */

static void
find_vdef_in_loop (struct loop *loop)
{
  split_info *info = (split_info *) loop->aux;
  gphi *vphi = get_virtual_phi (loop->header);

  /* Indicate memory store vector has been filled.  */
  info->need_init = false;

  /* If the loop contains a memory operation, there must be a virtual PHI
     node in the loop header basic block.  */
  if (vphi == NULL)
    return;

  /* All virtual SSA names inside the loop are connected to be a cyclic
     graph via virtual PHI nodes.  The virtual PHI node in loop header just
     links the first and the last virtual SSA names, by using the last as
     PHI operand to define the first.  */
  const edge latch = loop_latch_edge (loop);
  const tree first = gimple_phi_result (vphi);
  const tree last = PHI_ARG_DEF_FROM_EDGE (vphi, latch);

  /* The virtual SSA cyclic graph might consist of only one SSA name,
     which is defined by itself.

       .MEM_1 = PHI <.MEM_2(loop entry edge), .MEM_1(latch edge)>

     This means the loop contains only memory loads, so we can skip it.  */
  if (first == last)
    return;

  auto_vec<gimple *> other_stores;
  auto_vec<tree> worklist;
  auto_bitmap visited;

  bitmap_set_bit (visited, SSA_NAME_VERSION (first));
  bitmap_set_bit (visited, SSA_NAME_VERSION (last));
  worklist.safe_push (last);

  do
    {
      tree vuse = worklist.pop ();
      gimple *stmt = SSA_NAME_DEF_STMT (vuse);

      /* We mark the first and last SSA names as visited at the beginning,
	 and reversely start the process from the last SSA name towards the
	 first, which ensures that this do-while will not touch SSA names
	 defined outside the loop.  */
      gcc_assert (gimple_bb (stmt)
		  && flow_bb_inside_loop_p (loop, gimple_bb (stmt)));

      if (gimple_code (stmt) == GIMPLE_PHI)
	{
	  gphi *phi = as_a <gphi *> (stmt);

	  for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
	    {
	      tree arg = gimple_phi_arg_def (stmt, i);

	      if (bitmap_set_bit (visited, SSA_NAME_VERSION (arg)))
		worklist.safe_push (arg);
	    }
	}
      else
	{
	  tree prev = gimple_vuse (stmt);

	  /* A non-pure call statement is conservatively assumed to impact
	     all memory locations.  So place call statements ahead of other
	     memory stores in the vector with the idea of using them as
	     shortcut terminators to memory alias analysis.  */
	  if (gimple_code (stmt) == GIMPLE_CALL)
	    info->memory_stores.safe_push (stmt);
	  else
	    other_stores.safe_push (stmt);

	  if (bitmap_set_bit (visited, SSA_NAME_VERSION (prev)))
	    worklist.safe_push (prev);
	}
    } while (!worklist.is_empty ());

  info->memory_stores.safe_splice (other_stores);
}
/* Two basic blocks have equivalent control dependency if one dominates
   the other and is post-dominated by the latter.  Given a basic block
   BB in LOOP, find the farthest equivalent dominating basic block.  BB
   is constrained not to post-dominate the loop header of LOOP, which
   means BB is control-dependent on at least one basic block in LOOP.  */

static basic_block
get_control_equiv_head_block (struct loop *loop, basic_block bb)
{
  while (!bb->aux)
    {
      basic_block dom_bb = get_immediate_dominator (CDI_DOMINATORS, bb);

      gcc_checking_assert (dom_bb && flow_bb_inside_loop_p (loop, dom_bb));

      if (!dominated_by_p (CDI_POST_DOMINATORS, dom_bb, bb))
	break;

      bb = dom_bb;
    }
  return bb;
}
/* Given a BB in LOOP, find out all basic blocks in LOOP that BB is control-
   dependent on.  */

static hash_set<basic_block> *
find_control_dep_blocks (struct loop *loop, basic_block bb)
{
  /* If BB has the same control dependency as the loop header, it is not
     control-dependent on any basic block in LOOP.  */
  if (dominated_by_p (CDI_POST_DOMINATORS, loop->header, bb))
    return NULL;

  basic_block equiv_head = get_control_equiv_head_block (loop, bb);

  if (equiv_head->aux)
    {
      /* There is a basic block with control dependency equivalent
	 to BB.  No need to recompute that, and also set this information
	 for the other equivalent basic blocks.  */
      for (; bb != equiv_head;
	   bb = get_immediate_dominator (CDI_DOMINATORS, bb))
	bb->aux = equiv_head->aux;
      return (hash_set<basic_block> *) equiv_head->aux;
    }

  /* A basic block X is control-dependent on another Y iff there exists
     a path from Y to X, in which every basic block other than X and Y
     is post-dominated by X, but Y itself is not post-dominated by X.

     According to this rule, traverse basic blocks in the loop backwards
     starting from BB; if a basic block is post-dominated by BB, extend
     the current post-dominating path to this block, otherwise it is
     another one that BB is control-dependent on.  */

  auto_vec<basic_block> pdom_worklist;
  hash_set<basic_block> pdom_visited;
  hash_set<basic_block> *dep_bbs = new hash_set<basic_block>;

  pdom_worklist.safe_push (equiv_head);

  do
    {
      basic_block pdom_bb = pdom_worklist.pop ();
      edge_iterator ei;
      edge e;

      if (pdom_visited.add (pdom_bb))
	continue;

      FOR_EACH_EDGE (e, ei, pdom_bb->preds)
	{
	  basic_block pred_bb = e->src;

	  if (!dominated_by_p (CDI_POST_DOMINATORS, pred_bb, bb))
	    {
	      dep_bbs->add (pred_bb);
	      continue;
	    }

	  pred_bb = get_control_equiv_head_block (loop, pred_bb);

	  if (pdom_visited.contains (pred_bb))
	    continue;

	  if (!pred_bb->aux)
	    {
	      pdom_worklist.safe_push (pred_bb);
	      continue;
	    }

	  /* If the control dependency of a basic block is available, fast
	     extend the post-dominating path using that information instead
	     of advancing forward step-by-step.  */
	  hash_set<basic_block> *pred_dep_bbs
	    = (hash_set<basic_block> *) pred_bb->aux;

	  for (hash_set<basic_block>::iterator iter = pred_dep_bbs->begin ();
	       iter != pred_dep_bbs->end (); ++iter)
	    {
	      basic_block pred_dep_bb = *iter;

	      /* These basic blocks are either in the control dependency
		 of BB, or must be post-dominated by BB; in the latter
		 case, extend the path from them.  */
	      if (!dominated_by_p (CDI_POST_DOMINATORS, pred_dep_bb, bb))
		dep_bbs->add (pred_dep_bb);
	      else if (!pdom_visited.contains (pred_dep_bb))
		pdom_worklist.safe_push (pred_dep_bb);
	    }
	}
    } while (!pdom_worklist.is_empty ());

  /* Record computed control dependencies in loop so that we can reach them
     when reclaiming resources.  */
  ((split_info *) loop->aux)->control_deps.safe_push (dep_bbs);

  /* Associate control dependence with related equivalent basic blocks.  */
  for (equiv_head->aux = dep_bbs; bb != equiv_head;
       bb = get_immediate_dominator (CDI_DOMINATORS, bb))
    bb->aux = dep_bbs;

  return dep_bbs;
}
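/* For intuition (an illustrative example, not from the sources): in a
   loop body shaped like

     bb1:  if (p) goto bb2; else goto bb4;
     bb2:  if (q) goto bb3; else goto bb4;
     bb3:  S;
     bb4:  ...

   bb3 is control-dependent on bb2 (q alone decides whether bb3 runs)
   but not on bb1, since bb2 on the path bb1->bb2->bb3 is not
   post-dominated by bb3.  So find_control_dep_blocks (loop, bb3)
   returns { bb2 }; whether the predicate in bb2 is itself stable is
   then checked recursively via control_dep_semi_invariant_p.  */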
/* Forward declaration.  */

static bool
stmt_semi_invariant_p_1 (struct loop *loop, gimple *stmt,
			 const_basic_block skip_head,
			 hash_map<gimple *, bool> &stmt_stat);
/* Given STMT, a memory load or pure call statement, check whether it is
   impacted by some memory store in LOOP, excluding the trace starting
   from SKIP_HEAD (the trace is composed of SKIP_HEAD and the basic blocks
   dominated by it, and always corresponds to one branch of a conditional
   statement).  If SKIP_HEAD is NULL, all basic blocks of LOOP are
   checked.  */

static bool
vuse_semi_invariant_p (struct loop *loop, gimple *stmt,
		       const_basic_block skip_head)
{
  split_info *info = (split_info *) loop->aux;
  tree rhs = NULL_TREE;
  ao_ref ref;
  gimple *store;
  unsigned i;

  /* Collect memory store/clobber statements if we haven't done so yet.  */
  if (info->need_init)
    find_vdef_in_loop (loop);

  if (is_gimple_assign (stmt))
    rhs = gimple_assign_rhs1 (stmt);

  ao_ref_init (&ref, rhs);

  FOR_EACH_VEC_ELT (info->memory_stores, i, store)
    {
      /* Skip basic blocks dominated by SKIP_HEAD, if non-NULL.  */
      if (skip_head
	  && dominated_by_p (CDI_DOMINATORS, gimple_bb (store), skip_head))
	continue;

      if (!ref.ref || stmt_may_clobber_ref_p_1 (store, &ref))
	return false;
    }

  return true;
}
/* Suppose that the conditional branch led by SKIP_HEAD stops being
   executed from some iteration of LOOP onwards; check whether an SSA
   name (NAME) remains unchanged in the next iteration.  We call this
   characteristic semi-invariantness.  SKIP_HEAD might be NULL; if so,
   nothing is excluded and all basic blocks and control flow in the loop
   are considered.  The semi-invariant state of a checked statement is
   cached in the hash map STMT_STAT to avoid redundant computation in
   a possible later re-check.  */

static inline bool
ssa_semi_invariant_p (struct loop *loop, tree name,
		      const_basic_block skip_head,
		      hash_map<gimple *, bool> &stmt_stat)
{
  gimple *def = SSA_NAME_DEF_STMT (name);
  const_basic_block def_bb = gimple_bb (def);

  /* An SSA name defined outside the loop is definitely semi-invariant.  */
  if (!def_bb || !flow_bb_inside_loop_p (loop, def_bb))
    return true;

  if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (name))
    return false;

  return stmt_semi_invariant_p_1 (loop, def, skip_head, stmt_stat);
}
/* Check whether a loop iteration PHI node (LOOP_PHI) defines a value that is
   semi-invariant in LOOP.  Basic blocks dominated by SKIP_HEAD (if non-NULL)
   are excluded from LOOP.  */

static bool
loop_iter_phi_semi_invariant_p (struct loop *loop, gphi *loop_phi,
				const_basic_block skip_head)
{
  const_edge latch = loop_latch_edge (loop);
  tree name = gimple_phi_result (loop_phi);
  tree from = PHI_ARG_DEF_FROM_EDGE (loop_phi, latch);

  gcc_checking_assert (from);

  /* A loop iteration PHI node is located in the loop header and has two
     source operands: one is an initial value coming from outside the loop,
     the other is a value through the latch of the loop, derived in the
     previous iteration; we call the latter the latch value.  From the PHI
     node to the definition of the latch value, if, excluding the branch
     trace starting from SKIP_HEAD, there is no kind of value redefinition
     other than copy assignments or the like, the SSA name defined by the
     PHI node is semi-invariant.

	 loop entry
	  |     .--- latch ---.
	  |     |             |
	  v     v             |
	x_1 = PHI <x_0, x_3>  |
		|             |
      .--- if (cond) ---.     |
      |                 |     |
      |             [ SKIP ]  |
      |                 |     |
      |             x_2 = ... |
      |                 |     |
      '---T--->.<---F---'     |
	       |              |
       x_3 = PHI <x_1, x_2>   |
	       |              |
	       '--------------'

     Suppose that in a certain iteration execution flow in the above graph
     goes through the true branch, which means that one source value that
     would define x_3 in the false branch (x_2) is skipped: x_3 only comes
     from x_1, and x_1 in the next iteration is defined by x_3.  We know
     that x_1 will never change if COND always chooses the true branch
     from then on.  */

  while (from != name)
    {
      /* A new value comes from a CONSTANT.  */
      if (TREE_CODE (from) != SSA_NAME)
	return false;

      gimple *stmt = SSA_NAME_DEF_STMT (from);
      const_basic_block bb = gimple_bb (stmt);

      /* A new value comes from outside the loop.  */
      if (!bb || !flow_bb_inside_loop_p (loop, bb))
	return false;

      from = NULL_TREE;

      if (gimple_code (stmt) == GIMPLE_PHI)
	{
	  gphi *phi = as_a <gphi *> (stmt);

	  for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
	    {
	      if (skip_head)
		{
		  const_edge e = gimple_phi_arg_edge (phi, i);

		  /* Don't consider redefinitions in excluded basic
		     blocks.  */
		  if (dominated_by_p (CDI_DOMINATORS, e->src, skip_head))
		    continue;
		}

	      tree arg = gimple_phi_arg_def (phi, i);

	      if (!from)
		from = arg;
	      else if (!operand_equal_p (from, arg, 0))
		/* There is more than one source operand that provides
		   a different value to the SSA name; it is variant.  */
		return false;
	    }
	}
      else if (gimple_code (stmt) == GIMPLE_ASSIGN)
	{
	  /* For a simple value copy, check its rhs instead.  */
	  if (gimple_assign_ssa_name_copy_p (stmt))
	    from = gimple_assign_rhs1 (stmt);
	}

      /* Any other kind of definition is deemed to introduce a new value
	 to the SSA name.  */
      if (!from)
	return false;
    }
  return true;
}
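/* Source-level intuition for the SSA graph above (illustrative only):

     x = 0;			// x_0
     for (...)
       {
	 // x_1 = PHI <x_0, x_3>
	 if (cond)
	   ;			// nothing redefines x here
	 else
	   x = foo ();		// x_2, inside the SKIP trace
	 // x_3 = PHI <x_1, x_2>
       }

   With SKIP_HEAD at the else branch, the only remaining source of x_3
   is x_1, so once COND stays true x keeps its value in all following
   iterations: the PHI result x_1 is semi-invariant.  */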
/* Check whether the conditional predicates that BB is control-dependent on
   are semi-invariant in LOOP.  Basic blocks dominated by SKIP_HEAD (if
   non-NULL) are excluded from LOOP.  The semi-invariant state of checked
   statements is cached in hash map STMT_STAT.  */

static bool
control_dep_semi_invariant_p (struct loop *loop, basic_block bb,
			      const_basic_block skip_head,
			      hash_map<gimple *, bool> &stmt_stat)
{
  hash_set<basic_block> *dep_bbs = find_control_dep_blocks (loop, bb);

  if (!dep_bbs)
    return true;

  for (hash_set<basic_block>::iterator iter = dep_bbs->begin ();
       iter != dep_bbs->end (); ++iter)
    {
      gimple *last = *gsi_last_bb (*iter);
      if (!last)
	return false;

      /* Only check condition predicates.  */
      if (gimple_code (last) != GIMPLE_COND
	  && gimple_code (last) != GIMPLE_SWITCH)
	return false;

      if (!stmt_semi_invariant_p_1 (loop, last, skip_head, stmt_stat))
	return false;
    }

  return true;
}
/* Check whether STMT is semi-invariant in LOOP: a statement is
   semi-invariant iff all its operands are semi-invariant, and consequently
   all values it defines are semi-invariant.  Basic blocks dominated by
   SKIP_HEAD (if non-NULL) are excluded from LOOP.  The semi-invariant
   state of checked statements is cached in hash map STMT_STAT.  */

static bool
stmt_semi_invariant_p_1 (struct loop *loop, gimple *stmt,
			 const_basic_block skip_head,
			 hash_map<gimple *, bool> &stmt_stat)
{
  bool existed;
  bool &invar = stmt_stat.get_or_insert (stmt, &existed);

  if (existed)
    return invar;

  /* A statement might depend on itself, which is treated as variant.  So
     set the state of the statement under check to variant to ensure that.  */
  invar = false;

  if (gimple_code (stmt) == GIMPLE_PHI)
    {
      gphi *phi = as_a <gphi *> (stmt);

      if (gimple_bb (stmt) == loop->header)
	{
	  /* If the entry value is subject to abnormal coalescing
	     avoid the transform since we're going to duplicate the
	     loop header and thus likely introduce overlapping life-ranges
	     between the PHI def and the entry on the path when the
	     first loop is skipped.  */
	  tree entry_def
	    = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
	  if (TREE_CODE (entry_def) == SSA_NAME
	      && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (entry_def))
	    return false;
	  invar = loop_iter_phi_semi_invariant_p (loop, phi, skip_head);
	  return invar;
	}

      /* A loop PHI node that does not locate in the loop header is semi-
	 invariant only if two conditions are met.  The first is that its
	 source values are derived from CONSTANTs (including loop-invariant
	 values), or from SSA names defined by semi-invariant loop iteration
	 PHI nodes.  The second is that its source incoming edges are
	 control-dependent on semi-invariant conditional predicates.  */
      for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
	{
	  const_edge e = gimple_phi_arg_edge (phi, i);
	  tree arg = gimple_phi_arg_def (phi, i);

	  if (TREE_CODE (arg) == SSA_NAME)
	    {
	      if (!ssa_semi_invariant_p (loop, arg, skip_head, stmt_stat))
		return false;

	      /* If the source value is defined in the block from which the
		 source edge comes, there is no need to check control
		 dependency again, since this has been done in the SSA name
		 check stage above.  */
	      if (e->src == gimple_bb (SSA_NAME_DEF_STMT (arg)))
		continue;
	    }

	  if (!control_dep_semi_invariant_p (loop, e->src, skip_head,
					     stmt_stat))
	    return false;
	}
    }
  else
    {
      ssa_op_iter iter;
      tree use;

      /* A volatile memory load, or the return value of a normal
	 (non-const/non-pure) call, should not be treated as constant in
	 each iteration of the loop.  */
      if (gimple_has_side_effects (stmt))
	return false;

      /* Check if any memory store may kill a memory load at this place.  */
      if (gimple_vuse (stmt) && !vuse_semi_invariant_p (loop, stmt, skip_head))
	return false;

      /* Although an operand of a statement might be an SSA name, CONSTANT
	 or VARDECL, here we only need to check SSA name operands.  This is
	 because a check on VARDECL operands, which involve memory loads,
	 must have been done prior to invocation of this function in
	 vuse_semi_invariant_p.  */
      FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
	if (!ssa_semi_invariant_p (loop, use, skip_head, stmt_stat))
	  return false;
    }

  if (!control_dep_semi_invariant_p (loop, gimple_bb (stmt), skip_head,
				     stmt_stat))
    return false;

  /* Here we SHOULD NOT use invar = true, since the hash map might have been
     changed due to a new insertion, and thus invar may point to invalid
     memory.  */
  stmt_stat.put (stmt, true);
  return true;
}
/* A helper function to check whether STMT is semi-invariant in LOOP.  Basic
   blocks dominated by SKIP_HEAD (if non-NULL) are excluded from LOOP.  */

static bool
stmt_semi_invariant_p (struct loop *loop, gimple *stmt,
		       const_basic_block skip_head)
{
  hash_map<gimple *, bool> stmt_stat;
  return stmt_semi_invariant_p_1 (loop, stmt, skip_head, stmt_stat);
}
/* Determine whether, when a conditional statement never transfers execution
   to one of its branches, we can remove that branch's leading basic block
   (BRANCH_BB) and the basic blocks dominated by BRANCH_BB.  */

static bool
branch_removable_p (basic_block branch_bb)
{
  edge_iterator ei;
  edge e;

  if (single_pred_p (branch_bb))
    return true;

  FOR_EACH_EDGE (e, ei, branch_bb->preds)
    {
      if (dominated_by_p (CDI_DOMINATORS, e->src, branch_bb))
	continue;

      if (dominated_by_p (CDI_DOMINATORS, branch_bb, e->src))
	continue;

      /* The branch can be reached from the opposite branch, or from some
	 statement not dominated by the conditional statement.  */
      return false;
    }

  return true;
}
/* Find out which branch of a conditional statement (COND) is invariant in
   the execution context of LOOP.  That is: once the branch is selected in
   a certain iteration of the loop, any operand that contributes to the
   computation of the conditional statement remains unchanged in all
   following iterations.  */

static edge
get_cond_invariant_branch (struct loop *loop, gcond *cond)
{
  basic_block cond_bb = gimple_bb (cond);
  basic_block targ_bb[2];
  bool invar[2];
  unsigned invar_checks = 0;

  for (unsigned i = 0; i < 2; i++)
    {
      targ_bb[i] = EDGE_SUCC (cond_bb, i)->dest;

      /* If one branch directs to a loop exit, there is no need to perform
	 loop split upon this conditional statement.  Firstly, it is trivial
	 if the exit branch is semi-invariant, for the statement is just to
	 break the loop.  Secondly, if the opposite branch is semi-invariant,
	 the statement is really loop-invariant, which is covered by loop
	 unswitch.  */
      if (!flow_bb_inside_loop_p (loop, targ_bb[i]))
	return NULL;
    }

  for (unsigned i = 0; i < 2; i++)
    {
      invar[!i] = false;

      if (!branch_removable_p (targ_bb[i]))
	continue;

      /* Given a semi-invariant branch, if its opposite branch dominates
	 the loop latch, it and its following trace will only be executed
	 in the final iteration of the loop, namely it is not part of the
	 repeated body of the loop.  Similar to the above case where the
	 branch is a loop exit, no need to split the loop.  */
      if (dominated_by_p (CDI_DOMINATORS, loop->latch, targ_bb[i]))
	continue;

      invar[!i] = stmt_semi_invariant_p (loop, cond, targ_bb[i]);
      invar_checks++;
    }

  /* Neither both branches invariant (which is handled by loop unswitch)
     nor both variant is what we want.  */
  if (invar[0] ^ !invar[1])
    return NULL;

  /* Found a really loop-invariant condition, do nothing.  */
  if (invar_checks < 2 && stmt_semi_invariant_p (loop, cond, NULL))
    return NULL;

  return EDGE_SUCC (cond_bb, invar[0] ? 0 : 1);
}
/* Calculate the increased code size, measured by estimated insn number, of
   applying loop split upon a certain branch (BRANCH_EDGE) of a conditional
   statement.  */

static int
compute_added_num_insns (struct loop *loop, const_edge branch_edge)
{
  basic_block cond_bb = branch_edge->src;
  unsigned branch = EDGE_SUCC (cond_bb, 1) == branch_edge;
  basic_block opposite_bb = EDGE_SUCC (cond_bb, !branch)->dest;
  basic_block *bbs = ((split_info *) loop->aux)->bbs;
  int num = 0;

  for (unsigned i = 0; i < loop->num_nodes; i++)
    {
      /* Do not count basic blocks only in the opposite branch.  */
      if (dominated_by_p (CDI_DOMINATORS, bbs[i], opposite_bb))
	continue;

      num += estimate_num_insns_seq (bb_seq (bbs[i]), &eni_size_weights);
    }

  /* It is unnecessary to evaluate the expression of the conditional
     statement in the new loop that contains only the invariant branch.
     This expression should be a constant value (either true or false).
     Exclude the code size of insns that contribute to computation of the
     expression.  */

  auto_vec<gimple *> worklist;
  hash_set<gimple *> removed;
  gimple *stmt = last_nondebug_stmt (cond_bb);

  worklist.safe_push (stmt);
  removed.add (stmt);
  num -= estimate_num_insns (stmt, &eni_size_weights);

  do
    {
      ssa_op_iter opnd_iter;
      use_operand_p opnd_p;

      stmt = worklist.pop ();
      FOR_EACH_PHI_OR_STMT_USE (opnd_p, stmt, opnd_iter, SSA_OP_USE)
	{
	  tree opnd = USE_FROM_PTR (opnd_p);

	  if (TREE_CODE (opnd) != SSA_NAME || SSA_NAME_IS_DEFAULT_DEF (opnd))
	    continue;

	  gimple *opnd_stmt = SSA_NAME_DEF_STMT (opnd);
	  use_operand_p use_p;
	  imm_use_iterator use_iter;

	  if (removed.contains (opnd_stmt)
	      || !flow_bb_inside_loop_p (loop, gimple_bb (opnd_stmt)))
	    continue;

	  FOR_EACH_IMM_USE_FAST (use_p, use_iter, opnd)
	    {
	      gimple *use_stmt = USE_STMT (use_p);

	      if (!is_gimple_debug (use_stmt) && !removed.contains (use_stmt))
		{
		  opnd_stmt = NULL;
		  break;
		}
	    }

	  if (opnd_stmt)
	    {
	      worklist.safe_push (opnd_stmt);
	      removed.add (opnd_stmt);
	      num -= estimate_num_insns (opnd_stmt, &eni_size_weights);
	    }
	}
    } while (!worklist.is_empty ());

  gcc_assert (num >= 0);
  return num;
}
/* Find the loop-invariant branch of a conditional statement (COND), if
   there is one, and check whether it is eligible and profitable to perform
   loop split upon this branch in LOOP.  */

static edge
get_cond_branch_to_split_loop (struct loop *loop, gcond *cond)
{
  edge invar_branch = get_cond_invariant_branch (loop, cond);
  if (!invar_branch)
    return NULL;

  /* When accurate profile information is available, and the execution
     frequency of the branch is too low, just let it go.  */
  profile_probability prob = invar_branch->probability;
  if (prob.reliable_p ())
    {
      int thres = param_min_loop_cond_split_prob;

      if (prob < profile_probability::always ().apply_scale (thres, 100))
	return NULL;
    }

  /* Add a threshold for increased code size to disable loop split.  */
  if (compute_added_num_insns (loop, invar_branch) > param_max_peeled_insns)
    return NULL;

  return invar_branch;
}
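/* Both thresholds used above are tunable from the command line
   (illustration of the corresponding --param spellings):

     gcc -O3 --param min-loop-cond-split-prob=30 \
	     --param max-peeled-insns=200 test.c

   min-loop-cond-split-prob is a percentage; max-peeled-insns bounds
   the code growth computed by compute_added_num_insns.  */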
/* Given a loop (LOOP1) with a loop-invariant branch (INVAR_BRANCH) of some
   conditional statement, perform the loop split transformation illustrated
   by the following graph.

      .-------T------ if (true) ------F------.
      |                                      |
      |                .---------.          |
      |                |         |          |
      v                |         v          v
  pre-header           |          pre-header
      |   .------------.      |             |   .------------.
      |   |            |      |             |   |            |
      v   v            |      |             v   v            |
     header            |      |            header            |
       |               |      |              |               |
 .--- if (cond) ---.   |      |      .--- if (true) ---.     |
 |                 |   |      |      |                 |     |
 invariant         |   |      |      invariant         |     |
 |                 |   |      |      |                 |     |
 '---T--->.<---F---'   |      |      '---T--->.<---F---'     |
          |            |      |               |              |
        stmts          |      |             stmts            |
          |            |      |               |              |
         / \           |      |              / \             |
 .-------* *           |      |      .-------* *             |
 |         |           |      |      |         |             |
 |  [ if (cond) ]---T--------'       |       latch           |
 |       F |                         |         |             |
 |       latch                       |         '-------------'
 |         |                         |
 |         '-----------'             |
 '-------------------.               .----------'
        loop1        |               |      loop2
                     v               v
                       exits

   In the graph, loop1 represents the part derived from the original one,
   and loop2 is duplicated using loop_version (), which corresponds to the
   part of the original one being split out.  On the original latch edge of
   loop1 we insert a new conditional statement duplicated from the
   semi-invariant cond; one of its branches goes back to the loop1 header
   as a latch edge, and the other branch goes to the loop2 pre-header as an
   entry edge.  And also in loop2, we abandon the variant branch of the
   conditional statement by setting a constant bool condition, based on
   which branch is semi-invariant.  */
static bool
do_split_loop_on_cond (struct loop *loop1, edge invar_branch)
{
  basic_block cond_bb = invar_branch->src;
  bool true_invar = !!(invar_branch->flags & EDGE_TRUE_VALUE);
  gcond *cond = as_a <gcond *> (*gsi_last_bb (cond_bb));

  gcc_assert (cond_bb->loop_father == loop1);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, cond,
		     "loop split on semi-invariant condition at %s branch\n",
		     true_invar ? "true" : "false");

  initialize_original_copy_tables ();

  struct loop *loop2 = loop_version (loop1, boolean_true_node, NULL,
				     invar_branch->probability.invert (),
				     invar_branch->probability,
				     profile_probability::always (),
				     profile_probability::always (),
				     true);
  if (!loop2)
    {
      free_original_copy_tables ();
      return false;
    }

  basic_block cond_bb_copy = get_bb_copy (cond_bb);
  gcond *cond_copy = as_a<gcond *> (*gsi_last_bb (cond_bb_copy));

  /* Replace the condition in loop2 with a bool constant to let PassManager
     remove the variant branch after current pass completes.  */
  if (true_invar)
    gimple_cond_make_true (cond_copy);
  else
    gimple_cond_make_false (cond_copy);

  update_stmt (cond_copy);

  /* Insert a new conditional statement on the latch edge of loop1, whose
     condition is duplicated from the semi-invariant.  This statement acts
     as a switch to transfer execution from loop1 to loop2, when loop1
     enters into the invariant state.  */
  basic_block latch_bb = split_edge (loop_latch_edge (loop1));
  basic_block break_bb = split_edge (single_pred_edge (latch_bb));
  gimple *break_cond = gimple_build_cond (gimple_cond_code (cond),
					  gimple_cond_lhs (cond),
					  gimple_cond_rhs (cond),
					  NULL_TREE, NULL_TREE);

  gimple_stmt_iterator gsi = gsi_last_bb (break_bb);
  gsi_insert_after (&gsi, break_cond, GSI_NEW_STMT);

  edge to_loop1 = single_succ_edge (break_bb);
  edge to_loop2 = make_edge (break_bb, loop_preheader_edge (loop2)->src, 0);

  to_loop1->flags &= ~EDGE_FALLTHRU;
  to_loop1->flags |= true_invar ? EDGE_FALSE_VALUE : EDGE_TRUE_VALUE;
  to_loop2->flags |= true_invar ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE;

  /* Due to the introduction of a control flow edge from loop1 latch to
     loop2 pre-header, we should update PHIs in loop2 to reflect this
     connection between loop1 and loop2.  */
  connect_loop_phis (loop1, loop2, to_loop2);

  edge true_edge, false_edge, skip_edge1, skip_edge2;
  extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);

  skip_edge1 = true_invar ? false_edge : true_edge;
  skip_edge2 = true_invar ? true_edge : false_edge;
  fix_loop_bb_probability (loop1, loop2, skip_edge1, skip_edge2);

  /* Fix first loop's exit probability after scaling.  */
  to_loop1->probability = invar_branch->probability.invert ();
  to_loop2->probability = invar_branch->probability;

  free_original_copy_tables ();

  return true;
}
/* Traverse all conditional statements in LOOP, to find out a good candidate
   upon which we can do loop split.  */

static bool
split_loop_on_cond (struct loop *loop)
{
  split_info *info = new split_info ();
  basic_block *bbs = info->bbs = get_loop_body (loop);
  bool do_split = false;

  /* Allocate an area to keep temporary info, and associate its address
     with the loop aux field.  */
  loop->aux = info;

  for (unsigned i = 0; i < loop->num_nodes; i++)
    bbs[i]->aux = NULL;

  for (unsigned i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      /* We only consider conditional statements that are executed at most
	 once in each iteration of the loop, so skip statements in inner
	 loops.  */
      if ((bb->loop_father != loop) || (bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      /* Actually this check is not a must constraint.  With it, we can
	 ensure that the conditional statement will always be executed in
	 each iteration.  */
      if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
	continue;

      gcond *cond = safe_dyn_cast <gcond *> (*gsi_last_bb (bb));
      if (!cond)
	continue;

      edge branch_edge = get_cond_branch_to_split_loop (loop, cond);

      if (branch_edge)
	{
	  do_split_loop_on_cond (loop, branch_edge);
	  do_split = true;
	  break;
	}
    }

  delete info;
  loop->aux = NULL;

  return do_split;
}
/* Main entry point.  Perform loop splitting on all suitable loops.  */

static unsigned int
tree_ssa_split_loops (void)
{
  bool changed = false;

  gcc_assert (scev_initialized_p ());

  calculate_dominance_info (CDI_POST_DOMINATORS);

  for (auto loop : loops_list (cfun, LI_INCLUDE_ROOT))
    loop->aux = NULL;

  /* Go through all loops starting from innermost.  */
  for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
    {
      if (loop->aux)
	{
	  /* If any of our inner loops was split, don't split us,
	     and mark our containing loop as having had splits as well.
	     This allows for delaying SSA update.  */
	  loop_outer (loop)->aux = loop;
	  continue;
	}

      if (optimize_loop_for_size_p (loop))
	continue;

      if (split_loop (loop) || split_loop_on_cond (loop))
	{
	  /* Mark our containing loop as having had some split inner
	     loops.  */
	  loop_outer (loop)->aux = loop;
	  changed = true;
	}
    }

  for (auto loop : loops_list (cfun, LI_INCLUDE_ROOT))
    loop->aux = NULL;

  clear_aux_for_blocks ();

  free_dominance_info (CDI_POST_DOMINATORS);

  if (changed)
    {
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
      return TODO_cleanup_cfg;
    }
  return 0;
}
/* Loop splitting pass.  */

namespace {

const pass_data pass_data_loop_split =
{
  GIMPLE_PASS, /* type */
  "lsplit", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_LOOP_SPLIT, /* tv_id */
  PROP_cfg, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_loop_split : public gimple_opt_pass
{
public:
  pass_loop_split (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_loop_split, ctxt)
  {}

  /* opt_pass methods: */
  bool gate (function *) final override { return flag_split_loops != 0; }
  unsigned int execute (function *) final override;

}; // class pass_loop_split

unsigned int
pass_loop_split::execute (function *fun)
{
  if (number_of_loops (fun) <= 1)
    return 0;

  return tree_ssa_split_loops ();
}

} // anon namespace

gimple_opt_pass *
make_pass_loop_split (gcc::context *ctxt)
{
  return new pass_loop_split (ctxt);
}
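/* Usage note (not part of the sources): the pass is gated on
   flag_split_loops, i.e. -fsplit-loops, which is enabled by default at
   -O3.  Its work can be inspected through the dump named after the
   pass, e.g.

     gcc -O3 -fdump-tree-lsplit-details test.c

   which prints the "Found potential split point" and "Loop split."
   messages emitted above.  */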