gcc/tree-ssa-loop-ivcanon.cc

   1 /* Induction variable canonicalization and loop peeling.
   2    Copyright (C) 2004-2025 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* This pass detects the loops that iterate a constant number of times,
  21    adds a canonical induction variable (step -1, tested against 0)
  22    and replaces the exit test.  This enables the less powerful rtl
  23    level analysis to use this information.
  24
  25    This might spoil the code in some cases (by increasing register pressure).
  26    Note that in the case the new variable is not needed, ivopts will get rid
  27    of it, so it might only be a problem when there are no other linear induction
  28    variables.  In that case the created optimization possibilities are likely
  29    to pay up.
  30
  31    We also perform
  32      - complete unrolling (or peeling) when the loops is rolling few enough
  33        times
  34      - simple peeling (i.e. copying few initial iterations prior the loop)
  35        when number of iteration estimate is known (typically by the profile
  36        info).  */
  37
  38 #include "config.h"
  39 #include "system.h"
  40 #include "coretypes.h"
  41 #include "backend.h"
  42 #include "tree.h"
  43 #include "gimple.h"
  44 #include "cfghooks.h"
  45 #include "tree-pass.h"
  46 #include "ssa.h"
  47 #include "cgraph.h"
  48 #include "gimple-pretty-print.h"
  49 #include "fold-const.h"
  50 #include "profile.h"
  51 #include "gimple-iterator.h"
  52 #include "gimple-fold.h"
  53 #include "tree-eh.h"
  54 #include "tree-cfg.h"
  55 #include "tree-ssa-loop-manip.h"
  56 #include "tree-ssa-loop-niter.h"
  57 #include "tree-ssa-loop.h"
  58 #include "tree-into-ssa.h"
  59 #include "cfgloop.h"
  60 #include "tree-chrec.h"
  61 #include "tree-scalar-evolution.h"
  62 #include "tree-inline.h"
  63 #include "tree-cfgcleanup.h"
  64 #include "builtins.h"
  65 #include "tree-ssa-sccvn.h"
  66 #include "tree-vectorizer.h" /* For find_loop_location */
  67 #include "dbgcnt.h"
  68
  69 /* Specifies types of loops that may be unrolled.  */
  70
  71 enum unroll_level
  72 {
  73   UL_SINGLE_ITER,       /* Only loops that exit immediately in the first
  74                            iteration.  */
  75   UL_NO_GROWTH,         /* Only loops whose unrolling will not cause increase
  76                            of code size.  */
  77   UL_ALL                /* All suitable loops.  */
  78 };
  79
  80 /* Adds a canonical induction variable to LOOP iterating NITER times.  EXIT
  81    is the exit edge whose condition is replaced.  The ssa versions of the new
  82    IV before and after increment will be stored in VAR_BEFORE and VAR_AFTER
  83    if they are not NULL.  */
  84
  85 void
  86 create_canonical_iv (class loop *loop, edge exit, tree niter,
  87                      tree *var_before = NULL, tree *var_after = NULL)
  88 {
  89   edge in;
  90   tree type, var;
  91   gcond *cond;
  92   gimple_stmt_iterator incr_at;
  93   enum tree_code cmp;
  94
  95   if (dump_file && (dump_flags & TDF_DETAILS))
  96     {
  97       fprintf (dump_file, "Added canonical iv to loop %d, ", loop->num);
  98       print_generic_expr (dump_file, niter, TDF_SLIM);
  99       fprintf (dump_file, " iterations.\n");
 100     }
 101
 102   cond = as_a <gcond *> (*gsi_last_bb (exit->src));
 103   in = EDGE_SUCC (exit->src, 0);
 104   if (in == exit)
 105     in = EDGE_SUCC (exit->src, 1);
 106
 107   /* Note that we do not need to worry about overflows, since
 108      type of niter is always unsigned and all comparisons are
 109      just for equality/nonequality -- i.e. everything works
 110      with a modulo arithmetics.  */
 111
 112   type = TREE_TYPE (niter);
 113   niter = fold_build2 (PLUS_EXPR, type,
 114                        niter,
 115                        build_int_cst (type, 1));
 116   incr_at = gsi_last_bb (in->src);
 117   create_iv (niter, PLUS_EXPR,
 118              build_int_cst (type, -1),
 119              NULL_TREE, loop,
 120              &incr_at, false, var_before, &var);
 121   if (var_after)
 122     *var_after = var;
 123
 124   cmp = (exit->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
 125   gimple_cond_set_code (cond, cmp);
 126   gimple_cond_set_lhs (cond, var);
 127   gimple_cond_set_rhs (cond, build_int_cst (type, 0));
 128   update_stmt (cond);
 129 }
 130
 131 /* Describe size of loop as detected by tree_estimate_loop_size.  */
 132 struct loop_size
 133 {
 134   /* Number of instructions in the loop.  */
 135   int overall;
 136
 137   /* Number of instructions that will be likely optimized out in
 138      peeled iterations of loop  (i.e. computation based on induction
 139      variable where induction variable starts at known constant.)  */
 140   int eliminated_by_peeling;
 141
 142   /* Number of instructions that cannot be further optimized in the
 143      peeled loop, for example volatile accesses.  */
 144   int not_eliminatable_after_peeling;
 145
 146   /* Same statistics for last iteration of loop: it is smaller because
 147      instructions after exit are not executed.  */
 148   int last_iteration;
 149   int last_iteration_eliminated_by_peeling;
 150   int last_iteration_not_eliminatable_after_peeling;
 151
 152   /* If some IV computation will become constant.  */
 153   bool constant_iv;
 154
 155   /* Number of call stmts that are not a builtin and are pure or const
 156      present on the hot path.  */
 157   int num_pure_calls_on_hot_path;
 158   /* Number of call stmts that are not a builtin and are not pure nor const
 159      present on the hot path.  */
 160   int num_non_pure_calls_on_hot_path;
 161   /* Number of statements other than calls in the loop.  */
 162   int non_call_stmts_on_hot_path;
 163   /* Number of branches seen on the hot path.  */
 164   int num_branches_on_hot_path;
 165 };
 166
 167 /* Return true if OP in STMT will be constant after peeling LOOP.  */
 168
 169 static bool
 170 constant_after_peeling (tree op, gimple *stmt, class loop *loop)
 171 {
 172   if (CONSTANT_CLASS_P (op))
 173     return true;
 174
 175   /* Get at the actual SSA operand.  */
 176   if (handled_component_p (op)
 177       && TREE_CODE (TREE_OPERAND (op, 0)) == SSA_NAME)
 178     op = TREE_OPERAND (op, 0);
 179
 180   /* We can still fold accesses to constant arrays when index is known.  */
 181   if (TREE_CODE (op) != SSA_NAME)
 182     {
 183       tree base = op;
 184
 185       /* First make fast look if we see constant array inside.  */
 186       while (handled_component_p (base))
 187         base = TREE_OPERAND (base, 0);
 188       if ((DECL_P (base)
 189            && ctor_for_folding (base) != error_mark_node)
 190           || CONSTANT_CLASS_P (base))
 191         {
 192           /* If so, see if we understand all the indices.  */
 193           base = op;
 194           while (handled_component_p (base))
 195             {
 196               if (TREE_CODE (base) == ARRAY_REF
 197                   && !constant_after_peeling (TREE_OPERAND (base, 1), stmt, loop))
 198                 return false;
 199               base = TREE_OPERAND (base, 0);
 200             }
 201           return true;
 202         }
 203       return false;
 204     }
 205
 206   /* Induction variables are constants when defined in loop.  */
 207   if (loop_containing_stmt (stmt) != loop)
 208     return false;
 209   tree ev = analyze_scalar_evolution (loop, op);
 210   if (chrec_contains_undetermined (ev)
 211       || chrec_contains_symbols (ev))
 212     {
 213       if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (op)))
 214         {
 215           gassign *ass = nullptr;
 216           gphi *phi = nullptr;
 217           if (is_a <gassign *> (SSA_NAME_DEF_STMT (op)))
 218             {
 219               ass = as_a <gassign *> (SSA_NAME_DEF_STMT (op));
 220               if (TREE_CODE (gimple_assign_rhs1 (ass)) == SSA_NAME)
 221                 phi = dyn_cast <gphi *>
 222                         (SSA_NAME_DEF_STMT (gimple_assign_rhs1  (ass)));
 223             }
 224           else if (is_a <gphi *> (SSA_NAME_DEF_STMT (op)))
 225             {
 226               phi = as_a <gphi *> (SSA_NAME_DEF_STMT (op));
 227               if (gimple_bb (phi) == loop->header)
 228                 {
 229                   tree def = gimple_phi_arg_def_from_edge
 230                     (phi, loop_latch_edge (loop));
 231                   if (TREE_CODE (def) == SSA_NAME
 232                       && is_a <gassign *> (SSA_NAME_DEF_STMT (def)))
 233                     ass = as_a <gassign *> (SSA_NAME_DEF_STMT (def));
 234                 }
 235             }
 236           if (ass && phi)
 237             {
 238               tree rhs1 = gimple_assign_rhs1 (ass);
 239               if (gimple_assign_rhs_class (ass) == GIMPLE_BINARY_RHS
 240                   && CONSTANT_CLASS_P (gimple_assign_rhs2 (ass))
 241                   && rhs1 == gimple_phi_result (phi)
 242                   && gimple_bb (phi) == loop->header
 243                   && (gimple_phi_arg_def_from_edge (phi, loop_latch_edge (loop))
 244                       == gimple_assign_lhs (ass))
 245                   && (CONSTANT_CLASS_P (gimple_phi_arg_def_from_edge
 246                                          (phi, loop_preheader_edge (loop)))))
 247                 return true;
 248             }
 249         }
 250       return false;
 251     }
 252   return true;
 253 }
 254
 255 /* Computes an estimated number of insns in LOOP.
 256    EXIT (if non-NULL) is an exite edge that will be eliminated in all but last
 257    iteration of the loop.
 258    EDGE_TO_CANCEL (if non-NULL) is an non-exit edge eliminated in the last iteration
 259    of loop.
 260    Return results in SIZE, estimate benefits for complete unrolling exiting by EXIT.
 261    Stop estimating after UPPER_BOUND is met.  Return true in this case.  */
 262
 263 static bool
 264 tree_estimate_loop_size (class loop *loop, edge exit, edge edge_to_cancel,
 265                          struct loop_size *size, int upper_bound)
 266 {
 267   basic_block *body = get_loop_body (loop);
 268   gimple_stmt_iterator gsi;
 269   unsigned int i;
 270   bool after_exit;
 271   auto_vec<basic_block> path = get_loop_hot_path (loop);
 272
 273   size->overall = 0;
 274   size->eliminated_by_peeling = 0;
 275   size->not_eliminatable_after_peeling = 0;
 276   size->last_iteration = 0;
 277   size->last_iteration_eliminated_by_peeling = 0;
 278   size->last_iteration_not_eliminatable_after_peeling = 0;
 279   size->num_pure_calls_on_hot_path = 0;
 280   size->num_non_pure_calls_on_hot_path = 0;
 281   size->non_call_stmts_on_hot_path = 0;
 282   size->num_branches_on_hot_path = 0;
 283   size->constant_iv = 0;
 284
 285   if (dump_file && (dump_flags & TDF_DETAILS))
 286     fprintf (dump_file, "Estimating sizes for loop %i\n", loop->num);
 287   for (i = 0; i < loop->num_nodes; i++)
 288     {
 289       if (edge_to_cancel && body[i] != edge_to_cancel->src
 290           && dominated_by_p (CDI_DOMINATORS, body[i], edge_to_cancel->src))
 291         after_exit = true;
 292       else
 293         after_exit = false;
 294       if (dump_file && (dump_flags & TDF_DETAILS))
 295         fprintf (dump_file, " BB: %i, after_exit: %i\n", body[i]->index,
 296                  after_exit);
 297
 298       for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
 299         {
 300           gimple *stmt = gsi_stmt (gsi);
 301           int num = estimate_num_insns (stmt, &eni_size_weights);
 302           bool not_eliminatable_after_peeling = false;
 303           bool likely_eliminated = false;
 304           bool likely_eliminated_last = false;
 305           bool likely_eliminated_peeled = false;
 306
 307           if (dump_file && (dump_flags & TDF_DETAILS))
 308             {
 309               fprintf (dump_file, "  size: %3i ", num);
 310               print_gimple_stmt (dump_file, gsi_stmt (gsi), 0);
 311             }
 312
 313           /* Look for reasons why we might optimize this stmt away. */
 314
 315           if (gimple_has_side_effects (stmt))
 316             not_eliminatable_after_peeling = true;
 317           else
 318             {
 319               /* Exit conditional.  */
 320               if (exit && body[i] == exit->src
 321                   && stmt == *gsi_last_bb (exit->src))
 322                 {
 323                   if (dump_file && (dump_flags & TDF_DETAILS))
 324                     fprintf (dump_file, "   Exit condition will be eliminated "
 325                              "in peeled copies.\n");
 326                   likely_eliminated_peeled = true;
 327                 }
 328               if (edge_to_cancel && body[i] == edge_to_cancel->src
 329                   && stmt == *gsi_last_bb (edge_to_cancel->src))
 330                 {
 331                   if (dump_file && (dump_flags & TDF_DETAILS))
 332                     fprintf (dump_file, "   Exit condition will be eliminated "
 333                              "in last copy.\n");
 334                   likely_eliminated_last = true;
 335                 }
 336               /* Sets of IV variables  */
 337               if (gimple_code (stmt) == GIMPLE_ASSIGN
 338                   && constant_after_peeling (gimple_assign_lhs (stmt), stmt, loop))
 339                 {
 340                   if (dump_file && (dump_flags & TDF_DETAILS))
 341                     fprintf (dump_file, "   Induction variable computation will"
 342                              " be folded away.\n");
 343                   likely_eliminated = true;
 344                 }
 345               /* Assignments of IV variables.  */
 346               else if (gimple_code (stmt) == GIMPLE_ASSIGN
 347                        && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME
 348                        && constant_after_peeling (gimple_assign_rhs1 (stmt),
 349                                                   stmt, loop)
 350                        && (gimple_assign_rhs_class (stmt) != GIMPLE_BINARY_RHS
 351                            || constant_after_peeling (gimple_assign_rhs2 (stmt),
 352                                                       stmt, loop))
 353                        && gimple_assign_rhs_class (stmt) != GIMPLE_TERNARY_RHS)
 354                 {
 355                   size->constant_iv = true;
 356                   if (dump_file && (dump_flags & TDF_DETAILS))
 357                     fprintf (dump_file,
 358                              "   Constant expression will be folded away.\n");
 359                   likely_eliminated = true;
 360                 }
 361               /* Conditionals.  */
 362               else if ((gimple_code (stmt) == GIMPLE_COND
 363                         && constant_after_peeling (gimple_cond_lhs (stmt), stmt,
 364                                                    loop)
 365                         && constant_after_peeling (gimple_cond_rhs (stmt), stmt,
 366                                                    loop)
 367                         /* We don't simplify all constant compares so make sure
 368                            they are not both constant already.  See PR70288.  */
 369                         && (! is_gimple_min_invariant (gimple_cond_lhs (stmt))
 370                             || ! is_gimple_min_invariant
 371                                  (gimple_cond_rhs (stmt))))
 372                        || (gimple_code (stmt) == GIMPLE_SWITCH
 373                            && constant_after_peeling (gimple_switch_index (
 374                                                         as_a <gswitch *>
 375                                                           (stmt)),
 376                                                       stmt, loop)
 377                            && ! is_gimple_min_invariant
 378                                    (gimple_switch_index
 379                                       (as_a <gswitch *> (stmt)))))
 380                 {
 381                   if (dump_file && (dump_flags & TDF_DETAILS))
 382                     fprintf (dump_file, "   Constant conditional.\n");
 383                   likely_eliminated = true;
 384                 }
 385             }
 386
 387           size->overall += num;
 388           if (likely_eliminated || likely_eliminated_peeled)
 389             size->eliminated_by_peeling += num;
 390           if (not_eliminatable_after_peeling)
 391             size->not_eliminatable_after_peeling += num;
 392           if (!after_exit)
 393             {
 394               size->last_iteration += num;
 395               if (likely_eliminated || likely_eliminated_last)
 396                 size->last_iteration_eliminated_by_peeling += num;
 397               if (not_eliminatable_after_peeling)
 398                 size->last_iteration_not_eliminatable_after_peeling += num;
 399             }
 400           if ((size->overall * 3 / 2 - size->eliminated_by_peeling
 401               - size->last_iteration_eliminated_by_peeling) > upper_bound)
 402             {
 403               free (body);
 404               return true;
 405             }
 406         }
 407     }
 408   while (path.length ())
 409     {
 410       basic_block bb = path.pop ();
 411       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
 412         {
 413           gimple *stmt = gsi_stmt (gsi);
 414           if (gimple_code (stmt) == GIMPLE_CALL
 415               && !gimple_inexpensive_call_p (as_a <gcall *>  (stmt)))
 416             {
 417               int flags = gimple_call_flags (stmt);
 418               if (flags & (ECF_PURE | ECF_CONST))
 419                 size->num_pure_calls_on_hot_path++;
 420               else
 421                 size->num_non_pure_calls_on_hot_path++;
 422               size->num_branches_on_hot_path ++;
 423             }
 424           /* Count inexpensive calls as non-calls, because they will likely
 425              expand inline.  */
 426           else if (gimple_code (stmt) != GIMPLE_DEBUG)
 427             size->non_call_stmts_on_hot_path++;
 428           if (((gimple_code (stmt) == GIMPLE_COND
 429                 && (!constant_after_peeling (gimple_cond_lhs (stmt), stmt, loop)
 430                     || !constant_after_peeling (gimple_cond_rhs (stmt), stmt,
 431                                                 loop)))
 432                || (gimple_code (stmt) == GIMPLE_SWITCH
 433                    && !constant_after_peeling (gimple_switch_index (
 434                                                  as_a <gswitch *> (stmt)),
 435                                                stmt, loop)))
 436               && (!exit || bb != exit->src))
 437             size->num_branches_on_hot_path++;
 438         }
 439     }
 440
 441   if (dump_file && (dump_flags & TDF_DETAILS))
 442     fprintf (dump_file, "size: %i-%i, last_iteration: %i-%i\n", size->overall,
 443              size->eliminated_by_peeling, size->last_iteration,
 444              size->last_iteration_eliminated_by_peeling);
 445
 446   free (body);
 447   return false;
 448 }
 449
 450 /* Estimate number of insns of completely unrolled loop.
 451    It is (NUNROLL + 1) * size of loop body with taking into account
 452    the fact that in last copy everything after exit conditional
 453    is dead and that some instructions will be eliminated after
 454    peeling.  Set *EST_ELIMINATED to the number of stmts that could be
 455    optimistically eliminated by followup transforms.  */
 456 static unsigned HOST_WIDE_INT
 457 estimated_unrolled_size (struct loop_size *size,
 458                          unsigned HOST_WIDE_INT *est_eliminated,
 459                          unsigned HOST_WIDE_INT nunroll)
 460 {
 461   HOST_WIDE_INT unr_insns = ((nunroll)
 462                              * (HOST_WIDE_INT) (size->overall
 463                                                 - size->eliminated_by_peeling));
 464   HOST_WIDE_INT not_elim
 465     = ((nunroll) * (HOST_WIDE_INT) size->not_eliminatable_after_peeling);
 466   unr_insns += size->last_iteration - size->last_iteration_eliminated_by_peeling;
 467   not_elim += size->last_iteration_not_eliminatable_after_peeling;
 468
 469   /* Testcases rely on rounding up, so do not write as
 470      (unr_insns - not_elim) / 3.  */
 471   *est_eliminated = unr_insns - not_elim - (unr_insns - not_elim) * 2 / 3;
 472   return unr_insns;
 473 }
 474
 475 /* Loop LOOP is known to not loop.  See if there is an edge in the loop
 476    body that can be remove to make the loop to always exit and at
 477    the same time it does not make any code potentially executed
 478    during the last iteration dead.
 479
 480    After complete unrolling we still may get rid of the conditional
 481    on the exit in the last copy even if we have no idea what it does.
 482    This is quite common case for loops of form
 483
 484      int a[5];
 485      for (i=0;i<b;i++)
 486        a[i]=0;
 487
 488    Here we prove the loop to iterate 5 times but we do not know
 489    it from induction variable.
 490
 491    For now we handle only simple case where there is exit condition
 492    just before the latch block and the latch block contains no statements
 493    with side effect that may otherwise terminate the execution of loop
 494    (such as by EH or by terminating the program or longjmp).
 495
 496    In the general case we may want to cancel the paths leading to statements
 497    loop-niter identified as having undefined effect in the last iteration.
 498    The other cases are hopefully rare and will be cleaned up later.  */
 499
 500 static edge
 501 loop_edge_to_cancel (class loop *loop)
 502 {
 503   unsigned i;
 504   edge edge_to_cancel;
 505   gimple_stmt_iterator gsi;
 506
 507   /* We want only one predecestor of the loop.  */
 508   if (EDGE_COUNT (loop->latch->preds) > 1)
 509     return NULL;
 510
 511   auto_vec<edge> exits = get_loop_exit_edges (loop);
 512
 513   FOR_EACH_VEC_ELT (exits, i, edge_to_cancel)
 514     {
 515        /* Find the other edge than the loop exit
 516           leaving the conditoinal.  */
 517        if (EDGE_COUNT (edge_to_cancel->src->succs) != 2)
 518          continue;
 519        if (EDGE_SUCC (edge_to_cancel->src, 0) == edge_to_cancel)
 520          edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 1);
 521        else
 522          edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 0);
 523
 524       /* We only can handle conditionals.  */
 525       if (!(edge_to_cancel->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
 526         continue;
 527
 528       /* We should never have conditionals in the loop latch. */
 529       gcc_assert (edge_to_cancel->dest != loop->header);
 530
 531       /* Check that it leads to loop latch.  */
 532       if (edge_to_cancel->dest != loop->latch)
 533         continue;
 534
 535       /* Verify that the code in loop latch does nothing that may end program
 536          execution without really reaching the exit.  This may include
 537          non-pure/const function calls, EH statements, volatile ASMs etc.  */
 538       for (gsi = gsi_start_bb (loop->latch); !gsi_end_p (gsi); gsi_next (&gsi))
 539         if (gimple_has_side_effects (gsi_stmt (gsi)))
 540            return NULL;
 541       return edge_to_cancel;
 542     }
 543   return NULL;
 544 }
 545
 546 /* Remove all tests for exits that are known to be taken after LOOP was
 547    peeled NPEELED times. Put gcc_unreachable before every statement
 548    known to not be executed.  */
 549
 550 static bool
 551 remove_exits_and_undefined_stmts (class loop *loop, unsigned int npeeled)
 552 {
 553   class nb_iter_bound *elt;
 554   bool changed = false;
 555
 556   for (elt = loop->bounds; elt; elt = elt->next)
 557     {
 558       /* If statement is known to be undefined after peeling, turn it
 559          into unreachable (or trap when debugging experience is supposed
 560          to be good).  */
 561       if (!elt->is_exit
 562           && wi::ltu_p (elt->bound, npeeled))
 563         {
 564           gimple_stmt_iterator gsi = gsi_for_stmt (elt->stmt);
 565           location_t loc = gimple_location (elt->stmt);
 566           gcall *stmt = gimple_build_builtin_unreachable (loc);
 567           gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
 568           split_block (gimple_bb (stmt), stmt);
 569           changed = true;
 570           if (dump_file && (dump_flags & TDF_DETAILS))
 571             {
 572               fprintf (dump_file, "Forced statement unreachable: ");
 573               print_gimple_stmt (dump_file, elt->stmt, 0);
 574             }
 575         }
 576       /* If we know the exit will be taken after peeling, update.  */
 577       else if (elt->is_exit
 578                && wi::leu_p (elt->bound, npeeled))
 579         {
 580           basic_block bb = gimple_bb (elt->stmt);
 581           edge exit_edge = EDGE_SUCC (bb, 0);
 582
 583           if (dump_file && (dump_flags & TDF_DETAILS))
 584             {
 585               fprintf (dump_file, "Forced exit to be taken: ");
 586               print_gimple_stmt (dump_file, elt->stmt, 0);
 587             }
 588           if (!loop_exit_edge_p (loop, exit_edge))
 589             exit_edge = EDGE_SUCC (bb, 1);
 590           exit_edge->probability = profile_probability::always ();
 591           gcc_checking_assert (loop_exit_edge_p (loop, exit_edge));
 592           gcond *cond_stmt = as_a <gcond *> (elt->stmt);
 593           if (exit_edge->flags & EDGE_TRUE_VALUE)
 594             gimple_cond_make_true (cond_stmt);
 595           else
 596             gimple_cond_make_false (cond_stmt);
 597           update_stmt (cond_stmt);
 598           changed = true;
 599         }
 600     }
 601   return changed;
 602 }
 603
 604 /* Remove all exits that are known to be never taken because of the loop bound
 605    discovered.  */
 606
 607 static bool
 608 remove_redundant_iv_tests (class loop *loop)
 609 {
 610   class nb_iter_bound *elt;
 611   bool changed = false;
 612
 613   if (!loop->any_upper_bound)
 614     return false;
 615   for (elt = loop->bounds; elt; elt = elt->next)
 616     {
 617       /* Exit is pointless if it won't be taken before loop reaches
 618          upper bound.  */
 619       if (elt->is_exit && loop->any_upper_bound
 620           && wi::ltu_p (loop->nb_iterations_upper_bound, elt->bound))
 621         {
 622           basic_block bb = gimple_bb (elt->stmt);
 623           edge exit_edge = EDGE_SUCC (bb, 0);
 624           class tree_niter_desc niter;
 625
 626           if (!loop_exit_edge_p (loop, exit_edge))
 627             exit_edge = EDGE_SUCC (bb, 1);
 628
 629           /* Only when we know the actual number of iterations, not
 630              just a bound, we can remove the exit.  */
 631           if (!number_of_iterations_exit (loop, exit_edge,
 632                                           &niter, false, false)
 633               || !integer_onep (niter.assumptions)
 634               || !integer_zerop (niter.may_be_zero)
 635               || !niter.niter
 636               || TREE_CODE (niter.niter) != INTEGER_CST
 637               || !wi::ltu_p (widest_int::from (loop->nb_iterations_upper_bound,
 638                                                SIGNED),
 639                              wi::to_widest (niter.niter)))
 640             continue;
 641
 642           if (dump_file && (dump_flags & TDF_DETAILS))
 643             {
 644               fprintf (dump_file, "Removed pointless exit: ");
 645               print_gimple_stmt (dump_file, elt->stmt, 0);
 646             }
 647           gcond *cond_stmt = as_a <gcond *> (elt->stmt);
 648           if (exit_edge->flags & EDGE_TRUE_VALUE)
 649             gimple_cond_make_false (cond_stmt);
 650           else
 651             gimple_cond_make_true (cond_stmt);
 652           update_stmt (cond_stmt);
 653           changed = true;
 654         }
 655     }
 656   return changed;
 657 }
 658
 659 /* Stores loops that will be unlooped and edges that will be removed
 660    after we process whole loop tree. */
 661 static vec<loop_p> loops_to_unloop;
 662 static vec<int> loops_to_unloop_nunroll;
 663 static vec<edge> edges_to_remove;
 664 /* Stores loops that has been peeled.  */
 665 static bitmap peeled_loops;
 666
 667 /* Cancel all fully unrolled loops by putting __builtin_unreachable
 668    on the latch edge.
 669    We do it after all unrolling since unlooping moves basic blocks
 670    across loop boundaries trashing loop closed SSA form as well
 671    as SCEV info needed to be intact during unrolling.
 672
 673    IRRED_INVALIDATED is used to bookkeep if information about
 674    irreducible regions may become invalid as a result
 675    of the transformation.
 676    LOOP_CLOSED_SSA_INVALIDATED is used to bookkepp the case
 677    when we need to go into loop closed SSA form.  */
 678
 679 void
 680 unloop_loops (vec<class loop *> &loops_to_unloop,
 681               vec<int> &loops_to_unloop_nunroll,
 682               vec<edge> &edges_to_remove,
 683               bitmap loop_closed_ssa_invalidated,
 684               bool *irred_invalidated)
 685 {
 686   while (loops_to_unloop.length ())
 687     {
 688       class loop *loop = loops_to_unloop.pop ();
 689       int n_unroll = loops_to_unloop_nunroll.pop ();
 690       basic_block latch = loop->latch;
 691       edge latch_edge = loop_latch_edge (loop);
 692       int flags = latch_edge->flags;
 693       location_t locus = latch_edge->goto_locus;
 694       gcall *stmt;
 695       gimple_stmt_iterator gsi;
 696
 697       remove_exits_and_undefined_stmts (loop, n_unroll);
 698
 699       /* Unloop destroys the latch edge.  */
 700       unloop (loop, irred_invalidated, loop_closed_ssa_invalidated);
 701
 702       /* Create new basic block for the latch edge destination and wire
 703          it in.  */
 704       stmt = gimple_build_builtin_unreachable (locus);
 705       latch_edge = make_edge (latch, create_basic_block (NULL, NULL, latch), flags);
 706       latch_edge->probability = profile_probability::never ();
 707       latch_edge->flags |= flags;
 708       latch_edge->goto_locus = locus;
 709
 710       add_bb_to_loop (latch_edge->dest, current_loops->tree_root);
 711       latch_edge->dest->count = profile_count::zero ();
 712       set_immediate_dominator (CDI_DOMINATORS, latch_edge->dest, latch_edge->src);
 713
 714       gsi = gsi_start_bb (latch_edge->dest);
 715       gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
 716     }
 717
 718   /* Remove edges in peeled copies.  Given remove_path removes dominated
 719      regions we need to cope with removal of already removed paths.  */
 720   unsigned i;
 721   edge e;
 722   auto_vec<int, 20> src_bbs;
 723   src_bbs.reserve_exact (edges_to_remove.length ());
 724   FOR_EACH_VEC_ELT (edges_to_remove, i, e)
 725     src_bbs.quick_push (e->src->index);
 726   FOR_EACH_VEC_ELT (edges_to_remove, i, e)
 727     if (BASIC_BLOCK_FOR_FN (cfun, src_bbs[i]))
 728       {
 729         bool ok = remove_path (e, irred_invalidated,
 730                                loop_closed_ssa_invalidated);
 731         gcc_assert (ok);
 732       }
 733   edges_to_remove.release ();
 734 }
 735
 736 /* Tries to unroll LOOP completely, i.e. NITER times.
 737    UL determines which loops we are allowed to unroll.
 738    EXIT is the exit of the loop that should be eliminated.
 739    MAXITER specfy bound on number of iterations, -1 if it is
 740    not known or too large for HOST_WIDE_INT.  The location
 741    LOCUS corresponding to the loop is used when emitting
 742    a summary of the unroll to the dump file.  */
 743
 744 static bool
 745 try_unroll_loop_completely (class loop *loop,
 746                             edge exit, tree niter, bool may_be_zero,
 747                             enum unroll_level ul,
 748                             HOST_WIDE_INT maxiter,
 749                             dump_user_location_t locus, bool allow_peel,
 750                             bool cunrolli)
 751 {
 752   unsigned HOST_WIDE_INT n_unroll = 0;
 753   bool n_unroll_found = false;
 754   edge edge_to_cancel = NULL;
 755
 756   /* See if we proved number of iterations to be low constant.
 757
 758      EXIT is an edge that will be removed in all but last iteration of
 759      the loop.
 760
 761      EDGE_TO_CACNEL is an edge that will be removed from the last iteration
 762      of the unrolled sequence and is expected to make the final loop not
 763      rolling.
 764
 765      If the number of execution of loop is determined by standard induction
 766      variable test, then EXIT and EDGE_TO_CANCEL are the two edges leaving
 767      from the iv test.  */
 768   if (tree_fits_uhwi_p (niter))
 769     {
 770       n_unroll = tree_to_uhwi (niter);
 771       n_unroll_found = true;
 772       edge_to_cancel = EDGE_SUCC (exit->src, 0);
 773       if (edge_to_cancel == exit)
 774         edge_to_cancel = EDGE_SUCC (exit->src, 1);
 775     }
 776   /* We do not know the number of iterations and thus we cannot eliminate
 777      the EXIT edge.  */
 778   else
 779     exit = NULL;
 780
 781   /* See if we can improve our estimate by using recorded loop bounds.  */
 782   if ((maxiter == 0 || ul != UL_SINGLE_ITER)
 783       && maxiter >= 0
 784       && (!n_unroll_found || (unsigned HOST_WIDE_INT)maxiter < n_unroll))
 785     {
 786       n_unroll = maxiter;
 787       n_unroll_found = true;
 788       /* Loop terminates before the IV variable test, so we cannot
 789          remove it in the last iteration.  */
 790       edge_to_cancel = NULL;
 791       /* If we do not allow peeling and we iterate just allow cases
 792          that do not grow code.  */
 793       if (!allow_peel && maxiter != 0)
 794         ul = UL_NO_GROWTH;
 795     }
 796
 797   if (!n_unroll_found)
 798     return false;
 799
 800   if (!loop->unroll
 801       && n_unroll > (unsigned) param_max_completely_peel_times)
 802     {
 803       if (dump_file && (dump_flags & TDF_DETAILS))
 804         fprintf (dump_file, "Not unrolling loop %d "
 805                  "(--param max-completely-peel-times limit reached).\n",
 806                  loop->num);
 807       return false;
 808     }
 809
 810   if (!edge_to_cancel)
 811     edge_to_cancel = loop_edge_to_cancel (loop);
 812
 813   if (n_unroll)
 814     {
 815       if (ul == UL_SINGLE_ITER)
 816         return false;
 817
 818       if (loop->unroll)
 819         {
 820           /* If the unrolling factor is too large, bail out.  */
 821           if (n_unroll > (unsigned)loop->unroll)
 822             {
 823               if (dump_file && (dump_flags & TDF_DETAILS))
 824                 fprintf (dump_file,
 825                          "Not unrolling loop %d: "
 826                          "user didn't want it unrolled completely.\n",
 827                          loop->num);
 828               return false;
 829             }
 830         }
 831       else
 832         {
 833           struct loop_size size;
 834           /* EXIT can be removed only if we are sure it passes first N_UNROLL
 835              iterations.  */
 836           bool remove_exit = (exit && niter
 837                               && TREE_CODE (niter) == INTEGER_CST
 838                               && wi::leu_p (n_unroll, wi::to_widest (niter)));
 839           bool large
 840             = tree_estimate_loop_size
 841                 (loop, remove_exit ? exit : NULL, edge_to_cancel, &size,
 842                  param_max_completely_peeled_insns);
 843           if (large)
 844             {
 845               if (dump_file && (dump_flags & TDF_DETAILS))
 846                 fprintf (dump_file, "Not unrolling loop %d: it is too large.\n",
 847                          loop->num);
 848               return false;
 849             }
 850
 851           unsigned HOST_WIDE_INT ninsns = size.overall;
 852           unsigned HOST_WIDE_INT est_eliminated;
 853           unsigned HOST_WIDE_INT unr_insns
 854             = estimated_unrolled_size (&size, &est_eliminated, n_unroll);
 855           if (dump_file && (dump_flags & TDF_DETAILS))
 856             {
 857               fprintf (dump_file, "  Loop size: %d\n", (int) ninsns);
 858               fprintf (dump_file, "  Estimated size after unrolling: %d-%d\n",
 859                        (int) unr_insns, (int) est_eliminated);
 860             }
 861
 862           /* If the code is going to shrink, we don't need to be extra
 863              cautious on guessing if the unrolling is going to be
 864              profitable.
 865              Move from estimated_unrolled_size to unroll small loops.  */
 866           if (unr_insns - est_eliminated
 867               /* If there is IV variable that will become constant, we
 868                  save one instruction in the loop prologue we do not
 869                  account otherwise.  */
 870               <= ninsns + (size.constant_iv != false))
 871             ;
 872           /* We unroll only inner loops, because we do not consider it
 873              profitable otheriwse.  We still can cancel loopback edge
 874              of not rolling loop; this is always a good idea.  */
 875           else if (ul == UL_NO_GROWTH)
 876             {
 877               if (dump_file && (dump_flags & TDF_DETAILS))
 878                 fprintf (dump_file, "Not unrolling loop %d: size would grow.\n",
 879                          loop->num);
 880               return false;
 881             }
 882           /* Outer loops tend to be less interesting candidates for
 883              complete unrolling unless we can do a lot of propagation
 884              into the inner loop body.  For now we disable outer loop
 885              unrolling when the code would grow.  */
 886           else if (loop->inner)
 887             {
 888               if (dump_file && (dump_flags & TDF_DETAILS))
 889                 fprintf (dump_file, "Not unrolling loop %d: "
 890                          "it is not innermost and code would grow.\n",
 891                          loop->num);
 892               return false;
 893             }
 894           /* If there is call on a hot path through the loop, then
 895              there is most probably not much to optimize.  */
 896           else if (size.num_non_pure_calls_on_hot_path)
 897             {
 898               if (dump_file && (dump_flags & TDF_DETAILS))
 899                 fprintf (dump_file, "Not unrolling loop %d: "
 900                          "contains call and code would grow.\n",
 901                          loop->num);
 902               return false;
 903             }
 904           /* If there is pure/const call in the function, then we can
 905              still optimize the unrolled loop body if it contains some
 906              other interesting code than the calls and code storing or
 907              cumulating the return value.  */
 908           else if (size.num_pure_calls_on_hot_path
 909                    /* One IV increment, one test, one ivtmp store and
 910                       one useful stmt.  That is about minimal loop
 911                       doing pure call.  */
 912                    && (size.non_call_stmts_on_hot_path
 913                        <= 3 + size.num_pure_calls_on_hot_path))
 914             {
 915               if (dump_file && (dump_flags & TDF_DETAILS))
 916                 fprintf (dump_file, "Not unrolling loop %d: "
 917                          "contains just pure calls and code would grow.\n",
 918                          loop->num);
 919               return false;
 920             }
 921           /* Complete unrolling is major win when control flow is
 922              removed and one big basic block is created.  If the loop
 923              contains control flow the optimization may still be a win
 924              because of eliminating the loop overhead but it also may
 925              blow the branch predictor tables.  Limit number of
 926              branches on the hot path through the peeled sequence.  */
 927           else if (size.num_branches_on_hot_path * (int)n_unroll
 928                    > param_max_peel_branches)
 929             {
 930               if (dump_file && (dump_flags & TDF_DETAILS))
 931                 fprintf (dump_file, "Not unrolling loop %d: "
 932                          "number of branches on hot path in the unrolled "
 933                          "sequence reaches --param max-peel-branches limit.\n",
 934                          loop->num);
 935               return false;
 936             }
 937           /* Move 2 / 3 reduction from estimated_unrolled_size, but don't reduce
 938              unrolled size for innermost loop.
 939              1) It could increase register pressure.
 940              2) Big loop after completely unroll may not be vectorized
 941              by BB vectorizer.  */
 942           else if ((cunrolli ? unr_insns : unr_insns - est_eliminated)
 943                    > (unsigned) param_max_completely_peeled_insns)
 944             {
 945               if (dump_file && (dump_flags & TDF_DETAILS))
 946                 fprintf (dump_file, "Not unrolling loop %d: "
 947                          "number of insns in the unrolled sequence reaches "
 948                          "--param max-completely-peeled-insns limit.\n",
 949                          loop->num);
 950               return false;
 951             }
 952         }
 953
 954       if (!dbg_cnt (gimple_unroll))
 955         return false;
 956
 957       initialize_original_copy_tables ();
 958       auto_sbitmap wont_exit (n_unroll + 1);
 959       if (exit && niter
 960           && TREE_CODE (niter) == INTEGER_CST
 961           && wi::leu_p (n_unroll, wi::to_widest (niter)))
 962         {
 963           bitmap_ones (wont_exit);
 964           if (wi::eq_p (wi::to_widest (niter), n_unroll)
 965               || edge_to_cancel)
 966             bitmap_clear_bit (wont_exit, 0);
 967         }
 968       else
 969         {
 970           exit = NULL;
 971           bitmap_clear (wont_exit);
 972         }
 973       if (may_be_zero)
 974         bitmap_clear_bit (wont_exit, 1);
 975
 976       /* If loop was originally estimated to iterate too many times,
 977          reduce the profile to avoid new profile inconsistencies.  */
 978       scale_loop_profile (loop, profile_probability::always (), n_unroll);
 979
 980       if (!gimple_duplicate_loop_body_to_header_edge (
 981             loop, loop_preheader_edge (loop), n_unroll, wont_exit, exit,
 982             &edges_to_remove,
 983             DLTHE_FLAG_UPDATE_FREQ | DLTHE_FLAG_COMPLETTE_PEEL))
 984         {
 985           free_original_copy_tables ();
 986           if (dump_file && (dump_flags & TDF_DETAILS))
 987             fprintf (dump_file, "Failed to duplicate the loop\n");
 988           return false;
 989         }
 990
 991       free_original_copy_tables ();
 992     }
 993   else
 994     scale_loop_profile (loop, profile_probability::always (), 0);
 995
 996   /* Remove the conditional from the last copy of the loop.  */
 997   if (edge_to_cancel)
 998     {
 999       gcond *cond = as_a <gcond *> (*gsi_last_bb (edge_to_cancel->src));
1000       force_edge_cold (edge_to_cancel, true);
1001       if (edge_to_cancel->flags & EDGE_TRUE_VALUE)
1002         gimple_cond_make_false (cond);
1003       else
1004         gimple_cond_make_true (cond);
1005       update_stmt (cond);
1006       /* Do not remove the path, as doing so may remove outer loop and
1007          confuse bookkeeping code in tree_unroll_loops_completely.  */
1008     }
1009
1010   /* Store the loop for later unlooping and exit removal.  */
1011   loops_to_unloop.safe_push (loop);
1012   loops_to_unloop_nunroll.safe_push (n_unroll);
1013
1014   if (dump_enabled_p ())
1015     {
1016       if (!n_unroll)
1017         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus,
1018                          "loop turned into non-loop; it never loops\n");
1019       else
1020         {
1021           dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus,
1022                            "loop with %d iterations completely unrolled",
1023                            (int) n_unroll);
1024           if (loop->header->count.initialized_p ())
1025             dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS,
1026                          " (header execution count %d)",
1027                          (int)loop->header->count.to_gcov_type ());
1028           dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, "\n");
1029         }
1030     }
1031
1032   if (dump_file && (dump_flags & TDF_DETAILS))
1033     {
1034       if (exit)
1035         fprintf (dump_file, "Exit condition of peeled iterations was "
1036                  "eliminated.\n");
1037       if (edge_to_cancel)
1038         fprintf (dump_file, "Last iteration exit edge was proved true.\n");
1039       else
1040         fprintf (dump_file, "Latch of last iteration was marked by "
1041                  "__builtin_unreachable ().\n");
1042     }
1043
1044   return true;
1045 }
1046
1047 /* Return number of instructions after peeling.  */
1048 static unsigned HOST_WIDE_INT
1049 estimated_peeled_sequence_size (struct loop_size *size,
1050                                 unsigned HOST_WIDE_INT npeel)
1051 {
1052   return MAX (npeel * (HOST_WIDE_INT) (size->overall
1053                                        - size->eliminated_by_peeling), 1);
1054 }
1055
1056 /* Update loop estimates after peeling LOOP by NPEEL.
1057    If PRECISE is false only likely exists were duplicated and thus
1058    do not update any estimates that are supposed to be always reliable.  */
1059 void
1060 adjust_loop_info_after_peeling (class loop *loop, int npeel, bool precise)
1061 {
1062   if (loop->any_estimate)
1063     {
1064       /* Since peeling is mostly about loops where first few
1065          iterations are special, it is not quite correct to
1066          assume that the remaining iterations will behave
1067          the same way.  However we do not have better info
1068          so update the esitmate, since it is likely better
1069          than keeping it as it is.
1070
1071          Remove it if it looks wrong.
1072
1073          TODO: We likely want to special case the situation where
1074          peeling is optimizing out exit edges and only update
1075          estimates here.  */
1076       if (wi::leu_p (npeel, loop->nb_iterations_estimate))
1077         loop->nb_iterations_estimate -= npeel;
1078       else
1079         loop->any_estimate = false;
1080     }
1081   if (loop->any_upper_bound && precise)
1082     {
1083       if (wi::leu_p (npeel, loop->nb_iterations_upper_bound))
1084         loop->nb_iterations_upper_bound -= npeel;
1085       else
1086         {
1087           /* Peeling maximal number of iterations or more
1088              makes no sense and is a bug.
1089              We should peel completely.  */
1090           gcc_unreachable ();
1091         }
1092     }
1093   if (loop->any_likely_upper_bound)
1094     {
1095       if (wi::leu_p (npeel, loop->nb_iterations_likely_upper_bound))
1096         loop->nb_iterations_likely_upper_bound -= npeel;
1097       else
1098         {
1099           loop->any_estimate = true;
1100           loop->nb_iterations_estimate = 0;
1101           loop->nb_iterations_likely_upper_bound = 0;
1102         }
1103     }
1104 }
1105
1106 /* If the loop is expected to iterate N times and is
1107    small enough, duplicate the loop body N+1 times before
1108    the loop itself.  This way the hot path will never
1109    enter the loop.
1110    Parameters are the same as for try_unroll_loops_completely */
1111
1112 static bool
1113 try_peel_loop (class loop *loop,
1114                edge exit, tree niter, bool may_be_zero,
1115                HOST_WIDE_INT maxiter)
1116 {
1117   HOST_WIDE_INT npeel;
1118   struct loop_size size;
1119   int peeled_size;
1120
1121   if (!flag_peel_loops
1122       || param_max_peel_times <= 0
1123       || !peeled_loops)
1124     return false;
1125
1126   if (bitmap_bit_p (peeled_loops, loop->num))
1127     {
1128       if (dump_file)
1129         fprintf (dump_file, "Not peeling: loop is already peeled\n");
1130       return false;
1131     }
1132
1133   /* We don't peel loops that will be unrolled as this can duplicate a
1134      loop more times than the user requested.  */
1135   if (loop->unroll)
1136     {
1137       if (dump_file)
1138         fprintf (dump_file, "Not peeling: user didn't want it peeled.\n");
1139       return false;
1140     }
1141
1142   /* Peel only innermost loops.
1143      While the code is perfectly capable of peeling non-innermost loops,
1144      the heuristics would probably need some improvements. */
1145   if (loop->inner)
1146     {
1147       if (dump_file)
1148         fprintf (dump_file, "Not peeling: outer loop\n");
1149       return false;
1150     }
1151
1152   if (!optimize_loop_for_speed_p (loop))
1153     {
1154       if (dump_file)
1155         fprintf (dump_file, "Not peeling: cold loop\n");
1156       return false;
1157     }
1158
1159   /* Check if there is an estimate on the number of iterations.  */
1160   npeel = estimated_loop_iterations_int (loop);
1161   if (npeel < 0)
1162     npeel = likely_max_loop_iterations_int (loop);
1163   if (npeel < 0)
1164     {
1165       if (dump_file)
1166         fprintf (dump_file, "Not peeling: number of iterations is not "
1167                  "estimated\n");
1168       return false;
1169     }
1170   if (maxiter >= 0 && maxiter <= npeel)
1171     {
1172       if (dump_file)
1173         fprintf (dump_file, "Not peeling: upper bound is known so can "
1174                  "unroll completely\n");
1175       return false;
1176     }
1177
1178   /* We want to peel estimated number of iterations + 1 (so we never
1179      enter the loop on quick path).  Check against PARAM_MAX_PEEL_TIMES
1180      and be sure to avoid overflows.  */
1181   if (npeel > param_max_peel_times - 1)
1182     {
1183       if (dump_file)
1184         fprintf (dump_file, "Not peeling: rolls too much "
1185                  "(%i + 1 > --param max-peel-times)\n", (int) npeel);
1186       return false;
1187     }
1188   npeel++;
1189
1190   /* Check peeled loops size.  */
1191   tree_estimate_loop_size (loop, exit, NULL, &size,
1192                            param_max_peeled_insns);
1193   if ((peeled_size = estimated_peeled_sequence_size (&size, (int) npeel))
1194       > param_max_peeled_insns)
1195     {
1196       if (dump_file)
1197         fprintf (dump_file, "Not peeling: peeled sequence size is too large "
1198                  "(%i insns > --param max-peel-insns)", peeled_size);
1199       return false;
1200     }
1201
1202   if (!dbg_cnt (gimple_unroll))
1203     return false;
1204
1205   /* Duplicate possibly eliminating the exits.  */
1206   initialize_original_copy_tables ();
1207   auto_sbitmap wont_exit (npeel + 1);
1208   if (exit && niter
1209       && TREE_CODE (niter) == INTEGER_CST
1210       && wi::leu_p (npeel, wi::to_widest (niter)))
1211     {
1212       bitmap_ones (wont_exit);
1213       bitmap_clear_bit (wont_exit, 0);
1214     }
1215   else
1216     {
1217       exit = NULL;
1218       bitmap_clear (wont_exit);
1219     }
1220   if (may_be_zero)
1221     bitmap_clear_bit (wont_exit, 1);
1222
1223   if (!gimple_duplicate_loop_body_to_header_edge (
1224         loop, loop_preheader_edge (loop), npeel, wont_exit, exit,
1225         &edges_to_remove, DLTHE_FLAG_UPDATE_FREQ))
1226     {
1227       free_original_copy_tables ();
1228       return false;
1229     }
1230   free_original_copy_tables ();
1231   if (dump_file && (dump_flags & TDF_DETAILS))
1232     {
1233       fprintf (dump_file, "Peeled loop %d, %i times.\n",
1234                loop->num, (int) npeel);
1235     }
1236   adjust_loop_info_after_peeling (loop, npeel, true);
1237
1238   bitmap_set_bit (peeled_loops, loop->num);
1239   return true;
1240 }
1241 /* Adds a canonical induction variable to LOOP if suitable.
1242    CREATE_IV is true if we may create a new iv.  UL determines
1243    which loops we are allowed to completely unroll.  If TRY_EVAL is true, we try
1244    to determine the number of iterations of a loop by direct evaluation.
1245    Returns true if cfg is changed.   */
1246
1247 static bool
1248 canonicalize_loop_induction_variables (class loop *loop,
1249                                        bool create_iv, enum unroll_level ul,
1250                                        bool try_eval, bool allow_peel,
1251                                        const_sbitmap innermost,
1252                                        bool cunrolli)
1253 {
1254   edge exit = NULL;
1255   tree niter;
1256   HOST_WIDE_INT maxiter;
1257   bool modified = false;
1258   class tree_niter_desc niter_desc;
1259   bool may_be_zero = false;
1260   bool by_eval = false;
1261
1262   /* For unrolling allow conditional constant or zero iterations, thus
1263      perform loop-header copying on-the-fly.  */
1264   exit = single_exit (loop);
1265   niter = chrec_dont_know;
1266   if (exit && number_of_iterations_exit (loop, exit, &niter_desc, false))
1267     {
1268       niter = niter_desc.niter;
1269       may_be_zero
1270         = niter_desc.may_be_zero && !integer_zerop (niter_desc.may_be_zero);
1271     }
1272   if (TREE_CODE (niter) != INTEGER_CST)
1273     {
1274       /* For non-constant niter fold may_be_zero into niter again.  */
1275       if (may_be_zero)
1276         {
1277           if (COMPARISON_CLASS_P (niter_desc.may_be_zero))
1278             niter = fold_build3 (COND_EXPR, TREE_TYPE (niter),
1279                                  niter_desc.may_be_zero,
1280                                  build_int_cst (TREE_TYPE (niter), 0), niter);
1281           else
1282             niter = chrec_dont_know;
1283           may_be_zero = false;
1284         }
1285
1286       /* If the loop has more than one exit, try checking all of them
1287          for # of iterations determinable through scev.  */
1288       if (!exit)
1289         niter = find_loop_niter (loop, &exit);
1290
1291       /* Finally if everything else fails, try brute force evaluation.  */
1292       if (try_eval
1293           && (chrec_contains_undetermined (niter)
1294               || TREE_CODE (niter) != INTEGER_CST))
1295         {
1296           niter = find_loop_niter_by_eval (loop, &exit);
1297           if (TREE_CODE (niter) == INTEGER_CST)
1298             by_eval = true;
1299         }
1300
1301       if (TREE_CODE (niter) != INTEGER_CST)
1302         exit = NULL;
1303     }
1304
1305   /* We work exceptionally hard here to estimate the bound
1306      by find_loop_niter_by_eval.  Be sure to keep it for future.  */
1307   if (niter && TREE_CODE (niter) == INTEGER_CST)
1308     {
1309       auto_vec<edge> exits = get_loop_exit_edges  (loop);
1310       record_niter_bound (loop, wi::to_widest (niter),
1311                           exit == single_likely_exit (loop, exits), true);
1312     }
1313
1314   /* Force re-computation of loop bounds so we can remove redundant exits.  */
1315   maxiter = max_loop_iterations_int (loop);
1316
1317   if (dump_file && (dump_flags & TDF_DETAILS)
1318       && TREE_CODE (niter) == INTEGER_CST)
1319     {
1320       fprintf (dump_file, "Loop %d iterates ", loop->num);
1321       print_generic_expr (dump_file, niter, TDF_SLIM);
1322       fprintf (dump_file, " times.\n");
1323     }
1324   if (dump_file && (dump_flags & TDF_DETAILS)
1325       && maxiter >= 0)
1326     {
1327       fprintf (dump_file, "Loop %d iterates at most %i times.\n", loop->num,
1328                (int)maxiter);
1329     }
1330   if (dump_file && (dump_flags & TDF_DETAILS)
1331       && likely_max_loop_iterations_int (loop) >= 0)
1332     {
1333       fprintf (dump_file, "Loop %d likely iterates at most %i times.\n",
1334                loop->num, (int)likely_max_loop_iterations_int (loop));
1335     }
1336
1337   /* Remove exits that are known to be never taken based on loop bound.
1338      Needs to be called after compilation of max_loop_iterations_int that
1339      populates the loop bounds.  */
1340   modified |= remove_redundant_iv_tests (loop);
1341
1342   dump_user_location_t locus = find_loop_location (loop);
1343
1344   bool innermost_cunrolli_p
1345     = cunrolli
1346       && (unsigned) loop->num < SBITMAP_SIZE (innermost)
1347       && bitmap_bit_p (innermost, loop->num);
1348
1349   if (try_unroll_loop_completely (loop, exit, niter, may_be_zero, ul,
1350                                   maxiter, locus, allow_peel,
1351                                   innermost_cunrolli_p))
1352     return true;
1353
1354   if ((create_iv || by_eval)
1355       && niter && !chrec_contains_undetermined (niter)
1356       && exit && just_once_each_iteration_p (loop, exit->src))
1357     {
1358       tree iv_niter = niter;
1359       if (may_be_zero)
1360         {
1361           if (COMPARISON_CLASS_P (niter_desc.may_be_zero))
1362             iv_niter = fold_build3 (COND_EXPR, TREE_TYPE (iv_niter),
1363                                     niter_desc.may_be_zero,
1364                                     build_int_cst (TREE_TYPE (iv_niter), 0),
1365                                     iv_niter);
1366           else
1367             iv_niter = NULL_TREE;
1368         }
1369       if (iv_niter)
1370         create_canonical_iv (loop, exit, iv_niter);
1371     }
1372
1373   if (ul == UL_ALL)
1374     modified |= try_peel_loop (loop, exit, niter, may_be_zero, maxiter);
1375
1376   return modified;
1377 }
1378
1379 /* The main entry point of the pass.  Adds canonical induction variables
1380    to the suitable loops.  */
1381
1382 unsigned int
1383 canonicalize_induction_variables (void)
1384 {
1385   bool changed = false;
1386   bool irred_invalidated = false;
1387   bitmap loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);
1388   auto_sbitmap innermost (number_of_loops (cfun));
1389   bitmap_clear (innermost);
1390
1391   estimate_numbers_of_iterations (cfun);
1392
1393   for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
1394     {
1395       changed
1396         |= canonicalize_loop_induction_variables (loop,
1397                                                   true, UL_SINGLE_ITER,
1398                                                   true, false,
1399                                                   (const_sbitmap) innermost,
1400                                                   false);
1401     }
1402   gcc_assert (!need_ssa_update_p (cfun));
1403
1404   unloop_loops (loops_to_unloop, loops_to_unloop_nunroll, edges_to_remove,
1405                 loop_closed_ssa_invalidated, &irred_invalidated);
1406   loops_to_unloop.release ();
1407   loops_to_unloop_nunroll.release ();
1408   if (irred_invalidated
1409       && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
1410     mark_irreducible_loops ();
1411
1412   /* Clean up the information about numbers of iterations, since brute force
1413      evaluation could reveal new information.  */
1414   free_numbers_of_iterations_estimates (cfun);
1415   scev_reset ();
1416
1417   if (!bitmap_empty_p (loop_closed_ssa_invalidated))
1418     {
1419       gcc_checking_assert (loops_state_satisfies_p (LOOP_CLOSED_SSA));
1420       rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1421     }
1422   BITMAP_FREE (loop_closed_ssa_invalidated);
1423
1424   if (changed)
1425     return TODO_cleanup_cfg;
1426   return 0;
1427 }
1428
1429 /* Process loops from innermost to outer, stopping at the innermost
1430    loop we unrolled.  */
1431
1432 static bool
1433 tree_unroll_loops_completely_1 (bool may_increase_size, bool unroll_outer,
1434                                 bitmap father_bbs, class loop *loop,
1435                                 const_sbitmap innermost, bool cunrolli)
1436 {
1437   class loop *loop_father;
1438   bool changed = false;
1439   class loop *inner;
1440   enum unroll_level ul;
1441   unsigned num = number_of_loops (cfun);
1442
1443   /* Process inner loops first.  Don't walk loops added by the recursive
1444      calls because SSA form is not up-to-date.  They can be handled in the
1445      next iteration.  */
1446   bitmap child_father_bbs = NULL;
1447   for (inner = loop->inner; inner != NULL; inner = inner->next)
1448     if ((unsigned) inner->num < num)
1449       {
1450         if (!child_father_bbs)
1451           child_father_bbs = BITMAP_ALLOC (NULL);
1452         if (tree_unroll_loops_completely_1 (may_increase_size, unroll_outer,
1453                                             child_father_bbs, inner,
1454                                             innermost, cunrolli))
1455           {
1456             bitmap_ior_into (father_bbs, child_father_bbs);
1457             bitmap_clear (child_father_bbs);
1458             changed = true;
1459           }
1460       }
1461   if (child_father_bbs)
1462     BITMAP_FREE (child_father_bbs);
1463
1464   /* If we changed an inner loop we cannot process outer loops in this
1465      iteration because SSA form is not up-to-date.  Continue with
1466      siblings of outer loops instead.  */
1467   if (changed)
1468     {
1469       /* If we are recorded as father clear all other fathers that
1470          are necessarily covered already to avoid redundant work.  */
1471       if (bitmap_bit_p (father_bbs, loop->header->index))
1472         {
1473           bitmap_clear (father_bbs);
1474           bitmap_set_bit (father_bbs, loop->header->index);
1475         }
1476       return true;
1477     }
1478
1479   /* Don't unroll #pragma omp simd loops until the vectorizer
1480      attempts to vectorize those.  */
1481   if (loop->force_vectorize)
1482     return false;
1483
1484   /* Try to unroll this loop.  */
1485   loop_father = loop_outer (loop);
1486   if (!loop_father)
1487     return false;
1488
1489   if (loop->unroll > 1)
1490     ul = UL_ALL;
1491   else if (may_increase_size && optimize_loop_nest_for_speed_p (loop)
1492       /* Unroll outermost loops only if asked to do so or they do
1493          not cause code growth.  */
1494       && (unroll_outer || loop_outer (loop_father)))
1495     ul = UL_ALL;
1496   else
1497     ul = UL_NO_GROWTH;
1498
1499   if (canonicalize_loop_induction_variables
1500       (loop, false, ul, !flag_tree_loop_ivcanon || cunrolli, unroll_outer,
1501        innermost, cunrolli))
1502     {
1503       /* If we'll continue unrolling, we need to propagate constants
1504          within the new basic blocks to fold away induction variable
1505          computations; otherwise, the size might blow up before the
1506          iteration is complete and the IR eventually cleaned up.  */
1507       if (loop_outer (loop_father))
1508         {
1509           /* Once we process our father we will have processed
1510              the fathers of our children as well, so avoid doing
1511              redundant work and clear fathers we've gathered sofar.  */
1512           bitmap_clear (father_bbs);
1513           bitmap_set_bit (father_bbs, loop_father->header->index);
1514         }
1515       else if (unroll_outer)
1516         /* Trigger scalar cleanup once any outermost loop gets unrolled.  */
1517         cfun->pending_TODOs |= PENDING_TODO_force_next_scalar_cleanup;
1518
1519       return true;
1520     }
1521
1522   return false;
1523 }
1524
1525 /* Unroll LOOPS completely if they iterate just few times.  Unless
1526    MAY_INCREASE_SIZE is true, perform the unrolling only if the
1527    size of the code does not increase.
1528    cunrolli is true when passs is cunrolli.  */
1529
1530 static unsigned int
1531 tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer, bool cunrolli)
1532 {
1533   bitmap father_bbs = BITMAP_ALLOC (NULL);
1534   bool changed;
1535   int iteration = 0;
1536   bool irred_invalidated = false;
1537   auto_sbitmap innermost (number_of_loops (cfun));
1538   bitmap_clear (innermost);
1539
1540   estimate_numbers_of_iterations (cfun);
1541
1542   /* Mark all innermost loop at the begining.  */
1543   for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
1544     {
1545       if (!loop->inner)
1546         bitmap_set_bit (innermost, loop->num);
1547     }
1548
1549   do
1550     {
1551       changed = false;
1552       bitmap loop_closed_ssa_invalidated = NULL;
1553
1554       if (loops_state_satisfies_p (LOOP_CLOSED_SSA))
1555         loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);
1556
1557       free_numbers_of_iterations_estimates (cfun);
1558       estimate_numbers_of_iterations (cfun);
1559
1560       changed = tree_unroll_loops_completely_1 (may_increase_size,
1561                                                 unroll_outer, father_bbs,
1562                                                 current_loops->tree_root,
1563                                                 (const_sbitmap) innermost,
1564                                                 cunrolli);
1565       if (changed)
1566         {
1567           unsigned i;
1568
1569           unloop_loops (loops_to_unloop, loops_to_unloop_nunroll,
1570                         edges_to_remove, loop_closed_ssa_invalidated,
1571                         &irred_invalidated);
1572           loops_to_unloop.release ();
1573           loops_to_unloop_nunroll.release ();
1574
1575           /* We cannot use TODO_update_ssa_no_phi because VOPS gets confused.  */
1576           if (loop_closed_ssa_invalidated
1577               && !bitmap_empty_p (loop_closed_ssa_invalidated))
1578             rewrite_into_loop_closed_ssa (loop_closed_ssa_invalidated,
1579                                           TODO_update_ssa);
1580           else
1581             update_ssa (TODO_update_ssa);
1582
1583           /* father_bbs is a bitmap of loop father header BB indices.
1584              Translate that to what non-root loops these BBs belong to now.  */
1585           bitmap_iterator bi;
1586           bitmap fathers = BITMAP_ALLOC (NULL);
1587           EXECUTE_IF_SET_IN_BITMAP (father_bbs, 0, i, bi)
1588             {
1589               basic_block unrolled_loop_bb = BASIC_BLOCK_FOR_FN (cfun, i);
1590               if (! unrolled_loop_bb)
1591                 continue;
1592               if (loop_outer (unrolled_loop_bb->loop_father))
1593                 bitmap_set_bit (fathers,
1594                                 unrolled_loop_bb->loop_father->num);
1595             }
1596           bitmap_clear (father_bbs);
1597           /* Propagate the constants within the new basic blocks.  */
1598           EXECUTE_IF_SET_IN_BITMAP (fathers, 0, i, bi)
1599             {
1600               loop_p father = get_loop (cfun, i);
1601               bitmap exit_bbs = BITMAP_ALLOC (NULL);
1602               loop_exit *exit = father->exits->next;
1603               while (exit->e)
1604                 {
1605                   bitmap_set_bit (exit_bbs, exit->e->dest->index);
1606                   exit = exit->next;
1607                 }
1608               do_rpo_vn (cfun, loop_preheader_edge (father), exit_bbs);
1609             }
1610           BITMAP_FREE (fathers);
1611
1612           /* Clean up the information about numbers of iterations, since
1613              complete unrolling might have invalidated it.  */
1614           scev_reset ();
1615
1616           /* This will take care of removing completely unrolled loops
1617              from the loop structures so we can continue unrolling now
1618              innermost loops.  */
1619           if (cleanup_tree_cfg ())
1620             update_ssa (TODO_update_ssa_only_virtuals);
1621
1622           if (flag_checking && loops_state_satisfies_p (LOOP_CLOSED_SSA))
1623             verify_loop_closed_ssa (true);
1624         }
1625       if (loop_closed_ssa_invalidated)
1626         BITMAP_FREE (loop_closed_ssa_invalidated);
1627     }
1628   while (changed
1629          && ++iteration <= param_max_unroll_iterations);
1630
1631   BITMAP_FREE (father_bbs);
1632
1633   if (irred_invalidated
1634       && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
1635     mark_irreducible_loops ();
1636
1637   return 0;
1638 }
1639
1640 /* Canonical induction variable creation pass.  */
1641
1642 namespace {
1643
1644 const pass_data pass_data_iv_canon =
1645 {
1646   GIMPLE_PASS, /* type */
1647   "ivcanon", /* name */
1648   OPTGROUP_LOOP, /* optinfo_flags */
1649   TV_TREE_LOOP_IVCANON, /* tv_id */
1650   ( PROP_cfg | PROP_ssa ), /* properties_required */
1651   0, /* properties_provided */
1652   0, /* properties_destroyed */
1653   0, /* todo_flags_start */
1654   0, /* todo_flags_finish */
1655 };
1656
1657 class pass_iv_canon : public gimple_opt_pass
1658 {
1659 public:
1660   pass_iv_canon (gcc::context *ctxt)
1661     : gimple_opt_pass (pass_data_iv_canon, ctxt)
1662   {}
1663
1664   /* opt_pass methods: */
1665   bool gate (function *) final override { return flag_tree_loop_ivcanon != 0; }
1666   unsigned int execute (function *fun) final override;
1667
1668 }; // class pass_iv_canon
1669
1670 unsigned int
1671 pass_iv_canon::execute (function *fun)
1672 {
1673   if (number_of_loops (fun) <= 1)
1674     return 0;
1675
1676   return canonicalize_induction_variables ();
1677 }
1678
1679 } // anon namespace
1680
1681 gimple_opt_pass *
1682 make_pass_iv_canon (gcc::context *ctxt)
1683 {
1684   return new pass_iv_canon (ctxt);
1685 }
1686
1687 /* Complete unrolling of loops.  */
1688
1689 namespace {
1690
1691 const pass_data pass_data_complete_unroll =
1692 {
1693   GIMPLE_PASS, /* type */
1694   "cunroll", /* name */
1695   OPTGROUP_LOOP, /* optinfo_flags */
1696   TV_COMPLETE_UNROLL, /* tv_id */
1697   ( PROP_cfg | PROP_ssa ), /* properties_required */
1698   0, /* properties_provided */
1699   0, /* properties_destroyed */
1700   0, /* todo_flags_start */
1701   0, /* todo_flags_finish */
1702 };
1703
1704 class pass_complete_unroll : public gimple_opt_pass
1705 {
1706 public:
1707   pass_complete_unroll (gcc::context *ctxt)
1708     : gimple_opt_pass (pass_data_complete_unroll, ctxt)
1709   {}
1710
1711   /* opt_pass methods: */
1712   unsigned int execute (function *) final override;
1713
1714 }; // class pass_complete_unroll
1715
1716 unsigned int
1717 pass_complete_unroll::execute (function *fun)
1718 {
1719   if (number_of_loops (fun) <= 1)
1720     return 0;
1721
1722   /* If we ever decide to run loop peeling more than once, we will need to
1723      track loops already peeled in loop structures themselves to avoid
1724      re-peeling the same loop multiple times.  */
1725   if (flag_peel_loops)
1726     peeled_loops = BITMAP_ALLOC (NULL);
1727   unsigned int val = tree_unroll_loops_completely (flag_cunroll_grow_size, true, false);
1728   if (peeled_loops)
1729     {
1730       BITMAP_FREE (peeled_loops);
1731       peeled_loops = NULL;
1732     }
1733   return val;
1734 }
1735
1736 } // anon namespace
1737
1738 gimple_opt_pass *
1739 make_pass_complete_unroll (gcc::context *ctxt)
1740 {
1741   return new pass_complete_unroll (ctxt);
1742 }
1743
1744 /* Complete unrolling of inner loops.  */
1745
1746 namespace {
1747
1748 const pass_data pass_data_complete_unrolli =
1749 {
1750   GIMPLE_PASS, /* type */
1751   "cunrolli", /* name */
1752   OPTGROUP_LOOP, /* optinfo_flags */
1753   TV_COMPLETE_UNROLL, /* tv_id */
1754   ( PROP_cfg | PROP_ssa ), /* properties_required */
1755   0, /* properties_provided */
1756   0, /* properties_destroyed */
1757   0, /* todo_flags_start */
1758   0, /* todo_flags_finish */
1759 };
1760
1761 class pass_complete_unrolli : public gimple_opt_pass
1762 {
1763 public:
1764   pass_complete_unrolli (gcc::context *ctxt)
1765     : gimple_opt_pass (pass_data_complete_unrolli, ctxt)
1766   {}
1767
1768   /* opt_pass methods: */
1769   bool gate (function *) final override { return optimize >= 2; }
1770   unsigned int execute (function *) final override;
1771
1772 }; // class pass_complete_unrolli
1773
1774 unsigned int
1775 pass_complete_unrolli::execute (function *fun)
1776 {
1777   unsigned ret = 0;
1778
1779   loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
1780   if (number_of_loops (fun) > 1)
1781     {
1782       scev_initialize ();
1783       ret = tree_unroll_loops_completely (optimize >= 3, false, true);
1784       scev_finalize ();
1785     }
1786   loop_optimizer_finalize ();
1787
1788   return ret;
1789 }
1790
1791 } // anon namespace
1792
1793 gimple_opt_pass *
1794 make_pass_complete_unrolli (gcc::context *ctxt)
1795 {
1796   return new pass_complete_unrolli (ctxt);
1797 }
1798
1799