gcc/tree-ssa-loop-ivcanon.cc

   1 /* Induction variable canonicalization and loop peeling.
   2    Copyright (C) 2004-2024 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* This pass detects the loops that iterate a constant number of times,
  21    adds a canonical induction variable (step -1, tested against 0)
  22    and replaces the exit test.  This enables the less powerful rtl
  23    level analysis to use this information.
  24
  25    This might spoil the code in some cases (by increasing register pressure).
  26    Note that in the case the new variable is not needed, ivopts will get rid
  27    of it, so it might only be a problem when there are no other linear induction
  28    variables.  In that case the created optimization possibilities are likely
  29    to pay up.
  30
  31    We also perform
  32      - complete unrolling (or peeling) when the loop is rolling few enough
  33        times
  34      - simple peeling (i.e. copying few initial iterations prior the loop)
  35        when number of iteration estimate is known (typically by the profile
  36        info).  */
  37
  38 #include "config.h"
  39 #define INCLUDE_MEMORY
  40 #include "system.h"
  41 #include "coretypes.h"
  42 #include "backend.h"
  43 #include "tree.h"
  44 #include "gimple.h"
  45 #include "cfghooks.h"
  46 #include "tree-pass.h"
  47 #include "ssa.h"
  48 #include "cgraph.h"
  49 #include "gimple-pretty-print.h"
  50 #include "fold-const.h"
  51 #include "profile.h"
  52 #include "gimple-iterator.h"
  53 #include "gimple-fold.h"
  54 #include "tree-eh.h"
  55 #include "tree-cfg.h"
  56 #include "tree-ssa-loop-manip.h"
  57 #include "tree-ssa-loop-niter.h"
  58 #include "tree-ssa-loop.h"
  59 #include "tree-into-ssa.h"
  60 #include "cfgloop.h"
  61 #include "tree-chrec.h"
  62 #include "tree-scalar-evolution.h"
  63 #include "tree-inline.h"
  64 #include "tree-cfgcleanup.h"
  65 #include "builtins.h"
  66 #include "tree-ssa-sccvn.h"
  67 #include "tree-vectorizer.h" /* For find_loop_location */
  68 #include "dbgcnt.h"
  69
  70 /* Specifies types of loops that may be unrolled.  */
  71
  72 enum unroll_level
  73 {
  74   UL_SINGLE_ITER,       /* Only loops that exit immediately in the first
  75                            iteration.  */
  76   UL_NO_GROWTH,         /* Only loops whose unrolling will not cause increase
  77                            of code size.  */
  78   UL_ALL                /* All suitable loops.  */
  79 };
  80
  81 /* Adds a canonical induction variable to LOOP iterating NITER times.  EXIT
  82    is the exit edge whose condition is replaced.  The ssa versions of the new
  83    IV before and after increment will be stored in VAR_BEFORE and VAR_AFTER
  84    if they are not NULL.  */
  85
  86 void
  87 create_canonical_iv (class loop *loop, edge exit, tree niter,
  88                      tree *var_before = NULL, tree *var_after = NULL)
  89 {
  90   edge in;
  91   tree type, var;
  92   gcond *cond;
  93   gimple_stmt_iterator incr_at;
  94   enum tree_code cmp;
  95
  96   if (dump_file && (dump_flags & TDF_DETAILS))
  97     {
  98       fprintf (dump_file, "Added canonical iv to loop %d, ", loop->num);
  99       print_generic_expr (dump_file, niter, TDF_SLIM);
 100       fprintf (dump_file, " iterations.\n");
 101     }
 102
 103   cond = as_a <gcond *> (*gsi_last_bb (exit->src));
 104   in = EDGE_SUCC (exit->src, 0);
 105   if (in == exit)
 106     in = EDGE_SUCC (exit->src, 1);
 107
 108   /* Note that we do not need to worry about overflows, since
 109      type of niter is always unsigned and all comparisons are
 110      just for equality/nonequality -- i.e. everything works
 111      with a modulo arithmetics.  */
 112
 113   type = TREE_TYPE (niter);
 114   niter = fold_build2 (PLUS_EXPR, type,
 115                        niter,
 116                        build_int_cst (type, 1));
 117   incr_at = gsi_last_bb (in->src);
 118   create_iv (niter, PLUS_EXPR,
 119              build_int_cst (type, -1),
 120              NULL_TREE, loop,
 121              &incr_at, false, var_before, &var);
 122   if (var_after)
 123     *var_after = var;
 124
 125   cmp = (exit->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
 126   gimple_cond_set_code (cond, cmp);
 127   gimple_cond_set_lhs (cond, var);
 128   gimple_cond_set_rhs (cond, build_int_cst (type, 0));
 129   update_stmt (cond);
 130 }
 131
 132 /* Describe size of loop as detected by tree_estimate_loop_size.  */
 133 struct loop_size
 134 {
 135   /* Number of instructions in the loop.  */
 136   int overall;
 137
 138   /* Number of instructions that will be likely optimized out in
 139      peeled iterations of loop  (i.e. computation based on induction
 140      variable where induction variable starts at known constant.)  */
 141   int eliminated_by_peeling;
 142
 143   /* Same statistics for last iteration of loop: it is smaller because
 144      instructions after exit are not executed.  */
 145   int last_iteration;
 146   int last_iteration_eliminated_by_peeling;
 147
 148   /* If some IV computation will become constant.  */
 149   bool constant_iv;
 150
 151   /* Number of call stmts that are not a builtin and are pure or const
 152      present on the hot path.  */
 153   int num_pure_calls_on_hot_path;
 154   /* Number of call stmts that are not a builtin and are not pure nor const
 155      present on the hot path.  */
 156   int num_non_pure_calls_on_hot_path;
 157   /* Number of statements other than calls in the loop.  */
 158   int non_call_stmts_on_hot_path;
 159   /* Number of branches seen on the hot path.  */
 160   int num_branches_on_hot_path;
 161 };
 162
 163 /* Return true if OP in STMT will be constant after peeling LOOP.  */
 164
 165 static bool
 166 constant_after_peeling (tree op, gimple *stmt, class loop *loop)
 167 {
 168   if (CONSTANT_CLASS_P (op))
 169     return true;
 170
 171   /* Get at the actual SSA operand.  */
 172   if (handled_component_p (op)
 173       && TREE_CODE (TREE_OPERAND (op, 0)) == SSA_NAME)
 174     op = TREE_OPERAND (op, 0);
 175
 176   /* We can still fold accesses to constant arrays when index is known.  */
 177   if (TREE_CODE (op) != SSA_NAME)
 178     {
 179       tree base = op;
 180
 181       /* First make fast look if we see constant array inside.  */
 182       while (handled_component_p (base))
 183         base = TREE_OPERAND (base, 0);
 184       if ((DECL_P (base)
 185            && ctor_for_folding (base) != error_mark_node)
 186           || CONSTANT_CLASS_P (base))
 187         {
 188           /* If so, see if we understand all the indices.  */
 189           base = op;
 190           while (handled_component_p (base))
 191             {
 192               if (TREE_CODE (base) == ARRAY_REF
 193                   && !constant_after_peeling (TREE_OPERAND (base, 1), stmt, loop))
 194                 return false;
 195               base = TREE_OPERAND (base, 0);
 196             }
 197           return true;
 198         }
 199       return false;
 200     }
 201
 202   /* Induction variables are constants when defined in loop.  */
 203   if (loop_containing_stmt (stmt) != loop)
 204     return false;
 205   tree ev = analyze_scalar_evolution (loop, op);
 206   if (chrec_contains_undetermined (ev)
 207       || chrec_contains_symbols (ev))
 208     {
 209       if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (op)))
 210         {
 211           gassign *ass = nullptr;
 212           gphi *phi = nullptr;
 213           if (is_a <gassign *> (SSA_NAME_DEF_STMT (op)))
 214             {
 215               ass = as_a <gassign *> (SSA_NAME_DEF_STMT (op));
 216               if (TREE_CODE (gimple_assign_rhs1 (ass)) == SSA_NAME)
 217                 phi = dyn_cast <gphi *>
 218                         (SSA_NAME_DEF_STMT (gimple_assign_rhs1  (ass)));
 219             }
 220           else if (is_a <gphi *> (SSA_NAME_DEF_STMT (op)))
 221             {
 222               phi = as_a <gphi *> (SSA_NAME_DEF_STMT (op));
 223               if (gimple_bb (phi) == loop->header)
 224                 {
 225                   tree def = gimple_phi_arg_def_from_edge
 226                     (phi, loop_latch_edge (loop));
 227                   if (TREE_CODE (def) == SSA_NAME
 228                       && is_a <gassign *> (SSA_NAME_DEF_STMT (def)))
 229                     ass = as_a <gassign *> (SSA_NAME_DEF_STMT (def));
 230                 }
 231             }
 232           if (ass && phi)
 233             {
 234               tree rhs1 = gimple_assign_rhs1 (ass);
 235               if (gimple_assign_rhs_class (ass) == GIMPLE_BINARY_RHS
 236                   && CONSTANT_CLASS_P (gimple_assign_rhs2 (ass))
 237                   && rhs1 == gimple_phi_result (phi)
 238                   && gimple_bb (phi) == loop->header
 239                   && (gimple_phi_arg_def_from_edge (phi, loop_latch_edge (loop))
 240                       == gimple_assign_lhs (ass))
 241                   && (CONSTANT_CLASS_P (gimple_phi_arg_def_from_edge
 242                                          (phi, loop_preheader_edge (loop)))))
 243                 return true;
 244             }
 245         }
 246       return false;
 247     }
 248   return true;
 249 }
 250
 251 /* Computes an estimated number of insns in LOOP.
 252    EXIT (if non-NULL) is an exite edge that will be eliminated in all but last
 253    iteration of the loop.
 254    EDGE_TO_CANCEL (if non-NULL) is an non-exit edge eliminated in the last iteration
 255    of loop.
 256    Return results in SIZE, estimate benefits for complete unrolling exiting by EXIT.
 257    Stop estimating after UPPER_BOUND is met.  Return true in this case.  */
 258
 259 static bool
 260 tree_estimate_loop_size (class loop *loop, edge exit, edge edge_to_cancel,
 261                          struct loop_size *size, int upper_bound)
 262 {
 263   basic_block *body = get_loop_body (loop);
 264   gimple_stmt_iterator gsi;
 265   unsigned int i;
 266   bool after_exit;
 267   auto_vec<basic_block> path = get_loop_hot_path (loop);
 268
 269   size->overall = 0;
 270   size->eliminated_by_peeling = 0;
 271   size->last_iteration = 0;
 272   size->last_iteration_eliminated_by_peeling = 0;
 273   size->num_pure_calls_on_hot_path = 0;
 274   size->num_non_pure_calls_on_hot_path = 0;
 275   size->non_call_stmts_on_hot_path = 0;
 276   size->num_branches_on_hot_path = 0;
 277   size->constant_iv = 0;
 278
 279   if (dump_file && (dump_flags & TDF_DETAILS))
 280     fprintf (dump_file, "Estimating sizes for loop %i\n", loop->num);
 281   for (i = 0; i < loop->num_nodes; i++)
 282     {
 283       if (edge_to_cancel && body[i] != edge_to_cancel->src
 284           && dominated_by_p (CDI_DOMINATORS, body[i], edge_to_cancel->src))
 285         after_exit = true;
 286       else
 287         after_exit = false;
 288       if (dump_file && (dump_flags & TDF_DETAILS))
 289         fprintf (dump_file, " BB: %i, after_exit: %i\n", body[i]->index,
 290                  after_exit);
 291
 292       for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
 293         {
 294           gimple *stmt = gsi_stmt (gsi);
 295           int num = estimate_num_insns (stmt, &eni_size_weights);
 296           bool likely_eliminated = false;
 297           bool likely_eliminated_last = false;
 298           bool likely_eliminated_peeled = false;
 299
 300           if (dump_file && (dump_flags & TDF_DETAILS))
 301             {
 302               fprintf (dump_file, "  size: %3i ", num);
 303               print_gimple_stmt (dump_file, gsi_stmt (gsi), 0);
 304             }
 305
 306           /* Look for reasons why we might optimize this stmt away. */
 307
 308           if (!gimple_has_side_effects (stmt))
 309             {
 310               /* Exit conditional.  */
 311               if (exit && body[i] == exit->src
 312                   && stmt == *gsi_last_bb (exit->src))
 313                 {
 314                   if (dump_file && (dump_flags & TDF_DETAILS))
 315                     fprintf (dump_file, "   Exit condition will be eliminated "
 316                              "in peeled copies.\n");
 317                   likely_eliminated_peeled = true;
 318                 }
 319               if (edge_to_cancel && body[i] == edge_to_cancel->src
 320                   && stmt == *gsi_last_bb (edge_to_cancel->src))
 321                 {
 322                   if (dump_file && (dump_flags & TDF_DETAILS))
 323                     fprintf (dump_file, "   Exit condition will be eliminated "
 324                              "in last copy.\n");
 325                   likely_eliminated_last = true;
 326                 }
 327               /* Sets of IV variables  */
 328               if (gimple_code (stmt) == GIMPLE_ASSIGN
 329                   && constant_after_peeling (gimple_assign_lhs (stmt), stmt, loop))
 330                 {
 331                   if (dump_file && (dump_flags & TDF_DETAILS))
 332                     fprintf (dump_file, "   Induction variable computation will"
 333                              " be folded away.\n");
 334                   likely_eliminated = true;
 335                 }
 336               /* Assignments of IV variables.  */
 337               else if (gimple_code (stmt) == GIMPLE_ASSIGN
 338                        && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME
 339                        && constant_after_peeling (gimple_assign_rhs1 (stmt),
 340                                                   stmt, loop)
 341                        && (gimple_assign_rhs_class (stmt) != GIMPLE_BINARY_RHS
 342                            || constant_after_peeling (gimple_assign_rhs2 (stmt),
 343                                                       stmt, loop))
 344                        && gimple_assign_rhs_class (stmt) != GIMPLE_TERNARY_RHS)
 345                 {
 346                   size->constant_iv = true;
 347                   if (dump_file && (dump_flags & TDF_DETAILS))
 348                     fprintf (dump_file,
 349                              "   Constant expression will be folded away.\n");
 350                   likely_eliminated = true;
 351                 }
 352               /* Conditionals.  */
 353               else if ((gimple_code (stmt) == GIMPLE_COND
 354                         && constant_after_peeling (gimple_cond_lhs (stmt), stmt,
 355                                                    loop)
 356                         && constant_after_peeling (gimple_cond_rhs (stmt), stmt,
 357                                                    loop)
 358                         /* We don't simplify all constant compares so make sure
 359                            they are not both constant already.  See PR70288.  */
 360                         && (! is_gimple_min_invariant (gimple_cond_lhs (stmt))
 361                             || ! is_gimple_min_invariant
 362                                  (gimple_cond_rhs (stmt))))
 363                        || (gimple_code (stmt) == GIMPLE_SWITCH
 364                            && constant_after_peeling (gimple_switch_index (
 365                                                         as_a <gswitch *>
 366                                                           (stmt)),
 367                                                       stmt, loop)
 368                            && ! is_gimple_min_invariant
 369                                    (gimple_switch_index
 370                                       (as_a <gswitch *> (stmt)))))
 371                 {
 372                   if (dump_file && (dump_flags & TDF_DETAILS))
 373                     fprintf (dump_file, "   Constant conditional.\n");
 374                   likely_eliminated = true;
 375                 }
 376             }
 377
 378           size->overall += num;
 379           if (likely_eliminated || likely_eliminated_peeled)
 380             size->eliminated_by_peeling += num;
 381           if (!after_exit)
 382             {
 383               size->last_iteration += num;
 384               if (likely_eliminated || likely_eliminated_last)
 385                 size->last_iteration_eliminated_by_peeling += num;
 386             }
 387           if ((size->overall * 3 / 2 - size->eliminated_by_peeling
 388               - size->last_iteration_eliminated_by_peeling) > upper_bound)
 389             {
 390               free (body);
 391               return true;
 392             }
 393         }
 394     }
 395   while (path.length ())
 396     {
 397       basic_block bb = path.pop ();
 398       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
 399         {
 400           gimple *stmt = gsi_stmt (gsi);
 401           if (gimple_code (stmt) == GIMPLE_CALL
 402               && !gimple_inexpensive_call_p (as_a <gcall *>  (stmt)))
 403             {
 404               int flags = gimple_call_flags (stmt);
 405               if (flags & (ECF_PURE | ECF_CONST))
 406                 size->num_pure_calls_on_hot_path++;
 407               else
 408                 size->num_non_pure_calls_on_hot_path++;
 409               size->num_branches_on_hot_path ++;
 410             }
 411           /* Count inexpensive calls as non-calls, because they will likely
 412              expand inline.  */
 413           else if (gimple_code (stmt) != GIMPLE_DEBUG)
 414             size->non_call_stmts_on_hot_path++;
 415           if (((gimple_code (stmt) == GIMPLE_COND
 416                 && (!constant_after_peeling (gimple_cond_lhs (stmt), stmt, loop)
 417                     || !constant_after_peeling (gimple_cond_rhs (stmt), stmt,
 418                                                 loop)))
 419                || (gimple_code (stmt) == GIMPLE_SWITCH
 420                    && !constant_after_peeling (gimple_switch_index (
 421                                                  as_a <gswitch *> (stmt)),
 422                                                stmt, loop)))
 423               && (!exit || bb != exit->src))
 424             size->num_branches_on_hot_path++;
 425         }
 426     }
 427
 428   if (dump_file && (dump_flags & TDF_DETAILS))
 429     fprintf (dump_file, "size: %i-%i, last_iteration: %i-%i\n", size->overall,
 430              size->eliminated_by_peeling, size->last_iteration,
 431              size->last_iteration_eliminated_by_peeling);
 432
 433   free (body);
 434   return false;
 435 }
 436
 437 /* Estimate number of insns of completely unrolled loop.
 438    It is (NUNROLL + 1) * size of loop body with taking into account
 439    the fact that in last copy everything after exit conditional
 440    is dead and that some instructions will be eliminated after
 441    peeling.  */
 442 static unsigned HOST_WIDE_INT
 443 estimated_unrolled_size (struct loop_size *size,
 444                          unsigned HOST_WIDE_INT nunroll)
 445 {
 446   HOST_WIDE_INT unr_insns = ((nunroll)
 447                              * (HOST_WIDE_INT) (size->overall
 448                                                 - size->eliminated_by_peeling));
 449   if (!nunroll)
 450     unr_insns = 0;
 451   unr_insns += size->last_iteration - size->last_iteration_eliminated_by_peeling;
 452
 453   return unr_insns;
 454 }
 455
 456 /* Loop LOOP is known to not loop.  See if there is an edge in the loop
 457    body that can be remove to make the loop to always exit and at
 458    the same time it does not make any code potentially executed
 459    during the last iteration dead.
 460
 461    After complete unrolling we still may get rid of the conditional
 462    on the exit in the last copy even if we have no idea what it does.
 463    This is quite common case for loops of form
 464
 465      int a[5];
 466      for (i=0;i<b;i++)
 467        a[i]=0;
 468
 469    Here we prove the loop to iterate 5 times but we do not know
 470    it from induction variable.
 471
 472    For now we handle only simple case where there is exit condition
 473    just before the latch block and the latch block contains no statements
 474    with side effect that may otherwise terminate the execution of loop
 475    (such as by EH or by terminating the program or longjmp).
 476
 477    In the general case we may want to cancel the paths leading to statements
 478    loop-niter identified as having undefined effect in the last iteration.
 479    The other cases are hopefully rare and will be cleaned up later.  */
 480
 481 static edge
 482 loop_edge_to_cancel (class loop *loop)
 483 {
 484   unsigned i;
 485   edge edge_to_cancel;
 486   gimple_stmt_iterator gsi;
 487
 488   /* We want only one predecestor of the loop.  */
 489   if (EDGE_COUNT (loop->latch->preds) > 1)
 490     return NULL;
 491
 492   auto_vec<edge> exits = get_loop_exit_edges (loop);
 493
 494   FOR_EACH_VEC_ELT (exits, i, edge_to_cancel)
 495     {
 496        /* Find the other edge than the loop exit
 497           leaving the conditoinal.  */
 498        if (EDGE_COUNT (edge_to_cancel->src->succs) != 2)
 499          continue;
 500        if (EDGE_SUCC (edge_to_cancel->src, 0) == edge_to_cancel)
 501          edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 1);
 502        else
 503          edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 0);
 504
 505       /* We only can handle conditionals.  */
 506       if (!(edge_to_cancel->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
 507         continue;
 508
 509       /* We should never have conditionals in the loop latch. */
 510       gcc_assert (edge_to_cancel->dest != loop->header);
 511
 512       /* Check that it leads to loop latch.  */
 513       if (edge_to_cancel->dest != loop->latch)
 514         continue;
 515
 516       /* Verify that the code in loop latch does nothing that may end program
 517          execution without really reaching the exit.  This may include
 518          non-pure/const function calls, EH statements, volatile ASMs etc.  */
 519       for (gsi = gsi_start_bb (loop->latch); !gsi_end_p (gsi); gsi_next (&gsi))
 520         if (gimple_has_side_effects (gsi_stmt (gsi)))
 521            return NULL;
 522       return edge_to_cancel;
 523     }
 524   return NULL;
 525 }
 526
 527 /* Remove all tests for exits that are known to be taken after LOOP was
 528    peeled NPEELED times. Put gcc_unreachable before every statement
 529    known to not be executed.  */
 530
 531 static bool
 532 remove_exits_and_undefined_stmts (class loop *loop, unsigned int npeeled)
 533 {
 534   class nb_iter_bound *elt;
 535   bool changed = false;
 536
 537   for (elt = loop->bounds; elt; elt = elt->next)
 538     {
 539       /* If statement is known to be undefined after peeling, turn it
 540          into unreachable (or trap when debugging experience is supposed
 541          to be good).  */
 542       if (!elt->is_exit
 543           && wi::ltu_p (elt->bound, npeeled))
 544         {
 545           gimple_stmt_iterator gsi = gsi_for_stmt (elt->stmt);
 546           location_t loc = gimple_location (elt->stmt);
 547           gcall *stmt = gimple_build_builtin_unreachable (loc);
 548           gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
 549           split_block (gimple_bb (stmt), stmt);
 550           changed = true;
 551           if (dump_file && (dump_flags & TDF_DETAILS))
 552             {
 553               fprintf (dump_file, "Forced statement unreachable: ");
 554               print_gimple_stmt (dump_file, elt->stmt, 0);
 555             }
 556         }
 557       /* If we know the exit will be taken after peeling, update.  */
 558       else if (elt->is_exit
 559                && wi::leu_p (elt->bound, npeeled))
 560         {
 561           basic_block bb = gimple_bb (elt->stmt);
 562           edge exit_edge = EDGE_SUCC (bb, 0);
 563
 564           if (dump_file && (dump_flags & TDF_DETAILS))
 565             {
 566               fprintf (dump_file, "Forced exit to be taken: ");
 567               print_gimple_stmt (dump_file, elt->stmt, 0);
 568             }
 569           if (!loop_exit_edge_p (loop, exit_edge))
 570             exit_edge = EDGE_SUCC (bb, 1);
 571           exit_edge->probability = profile_probability::always ();
 572           gcc_checking_assert (loop_exit_edge_p (loop, exit_edge));
 573           gcond *cond_stmt = as_a <gcond *> (elt->stmt);
 574           if (exit_edge->flags & EDGE_TRUE_VALUE)
 575             gimple_cond_make_true (cond_stmt);
 576           else
 577             gimple_cond_make_false (cond_stmt);
 578           update_stmt (cond_stmt);
 579           changed = true;
 580         }
 581     }
 582   return changed;
 583 }
 584
 585 /* Remove all exits that are known to be never taken because of the loop bound
 586    discovered.  */
 587
 588 static bool
 589 remove_redundant_iv_tests (class loop *loop)
 590 {
 591   class nb_iter_bound *elt;
 592   bool changed = false;
 593
 594   if (!loop->any_upper_bound)
 595     return false;
 596   for (elt = loop->bounds; elt; elt = elt->next)
 597     {
 598       /* Exit is pointless if it won't be taken before loop reaches
 599          upper bound.  */
 600       if (elt->is_exit && loop->any_upper_bound
 601           && wi::ltu_p (loop->nb_iterations_upper_bound, elt->bound))
 602         {
 603           basic_block bb = gimple_bb (elt->stmt);
 604           edge exit_edge = EDGE_SUCC (bb, 0);
 605           class tree_niter_desc niter;
 606
 607           if (!loop_exit_edge_p (loop, exit_edge))
 608             exit_edge = EDGE_SUCC (bb, 1);
 609
 610           /* Only when we know the actual number of iterations, not
 611              just a bound, we can remove the exit.  */
 612           if (!number_of_iterations_exit (loop, exit_edge,
 613                                           &niter, false, false)
 614               || !integer_onep (niter.assumptions)
 615               || !integer_zerop (niter.may_be_zero)
 616               || !niter.niter
 617               || TREE_CODE (niter.niter) != INTEGER_CST
 618               || !wi::ltu_p (widest_int::from (loop->nb_iterations_upper_bound,
 619                                                SIGNED),
 620                              wi::to_widest (niter.niter)))
 621             continue;
 622
 623           if (dump_file && (dump_flags & TDF_DETAILS))
 624             {
 625               fprintf (dump_file, "Removed pointless exit: ");
 626               print_gimple_stmt (dump_file, elt->stmt, 0);
 627             }
 628           gcond *cond_stmt = as_a <gcond *> (elt->stmt);
 629           if (exit_edge->flags & EDGE_TRUE_VALUE)
 630             gimple_cond_make_false (cond_stmt);
 631           else
 632             gimple_cond_make_true (cond_stmt);
 633           update_stmt (cond_stmt);
 634           changed = true;
 635         }
 636     }
 637   return changed;
 638 }
 639
 640 /* Stores loops that will be unlooped and edges that will be removed
 641    after we process whole loop tree. */
 642 static vec<loop_p> loops_to_unloop;
 643 static vec<int> loops_to_unloop_nunroll;
 644 static vec<edge> edges_to_remove;
 645 /* Stores loops that has been peeled.  */
 646 static bitmap peeled_loops;
 647
 648 /* Cancel all fully unrolled loops by putting __builtin_unreachable
 649    on the latch edge.
 650    We do it after all unrolling since unlooping moves basic blocks
 651    across loop boundaries trashing loop closed SSA form as well
 652    as SCEV info needed to be intact during unrolling.
 653
 654    IRRED_INVALIDATED is used to bookkeep if information about
 655    irreducible regions may become invalid as a result
 656    of the transformation.
 657    LOOP_CLOSED_SSA_INVALIDATED is used to bookkepp the case
 658    when we need to go into loop closed SSA form.  */
 659
 660 void
 661 unloop_loops (vec<class loop *> &loops_to_unloop,
 662               vec<int> &loops_to_unloop_nunroll,
 663               vec<edge> &edges_to_remove,
 664               bitmap loop_closed_ssa_invalidated,
 665               bool *irred_invalidated)
 666 {
 667   while (loops_to_unloop.length ())
 668     {
 669       class loop *loop = loops_to_unloop.pop ();
 670       int n_unroll = loops_to_unloop_nunroll.pop ();
 671       basic_block latch = loop->latch;
 672       edge latch_edge = loop_latch_edge (loop);
 673       int flags = latch_edge->flags;
 674       location_t locus = latch_edge->goto_locus;
 675       gcall *stmt;
 676       gimple_stmt_iterator gsi;
 677
 678       remove_exits_and_undefined_stmts (loop, n_unroll);
 679
 680       /* Unloop destroys the latch edge.  */
 681       unloop (loop, irred_invalidated, loop_closed_ssa_invalidated);
 682
 683       /* Create new basic block for the latch edge destination and wire
 684          it in.  */
 685       stmt = gimple_build_builtin_unreachable (locus);
 686       latch_edge = make_edge (latch, create_basic_block (NULL, NULL, latch), flags);
 687       latch_edge->probability = profile_probability::never ();
 688       latch_edge->flags |= flags;
 689       latch_edge->goto_locus = locus;
 690
 691       add_bb_to_loop (latch_edge->dest, current_loops->tree_root);
 692       latch_edge->dest->count = profile_count::zero ();
 693       set_immediate_dominator (CDI_DOMINATORS, latch_edge->dest, latch_edge->src);
 694
 695       gsi = gsi_start_bb (latch_edge->dest);
 696       gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
 697     }
 698
 699   /* Remove edges in peeled copies.  Given remove_path removes dominated
 700      regions we need to cope with removal of already removed paths.  */
 701   unsigned i;
 702   edge e;
 703   auto_vec<int, 20> src_bbs;
 704   src_bbs.reserve_exact (edges_to_remove.length ());
 705   FOR_EACH_VEC_ELT (edges_to_remove, i, e)
 706     src_bbs.quick_push (e->src->index);
 707   FOR_EACH_VEC_ELT (edges_to_remove, i, e)
 708     if (BASIC_BLOCK_FOR_FN (cfun, src_bbs[i]))
 709       {
 710         bool ok = remove_path (e, irred_invalidated,
 711                                loop_closed_ssa_invalidated);
 712         gcc_assert (ok);
 713       }
 714   edges_to_remove.release ();
 715 }
 716
 717 /* Tries to unroll LOOP completely, i.e. NITER times.
 718    UL determines which loops we are allowed to unroll.
 719    EXIT is the exit of the loop that should be eliminated.
 720    MAXITER specifies a bound on the number of iterations, -1 if it is
 721    not known or too large for HOST_WIDE_INT.  The location
 722    LOCUS corresponding to the loop is used when emitting
 723    a summary of the unroll to the dump file.  */
 724
 725 static bool
 726 try_unroll_loop_completely (class loop *loop,
 727                             edge exit, tree niter, bool may_be_zero,
 728                             enum unroll_level ul,
 729                             HOST_WIDE_INT maxiter,
 730                             dump_user_location_t locus, bool allow_peel,
 731                             bool cunrolli)
 732 {
 733   unsigned HOST_WIDE_INT n_unroll = 0;
 734   bool n_unroll_found = false;
 735   edge edge_to_cancel = NULL;
 736
 737   /* See if we proved number of iterations to be low constant.
 738
 739      EXIT is an edge that will be removed in all but last iteration of
 740      the loop.
 741
 742      EDGE_TO_CANCEL is an edge that will be removed from the last iteration
 743      of the unrolled sequence and is expected to make the final loop not
 744      rolling.
 745
 746      If the number of execution of loop is determined by standard induction
 747      variable test, then EXIT and EDGE_TO_CANCEL are the two edges leaving
 748      from the iv test.  */
 749   if (tree_fits_uhwi_p (niter))
 750     {
 751       n_unroll = tree_to_uhwi (niter);
 752       n_unroll_found = true;
 753       edge_to_cancel = EDGE_SUCC (exit->src, 0);
 754       if (edge_to_cancel == exit)
 755         edge_to_cancel = EDGE_SUCC (exit->src, 1);
 756     }
 757   /* We do not know the number of iterations and thus we cannot eliminate
 758      the EXIT edge.  */
 759   else
 760     exit = NULL;
 761
 762   /* See if we can improve our estimate by using recorded loop bounds.  */
 763   if ((maxiter == 0 || ul != UL_SINGLE_ITER)
 764       && maxiter >= 0
 765       && (!n_unroll_found || (unsigned HOST_WIDE_INT)maxiter < n_unroll))
 766     {
 767       n_unroll = maxiter;
 768       n_unroll_found = true;
 769       /* Loop terminates before the IV variable test, so we cannot
 770          remove it in the last iteration.  */
 771       edge_to_cancel = NULL;
 772       /* If we do not allow peeling and we iterate just allow cases
 773          that do not grow code.  */
 774       if (!allow_peel && maxiter != 0)
 775         ul = UL_NO_GROWTH;
 776     }
 777
 778   if (!n_unroll_found)
 779     return false;
 780
 781   if (!loop->unroll
 782       && n_unroll > (unsigned) param_max_completely_peel_times)
 783     {
 784       if (dump_file && (dump_flags & TDF_DETAILS))
 785         fprintf (dump_file, "Not unrolling loop %d "
 786                  "(--param max-completely-peel-times limit reached).\n",
 787                  loop->num);
 788       return false;
 789     }
 790
 791   if (!edge_to_cancel)
 792     edge_to_cancel = loop_edge_to_cancel (loop);
 793
 794   if (n_unroll)
 795     {
 796       if (ul == UL_SINGLE_ITER)
 797         return false;
 798
 799       if (loop->unroll)
 800         {
 801           /* If the unrolling factor is too large, bail out.  */
 802           if (n_unroll > (unsigned)loop->unroll)
 803             {
 804               if (dump_file && (dump_flags & TDF_DETAILS))
 805                 fprintf (dump_file,
 806                          "Not unrolling loop %d: "
 807                          "user didn't want it unrolled completely.\n",
 808                          loop->num);
 809               return false;
 810             }
 811         }
 812       else
 813         {
 814           struct loop_size size;
 815           /* EXIT can be removed only if we are sure it passes first N_UNROLL
 816              iterations.  */
 817           bool remove_exit = (exit && niter
 818                               && TREE_CODE (niter) == INTEGER_CST
 819                               && wi::leu_p (n_unroll, wi::to_widest (niter)));
 820           bool large
 821             = tree_estimate_loop_size
 822                 (loop, remove_exit ? exit : NULL, edge_to_cancel, &size,
 823                  param_max_completely_peeled_insns);
 824           if (large)
 825             {
 826               if (dump_file && (dump_flags & TDF_DETAILS))
 827                 fprintf (dump_file, "Not unrolling loop %d: it is too large.\n",
 828                          loop->num);
 829               return false;
 830             }
 831
 832           unsigned HOST_WIDE_INT ninsns = size.overall;
 833           unsigned HOST_WIDE_INT unr_insns
 834             = estimated_unrolled_size (&size, n_unroll);
 835           if (dump_file && (dump_flags & TDF_DETAILS))
 836             {
 837               fprintf (dump_file, "  Loop size: %d\n", (int) ninsns);
 838               fprintf (dump_file, "  Estimated size after unrolling: %d\n",
 839                        (int) unr_insns);
 840             }
 841
 842           /* If the code is going to shrink, we don't need to be extra
 843              cautious on guessing if the unrolling is going to be
 844              profitable.
 845              Move from estimated_unrolled_size to unroll small loops.  */
 846           if (unr_insns * 2 / 3
 847               /* If there is IV variable that will become constant, we
 848                  save one instruction in the loop prologue we do not
 849                  account otherwise.  */
 850               <= ninsns + (size.constant_iv != false))
 851             ;
 852           /* We unroll only inner loops, because we do not consider it
 853              profitable otheriwse.  We still can cancel loopback edge
 854              of not rolling loop; this is always a good idea.  */
 855           else if (ul == UL_NO_GROWTH)
 856             {
 857               if (dump_file && (dump_flags & TDF_DETAILS))
 858                 fprintf (dump_file, "Not unrolling loop %d: size would grow.\n",
 859                          loop->num);
 860               return false;
 861             }
 862           /* Outer loops tend to be less interesting candidates for
 863              complete unrolling unless we can do a lot of propagation
 864              into the inner loop body.  For now we disable outer loop
 865              unrolling when the code would grow.  */
 866           else if (loop->inner)
 867             {
 868               if (dump_file && (dump_flags & TDF_DETAILS))
 869                 fprintf (dump_file, "Not unrolling loop %d: "
 870                          "it is not innermost and code would grow.\n",
 871                          loop->num);
 872               return false;
 873             }
 874           /* If there is call on a hot path through the loop, then
 875              there is most probably not much to optimize.  */
 876           else if (size.num_non_pure_calls_on_hot_path)
 877             {
 878               if (dump_file && (dump_flags & TDF_DETAILS))
 879                 fprintf (dump_file, "Not unrolling loop %d: "
 880                          "contains call and code would grow.\n",
 881                          loop->num);
 882               return false;
 883             }
 884           /* If there is pure/const call in the function, then we can
 885              still optimize the unrolled loop body if it contains some
 886              other interesting code than the calls and code storing or
 887              cumulating the return value.  */
 888           else if (size.num_pure_calls_on_hot_path
 889                    /* One IV increment, one test, one ivtmp store and
 890                       one useful stmt.  That is about minimal loop
 891                       doing pure call.  */
 892                    && (size.non_call_stmts_on_hot_path
 893                        <= 3 + size.num_pure_calls_on_hot_path))
 894             {
 895               if (dump_file && (dump_flags & TDF_DETAILS))
 896                 fprintf (dump_file, "Not unrolling loop %d: "
 897                          "contains just pure calls and code would grow.\n",
 898                          loop->num);
 899               return false;
 900             }
 901           /* Complete unrolling is major win when control flow is
 902              removed and one big basic block is created.  If the loop
 903              contains control flow the optimization may still be a win
 904              because of eliminating the loop overhead but it also may
 905              blow the branch predictor tables.  Limit number of
 906              branches on the hot path through the peeled sequence.  */
 907           else if (size.num_branches_on_hot_path * (int)n_unroll
 908                    > param_max_peel_branches)
 909             {
 910               if (dump_file && (dump_flags & TDF_DETAILS))
 911                 fprintf (dump_file, "Not unrolling loop %d: "
 912                          "number of branches on hot path in the unrolled "
 913                          "sequence reaches --param max-peel-branches limit.\n",
 914                          loop->num);
 915               return false;
 916             }
 917           /* Move 2 / 3 reduction from estimated_unrolled_size, but don't reduce
 918              unrolled size for innermost loop.
 919              1) It could increase register pressure.
 920              2) Big loop after completely unroll may not be vectorized
 921              by BB vectorizer.  */
 922           else if ((cunrolli && !loop->inner
 923                     ? unr_insns : unr_insns * 2 / 3)
 924                    > (unsigned) param_max_completely_peeled_insns)
 925             {
 926               if (dump_file && (dump_flags & TDF_DETAILS))
 927                 fprintf (dump_file, "Not unrolling loop %d: "
 928                          "number of insns in the unrolled sequence reaches "
 929                          "--param max-completely-peeled-insns limit.\n",
 930                          loop->num);
 931               return false;
 932             }
 933         }
 934
 935       if (!dbg_cnt (gimple_unroll))
 936         return false;
 937
 938       initialize_original_copy_tables ();
 939       auto_sbitmap wont_exit (n_unroll + 1);
 940       if (exit && niter
 941           && TREE_CODE (niter) == INTEGER_CST
 942           && wi::leu_p (n_unroll, wi::to_widest (niter)))
 943         {
 944           bitmap_ones (wont_exit);
 945           if (wi::eq_p (wi::to_widest (niter), n_unroll)
 946               || edge_to_cancel)
 947             bitmap_clear_bit (wont_exit, 0);
 948         }
 949       else
 950         {
 951           exit = NULL;
 952           bitmap_clear (wont_exit);
 953         }
 954       if (may_be_zero)
 955         bitmap_clear_bit (wont_exit, 1);
 956
 957       /* If loop was originally estimated to iterate too many times,
 958          reduce the profile to avoid new profile inconsistencies.  */
 959       scale_loop_profile (loop, profile_probability::always (), n_unroll);
 960
 961       if (!gimple_duplicate_loop_body_to_header_edge (
 962             loop, loop_preheader_edge (loop), n_unroll, wont_exit, exit,
 963             &edges_to_remove,
 964             DLTHE_FLAG_UPDATE_FREQ | DLTHE_FLAG_COMPLETTE_PEEL))
 965         {
 966           free_original_copy_tables ();
 967           if (dump_file && (dump_flags & TDF_DETAILS))
 968             fprintf (dump_file, "Failed to duplicate the loop\n");
 969           return false;
 970         }
 971
 972       free_original_copy_tables ();
 973     }
 974   else
 975     scale_loop_profile (loop, profile_probability::always (), 0);
 976
 977   /* Remove the conditional from the last copy of the loop.  */
 978   if (edge_to_cancel)
 979     {
 980       gcond *cond = as_a <gcond *> (*gsi_last_bb (edge_to_cancel->src));
 981       force_edge_cold (edge_to_cancel, true);
 982       if (edge_to_cancel->flags & EDGE_TRUE_VALUE)
 983         gimple_cond_make_false (cond);
 984       else
 985         gimple_cond_make_true (cond);
 986       update_stmt (cond);
 987       /* Do not remove the path, as doing so may remove outer loop and
 988          confuse bookkeeping code in tree_unroll_loops_completely.  */
 989     }
 990
 991   /* Store the loop for later unlooping and exit removal.  */
 992   loops_to_unloop.safe_push (loop);
 993   loops_to_unloop_nunroll.safe_push (n_unroll);
 994
 995   if (dump_enabled_p ())
 996     {
 997       if (!n_unroll)
 998         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus,
 999                          "loop turned into non-loop; it never loops\n");
1000       else
1001         {
1002           dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus,
1003                            "loop with %d iterations completely unrolled",
1004                            (int) n_unroll);
1005           if (loop->header->count.initialized_p ())
1006             dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS,
1007                          " (header execution count %d)",
1008                          (int)loop->header->count.to_gcov_type ());
1009           dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, "\n");
1010         }
1011     }
1012
1013   if (dump_file && (dump_flags & TDF_DETAILS))
1014     {
1015       if (exit)
1016         fprintf (dump_file, "Exit condition of peeled iterations was "
1017                  "eliminated.\n");
1018       if (edge_to_cancel)
1019         fprintf (dump_file, "Last iteration exit edge was proved true.\n");
1020       else
1021         fprintf (dump_file, "Latch of last iteration was marked by "
1022                  "__builtin_unreachable ().\n");
1023     }
1024
1025   return true;
1026 }
1027
1028 /* Size estimate for NPEEL peeled copies of the loop in SIZE; at least 1.  */
1029 static unsigned HOST_WIDE_INT
1030 estimated_peeled_sequence_size (struct loop_size *size,
1031                                 unsigned HOST_WIDE_INT npeel)
1032 {
1033   HOST_WIDE_INT per_copy = size->overall - size->eliminated_by_peeling;
1034   return MAX (npeel * per_copy, 1);
1035 }
1036
1037 /* Bring the iteration-count records of LOOP up to date after NPEEL
1038    iterations have been peeled.  With PRECISE false only likely exits were
1039    duplicated, so the always-reliable upper bound is left alone.  */
1040 void
1041 adjust_loop_info_after_peeling (class loop *loop, int npeel, bool precise)
1042 {
1043   if (loop->any_estimate)
1044     {
1045       /* Peeling is mostly about loops whose first few iterations are
1046          special, so assuming the remaining iterations behave alike is
1047          not quite right.  Lacking better information we still shift
1048          the estimate, which is likely better than keeping it.
1049
1050          The estimate is dropped when it would no longer make sense.
1051
1052          TODO: We likely want to special case the situation where
1053          peeling is optimizing out exit edges and only update
1054          estimates here.  */
1055       if (!wi::leu_p (npeel, loop->nb_iterations_estimate))
1056         loop->any_estimate = false;
1057       else
1058         loop->nb_iterations_estimate -= npeel;
1059     }
1060
1061   if (precise && loop->any_upper_bound)
1062     {
1063       /* Peeling the maximal number of iterations or more makes no
1064          sense and is a bug; such a loop should have been peeled
1065          completely.  */
1066       if (!wi::leu_p (npeel, loop->nb_iterations_upper_bound))
1067         gcc_unreachable ();
1068       else
1069         loop->nb_iterations_upper_bound -= npeel;
1070     }
1071
1072   if (loop->any_likely_upper_bound)
1073     {
1074       bool fits = wi::leu_p (npeel, loop->nb_iterations_likely_upper_bound);
1075       if (fits)
1076         loop->nb_iterations_likely_upper_bound -= npeel;
1077       else
1078         {
1079           /* More was peeled than the likely bound allows: record zero.  */
1080           loop->any_estimate = true;
1081           loop->nb_iterations_estimate = 0;
1082           loop->nb_iterations_likely_upper_bound = 0;
1083         }
1084     }
1085 }
1086
1087 /* If the loop is expected to iterate N times and is
1088    small enough, duplicate the loop body N+1 times before
1089    the loop itself.  This way the hot path will never
1090    enter the loop.  LOOP, EXIT, NITER, MAY_BE_ZERO and MAXITER
1091    have the same meaning as for try_unroll_loop_completely.
1092    Returns true if the loop was peeled.  */
1093 static bool
1094 try_peel_loop (class loop *loop,
1095                edge exit, tree niter, bool may_be_zero,
1096                HOST_WIDE_INT maxiter)
1097 {
1098   HOST_WIDE_INT npeel;
1099   struct loop_size size;
1100   int peeled_size;
1101
1102   if (!flag_peel_loops
1103       || param_max_peel_times <= 0
1104       || !peeled_loops)
1105     return false;
1106
1107   if (bitmap_bit_p (peeled_loops, loop->num))
1108     {
1109       if (dump_file)
1110         fprintf (dump_file, "Not peeling: loop is already peeled\n");
1111       return false;
1112     }
1113
1114   /* We don't peel loops that will be unrolled as this can duplicate a
1115      loop more times than the user requested.  */
1116   if (loop->unroll)
1117     {
1118       if (dump_file)
1119         fprintf (dump_file, "Not peeling: user didn't want it peeled.\n");
1120       return false;
1121     }
1122
1123   /* Peel only innermost loops.
1124      While the code is perfectly capable of peeling non-innermost loops,
1125      the heuristics would probably need some improvements. */
1126   if (loop->inner)
1127     {
1128       if (dump_file)
1129         fprintf (dump_file, "Not peeling: outer loop\n");
1130       return false;
1131     }
1132
1133   if (!optimize_loop_for_speed_p (loop))
1134     {
1135       if (dump_file)
1136         fprintf (dump_file, "Not peeling: cold loop\n");
1137       return false;
1138     }
1139
1140   /* Check if there is an estimate on the number of iterations.  */
1141   npeel = estimated_loop_iterations_int (loop);
1142   if (npeel < 0)
1143     npeel = likely_max_loop_iterations_int (loop);
1144   if (npeel < 0)
1145     {
1146       if (dump_file)
1147         fprintf (dump_file, "Not peeling: number of iterations is not "
1148                  "estimated\n");
1149       return false;
1150     }
1151   if (maxiter >= 0 && maxiter <= npeel)
1152     {
1153       if (dump_file)
1154         fprintf (dump_file, "Not peeling: upper bound is known so can "
1155                  "unroll completely\n");
1156       return false;
1157     }
1158
1159   /* We want to peel estimated number of iterations + 1 (so we never
1160      enter the loop on quick path).  Check against PARAM_MAX_PEEL_TIMES
1161      and be sure to avoid overflows.  */
1162   if (npeel > param_max_peel_times - 1)
1163     {
1164       if (dump_file)
1165         fprintf (dump_file, "Not peeling: rolls too much "
1166                  "(%i + 1 > --param max-peel-times)\n", (int) npeel);
1167       return false;
1168     }
1169   npeel++;
1170
1171   /* Check the peeled sequence size against --param max-peeled-insns.  */
1172   tree_estimate_loop_size (loop, exit, NULL, &size,
1173                            param_max_peeled_insns);
1174   if ((peeled_size = estimated_peeled_sequence_size (&size, (int) npeel))
1175       > param_max_peeled_insns)
1176     {
1177       if (dump_file)
1178         fprintf (dump_file, "Not peeling: peeled sequence size is too large "
1179                  "(%i insns > --param max-peeled-insns)\n", peeled_size);
1180       return false;
1181     }
1182
1183   if (!dbg_cnt (gimple_unroll))
1184     return false;
1185
1186   /* Duplicate possibly eliminating the exits.  */
1187   initialize_original_copy_tables ();
1188   auto_sbitmap wont_exit (npeel + 1);
1189   if (exit && niter
1190       && TREE_CODE (niter) == INTEGER_CST
1191       && wi::leu_p (npeel, wi::to_widest (niter)))
1192     {
1193       bitmap_ones (wont_exit);
1194       bitmap_clear_bit (wont_exit, 0);
1195     }
1196   else
1197     {
1198       exit = NULL;
1199       bitmap_clear (wont_exit);
1200     }
1201   if (may_be_zero)
1202     bitmap_clear_bit (wont_exit, 1);
1203
1204   if (!gimple_duplicate_loop_body_to_header_edge (
1205         loop, loop_preheader_edge (loop), npeel, wont_exit, exit,
1206         &edges_to_remove, DLTHE_FLAG_UPDATE_FREQ))
1207     {
1208       free_original_copy_tables ();
1209       return false;
1210     }
1211   free_original_copy_tables ();
1212   if (dump_file && (dump_flags & TDF_DETAILS))
1213     {
1214       fprintf (dump_file, "Peeled loop %d, %i times.\n",
1215                loop->num, (int) npeel);
1216     }
1217   adjust_loop_info_after_peeling (loop, npeel, true);
1218
1219   bitmap_set_bit (peeled_loops, loop->num);
1220   return true;
1221 }
1222 /* Adds a canonical induction variable to LOOP if suitable.  CREATE_IV is
1223    true if we may create a new iv.  UL determines which loops we are
1224    allowed to completely unroll.  If TRY_EVAL is true, we try to determine
1225    the number of iterations of a loop by direct evaluation.  ALLOW_PEEL and
1226    CUNROLLI go to try_unroll_loop_completely.  Returns true if cfg is changed.  */
1227
1228 static bool
1229 canonicalize_loop_induction_variables (class loop *loop,
1230                                        bool create_iv, enum unroll_level ul,
1231                                        bool try_eval, bool allow_peel, bool cunrolli)
1232 {
1233   edge exit = NULL;
1234   tree niter;
1235   HOST_WIDE_INT maxiter;
1236   bool modified = false;
1237   class tree_niter_desc niter_desc;
1238   bool may_be_zero = false;
1239
1240   /* For unrolling allow conditional constant or zero iterations, thus
1241      perform loop-header copying on-the-fly.  */
1242   exit = single_exit (loop);
1243   niter = chrec_dont_know;
1244   if (exit && number_of_iterations_exit (loop, exit, &niter_desc, false))
1245     {
1246       niter = niter_desc.niter;
1247       may_be_zero
1248         = niter_desc.may_be_zero && !integer_zerop (niter_desc.may_be_zero);
1249     }
1250   if (TREE_CODE (niter) != INTEGER_CST)
1251     {
1252       /* For non-constant niter fold may_be_zero into niter again.  */
1253       if (may_be_zero)
1254         {
1255           if (COMPARISON_CLASS_P (niter_desc.may_be_zero))
1256             niter = fold_build3 (COND_EXPR, TREE_TYPE (niter),
1257                                  niter_desc.may_be_zero,
1258                                  build_int_cst (TREE_TYPE (niter), 0), niter);
1259           else
1260             niter = chrec_dont_know;
1261           may_be_zero = false;
1262         }
1263
1264       /* If the loop has more than one exit, try checking all of them
1265          for # of iterations determinable through scev.  */
1266       if (!exit)
1267         niter = find_loop_niter (loop, &exit);
1268
1269       /* Finally if everything else fails, try brute force evaluation.  */
1270       if (try_eval
1271           && (chrec_contains_undetermined (niter)
1272               || TREE_CODE (niter) != INTEGER_CST))
1273         niter = find_loop_niter_by_eval (loop, &exit);
1274
1275       if (TREE_CODE (niter) != INTEGER_CST)
1276         exit = NULL;
1277     }
1278
1279   /* We work exceptionally hard here to estimate the bound
1280      by find_loop_niter_by_eval.  Be sure to keep it for future.  */
1281   if (niter && TREE_CODE (niter) == INTEGER_CST)
1282     {
1283       auto_vec<edge> exits = get_loop_exit_edges  (loop);
1284       record_niter_bound (loop, wi::to_widest (niter),
1285                           exit == single_likely_exit (loop, exits), true);
1286     }
1287
1288   /* Force re-computation of loop bounds so we can remove redundant exits.  */
1289   maxiter = max_loop_iterations_int (loop);
1290
1291   if (dump_file && (dump_flags & TDF_DETAILS)
1292       && TREE_CODE (niter) == INTEGER_CST)
1293     {
1294       fprintf (dump_file, "Loop %d iterates ", loop->num);
1295       print_generic_expr (dump_file, niter, TDF_SLIM);
1296       fprintf (dump_file, " times.\n");
1297     }
1298   if (dump_file && (dump_flags & TDF_DETAILS)
1299       && maxiter >= 0)
1300     {
1301       fprintf (dump_file, "Loop %d iterates at most %i times.\n", loop->num,
1302                (int)maxiter);
1303     }
1304   if (dump_file && (dump_flags & TDF_DETAILS)
1305       && likely_max_loop_iterations_int (loop) >= 0)
1306     {
1307       fprintf (dump_file, "Loop %d likely iterates at most %i times.\n",
1308                loop->num, (int)likely_max_loop_iterations_int (loop));
1309     }
1310
1311   /* Remove exits that are known to be never taken based on loop bound.
1312      Needs to be called after computation of max_loop_iterations_int that
1313      populates the loop bounds.  */
1314   modified |= remove_redundant_iv_tests (loop);
1315
1316   dump_user_location_t locus = find_loop_location (loop);
1317   if (try_unroll_loop_completely (loop, exit, niter, may_be_zero, ul,
1318                                   maxiter, locus, allow_peel, cunrolli))
1319     return true;
1320
1321   if (create_iv
1322       && niter && !chrec_contains_undetermined (niter)
1323       && exit && just_once_each_iteration_p (loop, exit->src))
1324     {
1325       tree iv_niter = niter;
1326       if (may_be_zero)
1327         {
1328           if (COMPARISON_CLASS_P (niter_desc.may_be_zero))
1329             iv_niter = fold_build3 (COND_EXPR, TREE_TYPE (iv_niter),
1330                                     niter_desc.may_be_zero,
1331                                     build_int_cst (TREE_TYPE (iv_niter), 0),
1332                                     iv_niter);
1333           else
1334             iv_niter = NULL_TREE;
1335         }
1336       if (iv_niter)
1337         create_canonical_iv (loop, exit, iv_niter);
1338     }
1339
1340   if (ul == UL_ALL)
1341     modified |= try_peel_loop (loop, exit, niter, may_be_zero, maxiter);
1342
1343   return modified;
1344 }
1345
1346 /* Entry point of the induction variable canonicalization pass: every loop
1347    of the current function is visited, innermost loops first.  Returns
1348    TODO_cleanup_cfg when some loop was changed and 0 otherwise.  */
1349 unsigned int
1350 canonicalize_induction_variables (void)
1351 {
1352   bitmap lcssa_dirty = BITMAP_ALLOC (NULL);
1353   bool irreducible_dirty = false;
1354   bool cfg_changed = false;
1355
1356   estimate_numbers_of_iterations (cfun);
1357
1358   /* Create ivs, allow brute force evaluation, and unroll only loops
1359      that do not iterate (UL_SINGLE_ITER); no peeling.  */
1360   for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
1361     if (canonicalize_loop_induction_variables (loop, true, UL_SINGLE_ITER,
1362                                                true, false, false))
1363       cfg_changed = true;
1364   gcc_assert (!need_ssa_update_p (cfun));
1365
1366   /* Finish the loops queued for unlooping, then drop the work lists.  */
1367   unloop_loops (loops_to_unloop, loops_to_unloop_nunroll, edges_to_remove,
1368                 lcssa_dirty, &irreducible_dirty);
1369   loops_to_unloop.release ();
1370   loops_to_unloop_nunroll.release ();
1371   if (irreducible_dirty
1372       && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
1373     mark_irreducible_loops ();
1374
1375   /* Brute force evaluation could have revealed new information, so throw
1376      away what was recorded about numbers of iterations.  */
1377   free_numbers_of_iterations_estimates (cfun);
1378   scev_reset ();
1379
1380   /* Restore loop-closed SSA form if unlooping invalidated it.  */
1381   if (!bitmap_empty_p (lcssa_dirty))
1382     {
1383       gcc_checking_assert (loops_state_satisfies_p (LOOP_CLOSED_SSA));
1384       rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1385     }
1386   BITMAP_FREE (lcssa_dirty);
1387
1388   return cfg_changed ? TODO_cleanup_cfg : 0;
1389 }
1390
1391 /* Process LOOP and its subloops from innermost to outer, stopping at the
1392    innermost loop we unrolled.  FATHER_BBS collects header block indices of
1393    non-root fathers of changed loops.  Returns true if anything changed.  */
1394 static bool
1395 tree_unroll_loops_completely_1 (bool may_increase_size, bool unroll_outer,
1396                                 bitmap father_bbs, class loop *loop, bool cunrolli)
1397 {
1398   class loop *loop_father;
1399   bool changed = false;
1400   class loop *inner;
1401   enum unroll_level ul;
1402   unsigned num = number_of_loops (cfun);
1403
1404   /* Process inner loops first.  Don't walk loops added by the recursive
1405      calls because SSA form is not up-to-date.  They can be handled in the
1406      next iteration.  */
1407   bitmap child_father_bbs = NULL;
1408   for (inner = loop->inner; inner != NULL; inner = inner->next)
1409     if ((unsigned) inner->num < num)
1410       {
1411         if (!child_father_bbs)
1412           child_father_bbs = BITMAP_ALLOC (NULL);
1413         if (tree_unroll_loops_completely_1 (may_increase_size, unroll_outer,
1414                                             child_father_bbs, inner, cunrolli))
1415           {
1416             bitmap_ior_into (father_bbs, child_father_bbs);
1417             bitmap_clear (child_father_bbs);
1418             changed = true;
1419           }
1420       }
1421   if (child_father_bbs)
1422     BITMAP_FREE (child_father_bbs);
1423
1424   /* If we changed an inner loop we cannot process outer loops in this
1425      iteration because SSA form is not up-to-date.  Continue with
1426      siblings of outer loops instead.  */
1427   if (changed)
1428     {
1429       /* If we are recorded as father clear all other fathers that
1430          are necessarily covered already to avoid redundant work.  */
1431       if (bitmap_bit_p (father_bbs, loop->header->index))
1432         {
1433           bitmap_clear (father_bbs);
1434           bitmap_set_bit (father_bbs, loop->header->index);
1435         }
1436       return true;
1437     }
1438
1439   /* Don't unroll #pragma omp simd loops until the vectorizer
1440      attempts to vectorize those.  */
1441   if (loop->force_vectorize)
1442     return false;
1443
1444   /* Try to unroll this loop.  */
1445   loop_father = loop_outer (loop);
1446   if (!loop_father)
1447     return false;
1448
1449   if (loop->unroll > 1)
1450     ul = UL_ALL;
1451   else if (may_increase_size && optimize_loop_nest_for_speed_p (loop)
1452       /* Unroll outermost loops only if asked to do so or they do
1453          not cause code growth.  */
1454       && (unroll_outer || loop_outer (loop_father)))
1455     ul = UL_ALL;
1456   else
1457     ul = UL_NO_GROWTH;
1458
1459   if (canonicalize_loop_induction_variables
1460       (loop, false, ul, !flag_tree_loop_ivcanon, unroll_outer, cunrolli))
1461     {
1462       /* If we'll continue unrolling, we need to propagate constants
1463          within the new basic blocks to fold away induction variable
1464          computations; otherwise, the size might blow up before the
1465          iteration is complete and the IR eventually cleaned up.  */
1466       if (loop_outer (loop_father))
1467         {
1468           /* Once we process our father we will have processed
1469              the fathers of our children as well, so avoid doing
1470              redundant work and clear fathers we've gathered so far.  */
1471           bitmap_clear (father_bbs);
1472           bitmap_set_bit (father_bbs, loop_father->header->index);
1473         }
1474       else if (unroll_outer)
1475         /* Trigger scalar cleanup once any outermost loop gets unrolled.  */
1476         cfun->pending_TODOs |= PENDING_TODO_force_next_scalar_cleanup;
1477
1478       return true;
1479     }
1480
1481   return false;
1482 }
1483
1484 /* Unroll LOOPS completely if they iterate just few times.  Unless
1485    MAY_INCREASE_SIZE is true, perform the unrolling only if the
1486    size of the code does not increase.  */
1487
1488 static unsigned int
1489 tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer)
1490 {
1491   bitmap father_bbs = BITMAP_ALLOC (NULL);
1492   bool changed;
1493   int iteration = 0;
1494   bool irred_invalidated = false;
1495   bool cunrolli = true;
1496
1497   estimate_numbers_of_iterations (cfun);
1498
1499   do
1500     {
1501       changed = false;
1502       bitmap loop_closed_ssa_invalidated = NULL;
1503
1504       if (loops_state_satisfies_p (LOOP_CLOSED_SSA))
1505         loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);
1506
1507       free_numbers_of_iterations_estimates (cfun);
1508       estimate_numbers_of_iterations (cfun);
1509
1510       changed = tree_unroll_loops_completely_1 (may_increase_size,
1511                                                 unroll_outer, father_bbs,
1512                                                 current_loops->tree_root,
1513                                                 cunrolli);
1514       if (changed)
1515         {
1516           unsigned i;
1517           /* For the outer loop, considering that the inner loop is completely
1518              unrolled, it would expose more optimization opportunities, so it's
1519              better to keep 2/3 reduction of estimated unrolled size.  */
1520           cunrolli = false;
1521
1522           unloop_loops (loops_to_unloop, loops_to_unloop_nunroll,
1523                         edges_to_remove, loop_closed_ssa_invalidated,
1524                         &irred_invalidated);
1525           loops_to_unloop.release ();
1526           loops_to_unloop_nunroll.release ();
1527
1528           /* We cannot use TODO_update_ssa_no_phi because VOPS gets confused.  */
1529           if (loop_closed_ssa_invalidated
1530               && !bitmap_empty_p (loop_closed_ssa_invalidated))
1531             rewrite_into_loop_closed_ssa (loop_closed_ssa_invalidated,
1532                                           TODO_update_ssa);
1533           else
1534             update_ssa (TODO_update_ssa);
1535
1536           /* father_bbs is a bitmap of loop father header BB indices.
1537              Translate that to what non-root loops these BBs belong to now.  */
1538           bitmap_iterator bi;
1539           bitmap fathers = BITMAP_ALLOC (NULL);
1540           EXECUTE_IF_SET_IN_BITMAP (father_bbs, 0, i, bi)
1541             {
1542               basic_block unrolled_loop_bb = BASIC_BLOCK_FOR_FN (cfun, i);
1543               if (! unrolled_loop_bb)
1544                 continue;
1545               if (loop_outer (unrolled_loop_bb->loop_father))
1546                 bitmap_set_bit (fathers,
1547                                 unrolled_loop_bb->loop_father->num);
1548             }
1549           bitmap_clear (father_bbs);
1550           /* Propagate the constants within the new basic blocks.  */
1551           EXECUTE_IF_SET_IN_BITMAP (fathers, 0, i, bi)
1552             {
1553               loop_p father = get_loop (cfun, i);
1554               bitmap exit_bbs = BITMAP_ALLOC (NULL);
1555               loop_exit *exit = father->exits->next;
1556               while (exit->e)
1557                 {
1558                   bitmap_set_bit (exit_bbs, exit->e->dest->index);
1559                   exit = exit->next;
1560                 }
1561               do_rpo_vn (cfun, loop_preheader_edge (father), exit_bbs);
1562             }
1563           BITMAP_FREE (fathers);
1564
1565           /* Clean up the information about numbers of iterations, since
1566              complete unrolling might have invalidated it.  */
1567           scev_reset ();
1568
1569           /* This will take care of removing completely unrolled loops
1570              from the loop structures so we can continue unrolling now
1571              innermost loops.  */
1572           if (cleanup_tree_cfg ())
1573             update_ssa (TODO_update_ssa_only_virtuals);
1574
1575           if (flag_checking && loops_state_satisfies_p (LOOP_CLOSED_SSA))
1576             verify_loop_closed_ssa (true);
1577         }
1578       if (loop_closed_ssa_invalidated)
1579         BITMAP_FREE (loop_closed_ssa_invalidated);
1580     }
1581   while (changed
1582          && ++iteration <= param_max_unroll_iterations);
1583
1584   BITMAP_FREE (father_bbs);
1585
1586   if (irred_invalidated
1587       && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
1588     mark_irreducible_loops ();
1589
1590   return 0;
1591 }
1592
1593 /* Canonical induction variable creation pass.  */
1594
1595 namespace {
1596
1597 const pass_data pass_data_iv_canon =
1598 {
1599   GIMPLE_PASS, /* type */
1600   "ivcanon", /* name */
1601   OPTGROUP_LOOP, /* optinfo_flags */
1602   TV_TREE_LOOP_IVCANON, /* tv_id */
1603   ( PROP_cfg | PROP_ssa ), /* properties_required */
1604   0, /* properties_provided */
1605   0, /* properties_destroyed */
1606   0, /* todo_flags_start */
1607   0, /* todo_flags_finish */
1608 };
1609
1610 class pass_iv_canon : public gimple_opt_pass
1611 {
1612 public:
1613   pass_iv_canon (gcc::context *ctxt)
1614     : gimple_opt_pass (pass_data_iv_canon, ctxt)
1615   {}
1616
1617   /* opt_pass methods: */
1618   bool gate (function *) final override { return flag_tree_loop_ivcanon != 0; }
1619   unsigned int execute (function *fun) final override;
1620
1621 }; // class pass_iv_canon
1622
1623 unsigned int
1624 pass_iv_canon::execute (function *fun)
1625 {
1626   if (number_of_loops (fun) <= 1)
1627     return 0;
1628
1629   return canonicalize_induction_variables ();
1630 }
1631
1632 } // anon namespace
1633
1634 gimple_opt_pass *
1635 make_pass_iv_canon (gcc::context *ctxt)
1636 {
1637   return new pass_iv_canon (ctxt);
1638 }
1639
1640 /* Complete unrolling of loops.  */
1641
1642 namespace {
1643
1644 const pass_data pass_data_complete_unroll =
1645 {
1646   GIMPLE_PASS, /* type */
1647   "cunroll", /* name */
1648   OPTGROUP_LOOP, /* optinfo_flags */
1649   TV_COMPLETE_UNROLL, /* tv_id */
1650   ( PROP_cfg | PROP_ssa ), /* properties_required */
1651   0, /* properties_provided */
1652   0, /* properties_destroyed */
1653   0, /* todo_flags_start */
1654   0, /* todo_flags_finish */
1655 };
1656
1657 class pass_complete_unroll : public gimple_opt_pass
1658 {
1659 public:
1660   pass_complete_unroll (gcc::context *ctxt)
1661     : gimple_opt_pass (pass_data_complete_unroll, ctxt)
1662   {}
1663
1664   /* opt_pass methods: */
1665   unsigned int execute (function *) final override;
1666
1667 }; // class pass_complete_unroll
1668
1669 unsigned int
1670 pass_complete_unroll::execute (function *fun)
1671 {
1672   if (number_of_loops (fun) <= 1)
1673     return 0;
1674
1675   /* If we ever decide to run loop peeling more than once, we will need to
1676      track loops already peeled in loop structures themselves to avoid
1677      re-peeling the same loop multiple times.  */
1678   if (flag_peel_loops)
1679     peeled_loops = BITMAP_ALLOC (NULL);
1680   unsigned int val = tree_unroll_loops_completely (flag_cunroll_grow_size, true);
1681   if (peeled_loops)
1682     {
1683       BITMAP_FREE (peeled_loops);
1684       peeled_loops = NULL;
1685     }
1686   return val;
1687 }
1688
1689 } // anon namespace
1690
1691 gimple_opt_pass *
1692 make_pass_complete_unroll (gcc::context *ctxt)
1693 {
1694   return new pass_complete_unroll (ctxt);
1695 }
1696
1697 /* Complete unrolling of inner loops.  */
1698
1699 namespace {
1700
1701 const pass_data pass_data_complete_unrolli =
1702 {
1703   GIMPLE_PASS, /* type */
1704   "cunrolli", /* name */
1705   OPTGROUP_LOOP, /* optinfo_flags */
1706   TV_COMPLETE_UNROLL, /* tv_id */
1707   ( PROP_cfg | PROP_ssa ), /* properties_required */
1708   0, /* properties_provided */
1709   0, /* properties_destroyed */
1710   0, /* todo_flags_start */
1711   0, /* todo_flags_finish */
1712 };
1713
1714 class pass_complete_unrolli : public gimple_opt_pass
1715 {
1716 public:
1717   pass_complete_unrolli (gcc::context *ctxt)
1718     : gimple_opt_pass (pass_data_complete_unrolli, ctxt)
1719   {}
1720
1721   /* opt_pass methods: */
1722   bool gate (function *) final override { return optimize >= 2; }
1723   unsigned int execute (function *) final override;
1724
1725 }; // class pass_complete_unrolli
1726
1727 unsigned int
1728 pass_complete_unrolli::execute (function *fun)
1729 {
1730   unsigned ret = 0;
1731
1732   loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
1733   if (number_of_loops (fun) > 1)
1734     {
1735       scev_initialize ();
1736       ret = tree_unroll_loops_completely (optimize >= 3, false);
1737       scev_finalize ();
1738     }
1739   loop_optimizer_finalize ();
1740
1741   return ret;
1742 }
1743
1744 } // anon namespace
1745
1746 gimple_opt_pass *
1747 make_pass_complete_unrolli (gcc::context *ctxt)
1748 {
1749   return new pass_complete_unrolli (ctxt);
1750 }
1751
1752