/* Loop unroll-and-jam.
   Copyright (C) 2017-2025 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tree-pass.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "ssa.h"
#include "fold-const.h"
#include "tree-cfg.h"
#include "tree-ssa.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "tree-ssa-loop-manip.h"
#include "cfgloop.h"
#include "tree-scalar-evolution.h"
#include "gimple-iterator.h"
#include "cfghooks.h"
#include "tree-data-ref.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-vectorizer.h"
#include "tree-ssa-sccvn.h"
#include "tree-cfgcleanup.h"
/* Unroll and Jam transformation

   This is a combination of two transformations, where the second
   is not always valid.  It's applicable if a loop nest has redundancies
   over the iterations of an outer loop while not having that with
   an inner loop.

   Given this nest:
       for (i) {
         for (j) {
           B(i,j)
         }
       }

   first unroll the outer loop:
       for (i by 2) {
         for (j) {
           B(i,j)
         }
         for (j) {
           B(i+1,j)
         }
       }

   then fuse the two adjacent inner loops resulting from that:
       for (i by 2) {
         for (j) {
           B(i,j)
           B(i+1,j)
         }
       }

   As the order of evaluations of the body B changes, this is valid
   only in certain situations: all distance vectors need to be forward.
   Additionally, if there are induction variables other than just a
   counting control IV (j above), we can still handle some situations.

   The validity is checked by unroll_jam_possible_p, and the data-dep
   analysis is done by adjust_unroll_factor.

   A trivial example where the fusion is wrong would be when
   B(i,j) == x[j-1] = x[j];
       for (i by 2) {
         for (j) {
           x[j-1] = x[j];
         }
         for (j) {
           x[j-1] = x[j];
         }
       }  effect: move content to front by two elements
       for (i by 2) {
         for (j) {
           x[j-1] = x[j];
           x[j-1] = x[j];
         }
       }  effect: move content to front by one element  */
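
/* For contrast, a sketch (illustration only, not taken from the original
   comment) of a case where the fusion is both valid and profitable, with
   B(i,j) == x[j] = x[j] + c[i]:
       for (i by 2) {
         for (j) {
           x[j] = x[j] + c[i];
           x[j] = x[j] + c[i+1];
         }
       }
   The self-dependence on x[j] has distance vector (1,0), i.e. it is
   carried only by the outer loop, so fusion preserves the order of the
   dependent accesses, and the second statement can reuse the value just
   computed by the first, allowing one load to be removed (see
   adjust_unroll_factor below).  */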
/* Modify the loop tree for the fact that all code once belonging
   to the OLD loop or the outer loop of OLD now is inside LOOP.  */

static void
merge_loop_tree (class loop *loop, class loop *old)
{
  basic_block *bbs;
  int i, n;
  class loop *subloop;
  edge e;
  edge_iterator ei;

  /* Find its nodes.  */
  bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));
  n = get_loop_body_with_size (loop, bbs, n_basic_blocks_for_fn (cfun));

  for (i = 0; i < n; i++)
    {
      /* If the block was a direct child of the OLD loop it's now part
         of LOOP.  If it was outside OLD, then it moved into LOOP
         as well.  This avoids changing the loop father for BBs
         in inner loops of OLD.  */
      if (bbs[i]->loop_father == old
          || loop_depth (bbs[i]->loop_father) < loop_depth (old))
        {
          remove_bb_from_loops (bbs[i]);
          add_bb_to_loop (bbs[i], loop);
          continue;
        }

      /* If we find a direct subloop of OLD, move it to LOOP.  */
      subloop = bbs[i]->loop_father;
      if (loop_outer (subloop) == old && subloop->header == bbs[i])
        {
          flow_loop_tree_node_remove (subloop);
          flow_loop_tree_node_add (loop, subloop);
        }
    }

  /* Update the information about loop exit edges.  */
  for (i = 0; i < n; i++)
    FOR_EACH_EDGE (e, ei, bbs[i]->succs)
      rescan_loop_exit (e, false, false);

  free (bbs);
}
/* BB is part of the outer loop of an unroll-and-jam situation.
   Check if any statements therein would prevent the transformation.  */

static bool
bb_prevents_fusion_p (basic_block bb)
{
  gimple_stmt_iterator gsi;
  /* BB is duplicated by outer unrolling and then all N-1 first copies
     move into the body of the fused inner loop.  If BB exits the outer loop
     the last copy still does so, and the first N-1 copies are cancelled
     by loop unrolling, so also after fusion it's the exit block.
     But there might be other reasons that prevent fusion:
       * stores or unknown side-effects prevent fusion
       * computations into SSA names: these aren't problematic.  Their
         result will be unused on the exit edges of the first N-1 copies
         (those aren't taken after unrolling).  If they are used on the
         other edge (the one leading to the outer latch block) they are
         loop-carried (on the outer loop) and the Nth copy of BB will
         compute them again (i.e. the first N-1 copies will be dead).  */
  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple *g = gsi_stmt (gsi);
      if (gimple_vdef (g) || gimple_has_side_effects (g))
        return true;
    }
  return false;
}
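
/* Illustration only: if an outer-loop block around the inner loop
   contains e.g.
       t[i] = 0;     <- store, has a VDEF: prevents fusion
       foo ();       <- call with unknown side-effects: prevents fusion
       i2 = i * 4;   <- plain SSA computation: does not prevent fusion
   then the first two statements make bb_prevents_fusion_p return true,
   while the last one alone would not.  */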
/* Given an inner loop LOOP (of some OUTER loop) determine if
   we can safely fuse copies of it (generated by outer unrolling).
   If so return true, otherwise return false.  */

static bool
unroll_jam_possible_p (class loop *outer, class loop *loop)
{
  basic_block *bbs;
  int i, n;
  edge exit;
  class tree_niter_desc niter;

  /* When fusing the loops we skip the latch block
     of the first one, so it mustn't have any effects to
     be able to disregard it.  */
  if (!empty_block_p (loop->latch))
    return false;

  if (!(exit = single_exit (loop)))
    return false;

  /* We need a perfect nest.  Quick check for adjacent inner loops.  */
  if (outer->inner != loop || loop->next)
    return false;

  /* Prevent head-controlled inner loops, that we usually have.
     The guard block would need to be accepted
     (invariant condition either entering or skipping the loop),
     without also accepting arbitrary control flow.  When unswitching
     ran before us (as with -O3) this won't be a problem because its
     outer loop unswitching will have moved out the invariant condition.

     If we do that we need to extend fuse_loops() to cope with this
     by threading through the (still invariant) copied condition
     between the two loop copies.  */
  if (!dominated_by_p (CDI_DOMINATORS, outer->latch, loop->header))
    return false;

  /* The number of iterations of the inner loop must be loop invariant
     with respect to the outer loop.  */
  if (!number_of_iterations_exit (loop, single_exit (loop), &niter,
                                  false, true)
      || niter.cmp == ERROR_MARK
      || !integer_zerop (niter.may_be_zero)
      || !expr_invariant_in_loop_p (outer, niter.niter))
    return false;

  /* If the inner loop produces any values that are used inside the
     outer loop (except the virtual op) then it can flow
     back (perhaps indirectly) into the inner loop.  This prevents
     fusion: without fusion the value at the last iteration is used,
     with fusion the value after the initial iteration is used.

     If all uses are outside the outer loop this doesn't prevent fusion;
     the value of the last iteration is still used (and the values from
     all intermediate iterations are dead).  */
  gphi_iterator psi;
  for (psi = gsi_start_phis (single_exit (loop)->dest);
       !gsi_end_p (psi); gsi_next (&psi))
    {
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      tree op = gimple_phi_result (psi.phi ());
      if (virtual_operand_p (op))
        continue;
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, op)
        {
          gimple *use_stmt = USE_STMT (use_p);
          if (!is_gimple_debug (use_stmt)
              && flow_bb_inside_loop_p (outer, gimple_bb (use_stmt)))
            return false;
        }
    }

  /* And check blocks belonging to just the outer loop.  */
  bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));
  n = get_loop_body_with_size (outer, bbs, n_basic_blocks_for_fn (cfun));

  for (i = 0; i < n; i++)
    if (bbs[i]->loop_father == outer
        && (bb_prevents_fusion_p (bbs[i])
            /* Outer loop exits must come after the inner loop, otherwise
               we'll put the outer loop exit into the fused inner loop.  */
            || (loop_exits_from_bb_p (outer, bbs[i])
                && !dominated_by_p (CDI_DOMINATORS, bbs[i], exit->src))))
      break;
  free (bbs);
  if (i != n)
    return false;

  /* For now we can safely fuse copies of LOOP only if all
     loop carried variables are inductions (or the virtual op).

     We could handle reductions as well (the initial value in the second
     body would be the after-iter value of the first body) if it's over
     an associative and commutative operation.  We wouldn't
     be able to handle unknown cycles.  */
  bool inner_vdef = false;
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      affine_iv iv;
      tree op = gimple_phi_result (psi.phi ());

      if (virtual_operand_p (op))
        {
          inner_vdef = true;
          continue;
        }
      if (!simple_iv (loop, loop, op, &iv, true))
        return false;
      /* The inductions must be regular, with loop invariant step and
         initial value.  */
      if (!expr_invariant_in_loop_p (outer, iv.step)
          || !expr_invariant_in_loop_p (outer, iv.base))
        return false;
      /* XXX With more effort we could also be able to deal with inductions
         where the initial value is loop variant but a simple IV in the
         outer loop.  The initial value for the second body would be
         the original initial value plus iv.base.step.  The next value
         for the fused loop would be the original next value of the first
         copy, _not_ the next value of the second body.  */
    }

  /* When there's no inner loop virtual PHI IV we cannot handle the update
     required to the inner loop if that doesn't already have one.  */
  if (!inner_vdef && get_virtual_phi (outer->header))
    return false;

  return true;
}
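
/* Illustration only: the checks above accept a rectangular nest like

       for (i = 0; i < n; i++)
         for (j = 0; j < m; j++)   // m does not change within the i loop
           B(i,j);

   but reject e.g. a triangular nest

       for (i = 0; i < n; i++)
         for (j = 0; j < i; j++)   // trip count depends on the outer IV
           B(i,j);

   because after unrolling the outer loop the two adjacent inner copies
   would have different iteration counts and could not simply be fused.  */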
/* Fuse LOOP with all further neighbors.  The loops are expected to
   be in appropriate form.  */

static void
fuse_loops (class loop *loop)
{
  class loop *next = loop->next;

  while (next)
    {
      edge e;

      remove_branch (single_pred_edge (loop->latch));
      /* Make delete_basic_block not fiddle with the loop structure.  */
      basic_block oldlatch = loop->latch;
      loop->latch = NULL;
      delete_basic_block (oldlatch);
      e = redirect_edge_and_branch (loop_latch_edge (next),
                                    loop->header);
      loop->latch = e->src;
      flush_pending_stmts (e);

      gcc_assert (EDGE_COUNT (next->header->preds) == 1);

      /* The PHI nodes of the second body (single-argument now)
         need adjustments to use the right values: either directly
         the value of the corresponding PHI in the first copy or
         the one leaving the first body which unrolling did for us.

         See also unroll_jam_possible_p() for further possibilities.  */
      gphi_iterator psi_first, psi_second;
      e = single_pred_edge (next->header);
      for (psi_first = gsi_start_phis (loop->header),
           psi_second = gsi_start_phis (next->header);
           !gsi_end_p (psi_first);
           gsi_next (&psi_first), gsi_next (&psi_second))
        {
          gphi *phi_first = psi_first.phi ();
          gphi *phi_second = psi_second.phi ();
          tree firstop = gimple_phi_result (phi_first);
          /* The virtual operand is correct already as it's
             always live at exit, hence has a LCSSA node and outer
             loop unrolling updated SSA form.  */
          if (virtual_operand_p (firstop))
            continue;

          /* Due to unroll_jam_possible_p() we know that this is
             an induction.  The second body goes over the same
             iteration space.  */
          add_phi_arg (phi_second, firstop, e,
                       gimple_location (phi_first));
        }
      gcc_assert (gsi_end_p (psi_second));

      merge_loop_tree (loop, next);
      gcc_assert (!next->num_nodes);
      class loop *ln = next->next;
      delete_loop (next);
      next = ln;
    }
}
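
/* Illustration only: fuse_loops works on the adjacent copies the outer
   unrolling left behind.  It deletes the first copy's latch, redirects
   the second copy's latch edge back to the first header (leaving a
   single back edge), and rewires the second header's PHIs to the
   corresponding results of the first header's PHIs, so both bodies run
   with the same induction variable values.  */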
/* Return true if any of the access functions for dataref A
   isn't invariant with respect to loop LOOP_NEST.  */

static bool
any_access_function_variant_p (const struct data_reference *a,
                               const class loop *loop_nest)
{
  vec<tree> fns = DR_ACCESS_FNS (a);
  tree t;
  unsigned i;
  FOR_EACH_VEC_ELT (fns, i, t)
    if (!evolution_function_is_invariant_p (t, loop_nest->num))
      return true;

  return false;
}
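
/* Illustration only: for the example used in adjust_unroll_factor below,
   a reference a[i][0] has access functions i and 0, both invariant in the
   inner j loop, so the function above returns false for that loop, while
   b[i][j] also has the access function j and returns true.  */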
/* Return true if the distance in DDR can be determined and adjust
   the unroll factor in *UNROLL to make unrolling valid for that distance.
   Otherwise return false.  DDR is with respect to the outer loop of INNER.

   If this data dep can lead to a removed memory reference, increment
   *REMOVED and adjust *PROFIT_UNROLL to be the necessary unroll factor
   for this to happen.  */

static bool
adjust_unroll_factor (class loop *inner, struct data_dependence_relation *ddr,
                      unsigned *unroll, unsigned *profit_unroll,
                      unsigned *removed)
{
  if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
    {
      if (DDR_NUM_DIST_VECTS (ddr) == 0)
        return false;
      unsigned i;
      lambda_vector dist_v;
      FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
        {
          /* A distance (a,b) is at worst transformed into (a/N,b) by the
             unrolling (factor N), so the transformation is valid if
             a >= N, or b > 0, or b is zero and a > 0.  Otherwise the unroll
             factor needs to be limited so that the first condition holds.
             That may limit the factor down to zero in the worst case.  */
          lambda_int dist = dist_v[0];
          if (dist < 0)
            gcc_unreachable ();
          else if (dist >= (lambda_int)*unroll)
            ;
          else if (lambda_vector_zerop (dist_v + 1, DDR_NB_LOOPS (ddr) - 1))
            {
              /* We have (a,0) with a < N, so this will be transformed into
                 (0,0) after unrolling by N.  This might potentially be a
                 problem, if it's not a read-read dependency.  */
              if (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr)))
                ;
              else
                {
                  /* So, at least one is a write, and we might reduce the
                     distance vector to (0,0).  This is still no problem
                     if both data-refs are affine with respect to the inner
                     loops.  But if one of them is invariant with respect
                     to an inner loop our reordering implicit in loop fusion
                     corrupts the program, as our data dependences don't
                     capture this.  E.g. for:
                        for (i) for (j)
                          a[i][0] = a[i+1][0] + 2;    // (1)
                          b[i][j] = b[i+1][j] + 2;    // (2)
                     the distance vector for both statements is (-1,0),
                     but exchanging the order for (2) is okay, while
                     for (1) it is not.  To see this, write out the original
                     accesses (assume m is 2):
                        0 0 0 r a[1][0] b[1][0]
                        1 0 0 w a[0][0] b[0][0]
                        2 0 1 r a[1][0] b[1][1]
                        3 0 1 w a[0][0] b[0][1]
                        4 1 0 r a[2][0] b[2][0]
                        5 1 0 w a[1][0] b[1][0]
                     after unroll-by-2 and fusion the accesses are done in
                     this order (from column a): 0,1, 4,5, 2,3, i.e. this:
                        0 0 0 r a[1][0] b[1][0]
                        1 0 0 w a[0][0] b[0][0]
                        4 1 0 r a[2][0] b[2][0]
                        5 1 0 w a[1][0] b[1][0]
                        2 0 1 r a[1][0] b[1][1]
                        3 0 1 w a[0][0] b[0][1]
                     Note how access 2 accesses the same element as access 5
                     for array 'a' but not for array 'b'.  */
                  if (any_access_function_variant_p (DDR_A (ddr), inner)
                      && any_access_function_variant_p (DDR_B (ddr), inner))
                    ;
                  else
                    /* And if any dataref of this pair is invariant with
                       respect to the inner loop, we have no choice but
                       to reduce the unroll factor.  */
                    *unroll = MIN (*unroll, (unsigned)dist);
                }
            }
          else if (lambda_vector_lexico_pos (dist_v + 1,
                                             DDR_NB_LOOPS (ddr) - 1))
            ;
          else
            *unroll = MIN (*unroll, (unsigned)dist);

          /* With a distance (a,0) it's always profitable to unroll-and-jam
             (by a+1), because one memory reference will go away.  With
             (a,b) and b != 0 that's less clear.  We will increase the
             number of streams without lowering the number of mem refs.
             So for now only handle the first situation.  */
          if (lambda_vector_zerop (dist_v + 1, DDR_NB_LOOPS (ddr) - 1))
            {
              *profit_unroll = MAX (*profit_unroll, (unsigned)dist + 1);
              (*removed)++;
            }
        }
    }

  return true;
}
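
/* Worked example (illustration only): suppose the only dependence has
   distance vector (2,0) and *unroll starts out huge ((unsigned)-1 in the
   caller).  dist == 2 is below *unroll and the remaining distance entries
   are zero; if both refs vary in the inner loop nothing limits *unroll,
   while the profitability code sets *profit_unroll to 3 (dist + 1) and
   increments *removed.  Assuming the removed-reference percentage passes
   param_unroll_jam_min_percent, the caller clamps the factor to
   profit_unroll (and to param_unroll_jam_max_unroll), so the outer loop
   is unrolled by 3 and the following value-numbering run can delete one
   memory access per fused body.  */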
/* Main entry point for the unroll-and-jam transformation.  */

static unsigned int
tree_loop_unroll_and_jam (void)
{
  unsigned int todo = 0;

  gcc_assert (scev_initialized_p ());

  /* Go through all innermost loops.  */
  for (auto loop : loops_list (cfun, LI_ONLY_INNERMOST))
    {
      class loop *outer = loop_outer (loop);

      if (loop_depth (loop) < 2
          || optimize_loop_nest_for_size_p (outer))
        continue;

      if (!unroll_jam_possible_p (outer, loop))
        continue;

      vec<data_reference_p> datarefs = vNULL;
      vec<ddr_p> dependences = vNULL;
      unsigned unroll_factor, profit_unroll, removed;
      class tree_niter_desc desc;
      bool unroll = false;

      auto_vec<loop_p, 3> loop_nest;
      if (!compute_data_dependences_for_loop (outer, true, &loop_nest,
                                              &datarefs, &dependences))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "Cannot analyze data dependencies\n");
          free_data_refs (datarefs);
          free_dependence_relations (dependences);
          continue;
        }
      if (!datarefs.length ())
        continue;

      if (dump_file && (dump_flags & TDF_DETAILS))
        dump_data_dependence_relations (dump_file, dependences);

      unroll_factor = (unsigned)-1;
      profit_unroll = 1;
      removed = 0;

      /* Check all dependencies.  */
      unsigned i;
      struct data_dependence_relation *ddr;
      FOR_EACH_VEC_ELT (dependences, i, ddr)
        {
          struct data_reference *dra, *drb;

          /* If the refs are independent there's nothing to do.  */
          if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
            continue;
          dra = DDR_A (ddr);
          drb = DDR_B (ddr);

          /* Nothing interesting for the self dependencies, except for WAW if
             the access function is not affine or constant because we may end
             up reordering writes to the same location.  */
          if (dra == drb)
            {
              if (DR_IS_WRITE (dra)
                  && !DR_ACCESS_FNS (dra).is_empty ()
                  && DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
                {
                  unroll_factor = 0;
                  break;
                }
              continue;
            }

          /* Now check the distance vector, for determining a sensible
             outer unroll factor, and for validity of merging the inner
             loop copies.  */
          if (!adjust_unroll_factor (loop, ddr, &unroll_factor, &profit_unroll,
                                     &removed))
            {
              /* Couldn't get the distance vector.  For two reads that's
                 harmless (we assume we should unroll).  For at least
                 one write this means we can't check the dependence direction
                 and hence can't determine safety.  */
              if (DR_IS_WRITE (dra) || DR_IS_WRITE (drb))
                {
                  unroll_factor = 0;
                  break;
                }
            }
        }

      /* We regard a user-specified minimum percentage of zero as a request
         to ignore all profitability concerns and apply the transformation
         always.  */
      if (!param_unroll_jam_min_percent)
        profit_unroll = MAX (2, profit_unroll);
      else if (removed * 100 / datarefs.length ()
               < (unsigned)param_unroll_jam_min_percent)
        profit_unroll = 1;
      if (unroll_factor > profit_unroll)
        unroll_factor = profit_unroll;
      if (unroll_factor > (unsigned)param_unroll_jam_max_unroll)
        unroll_factor = param_unroll_jam_max_unroll;
      unroll = (unroll_factor > 1
                && can_unroll_loop_p (outer, unroll_factor, &desc));

      if (unroll)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS,
                             find_loop_location (outer),
                             "applying unroll and jam with factor %d\n",
                             unroll_factor);
          initialize_original_copy_tables ();
          tree_unroll_loop (outer, unroll_factor, &desc);
          free_original_copy_tables ();
          fuse_loops (outer->inner);
          todo |= TODO_cleanup_cfg;

          auto_bitmap exit_bbs;
          bitmap_set_bit (exit_bbs, single_exit (outer)->dest->index);
          todo |= do_rpo_vn (cfun, loop_preheader_edge (outer), exit_bbs);
        }

      loop_nest.release ();
      free_dependence_relations (dependences);
      free_data_refs (datarefs);
    }

  if (todo)
    {
      free_dominance_info (CDI_DOMINATORS);
      /* We need to cleanup the CFG first since otherwise SSA form can
         be not up-to-date from the VN run.  */
      if (todo & TODO_cleanup_cfg)
        {
          cleanup_tree_cfg ();
          todo &= ~TODO_cleanup_cfg;
        }
      rewrite_into_loop_closed_ssa (NULL, 0);
    }
  return todo;
}
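
/* Usage sketch (illustration only; option names assumed from the pass's
   flag_unroll_jam gate and the params used above): the transformation is
   requested with -floop-unroll-and-jam and tuned via
   --param unroll-jam-max-unroll and --param unroll-jam-min-percent, e.g.

       gcc -O3 -floop-unroll-and-jam --param unroll-jam-max-unroll=4 \
           -fdump-tree-unrolljam-details foo.c

   The pass dump file then contains the "applying unroll and jam with
   factor N" message emitted above.  */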
/* Pass boilerplate  */

namespace {

const pass_data pass_data_loop_jam =
{
  GIMPLE_PASS, /* type */
  "unrolljam", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_LOOP_JAM, /* tv_id */
  PROP_cfg, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
class pass_loop_jam : public gimple_opt_pass
{
public:
  pass_loop_jam (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_loop_jam, ctxt)
  {}

  /* opt_pass methods: */
  bool gate (function *) final override { return flag_unroll_jam != 0; }
  unsigned int execute (function *) final override;

}; // class pass_loop_jam

unsigned int
pass_loop_jam::execute (function *fun)
{
  if (number_of_loops (fun) <= 1)
    return 0;

  return tree_loop_unroll_and_jam ();
}

} // anon namespace

gimple_opt_pass *
make_pass_loop_jam (gcc::context *ctxt)
{
  return new pass_loop_jam (ctxt);
}