contrib/gcc/lambda-code.c

   1 /*  Loop transformation code generation
   2     Copyright (C) 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
   3     Contributed by Daniel Berlin <dberlin@dberlin.org>
   4
   5     This file is part of GCC.
   6
   7     GCC is free software; you can redistribute it and/or modify it under
   8     the terms of the GNU General Public License as published by the Free
   9     Software Foundation; either version 2, or (at your option) any later
  10     version.
  11
  12     GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13     WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14     FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15     for more details.
  16
  17     You should have received a copy of the GNU General Public License
  18     along with GCC; see the file COPYING.  If not, write to the Free
  19     Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
  20     02110-1301, USA.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "tm.h"
  26 #include "ggc.h"
  27 #include "tree.h"
  28 #include "target.h"
  29 #include "rtl.h"
  30 #include "basic-block.h"
  31 #include "diagnostic.h"
  32 #include "tree-flow.h"
  33 #include "tree-dump.h"
  34 #include "timevar.h"
  35 #include "cfgloop.h"
  36 #include "expr.h"
  37 #include "optabs.h"
  38 #include "tree-chrec.h"
  39 #include "tree-data-ref.h"
  40 #include "tree-pass.h"
  41 #include "tree-scalar-evolution.h"
  42 #include "vec.h"
  43 #include "lambda.h"
  44 #include "vecprim.h"
  45
  46 /* This loop nest code generation is based on non-singular matrix
  47    math.
  48
  49  A little terminology and a general sketch of the algorithm.  See "A singular
  50  loop transformation framework based on non-singular matrices" by Wei Li and
  51  Keshav Pingali for formal proofs that the various statements below are
  52  correct.
  53
  54  A loop iteration space represents the points traversed by the loop.  A point in the
  55  iteration space can be represented by a vector of size <loop depth>.  You can
  56  therefore represent the iteration space as an integral combinations of a set
  57  of basis vectors.
  58
  59  A loop iteration space is dense if every integer point between the loop
  60  bounds is a point in the iteration space.  Every loop with a step of 1
  61  therefore has a dense iteration space.
  62
  63  for i = 1 to 3, step 1 is a dense iteration space.
  64
  65  A loop iteration space is sparse if it is not dense.  That is, the iteration
  66  space skips integer points that are within the loop bounds.
  67
  68  for i = 1 to 3, step 2 is a sparse iteration space, because the integer point
  69  2 is skipped.
  70
  71  Dense source spaces are easy to transform, because they don't skip any
  72  points to begin with.  Thus we can compute the exact bounds of the target
  73  space using min/max and floor/ceil.
  74
  75  For a dense source space, we take the transformation matrix, decompose it
  76  into a lower triangular part (H) and a unimodular part (U).
  77  We then compute the auxiliary space from the unimodular part (source loop
  78  nest . U = auxiliary space) , which has two important properties:
  79   1. It traverses the iterations in the same lexicographic order as the source
  80   space.
  81   2. It is a dense space when the source is a dense space (even if the target
  82   space is going to be sparse).
  83
  84  Given the auxiliary space, we use the lower triangular part to compute the
  85  bounds in the target space by simple matrix multiplication.
  86  The gaps in the target space (IE the new loop step sizes) will be the
  87  diagonals of the H matrix.
  88
  89  Sparse source spaces require another step, because you can't directly compute
  90  the exact bounds of the auxiliary and target space from the sparse space.
  91  Rather than try to come up with a separate algorithm to handle sparse source
  92  spaces directly, we just find a legal transformation matrix that gives you
  93  the sparse source space, from a dense space, and then transform the dense
  94  space.
  95
  96  For a regular sparse space, you can represent the source space as an integer
  97  lattice, and the base space of that lattice will always be dense.  Thus, we
  98  effectively use the lattice to figure out the transformation from the lattice
  99  base space, to the sparse iteration space (IE what transform was applied to
 100  the dense space to make it sparse).  We then compose this transform with the
 101  transformation matrix specified by the user (since our matrix transformations
 102  are closed under composition, this is okay).  We can then use the base space
 103  (which is dense) plus the composed transformation matrix, to compute the rest
 104  of the transform using the dense space algorithm above.
 105
 106  In other words, our sparse source space (B) is decomposed into a dense base
 107  space (A), and a matrix (L) that transforms A into B, such that A.L = B.
 108  We then compute the composition of L and the user transformation matrix (T),
 109  so that T is now a transform from A to the result, instead of from B to the
 110  result.
 111  IE A.(LT) = result instead of B.T = result
 112  Since A is now a dense source space, we can use the dense source space
 113  algorithm above to compute the result of applying transform (LT) to A.
 114
 115  Fourier-Motzkin elimination is used to compute the bounds of the base space
 116  of the lattice.  */
 117
 118 static bool perfect_nestify (struct loops *,
 119                              struct loop *, VEC(tree,heap) *,
 120                              VEC(tree,heap) *, VEC(int,heap) *,
 121                              VEC(tree,heap) *);
 122 /* Lattice stuff that is internal to the code generation algorithm.  */
 123
 124 typedef struct
 125 {
 126   /* Lattice base matrix.  */
 127   lambda_matrix base;
 128   /* Lattice dimension.  */
 129   int dimension;
 130   /* Origin vector for the coefficients.  */
 131   lambda_vector origin;
 132   /* Origin matrix for the invariants.  */
 133   lambda_matrix origin_invariants;
 134   /* Number of invariants.  */
 135   int invariants;
 136 } *lambda_lattice;
 137
 138 #define LATTICE_BASE(T) ((T)->base)
 139 #define LATTICE_DIMENSION(T) ((T)->dimension)
 140 #define LATTICE_ORIGIN(T) ((T)->origin)
 141 #define LATTICE_ORIGIN_INVARIANTS(T) ((T)->origin_invariants)
 142 #define LATTICE_INVARIANTS(T) ((T)->invariants)
 143
 144 static bool lle_equal (lambda_linear_expression, lambda_linear_expression,
 145                        int, int);
 146 static lambda_lattice lambda_lattice_new (int, int);
 147 static lambda_lattice lambda_lattice_compute_base (lambda_loopnest);
 148
 149 static tree find_induction_var_from_exit_cond (struct loop *);
 150 static bool can_convert_to_perfect_nest (struct loop *);
 151
 152 /* Create a new lambda body vector.  */
 153
 154 lambda_body_vector
 155 lambda_body_vector_new (int size)
 156 {
 157   lambda_body_vector ret;
 158
 159   ret = ggc_alloc (sizeof (*ret));
 160   LBV_COEFFICIENTS (ret) = lambda_vector_new (size);
 161   LBV_SIZE (ret) = size;
 162   LBV_DENOMINATOR (ret) = 1;
 163   return ret;
 164 }
 165
 166 /* Compute the new coefficients for the vector based on the
 167   *inverse* of the transformation matrix.  */
 168
 169 lambda_body_vector
 170 lambda_body_vector_compute_new (lambda_trans_matrix transform,
 171                                 lambda_body_vector vect)
 172 {
 173   lambda_body_vector temp;
 174   int depth;
 175
 176   /* Make sure the matrix is square.  */
 177   gcc_assert (LTM_ROWSIZE (transform) == LTM_COLSIZE (transform));
 178
 179   depth = LTM_ROWSIZE (transform);
 180
 181   temp = lambda_body_vector_new (depth);
 182   LBV_DENOMINATOR (temp) =
 183     LBV_DENOMINATOR (vect) * LTM_DENOMINATOR (transform);
 184   lambda_vector_matrix_mult (LBV_COEFFICIENTS (vect), depth,
 185                              LTM_MATRIX (transform), depth,
 186                              LBV_COEFFICIENTS (temp));
 187   LBV_SIZE (temp) = LBV_SIZE (vect);
 188   return temp;
 189 }
 190
 191 /* Print out a lambda body vector.  */
 192
 193 void
 194 print_lambda_body_vector (FILE * outfile, lambda_body_vector body)
 195 {
 196   print_lambda_vector (outfile, LBV_COEFFICIENTS (body), LBV_SIZE (body));
 197 }
 198
 199 /* Return TRUE if two linear expressions are equal.  */
 200
 201 static bool
 202 lle_equal (lambda_linear_expression lle1, lambda_linear_expression lle2,
 203            int depth, int invariants)
 204 {
 205   int i;
 206
 207   if (lle1 == NULL || lle2 == NULL)
 208     return false;
 209   if (LLE_CONSTANT (lle1) != LLE_CONSTANT (lle2))
 210     return false;
 211   if (LLE_DENOMINATOR (lle1) != LLE_DENOMINATOR (lle2))
 212     return false;
 213   for (i = 0; i < depth; i++)
 214     if (LLE_COEFFICIENTS (lle1)[i] != LLE_COEFFICIENTS (lle2)[i])
 215       return false;
 216   for (i = 0; i < invariants; i++)
 217     if (LLE_INVARIANT_COEFFICIENTS (lle1)[i] !=
 218         LLE_INVARIANT_COEFFICIENTS (lle2)[i])
 219       return false;
 220   return true;
 221 }
 222
 223 /* Create a new linear expression with dimension DIM, and total number
 224    of invariants INVARIANTS.  */
 225
 226 lambda_linear_expression
 227 lambda_linear_expression_new (int dim, int invariants)
 228 {
 229   lambda_linear_expression ret;
 230
 231   ret = ggc_alloc_cleared (sizeof (*ret));
 232
 233   LLE_COEFFICIENTS (ret) = lambda_vector_new (dim);
 234   LLE_CONSTANT (ret) = 0;
 235   LLE_INVARIANT_COEFFICIENTS (ret) = lambda_vector_new (invariants);
 236   LLE_DENOMINATOR (ret) = 1;
 237   LLE_NEXT (ret) = NULL;
 238
 239   return ret;
 240 }
 241
 242 /* Print out a linear expression EXPR, with SIZE coefficients, to OUTFILE.
 243    The starting letter used for variable names is START.  */
 244
 245 static void
 246 print_linear_expression (FILE * outfile, lambda_vector expr, int size,
 247                          char start)
 248 {
 249   int i;
 250   bool first = true;
 251   for (i = 0; i < size; i++)
 252     {
 253       if (expr[i] != 0)
 254         {
 255           if (first)
 256             {
 257               if (expr[i] < 0)
 258                 fprintf (outfile, "-");
 259               first = false;
 260             }
 261           else if (expr[i] > 0)
 262             fprintf (outfile, " + ");
 263           else
 264             fprintf (outfile, " - ");
 265           if (abs (expr[i]) == 1)
 266             fprintf (outfile, "%c", start + i);
 267           else
 268             fprintf (outfile, "%d%c", abs (expr[i]), start + i);
 269         }
 270     }
 271 }
 272
 273 /* Print out a lambda linear expression structure, EXPR, to OUTFILE. The
 274    depth/number of coefficients is given by DEPTH, the number of invariants is
 275    given by INVARIANTS, and the character to start variable names with is given
 276    by START.  */
 277
 278 void
 279 print_lambda_linear_expression (FILE * outfile,
 280                                 lambda_linear_expression expr,
 281                                 int depth, int invariants, char start)
 282 {
 283   fprintf (outfile, "\tLinear expression: ");
 284   print_linear_expression (outfile, LLE_COEFFICIENTS (expr), depth, start);
 285   fprintf (outfile, " constant: %d ", LLE_CONSTANT (expr));
 286   fprintf (outfile, "  invariants: ");
 287   print_linear_expression (outfile, LLE_INVARIANT_COEFFICIENTS (expr),
 288                            invariants, 'A');
 289   fprintf (outfile, "  denominator: %d\n", LLE_DENOMINATOR (expr));
 290 }
 291
 292 /* Print a lambda loop structure LOOP to OUTFILE.  The depth/number of
 293    coefficients is given by DEPTH, the number of invariants is
 294    given by INVARIANTS, and the character to start variable names with is given
 295    by START.  */
 296
 297 void
 298 print_lambda_loop (FILE * outfile, lambda_loop loop, int depth,
 299                    int invariants, char start)
 300 {
 301   int step;
 302   lambda_linear_expression expr;
 303
 304   gcc_assert (loop);
 305
 306   expr = LL_LINEAR_OFFSET (loop);
 307   step = LL_STEP (loop);
 308   fprintf (outfile, "  step size = %d \n", step);
 309
 310   if (expr)
 311     {
 312       fprintf (outfile, "  linear offset: \n");
 313       print_lambda_linear_expression (outfile, expr, depth, invariants,
 314                                       start);
 315     }
 316
 317   fprintf (outfile, "  lower bound: \n");
 318   for (expr = LL_LOWER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
 319     print_lambda_linear_expression (outfile, expr, depth, invariants, start);
 320   fprintf (outfile, "  upper bound: \n");
 321   for (expr = LL_UPPER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
 322     print_lambda_linear_expression (outfile, expr, depth, invariants, start);
 323 }
 324
 325 /* Create a new loop nest structure with DEPTH loops, and INVARIANTS as the
 326    number of invariants.  */
 327
 328 lambda_loopnest
 329 lambda_loopnest_new (int depth, int invariants)
 330 {
 331   lambda_loopnest ret;
 332   ret = ggc_alloc (sizeof (*ret));
 333
 334   LN_LOOPS (ret) = ggc_alloc_cleared (depth * sizeof (lambda_loop));
 335   LN_DEPTH (ret) = depth;
 336   LN_INVARIANTS (ret) = invariants;
 337
 338   return ret;
 339 }
 340
 341 /* Print a lambda loopnest structure, NEST, to OUTFILE.  The starting
 342    character to use for loop names is given by START.  */
 343
 344 void
 345 print_lambda_loopnest (FILE * outfile, lambda_loopnest nest, char start)
 346 {
 347   int i;
 348   for (i = 0; i < LN_DEPTH (nest); i++)
 349     {
 350       fprintf (outfile, "Loop %c\n", start + i);
 351       print_lambda_loop (outfile, LN_LOOPS (nest)[i], LN_DEPTH (nest),
 352                          LN_INVARIANTS (nest), 'i');
 353       fprintf (outfile, "\n");
 354     }
 355 }
 356
 357 /* Allocate a new lattice structure of DEPTH x DEPTH, with INVARIANTS number
 358    of invariants.  */
 359
 360 static lambda_lattice
 361 lambda_lattice_new (int depth, int invariants)
 362 {
 363   lambda_lattice ret;
 364   ret = ggc_alloc (sizeof (*ret));
 365   LATTICE_BASE (ret) = lambda_matrix_new (depth, depth);
 366   LATTICE_ORIGIN (ret) = lambda_vector_new (depth);
 367   LATTICE_ORIGIN_INVARIANTS (ret) = lambda_matrix_new (depth, invariants);
 368   LATTICE_DIMENSION (ret) = depth;
 369   LATTICE_INVARIANTS (ret) = invariants;
 370   return ret;
 371 }
 372
 373 /* Compute the lattice base for NEST.  The lattice base is essentially a
 374    non-singular transform from a dense base space to a sparse iteration space.
 375    We use it so that we don't have to specially handle the case of a sparse
 376    iteration space in other parts of the algorithm.  As a result, this routine
 377    only does something interesting (IE produce a matrix that isn't the
 378    identity matrix) if NEST is a sparse space.  */
 379
 380 static lambda_lattice
 381 lambda_lattice_compute_base (lambda_loopnest nest)
 382 {
 383   lambda_lattice ret;
 384   int depth, invariants;
 385   lambda_matrix base;
 386
 387   int i, j, step;
 388   lambda_loop loop;
 389   lambda_linear_expression expression;
 390
 391   depth = LN_DEPTH (nest);
 392   invariants = LN_INVARIANTS (nest);
 393
 394   ret = lambda_lattice_new (depth, invariants);
 395   base = LATTICE_BASE (ret);
 396   for (i = 0; i < depth; i++)
 397     {
 398       loop = LN_LOOPS (nest)[i];
 399       gcc_assert (loop);
 400       step = LL_STEP (loop);
 401       /* If we have a step of 1, then the base is one, and the
 402          origin and invariant coefficients are 0.  */
 403       if (step == 1)
 404         {
 405           for (j = 0; j < depth; j++)
 406             base[i][j] = 0;
 407           base[i][i] = 1;
 408           LATTICE_ORIGIN (ret)[i] = 0;
 409           for (j = 0; j < invariants; j++)
 410             LATTICE_ORIGIN_INVARIANTS (ret)[i][j] = 0;
 411         }
 412       else
 413         {
 414           /* Otherwise, we need the lower bound expression (which must
 415              be an affine function)  to determine the base.  */
 416           expression = LL_LOWER_BOUND (loop);
 417           gcc_assert (expression && !LLE_NEXT (expression)
 418                       && LLE_DENOMINATOR (expression) == 1);
 419
 420           /* The lower triangular portion of the base is going to be the
 421              coefficient times the step */
 422           for (j = 0; j < i; j++)
 423             base[i][j] = LLE_COEFFICIENTS (expression)[j]
 424               * LL_STEP (LN_LOOPS (nest)[j]);
 425           base[i][i] = step;
 426           for (j = i + 1; j < depth; j++)
 427             base[i][j] = 0;
 428
 429           /* Origin for this loop is the constant of the lower bound
 430              expression.  */
 431           LATTICE_ORIGIN (ret)[i] = LLE_CONSTANT (expression);
 432
 433           /* Coefficient for the invariants are equal to the invariant
 434              coefficients in the expression.  */
 435           for (j = 0; j < invariants; j++)
 436             LATTICE_ORIGIN_INVARIANTS (ret)[i][j] =
 437               LLE_INVARIANT_COEFFICIENTS (expression)[j];
 438         }
 439     }
 440   return ret;
 441 }
 442
 443 /* Compute the least common multiple of two numbers A and B .  */
 444
 445 static int
 446 lcm (int a, int b)
 447 {
 448   return (abs (a) * abs (b) / gcd (a, b));
 449 }
 450
 451 /* Perform Fourier-Motzkin elimination to calculate the bounds of the
 452    auxiliary nest.
 453    Fourier-Motzkin is a way of reducing systems of linear inequalities so that
 454    it is easy to calculate the answer and bounds.
 455    A sketch of how it works:
 456    Given a system of linear inequalities, ai * xj >= bk, you can always
 457    rewrite the constraints so they are all of the form
 458    a <= x, or x <= b, or x >= constant for some x in x1 ... xj (and some b
 459    in b1 ... bk, and some a in a1...ai)
 460    You can then eliminate this x from the non-constant inequalities by
 461    rewriting these as a <= b, x >= constant, and delete the x variable.
 462    You can then repeat this for any remaining x variables, and then we have
 463    an easy to use variable <= constant (or no variables at all) form that we
 464    can construct our bounds from.
 465
 466    In our case, each time we eliminate, we construct part of the bound from
 467    the ith variable, then delete the ith variable.
 468
 469    Remember the constant are in our vector a, our coefficient matrix is A,
 470    and our invariant coefficient matrix is B.
 471
 472    SIZE is the size of the matrices being passed.
 473    DEPTH is the loop nest depth.
 474    INVARIANTS is the number of loop invariants.
 475    A, B, and a are the coefficient matrix, invariant coefficient, and a
 476    vector of constants, respectively.  */
 477
 478 static lambda_loopnest
 479 compute_nest_using_fourier_motzkin (int size,
 480                                     int depth,
 481                                     int invariants,
 482                                     lambda_matrix A,
 483                                     lambda_matrix B,
 484                                     lambda_vector a)
 485 {
 486
 487   int multiple, f1, f2;
 488   int i, j, k;
 489   lambda_linear_expression expression;
 490   lambda_loop loop;
 491   lambda_loopnest auxillary_nest;
 492   lambda_matrix swapmatrix, A1, B1;
 493   lambda_vector swapvector, a1;
 494   int newsize;
 495
 496   A1 = lambda_matrix_new (128, depth);
 497   B1 = lambda_matrix_new (128, invariants);
 498   a1 = lambda_vector_new (128);
 499
 500   auxillary_nest = lambda_loopnest_new (depth, invariants);
 501
 502   for (i = depth - 1; i >= 0; i--)
 503     {
 504       loop = lambda_loop_new ();
 505       LN_LOOPS (auxillary_nest)[i] = loop;
 506       LL_STEP (loop) = 1;
 507
 508       for (j = 0; j < size; j++)
 509         {
 510           if (A[j][i] < 0)
 511             {
 512               /* Any linear expression in the matrix with a coefficient less
 513                  than 0 becomes part of the new lower bound.  */
 514               expression = lambda_linear_expression_new (depth, invariants);
 515
 516               for (k = 0; k < i; k++)
 517                 LLE_COEFFICIENTS (expression)[k] = A[j][k];
 518
 519               for (k = 0; k < invariants; k++)
 520                 LLE_INVARIANT_COEFFICIENTS (expression)[k] = -1 * B[j][k];
 521
 522               LLE_DENOMINATOR (expression) = -1 * A[j][i];
 523               LLE_CONSTANT (expression) = -1 * a[j];
 524
 525               /* Ignore if identical to the existing lower bound.  */
 526               if (!lle_equal (LL_LOWER_BOUND (loop),
 527                               expression, depth, invariants))
 528                 {
 529                   LLE_NEXT (expression) = LL_LOWER_BOUND (loop);
 530                   LL_LOWER_BOUND (loop) = expression;
 531                 }
 532
 533             }
 534           else if (A[j][i] > 0)
 535             {
 536               /* Any linear expression with a coefficient greater than 0
 537                  becomes part of the new upper bound.  */
 538               expression = lambda_linear_expression_new (depth, invariants);
 539               for (k = 0; k < i; k++)
 540                 LLE_COEFFICIENTS (expression)[k] = -1 * A[j][k];
 541
 542               for (k = 0; k < invariants; k++)
 543                 LLE_INVARIANT_COEFFICIENTS (expression)[k] = B[j][k];
 544
 545               LLE_DENOMINATOR (expression) = A[j][i];
 546               LLE_CONSTANT (expression) = a[j];
 547
 548               /* Ignore if identical to the existing upper bound.  */
 549               if (!lle_equal (LL_UPPER_BOUND (loop),
 550                               expression, depth, invariants))
 551                 {
 552                   LLE_NEXT (expression) = LL_UPPER_BOUND (loop);
 553                   LL_UPPER_BOUND (loop) = expression;
 554                 }
 555
 556             }
 557         }
 558
 559       /* This portion creates a new system of linear inequalities by deleting
 560          the i'th variable, reducing the system by one variable.  */
 561       newsize = 0;
 562       for (j = 0; j < size; j++)
 563         {
 564           /* If the coefficient for the i'th variable is 0, then we can just
 565              eliminate the variable straightaway.  Otherwise, we have to
 566              multiply through by the coefficients we are eliminating.  */
 567           if (A[j][i] == 0)
 568             {
 569               lambda_vector_copy (A[j], A1[newsize], depth);
 570               lambda_vector_copy (B[j], B1[newsize], invariants);
 571               a1[newsize] = a[j];
 572               newsize++;
 573             }
 574           else if (A[j][i] > 0)
 575             {
 576               for (k = 0; k < size; k++)
 577                 {
 578                   if (A[k][i] < 0)
 579                     {
 580                       multiple = lcm (A[j][i], A[k][i]);
 581                       f1 = multiple / A[j][i];
 582                       f2 = -1 * multiple / A[k][i];
 583
 584                       lambda_vector_add_mc (A[j], f1, A[k], f2,
 585                                             A1[newsize], depth);
 586                       lambda_vector_add_mc (B[j], f1, B[k], f2,
 587                                             B1[newsize], invariants);
 588                       a1[newsize] = f1 * a[j] + f2 * a[k];
 589                       newsize++;
 590                     }
 591                 }
 592             }
 593         }
 594
 595       swapmatrix = A;
 596       A = A1;
 597       A1 = swapmatrix;
 598
 599       swapmatrix = B;
 600       B = B1;
 601       B1 = swapmatrix;
 602
 603       swapvector = a;
 604       a = a1;
 605       a1 = swapvector;
 606
 607       size = newsize;
 608     }
 609
 610   return auxillary_nest;
 611 }
 612
 613 /* Compute the loop bounds for the auxiliary space NEST.
 614    Input system used is Ax <= b.  TRANS is the unimodular transformation.
 615    Given the original nest, this function will
 616    1. Convert the nest into matrix form, which consists of a matrix for the
 617    coefficients, a matrix for the
 618    invariant coefficients, and a vector for the constants.
 619    2. Use the matrix form to calculate the lattice base for the nest (which is
 620    a dense space)
 621    3. Compose the dense space transform with the user specified transform, to
 622    get a transform we can easily calculate transformed bounds for.
 623    4. Multiply the composed transformation matrix times the matrix form of the
 624    loop.
 625    5. Transform the newly created matrix (from step 4) back into a loop nest
 626    using Fourier-Motzkin elimination to figure out the bounds.  */
 627
 628 static lambda_loopnest
 629 lambda_compute_auxillary_space (lambda_loopnest nest,
 630                                 lambda_trans_matrix trans)
 631 {
 632   lambda_matrix A, B, A1, B1;
 633   lambda_vector a, a1;
 634   lambda_matrix invertedtrans;
 635   int depth, invariants, size;
 636   int i, j;
 637   lambda_loop loop;
 638   lambda_linear_expression expression;
 639   lambda_lattice lattice;
 640
 641   depth = LN_DEPTH (nest);
 642   invariants = LN_INVARIANTS (nest);
 643
 644   /* Unfortunately, we can't know the number of constraints we'll have
 645      ahead of time, but this should be enough even in ridiculous loop nest
 646      cases. We must not go over this limit.  */
 647   A = lambda_matrix_new (128, depth);
 648   B = lambda_matrix_new (128, invariants);
 649   a = lambda_vector_new (128);
 650
 651   A1 = lambda_matrix_new (128, depth);
 652   B1 = lambda_matrix_new (128, invariants);
 653   a1 = lambda_vector_new (128);
 654
 655   /* Store the bounds in the equation matrix A, constant vector a, and
 656      invariant matrix B, so that we have Ax <= a + B.
 657      This requires a little equation rearranging so that everything is on the
 658      correct side of the inequality.  */
 659   size = 0;
 660   for (i = 0; i < depth; i++)
 661     {
 662       loop = LN_LOOPS (nest)[i];
 663
 664       /* First we do the lower bound.  */
 665       if (LL_STEP (loop) > 0)
 666         expression = LL_LOWER_BOUND (loop);
 667       else
 668         expression = LL_UPPER_BOUND (loop);
 669
 670       for (; expression != NULL; expression = LLE_NEXT (expression))
 671         {
 672           /* Fill in the coefficient.  */
 673           for (j = 0; j < i; j++)
 674             A[size][j] = LLE_COEFFICIENTS (expression)[j];
 675
 676           /* And the invariant coefficient.  */
 677           for (j = 0; j < invariants; j++)
 678             B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];
 679
 680           /* And the constant.  */
 681           a[size] = LLE_CONSTANT (expression);
 682
 683           /* Convert (2x+3y+2+b)/4 <= z to 2x+3y-4z <= -2-b.  IE put all
 684              constants and single variables on   */
 685           A[size][i] = -1 * LLE_DENOMINATOR (expression);
 686           a[size] *= -1;
 687           for (j = 0; j < invariants; j++)
 688             B[size][j] *= -1;
 689
 690           size++;
 691           /* Need to increase matrix sizes above.  */
 692           gcc_assert (size <= 127);
 693
 694         }
 695
 696       /* Then do the exact same thing for the upper bounds.  */
 697       if (LL_STEP (loop) > 0)
 698         expression = LL_UPPER_BOUND (loop);
 699       else
 700         expression = LL_LOWER_BOUND (loop);
 701
 702       for (; expression != NULL; expression = LLE_NEXT (expression))
 703         {
 704           /* Fill in the coefficient.  */
 705           for (j = 0; j < i; j++)
 706             A[size][j] = LLE_COEFFICIENTS (expression)[j];
 707
 708           /* And the invariant coefficient.  */
 709           for (j = 0; j < invariants; j++)
 710             B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];
 711
 712           /* And the constant.  */
 713           a[size] = LLE_CONSTANT (expression);
 714
 715           /* Convert z <= (2x+3y+2+b)/4 to -2x-3y+4z <= 2+b.  */
 716           for (j = 0; j < i; j++)
 717             A[size][j] *= -1;
 718           A[size][i] = LLE_DENOMINATOR (expression);
 719           size++;
 720           /* Need to increase matrix sizes above.  */
 721           gcc_assert (size <= 127);
 722
 723         }
 724     }
 725
 726   /* Compute the lattice base x = base * y + origin, where y is the
 727      base space.  */
 728   lattice = lambda_lattice_compute_base (nest);
 729
 730   /* Ax <= a + B then becomes ALy <= a+B - A*origin.  L is the lattice base  */
 731
 732   /* A1 = A * L */
 733   lambda_matrix_mult (A, LATTICE_BASE (lattice), A1, size, depth, depth);
 734
 735   /* a1 = a - A * origin constant.  */
 736   lambda_matrix_vector_mult (A, size, depth, LATTICE_ORIGIN (lattice), a1);
 737   lambda_vector_add_mc (a, 1, a1, -1, a1, size);
 738
 739   /* B1 = B - A * origin invariant.  */
 740   lambda_matrix_mult (A, LATTICE_ORIGIN_INVARIANTS (lattice), B1, size, depth,
 741                       invariants);
 742   lambda_matrix_add_mc (B, 1, B1, -1, B1, size, invariants);
 743
 744   /* Now compute the auxiliary space bounds by first inverting U, multiplying
 745      it by A1, then performing Fourier-Motzkin.  */
 746
 747   invertedtrans = lambda_matrix_new (depth, depth);
 748
 749   /* Compute the inverse of U.  */
 750   lambda_matrix_inverse (LTM_MATRIX (trans),
 751                          invertedtrans, depth);
 752
 753   /* A = A1 inv(U).  */
 754   lambda_matrix_mult (A1, invertedtrans, A, size, depth, depth);
 755
 756   return compute_nest_using_fourier_motzkin (size, depth, invariants,
 757                                              A, B1, a1);
 758 }
 759
 760 /* Compute the loop bounds for the target space, using the bounds of
 761    the auxiliary nest AUXILLARY_NEST, and the triangular matrix H.
 762    The target space loop bounds are computed by multiplying the triangular
 763    matrix H by the auxiliary nest, to get the new loop bounds.  The sign of
 764    the loop steps (positive or negative) is then used to swap the bounds if
 765    the loop counts downwards.
 766    Return the target loopnest.  */
 767
 768 static lambda_loopnest
 769 lambda_compute_target_space (lambda_loopnest auxillary_nest,
 770                              lambda_trans_matrix H, lambda_vector stepsigns)
 771 {
 772   lambda_matrix inverse, H1;
 773   int determinant, i, j;
 774   int gcd1, gcd2;
 775   int factor;
 776
 777   lambda_loopnest target_nest;
 778   int depth, invariants;
 779   lambda_matrix target;
 780
 781   lambda_loop auxillary_loop, target_loop;
 782   lambda_linear_expression expression, auxillary_expr, target_expr, tmp_expr;
 783
 784   depth = LN_DEPTH (auxillary_nest);
 785   invariants = LN_INVARIANTS (auxillary_nest);
 786
 787   inverse = lambda_matrix_new (depth, depth);
 788   determinant = lambda_matrix_inverse (LTM_MATRIX (H), inverse, depth);
 789
 790   /* H1 is H excluding its diagonal.  */
 791   H1 = lambda_matrix_new (depth, depth);
 792   lambda_matrix_copy (LTM_MATRIX (H), H1, depth, depth);
 793
 794   for (i = 0; i < depth; i++)
 795     H1[i][i] = 0;
 796
 797   /* Computes the linear offsets of the loop bounds.  */
 798   target = lambda_matrix_new (depth, depth);
 799   lambda_matrix_mult (H1, inverse, target, depth, depth, depth);
 800
 801   target_nest = lambda_loopnest_new (depth, invariants);
 802
 803   for (i = 0; i < depth; i++)
 804     {
 805
 806       /* Get a new loop structure.  */
 807       target_loop = lambda_loop_new ();
 808       LN_LOOPS (target_nest)[i] = target_loop;
 809
 810       /* Computes the gcd of the coefficients of the linear part.  */
 811       gcd1 = lambda_vector_gcd (target[i], i);
 812
 813       /* Include the denominator in the GCD.  */
 814       gcd1 = gcd (gcd1, determinant);
 815
 816       /* Now divide through by the gcd.  */
 817       for (j = 0; j < i; j++)
 818         target[i][j] = target[i][j] / gcd1;
 819
 820       expression = lambda_linear_expression_new (depth, invariants);
 821       lambda_vector_copy (target[i], LLE_COEFFICIENTS (expression), depth);
 822       LLE_DENOMINATOR (expression) = determinant / gcd1;
 823       LLE_CONSTANT (expression) = 0;
 824       lambda_vector_clear (LLE_INVARIANT_COEFFICIENTS (expression),
 825                            invariants);
 826       LL_LINEAR_OFFSET (target_loop) = expression;
 827     }
 828
 829   /* For each loop, compute the new bounds from H.  */
 830   for (i = 0; i < depth; i++)
 831     {
 832       auxillary_loop = LN_LOOPS (auxillary_nest)[i];
 833       target_loop = LN_LOOPS (target_nest)[i];
 834       LL_STEP (target_loop) = LTM_MATRIX (H)[i][i];
 835       factor = LTM_MATRIX (H)[i][i];
 836
 837       /* First we do the lower bound.  */
 838       auxillary_expr = LL_LOWER_BOUND (auxillary_loop);
 839
 840       for (; auxillary_expr != NULL;
 841            auxillary_expr = LLE_NEXT (auxillary_expr))
 842         {
 843           target_expr = lambda_linear_expression_new (depth, invariants);
 844           lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
 845                                      depth, inverse, depth,
 846                                      LLE_COEFFICIENTS (target_expr));
 847           lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
 848                                     LLE_COEFFICIENTS (target_expr), depth,
 849                                     factor);
 850
 851           LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
 852           lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
 853                               LLE_INVARIANT_COEFFICIENTS (target_expr),
 854                               invariants);
 855           lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
 856                                     LLE_INVARIANT_COEFFICIENTS (target_expr),
 857                                     invariants, factor);
 858           LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
 859
 860           if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
 861             {
 862               LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
 863                 * determinant;
 864               lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
 865                                         (target_expr),
 866                                         LLE_INVARIANT_COEFFICIENTS
 867                                         (target_expr), invariants,
 868                                         determinant);
 869               LLE_DENOMINATOR (target_expr) =
 870                 LLE_DENOMINATOR (target_expr) * determinant;
 871             }
 872           /* Find the gcd and divide by it here, rather than doing it
 873              at the tree level.  */
 874           gcd1 = lambda_vector_gcd (LLE_COEFFICIENTS (target_expr), depth);
 875           gcd2 = lambda_vector_gcd (LLE_INVARIANT_COEFFICIENTS (target_expr),
 876                                     invariants);
 877           gcd1 = gcd (gcd1, gcd2);
 878           gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
 879           gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
 880           for (j = 0; j < depth; j++)
 881             LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
 882           for (j = 0; j < invariants; j++)
 883             LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
 884           LLE_CONSTANT (target_expr) /= gcd1;
 885           LLE_DENOMINATOR (target_expr) /= gcd1;
 886           /* Ignore if identical to existing bound.  */
 887           if (!lle_equal (LL_LOWER_BOUND (target_loop), target_expr, depth,
 888                           invariants))
 889             {
 890               LLE_NEXT (target_expr) = LL_LOWER_BOUND (target_loop);
 891               LL_LOWER_BOUND (target_loop) = target_expr;
 892             }
 893         }
 894       /* Now do the upper bound.  */
 895       auxillary_expr = LL_UPPER_BOUND (auxillary_loop);
 896
 897       for (; auxillary_expr != NULL;
 898            auxillary_expr = LLE_NEXT (auxillary_expr))
 899         {
 900           target_expr = lambda_linear_expression_new (depth, invariants);
 901           lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
 902                                      depth, inverse, depth,
 903                                      LLE_COEFFICIENTS (target_expr));
 904           lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
 905                                     LLE_COEFFICIENTS (target_expr), depth,
 906                                     factor);
 907           LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
 908           lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
 909                               LLE_INVARIANT_COEFFICIENTS (target_expr),
 910                               invariants);
 911           lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
 912                                     LLE_INVARIANT_COEFFICIENTS (target_expr),
 913                                     invariants, factor);
 914           LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
 915
 916           if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
 917             {
 918               LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
 919                 * determinant;
 920               lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
 921                                         (target_expr),
 922                                         LLE_INVARIANT_COEFFICIENTS
 923                                         (target_expr), invariants,
 924                                         determinant);
 925               LLE_DENOMINATOR (target_expr) =
 926                 LLE_DENOMINATOR (target_expr) * determinant;
 927             }
 928           /* Find the gcd and divide by it here, instead of at the
 929              tree level.  */
 930           gcd1 = lambda_vector_gcd (LLE_COEFFICIENTS (target_expr), depth);
 931           gcd2 = lambda_vector_gcd (LLE_INVARIANT_COEFFICIENTS (target_expr),
 932                                     invariants);
 933           gcd1 = gcd (gcd1, gcd2);
 934           gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
 935           gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
 936           for (j = 0; j < depth; j++)
 937             LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
 938           for (j = 0; j < invariants; j++)
 939             LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
 940           LLE_CONSTANT (target_expr) /= gcd1;
 941           LLE_DENOMINATOR (target_expr) /= gcd1;
 942           /* Ignore if equal to existing bound.  */
 943           if (!lle_equal (LL_UPPER_BOUND (target_loop), target_expr, depth,
 944                           invariants))
 945             {
 946               LLE_NEXT (target_expr) = LL_UPPER_BOUND (target_loop);
 947               LL_UPPER_BOUND (target_loop) = target_expr;
 948             }
 949         }
 950     }
 951   for (i = 0; i < depth; i++)
 952     {
 953       target_loop = LN_LOOPS (target_nest)[i];
 954       /* If necessary, exchange the upper and lower bounds and negate
 955          the step size.  */
 956       if (stepsigns[i] < 0)
 957         {
 958           LL_STEP (target_loop) *= -1;
 959           tmp_expr = LL_LOWER_BOUND (target_loop);
 960           LL_LOWER_BOUND (target_loop) = LL_UPPER_BOUND (target_loop);
 961           LL_UPPER_BOUND (target_loop) = tmp_expr;
 962         }
 963     }
 964   return target_nest;
 965 }
 966
 967 /* Compute the step signs of TRANS, using TRANS and stepsigns.  Return the new
 968    result.  */
 969
 970 static lambda_vector
 971 lambda_compute_step_signs (lambda_trans_matrix trans, lambda_vector stepsigns)
 972 {
 973   lambda_matrix matrix, H;
 974   int size;
 975   lambda_vector newsteps;
 976   int i, j, factor, minimum_column;
 977   int temp;
 978
 979   matrix = LTM_MATRIX (trans);
 980   size = LTM_ROWSIZE (trans);
 981   H = lambda_matrix_new (size, size);
 982
 983   newsteps = lambda_vector_new (size);
 984   lambda_vector_copy (stepsigns, newsteps, size);
 985
 986   lambda_matrix_copy (matrix, H, size, size);
 987
 988   for (j = 0; j < size; j++)
 989     {
 990       lambda_vector row;
 991       row = H[j];
 992       for (i = j; i < size; i++)
 993         if (row[i] < 0)
 994           lambda_matrix_col_negate (H, size, i);
 995       while (lambda_vector_first_nz (row, size, j + 1) < size)
 996         {
 997           minimum_column = lambda_vector_min_nz (row, size, j);
 998           lambda_matrix_col_exchange (H, size, j, minimum_column);
 999
1000           temp = newsteps[j];
1001           newsteps[j] = newsteps[minimum_column];
1002           newsteps[minimum_column] = temp;
1003
1004           for (i = j + 1; i < size; i++)
1005             {
1006               factor = row[i] / row[j];
1007               lambda_matrix_col_add (H, size, j, i, -1 * factor);
1008             }
1009         }
1010     }
1011   return newsteps;
1012 }
1013
1014 /* Transform NEST according to TRANS, and return the new loopnest.
1015    This involves
1016    1. Computing a lattice base for the transformation
1017    2. Composing the dense base with the specified transformation (TRANS)
1018    3. Decomposing the combined transformation into a lower triangular portion,
1019    and a unimodular portion.
1020    4. Computing the auxiliary nest using the unimodular portion.
1021    5. Computing the target nest using the auxiliary nest and the lower
1022    triangular portion.  */
1023
1024 lambda_loopnest
1025 lambda_loopnest_transform (lambda_loopnest nest, lambda_trans_matrix trans)
1026 {
1027   lambda_loopnest auxillary_nest, target_nest;
1028
1029   int depth, invariants;
1030   int i, j;
1031   lambda_lattice lattice;
1032   lambda_trans_matrix trans1, H, U;
1033   lambda_loop loop;
1034   lambda_linear_expression expression;
1035   lambda_vector origin;
1036   lambda_matrix origin_invariants;
1037   lambda_vector stepsigns;
1038   int f;
1039
1040   depth = LN_DEPTH (nest);
1041   invariants = LN_INVARIANTS (nest);
1042
1043   /* Keep track of the signs of the loop steps.  */
1044   stepsigns = lambda_vector_new (depth);
1045   for (i = 0; i < depth; i++)
1046     {
1047       if (LL_STEP (LN_LOOPS (nest)[i]) > 0)
1048         stepsigns[i] = 1;
1049       else
1050         stepsigns[i] = -1;
1051     }
1052
1053   /* Compute the lattice base.  */
1054   lattice = lambda_lattice_compute_base (nest);
1055   trans1 = lambda_trans_matrix_new (depth, depth);
1056
1057   /* Multiply the transformation matrix by the lattice base.  */
1058
1059   lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_BASE (lattice),
1060                       LTM_MATRIX (trans1), depth, depth, depth);
1061
1062   /* Compute the Hermite normal form for the new transformation matrix.  */
1063   H = lambda_trans_matrix_new (depth, depth);
1064   U = lambda_trans_matrix_new (depth, depth);
1065   lambda_matrix_hermite (LTM_MATRIX (trans1), depth, LTM_MATRIX (H),
1066                          LTM_MATRIX (U));
1067
1068   /* Compute the auxiliary loop nest's space from the unimodular
1069      portion.  */
1070   auxillary_nest = lambda_compute_auxillary_space (nest, U);
1071
1072   /* Compute the loop step signs from the old step signs and the
1073      transformation matrix.  */
1074   stepsigns = lambda_compute_step_signs (trans1, stepsigns);
1075
1076   /* Compute the target loop nest space from the auxiliary nest and
1077      the lower triangular matrix H.  */
1078   target_nest = lambda_compute_target_space (auxillary_nest, H, stepsigns);
1079   origin = lambda_vector_new (depth);
1080   origin_invariants = lambda_matrix_new (depth, invariants);
1081   lambda_matrix_vector_mult (LTM_MATRIX (trans), depth, depth,
1082                              LATTICE_ORIGIN (lattice), origin);
1083   lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_ORIGIN_INVARIANTS (lattice),
1084                       origin_invariants, depth, depth, invariants);
1085
1086   for (i = 0; i < depth; i++)
1087     {
1088       loop = LN_LOOPS (target_nest)[i];
1089       expression = LL_LINEAR_OFFSET (loop);
1090       if (lambda_vector_zerop (LLE_COEFFICIENTS (expression), depth))
1091         f = 1;
1092       else
1093         f = LLE_DENOMINATOR (expression);
1094
1095       LLE_CONSTANT (expression) += f * origin[i];
1096
1097       for (j = 0; j < invariants; j++)
1098         LLE_INVARIANT_COEFFICIENTS (expression)[j] +=
1099           f * origin_invariants[i][j];
1100     }
1101
1102   return target_nest;
1103
1104 }
1105
1106 /* Convert a gcc tree expression EXPR to a lambda linear expression, and
1107    return the new expression.  DEPTH is the depth of the loopnest.
1108    OUTERINDUCTIONVARS is an array of the induction variables for outer loops
1109    in this nest.  INVARIANTS is the array of invariants for the loop.  EXTRA
1110    is the amount we have to add/subtract from the expression because of the
1111    type of comparison it is used in.  */
1112
1113 static lambda_linear_expression
1114 gcc_tree_to_linear_expression (int depth, tree expr,
1115                                VEC(tree,heap) *outerinductionvars,
1116                                VEC(tree,heap) *invariants, int extra)
1117 {
1118   lambda_linear_expression lle = NULL;
1119   switch (TREE_CODE (expr))
1120     {
1121     case INTEGER_CST:
1122       {
1123         lle = lambda_linear_expression_new (depth, 2 * depth);
1124         LLE_CONSTANT (lle) = TREE_INT_CST_LOW (expr);
1125         if (extra != 0)
1126           LLE_CONSTANT (lle) += extra;
1127
1128         LLE_DENOMINATOR (lle) = 1;
1129       }
1130       break;
1131     case SSA_NAME:
1132       {
1133         tree iv, invar;
1134         size_t i;
1135         for (i = 0; VEC_iterate (tree, outerinductionvars, i, iv); i++)
1136           if (iv != NULL)
1137             {
1138               if (SSA_NAME_VAR (iv) == SSA_NAME_VAR (expr))
1139                 {
1140                   lle = lambda_linear_expression_new (depth, 2 * depth);
1141                   LLE_COEFFICIENTS (lle)[i] = 1;
1142                   if (extra != 0)
1143                     LLE_CONSTANT (lle) = extra;
1144
1145                   LLE_DENOMINATOR (lle) = 1;
1146                 }
1147             }
1148         for (i = 0; VEC_iterate (tree, invariants, i, invar); i++)
1149           if (invar != NULL)
1150             {
1151               if (SSA_NAME_VAR (invar) == SSA_NAME_VAR (expr))
1152                 {
1153                   lle = lambda_linear_expression_new (depth, 2 * depth);
1154                   LLE_INVARIANT_COEFFICIENTS (lle)[i] = 1;
1155                   if (extra != 0)
1156                     LLE_CONSTANT (lle) = extra;
1157                   LLE_DENOMINATOR (lle) = 1;
1158                 }
1159             }
1160       }
1161       break;
1162     default:
1163       return NULL;
1164     }
1165
1166   return lle;
1167 }
1168
1169 /* Return the depth of the loopnest NEST */
1170
1171 static int
1172 depth_of_nest (struct loop *nest)
1173 {
1174   size_t depth = 0;
1175   while (nest)
1176     {
1177       depth++;
1178       nest = nest->inner;
1179     }
1180   return depth;
1181 }
1182
1183
1184 /* Return true if OP is invariant in LOOP and all outer loops.  */
1185
1186 static bool
1187 invariant_in_loop_and_outer_loops (struct loop *loop, tree op)
1188 {
1189   if (is_gimple_min_invariant (op))
1190     return true;
1191   if (loop->depth == 0)
1192     return true;
1193   if (!expr_invariant_in_loop_p (loop, op))
1194     return false;
1195   if (loop->outer
1196       && !invariant_in_loop_and_outer_loops (loop->outer, op))
1197     return false;
1198   return true;
1199 }
1200
1201 /* Generate a lambda loop from a gcc loop LOOP.  Return the new lambda loop,
1202    or NULL if it could not be converted.
1203    DEPTH is the depth of the loop.
1204    INVARIANTS is a pointer to the array of loop invariants.
1205    The induction variable for this loop should be stored in the parameter
1206    OURINDUCTIONVAR.
1207    OUTERINDUCTIONVARS is an array of induction variables for outer loops.  */
1208
1209 static lambda_loop
1210 gcc_loop_to_lambda_loop (struct loop *loop, int depth,
1211                          VEC(tree,heap) ** invariants,
1212                          tree * ourinductionvar,
1213                          VEC(tree,heap) * outerinductionvars,
1214                          VEC(tree,heap) ** lboundvars,
1215                          VEC(tree,heap) ** uboundvars,
1216                          VEC(int,heap) ** steps)
1217 {
1218   tree phi;
1219   tree exit_cond;
1220   tree access_fn, inductionvar;
1221   tree step;
1222   lambda_loop lloop = NULL;
1223   lambda_linear_expression lbound, ubound;
1224   tree test;
1225   int stepint;
1226   int extra = 0;
1227   tree lboundvar, uboundvar, uboundresult;
1228
1229   /* Find out induction var and exit condition.  */
1230   inductionvar = find_induction_var_from_exit_cond (loop);
1231   exit_cond = get_loop_exit_condition (loop);
1232
1233   if (inductionvar == NULL || exit_cond == NULL)
1234     {
1235       if (dump_file && (dump_flags & TDF_DETAILS))
1236         fprintf (dump_file,
1237                  "Unable to convert loop: Cannot determine exit condition or induction variable for loop.\n");
1238       return NULL;
1239     }
1240
1241   test = TREE_OPERAND (exit_cond, 0);
1242
1243   if (SSA_NAME_DEF_STMT (inductionvar) == NULL_TREE)
1244     {
1245
1246       if (dump_file && (dump_flags & TDF_DETAILS))
1247         fprintf (dump_file,
1248                  "Unable to convert loop: Cannot find PHI node for induction variable\n");
1249
1250       return NULL;
1251     }
1252
1253   phi = SSA_NAME_DEF_STMT (inductionvar);
1254   if (TREE_CODE (phi) != PHI_NODE)
1255     {
1256       phi = SINGLE_SSA_TREE_OPERAND (phi, SSA_OP_USE);
1257       if (!phi)
1258         {
1259
1260           if (dump_file && (dump_flags & TDF_DETAILS))
1261             fprintf (dump_file,
1262                      "Unable to convert loop: Cannot find PHI node for induction variable\n");
1263
1264           return NULL;
1265         }
1266
1267       phi = SSA_NAME_DEF_STMT (phi);
1268       if (TREE_CODE (phi) != PHI_NODE)
1269         {
1270
1271           if (dump_file && (dump_flags & TDF_DETAILS))
1272             fprintf (dump_file,
1273                      "Unable to convert loop: Cannot find PHI node for induction variable\n");
1274           return NULL;
1275         }
1276
1277     }
1278
1279   /* The induction variable name/version we want to put in the array is the
1280      result of the induction variable phi node.  */
1281   *ourinductionvar = PHI_RESULT (phi);
1282   access_fn = instantiate_parameters
1283     (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
1284   if (access_fn == chrec_dont_know)
1285     {
1286       if (dump_file && (dump_flags & TDF_DETAILS))
1287         fprintf (dump_file,
1288                  "Unable to convert loop: Access function for induction variable phi is unknown\n");
1289
1290       return NULL;
1291     }
1292
1293   step = evolution_part_in_loop_num (access_fn, loop->num);
1294   if (!step || step == chrec_dont_know)
1295     {
1296       if (dump_file && (dump_flags & TDF_DETAILS))
1297         fprintf (dump_file,
1298                  "Unable to convert loop: Cannot determine step of loop.\n");
1299
1300       return NULL;
1301     }
1302   if (TREE_CODE (step) != INTEGER_CST)
1303     {
1304
1305       if (dump_file && (dump_flags & TDF_DETAILS))
1306         fprintf (dump_file,
1307                  "Unable to convert loop: Step of loop is not integer.\n");
1308       return NULL;
1309     }
1310
1311   stepint = TREE_INT_CST_LOW (step);
1312
1313   /* Only want phis for induction vars, which will have two
1314      arguments.  */
1315   if (PHI_NUM_ARGS (phi) != 2)
1316     {
1317       if (dump_file && (dump_flags & TDF_DETAILS))
1318         fprintf (dump_file,
1319                  "Unable to convert loop: PHI node for induction variable has >2 arguments\n");
1320       return NULL;
1321     }
1322
1323   /* Another induction variable check. One argument's source should be
1324      in the loop, one outside the loop.  */
1325   if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 0)->src)
1326       && flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 1)->src))
1327     {
1328
1329       if (dump_file && (dump_flags & TDF_DETAILS))
1330         fprintf (dump_file,
1331                  "Unable to convert loop: PHI edges both inside loop, or both outside loop.\n");
1332
1333       return NULL;
1334     }
1335
1336   if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 0)->src))
1337     {
1338       lboundvar = PHI_ARG_DEF (phi, 1);
1339       lbound = gcc_tree_to_linear_expression (depth, lboundvar,
1340                                               outerinductionvars, *invariants,
1341                                               0);
1342     }
1343   else
1344     {
1345       lboundvar = PHI_ARG_DEF (phi, 0);
1346       lbound = gcc_tree_to_linear_expression (depth, lboundvar,
1347                                               outerinductionvars, *invariants,
1348                                               0);
1349     }
1350
1351   if (!lbound)
1352     {
1353
1354       if (dump_file && (dump_flags & TDF_DETAILS))
1355         fprintf (dump_file,
1356                  "Unable to convert loop: Cannot convert lower bound to linear expression\n");
1357
1358       return NULL;
1359     }
1360   /* One part of the test may be a loop invariant tree.  */
1361   VEC_reserve (tree, heap, *invariants, 1);
1362   if (TREE_CODE (TREE_OPERAND (test, 1)) == SSA_NAME
1363       && invariant_in_loop_and_outer_loops (loop, TREE_OPERAND (test, 1)))
1364     VEC_quick_push (tree, *invariants, TREE_OPERAND (test, 1));
1365   else if (TREE_CODE (TREE_OPERAND (test, 0)) == SSA_NAME
1366            && invariant_in_loop_and_outer_loops (loop, TREE_OPERAND (test, 0)))
1367     VEC_quick_push (tree, *invariants, TREE_OPERAND (test, 0));
1368
1369   /* The non-induction variable part of the test is the upper bound variable.
1370    */
1371   if (TREE_OPERAND (test, 0) == inductionvar)
1372     uboundvar = TREE_OPERAND (test, 1);
1373   else
1374     uboundvar = TREE_OPERAND (test, 0);
1375
1376
1377   /* We only size the vectors assuming we have, at max, 2 times as many
1378      invariants as we do loops (one for each bound).
1379      This is just an arbitrary number, but it has to be matched against the
1380      code below.  */
1381   gcc_assert (VEC_length (tree, *invariants) <= (unsigned int) (2 * depth));
1382
1383
1384   /* We might have some leftover.  */
1385   if (TREE_CODE (test) == LT_EXPR)
1386     extra = -1 * stepint;
1387   else if (TREE_CODE (test) == NE_EXPR)
1388     extra = -1 * stepint;
1389   else if (TREE_CODE (test) == GT_EXPR)
1390     extra = -1 * stepint;
1391   else if (TREE_CODE (test) == EQ_EXPR)
1392     extra = 1 * stepint;
1393
1394   ubound = gcc_tree_to_linear_expression (depth, uboundvar,
1395                                           outerinductionvars,
1396                                           *invariants, extra);
1397   uboundresult = build2 (PLUS_EXPR, TREE_TYPE (uboundvar), uboundvar,
1398                          build_int_cst (TREE_TYPE (uboundvar), extra));
1399   VEC_safe_push (tree, heap, *uboundvars, uboundresult);
1400   VEC_safe_push (tree, heap, *lboundvars, lboundvar);
1401   VEC_safe_push (int, heap, *steps, stepint);
1402   if (!ubound)
1403     {
1404       if (dump_file && (dump_flags & TDF_DETAILS))
1405         fprintf (dump_file,
1406                  "Unable to convert loop: Cannot convert upper bound to linear expression\n");
1407       return NULL;
1408     }
1409
1410   lloop = lambda_loop_new ();
1411   LL_STEP (lloop) = stepint;
1412   LL_LOWER_BOUND (lloop) = lbound;
1413   LL_UPPER_BOUND (lloop) = ubound;
1414   return lloop;
1415 }
1416
1417 /* Given a LOOP, find the induction variable it is testing against in the exit
1418    condition.  Return the induction variable if found, NULL otherwise.  */
1419
1420 static tree
1421 find_induction_var_from_exit_cond (struct loop *loop)
1422 {
1423   tree expr = get_loop_exit_condition (loop);
1424   tree ivarop;
1425   tree test;
1426   if (expr == NULL_TREE)
1427     return NULL_TREE;
1428   if (TREE_CODE (expr) != COND_EXPR)
1429     return NULL_TREE;
1430   test = TREE_OPERAND (expr, 0);
1431   if (!COMPARISON_CLASS_P (test))
1432     return NULL_TREE;
1433
1434   /* Find the side that is invariant in this loop. The ivar must be the other
1435      side.  */
1436
1437   if (expr_invariant_in_loop_p (loop, TREE_OPERAND (test, 0)))
1438       ivarop = TREE_OPERAND (test, 1);
1439   else if (expr_invariant_in_loop_p (loop, TREE_OPERAND (test, 1)))
1440       ivarop = TREE_OPERAND (test, 0);
1441   else
1442     return NULL_TREE;
1443
1444   if (TREE_CODE (ivarop) != SSA_NAME)
1445     return NULL_TREE;
1446   return ivarop;
1447 }
1448
1449 DEF_VEC_P(lambda_loop);
1450 DEF_VEC_ALLOC_P(lambda_loop,heap);
1451
1452 /* Generate a lambda loopnest from a gcc loopnest LOOP_NEST.
1453    Return the new loop nest.
1454    INDUCTIONVARS is a pointer to an array of induction variables for the
1455    loopnest that will be filled in during this process.
1456    INVARIANTS is a pointer to an array of invariants that will be filled in
1457    during this process.  */
1458
1459 lambda_loopnest
1460 gcc_loopnest_to_lambda_loopnest (struct loops *currloops,
1461                                  struct loop *loop_nest,
1462                                  VEC(tree,heap) **inductionvars,
1463                                  VEC(tree,heap) **invariants)
1464 {
1465   lambda_loopnest ret = NULL;
1466   struct loop *temp = loop_nest;
1467   int depth = depth_of_nest (loop_nest);
1468   size_t i;
1469   VEC(lambda_loop,heap) *loops = NULL;
1470   VEC(tree,heap) *uboundvars = NULL;
1471   VEC(tree,heap) *lboundvars  = NULL;
1472   VEC(int,heap) *steps = NULL;
1473   lambda_loop newloop;
1474   tree inductionvar = NULL;
1475   bool perfect_nest = perfect_nest_p (loop_nest);
1476
1477   if (!perfect_nest && !can_convert_to_perfect_nest (loop_nest))
1478     goto fail;
1479
1480   while (temp)
1481     {
1482       newloop = gcc_loop_to_lambda_loop (temp, depth, invariants,
1483                                          &inductionvar, *inductionvars,
1484                                          &lboundvars, &uboundvars,
1485                                          &steps);
1486       if (!newloop)
1487         goto fail;
1488
1489       VEC_safe_push (tree, heap, *inductionvars, inductionvar);
1490       VEC_safe_push (lambda_loop, heap, loops, newloop);
1491       temp = temp->inner;
1492     }
1493
1494   if (!perfect_nest)
1495     {
1496       if (!perfect_nestify (currloops, loop_nest,
1497                             lboundvars, uboundvars, steps, *inductionvars))
1498         {
1499           if (dump_file)
1500             fprintf (dump_file,
1501                      "Not a perfect loop nest and couldn't convert to one.\n");
1502           goto fail;
1503         }
1504       else if (dump_file)
1505         fprintf (dump_file,
1506                  "Successfully converted loop nest to perfect loop nest.\n");
1507     }
1508
1509   ret = lambda_loopnest_new (depth, 2 * depth);
1510
1511   for (i = 0; VEC_iterate (lambda_loop, loops, i, newloop); i++)
1512     LN_LOOPS (ret)[i] = newloop;
1513
1514  fail:
1515   VEC_free (lambda_loop, heap, loops);
1516   VEC_free (tree, heap, uboundvars);
1517   VEC_free (tree, heap, lboundvars);
1518   VEC_free (int, heap, steps);
1519
1520   return ret;
1521 }
1522
1523 /* Convert a lambda body vector LBV to a gcc tree, and return the new tree.
1524    STMTS_TO_INSERT is a pointer to a tree where the statements we need to be
1525    inserted for us are stored.  INDUCTION_VARS is the array of induction
1526    variables for the loop this LBV is from.  TYPE is the tree type to use for
1527    the variables and trees involved.  */
1528
1529 static tree
1530 lbv_to_gcc_expression (lambda_body_vector lbv,
1531                        tree type, VEC(tree,heap) *induction_vars,
1532                        tree *stmts_to_insert)
1533 {
1534   tree stmts, stmt, resvar, name;
1535   tree iv;
1536   size_t i;
1537   tree_stmt_iterator tsi;
1538
1539   /* Create a statement list and a linear expression temporary.  */
1540   stmts = alloc_stmt_list ();
1541   resvar = create_tmp_var (type, "lbvtmp");
1542   add_referenced_var (resvar);
1543
1544   /* Start at 0.  */
1545   stmt = build2 (MODIFY_EXPR, void_type_node, resvar, integer_zero_node);
1546   name = make_ssa_name (resvar, stmt);
1547   TREE_OPERAND (stmt, 0) = name;
1548   tsi = tsi_last (stmts);
1549   tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1550
1551   for (i = 0; VEC_iterate (tree, induction_vars, i, iv); i++)
1552     {
1553       if (LBV_COEFFICIENTS (lbv)[i] != 0)
1554         {
1555           tree newname;
1556           tree coeffmult;
1557
1558           /* newname = coefficient * induction_variable */
1559           coeffmult = build_int_cst (type, LBV_COEFFICIENTS (lbv)[i]);
1560           stmt = build2 (MODIFY_EXPR, void_type_node, resvar,
1561                          fold_build2 (MULT_EXPR, type, iv, coeffmult));
1562
1563           newname = make_ssa_name (resvar, stmt);
1564           TREE_OPERAND (stmt, 0) = newname;
1565           fold_stmt (&stmt);
1566           tsi = tsi_last (stmts);
1567           tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1568
1569           /* name = name + newname */
1570           stmt = build2 (MODIFY_EXPR, void_type_node, resvar,
1571                          build2 (PLUS_EXPR, type, name, newname));
1572           name = make_ssa_name (resvar, stmt);
1573           TREE_OPERAND (stmt, 0) = name;
1574           fold_stmt (&stmt);
1575           tsi = tsi_last (stmts);
1576           tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1577
1578         }
1579     }
1580
1581   /* Handle any denominator that occurs.  */
1582   if (LBV_DENOMINATOR (lbv) != 1)
1583     {
1584       tree denominator = build_int_cst (type, LBV_DENOMINATOR (lbv));
1585       stmt = build2 (MODIFY_EXPR, void_type_node, resvar,
1586                      build2 (CEIL_DIV_EXPR, type, name, denominator));
1587       name = make_ssa_name (resvar, stmt);
1588       TREE_OPERAND (stmt, 0) = name;
1589       fold_stmt (&stmt);
1590       tsi = tsi_last (stmts);
1591       tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1592     }
1593   *stmts_to_insert = stmts;
1594   return name;
1595 }
1596
1597 /* Convert a linear expression from coefficient and constant form to a
1598    gcc tree.
1599    Return the tree that represents the final value of the expression.
1600    LLE is the linear expression to convert.
1601    OFFSET is the linear offset to apply to the expression.
1602    TYPE is the tree type to use for the variables and math.
1603    INDUCTION_VARS is a vector of induction variables for the loops.
1604    INVARIANTS is a vector of the loop nest invariants.
1605    WRAP specifies what tree code to wrap the results in, if there is more than
1606    one (it is either MAX_EXPR, or MIN_EXPR).
1607    STMTS_TO_INSERT Is a pointer to the statement list we fill in with
1608    statements that need to be inserted for the linear expression.  */
1609
1610 static tree
1611 lle_to_gcc_expression (lambda_linear_expression lle,
1612                        lambda_linear_expression offset,
1613                        tree type,
1614                        VEC(tree,heap) *induction_vars,
1615                        VEC(tree,heap) *invariants,
1616                        enum tree_code wrap, tree *stmts_to_insert)
1617 {
1618   tree stmts, stmt, resvar, name;
1619   size_t i;
1620   tree_stmt_iterator tsi;
1621   tree iv, invar;
1622   VEC(tree,heap) *results = NULL;
1623
1624   gcc_assert (wrap == MAX_EXPR || wrap == MIN_EXPR);
1625   name = NULL_TREE;
1626   /* Create a statement list and a linear expression temporary.  */
1627   stmts = alloc_stmt_list ();
1628   resvar = create_tmp_var (type, "lletmp");
1629   add_referenced_var (resvar);
1630
1631   /* Build up the linear expressions, and put the variable representing the
1632      result in the results array.  */
1633   for (; lle != NULL; lle = LLE_NEXT (lle))
1634     {
1635       /* Start at name = 0.  */
1636       stmt = build2 (MODIFY_EXPR, void_type_node, resvar, integer_zero_node);
1637       name = make_ssa_name (resvar, stmt);
1638       TREE_OPERAND (stmt, 0) = name;
1639       fold_stmt (&stmt);
1640       tsi = tsi_last (stmts);
1641       tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1642
1643       /* First do the induction variables.
1644          at the end, name = name + all the induction variables added
1645          together.  */
1646       for (i = 0; VEC_iterate (tree, induction_vars, i, iv); i++)
1647         {
1648           if (LLE_COEFFICIENTS (lle)[i] != 0)
1649             {
1650               tree newname;
1651               tree mult;
1652               tree coeff;
1653
1654               /* mult = induction variable * coefficient.  */
1655               if (LLE_COEFFICIENTS (lle)[i] == 1)
1656                 {
1657                   mult = VEC_index (tree, induction_vars, i);
1658                 }
1659               else
1660                 {
1661                   coeff = build_int_cst (type,
1662                                          LLE_COEFFICIENTS (lle)[i]);
1663                   mult = fold_build2 (MULT_EXPR, type, iv, coeff);
1664                 }
1665
1666               /* newname = mult */
1667               stmt = build2 (MODIFY_EXPR, void_type_node, resvar, mult);
1668               newname = make_ssa_name (resvar, stmt);
1669               TREE_OPERAND (stmt, 0) = newname;
1670               fold_stmt (&stmt);
1671               tsi = tsi_last (stmts);
1672               tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1673
1674               /* name = name + newname */
1675               stmt = build2 (MODIFY_EXPR, void_type_node, resvar,
1676                              build2 (PLUS_EXPR, type, name, newname));
1677               name = make_ssa_name (resvar, stmt);
1678               TREE_OPERAND (stmt, 0) = name;
1679               fold_stmt (&stmt);
1680               tsi = tsi_last (stmts);
1681               tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1682             }
1683         }
1684
1685       /* Handle our invariants.
1686          At the end, we have name = name + result of adding all multiplied
1687          invariants.  */
1688       for (i = 0; VEC_iterate (tree, invariants, i, invar); i++)
1689         {
1690           if (LLE_INVARIANT_COEFFICIENTS (lle)[i] != 0)
1691             {
1692               tree newname;
1693               tree mult;
1694               tree coeff;
1695               int invcoeff = LLE_INVARIANT_COEFFICIENTS (lle)[i];
1696               /* mult = invariant * coefficient  */
1697               if (invcoeff == 1)
1698                 {
1699                   mult = invar;
1700                 }
1701               else
1702                 {
1703                   coeff = build_int_cst (type, invcoeff);
1704                   mult = fold_build2 (MULT_EXPR, type, invar, coeff);
1705                 }
1706
1707               /* newname = mult */
1708               stmt = build2 (MODIFY_EXPR, void_type_node, resvar, mult);
1709               newname = make_ssa_name (resvar, stmt);
1710               TREE_OPERAND (stmt, 0) = newname;
1711               fold_stmt (&stmt);
1712               tsi = tsi_last (stmts);
1713               tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1714
1715               /* name = name + newname */
1716               stmt = build2 (MODIFY_EXPR, void_type_node, resvar,
1717                              build2 (PLUS_EXPR, type, name, newname));
1718               name = make_ssa_name (resvar, stmt);
1719               TREE_OPERAND (stmt, 0) = name;
1720               fold_stmt (&stmt);
1721               tsi = tsi_last (stmts);
1722               tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1723             }
1724         }
1725
1726       /* Now handle the constant.
1727          name = name + constant.  */
1728       if (LLE_CONSTANT (lle) != 0)
1729         {
1730           stmt = build2 (MODIFY_EXPR, void_type_node, resvar,
1731                          build2 (PLUS_EXPR, type, name,
1732                                  build_int_cst (type, LLE_CONSTANT (lle))));
1733           name = make_ssa_name (resvar, stmt);
1734           TREE_OPERAND (stmt, 0) = name;
1735           fold_stmt (&stmt);
1736           tsi = tsi_last (stmts);
1737           tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1738         }
1739
1740       /* Now handle the offset.
1741          name = name + linear offset.  */
1742       if (LLE_CONSTANT (offset) != 0)
1743         {
1744           stmt = build2 (MODIFY_EXPR, void_type_node, resvar,
1745                          build2 (PLUS_EXPR, type, name,
1746                                  build_int_cst (type, LLE_CONSTANT (offset))));
1747           name = make_ssa_name (resvar, stmt);
1748           TREE_OPERAND (stmt, 0) = name;
1749           fold_stmt (&stmt);
1750           tsi = tsi_last (stmts);
1751           tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1752         }
1753
1754       /* Handle any denominator that occurs.  */
1755       if (LLE_DENOMINATOR (lle) != 1)
1756         {
1757           stmt = build_int_cst (type, LLE_DENOMINATOR (lle));
1758           stmt = build2 (wrap == MAX_EXPR ? CEIL_DIV_EXPR : FLOOR_DIV_EXPR,
1759                          type, name, stmt);
1760           stmt = build2 (MODIFY_EXPR, void_type_node, resvar, stmt);
1761
1762           /* name = {ceil, floor}(name/denominator) */
1763           name = make_ssa_name (resvar, stmt);
1764           TREE_OPERAND (stmt, 0) = name;
1765           tsi = tsi_last (stmts);
1766           tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1767         }
1768       VEC_safe_push (tree, heap, results, name);
1769     }
1770
1771   /* Again, out of laziness, we don't handle this case yet.  It's not
1772      hard, it just hasn't occurred.  */
1773   gcc_assert (VEC_length (tree, results) <= 2);
1774
1775   /* We may need to wrap the results in a MAX_EXPR or MIN_EXPR.  */
1776   if (VEC_length (tree, results) > 1)
1777     {
1778       tree op1 = VEC_index (tree, results, 0);
1779       tree op2 = VEC_index (tree, results, 1);
1780       stmt = build2 (MODIFY_EXPR, void_type_node, resvar,
1781                      build2 (wrap, type, op1, op2));
1782       name = make_ssa_name (resvar, stmt);
1783       TREE_OPERAND (stmt, 0) = name;
1784       tsi = tsi_last (stmts);
1785       tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1786     }
1787
1788   VEC_free (tree, heap, results);
1789
1790   *stmts_to_insert = stmts;
1791   return name;
1792 }
1793
1794 /* Transform a lambda loopnest NEW_LOOPNEST, which had TRANSFORM applied to
1795    it, back into gcc code.  This changes the
1796    loops, their induction variables, and their bodies, so that they
1797    match the transformed loopnest.
1798    OLD_LOOPNEST is the loopnest before we've replaced it with the new
1799    loopnest.
1800    OLD_IVS is a vector of induction variables from the old loopnest.
1801    INVARIANTS is a vector of loop invariants from the old loopnest.
1802    NEW_LOOPNEST is the new lambda loopnest to replace OLD_LOOPNEST with.
1803    TRANSFORM is the matrix transform that was applied to OLD_LOOPNEST to get
1804    NEW_LOOPNEST.  */
1805
1806 void
1807 lambda_loopnest_to_gcc_loopnest (struct loop *old_loopnest,
1808                                  VEC(tree,heap) *old_ivs,
1809                                  VEC(tree,heap) *invariants,
1810                                  lambda_loopnest new_loopnest,
1811                                  lambda_trans_matrix transform)
1812 {
1813   struct loop *temp;
1814   size_t i = 0;
1815   size_t depth = 0;
1816   VEC(tree,heap) *new_ivs = NULL;
1817   tree oldiv;
1818
1819   block_stmt_iterator bsi;
1820
1821   if (dump_file)
1822     {
1823       transform = lambda_trans_matrix_inverse (transform);
1824       fprintf (dump_file, "Inverse of transformation matrix:\n");
1825       print_lambda_trans_matrix (dump_file, transform);
1826     }
1827   depth = depth_of_nest (old_loopnest);
1828   temp = old_loopnest;
1829
1830   while (temp)
1831     {
1832       lambda_loop newloop;
1833       basic_block bb;
1834       edge exit;
1835       tree ivvar, ivvarinced, exitcond, stmts;
1836       enum tree_code testtype;
1837       tree newupperbound, newlowerbound;
1838       lambda_linear_expression offset;
1839       tree type;
1840       bool insert_after;
1841       tree inc_stmt;
1842
1843       oldiv = VEC_index (tree, old_ivs, i);
1844       type = TREE_TYPE (oldiv);
1845
1846       /* First, build the new induction variable temporary  */
1847
1848       ivvar = create_tmp_var (type, "lnivtmp");
1849       add_referenced_var (ivvar);
1850
1851       VEC_safe_push (tree, heap, new_ivs, ivvar);
1852
1853       newloop = LN_LOOPS (new_loopnest)[i];
1854
1855       /* Linear offset is a bit tricky to handle.  Punt on the unhandled
1856          cases for now.  */
1857       offset = LL_LINEAR_OFFSET (newloop);
1858
1859       gcc_assert (LLE_DENOMINATOR (offset) == 1 &&
1860                   lambda_vector_zerop (LLE_COEFFICIENTS (offset), depth));
1861
1862       /* Now build the  new lower bounds, and insert the statements
1863          necessary to generate it on the loop preheader.  */
1864       newlowerbound = lle_to_gcc_expression (LL_LOWER_BOUND (newloop),
1865                                              LL_LINEAR_OFFSET (newloop),
1866                                              type,
1867                                              new_ivs,
1868                                              invariants, MAX_EXPR, &stmts);
1869       bsi_insert_on_edge (loop_preheader_edge (temp), stmts);
1870       bsi_commit_edge_inserts ();
1871       /* Build the new upper bound and insert its statements in the
1872          basic block of the exit condition */
1873       newupperbound = lle_to_gcc_expression (LL_UPPER_BOUND (newloop),
1874                                              LL_LINEAR_OFFSET (newloop),
1875                                              type,
1876                                              new_ivs,
1877                                              invariants, MIN_EXPR, &stmts);
1878       exit = temp->single_exit;
1879       exitcond = get_loop_exit_condition (temp);
1880       bb = bb_for_stmt (exitcond);
1881       bsi = bsi_start (bb);
1882       bsi_insert_after (&bsi, stmts, BSI_NEW_STMT);
1883
1884       /* Create the new iv.  */
1885
1886       standard_iv_increment_position (temp, &bsi, &insert_after);
1887       create_iv (newlowerbound,
1888                  build_int_cst (type, LL_STEP (newloop)),
1889                  ivvar, temp, &bsi, insert_after, &ivvar,
1890                  NULL);
1891
1892       /* Unfortunately, the incremented ivvar that create_iv inserted may not
1893          dominate the block containing the exit condition.
1894          So we simply create our own incremented iv to use in the new exit
1895          test,  and let redundancy elimination sort it out.  */
1896       inc_stmt = build2 (PLUS_EXPR, type,
1897                          ivvar, build_int_cst (type, LL_STEP (newloop)));
1898       inc_stmt = build2 (MODIFY_EXPR, void_type_node, SSA_NAME_VAR (ivvar),
1899                          inc_stmt);
1900       ivvarinced = make_ssa_name (SSA_NAME_VAR (ivvar), inc_stmt);
1901       TREE_OPERAND (inc_stmt, 0) = ivvarinced;
1902       bsi = bsi_for_stmt (exitcond);
1903       bsi_insert_before (&bsi, inc_stmt, BSI_SAME_STMT);
1904
1905       /* Replace the exit condition with the new upper bound
1906          comparison.  */
1907
1908       testtype = LL_STEP (newloop) >= 0 ? LE_EXPR : GE_EXPR;
1909
1910       /* We want to build a conditional where true means exit the loop, and
1911          false means continue the loop.
1912          So swap the testtype if this isn't the way things are.*/
1913
1914       if (exit->flags & EDGE_FALSE_VALUE)
1915         testtype = swap_tree_comparison (testtype);
1916
1917       COND_EXPR_COND (exitcond) = build2 (testtype,
1918                                           boolean_type_node,
1919                                           newupperbound, ivvarinced);
1920       update_stmt (exitcond);
1921       VEC_replace (tree, new_ivs, i, ivvar);
1922
1923       i++;
1924       temp = temp->inner;
1925     }
1926
1927   /* Rewrite uses of the old ivs so that they are now specified in terms of
1928      the new ivs.  */
1929
1930   for (i = 0; VEC_iterate (tree, old_ivs, i, oldiv); i++)
1931     {
1932       imm_use_iterator imm_iter;
1933       use_operand_p use_p;
1934       tree oldiv_def;
1935       tree oldiv_stmt = SSA_NAME_DEF_STMT (oldiv);
1936       tree stmt;
1937
1938       if (TREE_CODE (oldiv_stmt) == PHI_NODE)
1939         oldiv_def = PHI_RESULT (oldiv_stmt);
1940       else
1941         oldiv_def = SINGLE_SSA_TREE_OPERAND (oldiv_stmt, SSA_OP_DEF);
1942       gcc_assert (oldiv_def != NULL_TREE);
1943
1944       FOR_EACH_IMM_USE_STMT (stmt, imm_iter, oldiv_def)
1945         {
1946           tree newiv, stmts;
1947           lambda_body_vector lbv, newlbv;
1948
1949           gcc_assert (TREE_CODE (stmt) != PHI_NODE);
1950
1951           /* Compute the new expression for the induction
1952              variable.  */
1953           depth = VEC_length (tree, new_ivs);
1954           lbv = lambda_body_vector_new (depth);
1955           LBV_COEFFICIENTS (lbv)[i] = 1;
1956
1957           newlbv = lambda_body_vector_compute_new (transform, lbv);
1958
1959           newiv = lbv_to_gcc_expression (newlbv, TREE_TYPE (oldiv),
1960                                          new_ivs, &stmts);
1961           bsi = bsi_for_stmt (stmt);
1962           /* Insert the statements to build that
1963              expression.  */
1964           bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
1965
1966           FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1967             propagate_value (use_p, newiv);
1968           update_stmt (stmt);
1969         }
1970     }
1971   VEC_free (tree, heap, new_ivs);
1972 }
1973
1974 /* Return TRUE if this is not interesting statement from the perspective of
1975    determining if we have a perfect loop nest.  */
1976
1977 static bool
1978 not_interesting_stmt (tree stmt)
1979 {
1980   /* Note that COND_EXPR's aren't interesting because if they were exiting the
1981      loop, we would have already failed the number of exits tests.  */
1982   if (TREE_CODE (stmt) == LABEL_EXPR
1983       || TREE_CODE (stmt) == GOTO_EXPR
1984       || TREE_CODE (stmt) == COND_EXPR)
1985     return true;
1986   return false;
1987 }
1988
1989 /* Return TRUE if PHI uses DEF for it's in-the-loop edge for LOOP.  */
1990
1991 static bool
1992 phi_loop_edge_uses_def (struct loop *loop, tree phi, tree def)
1993 {
1994   int i;
1995   for (i = 0; i < PHI_NUM_ARGS (phi); i++)
1996     if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, i)->src))
1997       if (PHI_ARG_DEF (phi, i) == def)
1998         return true;
1999   return false;
2000 }
2001
2002 /* Return TRUE if STMT is a use of PHI_RESULT.  */
2003
2004 static bool
2005 stmt_uses_phi_result (tree stmt, tree phi_result)
2006 {
2007   tree use = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
2008
2009   /* This is conservatively true, because we only want SIMPLE bumpers
2010      of the form x +- constant for our pass.  */
2011   return (use == phi_result);
2012 }
2013
2014 /* STMT is a bumper stmt for LOOP if the version it defines is used in the
2015    in-loop-edge in a phi node, and the operand it uses is the result of that
2016    phi node.
2017    I.E. i_29 = i_3 + 1
2018         i_3 = PHI (0, i_29);  */
2019
2020 static bool
2021 stmt_is_bumper_for_loop (struct loop *loop, tree stmt)
2022 {
2023   tree use;
2024   tree def;
2025   imm_use_iterator iter;
2026   use_operand_p use_p;
2027
2028   def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
2029   if (!def)
2030     return false;
2031
2032   FOR_EACH_IMM_USE_FAST (use_p, iter, def)
2033     {
2034       use = USE_STMT (use_p);
2035       if (TREE_CODE (use) == PHI_NODE)
2036         {
2037           if (phi_loop_edge_uses_def (loop, use, def))
2038             if (stmt_uses_phi_result (stmt, PHI_RESULT (use)))
2039               return true;
2040         }
2041     }
2042   return false;
2043 }
2044
2045
2046 /* Return true if LOOP is a perfect loop nest.
2047    Perfect loop nests are those loop nests where all code occurs in the
2048    innermost loop body.
2049    If S is a program statement, then
2050
2051    i.e.
2052    DO I = 1, 20
2053        S1
2054        DO J = 1, 20
2055        ...
2056        END DO
2057    END DO
2058    is not a perfect loop nest because of S1.
2059
2060    DO I = 1, 20
2061       DO J = 1, 20
2062         S1
2063         ...
2064       END DO
2065    END DO
2066    is a perfect loop nest.
2067
2068    Since we don't have high level loops anymore, we basically have to walk our
2069    statements and ignore those that are there because the loop needs them (IE
2070    the induction variable increment, and jump back to the top of the loop).  */
2071
2072 bool
2073 perfect_nest_p (struct loop *loop)
2074 {
2075   basic_block *bbs;
2076   size_t i;
2077   tree exit_cond;
2078
2079   if (!loop->inner)
2080     return true;
2081   bbs = get_loop_body (loop);
2082   exit_cond = get_loop_exit_condition (loop);
2083   for (i = 0; i < loop->num_nodes; i++)
2084     {
2085       if (bbs[i]->loop_father == loop)
2086         {
2087           block_stmt_iterator bsi;
2088           for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi))
2089             {
2090               tree stmt = bsi_stmt (bsi);
2091               if (stmt == exit_cond
2092                   || not_interesting_stmt (stmt)
2093                   || stmt_is_bumper_for_loop (loop, stmt))
2094                 continue;
2095               free (bbs);
2096               return false;
2097             }
2098         }
2099     }
2100   free (bbs);
2101   /* See if the inner loops are perfectly nested as well.  */
2102   if (loop->inner)
2103     return perfect_nest_p (loop->inner);
2104   return true;
2105 }
2106
2107 /* Replace the USES of X in STMT, or uses with the same step as X with Y.
2108    YINIT is the initial value of Y, REPLACEMENTS is a hash table to
2109    avoid creating duplicate temporaries and FIRSTBSI is statement
2110    iterator where new temporaries should be inserted at the beginning
2111    of body basic block.  */
2112
2113 static void
2114 replace_uses_equiv_to_x_with_y (struct loop *loop, tree stmt, tree x,
2115                                 int xstep, tree y, tree yinit,
2116                                 htab_t replacements,
2117                                 block_stmt_iterator *firstbsi)
2118 {
2119   ssa_op_iter iter;
2120   use_operand_p use_p;
2121
2122   FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
2123     {
2124       tree use = USE_FROM_PTR (use_p);
2125       tree step = NULL_TREE;
2126       tree scev, init, val, var, setstmt;
2127       struct tree_map *h, in;
2128       void **loc;
2129
2130       /* Replace uses of X with Y right away.  */
2131       if (use == x)
2132         {
2133           SET_USE (use_p, y);
2134           continue;
2135         }
2136
2137       scev = instantiate_parameters (loop,
2138                                      analyze_scalar_evolution (loop, use));
2139
2140       if (scev == NULL || scev == chrec_dont_know)
2141         continue;
2142
2143       step = evolution_part_in_loop_num (scev, loop->num);
2144       if (step == NULL
2145           || step == chrec_dont_know
2146           || TREE_CODE (step) != INTEGER_CST
2147           || int_cst_value (step) != xstep)
2148         continue;
2149
2150       /* Use REPLACEMENTS hash table to cache already created
2151          temporaries.  */
2152       in.hash = htab_hash_pointer (use);
2153       in.from = use;
2154       h = htab_find_with_hash (replacements, &in, in.hash);
2155       if (h != NULL)
2156         {
2157           SET_USE (use_p, h->to);
2158           continue;
2159         }
2160
2161       /* USE which has the same step as X should be replaced
2162          with a temporary set to Y + YINIT - INIT.  */
2163       init = initial_condition_in_loop_num (scev, loop->num);
2164       gcc_assert (init != NULL && init != chrec_dont_know);
2165       if (TREE_TYPE (use) == TREE_TYPE (y))
2166         {
2167           val = fold_build2 (MINUS_EXPR, TREE_TYPE (y), init, yinit);
2168           val = fold_build2 (PLUS_EXPR, TREE_TYPE (y), y, val);
2169           if (val == y)
2170             {
2171               /* If X has the same type as USE, the same step
2172                  and same initial value, it can be replaced by Y.  */
2173               SET_USE (use_p, y);
2174               continue;
2175             }
2176         }
2177       else
2178         {
2179           val = fold_build2 (MINUS_EXPR, TREE_TYPE (y), y, yinit);
2180           val = fold_convert (TREE_TYPE (use), val);
2181           val = fold_build2 (PLUS_EXPR, TREE_TYPE (use), val, init);
2182         }
2183
2184       /* Create a temporary variable and insert it at the beginning
2185          of the loop body basic block, right after the PHI node
2186          which sets Y.  */
2187       var = create_tmp_var (TREE_TYPE (use), "perfecttmp");
2188       add_referenced_var (var);
2189       val = force_gimple_operand_bsi (firstbsi, val, false, NULL);
2190       setstmt = build2 (MODIFY_EXPR, void_type_node, var, val);
2191       var = make_ssa_name (var, setstmt);
2192       TREE_OPERAND (setstmt, 0) = var;
2193       bsi_insert_before (firstbsi, setstmt, BSI_SAME_STMT);
2194       update_stmt (setstmt);
2195       SET_USE (use_p, var);
2196       h = ggc_alloc (sizeof (struct tree_map));
2197       h->hash = in.hash;
2198       h->from = use;
2199       h->to = var;
2200       loc = htab_find_slot_with_hash (replacements, h, in.hash, INSERT);
2201       gcc_assert ((*(struct tree_map **)loc) == NULL);
2202       *(struct tree_map **) loc = h;
2203     }
2204 }
2205
2206 /* Return true if STMT is an exit PHI for LOOP */
2207
2208 static bool
2209 exit_phi_for_loop_p (struct loop *loop, tree stmt)
2210 {
2211
2212   if (TREE_CODE (stmt) != PHI_NODE
2213       || PHI_NUM_ARGS (stmt) != 1
2214       || bb_for_stmt (stmt) != loop->single_exit->dest)
2215     return false;
2216
2217   return true;
2218 }
2219
2220 /* Return true if STMT can be put back into the loop INNER, by
2221    copying it to the beginning of that loop and changing the uses.  */
2222
2223 static bool
2224 can_put_in_inner_loop (struct loop *inner, tree stmt)
2225 {
2226   imm_use_iterator imm_iter;
2227   use_operand_p use_p;
2228
2229   gcc_assert (TREE_CODE (stmt) == MODIFY_EXPR);
2230   if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS)
2231       || !expr_invariant_in_loop_p (inner, TREE_OPERAND (stmt, 1)))
2232     return false;
2233
2234   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, TREE_OPERAND (stmt, 0))
2235     {
2236       if (!exit_phi_for_loop_p (inner, USE_STMT (use_p)))
2237         {
2238           basic_block immbb = bb_for_stmt (USE_STMT (use_p));
2239
2240           if (!flow_bb_inside_loop_p (inner, immbb))
2241             return false;
2242         }
2243     }
2244   return true;
2245 }
2246
2247 /* Return true if STMT can be put *after* the inner loop of LOOP.  */
2248 static bool
2249 can_put_after_inner_loop (struct loop *loop, tree stmt)
2250 {
2251   imm_use_iterator imm_iter;
2252   use_operand_p use_p;
2253
2254   if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS))
2255     return false;
2256
2257   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, TREE_OPERAND (stmt, 0))
2258     {
2259       if (!exit_phi_for_loop_p (loop, USE_STMT (use_p)))
2260         {
2261           basic_block immbb = bb_for_stmt (USE_STMT (use_p));
2262
2263           if (!dominated_by_p (CDI_DOMINATORS,
2264                                immbb,
2265                                loop->inner->header)
2266               && !can_put_in_inner_loop (loop->inner, stmt))
2267             return false;
2268         }
2269     }
2270   return true;
2271 }
2272
2273
2274
2275 /* Return TRUE if LOOP is an imperfect nest that we can convert to a
2276    perfect one.  At the moment, we only handle imperfect nests of
2277    depth 2, where all of the statements occur after the inner loop.  */
2278
2279 static bool
2280 can_convert_to_perfect_nest (struct loop *loop)
2281 {
2282   basic_block *bbs;
2283   tree exit_condition, phi;
2284   size_t i;
2285   block_stmt_iterator bsi;
2286   basic_block exitdest;
2287
2288   /* Can't handle triply nested+ loops yet.  */
2289   if (!loop->inner || loop->inner->inner)
2290     return false;
2291
2292   bbs = get_loop_body (loop);
2293   exit_condition = get_loop_exit_condition (loop);
2294   for (i = 0; i < loop->num_nodes; i++)
2295     {
2296       if (bbs[i]->loop_father == loop)
2297         {
2298           for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi))
2299             {
2300               tree stmt = bsi_stmt (bsi);
2301
2302               if (stmt == exit_condition
2303                   || not_interesting_stmt (stmt)
2304                   || stmt_is_bumper_for_loop (loop, stmt))
2305                 continue;
2306
2307               /* If this is a scalar operation that can be put back
2308                  into the inner loop, or after the inner loop, through
2309                  copying, then do so. This works on the theory that
2310                  any amount of scalar code we have to reduplicate
2311                  into or after the loops is less expensive that the
2312                  win we get from rearranging the memory walk
2313                  the loop is doing so that it has better
2314                  cache behavior.  */
2315               if (TREE_CODE (stmt) == MODIFY_EXPR)
2316                 {
2317                   use_operand_p use_a, use_b;
2318                   imm_use_iterator imm_iter;
2319                   ssa_op_iter op_iter, op_iter1;
2320                   tree op0 = TREE_OPERAND (stmt, 0);
2321                   tree scev = instantiate_parameters
2322                     (loop, analyze_scalar_evolution (loop, op0));
2323
2324                   /* If the IV is simple, it can be duplicated.  */
2325                   if (!automatically_generated_chrec_p (scev))
2326                     {
2327                       tree step = evolution_part_in_loop_num (scev, loop->num);
2328                       if (step && step != chrec_dont_know
2329                           && TREE_CODE (step) == INTEGER_CST)
2330                         continue;
2331                     }
2332
2333                   /* The statement should not define a variable used
2334                      in the inner loop.  */
2335                   if (TREE_CODE (op0) == SSA_NAME)
2336                     FOR_EACH_IMM_USE_FAST (use_a, imm_iter, op0)
2337                       if (bb_for_stmt (USE_STMT (use_a))->loop_father
2338                           == loop->inner)
2339                         goto fail;
2340
2341                   FOR_EACH_SSA_USE_OPERAND (use_a, stmt, op_iter, SSA_OP_USE)
2342                     {
2343                       tree node, op = USE_FROM_PTR (use_a);
2344
2345                       /* The variables should not be used in both loops.  */
2346                       FOR_EACH_IMM_USE_FAST (use_b, imm_iter, op)
2347                       if (bb_for_stmt (USE_STMT (use_b))->loop_father
2348                           == loop->inner)
2349                         goto fail;
2350
2351                       /* The statement should not use the value of a
2352                          scalar that was modified in the loop.  */
2353                       node = SSA_NAME_DEF_STMT (op);
2354                       if (TREE_CODE (node) == PHI_NODE)
2355                         FOR_EACH_PHI_ARG (use_b, node, op_iter1, SSA_OP_USE)
2356                           {
2357                             tree arg = USE_FROM_PTR (use_b);
2358
2359                             if (TREE_CODE (arg) == SSA_NAME)
2360                               {
2361                                 tree arg_stmt = SSA_NAME_DEF_STMT (arg);
2362
2363                                 if (bb_for_stmt (arg_stmt)->loop_father
2364                                     == loop->inner)
2365                                   goto fail;
2366                               }
2367                           }
2368                     }
2369
2370                   if (can_put_in_inner_loop (loop->inner, stmt)
2371                       || can_put_after_inner_loop (loop, stmt))
2372                     continue;
2373                 }
2374
2375               /* Otherwise, if the bb of a statement we care about isn't
2376                  dominated by the header of the inner loop, then we can't
2377                  handle this case right now.  This test ensures that the
2378                  statement comes completely *after* the inner loop.  */
2379               if (!dominated_by_p (CDI_DOMINATORS,
2380                                    bb_for_stmt (stmt),
2381                                    loop->inner->header))
2382                 goto fail;
2383             }
2384         }
2385     }
2386
2387   /* We also need to make sure the loop exit only has simple copy phis in it,
2388      otherwise we don't know how to transform it into a perfect nest right
2389      now.  */
2390   exitdest = loop->single_exit->dest;
2391
2392   for (phi = phi_nodes (exitdest); phi; phi = PHI_CHAIN (phi))
2393     if (PHI_NUM_ARGS (phi) != 1)
2394       goto fail;
2395
2396   free (bbs);
2397   return true;
2398
2399  fail:
2400   free (bbs);
2401   return false;
2402 }
2403
2404 /* Transform the loop nest into a perfect nest, if possible.
2405    LOOPS is the current struct loops *
2406    LOOP is the loop nest to transform into a perfect nest
2407    LBOUNDS are the lower bounds for the loops to transform
2408    UBOUNDS are the upper bounds for the loops to transform
2409    STEPS is the STEPS for the loops to transform.
2410    LOOPIVS is the induction variables for the loops to transform.
2411
2412    Basically, for the case of
2413
2414    FOR (i = 0; i < 50; i++)
2415     {
2416      FOR (j =0; j < 50; j++)
2417      {
2418         <whatever>
2419      }
2420      <some code>
2421     }
2422
2423    This function will transform it into a perfect loop nest by splitting the
2424    outer loop into two loops, like so:
2425
2426    FOR (i = 0; i < 50; i++)
2427    {
2428      FOR (j = 0; j < 50; j++)
2429      {
2430          <whatever>
2431      }
2432    }
2433
2434    FOR (i = 0; i < 50; i ++)
2435    {
2436     <some code>
2437    }
2438
2439    Return FALSE if we can't make this loop into a perfect nest.  */
2440
2441 static bool
2442 perfect_nestify (struct loops *loops,
2443                  struct loop *loop,
2444                  VEC(tree,heap) *lbounds,
2445                  VEC(tree,heap) *ubounds,
2446                  VEC(int,heap) *steps,
2447                  VEC(tree,heap) *loopivs)
2448 {
2449   basic_block *bbs;
2450   tree exit_condition;
2451   tree then_label, else_label, cond_stmt;
2452   basic_block preheaderbb, headerbb, bodybb, latchbb, olddest;
2453   int i;
2454   block_stmt_iterator bsi, firstbsi;
2455   bool insert_after;
2456   edge e;
2457   struct loop *newloop;
2458   tree phi;
2459   tree uboundvar;
2460   tree stmt;
2461   tree oldivvar, ivvar, ivvarinced;
2462   VEC(tree,heap) *phis = NULL;
2463   htab_t replacements = NULL;
2464
2465   /* Create the new loop.  */
2466   olddest = loop->single_exit->dest;
2467   preheaderbb = loop_split_edge_with (loop->single_exit, NULL);
2468   headerbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2469
2470   /* Push the exit phi nodes that we are moving.  */
2471   for (phi = phi_nodes (olddest); phi; phi = PHI_CHAIN (phi))
2472     {
2473       VEC_reserve (tree, heap, phis, 2);
2474       VEC_quick_push (tree, phis, PHI_RESULT (phi));
2475       VEC_quick_push (tree, phis, PHI_ARG_DEF (phi, 0));
2476     }
2477   e = redirect_edge_and_branch (single_succ_edge (preheaderbb), headerbb);
2478
2479   /* Remove the exit phis from the old basic block.  Make sure to set
2480      PHI_RESULT to null so it doesn't get released.  */
2481   while (phi_nodes (olddest) != NULL)
2482     {
2483       SET_PHI_RESULT (phi_nodes (olddest), NULL);
2484       remove_phi_node (phi_nodes (olddest), NULL);
2485     }
2486
2487   /* and add them back to the new basic block.  */
2488   while (VEC_length (tree, phis) != 0)
2489     {
2490       tree def;
2491       tree phiname;
2492       def = VEC_pop (tree, phis);
2493       phiname = VEC_pop (tree, phis);
2494       phi = create_phi_node (phiname, preheaderbb);
2495       add_phi_arg (phi, def, single_pred_edge (preheaderbb));
2496     }
2497   flush_pending_stmts (e);
2498   VEC_free (tree, heap, phis);
2499
2500   bodybb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2501   latchbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2502   make_edge (headerbb, bodybb, EDGE_FALLTHRU);
2503   then_label = build1 (GOTO_EXPR, void_type_node, tree_block_label (latchbb));
2504   else_label = build1 (GOTO_EXPR, void_type_node, tree_block_label (olddest));
2505   cond_stmt = build3 (COND_EXPR, void_type_node,
2506                       build2 (NE_EXPR, boolean_type_node,
2507                               integer_one_node,
2508                               integer_zero_node),
2509                       then_label, else_label);
2510   bsi = bsi_start (bodybb);
2511   bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
2512   e = make_edge (bodybb, olddest, EDGE_FALSE_VALUE);
2513   make_edge (bodybb, latchbb, EDGE_TRUE_VALUE);
2514   make_edge (latchbb, headerbb, EDGE_FALLTHRU);
2515
2516   /* Update the loop structures.  */
2517   newloop = duplicate_loop (loops, loop, olddest->loop_father);
2518   newloop->header = headerbb;
2519   newloop->latch = latchbb;
2520   newloop->single_exit = e;
2521   add_bb_to_loop (latchbb, newloop);
2522   add_bb_to_loop (bodybb, newloop);
2523   add_bb_to_loop (headerbb, newloop);
2524   set_immediate_dominator (CDI_DOMINATORS, bodybb, headerbb);
2525   set_immediate_dominator (CDI_DOMINATORS, headerbb, preheaderbb);
2526   set_immediate_dominator (CDI_DOMINATORS, preheaderbb,
2527                            loop->single_exit->src);
2528   set_immediate_dominator (CDI_DOMINATORS, latchbb, bodybb);
2529   set_immediate_dominator (CDI_DOMINATORS, olddest, bodybb);
2530   /* Create the new iv.  */
2531   oldivvar = VEC_index (tree, loopivs, 0);
2532   ivvar = create_tmp_var (TREE_TYPE (oldivvar), "perfectiv");
2533   add_referenced_var (ivvar);
2534   standard_iv_increment_position (newloop, &bsi, &insert_after);
2535   create_iv (VEC_index (tree, lbounds, 0),
2536              build_int_cst (TREE_TYPE (oldivvar), VEC_index (int, steps, 0)),
2537              ivvar, newloop, &bsi, insert_after, &ivvar, &ivvarinced);
2538
2539   /* Create the new upper bound.  This may be not just a variable, so we copy
2540      it to one just in case.  */
2541
2542   exit_condition = get_loop_exit_condition (newloop);
2543   uboundvar = create_tmp_var (integer_type_node, "uboundvar");
2544   add_referenced_var (uboundvar);
2545   stmt = build2 (MODIFY_EXPR, void_type_node, uboundvar,
2546                  VEC_index (tree, ubounds, 0));
2547   uboundvar = make_ssa_name (uboundvar, stmt);
2548   TREE_OPERAND (stmt, 0) = uboundvar;
2549
2550   if (insert_after)
2551     bsi_insert_after (&bsi, stmt, BSI_SAME_STMT);
2552   else
2553     bsi_insert_before (&bsi, stmt, BSI_SAME_STMT);
2554   update_stmt (stmt);
2555   COND_EXPR_COND (exit_condition) = build2 (GE_EXPR,
2556                                             boolean_type_node,
2557                                             uboundvar,
2558                                             ivvarinced);
2559   update_stmt (exit_condition);
2560   replacements = htab_create_ggc (20, tree_map_hash,
2561                                   tree_map_eq, NULL);
2562   bbs = get_loop_body_in_dom_order (loop);
2563   /* Now move the statements, and replace the induction variable in the moved
2564      statements with the correct loop induction variable.  */
2565   oldivvar = VEC_index (tree, loopivs, 0);
2566   firstbsi = bsi_start (bodybb);
2567   for (i = loop->num_nodes - 1; i >= 0 ; i--)
2568     {
2569       block_stmt_iterator tobsi = bsi_last (bodybb);
2570       if (bbs[i]->loop_father == loop)
2571         {
2572           /* If this is true, we are *before* the inner loop.
2573              If this isn't true, we are *after* it.
2574
2575              The only time can_convert_to_perfect_nest returns true when we
2576              have statements before the inner loop is if they can be moved
2577              into the inner loop.
2578
2579              The only time can_convert_to_perfect_nest returns true when we
2580              have statements after the inner loop is if they can be moved into
2581              the new split loop.  */
2582
2583           if (dominated_by_p (CDI_DOMINATORS, loop->inner->header, bbs[i]))
2584             {
2585               block_stmt_iterator header_bsi
2586                 = bsi_after_labels (loop->inner->header);
2587
2588               for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi);)
2589                 {
2590                   tree stmt = bsi_stmt (bsi);
2591
2592                   if (stmt == exit_condition
2593                       || not_interesting_stmt (stmt)
2594                       || stmt_is_bumper_for_loop (loop, stmt))
2595                     {
2596                       bsi_next (&bsi);
2597                       continue;
2598                     }
2599
2600                   bsi_move_before (&bsi, &header_bsi);
2601                 }
2602             }
2603           else
2604             {
2605               /* Note that the bsi only needs to be explicitly incremented
2606                  when we don't move something, since it is automatically
2607                  incremented when we do.  */
2608               for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi);)
2609                 {
2610                   ssa_op_iter i;
2611                   tree n, stmt = bsi_stmt (bsi);
2612
2613                   if (stmt == exit_condition
2614                       || not_interesting_stmt (stmt)
2615                       || stmt_is_bumper_for_loop (loop, stmt))
2616                     {
2617                       bsi_next (&bsi);
2618                       continue;
2619                     }
2620
2621                   replace_uses_equiv_to_x_with_y
2622                     (loop, stmt, oldivvar, VEC_index (int, steps, 0), ivvar,
2623                      VEC_index (tree, lbounds, 0), replacements, &firstbsi);
2624
2625                   bsi_move_before (&bsi, &tobsi);
2626
2627                   /* If the statement has any virtual operands, they may
2628                      need to be rewired because the original loop may
2629                      still reference them.  */
2630                   FOR_EACH_SSA_TREE_OPERAND (n, stmt, i, SSA_OP_ALL_VIRTUALS)
2631                     mark_sym_for_renaming (SSA_NAME_VAR (n));
2632                 }
2633             }
2634
2635         }
2636     }
2637
2638   free (bbs);
2639   htab_delete (replacements);
2640   return perfect_nest_p (loop);
2641 }
2642
2643 /* Return true if TRANS is a legal transformation matrix that respects
2644    the dependence vectors in DISTS and DIRS.  The conservative answer
2645    is false.
2646
2647    "Wolfe proves that a unimodular transformation represented by the
2648    matrix T is legal when applied to a loop nest with a set of
2649    lexicographically non-negative distance vectors RDG if and only if
2650    for each vector d in RDG, (T.d >= 0) is lexicographically positive.
2651    i.e.: if and only if it transforms the lexicographically positive
2652    distance vectors to lexicographically positive vectors.  Note that
2653    a unimodular matrix must transform the zero vector (and only it) to
2654    the zero vector." S.Muchnick.  */
2655
2656 bool
2657 lambda_transform_legal_p (lambda_trans_matrix trans,
2658                           int nb_loops,
2659                           VEC (ddr_p, heap) *dependence_relations)
2660 {
2661   unsigned int i, j;
2662   lambda_vector distres;
2663   struct data_dependence_relation *ddr;
2664
2665   gcc_assert (LTM_COLSIZE (trans) == nb_loops
2666               && LTM_ROWSIZE (trans) == nb_loops);
2667
2668   /* When there is an unknown relation in the dependence_relations, we
2669      know that it is no worth looking at this loop nest: give up.  */
2670   ddr = VEC_index (ddr_p, dependence_relations, 0);
2671   if (ddr == NULL)
2672     return true;
2673   if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2674     return false;
2675
2676   distres = lambda_vector_new (nb_loops);
2677
2678   /* For each distance vector in the dependence graph.  */
2679   for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
2680     {
2681       /* Don't care about relations for which we know that there is no
2682          dependence, nor about read-read (aka. output-dependences):
2683          these data accesses can happen in any order.  */
2684       if (DDR_ARE_DEPENDENT (ddr) == chrec_known
2685           || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
2686         continue;
2687
2688       /* Conservatively answer: "this transformation is not valid".  */
2689       if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2690         return false;
2691
2692       /* If the dependence could not be captured by a distance vector,
2693          conservatively answer that the transform is not valid.  */
2694       if (DDR_NUM_DIST_VECTS (ddr) == 0)
2695         return false;
2696
2697       /* Compute trans.dist_vect */
2698       for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
2699         {
2700           lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
2701                                      DDR_DIST_VECT (ddr, j), distres);
2702
2703           if (!lambda_vector_lexico_pos (distres, nb_loops))
2704             return false;
2705         }
2706     }
2707   return true;
2708 }